VisualManager module: preprocessor, imagesaver and trainer #7

Open
wants to merge 20 commits into base: master
92 changes: 87 additions & 5 deletions robosuite/demos/demo_random_action.py
@@ -1,6 +1,8 @@
from robosuite.controllers import load_controller_config
from robosuite.utils.input_utils import *

from robosuite.utils.visual.VisualManager import VisualManager


if __name__ == "__main__":

@@ -49,20 +51,100 @@
# initialize the task
env = suite.make(
**options,
has_renderer=True,
has_offscreen_renderer=False,
has_renderer=False,
has_offscreen_renderer=True,
ignore_done=True,
use_camera_obs=False,
use_camera_obs=True,
control_freq=20,
#camera_names = ['agentview','agentview2']
)
env.reset()
env.viewer.set_camera(camera_id=0)
#env.set_camera(camera_id=0)

# Get action limits
low, high = env.action_spec

eyes = VisualManager(
MODEL_ROOT = '/home/dizzyi/GNN/detectron/tutorial/output',
# the directory of the pretrained model

DATA_ROOT = './imagesave', # the directory to save images and data

verbose = True, # print verbose output

train_schedule = (10_000,), # the trainer fine-tunes the model each time the number of saved images reaches a listed count

preprocessor_kwarg = dict(
mask_size = (128,128), # size images will be warped to
grayscale = True, # use grayscale rather than binary masks to keep more information
threshold = 0.5, # confidence-score threshold
backbone = None, # backbone for image and masks
getVec = None, # function that extracts a vector from the feature map
norm = None, # norm layer for image and masks
acti = None, # activation layer for image and masks
),
imagesaver_kwarg = dict(
save_mode = True, # True to turn on image-saving mode
save_freq = 5 # save an image and its annotations every save_freq steps
),
trainer_kwarg = dict(
NUM_CLASSES = 20, # number of classes to classify
train_mode = True, # True to turn on training mode
NEW_MODEL_ROOT = './new_model',
# the directory where all newly tuned models are saved
)
)

from PIL import Image
import time
# do visualization
for i in range(1000):

print("--------------------------------------")
#delta = time.time()
action = np.random.uniform(low, high)
obs, reward, done, _ = env.step(action)
env.render()

feature_vectors = eyes(obs['agentview_image'],env)

'''

img = Image.fromarray(img).rotate(180)
segment = Image.fromarray(seg).rotate(180)

objects = {}
for i in seg.reshape(-1,3):
name = env.sim.model.geom_id2name(i[1])
objects[i[1]] = name

for k, v in sorted(objects.items()): print(k, v.split("_") if v is not None else v)

# objects: id => name
# ids: array of the M distinct ids
ids = np.unique(seg)
#ids = np.array(list(filter(lambda id: objects[id] != None, ids)))

# mask (256,256,1) with M ID
_,mask,_ = np.split(seg,3,axis=2)

# mask[np.newaxis] ==> ( 1, 256, 256, 1)
# ids[:, np.newaxis, np.newaxis, np.newaxis] ==> ( M, 1, 1, 1)
# => broadcastable against each other

# masks ==> (M, 256, 256, 1)
masks = ( mask[np.newaxis] == ids[:, np.newaxis, np.newaxis, np.newaxis]).squeeze().astype(np.uint8)
#masks = np.array(list( filter( lambda m: m.sum() > 100, masks ) )) outdated
masks = masks * 255



img.save('./image.png')
segment.save('./segment.png')

for ind, msk in enumerate( masks ):
seg_png = Image.fromarray(msk,mode='L').rotate(180)
seg_png.save(f'./seg/{ids[ind]}-{objects[ids[ind]]}.png')
print(ind)
'''
#print(time.time()-delta)
#env.render()
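
The commented-out debug block above builds one binary mask per geom id with a single broadcasted comparison. A minimal standalone sketch of the same trick (dummy data; independent of robosuite):

import numpy as np

mask = np.random.randint(0, 4, size=(256, 256, 1))  # stand-in for the geom-id channel

ids = np.unique(mask)  # (M,) distinct ids
# (1, 256, 256, 1) compared against (M, 1, 1, 1) broadcasts to (M, 256, 256, 1)
masks = (mask[np.newaxis] == ids[:, np.newaxis, np.newaxis, np.newaxis])
masks = masks.squeeze(-1).astype(np.uint8) * 255  # (M, 256, 256) binary masks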
23 changes: 21 additions & 2 deletions robosuite/environments/robot_env.py
@@ -361,12 +361,31 @@ def camera_rgb(obs_cache):
height=cam_h,
depth=cam_d,
)
######################################################################################
# Added: also render the frame as a segmentation image
img_seg = self.sim.render(
camera_name=cam_name,
width=cam_w,
height=cam_h,
depth=cam_d,
segmentation=True
)
# pad the two-channel (obj type, geom id) segmentation to three channels
img_seg = np.concatenate( (img_seg, np.zeros( (cam_h, cam_w, 1), dtype=np.uint8) ), axis=2 )
######################################################################################
if cam_d:
rgb, depth = img
obs_cache[depth_sensor_name] = np.expand_dims(depth[::convention], axis=-1)
return rgb[::convention]

##################################################################################
# Modified to return (rgb, seg) instead of rgb alone
return (rgb[::convention], img_seg[::convention])
##################################################################################
else:
return img[::convention]

##################################################################################
# Modified to return an (rgb, seg) pair, each (cam_h, cam_w, 3), stacked downstream into (2, cam_h, cam_w, 3)
return (img[::convention], img_seg[::convention])
##################################################################################

sensors.append(camera_rgb)
names.append(rgb_sensor_name)
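With this change each camera's RGB sensor emits an (rgb, seg) pair rather than a single image, so consumers of the observation dict must unpack it. A minimal sketch, assuming the stacked (2, cam_h, cam_w, 3) layout noted above and that channel 1 of the segmentation image carries MuJoCo geom ids (as the demo's debug block assumes):

rgb, seg = obs['agentview_image']  # each (cam_h, cam_w, 3)
geom_ids = seg[..., 1]             # channel 1: geom ids; channel 2 is zero padding
print(env.sim.model.geom_id2name(int(geom_ids[128, 128])))  # may print None
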
121 changes: 121 additions & 0 deletions robosuite/utils/visual/Trainer.py
@@ -0,0 +1,121 @@
import torch

from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, pickle

# import some common detectron2 utilities
from detectron2.data.datasets import register_coco_instances
from detectron2.engine import DefaultTrainer
from detectron2.data import DatasetCatalog

class Trainer():
def __init__(
self,
NUM_CLASSES = 20,
train_mode = False,
DATA_ROOT = None,
MODEL_ROOT = None,
NEW_MODEL_ROOT = None
):
self.train_mode = train_mode
self.DATA_ROOT = DATA_ROOT
self.MODEL_ROOT = [MODEL_ROOT]
self.NEW_MODEL_ROOT = NEW_MODEL_ROOT
self.NUM_CLASSES = NUM_CLASSES
if self.train_mode:
assert MODEL_ROOT is not None, "Need to provide MODEL_ROOT"
assert DATA_ROOT is not None, "Need to provide DATA_ROOT"
assert NEW_MODEL_ROOT is not None, "Need to provide NEW_MODEL_ROOT"


def train(self, sche, hyperparam_kwarg = None):
self.current_dir = sche
# guard against the default None; set_hyperparam takes no required kwargs
self.set_hyperparam(**(hyperparam_kwarg or {}))

trainer = DefaultTrainer(self.cfg)
trainer.resume_or_load(resume=False)
trainer.train()

with open(os.path.join(self.NEW_MODEL_ROOT, sche , 'model_cfg.pickle'), 'wb') as f:
pickle.dump(self.cfg,f)

self.MODEL_ROOT.append( os.path.join(self.NEW_MODEL_ROOT, self.current_dir) )


def get_current_root(self):
return self.MODEL_ROOT[-1]

def set_hyperparam(self):

def VisualManager_Trainer_dataset_function():
returnList = []
for file in sorted(os.listdir(self.DATA_ROOT)):
if not file.endswith('.pickle'): continue

f_path = os.path.join(self.DATA_ROOT,file)

with open(f_path,'rb') as f:
returnList.append(pickle.load(f))
return returnList

DatasetCatalog.register('VisualManager_Trainer_Dataset', VisualManager_Trainer_dataset_function)
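
# Each pickled record is expected to be a standard detectron2 dataset dict,
# roughly (field names per detectron2's dataset documentation; the values
# below are only illustrative):
#   {"file_name": "./imagesave/0001.png", "image_id": 1,
#    "height": 256, "width": 256,
#    "annotations": [{"bbox": [x0, y0, x1, y1],
#                     "bbox_mode": BoxMode.XYXY_ABS,
#                     "segmentation": {...},  # polygons or RLE
#                     "category_id": 3}, ...]}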


with open(os.path.join(self.MODEL_ROOT[-1], 'model_cfg.pickle'), 'rb') as f:
self.cfg = pickle.load(f)

self.cfg.DATASETS.TRAIN = ('VisualManager_Trainer_Dataset',)

self.cfg.MODEL.WEIGHTS = os.path.join(self.MODEL_ROOT[-1], "model_final.pth")

# Detectron default 4
self.cfg.DATALOADER.NUM_WORKERS = 4
# Detectron default 40000
self.cfg.SOLVER.MAX_ITER = 120_000
'''
detectron2 defaults:
BASE_LR 0.001
GAMMA 0.1
STEPS (30000,)
GAMMA: learning-rate decay factor
STEPS: iteration counts at which the learning rate is decayed by GAMMA

Mask R-CNN paper: https://arxiv.org/pdf/1703.06870.pdf
base LR 0.02, decayed by 10 at 120k of 160k iterations

Cityscapes fine-tuning:
base LR 0.001, decayed by 10 at 18k of 24k iterations

Updated baselines:
base LR 0.001, decayed by 10 at 120k and 160k of 180k iterations

Benefits from deeper models.
'''
self.cfg.SOLVER.BASE_LR = 0.001
self.cfg.SOLVER.GAMMA = 0.1
self.cfg.SOLVER.STEPS = (90_000,)
self.cfg.SOLVER.WEIGHT_DECAY = 0.000_1


# total ROIs per iteration = ROI_HEADS.BATCH_SIZE_PER_IMAGE * SOLVER.IMS_PER_BATCH
# e.g. the common configuration 512 * 16 = 8192; here 2048 * 32 = 65536
# detectron2 default: 16
self.cfg.SOLVER.IMS_PER_BATCH = 32
# Detectron default 512
self.cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 2048

# Number of classes
self.cfg.MODEL.ROI_HEADS.NUM_CLASSES = self.NUM_CLASSES

# confidence threshold
self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7 # set a custom testing threshold

self.cfg.OUTPUT_DIR = os.path.join(self.NEW_MODEL_ROOT, self.current_dir)
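
A minimal usage sketch of Trainer (the paths and the schedule string below are assumptions for illustration, not taken from this diff):

trainer = Trainer(
    NUM_CLASSES=20,
    train_mode=True,
    DATA_ROOT='./imagesave',      # assumed: where the image saver wrote its pickles
    MODEL_ROOT='./output',        # assumed: holds model_cfg.pickle and model_final.pth
    NEW_MODEL_ROOT='./new_model',
)
trainer.train('10000')            # fine-tunes and writes to ./new_model/10000
print(trainer.get_current_root()) # directory of the newly tuned model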
