In [1]:
import os
import time

from libero.libero import benchmark, get_libero_path
from libero.libero.envs import OffScreenRenderEnv

In [2]:
# Pick the benchmark suite by name, then instantiate it from the mapping
# returned by `benchmark.get_benchmark_dict()` (suite name -> suite factory).
task_suite_name = "libero_object"
benchmark_dict = benchmark.get_benchmark_dict()
task_suite = benchmark_dict[task_suite_name]()

[info] using task orders [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


In [3]:
# Fetch one task from the suite and pull out the metadata used below.
task_id = 0
task = task_suite.get_task(task_id)
task_name, task_description = task.name, task.language

# The BDDL file path is <bddl_files root>/<problem_folder>/<bddl_file>.
task_bddl_file = os.path.join(
    get_libero_path("bddl_files"),
    task.problem_folder,
    task.bddl_file,
)

# Adjacent f-strings are concatenated by the parser, so the message is
# emitted as a single line.
print(
    f"[info] retrieving task {task_id} from suite {task_suite_name}, the "
    f"language instruction is {task_description}, and the bddl file is {task_bddl_file}"
)

[info] retrieving task 0 from suite libero_object, the language instruction is pick up the alphabet soup and place it in the basket, and the bddl file is /home/neo/Research/LIBERO/libero/libero/./bddl_files/libero_object/pick_up_the_alphabet_soup_and_place_it_in_the_basket.bddl


In [4]:
from easydict import EasyDict

# Experiment configuration, laid out one setting per line and grouped by its
# top-level section. Wrapping it in EasyDict allows attribute-style access
# (e.g. `cfg.data.img_h`) in addition to normal dict indexing.
cfg = EasyDict({
    # --- general run settings ---
    'seed': 10000,
    'use_wandb': False,
    'wandb_project': 'lifelong learning',
    'folder': None,
    'bddl_folder': None,
    'init_states_folder': None,
    'load_previous_model': False,
    'device': 'cuda',
    'task_embedding_format': 'bert',
    'task_embedding_one_hot_offset': 1,
    'pretrain': False,
    'pretrain_model_path': '',
    'benchmark_name': 'LIBERO_OBJECT',

    # --- data: modalities, sequence/image sizes, observation key mapping ---
    'data': {
        'data_modality': ['image', 'proprio'],
        'seq_len': 10,
        'frame_stack': 1,
        'use_eye_in_hand': True,
        'use_gripper': True,
        'use_joint': True,
        'use_ee': False,
        'max_word_len': 25,
        'state_dim': None,
        'num_kp': 64,
        'img_h': 128,
        'img_w': 128,
        'task_group_size': 1,
        'task_order_index': 0,
        'shuffle_task': False,
        'obs': {
            'modality': {
                'rgb': ['agentview_rgb', 'eye_in_hand_rgb'],
                'depth': [],
                'low_dim': ['gripper_states', 'joint_states'],
            },
        },
        'obs_key_mapping': {
            'agentview_rgb': 'agentview_image',
            'eye_in_hand_rgb': 'robot0_eye_in_hand_image',
            'gripper_states': 'robot0_gripper_qpos',
            'joint_states': 'robot0_joint_pos',
        },
        'affine_translate': 4,
        'action_scale': 1.0,
        'train_dataset_ratio': 0.8,
    },

    # --- policy: augmentations, encoders, head, and RNN sizes ---
    'policy': {
        'color_aug': {
            'network': 'BatchWiseImgColorJitterAug',
            'network_kwargs': {
                'input_shape': None,
                'brightness': 0.3,
                'contrast': 0.3,
                'saturation': 0.3,
                'hue': 0.3,
                'epsilon': 0.1,
            },
        },
        'translation_aug': {
            'network': 'TranslationAug',
            'network_kwargs': {
                'input_shape': None,
                'translation': 8,
            },
        },
        'image_encoder': {
            'network': 'ResnetEncoder',
            'network_kwargs': {
                'pretrained': False,
                'freeze': False,
                'remove_layer_num': 4,
                'no_stride': False,
                'language_fusion': 'film',
            },
        },
        'language_encoder': {
            'network': 'MLPEncoder',
            'network_kwargs': {
                'input_size': 768,
                'hidden_size': 128,
                'output_size': 128,
                'num_layers': 1,
            },
        },
        'policy_head': {
            'network': 'GMMHead',
            'network_kwargs': {
                'hidden_size': 512,
                'num_layers': 2,
                'min_std': 0.0001,
                'num_modes': 5,
                'low_eval_noise': False,
                'activation': 'softplus',
            },
            'loss_kwargs': {
                'loss_coef': 1.0,
            },
        },
        'policy_type': 'BCRNNPolicy',
        'image_embed_size': 64,
        'text_embed_size': 32,
        'rnn_hidden_size': 512,
        'rnn_num_layers': 2,
        'rnn_dropout': 0.0,
        'rnn_bidirectional': False,
    },

    # --- training: optimizer, LR scheduler, loop settings ---
    'train': {
        'optimizer': {
            'name': 'torch.optim.AdamW',
            'kwargs': {
                'lr': 0.0001,
                'betas': [0.9, 0.999],
                'weight_decay': 0.0001,
            },
        },
        'scheduler': {
            'name': 'torch.optim.lr_scheduler.CosineAnnealingLR',
            'kwargs': {
                'eta_min': 1e-05,
                'last_epoch': -1,
            },
        },
        'n_epochs': 15,
        'batch_size': 32,
        'num_workers': 4,
        'grad_clip': 100.0,
        'loss_scale': 1.0,
        'resume': False,
        'resume_path': '',
        'debug': False,
        'use_augmentation': True,
    },

    # --- evaluation settings ---
    'eval': {
        'load_path': '',
        'eval': True,
        'batch_size': 64,
        'num_workers': 4,
        'n_eval': 20,
        'eval_every': 5,
        'max_steps': 600,
        'use_mp': True,
        'num_procs': 20,
        'save_sim_states': False,
    },

    # --- lifelong-learning algorithm selection ---
    'lifelong': {
        'algo': 'Sequential',
    },
})

In [5]:
from bc_algo import BehavioralCloningLifelongAlgo
from libero.lifelong.utils import safe_device


In [21]:
# Build an off-screen-rendered environment for the selected task. The camera
# resolution is read from the config (`cfg.data.img_h` / `cfg.data.img_w`,
# both 128) instead of being repeated as literals, so the rendered frames
# cannot drift from the image size declared in the config.
env_args = {
    "bddl_file_name": task_bddl_file,
    "camera_heights": cfg.data.img_h,
    "camera_widths": cfg.data.img_w,
}

env = OffScreenRenderEnv(**env_args)
env.seed(0)
env.reset()

# For benchmarking purposes, we fix a set of initial states per task and
# start the rollout from one of them.
init_states = task_suite.get_task_init_states(task_id)
init_state_id = 0
obs = env.set_init_state(init_states[init_state_id])

algo = BehavioralCloningLifelongAlgo(n_tasks=1, cfg=cfg)

# Upper bound on the number of control steps in this short demo rollout.
NUM_ROLLOUT_STEPS = 10

for step in range(NUM_ROLLOUT_STEPS):
    # Feed the current observation to the algorithm, then query its action.
    algo.observe(obs)
    action = algo.predict()

    # Advance the simulation by one step with the predicted action.
    obs, reward, done, info = env.step(action)

    # Render the current state.
    env.render()

    # Stop as soon as the environment reports the episode is over rather than
    # continuing to step a finished episode.
    if done:
        break

# NOTE: `env` is intentionally left open here; it is closed in a later cell.

    

In [None]:
import numpy as np
import h5py
import cv2

# Start a frame list holding the agent-view image from the latest observation.
images = []
images.append(obs["agentview_image"])

# Shut down the environment.
env.close()
