In [1]:
import os
import time

from libero.libero import benchmark, get_libero_path
from libero.libero.envs import OffScreenRenderEnv

In [2]:
# Pick the suite by name from LIBERO's benchmark registry and instantiate it.
# The registry maps suite names to constructors, hence the trailing call.
task_suite_name = "libero_object"
benchmark_dict = benchmark.get_benchmark_dict()
task_suite = benchmark_dict[task_suite_name]()

[info] using task orders [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


In [3]:
# Select one task from the suite and resolve everything later cells need:
# its name, its language instruction and the absolute path of its BDDL file.
task_id = 0
task = task_suite.get_task(task_id)
task_name, task_description = task.name, task.language
task_bddl_file = os.path.join(
    get_libero_path("bddl_files"),
    task.problem_folder,
    task.bddl_file,
)
print(
    f"[info] retrieving task {task_id} from suite {task_suite_name}, the "
    f"language instruction is {task_description}, and the bddl file is {task_bddl_file}"
)

[info] retrieving task 0 from suite libero_object, the language instruction is pick up the alphabet soup and place it in the basket, and the bddl file is /home/neo/Research/LIBERO/libero/libero/./bddl_files/libero_object/pick_up_the_alphabet_soup_and_place_it_in_the_basket.bddl


In [4]:
from easydict import EasyDict

# Experiment configuration, written out as one literal and wrapped in EasyDict so
# that nested keys can be read either as cfg['train'] or cfg.train.
# Top-level sections: general run options, 'data', 'policy', 'train', 'eval'
# and 'lifelong'.
# Values the other cells in this notebook read directly:
#   - cfg['folder'] is None, so the dataset cell falls back to
#     get_libero_path("datasets").
#   - cfg['data']['obs']['modality'] and cfg['data']['seq_len'] are passed to
#     get_dataset().
#   - cfg.train.batch_size / cfg.train.num_workers configure the DataLoader.
#   - cfg.eval.eval_every, cfg.eval.n_eval and cfg.eval.save_sim_states are read
#     (via algo.cfg) by the training loop.
# NOTE(review): 'train.n_epochs' is 15 here, but the training cell loops over its
# own `epochs = 100`; confirm which of the two is intended.
# NOTE(review): 'device' is hard-coded to 'cuda'.
cfg = EasyDict({'seed': 10000, 'use_wandb': False, 'wandb_project': 'lifelong learning', 'folder': None, 'bddl_folder': None, 'init_states_folder': None, 'load_previous_model': False, 'device': 'cuda', 'task_embedding_format': 'bert', 'task_embedding_one_hot_offset': 1, 'pretrain': False, 'pretrain_model_path': '', 'benchmark_name': 'LIBERO_OBJECT', 'data': {'data_modality': ['image', 'proprio'], 'seq_len': 10, 'frame_stack': 1, 'use_eye_in_hand': True, 'use_gripper': True, 'use_joint': True, 'use_ee': False, 'max_word_len': 25, 'state_dim': None, 'num_kp': 64, 'img_h': 128, 'img_w': 128, 'task_group_size': 1, 'task_order_index': 0, 'shuffle_task': False, 'obs': {'modality': {'rgb': ['agentview_rgb', 'eye_in_hand_rgb'], 'depth': [], 'low_dim': ['gripper_states', 'joint_states']}}, 'obs_key_mapping': {'agentview_rgb': 'agentview_image', 'eye_in_hand_rgb': 'robot0_eye_in_hand_image', 'gripper_states': 'robot0_gripper_qpos', 'joint_states': 'robot0_joint_pos'}, 'affine_translate': 4, 'action_scale': 1.0, 'train_dataset_ratio': 0.8}, 'policy': {'color_aug': {'network': 'BatchWiseImgColorJitterAug', 'network_kwargs': {'input_shape': None, 'brightness': 0.3, 'contrast': 0.3, 'saturation': 0.3, 'hue': 0.3, 'epsilon': 0.1}}, 'translation_aug': {'network': 'TranslationAug', 'network_kwargs': {'input_shape': None, 'translation': 8}}, 'image_encoder': {'network': 'ResnetEncoder', 'network_kwargs': {'pretrained': False, 'freeze': False, 'remove_layer_num': 4, 'no_stride': False, 'language_fusion': 'film'}}, 'language_encoder': {'network': 'MLPEncoder', 'network_kwargs': {'input_size': 768, 'hidden_size': 128, 'output_size': 128, 'num_layers': 1}}, 'policy_head': {'network': 'GMMHead', 'network_kwargs': {'hidden_size': 512, 'num_layers': 2, 'min_std': 0.0001, 'num_modes': 5, 'low_eval_noise': False, 'activation': 'softplus'}, 'loss_kwargs': {'loss_coef': 1.0}}, 'policy_type': 'BCRNNPolicy', 'image_embed_size': 64, 'text_embed_size': 32, 'rnn_hidden_size': 512, 'rnn_num_layers': 
2, 'rnn_dropout': 0.0, 'rnn_bidirectional': False}, 'train': {'optimizer': {'name': 'torch.optim.AdamW', 'kwargs': {'lr': 0.0001, 'betas': [0.9, 0.999], 'weight_decay': 0.0001}}, 'scheduler': {'name': 'torch.optim.lr_scheduler.CosineAnnealingLR', 'kwargs': {'eta_min': 1e-05, 'last_epoch': -1}}, 'n_epochs': 15, 'batch_size': 32, 'num_workers': 4, 'grad_clip': 100.0, 'loss_scale': 1.0, 'resume': False, 'resume_path': '', 'debug': False, 'use_augmentation': True}, 'eval': {'load_path': '', 'eval': True, 'batch_size': 64, 'num_workers': 4, 'n_eval': 20, 'eval_every': 5, 'max_steps': 600, 'use_mp': True, 'num_procs': 20, 'save_sim_states': False}, 'lifelong': {'algo': 'Sequential'}})

In [5]:
from bc_algo import BehavioralCloningLifelongAlgo
from libero.lifelong.utils import safe_device


In [9]:
from libero.lifelong.datasets import SequenceVLDataset, get_dataset
from libero.lifelong.main import get_task_embs

# --- Simulation environment -------------------------------------------------
# Off-screen environment for the selected task, rendered at the same 128x128
# resolution that cfg['data'] declares for images.
# NOTE(review): `env` is not referenced again by the cells visible in this
# notebook; confirm whether it is still needed.
env_args = {
    "bddl_file_name": task_bddl_file,
    "camera_heights": 128,
    "camera_widths": 128
}

env = OffScreenRenderEnv(**env_args)
env.seed(0)
env.reset()
init_states = task_suite.get_task_init_states(task_id)  # for benchmarking purposes, we fix a set of initial states
init_state_id = 0
env.set_init_state(init_states[init_state_id])

# --- Demonstration dataset --------------------------------------------------
# Derive the demonstration file from the selected task instead of hard-coding
# the task-0 file name. With a literal path, changing `task_id` above would
# silently pair the new task's environment and language embedding with the
# alphabet-soup demonstrations. For task 0 of libero_object this is expected to
# resolve to the previous literal,
# "libero_object/pick_up_the_alphabet_soup_and_place_it_in_the_basket_demo.hdf5".
demo_path = task_suite.get_task_demonstration(task_id)

task_dataset, shape_meta = get_dataset(
    # cfg['folder'] is None by default, so fall back to LIBERO's dataset root.
    dataset_path=os.path.join(cfg['folder'] or get_libero_path("datasets"), demo_path),
    obs_modality=cfg['data']['obs']['modality'],
    initialize_obs_utils=True,
    seq_len=cfg['data']['seq_len'],
)

# The algorithm is built from cfg in a later cell, so the observation/action
# shapes are attached to cfg here.
cfg.shape_meta = shape_meta

# Encode the single language instruction and pair it with the demonstrations.
task_embeddings = safe_device(get_task_embs(cfg, [task_description])[0])
dataset = SequenceVLDataset(task_dataset, task_embeddings)
n_demos = dataset.n_demos
n_sequences = dataset.total_num_sequences




using obs modality: rgb with keys: ['eye_in_hand_rgb', 'agentview_rgb']
using obs modality: depth with keys: []
using obs modality: low_dim with keys: ['gripper_states', 'joint_states']
SequenceDataset: loading dataset into memory...
100%|██████████| 50/50 [00:00<00:00, 471.50it/s]


In [7]:
from libero.lifelong.models.modules.rgb_modules import ResnetEncoder
from torch.utils.data import DataLoader, RandomSampler

# Build the behavioral-cloning algorithm, then hand it to safe_device.
algo = BehavioralCloningLifelongAlgo(n_tasks=0, cfg=cfg, shape_meta=shape_meta)
algo = safe_device(algo)

# The steps below follow LIBERO's reference single-task training routine, see:
# https://github.com/Lifelong-Robot-Learning/LIBERO/blob/3f0dd188960c25b608babc7de34e1e20a9d1306f/libero/lifelong/algos/base.py#L128

# Start the task, then snapshot the policy weights as the initial "best" state.
algo.start_task(task)
best_state_dict = algo.policy.state_dict()

# Randomly sampled mini-batches of demonstration sequences; worker processes
# are kept alive between epochs (persistent_workers=True).
train_dataloader = DataLoader(
    dataset,
    sampler=RandomSampler(dataset),
    batch_size=cfg.train.batch_size,
    num_workers=cfg.train.num_workers,
    persistent_workers=True,
)

[info] Experiment directory not specified. Creating a default one: ./experiments/LIBERO_OBJECT/Sequential/BCRNNPolicy_seed10000/run_005


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [16]:
from libero.lifelong.utils import torch_save_model, confidence_interval
from libero.lifelong.metric import evaluate_one_task_success, raw_obs_to_tensor_obs
import numpy as np

# Single-task behavioral-cloning training loop with periodic rollout evaluation.
#
# Every epoch: one pass over `train_dataloader` through `algo.observe`, logging
# the mean loss. Every `algo.cfg.eval.eval_every` epochs: evaluate the task
# success rate, save a checkpoint whenever it improves, and print a running
# success summary.
#
# NOTE(review): `epochs` is 100 while cfg.train.n_epochs is 15. If the LR
# scheduler created in `algo.start_task` is sized from cfg.train.n_epochs, the
# schedule will not match this loop - confirm in bc_algo.
epochs = 100

losses = []
successes = []
n_manip_tasks = 1
cumulated_counter = 0.0

# Best-checkpoint bookkeeping. These must exist before the first evaluation:
# the loop reads `prev_success_rate` before assigning it, and reads
# `idx_at_best_succ` even when no evaluation improved. Starting at -1.0
# guarantees the first evaluation (any rate >= 0) saves a checkpoint.
prev_success_rate = -1.0
idx_at_best_succ = 0

result_summary = {
    "L_conf_mat": np.zeros((n_manip_tasks, n_manip_tasks)),  # loss confusion matrix
    "S_conf_mat": np.zeros((n_manip_tasks, n_manip_tasks)),  # success confusion matrix
    "L_fwd": np.zeros((n_manip_tasks,)),  # loss AUC, how fast the agent learns
    "S_fwd": np.zeros((n_manip_tasks,)),  # success AUC, how fast the agent succeeds
}

model_checkpoint_name = os.path.join(
    algo.experiment_dir, f"task{task_id}_model.pth"
)

eval_every = algo.cfg.eval.eval_every

for epoch in range(epochs):
    t0 = time.time()

    # The evaluation below may leave the model in eval mode, so training mode
    # is re-enabled from epoch 1 onwards (epoch 0 runs in whatever mode the
    # freshly built model is in, as before).
    if epoch > 0:
        algo.train()

    # NOTE(review): upstream LIBERO uses an evaluation-only pass at epoch 0 to
    # report zero-shot loss. Here `observe` is used for every epoch, so epoch 0
    # is presumably already a training epoch - confirm this is intended.
    training_loss = 0.0
    for data in train_dataloader:
        training_loss += algo.observe(data)
    training_loss /= len(train_dataloader)
    t1 = time.time()

    print(
        f"[info] Epoch: {epoch:3d} | train loss: {training_loss:5.2f} | time: {(t1-t0)/60:4.2f}"
    )

    if epoch % eval_every == 0:  # evaluate BC loss
        # every eval_every epoch, we evaluate the agent on the current task,
        # then we pick the best performant agent on the current task as
        # if it stops learning after that specific epoch. So the stopping
        # criterion for learning a new task is achieving the peak performance
        # on the new task. Future work can explore how to decide this stopping
        # epoch by also considering the agent's performance on old tasks.
        losses.append(training_loss)

        t0 = time.time()

        # Was `algo.eval.eval_every`; the config lives under `algo.cfg`.
        task_str = f"k{task_id}_e{epoch//eval_every}"
        if algo.cfg.eval.save_sim_states:
            # `result_summary` has no per-evaluation entries in this notebook,
            # so create the slot on demand instead of raising KeyError.
            # Presumably one list per evaluation rollout is expected - TODO
            # confirm against evaluate_one_task_success.
            sim_states = result_summary.setdefault(
                task_str, [[] for _ in range(algo.cfg.eval.n_eval)]
            )
        else:
            sim_states = None

        success_rate = evaluate_one_task_success(
            cfg=algo.cfg,
            algo=algo,
            task=task,
            task_emb=task_embeddings,
            task_id=task_id,
            sim_states=sim_states,
            task_str="",
        )
        successes.append(success_rate)

        # Keep only the checkpoint with the highest success rate so far.
        if prev_success_rate < success_rate:
            torch_save_model(algo.policy, model_checkpoint_name, cfg=algo.cfg)
            prev_success_rate = success_rate
            idx_at_best_succ = len(losses) - 1

        t1 = time.time()

        cumulated_counter += 1.0
        ci = confidence_interval(success_rate, algo.cfg.eval.n_eval)
        # Treat the agent as if it stopped learning at its best evaluation:
        # every later entry is replaced by the best success rate before averaging.
        tmp_successes = np.array(successes)
        tmp_successes[idx_at_best_succ:] = successes[idx_at_best_succ]
        print(
            f"[info] Epoch: {epoch:3d} | succ: {success_rate:4.2f} ± {ci:4.2f} | best succ: {prev_success_rate} "
            + f"| succ. AoC {tmp_successes.sum()/cumulated_counter:4.2f} | time: {(t1-t0)/60:4.2f}",
            flush=True,
        )

    # The scheduler is not stepped after epoch 0.
    if algo.scheduler is not None and epoch > 0:
        algo.scheduler.step()
