In [1]:
import argparse
import json
import numpy as np
import time
import os
import shutil
import psutil
import sys
import socket
import traceback

from collections import OrderedDict

import torch
from torch.utils.data import DataLoader

import robomimic
import robomimic.utils.train_utils as TrainUtils
import robomimic.utils.torch_utils as TorchUtils
import robomimic.utils.obs_utils as ObsUtils
import robomimic.utils.env_utils as EnvUtils
import robomimic.utils.file_utils as FileUtils
from robomimic.config import config_factory
from robomimic.algo import algo_factory, RolloutPolicy
from robomimic.utils.log_utils import PrintLogger, DataLogger, flush_warnings

### Simple block lifting task

In [2]:
# Path to the external JSON config that is loaded and merged into the base algo config below.
# NOTE(review): absolute, machine-specific path — adjust when running on another machine.
config_file = "/home/ns/hello_bc/configs/real/block_bc_rnn.json"

In [3]:
# Read the external JSON config; the context manager guarantees the file
# handle is closed even if parsing fails.
with open(config_file, 'r') as config_fp:
    ext_cfg = json.load(config_fp)

# Build the base config for the algorithm named in the external file.
config = config_factory(ext_cfg["algo_name"])
# update config with external json - this will throw errors if
# the external config has keys not present in the base algo config
with config.values_unlocked():
    config.update(ext_cfg)

# Lock the config once the external values have been merged in.
config.lock()

# get torch device
device = TorchUtils.get_torch_device(try_to_use_cuda=config.train.cuda)
device

device(type='cuda', index=0)

In [4]:
# Training setup: seeds, logging, dataset metadata, model, datasets and data loaders.

# first set seeds
np.random.seed(config.train.seed)
torch.manual_seed(config.train.seed)

torch.set_num_threads(2)

print("\n============= New Training Run with Config =============")
print(config)
print("")
log_dir, ckpt_dir, video_dir = TrainUtils.get_exp_dir(config)

if config.experiment.logging.terminal_output_to_txt:
    # log stdout and stderr to a text file
    logger = PrintLogger(os.path.join(log_dir, 'log.txt'))
    sys.stdout = logger
    sys.stderr = logger

# read config to set up metadata for observation modalities (e.g. detecting rgb observations)
ObsUtils.initialize_obs_utils_with_config(config)

# make sure the dataset exists
dataset_path = os.path.expanduser(config.train.data)
if not os.path.exists(dataset_path):
    raise Exception("Dataset at provided path {} not found!".format(dataset_path))

# load basic metadata from training file
# (use the expanded path that was just checked, so "~"-style paths resolve consistently)
shape_meta = FileUtils.get_shape_metadata_from_dataset(
    dataset_path=dataset_path,
    all_obs_keys=config.all_obs_keys,
    verbose=True
)

# setup for a new training run
data_logger = DataLogger(
    log_dir,
    config,
    log_tb=config.experiment.logging.log_tb,
    log_wandb=config.experiment.logging.log_wandb,
)
model = algo_factory(
    algo_name=config.algo_name,
    config=config,
    obs_key_shapes=shape_meta["all_shapes"],
    ac_dim=shape_meta["ac_dim"],
    device=device,
)

# save the config as a json file (written one directory above log_dir)
with open(os.path.join(log_dir, '..', 'config.json'), 'w') as outfile:
    json.dump(config, outfile, indent=4)

print("\n============= Model Summary =============")
print(model)  # print model summary
print("")

# load training data
trainset, validset = TrainUtils.load_data_for_training(
    config, obs_keys=shape_meta["all_obs_keys"])
train_sampler = trainset.get_dataset_sampler()
print("\n============= Training Dataset =============")
print(trainset)
print("")
if validset is not None:
    print("\n============= Validation Dataset =============")
    print(validset)
    print("")

# maybe retrieve statistics for normalizing observations
obs_normalization_stats = None
if config.train.hdf5_normalize_obs:
    obs_normalization_stats = trainset.get_obs_normalization_stats()

# initialize data loaders
# (shuffle only when the dataset does not supply its own sampler)
train_loader = DataLoader(
    dataset=trainset,
    sampler=train_sampler,
    batch_size=config.train.batch_size,
    shuffle=(train_sampler is None),
    num_workers=config.train.num_data_workers,
    drop_last=True
)

if config.experiment.validate:
    # cap num workers for validation dataset at 1
    num_workers = min(config.train.num_data_workers, 1)
    valid_sampler = validset.get_dataset_sampler()
    valid_loader = DataLoader(
        dataset=validset,
        sampler=valid_sampler,
        batch_size=config.train.batch_size,
        shuffle=(valid_sampler is None),
        num_workers=num_workers,
        drop_last=True
    )
else:
    valid_loader = None

# print all warnings before training begins
print("*" * 50)
print("Warnings generated by robomimic have been duplicated here (from above) for convenience. Please check them carefully.")
flush_warnings()
print("*" * 50)
print("")

# main training loop
# Each epoch: train, optionally validate, record metrics, save a checkpoint
# when one of the configured conditions holds, and log process memory usage.
best_valid_loss = None
last_ckpt_time = time.time()

# number of learning steps per epoch (defaults to a full dataset pass)
train_num_steps = config.experiment.epoch_every_n_steps
valid_num_steps = config.experiment.validation_epoch_every_n_steps

for epoch in range(1, config.train.num_epochs + 1): # epoch numbers start at 1
    step_log = TrainUtils.run_epoch(
        model=model,
        data_loader=train_loader,
        epoch=epoch,
        num_steps=train_num_steps,
        obs_normalization_stats=obs_normalization_stats,
    )
    model.on_epoch_end(epoch)

    # setup checkpoint path
    epoch_ckpt_name = "model_epoch_{}".format(epoch)

    # check for recurring checkpoint saving conditions
    should_save_ckpt = False
    if config.experiment.save.enabled:
        time_check = (config.experiment.save.every_n_seconds is not None) and \
            (time.time() - last_ckpt_time > config.experiment.save.every_n_seconds)
        epoch_check = (config.experiment.save.every_n_epochs is not None) and \
            (epoch > 0) and (epoch % config.experiment.save.every_n_epochs == 0)
        epoch_list_check = (epoch in config.experiment.save.epochs)
        should_save_ckpt = (time_check or epoch_check or epoch_list_check)
    ckpt_reason = None
    if should_save_ckpt:
        last_ckpt_time = time.time()
        ckpt_reason = "time"

    print("Train Epoch {}".format(epoch))
    print(json.dumps(step_log, sort_keys=True, indent=4))
    for k, v in step_log.items():
        # keys prefixed with "Time_" are routed to a separate timing section
        if k.startswith("Time_"):
            data_logger.record("Timing_Stats/Train_{}".format(k[5:]), v, epoch)
        else:
            data_logger.record("Train/{}".format(k), v, epoch)

    # Evaluate the model on validation set
    if config.experiment.validate:
        with torch.no_grad():
            # pass the same normalization stats as the training call so that
            # validation batches are preprocessed consistently with training
            step_log = TrainUtils.run_epoch(
                model=model,
                data_loader=valid_loader,
                epoch=epoch,
                validate=True,
                num_steps=valid_num_steps,
                obs_normalization_stats=obs_normalization_stats,
            )
        for k, v in step_log.items():
            if k.startswith("Time_"):
                data_logger.record("Timing_Stats/Valid_{}".format(k[5:]), v, epoch)
            else:
                data_logger.record("Valid/{}".format(k), v, epoch)

        print("Validation Epoch {}".format(epoch))
        print(json.dumps(step_log, sort_keys=True, indent=4))

        # save checkpoint if achieve new best validation loss
        valid_check = "Loss" in step_log
        if valid_check and (best_valid_loss is None or (step_log["Loss"] <= best_valid_loss)):
            best_valid_loss = step_log["Loss"]
            if config.experiment.save.enabled and config.experiment.save.on_best_validation:
                epoch_ckpt_name += "_best_validation_{}".format(best_valid_loss)
                should_save_ckpt = True
                ckpt_reason = "valid" if ckpt_reason is None else ckpt_reason


    # Save model checkpoints based on conditions (success rate, validation loss, etc)
    # Driven by the conditions evaluated above (config.experiment.save.*) rather than
    # a hard-coded epoch interval, so the save settings in the config are honoured.
    if should_save_ckpt:
        TrainUtils.save_model(
            model=model,
            config=config,
            env_meta=None,
            shape_meta=shape_meta,
            ckpt_path=os.path.join(ckpt_dir, epoch_ckpt_name + ".pth"),
            obs_normalization_stats=obs_normalization_stats,
        )

    # Finally, log memory usage in MB
    process = psutil.Process(os.getpid())
    mem_usage = int(process.memory_info().rss / 1000000)
    data_logger.record("System/RAM Usage (MB)", mem_usage, epoch)
    print("\nEpoch {} Memory Usage: {} MB\n".format(epoch, mem_usage))

# terminate logging
data_logger.close()


{
    "algo_name": "bc",
    "experiment": {
        "name": "sawyer_block",
        "validate": false,
        "logging": {
            "terminal_output_to_txt": true,
            "log_tb": true,
            "log_wandb": false,
            "wandb_proj_name": "block"
        },
        "save": {
            "enabled": true,
            "every_n_seconds": null,
            "every_n_epochs": 20,
            "epochs": [],
            "on_best_validation": false,
            "on_best_rollout_return": false,
            "on_best_rollout_success_rate": true
        },
        "epoch_every_n_steps": 500,
        "validation_epoch_every_n_steps": 50,
        "env": null,
        "additional_envs": null,
        "render": false,
        "render_video": true,
        "keep_all_videos": false,
        "video_skip": 5,
        "rollout": {
            "enabled": false,
            "n": 50,
            "horizon": 1000,
            "rate": 20,
            "warmstart": 0,
            "terminate_on_s

save checkpoint to /home/ns/training_data/block/sawyer_block/20231111200909/models/model_epoch_600.pth

Epoch 600 Memory Usage: 2892 MB



using obs modality: low_dim with keys: ['robot0_eef_pos']
using obs modality: rgb with keys: ['agentview_image', 'robot0_eye_in_hand_image']
using obs modality: depth with keys: []
using obs modality: scan with keys: []
obs key agentview_image with shape (84, 84, 3)
obs key robot0_eef_pos with shape (6,)
obs key robot0_eye_in_hand_image with shape (84, 84, 3)


### Inference

In [5]:
# Rebuild the observation metadata and a model instance for inference.
# NOTE(review): this rebinds `model` to a newly constructed network. The checkpoint
# loading in the following cell is commented out, so confirm that trained weights
# are restored before the predictions below are used.
ObsUtils.initialize_obs_utils_with_config(config)
dataset_path = os.path.expanduser(config.train.data)
shape_meta = FileUtils.get_shape_metadata_from_dataset(
    dataset_path=dataset_path,  # expanded path, so "~"-style paths resolve
    all_obs_keys=config.all_obs_keys,
    verbose=True
)

# Resolve the device before constructing the model so the model is actually
# created on it (previously the assignment came after it had been consumed).
device = TorchUtils.get_torch_device(try_to_use_cuda=True)

model = algo_factory(
    algo_name=config.algo_name,
    config=config,
    obs_key_shapes=shape_meta["all_shapes"],
    ac_dim=shape_meta["ac_dim"],
    device=device,
)

In [6]:
# dir="/home/ns/robomimic/sawyer_models/sawery_drawer/20230824111633/models/"
# fn="model_epoch_300.pth"
# ckpt_path=dir+fn
# assert os.path.exists(ckpt_path)

In [6]:
# ckpt_dict = torch.load(ckpt_path)
# config_dict = json.loads(ckpt_dict['config'])

# model.deserialize(ckpt_dict["model"])
# NOTE(review): the checkpoint restore above is disabled, so `model` keeps whatever
# weights it had when this cell runs — confirm that these are the trained weights.
model.set_eval()

# Hand the policy the same observation normalization statistics that were used
# for training (None when config.train.hdf5_normalize_obs is disabled), so that
# inference inputs are preprocessed the same way as training inputs.
policy = RolloutPolicy(model, obs_normalization_stats=obs_normalization_stats)

In [7]:
# trainset, validset = TrainUtils.load_data_for_training(
#     config, obs_keys=shape_meta["all_obs_keys"])
# Number of demos in the training set. The reload above is commented out, so
# `trainset` is the object created in the training cell.
len(trainset.demos)

25

In [28]:
# Fetch the trajectory at index 2 from the training set, then display its
# top-level keys and the shape of its action array.
batch=trainset.get_trajectory_at_index(2)
batch.keys(), batch['actions'].shape

(dict_keys(['actions', 'rewards', 'dones', 'obs', 'ep']), (245, 7))

In [22]:
# Pass the trajectory's observation dict through the rollout policy's
# observation-preparation step (a private helper of RolloutPolicy).
ob = policy._prepare_observation(batch['obs'])

In [23]:
# ac = policy.policy.get_action(obs_dict=ob, goal_dict=None)

In [24]:
# Display the observation keys present after preparation.
ob.keys()

dict_keys(['agentview_image', 'robot0_eef_pos', 'robot0_eye_in_hand_image'])

In [25]:
def demo2actions(hdf5_file, demo_key, rollout_policy,
                 image_keys=('robot0_eye_in_hand_image', 'agentview_image')):
    """Replay one recorded demo through a policy and collect its predicted actions.

    Args:
        hdf5_file: mapping-like object laid out as
            ``hdf5_file['data'][demo_key]`` -> ``{'actions': ..., 'obs': {...}}``.
        demo_key: name of the demo to replay (e.g. ``"demo_4"``).
        rollout_policy: object exposing ``start_episode()``,
            ``_prepare_observation(ob)`` and ``policy.get_action(obs_dict, goal_dict)``.
        image_keys: observation keys whose per-step frames are stored
            channels-last (H, W, C) and are moved to channels-first (C, H, W)
            before being passed to the policy. Keys absent from the demo are
            ignored. Defaults to the two camera keys used originally.

    Returns:
        Tuple ``(actions_real, actions_pred)`` of numpy arrays: the actions
        recorded in the demo and the policy's action for each step.
    """
    # start_episode() is called once before stepping through the demo
    # (presumably resets per-episode policy state — confirm for the algo in use).
    rollout_policy.start_episode()

    demo = hdf5_file['data'][demo_key]
    # Materialise the recorded actions so the return value is a plain array
    # that does not depend on the underlying file handle staying open.
    actions_real = np.asarray(demo['actions'])
    obs_group = demo['obs']

    # Read every observation stream once instead of once per step and key.
    obs_arrays = {}
    for key in obs_group.keys():
        frames = np.asarray(obs_group[key])
        if key in image_keys:
            # (T, H, W, C) -> (T, C, H, W)
            # NOTE(review): only the axis order is changed here; if the training
            # pipeline also rescales pixel values, apply the same scaling — confirm.
            frames = frames.transpose(0, 3, 1, 2)
        obs_arrays[key] = frames

    num_steps = min((frames.shape[0] for frames in obs_arrays.values()), default=0)

    predicted = []
    # Inference only: skip autograd bookkeeping for the forward passes.
    with torch.no_grad():
        for step in range(num_steps):
            ob = {key: frames[step] for key, frames in obs_arrays.items()}
            ob = rollout_policy._prepare_observation(ob)
            ac = rollout_policy.policy.get_action(obs_dict=ob, goal_dict=None)
            # drop the leading batch dimension of the returned action
            predicted.append(ac.detach().cpu().numpy()[0])

    actions_pred = np.array(predicted)
    return actions_real, actions_pred

In [33]:
# Replay demo_4 from the training set's HDF5 file through the policy and
# display the shapes of the recorded and predicted action arrays.
file=trainset.hdf5_file
demo_key="demo_4"
actions_real, actions_pred=demo2actions(file, demo_key, policy)
actions_real.shape, actions_pred.shape

((128, 7), (128, 7))

In [34]:
# Save the recorded and predicted action arrays as .npy files
# (relative paths, so they are written to the current working directory).
np.save('bc_block_d4_1r.npy', actions_real)
np.save('bc_block_d4_1p.npy', actions_pred)