In [1]:
import os
import composuite
from CORL.algorithms.offline.td3_bc import *
from diffusion.utils import *
import numpy as np
import imageio

def get_weights_norm(model):
    """Return the global L2 norm over all trainable parameters of ``model``.

    Each parameter tensor that has ``requires_grad`` set contributes the
    square of its own 2-norm; the result is the square root of the sum of
    those squares. Parameters with ``requires_grad == False`` are skipped.

    Args:
        model: object exposing ``parameters()`` that yields tensors
            (e.g. a ``torch.nn.Module``).

    Returns:
        float: the combined L2 norm (``0.0`` when nothing is trainable).
    """
    squared_sum = 0.0
    for tensor in model.parameters():
        if not tensor.requires_grad:
            continue
        tensor_norm = tensor.norm(2).item()
        squared_sum = squared_sum + tensor_norm ** 2
    return squared_sum ** 0.5


# Root of the project checkout. All data/result locations below are derived
# from it, so the notebook can be run on another machine by exporting
# COMPOSITIONAL_RL_ROOT instead of editing three absolute paths by hand.
# The default reproduces the original hard-coded locations.
project_root = os.environ.get(
    'COMPOSITIONAL_RL_ROOT',
    '/Users/shubhankar/Developer/compositional-rl-synth-data',
)

# Directory passed as `base_path` when loading the agent (expert) datasets.
base_agent_data_path = os.path.join(project_root, 'data')
# Directory holding the diffusion runs; a run id and mode are appended later.
base_synthetic_data_path = os.path.join(project_root, 'cluster_results', 'diffusion')

# Directory holding one sub-folder per offline-learning run (with checkpoints).
base_results_folder = os.path.join(project_root, 'local_results', 'offline_learning')

In [2]:
# Task evaluated by this notebook, given as (robot, obj, obst, subtask).
# Alternative combinations that were used previously (swap in to switch task):
#   robot='Kinova3', obj='Hollowbox', obst='None',       subtask='Trashcan'
#   robot='Panda',   obj='Dumbbell',  obst='ObjectDoor', subtask='PickPlace'
robot, obj, obst, subtask = 'Jaco', 'Plate', 'GoalWall', 'PickPlace'

# Which training data the evaluated policy came from; later cells branch on
# the values 'agent' and 'synthetic'.
data_type = 'synthetic'

In [3]:
config = TrainConfig()

# Numeric suffix of the offline-learning run folder for every trained policy,
# keyed by the data the policy was trained on and then by the task tuple
# (robot, obj, obst, subtask). Looking the run up from the selected task
# replaces the previous "last assignment wins" pattern, where changing the
# task without also editing this cell silently loaded another task's policy.
run_suffix_by_task = {
    'agent': {
        ('Kinova3', 'Hollowbox', 'None', 'Trashcan'): 12,
        ('Panda', 'Dumbbell', 'ObjectDoor', 'PickPlace'): 14,
        ('Jaco', 'Plate', 'GoalWall', 'PickPlace'): 16,
    },
    'synthetic': {
        ('Kinova3', 'Hollowbox', 'None', 'Trashcan'): 6,
        ('Panda', 'Dumbbell', 'ObjectDoor', 'PickPlace'): 12,
        ('Jaco', 'Plate', 'GoalWall', 'PickPlace'): 13,
    },
}

if data_type not in run_suffix_by_task:
    raise ValueError(
        f"Unknown data_type {data_type!r}; expected one of {sorted(run_suffix_by_task)}"
    )

task_key = (robot, obj, obst, subtask)
if task_key not in run_suffix_by_task[data_type]:
    raise ValueError(
        f"No {data_type} run registered for task {task_key}; "
        "add it to run_suffix_by_task"
    )

# Name of the sub-folder (under the results folder) that holds the checkpoint.
run = f'offline_learning_{data_type}_{run_suffix_by_task[data_type][task_key]}'

if data_type == 'synthetic':
    # Diffusion run and split from which the synthetic dataset is loaded.
    synthetic_run_id = 'cond_diff_20'
    mode = 'train'

# Checkpoint file name. NOTE(review): the loading cell spells this file name
# out itself and then rebinds `checkpoint` to the loaded state — confirm
# whether this variable is still needed.
checkpoint = 'checkpoint_5000.pt'

In [4]:
# Locate the saved training state of the selected run and load it onto the CPU.
# `weights_only=True` is passed so that torch.load uses its restricted
# unpickler. After this cell `checkpoint` holds the loaded object (its 'actor'
# entry is read further down), no longer a file name.
checkpoint_path = os.path.join(base_results_folder, run, 'checkpoint_5000.pt')
checkpoint = torch.load(
    checkpoint_path,
    weights_only=True,
    map_location=torch.device('cpu'),
)

In [5]:
# Load the transitions the policy was trained on. Only `dataset["observations"]`
# is used afterwards (to compute the observation statistics).
if data_type == 'agent':
    # An environment is created here because its `modality_dims` are needed to
    # strip the indicator vectors from the stored transitions.
    env = composuite.make(robot, obj, obst, subtask, use_task_id_obs=True, ignore_done=False)
    dataset = load_single_composuite_dataset(base_path=base_agent_data_path,
                                             dataset_type='expert',
                                             robot=robot, obj=obj,
                                             obst=obst, task=subtask)
    dataset, _ = remove_indicator_vectors(env.modality_dims, transitions_dataset(dataset))
elif data_type == 'synthetic':
    # Synthetic data lives under <diffusion results>/<run id>/<mode>.
    dataset = load_single_synthetic_dataset(base_path=os.path.join(base_synthetic_data_path, synthetic_run_id, mode),
                                            robot=robot, obj=obj,
                                            obst=obst, task=subtask)
else:
    # Fail loudly: otherwise `dataset` would be undefined here, or would be a
    # stale object left in the kernel by an earlier run of this cell.
    raise ValueError(
        f"Unknown data_type {data_type!r}; expected 'agent' or 'synthetic'"
    )

In [None]:
# Build the environment for the selected task. `ignore_done=True` is passed so
# the fixed-length rollout further down is not cut short by a done signal.
env = composuite.make(robot=robot, obj=obj, obstacle=obst, task=subtask,
                      has_renderer=True, ignore_done=True)

# Network input/output sizes, read from the environment before it is wrapped.
state_dim, action_dim = env.observation_space.shape[0], env.action_space.shape[0]

# Observation statistics come from the training dataset and are handed to
# wrap_env (presumably so the policy sees observations normalised the same way
# as during training — confirm against wrap_env).
state_mean, state_std = compute_mean_std(dataset["observations"], eps=1e-3)
env = wrap_env(env, state_mean=state_mean, state_std=state_std)

# Upper action bound taken from the first action dimension.
max_action = float(env.action_space.high[0])

In [None]:
# Quick sanity check: average of the per-dimension observation mean and std.
print(*(stat.mean() for stat in (state_mean, state_std)))

In [None]:
# Instantiate the policy network with the sizes from the training config, then
# overwrite its freshly initialised weights with the checkpointed ones.
actor = Actor(
    state_dim,
    action_dim,
    max_action,
    hidden_dim=config.network_width,
    n_hidden=config.network_depth,
)
actor = actor.to(config.device)

# The weight norm is printed on both sides of the load; a changed value shows
# that the checkpoint weights actually replaced the initial ones.
print('Before:', get_weights_norm(actor))
actor.load_state_dict(checkpoint['actor'])
print('After:', get_weights_norm(actor))

In [9]:
# Disabled alternative to the frame-capturing rollout: runs the policy for
# 1000 steps and shows each step through env.render() using camera id 3.
# Uncomment to watch the policy live instead of recording frames.
# state = env.reset()
# env.viewer.set_camera(camera_id=3)

# low, high = env.action_spec

# for _ in range(1000):
#     action = actor.act(state)
#     state, _, _, _ = env.step(action)
#     env.render()

In [None]:
# List the camera names exposed by the simulator model, so that a valid
# `camera_name` can be chosen when rendering frames.
print(env.sim.model.camera_names)

In [11]:
# Roll the policy out for 450 steps and keep one rendered image per step.
state = env.reset()

low, high = env.action_spec  # unpacked here but not used in this cell
frames = []

for _ in range(450):
    state, _, _, _ = env.step(actor.act(state))
    raw_frame = env.sim.render(camera_name='sideview', width=1024, height=1024)
    # Flip vertically before storing (presumably the renderer returns the
    # image upside down — confirm by viewing a frame).
    frames.append(np.flipud(raw_frame))

In [12]:
# Write the captured frames to an animated GIF; skipped when nothing was recorded.
fps = 50
if len(frames) > 0:
    # Per-frame display time derived from the target frame rate.
    # NOTE(review): the unit of `duration` (seconds vs milliseconds) depends on
    # the installed imageio version — confirm 1000/fps is interpreted as ms.
    imageio.mimsave(
        "simulation.gif",
        frames,
        duration=1000 / fps,
    )