In [4]:
import os

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import torch
import wandb

import composuite
# NOTE(review): `gin` and the dataset/model helper functions used below are not
# imported explicitly; they presumably come from this star import — confirm.
from diffusion.utils import *
from diffusion.elucidated_diffusion import Trainer
from diffusion.train_diffuser import SimpleDiffusionGenerator

# Root of the project checkout. Set the COMPOSITIONAL_RL_ROOT environment
# variable to run the notebook from another location; the default keeps the
# original absolute path, so existing runs resolve exactly the same files.
project_root = os.environ.get(
    'COMPOSITIONAL_RL_ROOT',
    '/Users/shubhankar/Developer/compositional-rl-synth-data',
)

# Parse the project's diffusion gin configuration file.
gin.parse_config_file(os.path.join(project_root, 'config', 'diffusion.gin'))

# Folders that the data-loading and results cells build their paths from.
base_data_path = os.path.join(project_root, 'data')
base_results_folder = os.path.join(project_root, 'results')

In [5]:
# Dataset variant to load.
dataset_type = 'expert'

# The four components that select a single CompoSuite task.
robot, obj, obst, task = 'IIWA', 'Box', 'None', 'PickPlace'

# Results for this task go to <base_results_folder>/<robot>_<obj>_<obst>_<task>.
results_folder = os.path.join(base_results_folder, '_'.join((robot, obj, obst, task)))

# Load the task's data, pass it through transitions_dataset, and build the
# model inputs from the result.
dataset = transitions_dataset(
    load_single_composuite_dataset(
        base_path=base_data_path,
        dataset_type=dataset_type,
        robot=robot,
        obj=obj,
        obst=obst,
        task=task,
    )
)
inputs = make_inputs(dataset)
print('Before removing task indicators:', inputs.shape)

# The environment is handed to remove_indicator_vectors together with the
# inputs; the call returns the stripped inputs and the indicator vectors.
env = composuite.make(robot, obj, obst, task, use_task_id_obs=True, ignore_done=False)
inputs, indicators = remove_indicator_vectors(inputs, env)
print('After removing task indicators:', inputs.shape, indicators.shape)

Before removing task indicators: (999999, 196)
After removing task indicators: (999999, 164) (999999, 16)


In [6]:
# Optional sanity check (disabled): plot the task-indicator vector of one sample.
# idx = 0

# task_vector = indicators[idx, :].reshape(1, -1)

# labels = ['Object', 'Robot', 'Obstacle', 'Subtask']
# plt.figure(figsize=(10, 2))
# plt.imshow(task_vector, cmap="viridis", aspect="auto")
# plt.colorbar(label="Value")
# plt.xticks(ticks=[2, 6, 10, 14], labels=labels, ha='right')
# plt.yticks([])
# plt.show()

In [7]:
# Convert the inputs and the indicator vectors to float32 tensors.
# torch.as_tensor accepts NumPy arrays as well as tensors, so re-running this
# cell keeps working; torch.from_numpy raises a TypeError once `inputs` and
# `indicators` have already been rebound to tensors by a previous run.
inputs = torch.as_tensor(inputs, dtype=torch.float32)
indicators = torch.as_tensor(indicators, dtype=torch.float32)

# Pair every input row with its indicator vector. This rebinds `dataset`,
# which previously held the output of transitions_dataset.
dataset = torch.utils.data.TensorDataset(inputs, indicators)

In [8]:
# Design notes for making the model conditional. They are kept as a bare string
# literal, so the cell echoes the string as its output.
# NOTE(review): the 164 in item 2b equals the width of `inputs` printed after the
# indicators were removed, which presumably covers the whole transition vector
# rather than only the observation — confirm the wording.
"""
1. The model needs to accept conditioning variables.
2. Currently, the conditioning variables reside in the inputs.
    a. Separate the multitask indicator vector from the observations.
    b. The observation should be 164 dimensions now.
"""

'\n1. The model needs to accept conditioning variables.\n2. Currently, the conditioning variables reside in the inputs.\n    a. Separate the multitask indicator vector from the observations.\n    b. The observation should be 164 dimensions now.\n'

In [9]:
# Build the diffusion model over the input features, conditioned on the task
# indicator. The conditioning width is read from `indicators` (16 columns for
# this dataset) instead of being hard-coded, so it cannot drift from the data.
diffusion = construct_diffusion_model(inputs=inputs, cond_dim=indicators.shape[1])

Skipping normalization for dimensions [163].
Means: tensor([ 6.3589e-02,  1.0651e-01,  8.6088e-01, -1.6371e-02, -1.2360e-02,
         1.7438e-01,  9.5864e-01,  1.6769e-02,  9.9819e-03,  3.5345e-02,
         5.4525e-01, -3.0514e-03,  9.8454e-02,  6.1551e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00, -3.6970e-01,  1.0005e-01,  7.7840e-01,  4.5343e-01,
         1.5858e-01,  8.5067e-02,  7.3912e-02,  1.0000e-01,  1.8000e-01,
         8.3000e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  1.0000e+00,
         2.3125e-02, -4.1920e-02,  4.6128e-02,  4.5343e-01,  1.5858e-01,
         8.5067e-02,  7.3912e-02,  3.6411e-02,  7.3486e-02, -3.0883e-02,
         9.1670e-01,  5.0138e-01,  9.8886e-01,  3.1583e-04,  9.6788e-01,
         9.6581e-01,  9.7018e-01,  2.6684e-01,  8.5701e-01,  9.7417e-02,
        -9.9253e-01, -2.0960e-01,  2.3004e-01,  1.1947e-01,  2.5523e-02,
         1.8551e-02,  2.0060e-03,  1.5352e-02, -1.0075e-02, -1.6298e-02,

In [12]:
# Display the network held by the diffusion model; its repr is the cell output.
diffusion.net

ResidualMLPDenoiser(
  (residual_mlp): ResidualMLP(
    (network): Sequential(
      (0): Linear(in_features=128, out_features=2048, bias=True)
      (1): ResidualBlock(
        (linear): Linear(in_features=2048, out_features=2048, bias=True)
        (ln): Identity()
      )
      (2): ResidualBlock(
        (linear): Linear(in_features=2048, out_features=2048, bias=True)
        (ln): Identity()
      )
      (3): ResidualBlock(
        (linear): Linear(in_features=2048, out_features=2048, bias=True)
        (ln): Identity()
      )
      (4): ResidualBlock(
        (linear): Linear(in_features=2048, out_features=2048, bias=True)
        (ln): Identity()
      )
      (5): ResidualBlock(
        (linear): Linear(in_features=2048, out_features=2048, bias=True)
        (ln): Identity()
      )
      (6): ResidualBlock(
        (linear): Linear(in_features=2048, out_features=2048, bias=True)
        (ln): Identity()
      )
      (7): Identity()
    )
    (final_linear): Linear(in_featur

In [None]:
# Weights & Biases run settings.
wandb_project = 'offline_rl_diffusion'
wandb_entity = ''  # leave empty to log under the account's default entity
wandb_group = 'diffusion_training'

wandb.init(
    project=wandb_project,
    # Pass None rather than an empty string so that "no entity configured"
    # is expressed the way wandb.init documents it (fall back to the default).
    entity=wandb_entity or None,
    group=wandb_group,
    # Name the run after the task folder. results_folder is built with
    # os.path.join, so take its last component with os.path instead of
    # splitting on a hard-coded '/' separator.
    name=os.path.basename(os.path.normpath(results_folder)),
)

In [None]:
# Train the diffusion model on the (inputs, indicators) TensorDataset.
# results_folder is handed to the Trainer as its output location.
trainer = Trainer(diffusion, dataset, results_folder=results_folder)
trainer.train()

In [None]:
# Environment that the sample generator is built with.
# NOTE(review): this is the same call with the same arguments as the `env`
# created in the data-loading cell; it looks redundant unless a fresh
# environment is required here — confirm.
env = composuite.make(robot, obj, obst, task, use_task_id_obs=True, ignore_done=False)

In [None]:
# Sample synthetic transitions from the EMA copy of the trained model.
# NOTE(review): the model was constructed with a conditioning dimension; whether
# sample() needs the task indicator passed explicitly is not visible here — confirm.
ema_denoiser = trainer.ema.ema_model
generator = SimpleDiffusionGenerator(env=env, ema_model=ema_denoiser)
observations, actions, rewards, next_observations, terminals = generator.sample(
    num_samples=100000
)

In [None]:
# Make sure the output directory exists before writing; np.savez_compressed
# does not create missing parent directories.
os.makedirs(results_folder, exist_ok=True)

# Store the generated transitions as one compressed archive, one array per field.
np.savez_compressed(
    os.path.join(results_folder, 'samples.npz'),
    observations=observations,
    actions=actions,
    rewards=rewards,
    next_observations=next_observations,
    terminals=terminals
)