In [1]:
import os
import h5py
import numpy as np
import torch
import gin
import wandb
from CompoSuite import composuite
from diffusion.utils import construct_diffusion_model
from diffusion.elucidated_diffusion import Trainer
from diffusion.train_diffuser import SimpleDiffusionGenerator

# Root of the project checkout. Set the COMPOSITIONAL_RL_ROOT environment
# variable to run the notebook on another machine; the default is the original
# hard-coded location, so the derived paths below are unchanged when it is unset.
project_root = os.environ.get(
    "COMPOSITIONAL_RL_ROOT",
    "/Users/shubhankar/Developer/compositional-rl-synth-data",
).rstrip("/")

# Parse the gin configuration file that ships with the project.
gin.parse_config_file(f"{project_root}/denoiser.gin")

# Paths are joined with "/" on purpose: a later cell derives the wandb run name
# via results_folder.split('/')[-1].
base_data_path = f"{project_root}/data"
results_folder = f"{project_root}/results_temp"

ModuleNotFoundError: No module named 'denoiser_network'

In [None]:
def load_single_dataset(base_path, dataset_type, robot, obj, obst, task,
                        expected_length=1000000):
    """Read one task's offline dataset from its HDF5 file.

    The file is expected at
    ``<base_path>/<dataset_type>/<robot>_<obj>_<obst>_<task>/data.hdf5``.

    Args:
        base_path: Directory that contains the dataset-type folders.
        dataset_type: Name of the dataset-type sub-directory.
        robot: Robot name (first component of the task directory name).
        obj: Object name (second component).
        obst: Obstacle name (third component).
        task: Task name (fourth component).
        expected_length: Number of entries every key must have. Defaults to
            1000000, the value that used to be hard-coded. Pass ``None`` to
            skip the length check (e.g. for a truncated or resized dataset).

    Returns:
        dict mapping "observations", "actions", "rewards", "successes",
        "terminals" and "timeouts" to the complete contents read from the file.

    Raises:
        AssertionError: If a key is missing from the file, or its length
            differs from ``expected_length``.
    """
    keys = ["observations", "actions", "rewards", "successes", "terminals", "timeouts"]

    data_path = os.path.join(
        base_path, dataset_type, f"{robot}_{obj}_{obst}_{task}", "data.hdf5"
    )

    data_dict = {}

    with h5py.File(data_path, "r") as dataset_file:
        for k in keys:
            assert k in dataset_file, f"Key {k} not found in dataset"
            # `[:]` materialises the whole entry so it remains usable after
            # the file is closed.
            data_dict[k] = dataset_file[k][:]
            if expected_length is not None:
                assert len(data_dict[k]) == expected_length, f"Key {k} has wrong length"

    return data_dict


def qlearning_dataset(dataset, terminate_on_end=True):
    """Convert a sequential dataset into (s, a, s', r, done) transition arrays.

    Row ``i`` of the result pairs ``observations[i]`` with
    ``observations[i + 1]`` as its next observation, so a dataset whose
    'rewards' entry has N rows yields N - 1 transitions.

    The conversion is done with array slicing instead of a per-row Python
    loop; for non-empty input the returned values, dtypes and shapes match
    what the loop produced.

    Args:
        dataset: Mapping with array-like 'observations', 'actions', 'rewards'
            and 'terminals' entries. 'timeouts' is only read when
            ``terminate_on_end`` is False.
        terminate_on_end: When True (default, the previous behaviour) every
            row is kept, including rows flagged in 'timeouts'.
            NOTE(review): for those rows the "next observation" is simply the
            following row, which presumably belongs to the next episode -
            confirm against how the data was recorded. Pass False to drop
            rows whose 'timeouts' flag is set.

    Returns:
        dict with float32 'observations', 'actions', 'next_observations' and
        'rewards', plus a boolean 1-D 'terminals' array. With fewer than two
        input rows the arrays are empty but keep their dtype and trailing
        dimensions.

    Raises:
        IndexError: If an entry has fewer rows than 'rewards' requires.
        ValueError: If 'terminals' (or 'timeouts', when used) holds more than
            one value per row.
    """
    observations = np.asarray(dataset['observations'])
    actions = np.asarray(dataset['actions'])
    rewards = np.asarray(dataset['rewards'])
    terminals = np.asarray(dataset['terminals'])

    # The last row has no successor, so it cannot start a transition.
    n = max(rewards.shape[0] - 1, 0)

    # Slicing truncates silently; keep failing loudly on short inputs the way
    # per-row indexing did.
    for key, values, needed in (
        ('observations', observations, n + 1),
        ('actions', actions, n),
        ('terminals', terminals, n),
    ):
        if n > 0 and values.shape[0] < needed:
            raise IndexError(
                f"'{key}' has {values.shape[0]} rows but {needed} are required"
            )

    # astype() copies, so the outputs never alias the input arrays.
    obs = observations[:n].astype(np.float32)
    next_obs = observations[1:n + 1].astype(np.float32)
    action = actions[:n].astype(np.float32)
    reward = rewards[:n].astype(np.float32)
    # reshape(n) flattens an (n, 1) flag column and rejects anything wider,
    # mirroring bool() applied to one row at a time.
    done = terminals[:n].astype(bool).reshape(n)

    if not terminate_on_end:
        timeouts = np.asarray(dataset['timeouts'])
        if n > 0 and timeouts.shape[0] < n:
            raise IndexError(
                f"'timeouts' has {timeouts.shape[0]} rows but {n} are required"
            )
        keep = ~timeouts[:n].astype(bool).reshape(n)
        obs, next_obs, action, reward, done = (
            obs[keep], next_obs[keep], action[keep], reward[keep], done[keep]
        )

    return {
        'observations': obs,
        'actions': action,
        'next_observations': next_obs,
        'rewards': reward,
        'terminals': done,
    }


def make_inputs(dataset, modelled_terminals: bool = True) -> np.ndarray:
    """Stack a transition dict column-wise into a single array.

    Columns are ordered ``observations | actions | reward | next_observations``.
    When ``modelled_terminals`` is true, a final column holding the terminal
    flags cast to float32 is appended.

    Args:
        dataset: Mapping with 'observations', 'actions', 'next_observations'
            and 'rewards' arrays, plus 'terminals' when ``modelled_terminals``
            is true. 'rewards' (and 'terminals') get a trailing axis added
            before concatenation along axis 1.
        modelled_terminals: Whether to append the terminal-flag column.

    Returns:
        The concatenated array, one row per transition.
    """
    state, act, next_state, rew = (
        dataset[key]
        for key in ('observations', 'actions', 'next_observations', 'rewards')
    )

    # Rewards are given one value per row; add a column axis so they can be
    # concatenated alongside the other parts.
    features = np.concatenate(
        (state, act, rew[:, np.newaxis], next_state), axis=1
    )

    if not modelled_terminals:
        return features

    terminal_column = dataset['terminals'].astype(np.float32)[:, np.newaxis]
    return np.concatenate((features, terminal_column), axis=1)

In [None]:
# Which dataset to train on: the dataset family plus the four task elements
# (the same four values are passed to composuite.make in a later cell).
dataset_type = 'expert-iiwa-offline-comp-data'

robot = 'IIWA'
obj = 'Box'
obst = 'None'
task = 'Push'

# Each stage gets its own name so no variable changes type mid-cell:
# raw HDF5 arrays -> transition dict -> flat numpy matrix.
raw_dataset = load_single_dataset(base_path=base_data_path, dataset_type=dataset_type,
                                  robot=robot, obj=obj, obst=obst, task=task)
transitions = qlearning_dataset(raw_dataset)
inputs_np = make_inputs(transitions)

# `inputs` (float tensor) and `dataset` (TensorDataset) are the names that the
# model-construction and training cells read.
inputs = torch.from_numpy(inputs_np).float()
dataset = torch.utils.data.TensorDataset(inputs)

# Release the names of the large intermediates, as the original name-reuse
# did implicitly; the kernel would otherwise hold them for the whole session.
del raw_dataset, transitions, inputs_np

In [None]:
# Build the diffusion model from the training tensor. Only `inputs` is passed
# explicitly; the remaining settings presumably come from the gin config
# parsed in the first cell - TODO confirm against construct_diffusion_model.
diffusion = construct_diffusion_model(inputs=inputs)

In [None]:
# Weights & Biases run metadata. The run is named after the last
# '/'-separated component of `results_folder`.
wandb_project, wandb_entity, wandb_group = (
    'offline_rl_diffusion',
    '',
    'diffusion_training',
)
run_name = results_folder.rsplit('/', 1)[-1]

wandb.init(project=wandb_project, entity=wandb_entity, group=wandb_group, name=run_name)

In [None]:
# Train the diffusion model on the TensorDataset built earlier.
# `results_folder` is handed to the Trainer, presumably as the location for
# checkpoints and other outputs - TODO confirm against Trainer.
trainer = Trainer(diffusion, dataset, results_folder=results_folder)
trainer.train()

In [None]:
# Create the CompoSuite environment for the same robot/object/obstacle/task
# combination the dataset was loaded for.
# NOTE(review): use_task_id_obs=False presumably leaves the task identifier
# out of the observation - confirm it matches the dataset's observation layout.
env = composuite.make(robot, obj, obst, task, use_task_id_obs=False)

In [None]:
# Draw synthetic transitions from the trainer's EMA copy of the model.
# NOTE(review): the sample count 500000 is hard-coded here; consider moving it
# to a named constant next to the other configuration values.
generator = SimpleDiffusionGenerator(env=env, ema_model=trainer.ema.ema_model)
observations, actions, rewards, next_observations, terminals = generator.sample(num_samples=500000)

In [None]:
# Ensure the output directory exists before writing: a missing folder would
# otherwise raise only after training and sampling have already finished.
os.makedirs(results_folder, exist_ok=True)

# Persist the generated transitions as one compressed .npz archive, with one
# named array per transition component.
np.savez_compressed(
    os.path.join(results_folder, 'samples.npz'),
    observations=observations,
    actions=actions,
    rewards=rewards,
    next_observations=next_observations,
    terminals=terminals
)