In [1]:
import os

# NOTE: the original import order is kept on purpose. Several of these are
# star imports, so reordering them could change which module's definition wins.
import composuite
from diffusion.utils import *
from CORL.algorithms.offline.td3_bc import *
from CORL.shared.buffer import *
from CORL.shared.logger import *

from diffusion.utils import *
from collections import defaultdict
import composuite
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import seaborn as sns

import numpy as np

def transitions_dataset(dataset):
    """
    Flatten a step-indexed dataset into per-step transition arrays.

    Adapted from D4RL's ``qlearning_dataset``:
    https://github.com/Farama-Foundation/D4RL/blob/89141a689b0353b0dac3da5cba60da4b1b16254d/d4rl/__init__.py#L69

    Args:
        dataset: Mapping with the keys 'observations', 'actions', 'rewards',
            'terminals' and 'timeouts'. Every entry is indexed by step along
            its first axis; 'rewards' must expose ``.shape`` and the
            observation/action/reward elements must support ``.astype``
            (i.e. NumPy arrays).

    Returns:
        dict of NumPy arrays with the keys 'observations', 'actions',
        'next_observations', 'rewards', 'terminals' and 'timesteps'.
        With ``N`` input steps each array holds ``N - 1`` entries, because the
        final step has no successor observation and is dropped.

        * 'terminals' is True when either the 'terminals' or the 'timeouts'
          flag is set for that step; it is always a boolean array.
        * 'timesteps' counts steps since the previous terminal step
          (0 for the first step after it).
        * 'next_observations'[i] is always row ``i + 1`` of the input, so on a
          terminal step it is the first observation of whatever follows.
    """
    num_steps = dataset['rewards'].shape[0]

    observations = []
    next_observations = []
    actions = []
    rewards = []
    terminals = []
    timesteps = []

    timestep = 0
    # Stop one short of the end: step i needs row i + 1 as its successor.
    for i in range(num_steps - 1):
        # Coerce both flags to plain bools so the resulting array is boolean
        # even when 'timeouts' is stored as 0/1 integers or floats.
        done_bool = bool(dataset['terminals'][i])
        final_timestep = bool(dataset['timeouts'][i])
        terminal = done_bool or final_timestep

        observations.append(dataset['observations'][i].astype(np.float32))
        next_observations.append(dataset['observations'][i + 1].astype(np.float32))
        actions.append(dataset['actions'][i].astype(np.float32))
        rewards.append(dataset['rewards'][i].astype(np.float32))
        terminals.append(terminal)
        timesteps.append(timestep)

        # Restart the within-episode counter after an episode ends.
        timestep = 0 if terminal else timestep + 1

    return {
        'observations': np.array(observations),
        'actions': np.array(actions),
        'next_observations': np.array(next_observations),
        'rewards': np.array(rewards),
        'terminals': np.array(terminals),
        'timesteps': np.array(timesteps),
    }

In [None]:
# Task selection: one (robot, object, obstacle, subtask) combination.
robot = 'Kinova3'
obj = 'Hollowbox'
obst = 'None'
subtask = 'Trashcan'

# An environment for the selected task is built here to read `modality_dims`,
# which `remove_indicator_vectors` needs below.
representative_indicators_env = composuite.make(robot, obj, obst, subtask, use_task_id_obs=True, ignore_done=False)
modality_dims = representative_indicators_env.modality_dims

# Dataset root. Set the COMPOSUITE_DATA_PATH environment variable to point at
# your own copy of the data; the original author's location is the fallback so
# existing setups keep working unchanged.
base_agent_data_path = os.environ.get(
    'COMPOSUITE_DATA_PATH',
    '/Users/shubhankar/Developer/compositional-rl-synth-data/data',
)
dataset = load_single_composuite_dataset(base_path=base_agent_data_path,
                                         dataset_type='expert',
                                         robot=robot, obj=obj,
                                         obst=obst, task=subtask)

# Flatten into per-step transitions, then drop the indicator entries
# (the second return value of `remove_indicator_vectors` is not used).
agent_dataset = transitions_dataset(dataset)
agent_dataset, _ = remove_indicator_vectors(modality_dims, agent_dataset)

# Keep the individual fields around before `agent_dataset` is rebound below.
agent_obs = agent_dataset['observations']
agent_actions = agent_dataset['actions']
agent_next_obs = agent_dataset['next_observations']
agent_rewards = agent_dataset['rewards']
agent_terminals = agent_dataset['terminals']
agent_timesteps = agent_dataset['timesteps']

# NOTE(review): `agent_dataset` changes from a dict to whatever `make_inputs`
# returns. Later cells use `.shape[0]` and row indexing on it, so presumably a
# 2-D array with one row per transition; confirm against `make_inputs`.
agent_dataset = make_inputs(agent_dataset)

In [None]:
# For every transition, find which action dimension has the largest value.
dominant_action_dim = np.argmax(agent_actions, axis=1)
# A transition is flagged when that dimension is index 7, which this notebook
# treats as the gripper action (TODO confirm against the action layout).
gripper_closed = dominant_action_dim == 7
print(gripper_closed.shape)

In [4]:
# Subsample transitions for t-SNE.
# The draw is seeded so the figure is reproducible on a fresh kernel (the
# t-SNE cell already fixes its own random_state), and the sample size is
# clamped so datasets with fewer rows than requested do not raise.
N_TSNE_SAMPLES = 2500
TSNE_SAMPLE_SEED = 42

num_transitions = agent_dataset.shape[0]
sample_rng = np.random.default_rng(TSNE_SAMPLE_SEED)
random_indices = sample_rng.choice(
    num_transitions,
    size=min(N_TSNE_SAMPLES, num_transitions),
    replace=False,
)

# Apply the same row selection to all three so they stay aligned.
sampled_timesteps = agent_timesteps[random_indices]
sampled_gripper = gripper_closed[random_indices]
sampled_agent_data = agent_dataset[random_indices]

In [None]:
# Sanity check: the three sampled arrays should share their first dimension.
print(*(sampled.shape for sampled in (sampled_timesteps, sampled_gripper, sampled_agent_data)))

In [6]:
# Standardize the sampled rows with StandardScaler before embedding them.
feature_scaler = StandardScaler()
normalized_data = feature_scaler.fit_transform(sampled_agent_data)

# Project to 2-D; the fixed random_state keeps the embedding repeatable.
tsne = TSNE(random_state=42, n_components=2)
embeddings = tsne.fit_transform(normalized_data)

In [None]:
# Explicit figure/axes handles instead of the implicit pyplot state machine.
fig, ax = plt.subplots(figsize=(8, 6))

# All sampled transitions, coloured by their timestep.
scatter = ax.scatter(
    embeddings[:, 0],
    embeddings[:, 1],
    c=sampled_timesteps,
    cmap='viridis',
    alpha=0.8,
)
fig.colorbar(scatter, ax=ax, label="Timestep")

# Overlay the transitions flagged by the gripper mask as red crosses.
gripper_points = embeddings[sampled_gripper]
ax.scatter(
    gripper_points[:, 0],
    gripper_points[:, 1],
    color='red',
    marker='x',
    label='Gripper Closed',
)

ax.set_xlabel("t-SNE Dimension 1")
ax.set_ylabel("t-SNE Dimension 2")
ax.set_title("t-SNE colored by Timestep")
ax.legend()
plt.show()