In [None]:
import os

import composuite
from diffusion.utils import *
from corl.algorithms.offline.td3_bc import *
from corl.shared.buffer import *
from corl.shared.logger import *

from diffusion.utils import *
from collections import defaultdict
import composuite
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import seaborn as sns


def identify_special_dimensions(data):
    """Classify the columns of a 2-D array as integer-valued or constant.

    A column whose every entry equals its rounded value is reported as an
    integer dimension. Any remaining column whose entries all equal the
    column's first entry is reported as a constant dimension. The integer
    check takes precedence, so a column holding one repeated whole number is
    listed as integer, not constant.

    Args:
        data: Array indexed as ``data[:, col]`` with ``data.shape[1]`` columns.

    Returns:
        Tuple ``(integer_dims, constant_dims)`` of column-index lists, each in
        ascending column order.
    """
    integer_dims, constant_dims = [], []

    for col_idx in range(data.shape[1]):
        values = data[:, col_idx]

        # Integer-valued columns are recorded first and never tested for constancy.
        if np.all(values == np.round(values)):
            integer_dims.append(col_idx)
            continue

        if np.all(values == values[0]):
            constant_dims.append(col_idx)

    return integer_dims, constant_dims


def process_special_dimensions(synthetic_dataset, integer_dims, constant_dims,
                               keys=('observations', 'next_observations'),
                               constant_decimals=1):
    """Return a copy of a dataset with selected columns rounded.

    Every array in ``synthetic_dataset`` is copied, so the input is never
    modified. For each entry named in ``keys``, the columns listed in
    ``integer_dims`` are rounded to whole numbers and the columns listed in
    ``constant_dims`` are rounded to ``constant_decimals`` decimal places.

    Args:
        synthetic_dataset: Mapping from name to array; each value must provide
            ``.copy()``, and the entries named in ``keys`` must support
            ``arr[:, cols]`` indexing.
        integer_dims: Column indices (list or NumPy array) to round to integers.
        constant_dims: Column indices (list or NumPy array) to round to
            ``constant_decimals`` places.
        keys: Names of the dataset entries to process.
        constant_decimals: Number of decimals kept for ``constant_dims``.

    Returns:
        A new dict with the same keys as ``synthetic_dataset``.
    """
    processed_dataset = {k: v.copy() for k, v in synthetic_dataset.items()}

    # len() instead of truthiness: ``if array:`` raises for NumPy index arrays
    # holding more than one element.
    has_integer_dims = len(integer_dims) > 0
    has_constant_dims = len(constant_dims) > 0

    for key in keys:
        # Round integer dimensions to the nearest whole number
        if has_integer_dims:
            processed_dataset[key][:, integer_dims] = np.round(
                synthetic_dataset[key][:, integer_dims]
            )

        # Round constant dimensions to `constant_decimals` decimal places
        if has_constant_dims:
            processed_dataset[key][:, constant_dims] = np.round(
                synthetic_dataset[key][:, constant_dims],
                decimals=constant_decimals
            )

    return processed_dataset

In [None]:
# Task configuration: the four components passed positionally to composuite.make below.
robot = 'IIWA'
obj = 'Dumbbell'
obst = 'ObjectDoor'
subtask = 'Trashcan'

# Build the environment with use_task_id_obs=False.
# NOTE(review): presumably this leaves the task-ID indicator out of the observation — confirm
# against composuite.make. A later cell rebinds this same name with use_task_id_obs=True.
representative_indicators_env = composuite.make(robot, obj, obst, subtask, use_task_id_obs=False, ignore_done=False)

In [None]:
# Display the concrete class of the object returned by composuite.make.
type(representative_indicators_env)

In [None]:
# Step the wrapped inner environment (.env) once with an all-zero 8-element action and display
# whatever step() returns.
# NOTE(review): no reset() call is visible before this step — confirm the environment is
# already initialised at this point.
representative_indicators_env.env.step([0, 0, 0, 0, 0, 0, 0, 0])

In [None]:
# Display the environment's observation space.
representative_indicators_env.observation_space

In [None]:
# Alternative task configuration, kept for quick switching:
# robot = 'IIWA'
# obj = 'Plate'
# obst = 'None'
# subtask = 'Push'

robot = 'IIWA'
obj = 'Dumbbell'
obst = 'ObjectDoor'
subtask = 'Trashcan'

# Environment built with use_task_id_obs=True; only its modality_dims are used here, to strip
# the indicator vectors from the agent data below.
representative_indicators_env = composuite.make(robot, obj, obst, subtask, use_task_id_obs=True, ignore_done=False)
modality_dims = representative_indicators_env.modality_dims


# --- Agent (expert) data ---------------------------------------------------------------
# The data location can be overridden with COMPOSUITE_AGENT_DATA_DIR so the notebook is not
# tied to one machine; the default is the original local path.
base_agent_data_path = os.environ.get(
    'COMPOSUITE_AGENT_DATA_DIR',
    '/Users/shubhankar/Developer/compositional-rl-synth-data/data',
)
dataset = load_single_composuite_dataset(base_path=base_agent_data_path,
                                         dataset_type='expert',
                                         robot=robot, obj=obj,
                                         obst=obst, task=subtask)
agent_dataset = transitions_dataset(dataset)
# remove_indicator_vectors returns a pair; only the first element (the dataset) is kept.
agent_dataset, _ = remove_indicator_vectors(modality_dims, agent_dataset)
agent_obs = agent_dataset['observations']
# Special dimensions are detected on the agent observations and reused for the synthetic data.
integer_dims, constant_dims = identify_special_dimensions(agent_obs)
agent_actions = agent_dataset['actions']
agent_next_obs = agent_dataset['next_observations']
agent_rewards = agent_dataset['rewards']
agent_terminals = agent_dataset['terminals']
# From here on `agent_dataset` is the output of make_inputs (an object with .shape), no longer
# the dict of per-field arrays.
agent_dataset = make_inputs(agent_dataset)

# --- Synthetic data --------------------------------------------------------------------
# Overridable with COMPOSUITE_SYNTHETIC_DATA_DIR; the default is the original local path.
base_synthetic_data_path = os.environ.get(
    'COMPOSUITE_SYNTHETIC_DATA_DIR',
    '/Users/shubhankar/Developer/compositional-rl-synth-data/cluster_results/diffusion/cond_diff_20/train/',
)
synthetic_dataset = load_single_synthetic_dataset(base_path=base_synthetic_data_path,
                                                  robot=robot, obj=obj,
                                                  obst=obst, task=subtask)
# NOTE(review): assumes the synthetic observations share the column layout of the
# indicator-free agent observations — confirm with the shapes printed below.
synthetic_dataset = process_special_dimensions(synthetic_dataset, integer_dims, constant_dims)
synthetic_obs = synthetic_dataset['observations']
synthetic_actions = synthetic_dataset['actions']
synthetic_next_obs = synthetic_dataset['next_observations']
synthetic_rewards = synthetic_dataset['rewards']
synthetic_terminals = synthetic_dataset['terminals']
synthetic_dataset = make_inputs(synthetic_dataset)

print(agent_dataset.shape, synthetic_dataset.shape)

In [None]:
# Position of the gripper action inside each action vector.
gripper_action_index = 7

# Boolean mask per transition: True when the gripper component is the largest entry of the action.
# NOTE(review): a later plot labels these points "Gripper Closed" — confirm that argmax == 7 is
# the intended criterion rather than, e.g., the sign of the gripper component.
agent_gripper = np.argmax(agent_actions, axis=1) == gripper_action_index
synthetic_gripper = np.argmax(synthetic_actions, axis=1) == gripper_action_index

print(agent_gripper.shape, synthetic_gripper.shape)

In [None]:
# Environment built with use_task_id_obs=False; its modality_dims are printed here and used
# below to name the observation dimensions.
env = composuite.make(robot, obj, obst, subtask, use_task_id_obs=False, ignore_done=False)
print(env.modality_dims)

In [None]:
# Map each modality to its half-open [start, end) slice of the flat observation vector,
# walking the modalities in the order env.modality_dims yields them.
cumulative_ranges = {}
offset = 0
for modality, shape in env.modality_dims.items():
    width = shape[0]  # first entry of the modality's dims is its length
    cumulative_ranges[modality] = (offset, offset + width)
    offset = offset + width

# Reverse lookup: flat observation index -> name of the modality that owns it.
dim_names = {}
for modality, bounds in cumulative_ranges.items():
    print(modality, bounds)
    lower, upper = bounds
    dim_names.update({flat_idx: modality for flat_idx in range(lower, upper)})

In [None]:
# Per-dimension comparison of agent vs. synthetic observation distributions.
dataset1 = agent_obs
dataset2 = synthetic_obs

num_dimensions = dataset1.shape[1]
num_bins = 50

for idx in range(num_dimensions):
    agent_values = dataset1[:, idx]
    synthetic_values = dataset2[:, idx]

    # Use one set of bin edges for both histograms. Binning each dataset on its own range
    # gives bars of different widths and offsets, which makes the overlay hard to compare.
    bin_edges = np.histogram_bin_edges(
        np.concatenate((agent_values, synthetic_values)), bins=num_bins
    )

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.hist(agent_values, bins=bin_edges, alpha=0.5, label='Agent', color='blue', density=True)
    ax.hist(synthetic_values, bins=bin_edges, alpha=0.5, label='Synthetic', color='orange', density=True)
    ax.set_title(f"{dim_names[idx]}, {idx}")
    ax.legend()
    plt.show()
    # Release the figure explicitly; one figure is created per observation dimension.
    plt.close(fig)

In [None]:
# idx = 14

# fig1 = plt.figure()
# _, _, _ = plt.hist(dataset1[:, idx], alpha=0.5, label='Agent', color='orange', density=True)

In [None]:
# fig2 = plt.figure()
# _, _, _ = plt.hist(dataset2[:, idx], alpha=0.5, label='Synthetic', color='orange', density=True)

In [None]:
# dataset1[:, idx][:10]

In [None]:
# np.unique(dataset1[:, idx])

In [None]:
# dataset2[:, idx][:10]

In [None]:
# np.unique(dataset2[:, idx])

In [None]:
# Summary statistics of the agent inputs (the make_inputs output, not the raw dict).
# compute_mean_std returns a (mean, std) pair; the mean of each is printed as a single number.
# NOTE(review): presumably these are per-dimension statistics — confirm against compute_mean_std.
state_mean, state_std = compute_mean_std(agent_dataset, eps=1e-3)
print(state_mean.mean(), state_std.mean())

In [None]:
# Same summary for the synthetic inputs. This rebinds state_mean / state_std, so the agent
# statistics are no longer available after this cell runs.
state_mean, state_std = compute_mean_std(synthetic_dataset, eps=1e-3)
print(state_mean.mean(), state_std.mean())

In [None]:
# Draw an equal-sized random subset from each dataset for the t-SNE comparison.
num_samples = 2500
sample_seed = 42
# Seeded generator so the subsample (and therefore the embedding input) is reproducible.
sample_rng = np.random.default_rng(sample_seed)

# Cap the sample size at the dataset size: sampling without replacement raises otherwise.
agent_sample_size = min(num_samples, agent_dataset.shape[0])
random_indices = sample_rng.choice(agent_dataset.shape[0], agent_sample_size, replace=False)
sampled_agent_data = agent_dataset[random_indices]
# NOTE(review): assumes agent_gripper has one entry per row of agent_dataset — confirm.
sampled_agent_gripper = agent_gripper[random_indices]

synthetic_sample_size = min(num_samples, synthetic_dataset.shape[0])
random_indices = sample_rng.choice(synthetic_dataset.shape[0], synthetic_sample_size, replace=False)
sampled_synthetic_data = synthetic_dataset[random_indices]
sampled_synthetic_gripper = synthetic_gripper[random_indices]

print(sampled_agent_data.shape, sampled_synthetic_data.shape)
print(sampled_agent_gripper.shape, sampled_synthetic_gripper.shape)

In [None]:
# for idx in range(sampled_agent_data.shape[1]):
#     print(idx)
#     print('Mean:', sampled_agent_data[:, idx].mean(), sampled_synthetic_data[:, idx].mean())
#     print('Std:', sampled_agent_data[:, idx].std(), sampled_synthetic_data[:, idx].std())

In [None]:
# Stack agent rows first, then synthetic rows; the gripper masks are joined in the same order
# so they stay aligned with combined_data.
combined_data = np.concatenate([sampled_agent_data, sampled_synthetic_data])
combined_gripper = np.concatenate([sampled_agent_gripper, sampled_synthetic_gripper])

In [None]:
# Origin label per sampled row: 0 marks agent data, 1 marks synthetic data.
n_agent_rows = sampled_agent_data.shape[0]
n_synthetic_rows = sampled_synthetic_data.shape[0]

agent_origin = np.zeros(n_agent_rows, dtype=int)
synthetic_origin = np.ones(n_synthetic_rows, dtype=int)

# Agent labels first, then synthetic labels.
combined_origins = np.concatenate([agent_origin, synthetic_origin])

In [None]:
# Standardise the features over the combined (agent + synthetic) sample.
scaler = StandardScaler()
normalized_data = scaler.fit_transform(combined_data)

# Project to 2-D with t-SNE; random_state is fixed at 42.
tsne = TSNE(n_components=2, random_state=42)
embeddings = tsne.fit_transform(normalized_data)

In [None]:
# t-SNE embedding coloured by data origin (0 = agent, 1 = synthetic).
fig, ax = plt.subplots(figsize=(8, 6))

origin_styles = ((0, 'blue', 'Agent'), (1, 'red', 'Synthetic'))
for origin_id, color, label in origin_styles:
    origin_mask = combined_origins == origin_id
    ax.scatter(embeddings[origin_mask, 0], embeddings[origin_mask, 1],
               c=color, label=label, alpha=0.6, edgecolor='k', s=20)

ax.set_title('t-SNE')
ax.set_xlabel('Dimension 1')
ax.set_ylabel('Dimension 2')
ax.legend()
plt.show()

In [None]:
# t-SNE embedding restricted to the points flagged by the gripper mask, coloured by origin
# (0 = agent, 1 = synthetic).
fig, ax = plt.subplots(figsize=(8, 6))

origin_styles = ((0, 'blue', 'Agent'), (1, 'red', 'Synthetic'))
for origin_id, color, label in origin_styles:
    # combined_gripper is a boolean mask, so it is combined with the origin mask directly.
    selected = (combined_origins == origin_id) & combined_gripper
    ax.scatter(embeddings[selected, 0],
               embeddings[selected, 1],
               c=color, marker='x', label=label, s=20)

ax.set_title('t-SNE: Gripper Closed')
ax.set_xlabel('Dimension 1')
ax.set_ylabel('Dimension 2')
ax.legend()
plt.show()