In [None]:
import math
import yaml

from types import SimpleNamespace as SN

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch as th

from openTSNE import TSNE
from sklearn.decomposition import PCA
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import random_split

In [None]:
from src.envs import REGISTRY as env_REGISTRY
from src.components.offline_buffer import OfflineBufferH5
from src.components.transforms import OneHot

In [None]:
# Notebook-wide plotting theme: start from seaborn's "white" style, then
# apply the Matplotlib rcParams overrides below on top of it.
sns.set_style("white")

plt.rcParams.update({
    # Keep only the left/bottom spines and show tick marks on them.
    "axes.spines.top": False,
    "axes.spines.right": False,
    "xtick.bottom": True,
    "ytick.left": True,
    "axes.grid": False,
    # Line, frame and font sizing.
    "lines.linewidth": 1.5,
    "axes.linewidth": 2,
    "font.size": 10,
    "figure.titleweight": "bold",
    "figure.dpi": 300,
})

In [None]:
# Scenario identifier; get_title() looks for a city-name substring in it.
SCENARIO = "manhattan_28_7"
# Basename of the environment config read from src/config/envs/<ENV>.yaml.
ENV = f"{SCENARIO}_mixed_offline"
# Map name copied onto env_args.map_name when the config is loaded.
MAP_NAME = SCENARIO

In [None]:
def get_title():
    """Return the display name of the city referenced by ``SCENARIO``.

    The module-level ``SCENARIO`` string is checked for the substrings
    "hangzhou", "jinan" and "manhattan", in that order; the first match
    decides the title. When none matches, the literal string "None" is
    returned (not the ``None`` object).
    """
    # Ordered (substring, title) pairs; the order mirrors the lookup priority.
    known_cities = (
        ("hangzhou", "Hangzhou"),
        ("jinan", "Jinan"),
        ("manhattan", "Manhattan"),
    )
    for needle, city_title in known_cities:
        if needle in SCENARIO:
            return city_title
    return "None"

In [None]:
%%capture
# Load the environment config for ENV and expose its top-level keys as
# attributes of a SimpleNamespace.
# NOTE(review): yaml.FullLoader can construct more than plain data types;
# yaml.safe_load would be the safer choice if the config only holds plain
# scalars/lists/dicts — confirm before switching.
with open(f"src/config/envs/{ENV}.yaml", "r") as f:
    env_config = yaml.load(f, Loader=yaml.FullLoader)
env_args = SN(**env_config)
env_args.map_name = MAP_NAME

# Instantiate the environment class registered under env_args.env and copy
# every entry of its info dict onto env_args (overwriting same-named config
# keys). env_args.n_actions / env_args.n_agents used below are presumably
# populated by this loop — verify against get_env_info().
env = env_REGISTRY[env_args.env](**env_args.env_args)
env_info = env.get_env_info()
for k, v in env_info.items():
    setattr(env_args, k, v)


# Field layout keyed by field name: each entry gives a value shape ("vshape")
# and, where present, a group and a torch dtype. Shapes for state/obs and the
# width of avail_actions come from env_info.
# NOTE: scheme, groups and preprocess are not referenced again in the visible
# part of this notebook.
scheme = {
    "state": {"vshape": env_info["state_shape"]},
    "obs": {"vshape": env_info["obs_shape"], "group": "agents"},
    "actions": {"vshape": (1,), "group": "agents", "dtype": th.long},
    "avail_actions": {"vshape": (env_info["n_actions"],), "group": "agents", "dtype": th.int},
    "reward": {"vshape": (1,)},
    "terminated": {"vshape": (1,), "dtype": th.uint8},
    "corrected_terminated": {"vshape": (1,), "dtype": th.uint8},
}
# Size of the "agents" group referenced by the scheme entries above.
groups = {
    "agents": env_args.n_agents
}
# Maps "actions" to a derived "actions_onehot" field produced by a OneHot
# transform with out_dim equal to the number of actions.
preprocess = {
    "actions": ("actions_onehot", [OneHot(out_dim=env_args.n_actions)])
}

In [None]:
# Force the CUDA flag on before env_args is passed to the buffer constructor.
# NOTE(review): hard-coded True; presumably this breaks on a CPU-only machine —
# consider th.cuda.is_available(). Confirm how the buffer reads this flag.
env_args.use_cuda = True

In [None]:
from src.components.offline_buffer import ISOfflineBufferH5

# Build the offline buffer from the config-derived settings on env_args.
# Later cells read buffer.encodings, buffer.data, buffer.qualities and call
# buffer._normalize_returns.
# init_priorities=False: priorities are computed by hand further down from
# buffer.data["reward"] — TODO confirm that skipping them here is intended.
buffer = ISOfflineBufferH5(
    env_args,
    env_args.map_name,
    env_args.offline_data_quality,
    env_args.offline_bottom_data_path,
    env_args.offline_max_buffer_size,
    shuffle=env_args.offline_data_shuffle,
    init_priorities=False,
)

In [None]:
def normalize(inp):
    """Min-max scale ``inp`` along axis 0.

    The axis-0 minimum is subtracted from ``inp`` and the result is divided
    by the axis-0 range plus 1e-6. The small constant keeps the division
    defined where a slice is constant (those entries become 0), and means
    the largest value maps to slightly below 1.

    Args:
        inp: array exposing ``min(axis=0)`` / ``max(axis=0)`` and supporting
            broadcasting arithmetic (e.g. a NumPy array).

    Returns:
        Array of the same shape as ``inp`` holding the scaled values.
    """
    lowest = inp.min(axis=0)
    spread = inp.max(axis=0) - lowest
    return (inp - lowest) / (spread + 1e-6)

In [None]:
# Flatten the buffer's encodings to 2-D: all leading axes are merged and the
# last axis is kept as the feature axis.
encodings = buffer.encodings.squeeze()
encodings = encodings.reshape(-1, encodings.shape[-1])

# Build one (state, action, next state) feature row per step.
# `state` drops the last index along axis 1 and `next_state` drops the first,
# so the two are offset by one step (axis 1 is presumably time — confirm).
# NOTE(review): the two slices are min-max scaled independently, so the same
# raw value can map to different scaled values in state and next_state —
# confirm this is intended.
global_state = buffer.data["state"]
state = normalize(global_state[:, :-1, :])
next_state = normalize(global_state[:, 1:, :])
ep, t, _ = state.shape
# Actions for all but the last step, with trailing axes merged into one.
actions = buffer.data["actions"][:, :-1].reshape(ep, t, -1)
traj = np.concatenate((state, actions, next_state), axis=-1)
# Merge the first two axes: one row per (episode, step) pair.
traj = traj.reshape(ep * t, -1)

# One display label per row, derived from the buffer's quality names
# (underscores become spaces, then title-cased).
# NOTE(review): np.repeat gives every quality an equal, contiguous share of
# rows. That only matches the data if rows are grouped by quality in
# buffer.qualities order and ep * t is divisible by len(labels); the buffer
# was built with shuffle=env_args.offline_data_shuffle — confirm this holds.
label_encoder = LabelEncoder()
labels = [q.replace("_", " ").title() for q in buffer.qualities]
dataset_labels = np.repeat(labels, ep * t // len(labels))
# numeric_labels is not used in the visible part of this notebook.
numeric_labels = label_encoder.fit_transform(dataset_labels)

In [None]:
# t-SNE embedding of the flattened encodings, fitted directly on them
# (cosine metric, fixed seed). Columns 0 and 1 of the result are what the
# scatter plots below draw.
encodings_tsne = TSNE(
    perplexity=25,
    metric="cosine",
    n_jobs=16,
    random_state=42,
    verbose=True,
).fit(encodings)

In [None]:
# Compress the flattened (state, action, next state) rows with PCA, then
# embed the reduced features with t-SNE using the same settings as for the
# encodings.
#
# - The component count is capped by the dimensions of `traj`, so the cell
#   still runs when there are fewer than 128 rows or features; with larger
#   data it is still 128.
# - random_state is fixed so that a randomized SVD solver, if scikit-learn
#   selects one, gives the same projection on every run (the t-SNE step is
#   already seeded with 42).
traj_pca = PCA(n_components=min(128, *traj.shape), random_state=42)
trajectory_pca = traj_pca.fit_transform(traj)

trajectory_tsne = TSNE(
    perplexity=25, metric="cosine", n_jobs=16,
    random_state=42, verbose=True,
).fit(trajectory_pca)

In [None]:
# Scatter plot of the t-SNE embedding of the learned encodings, one point
# per row, coloured by the per-row label.
plt.figure(figsize=(5, 5.5))
df_plot = pd.DataFrame({
    "Component 1": encodings_tsne[:, 0],
    "Component 2": encodings_tsne[:, 1],
    "Labels": dataset_labels,
})
sns.scatterplot(
    data=df_plot, x="Component 1", y="Component 2", hue="Labels",
    s=10, alpha=1.0,
)
# Title and axis labels are set in one call on the current axes.
plt.gca().set(
    title="t-SNE: Learned Trajectory Encodings",
    xlabel="t-SNE Dimension 1",
    ylabel="t-SNE Dimension 2",
)
# Frameless three-column legend anchored below the axes.
plt.legend(
    title="Controller",
    frameon=False,
    ncol=3,
    loc="upper center",
    bbox_to_anchor=(0.45, -0.15),
)
plt.tight_layout()

In [None]:
# Scatter plot of the t-SNE embedding of the PCA-reduced
# (state, action, next state) features, coloured by the per-row label.
plt.figure(figsize=(5, 5.5))
df_plot = pd.DataFrame({
    'Component 1': trajectory_tsne[:, 0],
    'Component 2': trajectory_tsne[:, 1],
    'Labels': dataset_labels,
})
sns.scatterplot(
    x='Component 1',
    y='Component 2',
    hue='Labels',
    data=df_plot,
    s=10,
    alpha=1.0,
)
# Raw string: "\m" is not a valid escape sequence, so the non-raw literal
# triggers an invalid-escape warning on recent Python versions. The text
# handed to Matplotlib is unchanged.
plt.title(r"t-SNE: $<\mathcal{O}_t, \mathcal{A}_t, \mathcal{O}'_t>$")
plt.xlabel("t-SNE Dimension 1")
plt.ylabel("t-SNE Dimension 2")
# Frameless three-column legend anchored below the axes.
plt.legend(
    frameon=False, title='Controller',
    bbox_to_anchor=(0.45, -0.15),
    loc='upper center', ncol=3,
)
plt.tight_layout()

In [None]:
# Two-panel comparison figure: raw (state, action, next state) features on
# the left (ax1), learned encodings on the right (ax2), with one shared
# legend underneath.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(6.8, 3.5))

# Right panel: t-SNE of the learned encodings.
df_plot_1 = pd.DataFrame({
    'Component 1': encodings_tsne[:, 0],
    'Component 2': encodings_tsne[:, 1],
    'Labels': dataset_labels,
})
sns.scatterplot(
    x='Component 1',
    y='Component 2',
    hue='Labels',
    data=df_plot_1,
    s=10,
    alpha=1.0,
    ax=ax2
)
ax2.set_title("t-SNE: Learned Trajectory Encodings")
ax2.set_xlabel("t-SNE Dimension 1")
ax2.set_ylabel("t-SNE Dimension 2")

# Left panel: t-SNE of the PCA-reduced transition features.
df_plot_2 = pd.DataFrame({
    'Component 1': trajectory_tsne[:, 0],
    'Component 2': trajectory_tsne[:, 1],
    'Labels': dataset_labels,
})
sns.scatterplot(
    x='Component 1',
    y='Component 2',
    hue='Labels',
    data=df_plot_2,
    s=10,
    alpha=1.0,
    ax=ax1
)
# Raw string: "\m" is not a valid escape sequence, so the non-raw literal
# triggers an invalid-escape warning on recent Python versions. The rendered
# title is unchanged.
ax1.set_title(r"t-SNE: $<\mathcal{O}_t, \mathcal{A}_t, \mathcal{O}'_t>$")
ax1.set_xlabel("t-SNE Dimension 1")
ax1.set_ylabel("t-SNE Dimension 2")

# Reuse the left panel's legend entries for a single figure-level legend.
# NOTE: this rebinds the notebook-level name `labels` that the label-building
# cell also assigns.
handles, labels = ax1.get_legend_handles_labels()
fig.legend(
    handles, labels, frameon=False,
    # title='Controller',
    bbox_to_anchor=(0.5, 0.1), loc='upper center', ncol=3
)

# Drop the per-axes legends now that the shared one exists.
ax1.get_legend().remove()
ax2.get_legend().remove()

# Reserve the bottom 10% of the figure for the shared legend.
plt.tight_layout(rect=[0, 0.1, 1, 1])

In [None]:
# alpha values compared in the priority-density plot further down.
alphas = [0.5, 0.75, 1.0]
# One return per episode: rewards summed over axis 1 (presumably the time
# axis — confirm against the buffer layout).
return_per_episode = buffer.data["reward"].sum(axis=1)
# Returns normalised by the buffer's private helper with a fixed alpha of
# 0.75; this is what the return histogram cell plots.
norm_returns = buffer._normalize_returns(return_per_episode, norm_type="linear", alpha=0.75)
# # exp_norm_returns = buffer._normalize_returns(return_per_episode, norm_type="exponential")
# Maps each alpha in `alphas` to its array of per-episode priorities.
priority_dict = {}
for alpha in alphas:
    # priorities = (norm_returns + 1e-6) ** alpha
    # priorities = priorities / np.sum(priorities)
    # NOTE(review): the helper is called with alpha - 0.25, but the result is
    # stored (and later labelled in the plot legend) under alpha — confirm
    # the offset is intended.
    n_returns = buffer._normalize_returns(
        return_per_episode, norm_type="linear", alpha=alpha-0.25,
    )
    # NOTE(review): 1e-6 is added to the numerator only, so the resulting
    # values do not sum exactly to 1 — confirm whether that matters.
    priority_dict[alpha] = (n_returns + 1e-6) / np.sum(n_returns)
# # linear_priorities = (norm_returns + 1e-6) / np.sum(norm_returns)
# # exp_priorities = (exp_norm_returns + 1e-6) / np.sum(exp_norm_returns)

In [None]:
# Histogram of the normalised episodic returns, titled with the city name
# derived from the scenario.
plt.figure(figsize=(3, 3))
_ = sns.histplot(norm_returns, legend=False)
plt.gca().set(
    xlabel="Normalized Episodic Return",
    title=get_title(),
    ylabel="Frequency",
)
plt.tight_layout()

In [None]:
from itertools import cycle
from scipy.stats import gaussian_kde

In [None]:
# Kernel-density estimate of the per-episode priorities, one curve per alpha.
plt.figure(figsize=(3, 3))

tableau_colors = cycle(sns.color_palette("tab10"))
# Line opacity grows with alpha: each value is scaled by the largest alpha.
color_alphas = [alpha / max(alphas) for alpha in alphas]
for alpha, opacity in zip(alphas, color_alphas):
    color = next(tableau_colors)
    priorities = priority_dict[alpha].squeeze()
    kde = gaussian_kde(priorities)
    # Evaluate the density on 100 evenly spaced points spanning the data.
    x_range = np.linspace(min(priorities), max(priorities), 100)
    plt.plot(
        x_range, kde(x_range), label=f'{alpha}', color=color, alpha=opacity
    )
# Scientific notation on the x axis; setting it once after the loop is enough.
plt.ticklabel_format(style='sci', axis='x', scilimits=(0,0))
# Raw string is required here: in a normal literal "\a" is the BEL control
# character, which would corrupt the mathtext title. The legend is created
# exactly once; calling plt.legend() again without arguments would replace
# it and drop the title.
plt.legend(title=r"($\alpha$)")
plt.xlabel('Priorities')
plt.ylabel('Density')
plt.tight_layout()

In [None]:
# Validation accuracy versus latent dimension.
# Five recorded accuracies per latent size; what the five entries correspond
# to (seeds, epochs, ...) is not stated here — TODO document the source.
latent_dims = [2, 8, 16, 32]
val_accuracies = {
    2: [0.8236, 0.8251, 0.8282, 0.8295, 0.8325],
    8: [0.8337, 0.8341, 0.8348, 0.8351, 0.8372],
    16: [0.8342, 0.8342, 0.8342, 0.8342, 0.8342],
    32: [0.8388, 0.8397, 0.8397, 0.8397, 0.8397],
}
# Arithmetic mean of each list, in the order given by latent_dims.
avg_val_accuracies = [
    sum(scores) / len(scores)
    for scores in map(val_accuracies.get, latent_dims)
]
plt.figure(figsize=(4, 2.5))
plt.plot(
    latent_dims,
    avg_val_accuracies,
    color='tab:blue',
    linestyle='-',
    marker='o',
)
# Put a tick at every evaluated latent size.
plt.xticks(latent_dims)
plt.xlabel('Latent Dimension')
plt.ylabel('Validation Accuracy')