In [None]:
import pandas as pd
import numpy as np
import glob
from typing import Iterable
from pathlib import Path

import seaborn as sns
import matplotlib.pyplot as plt
from pandas.api.types import is_numeric_dtype

In [None]:
# Apply seaborn's default theme to every figure created below.
# `sns.set()` is only an alias of `set_theme()`, which is the preferred spelling.
sns.set_theme()

In [None]:
def combine_data(data_paths: Iterable[Path]) -> pd.DataFrame:
    """Load evaluation ``.npz`` archives and stack them into one long DataFrame.

    For every archive the ``results``, ``successes`` and ``ep_lengths`` arrays
    are averaged over axis 1, giving one row per entry of ``timesteps``.
    Every key of the pickled ``args`` dict is broadcast into its own constant
    column. Archives without an ``args`` entry are skipped and their path is
    printed.

    Args:
        data_paths: Paths of the ``.npz`` archives to load.

    Returns:
        The concatenation of all per-archive frames. Each archive keeps its own
        0..n-1 row index, so index labels repeat across archives.

    Raises:
        ValueError: If no archive produced any rows (nothing to concatenate).
        KeyError: If an archive lacks one of the required arrays.
    """
    dfs = []
    for data_path in data_paths:
        # NOTE(security): allow_pickle=True is required because "args" is read
        # back as a pickled object, but unpickling can execute arbitrary code --
        # only load archives from a trusted source.
        # The context manager closes the archive even if loading raises.
        with np.load(data_path, allow_pickle=True) as zipped_data:
            data = dict(zipped_data)

        n_timesteps = data["timesteps"].shape[0]
        mean_results = np.mean(data["results"], axis=1)
        mean_successes = np.mean(data["successes"], axis=1)
        mean_ep_lengths = np.mean(data["ep_lengths"], axis=1)
        # Best mean success rate over all checkpoints of this archive.
        best_successes = np.max(mean_successes, axis=0)

        obj = {
            "timesteps": data["timesteps"].flatten(),
            "results": mean_results.flatten(),
            "ep_lengths": mean_ep_lengths.flatten(),
            "successes": mean_successes.flatten(),
            "best_successes": np.full(n_timesteps, best_successes),
            # None when the archive does not record a parameter count.
            "n_params": np.full(n_timesteps, data.get("num_params")),
        }

        if "args" not in data:
            # Without the run arguments the rows cannot be attributed to a
            # configuration, so the archive is reported and left out.
            print(f"{data_path}")
            continue

        for key, value in data["args"].item().items():
            obj[key] = np.full(n_timesteps, value)
        dfs.append(pd.DataFrame(obj))

    if not dfs:
        raise ValueError("combine_data: no archive with an 'args' entry was found, nothing to concatenate")
    return pd.concat(dfs)


In [None]:
def plot_timesteps(df, y, ylabel, hue, huelabel, plot_dir, dofs, title_prefix, yticks=None, ylim=None, axhline=None, nrows=1, ncols=3, figsize=(16, 4)):
    """Plot ``y`` against ``timesteps`` with one panel per DOF and save the figure.

    Args:
        df: Frame with at least the columns ``dof``, ``timesteps``, ``y`` and ``hue``.
        y: Column drawn on the y axis.
        ylabel: Label of the y axis.
        hue: Column used to colour the lines.
        huelabel: Title of the legend (drawn on the last panel only).
        plot_dir: Directory the PNG is written to, as ``<y>.<hue>.timesteps.png``.
        dofs: Values of the ``dof`` column to plot, one panel each.
        title_prefix: Text placed before ``<dof>DOF`` in each panel title.
        yticks: Optional explicit y tick positions.
        ylim: Optional y axis limits.
        axhline: Optional y position of a dashed horizontal reference line.
        nrows, ncols: Grid of panels; panels are filled in row-major order.
        figsize: Figure size passed to matplotlib.
    """
    # squeeze=False always returns a 2-D array of Axes, so the panels can be
    # flattened for any grid shape (a 1x1 grid would otherwise be a bare Axes
    # and a multi-row grid would be iterated row by row).
    fig, axs = plt.subplots(nrows, ncols, sharey=True, sharex=True, figsize=figsize, squeeze=False)
    panels = list(zip(dofs, axs.ravel()))
    # The legend goes on the last panel that is actually drawn.
    last_panel = len(panels) - 1
    for i, (dof, ax) in enumerate(panels):
        df_dof = df[df["dof"] == dof]
        # A numeric hue gets a colour scale spanning this panel's own value range.
        if is_numeric_dtype(df_dof[hue]):
            norm = plt.Normalize(df_dof[hue].min(), df_dof[hue].max())
        else:
            norm = None
        sns.lineplot(
            df_dof,
            ax=ax,
            x="timesteps",
            y=y,
            hue=hue,
            hue_norm=norm,
            legend=i == last_panel,
        )
        if i == last_panel:
            ax.legend(title=huelabel)
        if axhline is not None:
            ax.axhline(axhline, ls="--", color="black")
        ax.set_xlabel("Time Steps")
        # One tick every 10,000 time steps, shown in scientific notation.
        ax.set_xticks(np.arange(0, df_dof["timesteps"].max() + 1, 1e4))
        ax.ticklabel_format(axis='x', style='sci', scilimits=(0,0))
        ax.set_ylabel(ylabel)
        if yticks is not None:
            ax.set_yticks(yticks)
        if ylim is not None:
            ax.set_ylim(ylim)
        ax.set_title(f"{title_prefix}{dof}DOF")
    # Save through the figure handle instead of pyplot's "current figure" state.
    fig.savefig(plot_dir / f"{y}.{hue}.timesteps.png", bbox_inches='tight')

In [None]:
def calculate_sample_efficiency(final_reward_type_df, groupby, threshold=0.9):
    """Summarise how many time steps runs need to first reach a success rate.

    A run is identified by its ``eval_log_path``. For every run the earliest
    ``timesteps`` value whose ``successes`` is at least ``threshold`` is taken;
    those values are then aggregated per group.

    Args:
        final_reward_type_df: Frame with the columns ``timesteps``,
            ``successes``, ``eval_log_path`` and every column in ``groupby``
            (despite the name, any frame with these columns works).
        groupby: Column names that define a group.
        threshold: Minimum success rate that counts as "reached" (default 0.9).

    Returns:
        Dict mapping the tuple of group values to
        ``(mean, population std, number of runs)`` of the first time step at
        which the threshold was reached. Runs that never reach the threshold
        are left out, and groups without any such run have no entry. Rows with
        a NaN success rate never count as reaching the threshold.
    """
    groupby = list(groupby)
    # Sorting by time step within each group guarantees that the first row seen
    # for a run is its earliest qualifying evaluation.
    ordered = final_reward_type_df.sort_values(by=[*groupby, "timesteps", "eval_log_path"])
    reached = ordered[ordered["successes"] >= threshold]

    first_timesteps = {}
    # Runs are tracked per group so a log path shared between two groups is
    # still counted once in each of them.
    seen_runs = set()
    for _, row in reached.iterrows():
        key = tuple(row[col] for col in groupby)
        run = (key, row["eval_log_path"])
        if run in seen_runs:
            continue
        seen_runs.add(run)
        first_timesteps.setdefault(key, []).append(row["timesteps"])

    return {
        key: (np.mean(values), np.std(values), len(values))
        for key, values in first_timesteps.items()
    }

In [None]:
# Values of the "dof" column that get one subplot column each.
DOFS = [3, 4, 7]

# (column name, display label) pairs. They are star-unpacked into the plotting
# helpers as (y, ylabel) or (hue, huelabel), so all of them are immutable tuples.
REWARD_TYPE = ("reward_type", "Reward Type")
REWARD = ("results", "Reward")
SUCCESS_RATE = ("successes", "Success Rate")
LEARNING_RATE = ("learning_rate", "Learning Rate")
BEST_SUCCESS = ("best_successes", "Best Success Rate")
HIDDEN_SIZE = ("hidden_size", "Hidden Size")
DEPTH = ("depth", "Depth")
NUMBER_OF_PARAMETERS = ("n_params", "Number of parameters")
ARCHITECTURE = ("arch", "Number of Layers x Hidden Size")
ALGORITHM = ("alg", "Algorithm")
EPISODE_LENGTH = ("ep_lengths", "Episode Length")
# Directory that the plotting calls in this notebook write their PNG files to.
# The path is relative, so it resolves against the kernel's working directory.
plot_dir = Path("../../../experiments/Final/plots")
# Create the directory (and any missing parents) up front; no error if it exists.
plot_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Collect every evaluation archive of the algorithm-comparison experiment.
final_algorithms_paths = [
    Path(path)
    for path in glob.glob("../../../experiments/Final_Algorithms/data/**/*.npz", recursive=True)
]
final_algorithms_df = combine_data(final_algorithms_paths)

# Architecture label of the form "<depth>x<hidden_size>".
final_algorithms_df["arch"] = (
    final_algorithms_df["depth"].astype(int).astype(str)
    + "x"
    + final_algorithms_df["hidden_size"].astype(int).astype(str)
)
final_algorithms_df = final_algorithms_df.sort_values(by=["hidden_size", "depth"])

# TD3 runs that used the NJMultiInputPolicy are reported as their own algorithm.
condition = final_algorithms_df["alg"].eq("TD3") & final_algorithms_df["policy"].eq("NJMultiInputPolicy")
final_algorithms_df.loc[condition, "alg"] = "TD3-NJ"

# Keep everything except UVS runs with a non-zero learning rate.
condition = final_algorithms_df["alg"].ne("UVS") | final_algorithms_df["learning_rate"].eq(0)
final_algorithms_df_filtered = final_algorithms_df[condition]

# Restrict to the dense-reward runs, ordered by algorithm name.
condition = final_algorithms_df_filtered["reward_type"].eq("Dense")
final_algorithms_df_filtered = final_algorithms_df_filtered[condition].sort_values(by=["alg"])

# Success rate over training per algorithm, with the 0.9 level marked.
plot_timesteps(
    final_algorithms_df_filtered,
    *SUCCESS_RATE,
    *ALGORITHM,
    plot_dir,
    DOFS,
    "WAMVisualReachDense",
    yticks=np.arange(0, 1.1, 0.1),
    axhline=0.9,
)

In [None]:
# Snapshot of every run at the 50,000-step evaluation.
final_algorithms_df_filtered_50k = final_algorithms_df_filtered.loc[
    final_algorithms_df_filtered["timesteps"].eq(50000)
]
# Summary statistics of success rate and reward per algorithm and DOF.
(
    final_algorithms_df_filtered_50k
    .loc[:, ["alg", "dof", "successes", "results"]]
    .groupby(["alg", "dof"])
    .describe()
    .round(2)
)

In [None]:
# Time steps needed to first reach the success threshold, per algorithm and DOF.
calculate_sample_efficiency(
    final_algorithms_df_filtered,
    groupby=("alg", "dof"),
)

In [None]:
# Only the TD3-NJ runs.
df_nj = final_algorithms_df_filtered.loc[final_algorithms_df_filtered["alg"].eq("TD3-NJ")]

# Evaluations that are still below a 0.9 success rate, with an episode estimate.
# NOTE(review): 1000 is presumably the number of time steps between two
# evaluations -- confirm against the training configuration.
df_nj_lt_90 = df_nj.loc[df_nj["successes"].lt(.9)].assign(
    episodes=lambda frame: 1000 / frame["ep_lengths"]
)
df_nj_lt_90.groupby("dof")[["episodes"]].sum()

In [None]:
# Mean evaluation episode length over training, one line per algorithm.
plot_timesteps(
    final_algorithms_df_filtered,
    y=EPISODE_LENGTH[0],
    ylabel=EPISODE_LENGTH[1],
    hue=ALGORITHM[0],
    huelabel=ALGORITHM[1],
    plot_dir=plot_dir,
    dofs=DOFS,
    title_prefix="WAMVisualReachDense",
    ylim=(5, 20),
)

In [None]:
# Collect every evaluation archive of the reward-type experiment.
final_reward_type_paths = [
    Path(path)
    for path in glob.glob("../../../experiments/Final_Reward_Type/data/**/*.npz", recursive=True)
]
# Sorted by reward type before plotting.
final_reward_type_df = combine_data(final_reward_type_paths).sort_values(by=["reward_type"])

plot_timesteps(
    final_reward_type_df,
    y=SUCCESS_RATE[0],
    ylabel=SUCCESS_RATE[1],
    hue=REWARD_TYPE[0],
    huelabel=REWARD_TYPE[1],
    plot_dir=plot_dir,
    dofs=DOFS,
    title_prefix="WAMVisualReach",
)

In [None]:
# Snapshot of every run at the 100,000-step evaluation.
final_reward_type_df_100k = final_reward_type_df.loc[
    final_reward_type_df["timesteps"].eq(100000)
]
# Summary statistics of the success rate per reward type and DOF.
(
    final_reward_type_df_100k
    .loc[:, ["reward_type", "dof", "successes"]]
    .groupby(["reward_type", "dof"])
    .describe()
    .round(2)
)

In [None]:
# Time steps needed to first reach the success threshold, per reward type and DOF.
mean_first_timestep_gt_90 = calculate_sample_efficiency(
    final_reward_type_df,
    groupby=["reward_type", "dof"],
)
mean_first_timestep_gt_90

In [None]:
# Collect every evaluation archive of the network-size experiment.
final_sizes_paths = [
    Path(path)
    for path in glob.glob("../../../experiments/Final_Sizes/data/**/*.npz", recursive=True)
]
# Add the "<depth>x<hidden_size>" architecture label, then order by network size.
final_sizes_df = (
    combine_data(final_sizes_paths)
    .assign(
        arch=lambda frame: (
            frame["depth"].astype(int).astype(str)
            + "x"
            + frame["hidden_size"].astype(int).astype(str)
        )
    )
    .sort_values(by=["hidden_size", "depth"])
)

In [None]:
# Keep only the runs whose network has depth 2.
condition = final_sizes_df["depth"].eq(2)
final_sizes_df_filtered = final_sizes_df.loc[condition]

In [None]:
# Success rate over training for the depth-2 networks, one line per architecture.
plot_timesteps(
    final_sizes_df_filtered,
    y=SUCCESS_RATE[0],
    ylabel=SUCCESS_RATE[1],
    hue=ARCHITECTURE[0],
    huelabel=ARCHITECTURE[1],
    plot_dir=plot_dir,
    dofs=DOFS,
    title_prefix="WAMVisualReachDense",
)

In [None]:
import itertools
def plot_timesteps_matrix(df, y, ylabel, hue, huelabel, plot_dir, dofs, title_prefix, yticks=None, ylim=None, axhline=None, nrows=1, ncols=3, figsize=(16, 4), depths=(1, 2, 3)):
    """Plot ``y`` against ``timesteps`` on a depth x DOF grid and save the figure.

    Panels are filled in row-major order with every (depth, dof) combination,
    so the grid lines up as "one row per depth, one column per DOF" only when
    ``ncols == len(dofs)``. Combinations beyond ``nrows * ncols`` panels are
    not drawn.

    Args:
        df: Frame with at least the columns ``dof``, ``depth``, ``timesteps``,
            ``y`` and ``hue``.
        y: Column drawn on the y axis.
        ylabel: Label of the y axis.
        hue: Column used to colour the lines.
        huelabel: Title of the legends (drawn beside the last panel of each row).
        plot_dir: Directory the PNG is written to, as ``<y>.<hue>.timesteps.png``.
        dofs: Values of the ``dof`` column, one grid column each.
        title_prefix: Text placed before ``<dof>DOF`` in the first-row titles.
        yticks: Optional explicit y tick positions.
        ylim: Optional y axis limits.
        axhline: Optional y position of a dashed horizontal reference line.
        nrows, ncols: Shape of the panel grid.
        figsize: Figure size passed to matplotlib.
        depths: Values of the ``depth`` column, one grid row each.
    """
    # squeeze=False keeps `axs` a 2-D array for every grid shape, including 1x1.
    fig, axs = plt.subplots(nrows, ncols, sharey=True, sharex=True, figsize=figsize, squeeze=False)
    n_dofs = len(dofs)
    for i, ((depth, dof), ax) in enumerate(zip(itertools.product(depths, dofs), axs.flatten())):
        df_dof = df[df["dof"] == dof]
        df_dof = df_dof[df_dof["depth"] == depth]
        # The legend belongs to the last DOF of each depth row, whatever the
        # number of DOFs is.
        is_last_in_row = i % n_dofs == n_dofs - 1
        sns.lineplot(
            df_dof,
            ax=ax,
            x="timesteps",
            y=y,
            hue=hue,
            legend=is_last_in_row,
        )
        if is_last_in_row:
            # Place the legend outside the axes, to the right of the row.
            ax.legend(title=huelabel, bbox_to_anchor=(1.04, 0.5), loc="center left")
        if axhline is not None:
            ax.axhline(axhline, ls="--", color="black")
        ax.set_xlabel("Time Steps")
        # One tick every 10,000 time steps over the range of the whole frame.
        ax.set_xticks(np.arange(0, df["timesteps"].max()+1, 10000))
        ax.ticklabel_format(axis='x', style='sci', scilimits=(0,0))
        ax.set_ylabel(ylabel)
        if yticks is not None:
            ax.set_yticks(yticks)
        if ylim is not None:
            ax.set_ylim(ylim)
        # Column titles only on the first row of panels.
        if i < n_dofs:
            ax.set_title(f"{title_prefix}{dof}DOF")
    # NOTE(review): this is the same file name plot_timesteps uses for an equal
    # (y, hue) pair, so the two functions overwrite each other's output --
    # confirm that this is intended.
    fig.savefig(plot_dir / f"{y}.{hue}.timesteps.png", bbox_inches='tight')

In [None]:
# Quick look at the hidden sizes present in the size experiment.
final_sizes_df.loc[:, "hidden_size"]

In [None]:
# Success rate over training on a 3x3 grid of panels, coloured by architecture.
# NOTE(review): this writes "successes.arch.timesteps.png", the same file the
# earlier plot_timesteps call on final_sizes_df_filtered produces, so that
# figure is overwritten -- confirm that this is intended.
plot_timesteps_matrix(
    final_sizes_df,
    y=SUCCESS_RATE[0],
    ylabel=SUCCESS_RATE[1],
    hue=ARCHITECTURE[0],
    huelabel=ARCHITECTURE[1],
    plot_dir=plot_dir,
    dofs=DOFS,
    title_prefix="WAMVisualReachDense",
    yticks=np.arange(0, 1.1, 0.1),
    nrows=3,
    ncols=3,
    figsize=(16, 24),
)

In [None]:
# Distinct parameter counts per DOF for the 2x64 and 1x128 architectures.
(
    final_sizes_df
    .groupby(["arch", "dof"])["n_params"]
    .unique()
    .loc[["2x64", "1x128"]]
)