In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import os

from utils.cluster import ClusterManager
from utils import notebooks as nb

# Use 150 dpi as the default figure resolution.
plt.rcParams.update({"figure.dpi": 150})

# Project helper; in this notebook only its `artifact_dir` attribute is read,
# as the directory the plotting functions save their PDFs into.
cluster = ClusterManager()


In [None]:
# Fill in the experiment ids produced by training (e.g. "ab12-cd34") before
# running the cells below; the list is passed to nb.load_data in the next cell.
experiment_ids = []

In [None]:
# Load the "error_propagation" runs for the ids listed above.
# `df` is the table every plotting function filters; `results` is read as a
# global by energy_over_T_plot, which indexes it with values of df["id"].
experiment_folder, results, results_with_metrics, df = nb.load_data("error_propagation", experiment_ids)


# Error Norm over T

Plot the error norm over the T inference steps (leftmost plot).

In [None]:


def energy_over_T_plot(df, dataset: str, top_n: int = 1):
    """Plot per-layer energy norms over the inference steps T for the best AdamW runs.

    The runs of ``dataset`` are restricted to the ``"adamw-0.9"`` optimiser, the
    most accurate run for every ``config.h_lr`` is picked, and those picks are
    ranked by accuracy. The ``top_n`` best conditions (``all_experimental_vars``)
    are kept and *every* run of ``dataset`` sharing one of these conditions is
    plotted. For each such run, the last entry along the second axis of the
    ``energy_norms`` arrays stored under keys ``"0"``, ``"1"`` and ``"2"`` is
    drawn against the step index (1-based) on a log-scaled y axis; the layers
    are labelled 1, 2 and 3.

    The figure is saved as ``energy_norms_per_T_{dataset}.pdf`` inside
    ``cluster.artifact_dir`` and then shown.

    Reads the notebook-level ``results`` object; values of ``df["id"]`` are used
    to index into it.

    Args:
        df: Table of runs with (at least) the columns
            ``experiment.data.dataset``, ``config.optim``, ``config.h_lr``,
            ``results.accuracy``, ``all_experimental_vars`` and ``id``.
        dataset: Value of ``experiment.data.dataset`` to plot.
        top_n: Number of best conditions to include.

    Raises:
        ValueError: If no run matches the selection.
    """
    df = df.loc[df["experiment.data.dataset"] == dataset]

    # Best "adamw-0.9" run per state learning rate, ranked by accuracy.
    adam_runs = df.loc[df["config.optim"] == "adamw-0.9"]
    best_row_per_h_lr = adam_runs.groupby("config.h_lr")["results.accuracy"].idxmax()
    ranked = adam_runs.loc[best_row_per_h_lr].sort_values("results.accuracy", ascending=False)
    best_conditions = ranked["all_experimental_vars"].values[:top_n]

    # All runs that share one of the selected conditions, not only the best one.
    run_ids = df.loc[df["all_experimental_vars"].isin(best_conditions), "id"].values
    if len(run_ids) == 0:
        raise ValueError(f"No 'adamw-0.9' runs found for dataset {dataset!r}")

    frames = []
    for run_id in run_ids:
        run = results[run_id]
        print(f"Accuracy: {run['results']['accuracy']} h_lr: {run['config']['h_lr']}")
        norms = run["results"]["energy_norms"]
        # One column per layer (relabelled 1..3), one row per inference step.
        frame = pd.DataFrame({str(layer + 1): norms[str(layer)][:, -1] for layer in range(3)})
        frame["T"] = np.arange(len(frame)) + 1
        frames.append(frame)

    energies_long = pd.melt(pd.concat(frames), id_vars=["T"], var_name="layer", value_name="energy")

    # Apply the theme and format *before* creating the figure: figure-level
    # rcParams are only read when a figure is created, so creating it first
    # would make the first call differ from later ones.
    sns.set_theme("paper", style="whitegrid")
    plt.rcParams.update(nb.NEURIPS_FORMAT_HALF_TALL)
    fig, ax = plt.subplots()

    sns.lineplot(data=energies_long, x="T", y="energy", hue="layer", marker="o", ax=ax)
    ax.set_yscale("log")
    ax.set_title("Energy Norm")
    ax.set_xlabel("T (Inference Step)")
    ax.set_ylabel("")
    ax.get_legend().set_title("Layer")
    # Re-apply the current y tick labels, rotated to read along the axis.
    ax.set_yticklabels(ax.get_yticklabels(), rotation=90, va="center", ha="left")
    # One tick per inference step actually present in the data.
    ax.set_xticks(np.arange(1, energies_long["T"].max() + 1))

    fig.savefig(os.path.join(cluster.artifact_dir, f"energy_norms_per_T_{dataset}.pdf"))
    plt.show()
    plt.close(fig)


In [None]:
# Energy-norm panel for the fashion_mnist runs (saved as energy_norms_per_T_fashion_mnist.pdf).
energy_over_T_plot(df, "fashion_mnist")

In [None]:
# Energy-norm panel for the two_moons runs (saved as energy_norms_per_T_two_moons.pdf).
energy_over_T_plot(df, "two_moons")

In [None]:
# Energy-norm panel for the two_circles runs (saved as energy_norms_per_T_two_circles.pdf).
energy_over_T_plot(df, "two_circles")

# Accuracy SGD vs Adam

Plot the middle panel (accuracies) and the grid of accuracies across all conditions.

In [None]:
def all_accuracies_plot(df, dataset):
    """Draw a grid of accuracy box plots comparing SGD and AdamW on one dataset.

    Keeps the runs of ``dataset`` whose ``config.optim`` is ``"sgd-0.9"`` or
    ``"adamw-0.9"`` and draws ``results.accuracy`` against ``config.h_lr``,
    coloured by optimiser, with one facet per activation function (rows,
    ``config.act_fn``) and hidden width (columns, ``config.hidden_dims``).

    The figure is saved as ``all_accuracies_{dataset}.pdf`` inside
    ``cluster.artifact_dir`` and then shown.

    Args:
        df: Table of runs with (at least) the columns
            ``experiment.data.dataset``, ``config.optim`` (categorical),
            ``config.act_fn``, ``config.h_lr``, ``config.hidden_dims`` and
            ``results.accuracy``.
        dataset: Value of ``experiment.data.dataset`` to plot.
    """
    df = df.loc[df["experiment.data.dataset"] == dataset]
    # Copy after filtering so the column assignments below act on an
    # independent frame rather than on a slice of the caller's data.
    df = df.loc[df["config.optim"].isin(["sgd-0.9", "adamw-0.9"])].copy()

    # Drop the categories of the optimisers that were filtered out so they do
    # not appear in the legend.
    df["config.optim"] = df["config.optim"].cat.remove_unused_categories()
    # Display names for the activations; values outside this mapping become NaN.
    df["config.act_fn"] = df["config.act_fn"].map({"leaky_relu": "LeakyReLU", "hard_tanh": "HardTanh"})
    # The column name doubles as the legend title.
    df = df.rename(columns={"config.optim": "Optimizer"})
    df["Optimizer"] = df["Optimizer"].map({"sgd-0.9": "SGD", "adamw-0.9": "Adam"})

    sns.set_theme(style="whitegrid")
    plt.rcParams.update(nb.NEURIPS_FORMAT_HALF_TALL)
    # Last two of four "Dark2" colours.
    palette = sns.color_palette("Dark2", n_colors=4)[2:]

    # catplot creates its own figure, so no figure is prepared beforehand.
    grid = sns.catplot(
        df,
        x="config.h_lr",
        y="results.accuracy",
        col="config.hidden_dims",
        row="config.act_fn",
        hue="Optimizer",
        kind="box",
        palette=palette,
        height=2.5,
        aspect=1.2,
    )
    grid.set_xlabels(r"$\gamma$ (State Learning Rate)")
    grid.set_ylabels("Accuracy")
    grid.set_titles("Width = {col_name} | $f = $ {row_name}")

    grid.figure.savefig(os.path.join(cluster.artifact_dir, f"all_accuracies_{dataset}.pdf"))
    plt.show()
    plt.close(grid.figure)

# Accuracy grid for the fashion_mnist runs (saved as all_accuracies_fashion_mnist.pdf).
all_accuracies_plot(df, "fashion_mnist")

In [None]:

def single_accuracy_plot(df, dataset):
    """Save a box plot of accuracy versus state learning rate, split by optimiser.

    Only runs of ``dataset`` with ``config.momentum_w == 0.9``,
    ``config.hidden_dims == 1024`` and ``config.act_fn == "hard_tanh"`` are
    used. The optimiser labels ``"sgd-0.9"`` and ``"adamw-0.9"`` are shown as
    ``"SGD"`` and ``"AdamW"``. The y axis is fixed to [0, 1].

    The figure is saved as ``accuracy-h-lr-optimizers-{dataset}.pdf`` inside
    ``cluster.artifact_dir``; it is not displayed.

    Args:
        df: Table of runs with (at least) the columns
            ``experiment.data.dataset``, ``config.momentum_w``,
            ``config.hidden_dims``, ``config.act_fn``, ``config.optim``
            (categorical), ``config.h_lr`` and ``results.accuracy``.
        dataset: Value of ``experiment.data.dataset`` to plot.
    """
    sns.set_theme("paper", style="whitegrid")
    plt.rcParams.update(nb.NEURIPS_FORMAT_HALF_TALL)

    keep = (
        (df["experiment.data.dataset"] == dataset)
        & df["config.momentum_w"].isin([0.9])
        & (df["config.hidden_dims"] == 1024)
        & (df["config.act_fn"] == "hard_tanh")
    )
    # Copy so the relabelling below does not touch the caller's frame.
    selected = df.loc[keep].copy()

    # Rename the category labels (labels not in the mapping pass through
    # unchanged), then drop the categories that no longer occur so they do not
    # show up in the legend.
    selected["config.optim"] = (
        selected["config.optim"]
        .cat.rename_categories({"sgd-0.9": "SGD", "adamw-0.9": "AdamW"})
        .cat.remove_unused_categories()
    )

    # Last two of four "Dark2" colours.
    palette = sns.color_palette("Dark2", n_colors=4)[2:]

    fig, ax = plt.subplots()
    sns.boxplot(selected, x="config.h_lr", y="results.accuracy", hue="config.optim", palette=palette, ax=ax)
    ax.set(ylim=(0, 1))
    ax.set_ylabel("")
    ax.set_title("Accuracy")
    ax.set_xlabel(r"$\gamma$ (State Learning Rate)")
    # Re-apply the current y tick labels, rotated to read along the axis.
    ax.set_yticklabels(ax.get_yticklabels(), rotation=90, va="center", ha="left")
    ax.get_legend().set_title("Optimizer", prop={"size": 12})
    sns.move_legend(ax, "lower left")

    fig.savefig(os.path.join(cluster.artifact_dir, f"accuracy-h-lr-optimizers-{dataset}.pdf"))
    plt.close(fig)


In [None]:
# Accuracy panel for the fashion_mnist runs (saved as accuracy-h-lr-optimizers-fashion_mnist.pdf).
single_accuracy_plot(df, "fashion_mnist")

In [None]:
# Accuracy panel for the two_moons runs (saved as accuracy-h-lr-optimizers-two_moons.pdf).
single_accuracy_plot(df, "two_moons")

In [None]:
# Accuracy panel for the two_circles runs (saved as accuracy-h-lr-optimizers-two_circles.pdf).
single_accuracy_plot(df, "two_circles")

# Energy Ratios

Plot the ratios of the layer energies (rightmost plot).

In [None]:

def plot_single_energy_ratio(df, dataset, min_accuracy: float = 0.5, max_ratio: float = 10**8):
    """Save a box plot of the energy ratios between layers per state learning rate.

    Uses the runs of ``dataset`` trained with ``config.optimizer_w == "adamw"``
    whose accuracy is strictly above ``min_accuracy`` and whose two ratio
    columns (``results.energy_norms_ratio_0/1`` and
    ``results.energy_norms_ratio_1/2``) are both strictly below ``max_ratio``.
    The two ratios are drawn side by side for every ``config.h_lr`` on a
    log-scaled y axis, with guide lines at 1, 0.01 and 100.

    The figure is saved as ``energy-ratios-{dataset}.pdf`` inside
    ``cluster.artifact_dir``; it is not displayed.

    Args:
        df: Table of runs with (at least) the columns
            ``experiment.data.dataset``, ``results.accuracy``,
            ``config.optimizer_w``, ``config.h_lr`` and the two ratio columns.
        dataset: Value of ``experiment.data.dataset`` to plot.
        min_accuracy: Runs at or below this accuracy are dropped. This is a
            custom threshold, not the chance level of the task.
        max_ratio: Runs with a ratio at or above this value are dropped; a few
            outliers beyond it would otherwise hurt readability.
    """
    ratio_columns = ["results.energy_norms_ratio_0/1", "results.energy_norms_ratio_1/2"]

    keep = (
        (df["experiment.data.dataset"] == dataset)
        & (df["results.accuracy"] > min_accuracy)
        & (df[ratio_columns[0]] < max_ratio)
        & (df[ratio_columns[1]] < max_ratio)
        & (df["config.optimizer_w"] == "adamw")
    )
    selected = df.loc[keep]

    ratios_long = pd.melt(
        selected,
        id_vars=["config.h_lr"],
        value_vars=ratio_columns,
        var_name="ratio_type",
        value_name="results.energy_norms_ratio",
    )
    # Keep only the trailing "0/1" / "1/2" part of the column name and turn it
    # into the legend label.
    ratios_long["ratio_type"] = ratios_long["ratio_type"].str.split("_").str[-1]
    ratios_long["ratio_type"] = ratios_long["ratio_type"].replace(
        {"0/1": r"$\epsilon_1^2/\epsilon_2^2$", "1/2": r"$\epsilon_2^2/\epsilon_3^2$"}
    )

    # Apply the theme and format *before* creating the figure: figure-level
    # rcParams are only read when a figure is created.
    sns.set_theme("paper", style="whitegrid")
    plt.rcParams.update(nb.NEURIPS_FORMAT_HALF_TALL)
    fig, ax = plt.subplots()

    sns.boxplot(ratios_long, x="config.h_lr", y="results.energy_norms_ratio", hue="ratio_type", ax=ax)
    ax.set(yscale="log")
    ax.set(ylabel="")
    # Re-apply the current y tick labels, rotated to read along the axis.
    ax.set_yticklabels(ax.get_yticklabels(), rotation=90, va="center", ha="left")
    ax.set_title("Energy Layer Ratio")
    ax.set(xlabel=r"$\gamma$ (State Learning Rate)")
    ax.get_legend().set_title("Layer Ratio")
    sns.move_legend(ax, "upper left")

    # Dashed grey line at a ratio of 1, fainter dotted lines two decades
    # above and below it.
    ax.axhline(1, color="grey", linestyle="--", zorder=0)
    ax.axhline(0.01, color="grey", alpha=0.3, linestyle=":", zorder=0)
    ax.axhline(100, color="grey", alpha=0.3, linestyle=":", zorder=0)

    fig.savefig(os.path.join(cluster.artifact_dir, f"energy-ratios-{dataset}.pdf"))
    plt.close(fig)


In [None]:
# Energy-ratio panel for the fashion_mnist runs (saved as energy-ratios-fashion_mnist.pdf).
plot_single_energy_ratio(df, "fashion_mnist")

In [None]:
# Energy-ratio panel for the two_moons runs (saved as energy-ratios-two_moons.pdf).
plot_single_energy_ratio(df, "two_moons")

In [None]:
# Energy-ratio panel for the two_circles runs (saved as energy-ratios-two_circles.pdf).
plot_single_energy_ratio(df, "two_circles")