In [None]:
from utils.save import load_json_array
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import torch
import numpy as np
import glob
import sys
sys.path.append("./..") # \todo: change for relative import
from dataset.ASMGMovieLens import ASMGMovieLens

In [None]:
# Locations of the result files analysed in this notebook; all paths are
# relative to the notebook's working directory.
# Earlier BIU result snapshots, kept for reference:
# biu_results_path = "../../others/TyXe/safebox/TyXe-results-220809.json"
# biu_results_path = "../../others/TyXe/safebox/TyXe-results-220817.json"
biu_results_path = "../../others/TyXe/safebox/TyXe-results-220818.json"
# NOTE(review): the file name suggests the SML results are a placeholder —
# confirm before reporting numbers derived from it.
sml_results_path = "../../others/TyXe/safebox/SML-placeholder.json"
gen_error_path = "../safebox/gen-error.csv"  # read with pd.read_csv in the generalization-error cell
baselines_results_dir = "../safebox"  # directory the BM and IU result paths are built from
# Preview of the BIU results only; `results_df` is rebuilt from all four
# training regimes in a later cell.
# (load_json_array is a project helper; presumably it returns a DataFrame — verify.)
results_df = load_json_array(biu_results_path)
results_df

In [None]:
# BM and IU result files sit side by side in `baselines_results_dir`.
bm_results_path = f"{baselines_results_dir}/BM-results-220816.json"
iu_results_path = f"{baselines_results_dir}/IU-results-220816.json"
# Show the BM results as this cell's output.
load_json_array(bm_results_path)

In [None]:
# Load one result table per training regime, keyed by the regime code.
frames_by_regime = {
    regime: load_json_array(path)
    for regime, path in (
        ("BIU", biu_results_path),
        ("BM", bm_results_path),
        ("IU", iu_results_path),
        ("SML", sml_results_path),
    )
}
# Stack the tables; the dict keys become the outer index level, which
# reset_index() exposes as the column "level_0".
stacked_results = pd.concat(frames_by_regime)
results_df = stacked_results.reset_index().rename(
    columns={"level_0": "training_regime"})
results_df

In [None]:
def print_latex(df, column_format="c", **kwargs):
    """Print ``df`` as a LaTeX table.

    Parameters
    ----------
    df : DataFrame-like
        Object exposing ``shape`` and ``to_latex`` (e.g. a pandas DataFrame).
    column_format : str, default "c"
        LaTeX column specification forwarded to ``df.to_latex``. If it has
        fewer characters than ``df`` has columns, it is replaced by one
        centred column ("c") per data column plus one for the index.
    **kwargs
        Forwarded unchanged to ``df.to_latex``.

    Returns
    -------
    None
        The LaTeX source is written to stdout.
    """
    # The length check counts characters, so it is only meaningful for
    # single-letter specs such as "lcc".
    if df.shape[1] > len(column_format):
        n_cols = df.shape[1] + 1  # +1 for the index column
        column_format = "c" * n_cols
    print(df.to_latex(column_format=column_format, **kwargs))

# Robustness error

In [None]:
# Per-regime mean and standard deviation of each performance metric.
diss_table = results_df.groupby("training_regime")[["loss", "auc", "train_time"]].agg([
    "mean", "std"])

# Round: columns are ordered (loss, auc, train_time) x (mean, std), so the
# first four (loss/AUC statistics) get 4 decimals and the last two
# (training-time statistics) get 1.
round_dict = dict(zip(diss_table.columns, [4] * 4 + [1] * 2))
diss_table = diss_table.round(round_dict)

# Rename training regimes to the labels used in the LaTeX table, then order
# the rows by the new labels. `axis` is passed by keyword: positional use is
# rejected by pandas >= 2.0.
tr_dict = dict(zip(['BIU', 'BM', 'IU', 'SML'], ['BIFT', 'PBT', 'IFT', 'SML']))
diss_table = diss_table.rename(tr_dict).sort_index(axis=0)

# Rename the metric level of the column MultiIndex (row order is unaffected).
perf_dict = dict(zip(["loss", "auc", "train_time"], ["NLL", "AUC", "training time"]))
diss_table = diss_table.rename(perf_dict, axis=1, level=0)

# Collapse each metric's (mean, std) column pair into one "mean ± std" string
# column. Built per metric instead of via groupby(axis=1), which is deprecated
# in recent pandas; metrics keep their order of appearance.
metric_names = diss_table.columns.get_level_values(0).unique()
diss_table_pm = pd.DataFrame({
    metric: diss_table[metric].iloc[:, 0].astype("str").str.cat(
        diss_table[metric].iloc[:, 1].astype("str"), sep=" ± ")
    for metric in metric_names
})
print_latex(diss_table_pm)


# Generalization error

In [None]:
# Generalization errors: CSV with a two-row column header (-> column
# MultiIndex) and its first column used as the row index.
gen_error_df = pd.read_csv(gen_error_path, header=[0, 1], index_col=0)
# Column-wise mean over all rows.
mean_srs = gen_error_df.mean(axis=0)
mean_srs.name = "mean"
# Display the table with the means as an extra last row labelled "mean".
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0;
# `gen_error_df` itself is left unchanged.
pd.concat([gen_error_df, mean_srs.to_frame().T])

In [None]:
# Column-wise sample standard deviation of the generalization errors.
gen_error_df.std(axis=0)

In [None]:
# Pivot the mean series: outer level of its MultiIndex becomes the rows,
# inner level becomes the columns.
gen_error_table = mean_srs.unstack()

# Rename training regimes to the labels used in the LaTeX table, then order
# the rows by the new labels. `axis` is passed by keyword: positional use is
# rejected by pandas >= 2.0.
tr_dict = dict(zip(['BIU', 'BM', 'IU', 'SML'], ['BIFT', 'PBT', 'IFT', 'SML']))
gen_error_table = gen_error_table.rename(tr_dict).sort_index(axis=0)

# Put the two columns under a common "NLL" header.
# NOTE(review): this assumes the table has exactly two columns and that they
# are ordered (value, spread) — confirm against the CSV header.
gen_error_table.columns = pd.MultiIndex.from_arrays((["NLL"] * 2, gen_error_table.columns))

# Collapse the column pair into one "first ± second" string column. Built per
# header instead of via groupby(axis=1), which is deprecated in recent pandas.
gen_error_rounded = gen_error_table.round(5)
gen_error_pm = pd.DataFrame({
    metric: gen_error_rounded[metric].iloc[:, 0].astype("str").str.cat(
        gen_error_rounded[metric].iloc[:, 1].astype("str"), sep=" ± ")
    for metric in gen_error_rounded.columns.get_level_values(0).unique()
})
print_latex(gen_error_pm, column_format="lc")

In [None]:
sns.set(font_scale=1.5)
fig, axes = plt.subplots(1, 3, figsize=(18, 6), gridspec_kw=dict(wspace=0.4))

# One panel per metric, bars grouped by training regime, error bars showing
# the standard deviation. A y-range of None leaves matplotlib's automatic
# limits in place (training-time panel).
panel_specs = (
    ("loss", (0.605, 0.6225)),
    ("auc", (0.71, 0.73)),
    ("train_time", None),
)
barplots = []
for (metric, y_range), panel in zip(panel_specs, axes):
    barplot = sns.barplot(
        x="training_regime", y=metric, data=results_df, ci="sd", ax=panel)
    if y_range is not None:
        barplot.set_ylim(*y_range)
    barplots.append(barplot)
loss_barplot, auc_barplot, time_barplot = barplots

In [None]:
sns.set(font_scale=1.5)
fig, axes = plt.subplots(1, 3, figsize=(18, 6), gridspec_kw=dict(wspace=0.4))

# Same three panels as the standard-deviation figure, but with seaborn's
# default error bars. A y-range of None leaves matplotlib's automatic limits
# in place (training-time panel).
panel_specs = (
    ("loss", (0.605, 0.6225)),
    ("auc", (0.71, 0.73)),
    ("train_time", None),
)
barplots = []
for (metric, y_range), panel in zip(panel_specs, axes):
    barplot = sns.barplot(
        x="training_regime", y=metric, data=results_df, ax=panel)
    if y_range is not None:
        barplot.set_ylim(*y_range)
    barplots.append(barplot)
loss_barplot, auc_barplot, time_barplot = barplots


In [None]:
# Seeded generator shared by all regimes: re-running this cell reproduces the
# table, while each regime still draws its own resamples.
bootstrap_rng = np.random.RandomState(0)


def bootstrap_std(srs, n_boot=1000):
    """Standard deviation of the bootstrap distribution of the mean of ``srs``.

    Draws ``n_boot`` resamples of ``srs`` (same size, with replacement) from
    ``bootstrap_rng`` and returns the sample standard deviation (ddof=1) of
    the resample means.
    """
    values = np.asarray(srs)
    # One vectorised draw: each row is one bootstrap resample.
    resamples = bootstrap_rng.choice(values, size=(n_boot, len(values)), replace=True)
    return resamples.mean(axis=1).std(ddof=1)


# A named function labels its own output column ("bootstrap_std"), so no
# rename of pandas' internal "<lambda_0>" label is needed.
results_df.groupby("training_regime")["loss"].agg(["mean", "std", bootstrap_std])


In [None]:
# Seeded generator shared by all regimes: re-running this cell reproduces the
# table, while each regime still draws its own resamples.
bootstrap_rng = np.random.RandomState(0)


def bootstrap_ci(srs, n_boot=1000, quantiles=(0.025, 0.975)):
    """Percentile bootstrap interval for the mean of ``srs``.

    Draws ``n_boot`` resamples of ``srs`` (same size, with replacement) from
    ``bootstrap_rng`` and returns the requested quantiles of the resample
    means as a list ``[lower, upper]``.
    """
    values = np.asarray(srs)
    # One vectorised draw: each row is one bootstrap resample.
    resamples = bootstrap_rng.choice(values, size=(n_boot, len(values)), replace=True)
    return pd.Series(resamples.mean(axis=1)).quantile(list(quantiles)).to_list()


# A named function labels its own output column ("bootstrap_ci"), so no
# rename of pandas' internal "<lambda_0>" label is needed.
results_df.groupby("training_regime")["loss"].agg(["mean", bootstrap_ci])
