# Imports

In [1]:
# imports
import numpy as np
from tueplots import bundles, figsizes
import wandb
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import pandas as pd


import sys

# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to project code are picked up live.
%load_ext autoreload
%autoreload 2

# Put the current working directory first on the import path — presumably so
# that the `analysis` module imported in the next cell resolves to the local copy.
sys.path.insert(0, '.')

In [2]:
from analysis import sweep2df, plot_typography, stats2string


In [3]:
# Single switch forwarded as `usetex` to both the tueplots bundle and
# plot_typography in the following cells.
USETEX = True

In [4]:
# Merge the tueplots NeurIPS 2022 bundle into matplotlib's global rcParams,
# forwarding USETEX as the bundle's `usetex` option.
plt.rcParams.update(bundles.neurips2022(usetex=USETEX))
# NOTE(review): the disabled block below would add amsfonts/amsmath to the
# LaTeX preamble; it is not executed — confirm whether it is still needed.
# plt.rcParams.update({
#     'text.latex.preamble': [r'\usepackage{amsfonts}', # mathbb
#                             r'\usepackage{amsmath}'] # boldsymbol
# })

In [5]:
# Apply the project's typography helper (imported from `analysis`) with the
# size tiers small=12, medium=16, big=20 — presumably font sizes in points;
# confirm against analysis.plot_typography.
plot_typography(usetex=USETEX, small=12, medium=16, big=20)

In [6]:
# Constants
# Weights & Biases coordinates of the project whose sweeps are analysed below
ENTITY = "causal-representation-learning"
PROJECT = "lti-ica"

# Shared W&B API client (timeout=200) and a handle on the project's runs
api = wandb.Api(timeout=200)
runs = api.runs(f"{ENTITY}/{PROJECT}")

# Data loading

## Max variability

In [7]:
# Max-variability sweep: resolve the sweep on W&B and turn its runs into data.
SWEEP_ID = "6u3mgtpz"
filename = f"max_var_{SWEEP_ID}"
sweep = api.sweep(f"{ENTITY}/{PROJECT}/{SWEEP_ID}")

# sweep2df returns five objects; they are unpacked in order.
(
    df_max_var,
    train_log_likelihood_max_var,
    train_mcc_max_var,
    val_log_likelihood_max_var,
    val_mcc_max_var,
) = sweep2df(sweep.runs, filename, save=True, load=True)

	 Loading max_var_6u3mgtpz...


### Max variability 10 dimensions

In [8]:
# Max-variability sweep for 10 dimensions.
SWEEP_ID = "woiubqya"
filename = f"max_var_10_{SWEEP_ID}"
sweep = api.sweep(f"{ENTITY}/{PROJECT}/{SWEEP_ID}")

# NOTE(review): this is the only sweep cell that passes load=False —
# presumably it bypasses the saved copy and re-reads the runs; confirm intent.
(
    df_max_var_10,
    train_log_likelihood_max_var_10,
    train_mcc_max_var_10,
    val_log_likelihood_max_var_10,
    val_mcc_max_var_10,
) = sweep2df(sweep.runs, filename, save=True, load=False)

### Rerun for 8 dimensions

In [9]:
# Max-variability rerun for 8 dimensions.
SWEEP_ID = "f2n0z65l"
filename = f"max_var_8_{SWEEP_ID}"
sweep = api.sweep(f"{ENTITY}/{PROJECT}/{SWEEP_ID}")

# sweep2df returns five objects; they are unpacked in order.
(
    df_max_var_8,
    train_log_likelihood_max_var_8,
    train_mcc_max_var_8,
    val_log_likelihood_max_var_8,
    val_mcc_max_var_8,
) = sweep2df(sweep.runs, filename, save=True, load=True)

	 Loading max_var_8_f2n0z65l...


### Concatenate

In [10]:
# Stack the three max-variability frames row-wise
# (order: original sweep, 8-dim rerun, 10-dim sweep; source indices are kept).
df_max_var_concat = pd.concat(
    objs=[df_max_var, df_max_var_8, df_max_var_10],
)

In [11]:
# Mean MCC metrics per (num_comp, zero_means, use_B, use_C) configuration.
# The metric columns are selected *before* aggregating, so any non-numeric
# column of the sweep frame never reaches .mean(): pandas >= 2.0 raises a
# TypeError on such columns instead of silently dropping them, and averaging
# columns that are thrown away afterwards is wasted work anyway.
(
    df_max_var_concat
    .groupby(["num_comp", "zero_means", "use_B", "use_C"])[
        ["train_mcc", "max_train_mcc", "val_mcc", "max_val_mcc"]
    ]
    .mean()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,train_mcc,max_train_mcc,val_mcc,max_val_mcc
num_comp,zero_means,use_B,use_C,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2,False,True,True,0.967982,0.987145,0.863717,0.872004
3,False,True,True,0.999989,0.999995,0.997175,0.999113
5,False,True,True,0.995257,0.997676,0.824089,0.826335
8,False,True,True,0.976543,0.983045,0.435802,0.435802
10,False,True,True,0.995729,0.996252,0.732558,0.734933


In [12]:
# One grouped view per num_comp value, keyed by that value in ascending order
df_max_var_concat_dict = {
    comp: df_max_var_concat.loc[df_max_var_concat["num_comp"].eq(comp)].groupby(
        ["num_comp", "zero_means", "use_B", "use_C"]
    )
    for comp in sorted(df_max_var_concat["num_comp"].unique())
}

In [13]:
# Render one stats string per num_comp group, in the dict's (ascending) key order
max_var_stats = [
    stats2string(grouped) for grouped in df_max_var_concat_dict.values()
]

## Minimal segments

### Original sweep

In [14]:
# Minimal-segments sweep (original run).
SWEEP_ID = "shrjtedq"  # other sweep id left in the original notebook: "03w02539"
filename = f"min_segment_{SWEEP_ID}"
sweep = api.sweep(f"{ENTITY}/{PROJECT}/{SWEEP_ID}")

# sweep2df returns five objects; they are unpacked in order.
(
    df_min_segment,
    train_log_likelihood_min_segment,
    train_mcc_min_segment,
    val_log_likelihood_min_segment,
    val_mcc_min_segment,
) = sweep2df(sweep.runs, filename, save=True, load=True)

	 Loading min_segment_shrjtedq...


### Rerun for 8 and 10 dimensions

In [15]:
# Minimal-segments rerun covering 8 and 10 dimensions.
SWEEP_ID = "dvn24tw0"
filename = f"min_segment_8_10_{SWEEP_ID}"
sweep = api.sweep(f"{ENTITY}/{PROJECT}/{SWEEP_ID}")

# sweep2df returns five objects; they are unpacked in order.
(
    df_min_segment_8_10,
    train_log_likelihood_min_segment_8_10,
    train_mcc_min_segment_8_10,
    val_log_likelihood_min_segment_8_10,
    val_mcc_min_segment_8_10,
) = sweep2df(sweep.runs, filename, save=True, load=True)

	 Loading min_segment_8_10_dvn24tw0...


### Missing 8-/10-dimensional runs

In [16]:
# Minimal-segments sweep with the missing 8-/10-dimensional runs.
SWEEP_ID = "msankgos"
# NOTE(review): the file prefix says "10_missing" while the variables below are
# the "8_10_missing" ones; the following cell uses the same prefix with another
# sweep id. Kept unchanged here so the name still matches any already-saved file.
filename = f"min_segment_10_missing_{SWEEP_ID}"
sweep = api.sweep(f"{ENTITY}/{PROJECT}/{SWEEP_ID}")

# sweep2df returns five objects; they are unpacked in order.
(
    df_min_segment_8_10_missing,
    train_log_likelihood_min_segment_8_10_missing,
    train_mcc_min_segment_8_10_missing,
    val_log_likelihood_min_segment_8_10_missing,
    val_mcc_min_segment_8_10_missing,
) = sweep2df(sweep.runs, filename, save=True, load=True)

### Missing 10-dimensional runs

In [None]:
# Minimal-segments sweep with the missing 10-dimensional runs.
SWEEP_ID = "v3bgbuna"
filename = f"min_segment_10_missing_{SWEEP_ID}"
sweep = api.sweep(f"{ENTITY}/{PROJECT}/{SWEEP_ID}")

# sweep2df returns five objects; they are unpacked in order.
(
    df_min_segment_10_missing,
    train_log_likelihood_min_segment_10_missing,
    train_mcc_min_segment_10_missing,
    val_log_likelihood_min_segment_10_missing,
    val_mcc_min_segment_10_missing,
) = sweep2df(sweep.runs, filename, save=True, load=True)

### Concatenate

In [None]:
# Stack the four minimal-segments frames row-wise
# (order: original, 8/10 rerun, missing 8/10, missing 10; source indices are kept).
df_min_segment_concat = pd.concat(
    objs=[
        df_min_segment,
        df_min_segment_8_10,
        df_min_segment_8_10_missing,
        df_min_segment_10_missing,
    ],
)

In [None]:
# Mean MCC metrics per (num_comp, zero_means, use_B, use_C) configuration.
# The metric columns are selected *before* aggregating, so any non-numeric
# column of the sweep frame never reaches .mean(): pandas >= 2.0 raises a
# TypeError on such columns instead of silently dropping them, and averaging
# columns that are thrown away afterwards is wasted work anyway.
(
    df_min_segment_concat
    .groupby(["num_comp", "zero_means", "use_B", "use_C"])[
        ["train_mcc", "max_train_mcc", "val_mcc", "max_val_mcc"]
    ]
    .mean()
)

In [None]:
# One grouped view per num_comp value, keyed by that value in ascending order
df_min_segment_concat_dict = {
    comp: df_min_segment_concat.loc[df_min_segment_concat["num_comp"].eq(comp)].groupby(
        ["num_comp", "zero_means", "use_B", "use_C"]
    )
    for comp in sorted(df_min_segment_concat["num_comp"].unique())
}

In [None]:
# Render one stats string per num_comp group, in the dict's (ascending) key order
min_segment_stats = [
    stats2string(grouped) for grouped in df_min_segment_concat_dict.values()
]

# Render text for table

In [None]:
# Pair the two experiments by num_comp instead of by list position. Each stats
# list was built by iterating its dict, so zipping dict keys with the list
# recovers the num_comp -> text mapping. With positional pairing, a num_comp
# value present in only one experiment would shift the other experiment's text
# under the wrong "-----<num_comp>" header.
min_segment_by_comp = dict(zip(df_min_segment_concat_dict, min_segment_stats))
max_var_by_comp = dict(zip(df_max_var_concat_dict, max_var_stats))

# Only num_comp values available in both experiments are rendered; when both
# cover the same values the result is identical to the positional version.
all_stats = [
    "".join(["-----", str(comp), "\n", min_segment_by_comp[comp], max_var_by_comp[comp], "\n"])
    for comp in sorted(min_segment_by_comp.keys() & max_var_by_comp.keys())
]

In [None]:
# Emit the per-num_comp blocks, one after another, each separated by a newline
print(*all_stats, sep="\n")