# Imports

In [1]:
# imports
import numpy as np
from tueplots import bundles, figsizes
import wandb
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import pandas as pd


import sys

%load_ext autoreload
%autoreload 2

sys.path.insert(0, '.')

In [2]:
from analysis import sweep2df, plot_typography, stats2string


In [3]:
# Toggle forwarded as `usetex` to the tueplots NeurIPS bundle and to
# plot_typography in the cells below.
USETEX = True

In [4]:
# Apply the tueplots NeurIPS 2022 style bundle to matplotlib's global rcParams.
plt.rcParams.update(bundles.neurips2022(usetex=USETEX))
# Disabled: extra LaTeX packages (amsfonts for \mathbb, amsmath for \boldsymbol).
# NOTE(review): before re-enabling, check whether the installed matplotlib
# accepts a list for 'text.latex.preamble' or expects a single string — confirm.
# plt.rcParams.update({
#     'text.latex.preamble': [r'\usepackage{amsfonts}', # mathbb
#                             r'\usepackage{amsmath}'] # boldsymbol
# })

In [5]:
# Project helper imported from `analysis`; small/medium/big are presumably
# font sizes — TODO confirm against plot_typography's definition.
plot_typography(usetex=USETEX, small=12, medium=16, big=20)

In [6]:
# Weights & Biases workspace that hosts every sweep referenced below
ENTITY = "causal-representation-learning"
PROJECT = "lti-ica"

# API client (timeout=200) and a handle on the project's runs
api = wandb.Api(timeout=200)
runs = api.runs(f"{ENTITY}/{PROJECT}")

# Data loading

## Max variability

In [7]:
# Max-variability sweep. sweep2df (project helper) returns five values: the
# frame used for the tables below and, judging by the target names, the
# train/validation log-likelihood and MCC records.
SWEEP_ID = "6u3mgtpz"
filename = f"max_var_{SWEEP_ID}"
sweep = api.sweep(f"{ENTITY}/{PROJECT}/{SWEEP_ID}")
(
    df_max_var,
    train_log_likelihood_max_var,
    train_mcc_max_var,
    val_log_likelihood_max_var,
    val_mcc_max_var,
) = sweep2df(sweep.runs, filename, save=True, load=True)

	 Loading max_var_6u3mgtpz...


### Max variability 10 dimensions

In [8]:
# Max-variability sweep for 10 dimensions.
# NOTE(review): load=False here, unlike the neighbouring cells; no
# "Loading ..." line was recorded for this cell, so presumably the runs are
# fetched again instead of read from the saved file — confirm in sweep2df.
SWEEP_ID = "woiubqya"
filename = f"max_var_10_{SWEEP_ID}"
sweep = api.sweep(f"{ENTITY}/{PROJECT}/{SWEEP_ID}")
(
    df_max_var_10,
    train_log_likelihood_max_var_10,
    train_mcc_max_var_10,
    val_log_likelihood_max_var_10,
    val_mcc_max_var_10,
) = sweep2df(sweep.runs, filename, save=True, load=False)

### Rerun for 8 dimensions

In [9]:
# Max-variability rerun for 8 dimensions; same five-value unpacking as the
# other sweep cells.
SWEEP_ID = "f2n0z65l"
filename = f"max_var_8_{SWEEP_ID}"
sweep = api.sweep(f"{ENTITY}/{PROJECT}/{SWEEP_ID}")
(
    df_max_var_8,
    train_log_likelihood_max_var_8,
    train_mcc_max_var_8,
    val_log_likelihood_max_var_8,
    val_mcc_max_var_8,
) = sweep2df(sweep.runs, filename, save=True, load=True)

	 Loading max_var_8_f2n0z65l...


### Concatenate

In [10]:
# Stack the original, 8-dimensional and 10-dimensional max-variability frames
# row-wise (original row indices are kept as-is).
df_max_var_concat = pd.concat(
    [
        df_max_var,
        df_max_var_8,
        df_max_var_10,
    ]
)

In [11]:
# Mean MCC metrics per (num_comp, zero_means, use_B, use_C) configuration.
# The metric columns are selected *before* aggregating: only these four
# columns are averaged, and pandas >= 2.0 no longer raises a TypeError when
# the frame also carries non-numeric columns. The displayed table is the same
# as selecting after .mean() whenever that form succeeds.
df_max_var_concat.groupby(["num_comp", "zero_means", "use_B", "use_C"])[
    ["train_mcc", "max_train_mcc", "val_mcc", "max_val_mcc"]
].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,train_mcc,max_train_mcc,val_mcc,max_val_mcc
num_comp,zero_means,use_B,use_C,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2,False,True,True,0.967982,0.987145,0.863717,0.872004
3,False,True,True,0.999989,0.999995,0.997175,0.999113
5,False,True,True,0.995257,0.997676,0.824089,0.826335
8,False,True,True,0.976543,0.983045,0.435802,0.435802
10,False,True,True,0.995729,0.996703,0.732558,0.734933


In [12]:
# One GroupBy object per num_comp value, inserted in ascending num_comp order
df_max_var_concat_dict = {
    comp: df_max_var_concat.loc[df_max_var_concat["num_comp"] == comp].groupby(
        ["num_comp", "zero_means", "use_B", "use_C"]
    )
    for comp in sorted(df_max_var_concat["num_comp"].unique())
}

In [13]:
# Render each per-num_comp GroupBy with stats2string; list order follows the
# dict's (ascending num_comp) insertion order.
max_var_stats = [
    stats2string(grouped) for grouped in df_max_var_concat_dict.values()
]

## Minimal segments

### Original sweep

In [14]:
# Original minimal-segment sweep.
# A second sweep id, "03w02539", was left next to this one by the author.
SWEEP_ID = "shrjtedq"
filename = f"min_segment_{SWEEP_ID}"
sweep = api.sweep(f"{ENTITY}/{PROJECT}/{SWEEP_ID}")
(
    df_min_segment,
    train_log_likelihood_min_segment,
    train_mcc_min_segment,
    val_log_likelihood_min_segment,
    val_mcc_min_segment,
) = sweep2df(sweep.runs, filename, save=True, load=True)

	 Loading min_segment_shrjtedq...


### Rerun for 8 and 10 dimensions

In [15]:
# Minimal-segment rerun covering 8 and 10 dimensions.
SWEEP_ID = "dvn24tw0"
filename = f"min_segment_8_10_{SWEEP_ID}"
sweep = api.sweep(f"{ENTITY}/{PROJECT}/{SWEEP_ID}")
(
    df_min_segment_8_10,
    train_log_likelihood_min_segment_8_10,
    train_mcc_min_segment_8_10,
    val_log_likelihood_min_segment_8_10,
    val_mcc_min_segment_8_10,
) = sweep2df(sweep.runs, filename, save=True, load=True)

	 Loading min_segment_8_10_dvn24tw0...


### Missing 8-/10-dimensional runs

In [16]:
# Sweep with the previously missing 8-/10-dimensional minimal-segment runs.
# NOTE(review): the file prefix says "10_missing" while the variables say
# "8_10_missing"; the prefix is left unchanged so that an already saved file
# keeps its name. The sweep id suffix keeps it distinct from the next cell.
SWEEP_ID = "msankgos"
filename = f"min_segment_10_missing_{SWEEP_ID}"
sweep = api.sweep(f"{ENTITY}/{PROJECT}/{SWEEP_ID}")
(
    df_min_segment_8_10_missing,
    train_log_likelihood_min_segment_8_10_missing,
    train_mcc_min_segment_8_10_missing,
    val_log_likelihood_min_segment_8_10_missing,
    val_mcc_min_segment_8_10_missing,
) = sweep2df(sweep.runs, filename, save=True, load=True)

	 Loading min_segment_10_missing_msankgos...


### Missing 10-dimensional runs

In [17]:
# Sweep with the previously missing 10-dimensional minimal-segment runs.
SWEEP_ID = "v3bgbuna"
filename = f"min_segment_10_missing_{SWEEP_ID}"
sweep = api.sweep(f"{ENTITY}/{PROJECT}/{SWEEP_ID}")
(
    df_min_segment_10_missing,
    train_log_likelihood_min_segment_10_missing,
    train_mcc_min_segment_10_missing,
    val_log_likelihood_min_segment_10_missing,
    val_mcc_min_segment_10_missing,
) = sweep2df(sweep.runs, filename, save=True, load=True)

	 Loading min_segment_10_missing_v3bgbuna...


### Concatenate

In [18]:
# Stack all four minimal-segment result frames row-wise (original sweep, the
# 8/10-dimensional rerun and the two "missing runs" sweeps).
df_min_segment_concat = pd.concat(
    [
        df_min_segment,
        df_min_segment_8_10,
        df_min_segment_8_10_missing,
        df_min_segment_10_missing,
    ]
)

In [19]:
# Mean MCC metrics per (num_comp, zero_means, use_B, use_C) configuration.
# Selecting the metric columns before .mean() restricts the aggregation to
# these four columns and avoids the TypeError pandas >= 2.0 raises when other,
# non-numeric columns are present; the resulting table is unchanged.
df_min_segment_concat.groupby(["num_comp", "zero_means", "use_B", "use_C"])[
    ["train_mcc", "max_train_mcc", "val_mcc", "max_val_mcc"]
].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,train_mcc,max_train_mcc,val_mcc,max_val_mcc
num_comp,zero_means,use_B,use_C,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2,False,False,False,0.866319,0.911332,0.58251,0.619459
2,False,False,True,0.766681,0.820678,0.511342,0.524911
2,False,True,False,0.729862,0.826553,0.516959,0.553255
2,False,True,True,0.697367,0.888114,0.596271,0.629053
2,True,False,False,0.632901,0.847892,0.619947,0.667954
2,True,False,True,0.674872,0.830931,0.680293,0.717334
2,True,True,False,0.734144,0.842189,0.69317,0.708631
2,True,True,True,0.725062,0.875474,0.732886,0.753187
3,False,False,False,0.900751,0.941721,0.741451,0.747056
3,False,False,True,0.910463,0.941608,0.748249,0.760785


In [20]:
# One GroupBy object per num_comp value, inserted in ascending num_comp order
df_min_segment_concat_dict = {
    comp: df_min_segment_concat.loc[
        df_min_segment_concat["num_comp"] == comp
    ].groupby(["num_comp", "zero_means", "use_B", "use_C"])
    for comp in sorted(df_min_segment_concat["num_comp"].unique())
}

In [21]:
# Render each per-num_comp GroupBy with stats2string; list order follows the
# dict's (ascending num_comp) insertion order.
min_segment_stats = [
    stats2string(grouped) for grouped in df_min_segment_concat_dict.values()
]

## Noisy minimal segments

In [26]:
# Noisy minimal-segment sweep; only the first of sweep2df's five return
# values is kept.
# NOTE(review): load=False, so presumably the runs are fetched again rather
# than read from the saved file — confirm in sweep2df.
SWEEP_ID = "7z5qkfsc"
filename = f"min_segment_noisy_{SWEEP_ID}"
sweep = api.sweep(f"{ENTITY}/{PROJECT}/{SWEEP_ID}")
(df_min_segment_noisy, _, _, _, _) = sweep2df(
    sweep.runs,
    filename,
    save=True,
    load=False,
)

In [27]:
# Second noisy minimal-segment sweep; only the first of sweep2df's five
# return values is kept. The recorded output reports one faulty run
# (giddy-sweep-61) for this sweep.
SWEEP_ID = "sofeoukf"
filename = f"min_segment_noisy_2_{SWEEP_ID}"
sweep = api.sweep(f"{ENTITY}/{PROJECT}/{SWEEP_ID}")
(df_min_segment_noisy_2, _, _, _, _) = sweep2df(
    sweep.runs,
    filename,
    save=True,
    load=False,
)

Encountered a faulty run with ID giddy-sweep-61


### Concatenate

In [28]:
# Stack both noisy minimal-segment result frames row-wise.
df_min_segment_noisy_concat = pd.concat(
    [
        df_min_segment_noisy,
        df_min_segment_noisy_2,
    ]
)

In [29]:
# Mean MCC metrics per (num_comp, obs_noise_var) pair.
# Selecting the metric columns before .mean() restricts the aggregation to
# these four columns and avoids the TypeError pandas >= 2.0 raises when other,
# non-numeric columns are present; the resulting table is unchanged.
df_min_segment_noisy_concat.groupby(["num_comp", "obs_noise_var"])[
    ["train_mcc", "max_train_mcc", "val_mcc", "max_val_mcc"]
].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,train_mcc,max_train_mcc,val_mcc,max_val_mcc
num_comp,obs_noise_var,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,0.0,0.626983,0.937836,0.583573,0.650213
2,0.0001,0.6786,0.820703,0.357806,0.447987
2,0.01,0.643191,0.828288,0.220149,0.302239
2,0.1,0.589576,0.685004,0.254984,0.288121
2,1.0,0.624789,0.734137,0.270574,0.286497
3,0.0,0.885535,0.920315,0.793121,0.794341
3,0.0001,0.82344,0.856889,0.612885,0.618519
3,0.01,0.718347,0.78563,0.459358,0.463463
3,0.1,0.637453,0.724336,0.382442,0.386184
3,1.0,0.795979,0.830876,0.371516,0.375448


In [32]:
# One GroupBy object per num_comp value, inserted in ascending num_comp order
df_min_segment_noisy_concat_dict = {
    comp: df_min_segment_noisy_concat.loc[
        df_min_segment_noisy_concat["num_comp"] == comp
    ].groupby(["num_comp", "obs_noise_var"])
    for comp in sorted(df_min_segment_noisy_concat["num_comp"].unique())
}

In [33]:
# Render each per-num_comp GroupBy with stats2string; list order follows the
# dict's (ascending num_comp) insertion order.
min_segment_noisy_stats = [
    stats2string(grouped)
    for grouped in df_min_segment_noisy_concat_dict.values()
]

## Noisy DC Motor

In [39]:
# Noisy DC-motor sweep; only the first of sweep2df's five return values is
# kept.
# NOTE(review): load=False, so presumably the runs are fetched again rather
# than read from the saved file — confirm in sweep2df.
SWEEP_ID = "cun199xq"
filename = f"dc_motor_noisy_{SWEEP_ID}"
sweep = api.sweep(f"{ENTITY}/{PROJECT}/{SWEEP_ID}")
(df_dc_motor_noisy, _, _, _, _) = sweep2df(
    sweep.runs,
    filename,
    save=True,
    load=False,
)

In [40]:
# Mean MCC metrics per (zero_means, obs_noise_var) pair.
# Selecting the metric columns before .mean() restricts the aggregation to
# these four columns and avoids the TypeError pandas >= 2.0 raises when other,
# non-numeric columns are present; the resulting table is unchanged.
df_dc_motor_noisy.groupby(["zero_means", "obs_noise_var"])[
    ["train_mcc", "max_train_mcc", "val_mcc", "max_val_mcc"]
].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,train_mcc,max_train_mcc,val_mcc,max_val_mcc
zero_means,obs_noise_var,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
False,0.0,0.675323,0.985218,0.339935,0.339935
False,0.0001,0.734127,0.981561,0.309478,0.309478
False,0.001,0.734818,0.98152,0.311176,0.311176
False,0.01,0.721763,0.979569,0.312112,0.312112
False,0.1,0.666729,0.978148,0.312637,0.312637
True,0.0,0.786184,0.984692,0.854957,0.854957
True,0.0001,0.505593,0.934523,0.312036,0.312036
True,0.001,0.702507,0.946142,0.311169,0.311169
True,0.01,0.744299,0.911928,0.310335,0.310335
True,0.1,0.679656,0.904735,0.309249,0.309249


In [41]:
# One GroupBy object (by zero_means and obs_noise_var) per num_comp value,
# inserted in ascending num_comp order
df_dc_motor_noisy_dict = {
    comp: df_dc_motor_noisy.loc[df_dc_motor_noisy["num_comp"] == comp].groupby(
        ["zero_means", "obs_noise_var"]
    )
    for comp in sorted(df_dc_motor_noisy["num_comp"].unique())
}

In [42]:
# Render each per-num_comp GroupBy with stats2string; list order follows the
# dict's (ascending num_comp) insertion order.
dc_motor_noisy_stats = [
    stats2string(grouped) for grouped in df_dc_motor_noisy_dict.values()
]

# Render text for table

In [None]:
# Build one text fragment per num_comp for the results table: a
# "-----<num_comp>" marker line, then the minimal-segment stats immediately
# followed by the max-variability stats, then a newline.
#
# max_var_stats is ordered by the sorted num_comp values of df_max_var_concat,
# min_segment_stats by those of df_min_segment_concat. The max-variability
# entry is therefore looked up by num_comp instead of by list position, so a
# dimensionality that is missing from one experiment cannot shift the
# remaining rows under the wrong label. Only num_comp values present in both
# experiments are emitted; when both cover the same values the result equals
# the positional pairing.
max_var_stats_by_comp = dict(
    zip(sorted(df_max_var_concat.num_comp.unique()), max_var_stats)
)
all_stats = [
    f"-----{comp!s}\n{min_segment}{max_var_stats_by_comp[comp]}\n"
    for comp, min_segment in zip(
        sorted(df_min_segment_concat.num_comp.unique()), min_segment_stats
    )
    if comp in max_var_stats_by_comp
]

In [None]:
# Emit the fragments one after another, separated by a newline
print(*all_stats, sep="\n")

## Noisy stats

In [34]:
# One text fragment per num_comp: a "-----<num_comp>" marker line followed by
# the noisy minimal-segment stats and a newline. Labels and stats both follow
# the sorted num_comp values of df_min_segment_noisy_concat.
all_noisy_stats = [
    f"-----{comp!s}\n{min_segment_noisy}\n"
    for comp, min_segment_noisy in zip(
        sorted(df_min_segment_noisy_concat.num_comp.unique()),
        min_segment_noisy_stats,
    )
]

In [35]:
# Emit the fragments one after another, separated by a newline
print(*all_noisy_stats, sep="\n")

-----2
$0.627\scriptscriptstyle\pm nan$ & $0.679\scriptscriptstyle\pm 0.162$ & $0.643\scriptscriptstyle\pm 0.068$ & $0.590\scriptscriptstyle\pm 0.034$ & $0.625\scriptscriptstyle\pm 0.038$ & 

-----3
$0.886\scriptscriptstyle\pm 0.057$ & $0.823\scriptscriptstyle\pm 0.052$ & $0.718\scriptscriptstyle\pm 0.168$ & $0.637\scriptscriptstyle\pm 0.194$ & $0.796\scriptscriptstyle\pm 0.022$ & 

-----5
$0.871\scriptscriptstyle\pm nan$ & $0.873\scriptscriptstyle\pm nan$ & $0.820\scriptscriptstyle\pm 0.022$ & $0.736\scriptscriptstyle\pm nan$ & $0.732\scriptscriptstyle\pm 0.032$ & 



## DC motor noisy stats

In [43]:
# One text fragment per num_comp: a "-----<num_comp>" marker line followed by
# the noisy DC-motor stats and a newline. Labels and stats both follow the
# sorted num_comp values of df_dc_motor_noisy.
all_dc_motor_noisy_stats = [
    f"-----{comp!s}\n{dc_motor_noisy}\n"
    for comp, dc_motor_noisy in zip(
        sorted(df_dc_motor_noisy.num_comp.unique()),
        dc_motor_noisy_stats,
    )
]

In [44]:
# Emit the fragments one after another, separated by a newline
print(*all_dc_motor_noisy_stats, sep="\n")

-----2
$0.675\scriptscriptstyle\pm 0.224$ & $0.734\scriptscriptstyle\pm 0.283$ & $0.735\scriptscriptstyle\pm 0.259$ & $0.722\scriptscriptstyle\pm 0.240$ & $0.667\scriptscriptstyle\pm 0.370$ & $0.786\scriptscriptstyle\pm 0.365$ & $0.506\scriptscriptstyle\pm 0.363$ & $0.703\scriptscriptstyle\pm 0.364$ & $0.744\scriptscriptstyle\pm 0.345$ & $0.680\scriptscriptstyle\pm 0.266$ & 

