# 2023-09-06 12 - Tables and plots for paper.ipynb


## Fetching runs

In [176]:
import wandb
from math import isnan 
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import wandb
from tqdm import tqdm
# from cycler import cycler
import matplotlib as mpl
from IPython.display import Markdown, display
    
api = wandb.Api()
project = "Autoformer"
workspace = "alelab"

experiment_tags = [
    "e1_weather_10e_statinformed",
    "e2_electricity_10e_statinformed",
    "e3_weather_10e_statinformed_seeds23",
    "e4_electricity_10e_statinformed_seeds23"
    
    ]

# get all runs that both: 1.  match any experiment tag and 2. are finished
runs = api.runs(f"{workspace}/{project}",
                {"$and": [
                    {"tags": {"$in": experiment_tags}},
                    {"state": "finished"}
                ]})

all_runs = []
run_counter = 0
for run in tqdm(runs):
    run_counter += 1
    for split in ["train", "test","val"]:
        for metric in ["mse",]:
            pred_len = run.config["pred_len"]
            metrics = np.zeros(pred_len)
            for i in range(pred_len):
                run_dict = {**run.config}
                #run_dict["constraint_level"] = constraint_level
                run_dict[f"{metric}"] = run.summary[f"{metric}/{split}/{i}"]
                #run_dict[f"{metric}"] = run.summary.get(f"{metric}/{split}/{i}",run.summary.get(f"mse/{split}/{i}",np.nan)) #god forgive me for this line
                run_dict["step"]=i
                run_dict["epoch"]=run.summary["epoch"]
                run_dict["infeasible_rate"]=run.summary[f"infeasible_rate/{split}"]
                run_dict["split"] = split
                run_dict["run_id"] = run.id
                # Get either Constrained/ or ERM/ from the run name, then append model name.
                #print("run.name", run.name)
                #debug if ERM run
                run_dict["Algorithm"] = f"{run.name.split('/')[0]} {run.config['model']}"
                run_dict["sweep_id"] = run.sweep.id
                #print("Algorithm", run_dict["Algorithm"])

                # To better plot constrained vs ERM
                #TODO this is a hack while I consolidate the tags. 
                run_dict["type"] = "ERM" if run.config['dual_lr'] == 0 else "Constrained"

                all_runs.append(run_dict)
print(f"Fetched {run_counter} runs")
df = pd.DataFrame(all_runs)
print(f"Total records: {(df.shape)}")
print(f"Total runs: {df.run_id.nunique()}")

100%|██████████| 192/192 [00:28<00:00,  6.80it/s]


Fetched 192 runs
Total records: (193536, 62)
Total runs: 192


In [177]:
df[['run_id',"sweep_id",'Algorithm','model','constraint_level','pred_len','epoch',"split"]] \
    .drop_duplicates().sort_values(["pred_len",'Algorithm','model',"constraint_level"]) \
    .head()

Unnamed: 0,run_id,sweep_id,Algorithm,model,constraint_level,pred_len,epoch,split
126432,govqe5nc,lcslu9kv,Electricity-StatInformed-10e-Constrained Autof...,Autoformer,0.157,96,10,train
126528,govqe5nc,lcslu9kv,Electricity-StatInformed-10e-Constrained Autof...,Autoformer,0.157,96,10,test
126624,govqe5nc,lcslu9kv,Electricity-StatInformed-10e-Constrained Autof...,Autoformer,0.157,96,10,val
128160,o5gb1msz,3klzcp50,Electricity-StatInformed-10e-Constrained Autof...,Autoformer,0.157,96,10,train
128256,o5gb1msz,3klzcp50,Electricity-StatInformed-10e-Constrained Autof...,Autoformer,0.157,96,10,test


## Data processing
1. Impute seed for the first batch of runs
2. Create aggregate dataset of metrics per experiment `(data_path,model,pred_len,constraint_level)`
3. Add any other useful columns

In [178]:
df.seed.unique()

array([ 0., nan])

It seems that the seeds didn't get properly logged, but we can confirm that the results have variance so it's not just the same run three times.

In [179]:
# sanity checking that at least results are different
#df.groupby(["Algorithm","model","constraint_level","pred_len"])['run_id'].nunique().reset_index()

dcp = df.copy()
run_means=dcp.groupby(["model","constraint_level","pred_len","data_path",'run_id'])['mse'].describe().round(3).reset_index()

# Verifying t
run_means.groupby(["model","constraint_level","pred_len","data_path"]).agg({'mean':list}).reset_index().sample(25)

Unnamed: 0,model,constraint_level,pred_len,data_path,mean
58,Reformer,0.682,336,weather.csv,"[0.554, 0.558, 0.56]"
40,Reformer,0.157,96,electricity.csv,"[0.18, 0.181, 0.182]"
9,Autoformer,0.169,96,electricity.csv,"[0.148, 0.154, 0.177]"
39,Reformer,-1.0,720,weather.csv,"[0.726, 0.64, 0.656]"
24,Autoformer,0.567,192,weather.csv,"[0.587, 0.587, 0.578]"
2,Autoformer,-1.0,192,electricity.csv,"[0.175, 0.172, 0.188]"
54,Reformer,0.553,192,weather.csv,"[0.474, 0.474, 0.466]"
46,Reformer,0.215,720,electricity.csv,"[0.225, 0.228, 0.223]"
19,Autoformer,0.256,336,electricity.csv,"[0.185, 0.209, 0.197]"
50,Reformer,0.246,720,electricity.csv,"[0.232, 0.228, 0.23]"


### Broadcast ERM runs with constraint levels

In [180]:
df_constrained = df.query('type=="Constrained"').copy()
df_erm = df.query('type=="ERM"').copy()
#(data_path,model,pred_len,constraint_level,constrained_unconstraled)
distinct_constraints = df_constrained[['data_path','model','pred_len','constraint_level']].drop_duplicates()

# crossjoin the ERM mse to all constraint levels
df_erm_w_broadcasted_constraint_levels = df_erm.drop(columns=['constraint_level']).merge(distinct_constraints,on=['data_path','model','pred_len'],how='left')
df_to_plot = pd.concat([df_constrained,df_erm_w_broadcasted_constraint_levels],axis=0)

# sanity check
display(df_erm_w_broadcasted_constraint_levels.shape)
print(f"Three constraints per experiment, broadcasted dataset size be equal to {df_erm.shape[0]*3}")
display(df_to_plot.shape)
print(f"Final dataset size should be equal to {df_constrained.shape[0]+df_erm.shape[0]*3}")

(145152, 62)

Three constraints per experiment, broadcasted dataset size be equal to 145152


(290304, 62)

Final dataset size should be equal to 290304


### Add constraint violation metric

In [181]:
# Adding the constraint violation column
df_to_plot["constraint_violation"] = (df_to_plot.mse-df_to_plot.constraint_level).clip(lower=0)

# sanity check
dd=df_to_plot[['run_id','model','type','step','mse','constraint_level','constraint_violation']] 
print("For ERM")
display(dd.query('type=="ERM"').query('constraint_violation>0').head())
print("For Constrained")
display(dd.query('type=="Constrained"').query('constraint_violation>0').head())


For ERM


Unnamed: 0,run_id,model,type,step,mse,constraint_level,constraint_violation
2204,yrysxj97,Reformer,ERM,14,0.711621,0.698,0.013621
2207,yrysxj97,Reformer,ERM,15,0.722807,0.698,0.024807
2210,yrysxj97,Reformer,ERM,16,0.729472,0.698,0.031472
2213,yrysxj97,Reformer,ERM,17,0.730466,0.698,0.032466
2216,yrysxj97,Reformer,ERM,18,0.729684,0.698,0.031684


For Constrained


Unnamed: 0,run_id,model,type,step,mse,constraint_level,constraint_violation
1084,foq7oddn,Reformer,Constrained,364,0.912449,0.912,0.000449
1085,foq7oddn,Reformer,Constrained,365,0.914066,0.912,0.002066
1133,foq7oddn,Reformer,Constrained,413,0.914629,0.912,0.002629
1212,foq7oddn,Reformer,Constrained,492,0.921657,0.912,0.009657
1213,foq7oddn,Reformer,Constrained,493,0.930293,0.912,0.018293


### Aggregate results
Summarize all three runs into metrics per split by averaging mse and constraint levels.

In [188]:
# Calculate mean and std for mse and constraint_violation. Rename them mean_{metric} and std_{metric
summary_metrics = df_to_plot.groupby([
    "Algorithm", #Harmless carryover col for legacy purposes (verified the counts are the same)
    "model",'type',"constraint_level","pred_len","data_path",
    "split"
    ])['mse','constraint_violation','run_id'].aggregate(
        mse=('mse','mean'),
        std_mse=('mse','std'),
        mcv=('constraint_violation','mean'),
        std_mcv=('constraint_violation','std'),
        run_count=('run_id','nunique')).reset_index()
summary_metrics.head()

  


Unnamed: 0,Algorithm,model,type,constraint_level,pred_len,data_path,split,mse,std_mse,mcv,std_mcv,run_count
0,Electricity-StatInformed-10e-Constrained Autof...,Autoformer,Constrained,0.157,96,electricity.csv,test,0.161314,0.0083,0.005958,0.0063,3
1,Electricity-StatInformed-10e-Constrained Autof...,Autoformer,Constrained,0.157,96,electricity.csv,train,0.122875,0.008124,0.000137,0.001351,3
2,Electricity-StatInformed-10e-Constrained Autof...,Autoformer,Constrained,0.157,96,electricity.csv,val,0.161379,0.008284,0.006002,0.006303,3
3,Electricity-StatInformed-10e-Constrained Autof...,Autoformer,Constrained,0.169,96,electricity.csv,test,0.17569,0.021071,0.011387,0.017147,3
4,Electricity-StatInformed-10e-Constrained Autof...,Autoformer,Constrained,0.169,96,electricity.csv,train,0.12749,0.007203,1.8e-05,0.000296,3


## Plots

### ERM vs. Constrained on test split.
TO DO: Reproduce the ERM vs constrained per model for each window size. Try to unify into one plot if possible.

In [192]:
#TODO Aqui quede, adapting this plot to the new datasets (probably the summary metrics one.)

to_plot = df_to_plot[df_to_plot.split=='test']# just see test to unclutter
#to_plot=df
for (pred_len), series in to_plot.groupby(["pred_len"]):
    constraints = series.constraint_level.unique()
    print(constraints)
    constraints = constraints[constraints!=-1] # to iterate over all constraints explored.
    for constraint_level in constraints:
        display(Markdown(f"### pred_len: {pred_len} constraint_level: {constraint_level}"))
        plot_series = series[series["constraint_level"].isin([constraint_level,-1])] # The -1 adds the ERM baseline.

        display(plot_series.groupby(['pred_len','constraint_level','Algorithm','epoch']).size().reset_index())
        means_per_experiment = plot_series.groupby(['pred_len','constraint_level','Algorithm','model','epoch','type'])['mse'].mean().reset_index()
        #display(means_per_experiment)
        # do a pivot where model are the rows, and the columns constrained and unconstrained
        display(means_per_experiment.pivot(index=['model'], columns='type', values='mse').reset_index())

        # sns.scatterplot(x="step", y="mse", hue="Algorithm",style="split", data=plot_series)
        
        # plt.axhline(y=constraint_level, color='r', linestyle='-')
        # plt.title(f"Pred Length: {pred_len}, Constraint_level: {constraint_level}")
        # plt.show()

        # This same scatterplot but in a grid by algorithm. 
        g = sns.FacetGrid(plot_series, col="model", hue="type",col_wrap=3)
        g.map(sns.scatterplot, "step", "mse", alpha=.7) 
        g.add_legend()
        # add the hline
        for ax in g.axes:
            ax.axhline(y=constraint_level, color='r', linestyle='-')
        plt.show()


[0.556 0.17  0.169 0.553 0.157 0.516]


### pred_len: 96 constraint_level: 0.556

Unnamed: 0,pred_len,constraint_level,Algorithm,epoch,0
0,96,0.556,StatInformed-10e Autoformer,10,288
1,96,0.556,StatInformed-10e Reformer,10,288
2,96,0.556,StatInformed-ERM-10e Autoformer,4,96
3,96,0.556,StatInformed-ERM-10e Autoformer,10,192
4,96,0.556,StatInformed-ERM-10e Reformer,7,96
5,96,0.556,StatInformed-ERM-10e Reformer,10,192


ValueError: Index contains duplicate entries, cannot reshape

## ERM on multiple algorithms
As a front page chart to illustrate the problem

### Loss shaping single instance qualitative analysis

### More random explorations
Ideas: 
1. Loss over time
2. Infeasibility rate over time
3. Loss shape across epochs (see how it evolves)
4. Plotting dual variables

## Result tables
1. Reproduce the pivot table from notebook 11, perhaps add standard deviation reports.
2. Explore and find other interesting table visualziation

## Pivots per dataset