# 2023-08-31 9 - Estimating constraint level electricity.ipynb
Copy of notebook 5 but for the electricity dataset

## Fetching runs

In [13]:
import wandb
from math import isnan 
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import wandb
from tqdm import tqdm
# from cycler import cycler
import matplotlib as mpl
from IPython.display import Markdown, display
    
api = wandb.Api()
project = "Autoformer"
workspace = "alelab"

# Fetch every run trained on the electricity dataset
# (should be only 8 ERM runs by the time this is run).
runs = api.runs(
    f"{workspace}/{project}",
    {"config.data_path": "electricity.csv"},
)

# Build one record per (run, split, metric, step index): the run's full config
# plus the per-step metric value and run-level metadata.
all_runs = []
run_counter = 0
for run in tqdm(runs):
    run_counter += 1

    # These values do not depend on the split or step, so look them up once per run.
    pred_len = run.config["pred_len"]
    epoch = run.summary["epoch"]
    # Prefix of the run name (text before the first "/") followed by the model name.
    algorithm = f"{run.name.split('/')[0]} {run.config['model']}"
    # The query only filters on data_path, so a run that is not attached to a
    # sweep may show up; record None for it instead of failing on `.id`.
    sweep_id = run.sweep.id if run.sweep is not None else None
    # To better plot constrained vs ERM.
    # TODO: this is a hack while the tags are consolidated.
    run_type = "ERM" if run.config["dual_lr"] == 0 else "Constrained"

    for split in ["train", "test", "val"]:
        # One infeasible rate per split, shared by every step of that split.
        infeasible_rate = run.summary[f"infeasible_rate/{split}"]
        for metric in ["mse"]:
            for i in range(pred_len):
                # Fresh copy of the config per record so rows do not share state.
                # Keys are inserted in a fixed order to keep the column order stable.
                run_dict = {**run.config}
                run_dict[f"{metric}"] = run.summary[f"{metric}/{split}/{i}"]
                run_dict["step"] = i
                run_dict["epoch"] = epoch
                run_dict["infeasible_rate"] = infeasible_rate
                run_dict["split"] = split
                run_dict["run_id"] = run.id
                run_dict["Algorithm"] = algorithm
                run_dict["sweep_id"] = sweep_id
                run_dict["type"] = run_type
                all_runs.append(run_dict)

print(f"Fetched {run_counter} runs")
df = pd.DataFrame(all_runs)
print(f"Total records: {(df.shape)}")

100%|██████████| 8/8 [00:00<00:00, 81.67it/s]

Fetched 8 runs
Total records: (8064, 60)





In [14]:
# One row per distinct run: identifiers plus the settings that tell runs apart,
# ordered by model, then prediction length, then constraint level.
(
    df.loc[
        :,
        [
            "run_id",
            "sweep_id",
            "Algorithm",
            "model",
            "data_path",
            "constraint_level",
            "pred_len",
        ],
    ]
    .drop_duplicates()
    .sort_values(by=["model", "pred_len", "constraint_level"])
)

Unnamed: 0,run_id,sweep_id,Algorithm,model,data_path,constraint_level,pred_len
7776,dky72mnn,plrzt70h,StatInformed-ERM-10e Autoformer,Autoformer,electricity.csv,-1,96
7200,9ueaf7qc,plrzt70h,StatInformed-ERM-10e Autoformer,Autoformer,electricity.csv,-1,192
6192,c8gjcgjj,plrzt70h,StatInformed-ERM-10e Autoformer,Autoformer,electricity.csv,-1,336
4032,5gf925sy,plrzt70h,StatInformed-ERM-10e Autoformer,Autoformer,electricity.csv,-1,720
3744,ox5mkx6s,plrzt70h,StatInformed-ERM-10e Reformer,Reformer,electricity.csv,-1,96
3168,4dsfg2z8,plrzt70h,StatInformed-ERM-10e Reformer,Reformer,electricity.csv,-1,192
2160,ujwc5q8b,plrzt70h,StatInformed-ERM-10e Reformer,Reformer,electricity.csv,-1,336
0,39cj5po7,plrzt70h,StatInformed-ERM-10e Reformer,Reformer,electricity.csv,-1,720


# Get IQR for all splits and lengths:
The Autoformer and Reformer validation MSEs are clearly different. I'm taking the Autoformer values because that's what we did on `weather.csv`,
and they're also the tighter ones.

A more "proper" approach would be to choose $\epsilon$ per model/window.

In [30]:
# Restrict to records whose Algorithm label mentions Autoformer.
# NOTE: this rebinds `all_runs` (previously the list of records) to a DataFrame.
all_runs = df.loc[df["Algorithm"].str.contains("Autoformer")]

# Summary statistics of the validation-split MSE values, one row per pred_len.
stats = (
    all_runs.loc[all_runs["split"] == "val"]
    .groupby(["pred_len"])["mse"]
    .describe()
)

# Show with prediction lengths as columns.
stats.T

pred_len,96,192,336,720
count,96.0,192.0,336.0,720.0
mean,0.164291,0.187406,0.246101,0.244411
std,0.011176,0.016635,0.018966,0.051494
min,0.137082,0.166891,0.214304,0.196355
25%,0.157795,0.173757,0.232979,0.215966
50%,0.169146,0.184251,0.245623,0.230415
75%,0.170471,0.200061,0.256973,0.24642
max,0.204507,0.278771,0.348928,0.580343


In [34]:
# Show only the pred_len == 720 statistics from the current `stats` table.
stats.loc[[720]].T

pred_len,720
count,720.0
mean,0.244411
std,0.051494
min,0.196355
25%,0.215966
50%,0.230415
75%,0.24642
max,0.580343


In [17]:
# Restrict to records whose Algorithm label mentions Reformer.
# NOTE: this rebinds both `all_runs` and `stats`, replacing the earlier values.
all_runs = df.loc[df["Algorithm"].str.contains("Reformer")]

# Summary statistics of the validation-split MSE values, one row per pred_len.
stats = (
    all_runs.loc[all_runs["split"] == "val"]
    .groupby(["pred_len"])["mse"]
    .describe()
)

# Show with prediction lengths as columns.
stats.T

pred_len,96,192,336,720
count,96.0,192.0,336.0,720.0
mean,0.20274,0.235864,0.237651,0.252763
std,0.00588,0.017197,0.016282,0.003974
min,0.194854,0.211686,0.214464,0.242625
25%,0.198015,0.217943,0.222203,0.250573
50%,0.201584,0.236337,0.233914,0.253179
75%,0.205735,0.250937,0.254289,0.255089
max,0.22729,0.274632,0.27807,0.295012
