In [None]:
import pandas as pd
import wandb


def get_experiment_results(project_name="eslop"):
    """
    Pull all your experiment results from wandb into a nice DataFrame
    """
    api = wandb.Api()
    runs = api.runs(project_name)

    results = []
    for run in runs:
        # if run.state in ["deleted", "failed", "crashed"]:
        #     continue
        # Extract hyperparameters from config
        config = run.config

        # Extract final metrics from summary
        summary = run.summary

        # Combine into one row
        row = {
            "run_name": run.name,
            # All hyperparameters from argparse
            "model_name": config.get("model_name", "unknown"),
            "data_root": config.get("data_root", None),
            "input_horizon": config.get("input_horizon", None),
            "input_size": config.get("input_size", None),  # Added this
            "hidden_size": config.get("hidden_size", None),
            "output_size": config.get("output_size", None),
            "num_layers": config.get("num_layers", None),
            "lr": config.get("lr", None),
            "epochs": config.get("epochs", None),
            "warmup_epochs": config.get("warmup_epochs", None),
            "batch_size": config.get("batch_size", None),
            "ablation": config.get("ablation", False),
            "ablation_name": config.get("ablation_name", None),
            "use_adaptive_pooling": config.get("use_adaptive_pooling", True),
            # Validation metrics
            "val_mae": summary.get("val/absolute_MAE", None),
            "val_mae_std": summary.get("val/absolute_MAE_std", None),
            "val_rel_mae_1": summary.get("val/relative_MAE@1", None),
            "val_rel_mae_1_std": summary.get("val/relative_MAE@1_std", None),
            "val_rel_mae_5": summary.get("val/relative_MAE@5", None),
            "val_rel_mae_5_std": summary.get("val/relative_MAE@5_std", None),
            # Test metrics
            "test_mae": summary.get("test/absolute_MAE", None),
            "test_mae_std": summary.get("test/absolute_MAE_std", None),
            "test_rel_mae_1": summary.get("test/relative_MAE@1", None),
            "test_rel_mae_1_std": summary.get("test/relative_MAE@1_std", None),
            "test_rel_mae_5": summary.get("test/relative_MAE@5", None),
            "test_rel_mae_5_std": summary.get("test/relative_MAE@5_std", None),
            # Training metrics
            "final_train_loss": summary.get("train/avg_epoch_loss", None),
            "final_lr": summary.get("train/lr", None),
            # Run metadata
            "runtime": run.summary.get("_runtime", None),
            "state": run.state,
            "created_at": run.created_at,
        }

        # Debug: Print available config keys for first run
        if len(results) == 0:
            print(f"Available config keys for run {run.name}: {list(config.keys())}")
            print(f"Available summary keys for run {run.name}: {list(summary.keys())}")

        results.append(row)

    df = pd.DataFrame(results)

    # Sort by test performance
    # df = df.sort_values("test_mae")

    # save df
    df.to_csv("experiment_results.csv", index=False)
    return df

In [None]:
df = get_experiment_results()
# set print threshold to max
df

Available config keys for run test_linear_norm_balanced_gelu_noact_model_transformer_layer_3_lr_0.0005_ep_5_bs_128_hs_256_horizon_365_ablation_False_accesses: ['lr', 'epochs', 'ablation', 'data_root', 'batch_size', 'input_size', 'model_name', 'num_layers', 'hidden_size', 'output_size', 'ablation_name', 'input_horizon', 'warmup_epochs']
Available summary keys for run test_linear_norm_balanced_gelu_noact_model_transformer_layer_3_lr_0.0005_ep_5_bs_128_hs_256_horizon_365_ablation_False_accesses: ['_runtime', '_step', '_timestamp', '_wandb', 'test/absolute_MAE', 'test/absolute_MAE_std', 'test/epoch', 'test/relative_MAE@1', 'test/relative_MAE@1_std', 'test/relative_MAE@5', 'test/relative_MAE@5_std', 'train/avg_epoch_loss', 'train/epoch', 'train/loss', 'train/lr', 'train/step', 'val/absolute_MAE', 'val/absolute_MAE_std', 'val/epoch', 'val/relative_MAE@1', 'val/relative_MAE@1_std', 'val/relative_MAE@5', 'val/relative_MAE@5_std']


Unnamed: 0,run_name,model_name,data_root,input_horizon,input_size,hidden_size,output_size,num_layers,lr,epochs,warmup_epochs,batch_size,ablation,ablation_name,use_adaptive_pooling,val_mae,val_mae_std,val_rel_mae_1,val_rel_mae_1_std,val_rel_mae_5,val_rel_mae_5_std,test_mae,test_mae_std,test_rel_mae_1,test_rel_mae_1_std,test_rel_mae_5,test_rel_mae_5_std,final_train_loss,final_lr,runtime,state,created_at
1,test_linear_norm_balanced_gelu_noact_model_tra...,transformer,/share/dean/arxiv-data/model_dev/baseline_benc...,365,730,256,1,3,0.0005,5,0.1,128,False,accesses,True,7.253832,9.142661,1.482463,5.817767,0.42462,0.405677,7.321144,7.3721,1.46802,2.002506,0.42754,0.26054,118.419208,0,217.171803,finished,2025-08-25T21:56:39Z
2,test_linear_norm_balanced_gelu_noact_model_tra...,transformer,/share/dean/arxiv-data/model_dev/baseline_benc...,365,730,256,1,3,0.0005,10,0.1,128,False,accesses,True,8.332387,6.824427,2.117954,2.951821,0.487555,0.306808,8.402383,6.894554,2.114524,2.957375,0.490304,0.306686,117.735949,0,421.88167,finished,2025-08-25T21:35:25Z
0,test_linear_norm_balanced_gelu_noact_model_tra...,transformer,/share/dean/arxiv-data/model_dev/baseline_benc...,365,730,256,1,3,0.0005,5,0.1,128,False,accesses,True,8.342379,6.833496,2.129721,3.041974,0.488638,0.311042,8.409493,6.871683,2.124064,2.97175,0.491254,0.308894,117.931581,0,262.659726,finished,2025-08-25T22:32:16Z


In [None]:
df["use_adaptive_pooling"][0] = True

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["use_adaptive_pooling"][0] = True
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["use_adaptive_pooling"][-1] = False


In [35]:
df.iloc[-1, df.columns.get_loc("use_adaptive_pooling")] = True

In [36]:
df

Unnamed: 0,run_name,model_name,data_root,input_horizon,input_size,hidden_size,output_size,num_layers,lr,epochs,warmup_epochs,batch_size,ablation,ablation_name,use_adaptive_pooling,val_mae,val_mae_std,val_rel_mae_1,val_rel_mae_1_std,val_rel_mae_5,val_rel_mae_5_std,test_mae,test_mae_std,test_rel_mae_1,test_rel_mae_1_std,test_rel_mae_5,test_rel_mae_5_std,final_train_loss,final_lr,runtime,state,created_at
1,test_linear_norm_balanced_gelu_noact_model_tra...,transformer,/share/dean/arxiv-data/model_dev/baseline_benc...,365,730,256,1,3,0.0005,5,0.1,128,False,accesses,True,7.253832,9.142661,1.482463,5.817767,0.42462,0.405677,7.321144,7.3721,1.46802,2.002506,0.42754,0.26054,118.419208,0,217.171803,finished,2025-08-25T21:56:39Z
2,test_linear_norm_balanced_gelu_noact_model_tra...,transformer,/share/dean/arxiv-data/model_dev/baseline_benc...,365,730,256,1,3,0.0005,10,0.1,128,False,accesses,True,8.332387,6.824427,2.117954,2.951821,0.487555,0.306808,8.402383,6.894554,2.114524,2.957375,0.490304,0.306686,117.735949,0,421.88167,finished,2025-08-25T21:35:25Z
0,test_linear_norm_balanced_gelu_noact_model_tra...,transformer,/share/dean/arxiv-data/model_dev/baseline_benc...,365,730,256,1,3,0.0005,5,0.1,128,False,accesses,True,8.342379,6.833496,2.129721,3.041974,0.488638,0.311042,8.409493,6.871683,2.124064,2.97175,0.491254,0.308894,117.931581,0,262.659726,finished,2025-08-25T22:32:16Z


In [None]:
# load csv results


df = pd.read_csv("experiment_results.csv")
pd.set_option("display.max_columns", None)
df

Unnamed: 0,run_name,model_name,data_root,input_horizon,input_size,hidden_size,output_size,num_layers,lr,epochs,warmup_epochs,batch_size,ablation,ablation_name,val_mae,val_mae_std,val_rel_mae_1,val_rel_mae_1_std,val_rel_mae_5,val_rel_mae_5_std,test_mae,test_mae_std,test_rel_mae_1,test_rel_mae_1_std,test_rel_mae_5,test_rel_mae_5_std,final_train_loss,final_lr,runtime,state,created_at
0,test_linear_norm_balanced_gelu_noact_model_tra...,transformer,/share/dean/arxiv-data/model_dev/baseline_benc...,365,730,256,1,3,0.0005,10,0.1,128,False,accesses,8.332387,6.824427,2.117954,2.951821,0.487555,0.306808,8.402383,6.894554,2.114524,2.957375,0.490304,0.306686,117.735949,0,421.88167,finished,2025-08-25T21:35:25Z
