In [110]:
import pandas as pd
import wandb

In [111]:
api = wandb.Api()

# Runs are addressed as "<entity>/<project-name>".
runs = api.runs("martins0n/tide")

summary_list, config_list, name_list, link = [], [], [], []
for run in runs:
    # Metric outputs (accuracy, loss, ...) live in .summary;
    # ._json_dict is used here to omit large files.
    summary_list.append(run.summary._json_dict)

    # Hyperparameters live in .config; entries whose key starts
    # with "_" are special values and are left out.
    config_list.append(dict(
        item for item in run.config.items()
        if not item[0].startswith('_')
    ))

    # Human-readable run name and the URL of the run page.
    name_list.append(run.name)
    link.append(run.url)

# One row per run; "summary" and "config" hold dicts at this stage.
runs_df = pd.DataFrame(
    dict(summary=summary_list, config=config_list, name=name_list, link=link)
)

In [112]:
def flatten_json(y):
    """Flatten arbitrarily nested dicts/lists into a single-level dict.

    Nested keys are joined with '.', and list elements are addressed by
    their position, e.g. ``{'a': {'b': [1, 2]}}`` becomes
    ``{'a.b.0': 1, 'a.b.1': 2}``.

    Parameters
    ----------
    y : Any
        Usually a dict (possibly containing dicts, lists and scalars).
        A bare scalar is returned under the empty-string key.

    Returns
    -------
    dict
        Mapping from dotted path to leaf value. Empty dicts and empty
        lists contribute no entries, because they contain no leaves.
    """
    out = {}

    def flatten(x, name=''):
        # `name` is the dotted prefix accumulated so far, always ending
        # in '.' (or empty at the top level).
        if isinstance(x, dict):
            for key, value in x.items():
                # Formatting the key (instead of `name + key`) keeps
                # non-string keys such as ints from raising TypeError.
                flatten(value, f"{name}{key}.")
        elif isinstance(x, list):
            for position, item in enumerate(x):
                flatten(item, f"{name}{position}.")
        else:
            # Leaf value: strip the trailing '.' from the prefix.
            out[name[:-1]] = x

    flatten(y)
    return out

In [113]:
# Preview the first five runs (summary/config are still dict-valued here).
runs_df.head(n=5)

Unnamed: 0,summary,config,name,link
0,"{'test_loss': 0.450423002243042, '_timestamp':...","{'seed': 11, 'model': {'lr': 6.55e-05, 'horizo...",decent-cosmos-228,https://wandb.ai/martins0n/tide/runs/wu6kcoj3
1,"{'lr-Adam': 3.01e-05, '_timestamp': 1689538604...","{'seed': 11, 'model': {'lr': 3.01e-05, 'horizo...",hardy-sun-227,https://wandb.ai/martins0n/tide/runs/ixnfwz85
2,"{'epoch': 5, 'MSE_mean': 0.19268622994422913, ...","{'seed': 11, 'model': {'lr': 3.01e-05, 'horizo...",fast-terrain-226,https://wandb.ai/martins0n/tide/runs/7yhjoq06
3,"{'test_loss': 0.4383061230182648, 'trainer/glo...","{'seed': 11, 'model': {'lr': 6.55e-05, 'horizo...",peach-firebrand-225,https://wandb.ai/martins0n/tide/runs/qb7ou15t
4,"{'test_loss': 0.1379043459892273, '_step': 273...","{'seed': 11, 'model': {'lr': 0.000252, 'horizo...",polished-firefly-224,https://wandb.ai/martins0n/tide/runs/kzcu63if


In [114]:
# Replace each run's nested config dict with its flattened, dotted-key form.
runs_df["config"] = runs_df["config"].map(flatten_json)

In [115]:
# Work on an independent (deep) copy so runs_df itself is not modified below.
df = runs_df.copy(deep=True)

In [116]:
# Expand the dict-valued columns into one column per key, appended on the
# right: first 'config', then 'summary'. Each pass drops the source column.
for dict_column in ('config', 'summary'):
    expanded = df[dict_column].apply(pd.Series)
    df = pd.concat([df.drop(columns=[dict_column]), expanded], axis=1)

# NOTE(review): this rebinds runs_df to the normalized summary table, so the
# 'summary'/'config' columns read by this and earlier cells are presumably
# gone afterwards and re-running those cells would fail -- confirm whether a
# separate name was intended.
runs_df = pd.json_normalize(runs_df['summary'])

In [117]:
# Keep runs that reported both test metrics and whose accelerator is 'cuda'.
df = (
    df
    .loc[lambda runs: runs['test_mae'].notna()]
    .loc[lambda runs: runs['test_mse'].notna()]
    .loc[lambda runs: runs['accelerator'].eq('cuda')]
)

# Dataset key: the part of 'dataset.name' before the first '.'.
df['dataset'] = df['dataset.name'].map(lambda full_name: full_name.split('.')[0])
# Horizon as a Python int.
df['horizon'] = df['experiment.horizon'].map(int)

In [118]:
# Reference metrics, read from a CSV file in the working directory.
paper_metrics = pd.read_csv(filepath_or_buffer='paper.metrics.csv')

In [119]:
# Preview the first five reference rows.
paper_metrics.head(n=5)

Unnamed: 0,dataset,horizon,MSE,MAE
0,weather,96,0.166,0.222
1,weather,192,0.209,0.263
2,weather,336,0.254,0.301
3,weather,720,0.313,0.34
4,traffic,96,0.336,0.253


In [122]:
# For every (dataset, horizon) pair keep the run with the largest
# model.max_epochs, join it with the reference metrics, and write a markdown
# table to tt.md whose first column links to the run page.
(
    df
    .sort_values("model.max_epochs", ascending=False)
    # keep='first' is the default: the top row per pair after the sort survives.
    .drop_duplicates(['dataset', 'horizon'])
    .sort_values(['dataset', 'horizon'])
    # Default (inner) merge: pairs without a reference row are dropped.
    .merge(paper_metrics, on=['dataset', 'horizon'])
    .assign(
        name=lambda merged: merged.apply(
            lambda row: f"[{row.dataset} {row.horizon}]({row.link})",
            axis=1,
        )
    )
    .loc[:, ['name', 'test_mae', 'MAE', 'test_mse', 'MSE']]
    .to_markdown("tt.md", index=False)
)