In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from glob import iglob

import sklearn

from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Setup

## Plotting Options

In [None]:
# Global plot styling: seaborn "pastel" palette, 20x20 figures, font size 24.
sns.set_palette("pastel")
# Colors of the active property cycle, read after the palette has been applied.
colors = mpl.rcParams['axes.prop_cycle'].by_key()['color']
plt.rcParams['figure.figsize'] = (20, 20)
mpl.rcParams["font.size"] = "24"
#plt.rcParams["font.family"] = "Computer Modern"

## Data Loaders

In [None]:
# from: https://gist.github.com/willwhitney/9cecd56324183ef93c2424c9aa7a31b4
from tensorboard.backend.event_processing import event_accumulator

def load_tf(experiment):
    """Load all TensorBoard runs of one experiment folder into a DataFrame.

    Run directories are globbed from
    ``../experiments/*/{experiment}/tensorboard/*``. The name of each run
    directory is parsed as
    ``<dataset>-<loss_name>-batch_<batchsize>-lr_<lr>_<rest>`` and the parsed
    pieces are attached as constant columns to every row of that run.

    Args:
        experiment: Name of the experiment folder to look for.

    Returns:
        A tuple ``(frame, last_dir)``. ``frame`` is indexed by ``epoch`` (the
        logged step), has one column per scalar tag, plus the columns
        ``experiment_folder``, ``experiment``, ``dataset``, ``loss_name``,
        ``batchsize`` and ``lr``. ``last_dir`` is the last run directory that
        was visited. ``(None, None)`` is returned when no run directory
        exists or when none of the runs yielded any scalar data.

    Raises:
        ValueError: If a run directory name does not follow the pattern above.
    """
    import os  # local import: only needed here for portable path handling

    globbing = f"../experiments/*/{experiment}/tensorboard/*"
    found = list(iglob(globbing))
    if len(found) < 1:
        print(f"no runs found in {globbing}, check your experiments folder")
        return None, None
    print(f"found {len(found)} experiments in {globbing}")

    pds = []
    for dirname in found:
        # size_guidance of 0 for scalars: per the TensorBoard docs this keeps
        # every event instead of a down-sampled subset.
        ea = event_accumulator.EventAccumulator(dirname, size_guidance={event_accumulator.SCALARS: 0})
        ea.Reload()
        mnames = ea.Tags()['scalars']

        # basename() instead of split('/'): glob joins path components with
        # the OS separator, which is not '/' on every platform.
        base_name = os.path.basename(os.path.normpath(dirname))
        extras = {}
        extras['experiment_folder'] = experiment
        extras['experiment'] = base_name
        extras['dataset'], rest = base_name.split('-', 1)
        extras['loss_name'], rest = rest.split('-batch_', 1)
        extras['batchsize'], rest = rest.split('-lr_', 1)
        extras['batchsize'] = int(extras['batchsize'])
        extras['lr'], rest = rest.split('_', 1)
        extras['lr'] = float(extras['lr'])

        # One single-column frame per scalar tag, indexed by epoch.
        dframes = {}
        for n in mnames:
            dframes[n] = (
                pd.DataFrame(ea.Scalars(n), columns=["wall_time", "epoch", n])
                .drop(columns="wall_time")
                .set_index("epoch")
            )
        if not dframes:
            continue

        try:
            run = pd.concat(list(dframes.values()), axis=1)
        except ValueError as e:
            # Best effort: report the broken run and keep loading the others.
            print(f"Error loading: {dirname}")
            print(e)
            continue
        for k, v in extras.items():
            run[k] = v
        pds.append(run)

    if not pds:
        # pd.concat([]) would raise "No objects to concatenate"; signal
        # "nothing loaded" the same way as the not-found case instead.
        print(f"no scalar data could be loaded from {globbing}")
        return None, None
    return pd.concat(pds), dirname

In [None]:
# Display labels (LaTeX math where applicable) keyed by loss identifier.
LOSS_NAMES = dict(
    accuracy_05=r'$\text{Accuracy}^l$',
    accuracy_05_sig_k10=r'$\text{Accuracy}^s$',
    f1_05=r'$F_1^l$',
    f1_05_sig_k10=r'$F_1^s$',
    bce='BCE',
)

## Evaluation

In [None]:
# Experiment folders whose runs are merged into one frame.
experiments = ['main', 'auroc', 'wmw']

dfs = []
for experiment in experiments:
    df, path = load_tf(experiment)
    # load_tf returns (None, None) when it has nothing to offer for a folder.
    if df is not None:
        dfs.append(df)
df = pd.concat(dfs)

### Find early stopping epoch/result

In [None]:
# For every run, keep the single row (epoch) with the lowest validation loss.
dfs = []
for experiment in df['experiment'].unique():
    _df = df[df['experiment']==experiment]
    val_loss = _df['val/loss']
    if val_loss.isna().all():
        # No usable validation loss for this run: there is no minimum to pick.
        print(f"no valid val/loss for {experiment}, skipping")
        continue
    # idxmin() returns an index *label* (the epoch), whereas .iloc expects a
    # *position*. The two only coincide when epochs are exactly 0..n-1, so
    # look the minimum up on a positional index instead.
    best_pos = val_loss.reset_index(drop=True).idxmin()
    _df = _df.iloc[best_pos]
    dfs.append(_df)
# Each selected row is a Series named after its epoch; after transposing, that
# name ends up in the unnamed index, which reset_index() exposes as 'index'.
best_by_val_loss = pd.concat(dfs, axis=1).T.reset_index().rename(columns={'index': 'epoch'})


### Count by Dataset + Loss

In [None]:
# Number of runs required (and kept) for every dataset/loss combination.
n = 10
filtered = []
for dataset in best_by_val_loss['dataset'].unique():
    for loss in best_by_val_loss['loss_name'].unique():
        f = best_by_val_loss[(best_by_val_loss['loss_name']==loss)&(best_by_val_loss['dataset']==dataset)]
        count = f.shape[0]
        if count == 0:
            # Combination was never run: report it and move on.
            print("no rows: ")
            print(f"  {count}x {dataset}, {loss}")
            continue
        if count < n:
            # Report the actual threshold rather than a hard-coded number.
            raise RuntimeError(f"Cannot find at least {n} rows for condition: {count}x {dataset}, {loss}")
        # Keep exactly the first n runs so every condition has the same count.
        filtered.append(f.head(n))

In [None]:
# Stack the per-condition selections row-wise into a single frame.
best_by_val_loss_first_n = pd.concat(objs=filtered, axis=0)

### Aggregate

In [None]:
# Columns that are neither averaged nor carried over into the aggregate.
excluded_cols = {'experiment'}
dfs = []
for loss in best_by_val_loss_first_n['loss_name'].unique():
    for dataset in best_by_val_loss_first_n['dataset'].unique():
        _df = best_by_val_loss_first_n[(best_by_val_loss_first_n['loss_name']==loss)&(best_by_val_loss_first_n['dataset']==dataset)]
        # Columns may be stored with object dtype; restore numeric dtypes and
        # restrict the reductions to them explicitly. Relying on mean()/std()
        # to silently drop non-numeric columns raises TypeError on pandas >= 2.0.
        _dfnum = _df.infer_objects()
        _dfmean = _dfnum.mean(numeric_only=True)
        _dfstd = _dfnum.std(numeric_only=True)
        # An empty group has no mean loss either; report it like a NaN loss.
        if _df.empty or np.isnan(_dfmean['loss']):
            print(f"{loss} {dataset} loss is NaN")
            continue
        d = {'count': [_df.shape[0]]}
        meancols = list(_dfmean.keys())
        # Ordered list instead of a set difference so the resulting column
        # order is the same on every run.
        nonmeancols = [col for col in _df.columns if col not in meancols and col not in excluded_cols]
        for col in nonmeancols:
            # Non-numeric columns: carry over the value of the first row.
            d[col] = [_df[col].iloc[0]]
        for col in meancols:
            d[f"{col}/mean"] = [_dfmean[col]]
            d[f"{col}/std"] = [_dfstd[col]]
        dfs.append(pd.DataFrame(data=d))

aggregate = pd.concat(dfs)
aggregate