In [1]:
import os
import glob
import sys
import json
import yaml
import numpy as np
import pandas as pd

from warnings import warn

#### Load external baselines

In [2]:
# External baseline numbers are kept in a YAML file shaped as
# {model: {metric: {'mean': ..., 'std': ...}}}.
with open('./configs/ext_baselines.yaml', 'r') as baseline_file:
    results_ext = yaml.safe_load(baseline_file)


def _select_stat(stat):
    """Return {model: {metric: entry[stat]}} for every entry in `results_ext`."""
    return {
        model: {metric: entry[stat] for metric, entry in metrics.items()}
        for model, metrics in results_ext.items()
    }


# Split the nested entries into two parallel tables: one of means, one of stds.
results_ext_mean = _select_stat('mean')
results_ext_std = _select_stat('std')

#### Parse experiment results

In [3]:
result_dir = './evaluations/'
tmp_dir = './tmp/'

# Metrics gathered for every run; each one is read from `testing.final_<name>`.
METRIC_NAMES = ('accuracy', 'auroc', 'auprc')


def parse_run_results(result_paths):
    """Collect the final test metrics from a sequence of run directories.

    Every run directory is expected to contain a `metrics.json` and a
    `config.json`. A run is skipped with a warning when its `metrics.json`
    is missing or does not contain the key `testing.final_loss`.

    Parameters
    ----------
    result_paths : iterable of str
        Paths of the individual run directories, processed in the given order.

    Returns
    -------
    results : dict
        `{config_label: {metric: [value, ...]}}` with one value per kept run.
        Each value is the first entry of `testing.final_<metric>`, scaled by 100.
    run_ids : dict
        `{config_label: [run_id, ...]}`, where a run id is the base name of
        the run directory.
    """
    results = {}
    run_ids = {}

    for path in result_paths:
        run_id = os.path.basename(path)

        met_fp = os.path.join(path, 'metrics.json')
        # Stray files or runs that never wrote metrics must not abort the whole parse.
        if not os.path.isfile(met_fp):
            warn(f'Warning | run {run_id} has no metrics.json, skipping.')
            continue
        with open(met_fp, 'r') as f:
            mets = json.load(f)
        if 'testing.final_loss' not in mets:
            warn(f'Warning | run {run_id} has been interrupted.')
            continue

        cfg_fp = os.path.join(path, 'config.json')
        with open(cfg_fp, 'r') as f:
            cfg = json.load(f)
        # NOTE: every config name other than 'FCNBest' is reported as 'FCNLS2T'.
        cfg_name = 'FCN' if cfg['config_name'] == 'FCNBest' else 'FCNLS2T'

        per_metric = results.setdefault(cfg_name, {k: [] for k in METRIC_NAMES})
        for k, values in per_metric.items():
            values.append(100 * mets['testing.final_' + k]['values'][0])

        run_ids.setdefault(cfg_name, []).append(run_id)

    return results, run_ids


# glob returns entries in arbitrary order; sort so that the order of runs
# (and therefore of `run_ids` and the metric lists) is reproducible.
result_ps = sorted(
    p for p in glob.glob(os.path.join(result_dir, '*'))
    if os.path.basename(p) != '_sources'
)

results, run_ids = parse_run_results(result_ps)
    

In [4]:
def _per_metric(table, reduce_fn):
    """Apply `reduce_fn` to each metric's list of values, keeping the two-level nesting."""
    return {
        name: {metric: reduce_fn(values) for metric, values in metrics.items()}
        for name, metrics in table.items()
    }


# Mean and standard deviation (NumPy defaults) of the values collected in `results`.
results_mean = _per_metric(results, np.mean)
results_std = _per_metric(results, np.std)

# Own results come first, then the external baselines; if a name occurs in
# both, the external entry replaces the local one.
results_all_mean = dict(results_mean)
results_all_mean.update(results_ext_mean)
results_all_std = dict(results_std)
results_all_std.update(results_ext_std)

# Render every entry as "mean ± std" with three decimals.
results_all_str = {
    k: {
        k2: f'{v2:.3f} ± {results_all_std[k][k2]:.3f}'
        for k2, v2 in v.items()
    }
    for k, v in results_all_mean.items()
}

In [5]:
# The outer keys of `results_all_str` become columns when the frame is built,
# so flip it afterwards to get one row per outer key and one column per metric.
summary_by_metric = pd.DataFrame(results_all_str)
df = summary_by_metric.T
display(df)

Unnamed: 0,accuracy,auroc,auprc,time
FCN,79.742 ± 1.087,85.842 ± 0.246,52.476 ± 0.893,
FCNLS2T,84.358 ± 0.532,86.166 ± 0.346,53.893 ± 0.531,
GRUD,80.000 ± 2.900,86.300 ± 0.300,53.700 ± 0.900,8.670 ± 0.490
GRUSimple,82.200 ± 0.200,80.800 ± 1.100,42.200 ± 0.600,30.000 ± 2.500
IPNets,79.400 ± 0.300,86.000 ± 0.200,51.000 ± 0.600,25.300 ± 1.800
PhasedLSTM,76.800 ± 5.200,79.000 ± 1.000,38.700 ± 1.500,44.600 ± 2.300
Transformer,83.700 ± 3.500,86.300 ± 0.800,52.800 ± 2.200,6.060 ± 0.060
LatentODE,76.000 ± 0.100,85.700 ± 0.600,50.700 ± 1.700,3500.000 ± nan
SeFTAttn,75.300 ± 3.500,85.100 ± 0.400,52.400 ± 1.100,7.620 ± 0.100
