In [1]:
import json
from pathlib import Path
import pandas as pd
import numpy as np

In [2]:
# Directory holding one JSON result file per experiment run, resolved relative
# to the parent of the current working directory.
data_path = Path('.').resolve().parent / 'data' / 'experiments'

# Sort the paths: `iterdir()` yields entries in arbitrary order, and the first
# loaded record is used downstream to derive the column schema.
experiment_paths = sorted(
    path for path in data_path.iterdir() if path.suffix == '.json'
)

# Read each file inside a context manager so no file handle is left open.
jsons = []
for experiment_path in experiment_paths:
    with experiment_path.open() as handle:
        jsons.append(json.load(handle))

In [3]:
# The first loaded record defines the column schema for the combined table:
# metric names first, then experiment-parameter names.
metric_cols = list(jsons[0]['metrics'])
experiment_cols = list(jsons[0]['parameters'])
cols = metric_cols + experiment_cols

# Build one frame per result file and concatenate once. Growing a DataFrame
# with `pd.concat` inside a loop copies all accumulated rows on every pass,
# and seeding it with an empty frame makes the result dtypes depend on how
# pandas treats empty entries.
# NOTE(review): this relies on pandas broadcasting scalar parameter values
# across list-valued metrics (presumably one entry per fold) — confirm against
# the JSON schema.
result_frames = [
    pd.DataFrame(
        {**json_result['metrics'], **json_result['parameters']},
        columns=cols,
    )
    for json_result in jsons
]
df = pd.concat(result_frames, ignore_index=True)

In [14]:
def _mean_score_per_class(x, classes=('metastatic_tissue', 'normal_tissue')):
    """Average a Series of per-class score dicts into a single dict.

    Parameters
    ----------
    x : pandas.Series
        Series whose elements are mappings that contain every key in
        ``classes``.
    classes : tuple of str
        Keys to average; defaults to the two class labels used in this
        notebook.

    Returns
    -------
    dict
        ``{class_label: mean of that label's values over all elements of x}``,
        with keys in the order given by ``classes``.
    """
    return {
        label: x.map(lambda scores, label=label: scores[label]).mean()
        for label in classes
    }


def agg_func_roc_auc(x):
    """Aggregate per-class ROC AUC dicts into their per-class mean."""
    return _mean_score_per_class(x)


def agg_func_pr_auc(x):
    """Aggregate per-class PR AUC dicts into their per-class mean."""
    return _mean_score_per_class(x)

# Per-column aggregation applied within each experiment configuration.
# Scalar metrics use the 'mean' string alias rather than `np.mean`: passing
# NumPy callables to `GroupBy.agg` is deprecated in recent pandas, while the
# alias always selects the built-in grouped mean.
# The AUC columns hold per-class dicts, so they use the custom aggregators.
agg_dict = {
    'fit_time': 'mean',
    'test_time': 'mean',
    'accuracy': 'mean',
    'roc_auc': agg_func_roc_auc,
    'pr_auc': agg_func_pr_auc,
}

# One row per unique combination of experiment parameters.
metrics = df.groupby(experiment_cols).agg(agg_dict)

In [33]:
# Build the re-indexed table from a copy instead of mutating `metrics` in
# place. With `reset_index(inplace=True)`, a failure in the following step (or
# a second run of the cell) left `metrics` half-transformed and could add a
# spurious `index` column. Here `metrics` is rebound only once, at the end.
metrics_flat = metrics.reset_index()

# The model name is the last '_'-separated token of SAMPLE_DATASET.
model_names = metrics_flat['SAMPLE_DATASET'].str.split('_').str[-1].rename('MODEL')

# Index rows by model and drop the experiment-parameter columns.
metrics = metrics_flat.set_index(model_names).drop(columns=experiment_cols)

In [37]:
# Display the `metrics` table as this cell's output.
metrics

Unnamed: 0_level_0,index,fit_time,test_time,accuracy,roc_auc,pr_auc
MODEL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
resnet152v2,0,7338.168142,58.022043,0.783689,"{'metastatic_tissue': 0.8642732780374903, 'nor...","{'metastatic_tissue': 0.8531978979197712, 'nor..."
vgg16,1,1739.566267,12.826058,0.831094,"{'metastatic_tissue': 0.9226364456661944, 'nor...","{'metastatic_tissue': 0.9257485596373434, 'nor..."
