In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import matplotlib.pyplot as plt
import os
import json
from typing import List, Dict, Tuple, Optional
from tqdm import tqdm
import seaborn as sns
tqdm.pandas()
sns.set_context("paper")

In [2]:
path_to_results_dir: str = '/share/pi/nigam/mwornow/ehrshot-benchmark/EHRSHOT_ASSETS/results_ehrshot'
tasks = os.listdir(path_to_results_dir)
print("Tasks: ", tasks)

Tasks:  ['new_hypertension', 'guo_los', 'lab_hypoglycemia', 'lab_hyponatremia', 'new_pancan', 'lab_anemia', 'new_acutemi', 'chexpert', 'guo_readmission', 'lab_thrombocytopenia', 'new_hyperlipidemia', 'lab_hyperkalemia', 'guo_icu']


In [5]:
paths = []
for task in tqdm(tasks, desc='Finding paths...'):
    path_to_task_dir: str = os.path.join(path_to_results_dir, task, 'models')
    if not os.path.exists(path_to_task_dir): 
        continue
    models = os.listdir(path_to_task_dir)
    for model in models:
        path_to_model_dir: str = os.path.join(path_to_task_dir, model)
        heads = os.listdir(path_to_model_dir)
        for head in heads:
            path_to_head_dir: str = os.path.join(path_to_model_dir, head)
            subtasks = os.listdir(path_to_head_dir)
            for subtask in subtasks:
                path_to_subtask_dir: str = os.path.join(path_to_head_dir, subtask)
                ks = os.listdir(path_to_subtask_dir)
                for k in ks:
                    path_to_k_dir: str = os.path.join(path_to_subtask_dir, k)
                    paths.append({
                        'path' : path_to_k_dir,
                        'task' : task,
                        'model' : model,
                        'head' : head,
                        'k' : k,
                        'subtask' : subtask,
                    })
print("Found {} paths".format(len(paths)))

Finding paths...:   0%|          | 0/13 [00:00<?, ?it/s]

Finding paths...: 100%|██████████| 13/13 [00:17<00:00,  1.33s/it]

Found 936 paths





In [6]:
results = []
for p in tqdm(paths, desc='Loading results...'):
    path, task, model, head, k, subtask = p['path'], p['task'], p['model'], p['head'], p['k'], p['subtask']
    path_to_hparams_json: str = os.path.join(path, 'model_hparams.json')
    hparams: dict = json.load(open(path_to_hparams_json, 'r')).get("model_hparams", {})
    auroc: float = json.load(open(path_to_hparams_json, 'r')).get("scores", {}).get("auroc", {}).get("score")
    hparams = { f"hparam_{k}": v for k, v in hparams.items() }
    results.append({
        **p,
        **hparams
    })
df = pd.DataFrame(results)

Loading results...: 100%|██████████| 936/936 [15:55<00:00,  1.02s/it]


In [8]:
df['hparam_C'].value_counts()

hparam_C
0.000001    183
0.100000    173
0.010000    127
0.001000     61
0.000100     22
0.000010      9
Name: count, dtype: int64