In [1]:
import os
import pickle
import pandas as pd
import numpy as np

from src.io import read_pkl
from src.default_paths import path_root
from src.mappings import model_names, task_names

In [2]:
# Directory whose entries are listed as models further down; each model
# directory is in turn listed for per-task result folders.
PATH_TO_ADAPTER_MODELS_RESULTS = os.path.join(
    path_root,
    "data/cancer",
    "evaluate",
    "adapter_models",
)

In [3]:
def list_dir(path: str):
    """Return the sorted entry names in `path`, skipping notebook checkpoints.

    Args:
        path: directory to list.

    Returns:
        Names (not full paths) of the entries directly inside `path`, with
        ".ipynb_checkpoints" removed. The names are sorted because
        `os.listdir` yields them in arbitrary order, which would otherwise
        make the iteration order differ between machines and runs.

    Raises:
        OSError: propagated from `os.listdir` (e.g. `path` does not exist).
    """
    return sorted(
        name for name in os.listdir(path)
        if name != ".ipynb_checkpoints"
    )

def bsp_res_to_str(results: list):
    """Summarise bootstrap results as "median [lower, upper]".

    The three numbers are the 50th, 2.5th and 97.5th percentiles of
    `results` (NaN entries are ignored), each rounded to 3 decimals.
    """
    # Percentiles come back in the order requested: lower, median, upper.
    percentiles = np.nanpercentile(results, [2.5, 50, 97.5]).round(3)
    lower, med, upper = map(str, percentiles)

    return f"{med} [{lower}, {upper}]"

In [4]:
# When True, each cell is the string produced by `bsp_res_to_str` from the
# "*_bootstrap" entry of the results; when False, the "auroc"/"auprc" entry
# is stored as-is.
use_bootstrap = True

auroc_results = {}
auprc_results = {}
# (model, task, error) for every result that could not be loaded, so a NaN in
# the tables can be traced back to its cause instead of being silently hidden.
failed_results = []

models = list_dir(PATH_TO_ADAPTER_MODELS_RESULTS)

for model in models:
    tasks = list_dir(os.path.join(PATH_TO_ADAPTER_MODELS_RESULTS, model))
    auroc_results[model] = {}
    auprc_results[model] = {}

    for task in tasks:
        results_path = os.path.join(PATH_TO_ADAPTER_MODELS_RESULTS, model, task, "results.pkl")

        try:
            result = read_pkl(results_path)
            auroc = bsp_res_to_str(result["auroc_bootstrap"]) if use_bootstrap else result["auroc"]
            auprc = bsp_res_to_str(result["auprc_bootstrap"]) if use_bootstrap else result["auprc"]
        except Exception as err:
            # Best-effort: keep going and report NaN for both metrics, but
            # remember why. `Exception` (rather than a bare `except`) lets
            # KeyboardInterrupt / SystemExit propagate.
            failed_results.append((model, task, repr(err)))
            auroc = auprc = np.nan

        auroc_results[model][task] = auroc
        auprc_results[model][task] = auprc

if failed_results:
    print(f"{len(failed_results)} result(s) could not be loaded and are reported as NaN:")
    print("\n".join(f"  {m}/{t}: {e}" for m, t, e in failed_results))

In [9]:
# NOTE(review): debugging leftover. This cell was executed out of order
# (In [9]) and reuses the `model` / `task` loop variables left behind by the
# aggregation loop. It repeats the load without the surrounding try/except,
# which is why it surfaces the FileNotFoundError shown in its output.
# Consider removing it once the missing results files exist, since it stops
# a Restart & Run All at this point.
result = read_pkl(os.path.join(PATH_TO_ADAPTER_MODELS_RESULTS, model, task, "results.pkl"))

FileNotFoundError: [Errno 2] No such file or directory: '/hpf/projects/lsung/phi/projects/lguo/femr-on-sk/data/cancer/evaluate/adapter_models/count_sk_gbm/mortality_90/results.pkl'

In [5]:
# Column labels selected (in this order) from the result tables; these are the
# labels after the `model_names` renaming has been applied.
column_orders = [
    "SK [Count]",
    "SK [Count] (GBM)",
    "SK",
    "Stanford",
    "Stanford_Cont-Pre",
]

# Row labels used to reindex the result tables, in display order.
task_orders = [
    "mortality_90",
    "mortality_180",
    "mortality_365",
]

In [6]:
# AUROC table: one row per task (ordered by `task_orders`), one column per
# model (ordered by `column_orders`).
df_auroc = (
    # Outer dict keys (models) become columns, inner keys (tasks) the index.
    pd.DataFrame.from_dict(auroc_results)
    # Rounds numeric cells; already-formatted bootstrap strings are unaffected.
    .round(3)
    .rename(columns=model_names)
    # Map task labels on the index only. The previous
    # reset_index -> replace -> set_index round trip applied `task_names` to
    # every cell of the frame, so a metric value equal to a key could be
    # rewritten as well.
    .rename(index=task_names)
    .rename_axis(index="Task")
    .reindex(task_orders)
    [column_orders]
)

df_auroc

Unnamed: 0_level_0,SK [Count],SK [Count] (GBM),SK,Stanford,Stanford_Cont-Pre
Task,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
mortality_90,,,,,
mortality_180,,,,,
mortality_365,,,,,


In [7]:
# AUPRC table: one row per task (ordered by `task_orders`), one column per
# model (ordered by `column_orders`).
df_auprc = (
    # Outer dict keys (models) become columns, inner keys (tasks) the index.
    pd.DataFrame.from_dict(auprc_results)
    # Rounds numeric cells; already-formatted bootstrap strings are unaffected.
    .round(3)
    .rename(columns=model_names)
    # Map task labels on the index only. The previous
    # reset_index -> replace -> set_index round trip applied `task_names` to
    # every cell of the frame, so a metric value equal to a key could be
    # rewritten as well.
    .rename(index=task_names)
    .rename_axis(index="Task")
    .reindex(task_orders)
    [column_orders]
)

df_auprc

Unnamed: 0_level_0,SK [Count],SK [Count] (GBM),SK,Stanford,Stanford_Cont-Pre
Task,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
mortality_90,,,,,
mortality_180,,,,,
mortality_365,,,,,
