In [1]:
import pandas as pd
import numpy as np
import os

## Baseline 

In [2]:
# Root folder of the baseline experiment; every check below reads from
# <base_path>/artifacts/analysis_id=<task>/...
base_path = "/hpf/projects/lsung/projects/mimic4ds/Experiments/baseline"

# Task names, used to build the `analysis_id=<task>` folder names.
tasks = [
    "mortality",
    "longlos",
    "invasivevent",
    "sepsis",
]

# Year-group labels: consecutive 3-year windows
# ('2008 - 2010', '2011 - 2013', '2014 - 2016', '2017 - 2019').
groups = [f"{start} - {start + 2}" for start in range(2008, 2018, 3)]

##### Model Check: 20 nn models per year-group

In [3]:
# Count the model artifacts of each task per year-group and record every
# [task, group] pair whose count differs from the expected number.
n_models_expected = 20

check_list = []
for task in tasks:
    # The models folder depends only on the task, so it is listed once per
    # task instead of once per (task, group) pair.
    fpath = os.path.join(
        base_path,
        "artifacts",
        f"analysis_id={task}",
        "models"
    )

    # Skip entries whose name contains 'backup' or 'deprecated'.
    fnames = [
        x for x in os.listdir(fpath) if
        'backup' not in x and
        'deprecated' not in x
    ]

    # The group is matched against the second '_'-separated token of each
    # name. Names without an underscore map to None so they match no group
    # instead of raising IndexError.
    name_groups = [
        x.split('_')[1] if '_' in x else None
        for x in fnames
    ]

    for group in groups:
        if name_groups.count(group) != n_models_expected:
            check_list.append([task, group])

if not check_list:
    print('All good.')
else:
    print(check_list)

All good.


##### Model output check (pred_probs):
- folder should contain 10 csv files
- Each csv file should contain the following columns: 
    - phase, outputs, labels, pred_probs, row_id, ids, iter, analysis_id
- maximum value for 'iter' column should be 20
- check that the following columns are all integers:
    - ids, labels, row_id
- check that the following columns are all floats:
    - outputs, pred_probs

In [4]:
# Validate the pred_probs CSV files of every task: file count, required
# columns, maximum `iter`, and integer / float column dtypes. Each violation
# is appended to `check_list` as a message and printed at the end.
cols = ['phase','outputs','pred_probs','labels','row_id','ids','iter','analysis_id']
check_list = []
for task in tasks:
    fpath = os.path.join(
            base_path,
            "artifacts",
            f"analysis_id={task}",
            "results/pred_probs"
        )

    # Skip entries whose name contains 'backup' or 'deprecated'.
    fnames = [
        x for x in os.listdir(fpath) if
        'backup' not in x and
        'deprecated' not in x
    ]

    if len(fnames) != 10:
        check_list.append(f"Missing files in {task}")

    for fname in fnames:

        df = pd.read_csv(os.path.join(fpath, fname))

        # Required columns absent from this file. The checks below skip any
        # column listed here, so a missing column is reported once instead of
        # aborting the whole run with a KeyError.
        missing = [c for c in cols if c not in df.columns]
        if missing:
            check_list.append(f'Missing columns in {fname} under {task}')

        if 'iter' not in missing and df['iter'].max() != 20:
            check_list.append(f'Incorrect max iter in {fname} under {task}')

        # check int columns (dtype-level check; a column holding NaN is read
        # as float and is therefore reported; bool columns are not accepted
        # as integer):
        for c in ['ids','labels','row_id']:
            if c not in missing and not pd.api.types.is_integer_dtype(df[c]):
                check_list.append(f"Check column {c} in {fname} under {task}")

        # check float columns:
        for c in ['pred_probs', 'outputs']:
            if c not in missing and not pd.api.types.is_float_dtype(df[c]):
                check_list.append(f"Check column {c} in {fname} under {task}")

if not check_list:
    print("All good.")
else:
    for i in check_list:
        print(i)

All good.


##### Output evaluation check (evaluate_models):
- folder should contain 30 csv files (3 evaluation methods for each pred_probs file)
- Each csv file should contain the following columns: 
    - phase, metric, boot_iter, analysis_id
    - mean/performance should be float and contain no NaN
- maximum value for 'boot_iter' column should be 10000
- there should be 5 metrics (ace_abs_logistic_log, ace_rmse_logistic_log, auc, auprc, loss_bce)

In [23]:
# Validate the evaluate_models CSV files of every task: file count, required
# columns, maximum `boot_iter`, number of distinct metrics, and that the
# mean/performance column(s) are float without NaN. Each violation is
# appended to `check_list` as a message and printed at the end.
cols = ['phase','metric','boot_iter','analysis_id']
check_list = []
for task in tasks:
    fpath = os.path.join(
            base_path,
            "artifacts",
            f"analysis_id={task}",
            "results/evaluate_models"
        )

    # Skip entries whose name contains 'backup' or 'deprecated'.
    fnames = [
        x for x in os.listdir(fpath) if
        'backup' not in x and
        'deprecated' not in x
    ]

    if len(fnames) != 30:
        check_list.append(f"Missing files in {task}")

    for fname in fnames:

        df = pd.read_csv(os.path.join(fpath, fname))

        # Required columns absent from this file. The checks below skip any
        # column listed here, so a missing column is reported once instead of
        # aborting the whole run with a KeyError.
        missing = [x for x in cols if x not in df.columns]
        if missing:
            check_list.append(f'Missing columns in {fname} under {task}')

        if 'boot_iter' not in missing and df['boot_iter'].max() != 10000:
            check_list.append(f'Incorrect max boot_iter in {fname} under {task}')

        if 'metric' not in missing and len(df['metric'].unique()) != 5:
            check_list.append(f"Incorrect number of metrics in {fname} under {task}")

        # check performance columns:
        c = [x for x in df.columns if x=='mean' or x=='performance']
        if not c:
            # Neither column exists: report it rather than failing on an
            # empty selection.
            check_list.append(f"No mean/performance column in {fname} under {task}")
        else:
            # Every matching column must be float, not only the first one.
            if not all(pd.api.types.is_float_dtype(df[x]) for x in c):
                check_list.append(f"Check column {c} in {fname} under {task}")
            # Total NaN count over the matching columns, computed without
            # integer-indexing a label-indexed Series.
            n_nan = int(df[c].isnull().sum().sum())
            if n_nan > 0:
                check_list.append(
                    f"columns {c} in {fname} under {task} has {n_nan} nan values")

if not check_list:
    print("All good.")
else:
    for i in check_list:
        print(i)

All good.


##### Output comparison check (compare_models):
- folder should contain 9 csv files (3 evaluation methods for each comparison of baseline (2008 - 2010) and oracle (2017 - 2019))
- Each csv file should contain the following columns: 
    - phase, metric, performance_base, performance_test, performance_diff, boot_iter, analysis_id
    - performance_diff should be float and contain no NaN
- maximum value for 'boot_iter' column should be 10000
- there should be 5 metrics (ace_abs_logistic_log, ace_rmse_logistic_log, auc, auprc, loss_bce)

In [28]:
# Validate the compare_models CSV files of every task: file count, required
# columns, maximum `boot_iter`, number of distinct metrics, and that
# `performance_diff` is float without NaN. Each violation is appended to
# `check_list` as a message and printed at the end.
cols = ['phase','metric','performance_base','performance_test','performance_diff','boot_iter','analysis_id']
check_list = []
for task in tasks:
    fpath = os.path.join(
            base_path,
            "artifacts",
            f"analysis_id={task}",
            "results/compare_models"
        )

    # Skip entries whose name contains 'backup' or 'deprecated'.
    fnames = [
        x for x in os.listdir(fpath) if
        'backup' not in x and
        'deprecated' not in x
    ]

    if len(fnames) != 9:
        check_list.append(f"Missing files in {task}")

    for fname in fnames:

        df = pd.read_csv(os.path.join(fpath, fname))

        # Required columns absent from this file. The checks below skip any
        # column listed here, so a missing column is reported once instead of
        # aborting the whole run with a KeyError.
        missing = [x for x in cols if x not in df.columns]
        if missing:
            check_list.append(f'Missing columns in {fname} under {task}')

        if 'boot_iter' not in missing and df['boot_iter'].max() != 10000:
            check_list.append(f'Incorrect max boot_iter in {fname} under {task}')

        if 'metric' not in missing and len(df['metric'].unique()) != 5:
            check_list.append(f"Incorrect number of metrics in {fname} under {task}")

        # check performance columns:
        c = 'performance_diff'
        if c not in missing:
            if not pd.api.types.is_float_dtype(df[c]):
                check_list.append(f"Check column {c} in {fname} under {task}")
            # The NaN count of a single column is a scalar; it must not be
            # indexed with [0], which made the original report itself crash
            # whenever NaNs were present.
            n_nan = int(df[c].isnull().sum())
            if n_nan > 0:
                check_list.append(
                    f"columns {c} in {fname} under {task} has {n_nan} nan values")

if not check_list:
    print("All good.")
else:
    for i in check_list:
        print(i)

All good.
