In [1]:
import os

import numpy as np
import pandas as pd

## Compare with Baseline / Best-case (Oracle) Models

In [2]:
# Locations on the cluster file system used by this notebook.
# Artifacts of the domain-generalization experiments.
gen_artifacts_fpath = (
    "/hpf/projects/lsung/projects/mimic4ds/Experiments/domain_gen/artifacts"
)
# Artifacts of the domain-adaptation experiments.
adapt_artifacts_fpath = (
    "/hpf/projects/lsung/projects/mimic4ds/Experiments/domain_adapt/artifacts"
)
# Artifacts of the baseline experiments.
base_artifacts_fpath = (
    "/hpf/projects/lsung/projects/mimic4ds/Experiments/baseline/artifacts"
)
# Directory holding the exported tables (note the trailing slash).
tables_fpath = (
    "/hpf/projects/lsung/projects/mimic4ds/Experiments/domain_adapt/tables/"
)

In [3]:
# Every evaluated configuration as [train_method, framework, n_ood]:
#   * domain generalization: five training methods, no unlabeled OOD samples ('')
#   * domain adaptation: AL and CORAL for each unlabeled OOD sample count
methods = [
    [train_method, 'Domain Generalization', '']
    for train_method in ('erm', 'irm', 'dro', 'al_layer', 'coral')
] + [
    [train_method, 'Domain Adaptation', n_ood]
    for n_ood in ('100', '500', '1000', '1500')
    for train_method in ('al_layer', 'coral')
]

In [4]:
alpha = 0.05  # two-sided level: intervals span the alpha/2 and 1-alpha/2 percentiles

# Row order used for the final tables.
_TASK_ORDER = ['Long LOS', 'Sepsis', 'Mortality', 'Invasive Ventilation']
_METRIC_ORDER = ['AUROC', 'AUPRC', 'Calibration']


def _summarise_performance_diff(df_diffs, alpha):
    """Summarise ``performance_diff`` per metric with percentile statistics.

    Parameters
    ----------
    df_diffs : pd.DataFrame
        Must contain the columns ``metric`` and ``performance_diff``
        (rows are presumably bootstrap replicates -- confirm against the
        script that writes the CSV files).
    alpha : float
        Two-sided level; the interval bounds are the ``alpha/2`` and
        ``1 - alpha/2`` quantiles.

    Returns
    -------
    pd.DataFrame
        One row per metric with the columns ``metric``, ``CI_lower``,
        ``CI_upper`` and ``CI_med`` (the median).
    """
    # Take quantiles of the single numeric column instead of applying
    # DataFrame.quantile to every group: the latter fails on non-numeric
    # columns in recent pandas and relies on deprecated groupby.apply behaviour.
    by_metric = df_diffs.groupby("metric")["performance_diff"]
    return (
        pd.DataFrame({
            'CI_lower': by_metric.quantile(alpha / 2),
            'CI_upper': by_metric.quantile(1 - alpha / 2),
            'CI_med': by_metric.quantile(0.5),
        })
        .rename_axis('metric')
        .reset_index()
    )


def _pivot_difference_table(df_long, comparison_model):
    """Build the wide (Task x Metric) by (Framework, OOD samples, Method) table.

    Parameters
    ----------
    df_long : pd.DataFrame
        Long-format results with the columns ``comparison_model``, ``Task``,
        ``Metric``, ``Framework``, ``Unlabeled OOD Samples``, ``Method`` and
        ``Difference``.
    comparison_model : str
        Value of ``comparison_model`` to keep (``'base'`` or ``'oracle'``).

    Returns
    -------
    pd.DataFrame
        Pivoted table of formatted ``Difference`` strings; missing
        combinations are filled with a single space.
    """
    keep = (
        (df_long['comparison_model'] == comparison_model)
        & df_long['Metric'].isin(_METRIC_ORDER)
    )
    table = df_long.loc[keep].pivot(
        index=["Task", "Metric"],
        columns=["Framework", "Unlabeled OOD Samples", "Method"],
        values=["Difference"],
    )
    table = table.fillna(" ")
    # Drop the leading "Difference" level that pivot adds for a list of values.
    table.columns = pd.MultiIndex.from_tuples(
        [x[1:] for x in table.columns],
        names=['Framework', 'Unlabelled OOD Samples', 'Method'],
    )
    table = table.reindex(labels=_TASK_ORDER, level=0)
    table = table.reindex(labels=_METRIC_ORDER, level=1)
    return table


# Collect one summary frame per (task, method, comparison model) and
# concatenate once at the end instead of growing a DataFrame inside the loop.
ci_frames = []
for analysis_id in ['mortality', 'longlos', 'invasivevent', 'sepsis']:
    for method in methods:
        train_method, framework, n_ood = method
        is_generalization = framework == 'Domain Generalization'
        for comparison_model in ['base', 'oracle']:

            # Domain-adaptation result files carry the OOD sample count in their name.
            fpath = os.path.join(
                gen_artifacts_fpath if is_generalization else adapt_artifacts_fpath,
                f"analysis_id={analysis_id}",
                "results/compare_with_baseline_exp",
                f"nn_{train_method}_{comparison_model}.csv"
                if is_generalization else
                f"nn_{train_method}_{n_ood}_{comparison_model}.csv"
            )

            df = pd.read_csv(fpath)

            df_CIs = _summarise_performance_diff(df, alpha).assign(
                analysis_id=analysis_id,
                framework=framework,
                comparison_model=comparison_model,
                train_method=train_method,
                n_ood=n_ood,
            )
            ci_frames.append(df_CIs)

df_results = pd.concat(ci_frames, axis=0, ignore_index=True)

# Calculate significance & clean columns
# An interval whose bounds share a sign excludes zero; NaN products compare
# False and therefore get no marker.
df_results['sig'] = np.where(
    df_results['CI_lower'] * df_results['CI_upper'] > 0, '*', ''
)

df_results['Difference'] = (
        df_results['CI_med'].apply('{:.3f}'.format) +
        df_results['sig'] +
        " (" +
        df_results['CI_lower'].apply('{:.3f}'.format) +
        ',' +
        df_results['CI_upper'].apply('{:.3f}'.format) +
        ')'
    )

df_results = df_results.drop(columns=['CI_med', 'CI_lower', 'CI_upper', 'sig'])

# rename columns and values
df_results = df_results.rename(columns={
    'metric': 'Metric',
    'analysis_id': 'Task',
    'n_ood': 'Unlabeled OOD Samples',
    'framework': 'Framework',
    'train_method': 'Method'})

# Assign the replaced columns back: in-place replace on a column selection
# does not reliably update the parent frame (no-op under Copy-on-Write).
df_results['Metric'] = df_results['Metric'].replace(
    {
        'auc': "AUROC",
        'auprc': "AUPRC",
        'ace_abs_logistic_log': 'Calibration'
    }
)

df_results['Method'] = df_results['Method'].replace(
    {
        'al_layer': "AL",
        'coral': "CORAL",
        'erm': "ERM",
        'irm': 'IRM',
        'dro': 'GroupDRO'
    }
)

df_results['Task'] = df_results['Task'].replace(
    {
        'longlos': 'Long LOS',
        'sepsis': 'Sepsis',
        'mortality': 'Mortality',
        'invasivevent': 'Invasive Ventilation',
    }
)

# pivot tables
# compare with base
df_results_base = _pivot_difference_table(df_results, 'base')

# compare with oracle
df_results_oracle = _pivot_difference_table(df_results, 'oracle')

In [5]:
# Results relative to ERM[08-10] (the 'base' comparison model).
# Each cell reads "median (lower,upper)"; '*' marks intervals whose bounds share a sign.
df_results_base

Unnamed: 0_level_0,Framework,Domain Generalization,Domain Generalization,Domain Generalization,Domain Generalization,Domain Generalization,Domain Adaptation,Domain Adaptation,Domain Adaptation,Domain Adaptation,Domain Adaptation,Domain Adaptation,Domain Adaptation,Domain Adaptation
Unnamed: 0_level_1,Unlabelled OOD Samples,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,100,100,500,500,1000,1000,1500,1500
Unnamed: 0_level_2,Method,ERM,IRM,GroupDRO,AL,CORAL,AL,CORAL,AL,CORAL,AL,CORAL,AL,CORAL
Task,Metric,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3
Long LOS,AUROC,"0.034* (0.030,0.038)","0.034* (0.030,0.038)","0.035* (0.031,0.039)","0.035* (0.031,0.038)","0.034* (0.030,0.038)","0.035* (0.031,0.039)","0.035* (0.031,0.039)","0.034* (0.031,0.038)","0.036* (0.033,0.040)","0.035* (0.031,0.039)","0.036* (0.032,0.040)","0.035* (0.031,0.039)","0.035* (0.031,0.039)"
Long LOS,AUPRC,"0.021* (0.016,0.026)","0.024* (0.019,0.029)","0.022* (0.017,0.028)","0.023* (0.018,0.028)","0.023* (0.018,0.028)","0.023* (0.018,0.029)","0.023* (0.018,0.028)","0.023* (0.017,0.028)","0.024* (0.019,0.030)","0.023* (0.018,0.028)","0.025* (0.020,0.030)","0.023* (0.018,0.028)","0.023* (0.017,0.028)"
Long LOS,Calibration,"-0.028* (-0.030,-0.026)","-0.035* (-0.037,-0.033)","-0.012* (-0.015,-0.010)","-0.029* (-0.031,-0.027)","-0.028* (-0.030,-0.026)","-0.031* (-0.033,-0.029)","-0.030* (-0.032,-0.028)","-0.031* (-0.033,-0.029)","-0.031* (-0.033,-0.029)","-0.030* (-0.032,-0.028)","-0.028* (-0.030,-0.026)","-0.029* (-0.031,-0.026)","-0.024* (-0.026,-0.022)"
Sepsis,AUROC,"0.049* (0.041,0.057)","0.045* (0.038,0.053)","0.019* (0.013,0.024)","0.048* (0.041,0.056)","0.046* (0.038,0.054)","0.047* (0.039,0.055)","0.048* (0.040,0.055)","0.046* (0.038,0.054)","0.047* (0.039,0.055)","0.049* (0.041,0.057)","0.046* (0.039,0.054)","0.048* (0.040,0.056)","0.046* (0.038,0.053)"
Sepsis,AUPRC,"0.039* (0.034,0.045)","0.037* (0.032,0.043)","0.013* (0.009,0.017)","0.038* (0.032,0.043)","0.037* (0.031,0.042)","0.036* (0.031,0.042)","0.037* (0.032,0.043)","0.038* (0.032,0.044)","0.038* (0.033,0.044)","0.038* (0.033,0.044)","0.037* (0.032,0.043)","0.039* (0.034,0.046)","0.035* (0.030,0.041)"
Sepsis,Calibration,"-0.034* (-0.036,-0.031)","-0.033* (-0.036,-0.031)","-0.023* (-0.025,-0.021)","-0.035* (-0.037,-0.032)","-0.035* (-0.038,-0.033)","-0.032* (-0.035,-0.030)","-0.032* (-0.035,-0.029)","-0.031* (-0.034,-0.028)","-0.033* (-0.036,-0.030)","-0.032* (-0.035,-0.029)","-0.033* (-0.036,-0.031)","-0.034* (-0.036,-0.031)","-0.032* (-0.034,-0.029)"
Mortality,AUROC,"0.017* (0.013,0.021)","0.017* (0.013,0.022)","-0.033* (-0.040,-0.026)","0.016* (0.012,0.020)","0.016* (0.011,0.020)","0.016* (0.011,0.020)","0.015* (0.011,0.019)","0.016* (0.012,0.020)","0.017* (0.013,0.021)","0.015* (0.011,0.019)","0.019* (0.015,0.023)","0.016* (0.012,0.020)","0.016* (0.012,0.020)"
Mortality,AUPRC,"0.077* (0.066,0.089)","0.079* (0.067,0.090)","0.035* (0.019,0.050)","0.077* (0.065,0.089)","0.077* (0.065,0.089)","0.074* (0.062,0.086)","0.073* (0.061,0.085)","0.078* (0.066,0.090)","0.078* (0.066,0.090)","0.074* (0.062,0.086)","0.081* (0.069,0.092)","0.076* (0.064,0.088)","0.079* (0.067,0.091)"
Mortality,Calibration,"0.003* (0.002,0.004)","0.002* (0.001,0.003)","0.029* (0.027,0.031)","0.003* (0.002,0.004)","0.004* (0.002,0.005)","0.004* (0.003,0.005)","0.001 (-0.001,0.002)","0.004* (0.002,0.005)","0.002* (0.001,0.003)","0.003* (0.002,0.004)","0.002* (0.000,0.003)","0.003* (0.001,0.004)","0.002* (0.001,0.003)"
Invasive Ventilation,AUROC,"0.007* (0.002,0.012)","0.007* (0.002,0.011)","0.006* (0.001,0.011)","0.006* (0.001,0.011)","0.007* (0.002,0.012)","0.006* (0.001,0.011)","0.006* (0.002,0.011)","0.005* (0.001,0.010)","0.008* (0.003,0.013)","0.007* (0.002,0.011)","0.008* (0.004,0.013)","0.003 (-0.002,0.007)","0.005* (0.000,0.010)"


In [6]:
# Results relative to ERM[17-19] (the 'oracle' comparison model).
# Each cell reads "median (lower,upper)"; '*' marks intervals whose bounds share a sign.
df_results_oracle

Unnamed: 0_level_0,Framework,Domain Generalization,Domain Generalization,Domain Generalization,Domain Generalization,Domain Generalization,Domain Adaptation,Domain Adaptation,Domain Adaptation,Domain Adaptation,Domain Adaptation,Domain Adaptation,Domain Adaptation,Domain Adaptation
Unnamed: 0_level_1,Unlabelled OOD Samples,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,100,100,500,500,1000,1000,1500,1500
Unnamed: 0_level_2,Method,ERM,IRM,GroupDRO,AL,CORAL,AL,CORAL,AL,CORAL,AL,CORAL,AL,CORAL
Task,Metric,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3
Long LOS,AUROC,"-0.024* (-0.030,-0.019)","-0.024* (-0.029,-0.019)","-0.023* (-0.028,-0.017)","-0.023* (-0.029,-0.018)","-0.024* (-0.029,-0.019)","-0.023* (-0.028,-0.018)","-0.023* (-0.028,-0.018)","-0.024* (-0.029,-0.019)","-0.022* (-0.027,-0.016)","-0.023* (-0.028,-0.018)","-0.022* (-0.027,-0.017)","-0.023* (-0.028,-0.018)","-0.023* (-0.028,-0.018)"
Long LOS,AUPRC,"-0.009* (-0.016,-0.002)","-0.006 (-0.013,0.000)","-0.008* (-0.015,-0.001)","-0.007* (-0.014,-0.000)","-0.007* (-0.014,-0.000)","-0.007 (-0.013,0.000)","-0.007* (-0.014,-0.000)","-0.008* (-0.014,-0.001)","-0.006 (-0.013,0.001)","-0.007* (-0.014,-0.000)","-0.005 (-0.012,0.001)","-0.007* (-0.014,-0.000)","-0.008* (-0.014,-0.001)"
Long LOS,Calibration,"0.029* (0.024,0.033)","0.022* (0.016,0.027)","0.044* (0.039,0.049)","0.028* (0.022,0.033)","0.029* (0.023,0.034)","0.025* (0.020,0.030)","0.027* (0.022,0.032)","0.025* (0.020,0.030)","0.025* (0.020,0.030)","0.026* (0.021,0.031)","0.028* (0.023,0.033)","0.028* (0.023,0.033)","0.032* (0.027,0.037)"
Sepsis,AUROC,"-0.041* (-0.049,-0.033)","-0.045* (-0.053,-0.037)","-0.071* (-0.081,-0.062)","-0.042* (-0.050,-0.034)","-0.044* (-0.053,-0.036)","-0.043* (-0.052,-0.035)","-0.043* (-0.051,-0.034)","-0.044* (-0.053,-0.035)","-0.043* (-0.052,-0.035)","-0.041* (-0.050,-0.033)","-0.044* (-0.052,-0.035)","-0.042* (-0.050,-0.033)","-0.045* (-0.053,-0.036)"
Sepsis,AUPRC,"-0.104* (-0.118,-0.090)","-0.106* (-0.120,-0.092)","-0.130* (-0.144,-0.116)","-0.105* (-0.119,-0.092)","-0.106* (-0.121,-0.092)","-0.107* (-0.121,-0.093)","-0.106* (-0.119,-0.092)","-0.105* (-0.119,-0.091)","-0.105* (-0.119,-0.091)","-0.105* (-0.119,-0.091)","-0.106* (-0.120,-0.092)","-0.104* (-0.117,-0.090)","-0.108* (-0.122,-0.094)"
Sepsis,Calibration,"0.018* (0.014,0.022)","0.019* (0.015,0.023)","0.029* (0.025,0.033)","0.017* (0.013,0.021)","0.016* (0.012,0.021)","0.019* (0.015,0.024)","0.020* (0.016,0.024)","0.021* (0.016,0.025)","0.019* (0.015,0.023)","0.020* (0.016,0.024)","0.019* (0.014,0.023)","0.018* (0.014,0.022)","0.020* (0.016,0.024)"
Mortality,AUROC,"-0.009* (-0.013,-0.005)","-0.008* (-0.012,-0.005)","-0.058* (-0.066,-0.051)","-0.010* (-0.014,-0.006)","-0.010* (-0.014,-0.006)","-0.010* (-0.014,-0.006)","-0.010* (-0.015,-0.006)","-0.010* (-0.014,-0.006)","-0.009* (-0.013,-0.005)","-0.011* (-0.015,-0.007)","-0.007* (-0.011,-0.003)","-0.010* (-0.014,-0.006)","-0.010* (-0.014,-0.006)"
Mortality,AUPRC,"0.048* (0.036,0.059)","0.049* (0.038,0.061)","0.005 (-0.010,0.021)","0.048* (0.036,0.059)","0.047* (0.036,0.059)","0.045* (0.033,0.056)","0.044* (0.032,0.055)","0.049* (0.037,0.061)","0.049* (0.037,0.060)","0.045* (0.033,0.056)","0.051* (0.040,0.063)","0.047* (0.035,0.058)","0.049* (0.038,0.061)"
Mortality,Calibration,"0.004* (0.003,0.005)","0.003* (0.002,0.004)","0.030* (0.028,0.032)","0.004* (0.003,0.005)","0.004* (0.003,0.005)","0.005* (0.003,0.006)","0.002* (0.001,0.002)","0.004* (0.003,0.005)","0.003* (0.002,0.004)","0.004* (0.003,0.005)","0.002* (0.001,0.003)","0.003* (0.002,0.005)","0.003* (0.002,0.004)"
Invasive Ventilation,AUROC,"0.005* (0.001,0.009)","0.005* (0.000,0.009)","0.004 (-0.001,0.008)","0.004 (-0.001,0.008)","0.005* (0.001,0.009)","0.004 (-0.000,0.008)","0.004 (-0.000,0.009)","0.003 (-0.001,0.008)","0.006* (0.002,0.010)","0.005* (0.000,0.009)","0.006* (0.002,0.011)","0.000 (-0.004,0.005)","0.003 (-0.001,0.007)"


## Compare with ERM Models

In [8]:
# Result directories containing the model_comparison_<alpha>.csv summaries.
gen_results_fpath = (
    "/hpf/projects/lsung/projects/mimic4ds/Experiments/domain_gen/results"
)
adapt_results_fpath = (
    "/hpf/projects/lsung/projects/mimic4ds/Experiments/domain_adapt/results"
)

In [9]:
alpha = 0.05  # selects which model_comparison_<alpha>.csv files are loaded

# Row filter applied to both result files before they are combined.
_ROW_FILTER = "phase=='test' and `lambda`==-1 and group==1 and evaluation_method=='avg'"

# Load the pre-computed comparison summaries for each framework.
results = {
    key: (
        pd.read_csv(f"{results_dir}/model_comparison_{alpha}.csv")
        .query(_ROW_FILTER)
        .reset_index(drop=True)
    )
    for key, results_dir in (
        ("gen", gen_results_fpath),
        ("adapt", adapt_results_fpath),
    )
}

# add additional columns
results['gen']['framework'] = 'Domain Generalization'
results['gen']['n_ood'] = ''  # the domain-generalization rows get an empty n_ood
results['adapt']['framework'] = 'Domain Adaptation'

# combine & clean
df_results = pd.concat((results['gen'], results['adapt']), axis=0, ignore_index=True)

# An interval whose bounds share a sign excludes zero; NaN products compare
# False and therefore get no marker.
df_results['sig'] = np.where(
    df_results['ci_lower'] * df_results['ci_upper'] > 0, '*', ''
)

df_results['Difference'] = (
        df_results['ci_med'].apply('{:.3f}'.format) +
        df_results['sig'] +
        " (" +
        df_results['ci_lower'].apply('{:.3f}'.format) +
        ',' +
        df_results['ci_upper'].apply('{:.3f}'.format) +
        ')'
    )

df_results = df_results.drop(
    columns=['phase', 'evaluation_method', 'lambda', 'group', 'Unnamed: 0', 'sig', 'ci_med', 'ci_lower', 'ci_upper'],
)

# rename columns and values
df_results = df_results.rename(columns={
    'metric': 'Metric',
    'analysis_id': 'Task',
    'n_ood': 'Unlabeled OOD Samples',
    'framework': 'Framework',
    'train_method': 'Method'})

# Assign the replaced columns back: in-place replace on a column selection
# does not reliably update the parent frame (no-op under Copy-on-Write).
df_results['Metric'] = df_results['Metric'].replace(
    {
        'auc': "AUROC",
        'auprc': "AUPRC",
        'ace_abs_logistic_log': 'Calibration'
    }
)

df_results['Method'] = df_results['Method'].replace(
    {
        'al_layer': "AL",
        'coral': "CORAL",
        'erm': "ERM",
        'irm': 'IRM',
        'dro': 'GroupDRO'
    }
)

df_results['Task'] = df_results['Task'].replace(
    {
        'longlos': 'Long LOS',
        'sepsis': 'Sepsis',
        'mortality': 'Mortality',
        'invasivevent': 'Invasive Ventilation',
    }
)

# pivot tables
# wide layout: (Task, Metric) rows by (Framework, OOD samples, Method) columns
df_results = df_results.query(
    "Metric==['AUROC','AUPRC','Calibration']"
).pivot(
    index=["Task", "Metric"], columns=["Framework", "Unlabeled OOD Samples", "Method"], values=["Difference"]
)
df_results = df_results.fillna(" ")
# Drop the leading "Difference" level that pivot adds for a list of values.
df_results.columns = pd.MultiIndex.from_tuples(
    [x[1:] for x in df_results.columns],
    names=['Framework', 'Unlabelled OOD Samples', 'Method'],
)
df_results = df_results.reindex(labels=['Long LOS', 'Sepsis', 'Mortality', 'Invasive Ventilation'], level=0)
df_results = df_results.reindex(labels=['AUROC', 'AUPRC', 'Calibration'], level=1)

# Fix the column order. The OOD sample counts are floats here, next to the
# empty string used for the domain-generalization columns.
df_results = df_results[[
        ('Domain Generalization',    '',              'IRM'),
        ('Domain Generalization',    '',         'GroupDRO'),
        ('Domain Generalization',    '',               'AL'),
        ('Domain Generalization',    '',            'CORAL'),
        (    'Domain Adaptation',  100.0,               'AL'),
        (    'Domain Adaptation',  100.0,            'CORAL'),
        (    'Domain Adaptation',  500.0,               'AL'),
        (    'Domain Adaptation',  500.0,            'CORAL'),
        (    'Domain Adaptation', 1000.0,               'AL'),
        (    'Domain Adaptation', 1000.0,            'CORAL'),
        (    'Domain Adaptation', 1500.0,               'AL'),
        (    'Domain Adaptation', 1500.0,            'CORAL'),
    ]]

In [10]:
# Results relative to ERM[08-16].
# Each cell reads "median (lower,upper)"; '*' marks intervals whose bounds share a sign.
df_results

Unnamed: 0_level_0,Framework,Domain Generalization,Domain Generalization,Domain Generalization,Domain Generalization,Domain Adaptation,Domain Adaptation,Domain Adaptation,Domain Adaptation,Domain Adaptation,Domain Adaptation,Domain Adaptation,Domain Adaptation
Unnamed: 0_level_1,Unlabelled OOD Samples,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,100,100,500,500,1000,1000,1500,1500
Unnamed: 0_level_2,Method,IRM,GroupDRO,AL,CORAL,AL,CORAL,AL,CORAL,AL,CORAL,AL,CORAL
Task,Metric,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3
Long LOS,AUROC,"0.000 (-0.002,0.002)","0.002 (-0.001,0.004)","0.001 (-0.001,0.003)","0.000 (-0.002,0.002)","0.001 (-0.001,0.003)","0.001 (-0.001,0.003)","0.000 (-0.002,0.003)","0.003* (0.000,0.005)","0.001 (-0.001,0.004)","0.002 (-0.000,0.005)","0.001 (-0.001,0.003)","0.001 (-0.001,0.004)"
Long LOS,AUPRC,"0.003* (0.000,0.006)","0.001 (-0.002,0.005)","0.002 (-0.001,0.005)","0.002 (-0.001,0.005)","0.002 (-0.001,0.006)","0.002 (-0.002,0.006)","0.001 (-0.002,0.005)","0.003 (-0.000,0.007)","0.002 (-0.002,0.006)","0.004 (-0.000,0.008)","0.002 (-0.002,0.006)","0.002 (-0.002,0.005)"
Long LOS,Calibration,"-0.007* (-0.008,-0.006)","0.016* (0.014,0.017)","-0.001 (-0.002,0.000)","0.000 (-0.001,0.001)","-0.003* (-0.005,-0.002)","-0.002* (-0.003,-0.000)","-0.003* (-0.005,-0.002)","-0.004* (-0.005,-0.002)","-0.002* (-0.004,-0.000)","-0.001 (-0.002,0.001)","-0.001 (-0.002,0.001)","0.003* (0.002,0.005)"
Sepsis,AUROC,"-0.004* (-0.007,-0.001)","-0.030* (-0.036,-0.025)","-0.001 (-0.004,0.002)","-0.003* (-0.006,-0.000)","-0.002 (-0.006,0.002)","-0.002 (-0.005,0.002)","-0.003 (-0.007,0.001)","-0.002 (-0.006,0.002)","-0.000 (-0.004,0.004)","-0.003 (-0.007,0.001)","-0.001 (-0.004,0.003)","-0.004 (-0.007,0.000)"
Sepsis,AUPRC,"-0.002 (-0.005,0.002)","-0.026* (-0.031,-0.022)","-0.002 (-0.005,0.001)","-0.003 (-0.006,0.001)","-0.003 (-0.008,0.001)","-0.002 (-0.006,0.002)","-0.002 (-0.006,0.003)","-0.001 (-0.006,0.003)","-0.001 (-0.006,0.004)","-0.002 (-0.007,0.002)","0.000 (-0.005,0.005)","-0.004 (-0.008,0.000)"
Sepsis,Calibration,"0.001 (-0.001,0.002)","0.011* (0.009,0.013)","-0.001 (-0.002,0.000)","-0.002* (-0.003,-0.000)","0.001 (-0.000,0.003)","0.002* (0.000,0.004)","0.003* (0.001,0.004)","0.001 (-0.001,0.002)","0.002* (0.000,0.003)","0.001 (-0.001,0.002)","0.000 (-0.001,0.002)","0.002* (0.000,0.004)"
Mortality,AUROC,"0.000 (-0.001,0.002)","-0.050* (-0.055,-0.045)","-0.001 (-0.003,0.001)","-0.001 (-0.003,0.000)","-0.001 (-0.004,0.001)","-0.002 (-0.004,0.001)","-0.001 (-0.004,0.001)","0.000 (-0.002,0.002)","-0.002 (-0.004,0.001)","0.002 (-0.001,0.004)","-0.001 (-0.003,0.001)","-0.001 (-0.003,0.001)"
Mortality,AUPRC,"0.002 (-0.004,0.007)","-0.042* (-0.051,-0.033)","0.000 (-0.005,0.005)","-0.000 (-0.005,0.005)","-0.003 (-0.010,0.005)","-0.004 (-0.012,0.004)","0.001 (-0.006,0.008)","0.001 (-0.006,0.008)","-0.003 (-0.010,0.004)","0.004 (-0.003,0.011)","-0.001 (-0.008,0.007)","0.002 (-0.005,0.009)"
Mortality,Calibration,"-0.001* (-0.001,-0.000)","0.026* (0.024,0.027)","0.000 (-0.001,0.001)","0.000 (-0.000,0.001)","0.001 (-0.000,0.002)","-0.002* (-0.003,-0.001)","0.000 (-0.000,0.001)","-0.001* (-0.002,-0.000)","-0.000 (-0.001,0.001)","-0.001* (-0.002,-0.001)","-0.000 (-0.001,0.000)","-0.001* (-0.002,-0.000)"
Invasive Ventilation,AUROC,"-0.000 (-0.003,0.002)","-0.001 (-0.004,0.001)","-0.001 (-0.003,0.001)","-0.000 (-0.002,0.002)","-0.001 (-0.004,0.002)","-0.001 (-0.003,0.002)","-0.002 (-0.004,0.001)","0.001 (-0.002,0.004)","-0.000 (-0.003,0.002)","0.001 (-0.002,0.005)","-0.005 (-0.009,0.000)","-0.002 (-0.005,0.001)"


In [11]:
# baseline stats table
import docx
fpath = f"{tables_fpath}/eTabl_stats_rel_to_erm.docx"
doc = docx.Document(fpath)
t = doc.add_table(df_results.shape[0], df_results.shape[1])
for i in range(df_results.shape[0]):
    for j in range(df_results.shape[-1]):
        t.cell(i,j).text = str(df_results.values[i,j])
doc.save(fpath)