### File to compile results at both intraoperative and preoperative level for all outcomes

#### Input: pred_stage, outcome name, result file location 

In [1]:
import pickle
import json
import os
import numpy as np
import pandas as pd
import math
from datetime import datetime
import matplotlib.pyplot as plt
import pprint

In [7]:
# result location 
pred_stage = 'Preoperative'  # {'Preoperative', 'Intraoperative'}
outcome_list= ['icu', 'mortality', 'aki2']
reg_outcome_list =['postop_los', 'opioids_count_day0', 'opioids_count_day1']
sav_dir = '/mnt/ris/sandhyat/Output-TS_docker_July2024/Best_results/' + pred_stage +"/"
# sav_dir = '/home/trips/PeriOperative_RiskPrediction/Best_results/' + pred_stage +"/"

In [8]:
for outcome in outcome_list:
    perf_filename = sav_dir + str(outcome) + '_Best_perf_metrics_combined_' + pred_stage.lower() + '.pickle'
    with open(perf_filename, 'rb') as file:
        metric_data = pickle.load(file)
    model_name_list = list(metric_data.keys())
    ablation_name_list = list(metric_data[model_name_list[0]].keys())
    ablation_name_list_pretty = [i.split("_",1)[1] for i in ablation_name_list]
    
    df_auroc = pd.DataFrame(columns=model_name_list, index=ablation_name_list)
    df_auprc = pd.DataFrame(columns=model_name_list, index=ablation_name_list)

    for key, value in metric_data.items():
        for ab in ablation_name_list:
            df_auroc.loc[ab, key] = np.round(np.mean(value[ab][:,0]), decimals=3)
            df_auprc.loc[ab, key] = np.round(np.mean(value[ab][:,1]), decimals=3)

    # renaming the index
    df_auroc = df_auroc.rename(index=dict(zip(df_auroc.index, ablation_name_list_pretty)))
    df_auprc = df_auprc.rename(index=dict(zip(df_auprc.index, ablation_name_list_pretty)))
    
    new_columns = [(outcome, col) for col in df_auroc.columns]
    new_column_index = pd.MultiIndex.from_tuples(new_columns)
    
    df_auroc.columns = new_column_index
    df_auprc.columns = new_column_index

    print(" Outcome ", outcome, " AUROC")
    pprint.pp(df_auroc)
    print("\n Outcome ", outcome, " AUPRC")
    pprint.pp(df_auprc)

 Outcome  icu  AUROC
                                     icu              
                                  TabNet  Scarf   XGBT
preops_cbow                        0.898  0.909  0.969
preops_cbow_homemeds               0.921  0.824  0.974
preops_cbow_pmh_problist_homemeds  0.912  0.877  0.974

 Outcome  icu  AUPRC
                                     icu              
                                  TabNet  Scarf   XGBT
preops_cbow                        0.642  0.649  0.885
preops_cbow_homemeds               0.693  0.369  0.896
preops_cbow_pmh_problist_homemeds  0.651  0.502  0.895
 Outcome  mortality  AUROC
                                  mortality              
                                       XGBT  Scarf TabNet
preops_cbow                           0.943  0.897    0.9
preops_cbow_homemeds                  0.947   0.89  0.912
preops_cbow_pmh_problist_homemeds     0.945  0.873  0.919

 Outcome  mortality  AUPRC
                                  mortality              
    

In [10]:
for outcome in reg_outcome_list:
    perf_filename = sav_dir + str(outcome) + '_Best_perf_metrics_combined_' + pred_stage.lower() + '.pickle'
    with open(perf_filename, 'rb') as file:
        metric_data = pickle.load(file)
    model_name_list = list(metric_data.keys())
    ablation_name_list = list(metric_data[model_name_list[0]].keys())
    ablation_name_list_pretty = [i.split("_",1)[1] for i in ablation_name_list]
    
    df_Corr = pd.DataFrame(columns=model_name_list, index=ablation_name_list)
    df_R2 = pd.DataFrame(columns=model_name_list, index=ablation_name_list)
    
    for key, value in metric_data.items():
        for ab in ablation_name_list:
            df_Corr.loc[ab, key] = np.round(np.mean(value[ab][:,0]), decimals=3)
            df_R2.loc[ab, key] = np.round(np.mean(value[ab][:,2]), decimals=3)

    # renaming the index
    df_Corr = df_Corr.rename(index=dict(zip(df_Corr.index, ablation_name_list_pretty)))
    df_R2 = df_R2.rename(index=dict(zip(df_R2.index, ablation_name_list_pretty)))
    
    new_columns = [(outcome, col) for col in df_Corr.columns]
    new_column_index = pd.MultiIndex.from_tuples(new_columns)
    
    df_Corr.columns = new_column_index
    df_R2.columns = new_column_index

    print(" Outcome ", outcome, " Correlation")
    pprint.pp(df_Corr)
    print("\n Outcome ", outcome, " R2 score")
    pprint.pp(df_R2)

 Outcome  postop_los  Correlation
                                  postop_los
                                        XGBT
preops_cbow                            0.674
preops_cbow_homemeds                    0.69
preops_cbow_pmh_problist_homemeds      0.776

 Outcome  postop_los  R2 score
                                  postop_los
                                        XGBT
preops_cbow                            0.437
preops_cbow_homemeds                   0.461
preops_cbow_pmh_problist_homemeds      0.575
 Outcome  opioids_count_day0  Correlation
                                  opioids_count_day0       
                                                XGBT  Scarf
preops_cbow                                    0.562  0.328
preops_cbow_homemeds                           0.566  0.282
preops_cbow_pmh_problist_homemeds              0.563  0.273

 Outcome  opioids_count_day0  R2 score
                                  opioids_count_day0       
                                          

In [11]:
# result location 
pred_stage = 'Intraoperative'  # {'Preoperative', 'Intraoperative'}
outcome_list= ['icu', 'mortality', 'aki2']
reg_outcome_list =['postop_los', 'opioids_count_day0', 'opioids_count_day1']
sav_dir = '/mnt/ris/sandhyat/Output-TS_docker_July2024/Best_results/' + pred_stage +"/"
# sav_dir = '/home/trips/PeriOperative_RiskPrediction/Best_results/' + pred_stage +"/"

In [18]:
for outcome in outcome_list:
    perf_filename = sav_dir + str(outcome) + '_Best_perf_metrics_combined_' + pred_stage.lower() + '.pickle'
    with open(perf_filename, 'rb') as file:
        metric_data = pickle.load(file)
    model_name_list = list(metric_data.keys())
    ablation_name_list = list(metric_data[model_name_list[0]].keys())
    ablation_name_list_pretty = [i.split("_",1)[1] for i in ablation_name_list]
    
    df_auroc = pd.DataFrame(columns=model_name_list, index=ablation_name_list)
    df_auprc = pd.DataFrame(columns=model_name_list, index=ablation_name_list)
    print(metric_data)
    for key, value in metric_data.items():
        for ab in ablation_name_list:
            try:
                df_auroc.loc[ab, key] = np.round(np.mean(value[ab][:,0]), decimals=3)
                df_auprc.loc[ab, key] = np.round(np.mean(value[ab][:,1]), decimals=3)
            except(KeyError):
                df_auroc.loc[ab, key] = 'Yet to be filled'
                df_auprc.loc[ab, key] = 'Yet to be filled'

    # renaming the index
    df_auroc = df_auroc.rename(index=dict(zip(df_auroc.index, ablation_name_list_pretty)))
    df_auprc = df_auprc.rename(index=dict(zip(df_auprc.index, ablation_name_list_pretty)))
    
    new_columns = [(outcome, col) for col in df_auroc.columns]
    new_column_index = pd.MultiIndex.from_tuples(new_columns)
    
    df_auroc.columns = new_column_index
    df_auprc.columns = new_column_index

    print(" Outcome ", outcome, " AUROC")
    pprint.pp(df_auroc)
    print("\n Outcome ", outcome, " AUPRC")
    pprint.pp(df_auprc)

{'MVCL': {'DataModal_meds': array([[0.86073828, 0.5219987 ],
       [0.83846597, 0.4791385 ],
       [0.83983861, 0.49286679],
       [0.85102241, 0.51318789],
       [0.8464228 , 0.48768798]]), 'DataModal_flow': array([[0.90022765, 0.55551212],
       [0.89980775, 0.58396575],
       [0.89623304, 0.57495054],
       [0.8980147 , 0.58315155],
       [0.91225552, 0.60401176]]), 'DataModal_flow_meds': array([[0.92331067, 0.64192238],
       [0.92702158, 0.6751036 ],
       [0.912247  , 0.63571699],
       [0.91318792, 0.63591206],
       [0.92240327, 0.65184625]]), 'DataModal_preops_flow_meds': array([[0.97220335, 0.88970311],
       [0.97435134, 0.89726699],
       [0.97276837, 0.89410945],
       [0.9702884 , 0.89223028],
       [0.97060917, 0.89896962]]), 'DataModal_preops_homemeds_flow_meds': array([[0.9735685 , 0.8989029 ],
       [0.97292573, 0.8961599 ],
       [0.9754681 , 0.90030176],
       [0.9738715 , 0.8980396 ],
       [0.97418765, 0.9022322 ]]), 'DataModal_preops_pmh_probl

In [14]:
print(ablation_name_list)

['DataModal_meds', 'DataModal_flow', 'DataModal_flow_meds', 'DataModal_preops_cbow_flow_meds', 'DataModal_preops_cbow_homemeds_flow_meds', 'DataModal_preops_cbow_pmh_problist_homemeds_flow_meds']


In [13]:
for outcome in reg_outcome_list:
    perf_filename = sav_dir + str(outcome) + '_Best_perf_metrics_combined_' + pred_stage.lower() + '.pickle'
    with open(perf_filename, 'rb') as file:
        metric_data = pickle.load(file)
    model_name_list = list(metric_data.keys())
    ablation_name_list = list(metric_data[model_name_list[0]].keys())
    ablation_name_list_pretty = [i.split("_",1)[1] for i in ablation_name_list]
    
    df_Corr = pd.DataFrame(columns=model_name_list, index=ablation_name_list)
    df_R2 = pd.DataFrame(columns=model_name_list, index=ablation_name_list)
    
    for key, value in metric_data.items():
        for ab in ablation_name_list:
            try:
                df_Corr.loc[ab, key] = np.round(np.mean(value[ab][:,0]), decimals=3)
                df_R2.loc[ab, key] = np.round(np.mean(value[ab][:,2]), decimals=3)
            except(KeyError):
                df_Corr.loc[ab, key] = 'Yet to be filled'
                df_R2.loc[ab, key] = 'Yet to be filled'

    # renaming the index
    df_Corr = df_Corr.rename(index=dict(zip(df_Corr.index, ablation_name_list_pretty)))
    df_R2 = df_R2.rename(index=dict(zip(df_R2.index, ablation_name_list_pretty)))
    
    new_columns = [(outcome, col) for col in df_Corr.columns]
    new_column_index = pd.MultiIndex.from_tuples(new_columns)
    
    df_Corr.columns = new_column_index
    df_R2.columns = new_column_index

    print(" Outcome ", outcome, " Correlation")
    pprint.pp(df_Corr)
    print("\n Outcome ", outcome, " R2 score")
    pprint.pp(df_R2)

 Outcome  postop_los  Correlation
                                            postop_los
                                             XGBTtsSum
meds                                             0.502
flow                                             0.582
flow_meds                                        0.599
preops_cbow_flow_meds                            0.656
preops_cbow_homemeds_flow_meds                   0.673
preops_cbow_pmh_problist_homemeds_flow_meds      0.765

 Outcome  postop_los  R2 score
                                            postop_los
                                             XGBTtsSum
meds                                             0.251
flow                                             0.336
flow_meds                                        0.359
preops_cbow_flow_meds                            0.412
preops_cbow_homemeds_flow_meds                   0.432
preops_cbow_pmh_problist_homemeds_flow_meds      0.571


FileNotFoundError: [Errno 2] No such file or directory: '/mnt/ris/sandhyat/Output-TS_docker_July2024/Best_results/Intraoperative/opioids_count_day0_Best_perf_metrics_combined_intraoperative.pickle'