In [None]:
import pandas as pd
import os
from sklearn.metrics import PrecisionRecallDisplay, RocCurveDisplay
import joblib
import matplotlib.pyplot as plt
from utils.helpers import get_res_df, get_standard_stats
import numpy as np
from sklearn.metrics import roc_curve, auc
import seaborn as sns
sns.set_theme()

In [None]:
def independent_test(database='eicu', 
                  lookback=2, 
                  prediction_time_points='random', 
                  numberofsamples=1, 
                  sample_train=None, 
                  sample_test=None, 
                  seed=44, 
                  inc_ab=False,
                  has_microbiology=False,
                  model='LGBMClassifier',):
    if prediction_time_points == 'random':
        time_point = ('random', numberofsamples)

    original_traditional_path = 'data/model_input/traditional/mimic/microbiology_res_'+str(has_microbiology)+'/ab_'+str(inc_ab)+'/seed_'+str(seed)+'/'

    independent_traditional_path = 'data/model_input/traditional/'+database+'/microbiology_res_'+str(has_microbiology)+'/ab_'+str(inc_ab)+'/seed_'+str(seed)+'/'

    traditional_model_path = 'data/results/traditional/mimic/microbiology_res_'+str(has_microbiology)+'/ab_'+str(inc_ab)+'/seed_'+str(seed)+ \
                            '/lookback_'+str(lookback)+'/time_point'+str(time_point)+'/sample_'+str(sample_train)+"_"+str(sample_test)+"/"+model+"/"

    # first we get the complete test dataset for the traditional model
    if database != 'mimic':
        X_traditional_independent = pd.read_parquet(independent_traditional_path+'X_train_time_point_'+str(time_point)+'_lookback_'+str(lookback)+'.parquet')
        y_traditional_independent = pd.read_parquet(independent_traditional_path+'y_train_time_point_'+str(time_point)+'_lookback_'+str(lookback)+'.parquet')
    else:
        X_traditional_independent = pd.DataFrame()
        y_traditional_independent = pd.DataFrame()
    X_traditional_independent_test = pd.read_parquet(independent_traditional_path+'X_test_time_point_'+str(time_point)+'_lookback_'+str(lookback)+'.parquet')
    y_traditional_independent_test = pd.read_parquet(independent_traditional_path+'y_test_time_point_'+str(time_point)+'_lookback_'+str(lookback)+'.parquet')
    X_traditional_independent = pd.concat([X_traditional_independent, X_traditional_independent_test])
    y_traditional_independent = pd.concat([y_traditional_independent, y_traditional_independent_test])
   
    # next we load the traditional model
    print(traditional_model_path)
    print(independent_traditional_path)
    model = joblib.load(traditional_model_path+'model.pkl')

    X_trained_original = pd.read_parquet(original_traditional_path +'X_train_time_point_(\'random\', 1)_lookback_'+str(lookback)+'.parquet')
    X_traditional_independent[list(set(X_trained_original.columns).difference(set(X_traditional_independent.columns)))] = 0
    X_traditional = X_traditional_independent[X_trained_original.columns]

    # calculate test set predictions
    pred_test = pd.DataFrame(model.predict(X_traditional), columns=['pred'])
    pred_proba_test = pd.DataFrame(model.predict_proba(X_traditional), columns=['False','True'])
    test_gt_and_preds = pd.concat([y_traditional_independent.reset_index(drop=True), pred_test.reset_index(drop=True), pred_proba_test.reset_index(drop=True)], axis=1)
    

    #display(test_gt_and_preds)
    test_res = get_res_df(test_gt_and_preds)

    #display(test_res)
    test_gt_and_preds['seed'] = seed
    test_gt_and_preds['database'] = database
    test_res['seed'] = seed
    test_res['database'] = database
    return test_gt_and_preds, test_res


# Evaluate the traditional model on every dataset/seed combination and store
# each ROC curve interpolated onto a common FPR grid so the curves can be
# averaged across seeds afterwards.
datasets = ['mimic', 'eicu', 'pic']
roc_data = {dataset: {'mean_fpr': np.linspace(0, 1, 100), 'tpr_list': [], 'auc_list': []} for dataset in datasets}

# Per-run metric frames are collected in a list and concatenated once after
# the loop instead of re-copying a growing DataFrame on every iteration.
test_res_frames = []

for dataset in datasets:
    for seed in [42, 43, 44, 45, 46]:
        test_gt_and_preds, test_res = independent_test(database=dataset, seed=seed)
        test_res_frames.append(test_res)

        gt = test_gt_and_preds['lot<5d']
        pred_prob = test_gt_and_preds['True']

        fpr, tpr, _ = roc_curve(gt, pred_prob)
        interpolated_tpr = np.interp(roc_data[dataset]['mean_fpr'], fpr, tpr)
        interpolated_tpr[0] = 0.0  # start every curve at the origin
        roc_data[dataset]['tpr_list'].append(interpolated_tpr)
        roc_data[dataset]['auc_list'].append(auc(fpr, tpr))

all_test_res = pd.concat(test_res_frames)

# Plot the mean ROC curve of each dataset with a +/- one standard deviation
# band computed across the seeds.
for dataset in datasets:
    mean_tpr = np.mean(roc_data[dataset]['tpr_list'], axis=0)
    mean_tpr[-1] = 1.0  # end the mean curve at (1, 1)
    mean_auc = np.mean(roc_data[dataset]['auc_list'])
    std_auc = np.std(roc_data[dataset]['auc_list'])
    std_tpr = np.std(roc_data[dataset]['tpr_list'], axis=0)

    plt.plot(roc_data[dataset]['mean_fpr'], mean_tpr, label=f'{dataset} (AUC = {mean_auc:.2f} ± {std_auc:.2f})')

    # Clip the band to the valid TPR range.
    tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
    tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
    plt.fill_between(roc_data[dataset]['mean_fpr'], tprs_lower, tprs_upper, alpha=0.2)

fs = 14  # font size for legend, axis labels and ticks
# A single legend call: a second plt.legend() would replace the first one and
# drop its location setting.
plt.legend(loc='lower right', fontsize=fs)
plt.plot([0, 1], [0, 1], 'r--')  # chance diagonal
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate', fontsize=fs)
plt.xlabel('False Positive Rate', fontsize=fs)
plt.xticks(fontsize=fs)
plt.yticks(fontsize=fs)
plt.tight_layout()
plt.savefig('images/experiments/transferability/traditional_transferability_auroc.png')
plt.show()


In [None]:
# Summarise the per-seed metrics as mean and standard deviation per database
# and print the LaTeX tables.
all_test_res = all_test_res.rename(
    columns={'balanced_accuracy': 'Balanced Accuracy', 'prc_auc':'AUPRC', 'roc_auc':'AUROC', 'f1':'F1', 'recall':'Recall', 'precision':'Precision'}
)
grouped_by_database = all_test_res.groupby('database')
mean_measurements = grouped_by_database.mean().reset_index()
std_measurements = grouped_by_database.std().reset_index()

# 'seed' identifies the run and is not a metric, so it is left out of the table.
metrics = mean_measurements.columns.difference(['database', 'seed'])

# One (metric, 'mean') and one (metric, 'std') column per metric, indexed by database.
df_combined = grouped_by_database[list(metrics)].agg(['mean', 'std'])

display(df_combined)
print(df_combined[['Balanced Accuracy', 'AUPRC', 'AUROC']].to_latex(float_format="%.2f", bold_rows=True, caption='Transferability performance of traditional model'))
print(df_combined[['Precision', 'Recall', 'F1']].to_latex(float_format="%.2f", bold_rows=True, caption='Transferability performance of traditional model'))

In [None]:
# Bar chart per metric: mean per database with the standard deviation across
# seeds as error bar.
mean_measurements = all_test_res.groupby('database').mean().reset_index()
std_measurements = all_test_res.groupby('database').std().reset_index()

# Melt to long format (one row per database/metric pair). 'seed' identifies
# the run and is not a metric, so it is dropped before melting; otherwise it
# would get its own, meaningless bar chart.
mean_melted = mean_measurements.drop(columns='seed', errors='ignore').melt(id_vars='database', var_name='metric', value_name='mean')
std_melted = std_measurements.drop(columns='seed', errors='ignore').melt(id_vars='database', var_name='metric', value_name='std')

# Join mean and std so each row carries both values.
merged_measurements = pd.merge(mean_melted, std_melted, on=['database', 'metric'])

# One figure per metric.
for metric in merged_measurements['metric'].unique():
    fig, ax = plt.subplots(figsize=(7, 5))
    metric_data = merged_measurements[merged_measurements['metric'] == metric]
    display(metric_data)
    sns.barplot(data=metric_data, x='database', y='mean', capsize=.1, ax=ax)
    # Bars are drawn at positions 0..n-1 in row order, so the error bars are
    # placed at the same positions.
    ax.errorbar(x=range(len(metric_data)), y=metric_data['mean'], yerr=metric_data['std'], fmt='none', c='black', capsize=5)
    ax.set_title(f'{metric}')
    ax.set_ylim([0, 1])
    ax.set_ylabel('Mean Value and Standard Deviation')

In [None]:
from sklearn.metrics import precision_recall_curve, average_precision_score

# Per dataset: a shared recall grid, the precision curves interpolated onto
# that grid (one per seed) and the average-precision scores.
pr_data = {
    name: {'recall': np.linspace(0, 1, 100), 'precision_list': [], 'ap_list': []}
    for name in datasets
}

# Running sum of the positive-class rate per dataset (one term per seed).
positive_class_frequency = dict.fromkeys(('mimic', 'eicu', 'pic'), 0)

seeds = [42, 43, 44, 45, 46]
for dataset in datasets:
    curves = pr_data[dataset]
    for seed in seeds:
        test_gt_and_preds, test_res = independent_test(database=dataset, seed=seed)
        y_true = test_gt_and_preds['lot<5d']
        y_score = test_gt_and_preds['True']

        precision_points, recall_points, _ = precision_recall_curve(y_true, y_score)
        # The arrays are reversed before interpolating onto the recall grid.
        curves['precision_list'].append(
            np.interp(curves['recall'], recall_points[::-1], precision_points[::-1])
        )
        curves['ap_list'].append(average_precision_score(y_true, y_score))
        positive_class_frequency[dataset] = positive_class_frequency[dataset] + y_true.mean()





# Plot the mean precision-recall curve of each dataset with a +/- one standard
# deviation band across seeds, plus the chance level of each dataset.
line_colors = {}
for dataset in datasets:
    mean_precision = np.mean(pr_data[dataset]['precision_list'], axis=0)
    mean_ap = np.mean(pr_data[dataset]['ap_list'])
    std_ap = np.std(pr_data[dataset]['ap_list'])
    std_precision = np.std(pr_data[dataset]['precision_list'], axis=0)

    (mean_curve,) = plt.plot(pr_data[dataset]['recall'], mean_precision, label=f'{dataset} (AP = {mean_ap:.2f} ± {std_ap:.2f})')
    # Remember the curve colour so the matching chance line can reuse it.
    line_colors[dataset] = mean_curve.get_color()

    # Clip the band to the valid precision range.
    precision_upper = np.minimum(mean_precision + std_precision, 1)
    precision_lower = np.maximum(mean_precision - std_precision, 0)
    plt.fill_between(pr_data[dataset]['recall'], precision_lower, precision_upper, alpha=0.2)

# Turn the per-seed sums into the mean positive-class rate per dataset.
display(positive_class_frequency)
for dataset in datasets:
    positive_class_frequency[dataset] = positive_class_frequency[dataset] / len(seeds)
display(positive_class_frequency)

# Chance level of a PR curve is drawn at the positive-class rate, in the same
# colour as the dataset's curve.
for dataset in datasets:
    plt.hlines(positive_class_frequency[dataset], 0, 1, colors=line_colors[dataset], linestyles='dashed', label=f'chance level ({dataset})')

plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('Precision', fontsize=fs)
plt.xlabel('Recall', fontsize=fs)
plt.xticks(fontsize=fs)
plt.yticks(fontsize=fs)
# A single legend call: a second plt.legend() would replace the first one and
# drop its font size.
plt.legend(loc='lower right', fontsize=12)
plt.tight_layout()
plt.savefig('images/experiments/transferability/traditional_transferability_auprc.png')
plt.show()

# next day

In [None]:
# Load the stored next-day result frames for every dataset/seed combination
# and concatenate them in one step.
all_test_res = pd.concat(
    [
        pd.read_parquet("experiments/transferability/test_res_"+dataset+"_"+str(seed)+".parquet")
        for dataset in datasets
        for seed in [42, 43, 44, 45, 46]
    ]
)
all_test_res

In [None]:
# Summarise the per-seed next-day metrics as mean and standard deviation per
# database and print the LaTeX tables.
all_test_res = all_test_res.rename(
    columns={'balanced_accuracy': 'Balanced Accuracy', 'prc_auc':'AUPRC', 'roc_auc':'AUROC', 'f1':'F1', 'recall':'Recall', 'precision':'Precision'}
)
grouped_by_database = all_test_res.groupby('database')
mean_measurements = grouped_by_database.mean().reset_index()
std_measurements = grouped_by_database.std().reset_index()

# 'seed' identifies the run and is not a metric, so it is left out of the table.
metrics = mean_measurements.columns.difference(['database', 'seed'])

# One (metric, 'mean') and one (metric, 'std') column per metric, indexed by database.
df_combined = grouped_by_database[list(metrics)].agg(['mean', 'std'])

display(df_combined)
print(df_combined[['Balanced Accuracy', 'AUPRC', 'AUROC']].to_latex(float_format="%.2f", bold_rows=True, caption='Transferability performance of next day model'))
print(df_combined[['Precision', 'Recall', 'F1']].to_latex(float_format="%.2f", bold_rows=True, caption='Transferability performance of next day model'))

In [None]:
# Read the stored next-day predictions for every dataset/seed combination and
# store each ROC curve interpolated onto a common FPR grid.
datasets = ['mimic', 'eicu', 'pic']
roc_data = {dataset: {'mean_fpr': np.linspace(0, 1, 100), 'tpr_list': [], 'auc_list': []} for dataset in datasets}

# `all_test_res` is deliberately left untouched here: resetting it to an empty
# frame would discard the loaded results without this cell refilling them.
for dataset in datasets:
    for seed in [42, 43, 44, 45, 46]:
        # Only the prediction frame is needed for the ROC curve.
        test_gt_and_preds = pd.read_parquet("experiments/transferability/test_gt_and_preds_"+dataset+"_"+str(seed)+".parquet")

        gt = test_gt_and_preds['next_day']
        pred_prob = test_gt_and_preds['True']

        fpr, tpr, _ = roc_curve(gt, pred_prob)
        interpolated_tpr = np.interp(roc_data[dataset]['mean_fpr'], fpr, tpr)
        interpolated_tpr[0] = 0.0  # start every curve at the origin
        roc_data[dataset]['tpr_list'].append(interpolated_tpr)
        roc_data[dataset]['auc_list'].append(auc(fpr, tpr))

# Plot the mean next-day ROC curve of each dataset with a +/- one standard
# deviation band computed across the seeds.
for dataset in datasets:
    mean_tpr = np.mean(roc_data[dataset]['tpr_list'], axis=0)
    mean_tpr[-1] = 1.0  # end the mean curve at (1, 1)
    mean_auc = np.mean(roc_data[dataset]['auc_list'])
    std_auc = np.std(roc_data[dataset]['auc_list'])
    std_tpr = np.std(roc_data[dataset]['tpr_list'], axis=0)

    plt.plot(roc_data[dataset]['mean_fpr'], mean_tpr, label=f'{dataset} (AUC = {mean_auc:.2f} ± {std_auc:.2f})')

    # Clip the band to the valid TPR range.
    tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
    tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
    plt.fill_between(roc_data[dataset]['mean_fpr'], tprs_lower, tprs_upper, alpha=0.2)

# A single legend call: a second plt.legend() would replace the first one and
# drop its location setting. `fs` is the font size set in the first ROC cell.
plt.legend(loc='lower right', fontsize=fs)
plt.plot([0, 1], [0, 1], 'r--')  # chance diagonal
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate', fontsize=fs)
plt.xlabel('False Positive Rate', fontsize=fs)
plt.xticks(fontsize=fs)
plt.yticks(fontsize=fs)
plt.tight_layout()
plt.savefig('images/experiments/transferability/nd_transferability_auroc.png', format='png')
plt.show()


In [None]:
from sklearn.metrics import precision_recall_curve, average_precision_score

# Per dataset: a shared recall grid, the precision curves interpolated onto
# that grid (one per seed) and the average-precision scores.
pr_data = {dataset: {'recall': np.linspace(0, 1, 100), 'precision_list': [], 'ap_list': []} for dataset in datasets}

# Running sum of the positive-class rate per dataset (one term per seed).
positive_class_frequency = {
                                'mimic': 0,
                                'eicu': 0,
                                'pic': 0,
                            }

seeds = [42, 43, 44, 45, 46]
for dataset in datasets:
    for seed in seeds:
        # Only the prediction frame is needed for the PR curve; the stored
        # metric frame is not read here.
        test_gt_and_preds = pd.read_parquet("experiments/transferability/test_gt_and_preds_"+dataset+"_"+str(seed)+".parquet")

        gt = test_gt_and_preds['next_day']
        pred_prob = test_gt_and_preds['True']

        precision, recall, _ = precision_recall_curve(gt, pred_prob)
        # The arrays are reversed before interpolating onto the recall grid.
        pr_data[dataset]['precision_list'].append(np.interp(pr_data[dataset]['recall'], recall[::-1], precision[::-1]))
        pr_data[dataset]['ap_list'].append(average_precision_score(gt, pred_prob))
        positive_class_frequency[dataset] += gt.mean()





# Plot the mean next-day precision-recall curve of each dataset with a +/- one
# standard deviation band across seeds, plus the chance level of each dataset.
line_colors = {}
for dataset in datasets:
    mean_precision = np.mean(pr_data[dataset]['precision_list'], axis=0)
    mean_ap = np.mean(pr_data[dataset]['ap_list'])
    std_ap = np.std(pr_data[dataset]['ap_list'])
    std_precision = np.std(pr_data[dataset]['precision_list'], axis=0)

    (mean_curve,) = plt.plot(pr_data[dataset]['recall'], mean_precision, label=f'{dataset} (AP = {mean_ap:.2f} ± {std_ap:.2f})')
    # Remember the curve colour so the matching chance line can reuse it.
    line_colors[dataset] = mean_curve.get_color()

    # Clip the band to the valid precision range.
    precision_upper = np.minimum(mean_precision + std_precision, 1)
    precision_lower = np.maximum(mean_precision - std_precision, 0)
    plt.fill_between(pr_data[dataset]['recall'], precision_lower, precision_upper, alpha=0.2)

# Turn the per-seed sums into the mean positive-class rate per dataset.
display(positive_class_frequency)
for dataset in datasets:
    positive_class_frequency[dataset] = positive_class_frequency[dataset] / len(seeds)
display(positive_class_frequency)

# Chance level of a PR curve is drawn at the positive-class rate, in the same
# colour as the dataset's curve.
for dataset in datasets:
    plt.hlines(positive_class_frequency[dataset], 0, 1, colors=line_colors[dataset], linestyles='dashed', label=f'chance level ({dataset})')

plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('Precision', fontsize=fs)
plt.xlabel('Recall', fontsize=fs)
plt.xticks(fontsize=fs)
plt.yticks(fontsize=fs)
# A single legend call: a second plt.legend() would replace the first one and
# drop its font size.
plt.legend(loc='upper right', fontsize=12)
plt.tight_layout()
plt.savefig('images/experiments/transferability/nd_transferability_auprc.png', format='png')
plt.show()