In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Font size applied to axis labels and tick labels by plot_metric below.
fs = 14

sns.set_theme()

# Path of one run's test metrics; the two placeholders are filled with
# (seed, number of random time points), in that order.
base_path_template = "data/results/traditional/mimic/microbiology_res_False/ab_False/seed_{}/lookback_2/time_point('random', {})/sample_None_None/LGBMClassifier/test_res.csv"
metrics = ['Precision', 'Recall', 'F1', 'Balanced Accuracy', 'AUPRC', 'AUROC']

# Raw CSV column name -> display name used in the tables and figures.
column_labels = {'balanced_accuracy': 'Balanced Accuracy', 'prc_auc': 'AUPRC', 'roc_auc': 'AUROC', 'f1': 'F1', 'recall': 'Recall', 'precision': 'Precision'}

# Read every (seed, time_point) result file and tag it with its time-point count.
# Frames are collected in a list and concatenated once: growing a DataFrame with
# pd.concat inside the loop recopies all previous rows on every iteration.
frames = []
for seed in range(42, 47):
    for time_point in range(1, 4):
        path = base_path_template.format(seed, time_point)
        df = pd.read_csv(path).rename(columns=column_labels)
        df['Number of time points'] = time_point
        frames.append(df)
results_df = pd.concat(frames)

# Mean and standard deviation of each metric per number of time points.
# Columns are a MultiIndex of (metric, 'mean' | 'std').
mean_std_df = results_df.groupby('Number of time points').agg({metric: ['mean', 'std'] for metric in metrics}).reset_index()

# Same table indexed by the number of time points, for display and LaTeX export.
# After reset_index the grouping column is keyed by the tuple
# ('Number of time points', ''), so the index name is flattened back to a string.
for_latex = mean_std_df.set_index(('Number of time points', '')).rename_axis('Number of time points')
display(for_latex)

# Full table with metrics as rows.
print(for_latex.transpose().to_latex(float_format="%.2f", bold_rows=True, caption='Performance of different number of random time points'))

# The same numbers split into two narrower tables.
print(for_latex[['Balanced Accuracy', 'AUPRC', 'AUROC']].to_latex(float_format="%.2f", bold_rows=True, caption='Performance of different number of random time points'))
print(for_latex[['Precision', 'Recall', 'F1']].to_latex(float_format="%.2f", bold_rows=True, caption='Performance of different number of random time points'))


def plot_metric(metric):
    """Plot, save and show a bar chart of one metric per number of time points.

    Reads the module-level ``mean_std_df`` (columns keyed by
    ``(metric, 'mean')`` / ``(metric, 'std')``) and the font size ``fs``.
    Bars show the mean; black error bars show the standard deviation.
    The figure is written to
    ``images/experiments/randomtimepoint/randompoints_<metric>.png``.

    Args:
        metric: Display name of the metric to plot; must be a first-level
            column key of ``mean_std_df``.
    """
    plt.figure(figsize=(5, 5))
    sns.barplot(x='Number of time points', y=(metric, 'mean'), data=mean_std_df, capsize=.1, color='skyblue')
    # One error bar per row of mean_std_df, at positions 0..n-1 to line up with
    # the bars, instead of assuming exactly three rows.
    bar_positions = np.arange(len(mean_std_df))
    plt.errorbar(x=bar_positions, y=mean_std_df[(metric, 'mean')], yerr=mean_std_df[(metric, 'std')], fmt='none', c='black', capsize=5)
    plt.xlabel('# Random time points used for training', fontsize=fs)
    plt.ylabel(metric, fontsize=fs)
    plt.ylim([0, 1])
    plt.xticks(fontsize=fs)
    plt.yticks(fontsize=fs)
    plt.tight_layout()
    # Save only after all styling and layout calls so the PNG on disk matches
    # the figure that is displayed.
    plt.savefig('images/experiments/randomtimepoint/'+'randompoints_'+metric+".png")
    plt.show()


# Draw, save and show one bar chart per entry of `metrics`, using the
# plot_metric defined directly above in this cell.
for metric in metrics:
    plot_metric(metric)

In [None]:
# Path of one run's test metrics; the two placeholders are filled with
# (seed, lookback window), in that order. The number of random time points is
# fixed to 1 in this path.
base_path_template = "data/results/traditional/mimic/microbiology_res_False/ab_False/seed_{}/lookback_{}/time_point('random', 1)/sample_None_None/LGBMClassifier/test_res.csv"
metrics = ['Precision', 'Recall', 'F1', 'Balanced Accuracy', 'AUPRC', 'AUROC']

# Raw CSV column name -> display name used in the tables and figures.
column_labels = {'balanced_accuracy': 'Balanced Accuracy', 'prc_auc': 'AUPRC', 'roc_auc': 'AUROC', 'f1': 'F1', 'recall': 'Recall', 'precision': 'Precision'}

# Read every (seed, lookback) result file and tag it with its window size.
# Frames are collected in a list and concatenated once: growing a DataFrame with
# pd.concat inside the loop recopies all previous rows on every iteration.
frames = []
for seed in range(42, 47):
    # NOTE: despite its name, `time_point` fills the lookback_{} placeholder
    # here, i.e. it is the window size.
    for time_point in range(1, 4):
        path = base_path_template.format(seed, time_point)
        df = pd.read_csv(path).rename(columns=column_labels)
        df['Window size'] = time_point
        frames.append(df)
results_df = pd.concat(frames)

# Mean and standard deviation of each metric per window size.
# Columns are a MultiIndex of (metric, 'mean' | 'std').
mean_std_df = results_df.groupby('Window size').agg({metric: ['mean', 'std'] for metric in metrics}).reset_index()

# Same table indexed by window size, for display and LaTeX export. After
# reset_index the grouping column is keyed by the tuple ('Window size', ''),
# so the index name is flattened back to a plain string.
for_latex = mean_std_df.set_index(('Window size', '')).rename_axis('Window size')
display(for_latex)
# Full table with metrics as rows, then the same numbers as two narrower tables.
print(for_latex.transpose().to_latex(float_format="%.2f", bold_rows=True, caption='Performance of different window length'))
print(for_latex[['Balanced Accuracy', 'AUPRC', 'AUROC']].to_latex(float_format="%.2f", bold_rows=True, caption='Performance of different window length'))
print(for_latex[['Precision', 'Recall', 'F1']].to_latex(float_format="%.2f", bold_rows=True, caption='Performance of different window length'))



def plot_metric(metric):
    """Plot, save and show a bar chart of one metric per window size.

    Reads the module-level ``mean_std_df`` (columns keyed by
    ``(metric, 'mean')`` / ``(metric, 'std')``) and the font size ``fs``.
    Bars show the mean; black error bars show the standard deviation.
    The figure is written to ``images/experiments/window/window_<metric>.png``.

    Args:
        metric: Display name of the metric to plot; must be a first-level
            column key of ``mean_std_df``.
    """
    plt.figure(figsize=(5, 5))
    sns.barplot(x='Window size', y=(metric, 'mean'), data=mean_std_df, capsize=.1, color='skyblue')
    # One error bar per row of mean_std_df, at positions 0..n-1 to line up with
    # the bars, instead of assuming exactly three rows.
    bar_positions = np.arange(len(mean_std_df))
    plt.errorbar(x=bar_positions, y=mean_std_df[(metric, 'mean')], yerr=mean_std_df[(metric, 'std')], fmt='none', c='black', capsize=5)
    # Axis labels use the same font size as the tick labels.
    plt.xlabel('Window size in days', fontsize=fs)
    plt.ylabel(metric, fontsize=fs)
    plt.ylim([0, 1])
    plt.xticks(fontsize=fs)
    plt.yticks(fontsize=fs)
    plt.tight_layout()
    # Save only after all styling and layout calls so the PNG on disk matches
    # the figure that is displayed.
    plt.savefig('images/experiments/window/'+'window_'+metric+".png")
    plt.show()


# Draw, save and show one bar chart per entry of `metrics`, using the
# plot_metric defined directly above in this cell (the window-size version).
for metric in metrics:
    plot_metric(metric)
