In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
def _positivity_counts(database, column_label):
    """Count the episodes of one database by microbiology positivity status.

    Parameters
    ----------
    database : str
        Directory name under ``data/episodes/`` (e.g. ``'mimic'``).
    column_label : str
        Name given to the resulting count column (e.g. ``'MIMIC-IV'``).

    Returns
    -------
    pandas.DataFrame
        A single column named ``column_label`` indexed by ``'is positive'``:
        one row per distinct ``isPositive`` value (missing values included)
        followed by a ``'Total'`` row holding the number of episodes.
    """
    episodes = pd.read_parquet('data/episodes/' + database + '/microbiology_res_False/all_episodes.parquet')
    counts = (
        episodes['isPositive']
        .value_counts(dropna=False)  # keep episodes with a missing status as their own row
        .rename_axis('is positive')
        .reset_index(name=column_label)
    )
    total_row = pd.DataFrame({'is positive': ['Total'], column_label: [len(episodes)]})
    # DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
    return pd.concat([counts, total_row], ignore_index=True).set_index('is positive')


count_values_mimic = _positivity_counts('mimic', 'MIMIC-IV')
count_values_eicu = _positivity_counts('eicu', 'eICU')
count_values_pic = _positivity_counts('pic', 'PIC')

# Align the three tables on their shared 'is positive' index: one column per database.
df = pd.concat([count_values_mimic, count_values_eicu, count_values_pic], axis=1)

for_latex = df
display(for_latex)
print(for_latex.to_latex(float_format="%.2f", bold_rows=True, caption='Number of episodes per database and their positivity status'))

In [None]:
def plot_episodes(database, max_days=100):
    """Plot and save the histogram of episode lengths for one database.

    Reads ``data/episodes/<database>/microbiology_res_False/all_episodes.parquet``,
    draws a 100-bin histogram of ``lot_in_days`` for the episodes lasting at
    most ``max_days`` days, marks the median with a dashed red line, saves the
    figure to ``images/methods/<database>_episodes.png`` and shows it.

    Parameters
    ----------
    database : str
        Directory name under ``data/episodes/``; also used as the prefix of
        the saved image file.
    max_days : int or float, default 100
        Longest episode included in the histogram; also the upper x-axis limit.

    Returns
    -------
    None
    """
    episodes = pd.read_parquet('data/episodes/'+database+'/microbiology_res_False/all_episodes.parquet')
    lot_in_days = episodes['lot_in_days']
    # The median is computed over all episodes, i.e. before the display cut-off.
    median_lot_in_days = lot_in_days.median()
    shown = lot_in_days[lot_in_days <= max_days]

    fs = 14
    # Explicit figure/axes so repeated calls never draw onto a previous figure.
    fig, ax = plt.subplots()
    sns.histplot(shown, bins=100, ax=ax)
    ax.axvline(median_lot_in_days, color='r', linestyle='--', label=f'Median: {median_lot_in_days:.1f} days')
    ax.legend(fontsize=fs)
    ax.set_xlabel('Episode length in days', fontsize=fs)
    ax.set_ylabel('Number of episodes', fontsize=fs)
    ax.tick_params(axis='both', labelsize=fs)
    ax.set_xlim(0, max_days)
    fig.tight_layout()
    fig.savefig('images/methods/'+database+'_episodes.png')
    plt.show()
    # Release the figure in case the active backend keeps it open after show().
    plt.close(fig)

# Produce the single-database episode-length histograms for eICU and PIC.
for db_name in ('eicu', 'pic'):
    plot_episodes(db_name)

In [None]:
# Combined episode-length histogram for one database: all episodes, episodes
# with a microbiology result, and the positive / negative subsets, each with
# its median marked by a dashed vertical line.
database = 'mimic'

episodes = pd.read_parquet('data/episodes/'+database+'/microbiology_res_False/all_episodes.parquet')
lot_in_days = episodes['lot_in_days']
is_positive = episodes['isPositive']

# Episode lengths per subset, keyed by the colour its histogram is drawn in.
# The dict order is the order in which the subsets are concatenated below.
lot_by_colour = {
    'blue': lot_in_days,                          # all episodes
    'orange': lot_in_days[is_positive.notna()],   # isPositive is not missing
    'red': lot_in_days[is_positive == True],      # isPositive is True
    'green': lot_in_days[is_positive == False],   # isPositive is False
}

# Long-format frame for seaborn: one row per (episode, subset) pair, tagged
# with the subset's colour key in 'microbiology_res'.
hist_data = pd.concat(
    [lot.to_frame().assign(microbiology_res=colour) for colour, lot in lot_by_colour.items()]
)
# Only episodes of at most 100 days are drawn; medians below use the full subsets.
hist_data = hist_data[hist_data['lot_in_days'] <= 100]

fig, ax = plt.subplots()
sns.histplot(
    data=hist_data,
    x='lot_in_days',
    hue='microbiology_res',
    element='step',
    bins=100,
    palette={colour: colour for colour in lot_by_colour},
    ax=ax,
)

# (subset key, line colour, legend description), in legend order.
# The median of all episodes is drawn in grey rather than the histogram's blue.
median_lines = [
    ('blue', 'grey', 'all'),
    ('orange', 'orange', 'test done'),
    ('green', 'green', 'negative microbiology'),
    ('red', 'red', 'positive microbiology'),
]
for subset_key, line_colour, description in median_lines:
    median = lot_by_colour[subset_key].median()
    ax.axvline(median, color=line_colour, linestyle='--', label=f'Median ({description}): {median:.1f} days')

fs = 12
# NOTE(review): this legend is built from the labelled median lines and is
# expected to replace the hue legend seaborn adds, as in the original cell.
ax.legend(fontsize=fs)
ax.set_xlabel('Episode length in days', fontsize=fs)
ax.set_ylabel('Number of episodes', fontsize=fs)
ax.tick_params(axis='both', labelsize=fs)
ax.set_xlim(0, 100)
fig.tight_layout()
fig.savefig('images/methods/'+database+'_episodes_combined.png')
plt.show()
