In [None]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Display name of each model -> sub-directory holding that model's predictions.
# The two lists below are index-aligned and are consumed pairwise further down.
_lookup_by_model = {
    "STSNet": "deep-sts-preop",
    "ECGNet": "v14",
}
model_names = list(_lookup_by_model.keys())
model_names_lookup = list(_lookup_by_model.values())

# Root directories (user home is expanded) for input predictions and output figures
path_to_predictions_prefix = os.path.expanduser("~/dropbox/sts-ecg/predictions")
path_to_figures_prefix = os.path.expanduser("~/dropbox/sts-ecg/figures")

# File name of the predictions CSV read for every model / bootstrap
csv_name = "predictions_test.csv"

In [None]:
# Seaborn appearance used by every figure drawn after this cell
sns.set_context("talk")
sns.set_style("darkgrid")

# Collects one merged (both-model) dataframe per bootstrap
dfs = []

for bootstrap in range(10):
    # Predictions of every model for this bootstrap, keyed by display name
    dfs_bootstrap = {}

    for model_name, lookup_name in zip(model_names, model_names_lookup):
        # Layout on disk: <prefix>/<model dir>/<bootstrap index>/<csv name>
        path_to_predictions = os.path.join(
            path_to_predictions_prefix,
            lookup_name,
            str(bootstrap),
            csv_name,
        )

        predictions = pd.read_csv(path_to_predictions)

        # Columns are renamed by position, so the CSV must have exactly three
        # columns -- presumably (id, label, prediction) in that order; verify
        # against the code that writes these files.
        predictions.columns = [
            "mrn",
            f"y_{model_name}",
            f"y_hat_{model_name}",
        ]
        dfs_bootstrap[model_name] = predictions

    # Join the two models' rows on mrn (pandas' default inner join)
    df_both_models = pd.merge(
        left=dfs_bootstrap[model_names[0]],
        right=dfs_bootstrap[model_names[1]],
        on="mrn",
    )

    # Per-row difference in predictions: first model minus second model
    y_hat_first = df_both_models[f"y_hat_{model_names[0]}"]
    y_hat_second = df_both_models[f"y_hat_{model_names[1]}"]
    df_both_models["y_hat_delta"] = y_hat_first.sub(y_hat_second)

    dfs.append(df_both_models)

    print(f"Calculated y_hat diff from bootstrap {bootstrap}")

In [None]:
# Stack the per-bootstrap frames row-wise into a single frame; each frame keeps
# its own row labels, so index values repeat across bootstraps.
df = pd.concat(objs=dfs, axis=0, ignore_index=False)
df

In [None]:
fig_width = 10
fig_height = 6

# Take column names, titles and file names from the configured model list so
# they cannot drift out of sync with the data loaded above.
model_a = model_names[0]
model_b = model_names[1]
label_col = f"y_{model_a}"  # outcome column of the first model; 1 is plotted as "Died", 0 as "Lived"
file_stem = f"{model_a.lower()}-vs-{model_b.lower()}"

# Split the pooled predictions once per outcome instead of re-filtering for
# every plot argument: (legend name, rows, colormap for the KDE, color for the histogram)
outcome_groups = [
    ("Died", df[df[label_col] == 1], "Reds", "r"),
    ("Lived", df[df[label_col] == 0], "Blues", "b"),
]

# savefig does not create missing directories, so make sure the target exists
os.makedirs(path_to_figures_prefix, exist_ok=True)

# NOTE(review): `data2`, `shade`, `shade_lowest` and `distplot` are legacy
# seaborn API that appear to be superseded from seaborn 0.11 onwards (`x`/`y`,
# `fill`, `thresh`, `histplot`). They are kept unchanged here to match the
# environment this notebook was written for -- confirm the installed version
# before migrating.

# Joint KDE of y_hat from model 1 vs model 2, aggregating test sets across all bootstraps
fig, ax = plt.subplots(figsize=(fig_width, fig_height))
for outcome_name, df_outcome, kde_cmap, _ in outcome_groups:
    sns.kdeplot(
        ax=ax,
        data=df_outcome[f"y_hat_{model_a}"],
        data2=df_outcome[f"y_hat_{model_b}"],
        cmap=kde_cmap,
        shade=True,
        shade_lowest=False,
        label=f"{outcome_name} (N={len(df_outcome)})",
    )
ax.set_title(f"Joint KDE of {model_a} vs {model_b}")
ax.set_xlabel(f"y_hat ({model_a})")
ax.set_ylabel(f"y_hat ({model_b})")
# Legend is anchored just outside the top-right corner of the axes
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., frameon=False)
fpath = os.path.join(path_to_figures_prefix, f"{file_stem}-joint-kde.png")
plt.tight_layout()
plt.savefig(fname=fpath, dpi=150, transparent=False)
print(f"Saved {fpath}")

# Distribution of delta y_hat (model 1 minus model 2), stratified by death
fig, ax = plt.subplots(figsize=(fig_width, fig_height))
for outcome_name, df_outcome, _, hist_color in outcome_groups:
    sns.distplot(
        df_outcome["y_hat_delta"],
        ax=ax,
        kde=True,
        norm_hist=True,
        color=hist_color,
        label=f"{outcome_name} (N={len(df_outcome)})",
    )
ax.set_title("delta y_hat")
ax.set_xlabel(f"{model_a} - {model_b}")
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., frameon=False)
fpath = os.path.join(path_to_figures_prefix, f"{file_stem}-delta.png")
plt.tight_layout()
plt.savefig(fname=fpath, dpi=150, transparent=False)
print(f"Saved {fpath}")