In [None]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Display name of each model -> sub-directory holding its predictions.
# Insertion order matters: the two lists derived below are consumed in
# lock-step (zip) and by position (index 0 vs index 1) further down.
_model_dirs = {"STSNet": "deep-sts-preop", "ECGNet": "v14"}
model_names = list(_model_dirs.keys())
model_names_lookup = list(_model_dirs.values())

# Root folders for the prediction CSVs (input) and the saved figures (output).
path_to_predictions_prefix = os.path.expanduser("~/dropbox/sts-ecg/predictions")
path_to_figures_prefix = os.path.expanduser("~/dropbox/sts-ecg/figures")

# File name of the predictions CSV read from every <model>/<bootstrap>/ folder.
csv_name = "predictions_test.csv"

## Parse predictions for each bootstrap into one df containing `y`, `y_hat`, `brier`, and `y_hat_delta`

In [None]:
# Build one merged frame per bootstrap and collect them in `dfs`.
# Each merged frame holds, for both models: the label (`y_*`), the prediction
# (`y_hat_*`), the per-row squared error (`brier_*`), plus the signed and
# squared difference between the two models' predictions.
dfs = []
for bootstrap in range(10):
    dfs_bootstrap = {}

    for model_name, lookup_name in zip(model_names, model_names_lookup):
        y_col = f"y_{model_name}"
        y_hat_col = f"y_hat_{model_name}"

        path_to_predictions = os.path.join(
            path_to_predictions_prefix, lookup_name, str(bootstrap), csv_name
        )
        predictions = pd.read_csv(path_to_predictions)

        # Columns are renamed by position.
        # NOTE(review): assumes each CSV has exactly three columns ordered as
        # (mrn, label, prediction) - confirm against the file that is written.
        predictions.columns = ["mrn", y_col, y_hat_col]

        # Per-row Brier score: squared gap between prediction and label.
        predictions[f"brier_{model_name}"] = (predictions[y_hat_col] - predictions[y_col]) ** 2

        dfs_bootstrap[model_name] = predictions

    first_model, second_model = model_names[0], model_names[1]

    # Inner join on `mrn` puts both models' columns on the same row.
    # NOTE(review): if an mrn can repeat within one predictions file, this
    # join multiplies those rows - verify that mrn is unique per file.
    df_both_models = dfs_bootstrap[first_model].merge(right=dfs_bootstrap[second_model], on="mrn")

    # Disagreement between the two models: first model minus second model.
    y_hat_delta = df_both_models[f"y_hat_{first_model}"] - df_both_models[f"y_hat_{second_model}"]
    df_both_models['y_hat_delta'] = y_hat_delta
    df_both_models['y_hat_delta_squared'] = df_both_models['y_hat_delta'] ** 2

    dfs.append(df_both_models)

    print(f"Parsing predictions from bootstrap {bootstrap}")

In [None]:
# Stack the per-bootstrap frames into one long frame.
# ignore_index=True renumbers the rows 0..N-1; without it every bootstrap
# contributes its own 0..n-1 labels, so each label would occur once per
# bootstrap and label-based lookups (df.loc[i]) would return several rows.
df = pd.concat(dfs, ignore_index=True)
df

In [None]:
# Mean STSNet prediction over rows whose STSNet label is 0 ("Lived" in the plots below).
df.loc[df["y_STSNet"] == 0, "y_hat_STSNet"].mean()

In [None]:
# Mean STSNet prediction over rows whose STSNet label is 1 ("Died" in the plots below).
df.loc[df["y_STSNet"] == 1, "y_hat_STSNet"].mean()

In [None]:
# Mean ECGNet prediction over rows whose ECGNet label is 0 ("Lived" in the plots below).
df.loc[df["y_ECGNet"] == 0, "y_hat_ECGNet"].mean()

In [None]:
# Mean ECGNet prediction over rows whose ECGNet label is 1 ("Died" in the plots below).
df.loc[df["y_ECGNet"] == 1, "y_hat_ECGNet"].mean()

In [None]:
# Global seaborn look for every figure below: "talk" scaling, "ticks" axes style.
sns.set_context("talk")
sns.set_style("ticks")

# Base figure size (inches) shared by all plotting cells.
fig_width, fig_height = 10, 6

## Scatterplot of each model's Brier score vs (STSNet - ECGNet)^2

The Brier score of a model is the squared difference between its predicted probability and the true label; a high score means the prediction was far from the truth.  
The squared difference between the STSNet and ECGNet predictions measures the disagreement between the two models.  

In [None]:
from scipy import stats
def calc_r2(x, y):
    """Return the squared Pearson correlation coefficient between `x` and `y`.

    Both arguments are forwarded unchanged to `scipy.stats.pearsonr`; only
    the correlation coefficient is used and the p-value is discarded.
    """
    pearson_r, _p_value = stats.pearsonr(x, y)
    return pearson_r ** 2

In [None]:
# For each model, scatter its per-row Brier score (x) against the squared
# difference between the two models' predictions (y), one series per outcome,
# and save the figure as brier-<model>-vs-diff.png.

# savefig does not create missing folders, so make sure the target exists.
os.makedirs(path_to_figures_prefix, exist_ok=True)

# (label value, legend name, marker colour) for each outcome group.
# An explicit `color` is used because `cmap` has no effect on a scatter that
# is given no per-point colour values; the colours match the histogram cell.
outcome_groups = [
    (0, "Lived", "cornflowerblue"),
    (1, "Died", "orange"),
]

for model_name in model_names:
    fig, ax = plt.subplots(figsize=(fig_width*1.25, fig_height))

    for outcome, outcome_name, color in outcome_groups:
        # Filter once and reuse the subset for R^2, the points and the count.
        subset = df[df[f"y_{model_name}"] == outcome]
        brier = subset[f"brier_{model_name}"]
        disagreement = subset["y_hat_delta_squared"]

        r2 = calc_r2(x=brier, y=disagreement)
        sns.scatterplot(
            ax=ax,
            x=brier,
            y=disagreement,
            color=color,
            label=f"{outcome_name} (N={len(subset)}; R^2={r2:0.2f})",
            alpha=0.7,
        )

    ax.set_title("")
    ax.set_xlabel(f"Brier score ({model_name})")
    ax.set_ylabel("Squared error between\nSTSNet and ECGNet")

    # Legend sits outside the axes, to the right of the plot area.
    ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., frameon=False)

    fpath = os.path.join(path_to_figures_prefix, f"brier-{model_name}-vs-diff.png")
    fig.tight_layout()
    fig.savefig(fname=fpath, dpi=150, transparent=False)
    print(f"Saved {fpath}")
    # The figure is intentionally left open: the inline backend renders the
    # figures that are still open when the cell finishes.

In [None]:
# For each model, draw one histogram of per-row Brier scores per outcome and
# save it as brier-<model>-lived.png / brier-<model>-died.png.

# savefig does not create missing folders, so make sure the target exists.
os.makedirs(path_to_figures_prefix, exist_ok=True)

# (label value, figure title, file-name suffix, bar colour) per outcome.
outcome_panels = [
    (0, "Lived", "lived", "cornflowerblue"),
    (1, "Died", "died", "orange"),
]

for model_name in model_names:
    for outcome, title, suffix, color in outcome_panels:
        fig, ax = plt.subplots(figsize=(fig_width, fig_height))

        # NOTE(review): sns.distplot is deprecated in seaborn >= 0.11 in
        # favour of sns.histplot; it is kept here because the replacement
        # picks bins differently and would change the saved figures.
        sns.distplot(
            ax=ax,
            a=df[df[f"y_{model_name}"] == outcome][f"brier_{model_name}"],
            color=color,
            kde=False,
        )
        ax.set_title(title)
        # Fixed x-range so the panels are directly comparable.
        ax.set_xlim([-0.05, 1.05])

        fpath = os.path.join(path_to_figures_prefix, f"brier-{model_name}-{suffix}.png")
        fig.tight_layout()
        fig.savefig(fname=fpath, dpi=150, transparent=False)
        print(f"Saved {fpath}")