In [None]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Figure size (inches) and resolution used by the plotting cells below.
fig_width, fig_height = 10, 6
dpi = 100

# Seaborn look: "talk" context with the plain white style.
sns.set_context("talk")
sns.set_style("white")

In [None]:
# Each pair is (display name used in column names and titles,
#               directory name holding that model's predictions).
model_names, model_names_lookup = map(
    list,
    zip(
        ("STSNet", "deep-sts-preop-v13-swish"),
        ("ECGNet", "v30"),
    ),
)

# File name of the predictions CSV inside each bootstrap directory.
csv_name = "predictions_test.csv"

# Root directories for the input predictions and the output figures.
path_to_predictions_prefix = os.path.expanduser("~/dropbox/sts-ecg/predictions")
path_to_figures_prefix = os.path.expanduser("~/dropbox/sts-ecg/figures-and-tables")

## Parse predictions for each bootstrap into one df containing `mrn` and, for each model, `y` and `y_hat` (errors are derived in the next section)

In [None]:
# Build one merged frame per bootstrap (both models side by side), then stack
# all bootstraps into a single frame `df`.
dfs = []
for bootstrap in range(10):
    # Report progress before doing the work so the message is accurate even
    # if reading one of the files fails.
    print(f"Parsing predictions from bootstrap {bootstrap}")

    dfs_bootstrap = {}

    for model_name, lookup_name in zip(model_names, model_names_lookup):
        path_to_predictions = os.path.join(
            path_to_predictions_prefix, lookup_name, str(bootstrap), csv_name
        )

        # Get CSV into df
        dfs_bootstrap[model_name] = pd.read_csv(path_to_predictions)

        # Rename columns so each model's label and prediction columns stay
        # distinct after the merge.
        # NOTE(review): assumes every CSV has exactly three columns ordered
        # (id, label, prediction) — confirm against the prediction writer.
        dfs_bootstrap[model_name].columns = ["mrn", f"y_{model_name}", f"y_hat_{model_name}"]

    # Merge model results into one df; merge() defaults to an inner join, so
    # only MRNs present for both models are kept.
    df_both_models = dfs_bootstrap[model_names[0]].merge(
        right=dfs_bootstrap[model_names[1]], on="mrn"
    )

    # Append df to list of dfs
    dfs.append(df_both_models)

# ignore_index=True gives the stacked frame a unique 0..N-1 index. Without it
# every bootstrap repeats the same labels, which makes label-based selection
# ambiguous and can break operations that require unique index labels.
df = pd.concat(dfs, ignore_index=True)

## Scale predictions (min-max) and calculate error

In [None]:
# Bounds used for the min-max scaling of the predicted probabilities.
y_hat_min = 0.01
y_hat_max = 0.2

for model_name in model_names:
    y_hat_col = f"y_hat_{model_name}"
    scaled_col = f"y_hat_{model_name}_scaled"

    # Cap predictions at the upper bound. Only this model's prediction column
    # is modified; assigning through a row mask (df[mask] = value) would
    # overwrite every column of the matching rows, including the labels.
    # NOTE(review): values below y_hat_min are not clipped, so scaled values
    # can be negative — confirm whether a lower clip is wanted.
    df[y_hat_col] = df[y_hat_col].clip(upper=y_hat_max)

    # Min-max scale: y_hat_min maps to 0 and y_hat_max maps to 1.
    df[scaled_col] = (df[y_hat_col] - y_hat_min) / (y_hat_max - y_hat_min)

    # Squared error between the label and this model's scaled prediction.
    df[f"squared_error_{model_name}"] = (df[f"y_{model_name}"] - df[scaled_col]) ** 2

    # Observed range of the scaled predictions, reported next to the bounds.
    y_hat_min_new = df[scaled_col].min()
    y_hat_max_new = df[scaled_col].max()

    print(f'{model_name} pre-scaling range: [{y_hat_min:0.3f} {y_hat_max:0.3f}]')
    print(f'{model_name} post-scaling range: [{y_hat_min_new:0.3f} {y_hat_max_new:0.3f}]')
    print('\n')

# Squared disagreement between the two models' scaled predictions.
df["squared_error_between_models"] = (
    df[f"y_hat_{model_names[0]}_scaled"] - df[f"y_hat_{model_names[1]}_scaled"]
) ** 2

## Plot of y_hat

In [None]:
# One figure per model: distribution of the predictions in `y_hat_<model>`.
for model_name in model_names:
    fig, ax = plt.subplots(figsize=(fig_width, fig_height))
    # NOTE: distplot is deprecated in recent seaborn releases in favour of
    # histplot/displot; kept here so the figures look the same as before.
    sns.distplot(df[f'y_hat_{model_name}'], ax=ax)
    ax.set_xlim([-0.05, 1.05])

    ax.set_title(f"{model_name}")
    ax.set_xlabel("y_hat")
    # distplot normalizes the histogram whenever it overlays a KDE (its
    # default), so the y-axis shows a density rather than counts.
    ax.set_ylabel("Density")
    fig.tight_layout()

    # Lowercase only the file name. Lowercasing the joined path would also
    # alter the directory prefix, which breaks on case-sensitive filesystems
    # when the home directory contains uppercase characters.
    fname = f"y_hat_{model_name}.png".lower()
    fpath = os.path.join(path_to_figures_prefix, fname)
    fig.savefig(fpath, dpi=dpi, transparent=False)

In [None]:
# One figure per model: distribution of the min-max scaled predictions in
# `y_hat_<model>_scaled`.
for model_name in model_names:
    fig, ax = plt.subplots(figsize=(fig_width, fig_height))
    # NOTE: distplot is deprecated in recent seaborn releases in favour of
    # histplot/displot; kept here so the figures look the same as before.
    sns.distplot(df[f'y_hat_{model_name}_scaled'], ax=ax)
    ax.set_xlim([-0.05, 1.05])

    ax.set_title(f"{model_name}")
    ax.set_xlabel("y_hat")
    # distplot normalizes the histogram whenever it overlays a KDE (its
    # default), so the y-axis shows a density rather than counts.
    ax.set_ylabel("Density")
    fig.tight_layout()

    # Lowercase only the file name. Lowercasing the joined path would also
    # alter the directory prefix, which breaks on case-sensitive filesystems
    # when the home directory contains uppercase characters.
    fname = f"y_hat_scaled_{model_name}.png".lower()
    fpath = os.path.join(path_to_figures_prefix, fname)
    fig.savefig(fpath, dpi=dpi, transparent=False)

In [None]:
# Show the combined frame as this cell's output to inspect the derived columns.
df

## Scatterplot of model squared error vs (STSNet - ECGNet)^2

In [None]:
from scipy import stats
def calc_r2(x, y):
    """Return the squared Pearson correlation coefficient of ``x`` and ``y``.

    Both arguments are passed straight to ``scipy.stats.pearsonr``; only the
    correlation coefficient is used and the p-value is discarded.
    """
    pearson_r, _p_value = stats.pearsonr(x, y)
    return pearson_r ** 2

# Squared Pearson correlation between the first model's squared error and the
# squared disagreement between the two models' scaled predictions.
r2 = calc_r2(
    df["squared_error_" + model_names[0]],
    df["squared_error_between_models"],
)

In [None]:
# Scatter of the first model's squared error against the squared difference
# between the two models' scaled predictions.
fig, ax = plt.subplots(figsize=(fig_width, fig_height))

# No `cmap` here: without hue/colour data matplotlib ignores the colormap and
# only emits a warning.
sns.scatterplot(
    ax=ax,
    x=df[f"squared_error_{model_names[0]}"],
    y=df["squared_error_between_models"],
    alpha=0.75,
)

# Labels are built from model_names so they stay in sync with the x-axis label
# and the configured models.
ax.set_title(
    f"{model_names[0]} error vs difference between {model_names[0]} and {model_names[1]}"
)
ax.set_xlabel(f"{model_names[0]}: (y - y_hat)^2")
ax.set_ylabel(f"({model_names[0]} - {model_names[1]})^2")
ax.set_xlim([-0.025, 1.025])
ax.set_ylim([-0.025, 1.025])

# The original file name was an unterminated, empty f-string (a syntax error).
# Use a descriptive name, lowercased like the other figures in this notebook.
# NOTE(review): the intended file name could not be recovered — rename if a
# specific name is expected downstream.
fname = f"squared_error_{model_names[0]}_vs_between_models.png".lower()
fpath = os.path.join(path_to_figures_prefix, fname)
plt.tight_layout()
plt.savefig(fname=fpath, dpi=dpi, transparent=False)
print(f"Saved {fpath}")