In [None]:
import os
import csv
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
%matplotlib widget

In [None]:
# Global seaborn appearance applied to every figure drawn in this notebook.
sns.set_style(style="whitegrid")
sns.set_context(context="talk")
sns.set_palette(palette="pastel")

# Height component of the figsize passed to plt.subplots() by plot_performance;
# the width is derived there from the number of models.
fig_height = 5

In [None]:
# plots mean AUC +/- std for each model
# Inputs: model names as a list, Pandas DataFrame of exported results CSV from Google Spreadsheets

def plot_performance(
    model_names: list,
    df: pd.DataFrame,
    fpath_bar_plot: str,
    y_min: float = 0.7,
    y_max: float = 0.95,
    bootstraps: int = 10):
    """Draw a bar chart of mean AUC per model (error bars: std) and save it.

    Args:
        model_names: columns of ``df`` to plot; one bar per name, in order.
        df: results table with one row per bootstrap. Bootstrap labels
            ``0 .. bootstraps-1`` are read from the ``'parameter'`` column when
            it exists, otherwise from the index. Integer and string labels
            both match, so frames loaded with or without ``index_col=0`` work.
        fpath_bar_plot: path the figure is written to. A falsy value skips
            saving.
        y_min: lower y-axis limit; also anchors the value labels.
        y_max: upper y-axis limit.
        bootstraps: number of bootstrap rows to read for each model.

    Raises:
        KeyError: if a name in ``model_names`` is not a column of ``df``.
        IndexError: if ``df`` has no row for one of the bootstraps.
        ValueError: if a cell cannot be converted to ``float``.
    """
    y_delta = 0.05

    # Bootstrap labels live in the 'parameter' column, or in the index when the
    # CSV was read with index_col=0.
    if 'parameter' in df.columns:
        labels = pd.Series(df['parameter'].to_numpy())
    else:
        labels = pd.Series(df.index.to_numpy())
    labels_as_str = labels.astype(str)

    # Resolve each bootstrap to a row position once, instead of once per model.
    row_positions = []
    for bootstrap in range(bootstraps):
        # Compare both natively and as text so 0, 0.0 and "0" all match.
        is_match = (labels == bootstrap) | (labels_as_str == str(bootstrap))
        hits = np.flatnonzero(is_match.to_numpy())
        if hits.size == 0:
            raise IndexError(f"no row found for bootstrap {bootstrap}")
        row_positions.append(hits[0])

    # Named `summary` (not `stats`) so the imported scipy.stats is not shadowed.
    summary = {}
    for model in model_names:
        column = df[model].to_numpy()
        aucs = [float(column[row]) for row in row_positions]
        summary[model] = {
            'aucs': aucs,
            'mean': np.mean(aucs),
            'std': np.std(aucs),
        }

    means = [summary[model]['mean'] for model in model_names]
    stds = [summary[model]['std'] for model in model_names]

    fig_width = len(model_names) * 1.2
    fig, ax = plt.subplots(figsize=(fig_width, fig_height))
    x_pos = np.arange(len(model_names))

    # Draw on the explicit axes rather than whatever pyplot considers current.
    sns.barplot(x=x_pos, y=means, yerr=stds, ax=ax)
    ax.set_ylabel('AUC')
    ax.set_ylim([y_min, y_max])
    ax.set_yticks(np.arange(y_min, y_max+0.01, y_delta))

    ax.set_xticks(range(len(model_names)))
    ax.set_xticklabels(model_names, rotation=90)

    # Print each mean just above the bottom of its bar, nudged left so the
    # text sits roughly centred on the bar.
    offset = -0.02 * len(model_names)
    for i, v in enumerate(means):
        ax.text(x_pos[i]+offset, y_min + 0.01, f'{v:0.2f}')
    fig.tight_layout()

    # The path argument was previously ignored, so no figure was ever written.
    if fpath_bar_plot:
        fig.savefig(fpath_bar_plot)

## Load CSVs and concatenate horizontally into one wide dataframe

In [None]:
csv_filenames = ['stsnet']
rootdir = os.path.expanduser("~/dropbox/sts-ecg/figures-and-tables")

# Read every CSV first and concatenate once; the unnamed first column of each
# file holds the row labels and is exposed as 'parameter'.
frames = [
    pd.read_csv(os.path.join(rootdir, f"{csv_filename}.csv"))
      .rename(columns={'Unnamed: 0': 'parameter'})
    for csv_filename in csv_filenames
]
df = pd.concat(frames, axis=1) if frames else pd.DataFrame()

# With several CSVs each one contributes its own 'parameter' column. Keep only
# the first so that df['parameter'] stays a single Series.
# NOTE(review): assumes all CSVs list their rows in the same order - confirm.
is_extra_label_column = (df.columns == 'parameter') & df.columns.duplicated()
df = df.loc[:, ~is_extra_label_column]

In [None]:
# Columns of `df` to plot: one 'stsnet-v034 (<label>)' column per label below.
models_to_plot = [
    f'stsnet-v034 ({label})'
    for label in ('death', 'stroke', 'renal', 'vent', 'dsw', 'reop', 'stay', 'afib')
]

fpath_bar_plot = os.path.join(rootdir, "aucs.png")
plot_performance(models_to_plot, df, fpath_bar_plot, y_min=0.5, y_max=0.9)

## Compare AUCs via t-test

In [None]:
# Results export; its first column becomes the row index.
fpath = os.path.join(
    rootdir,
    "STS ECG models and results - architecture_and_results_4.csv",
)
df = pd.read_csv(fpath, index_col=0)

# List the available columns so model names can be copied into later cells.
print(f"Loaded dataframe from {fpath} with keys:\n")
for column in df.columns:
    print(f"\t{column}")

In [None]:
def get_aucs_from_column(df: pd.DataFrame, col_name: str, bootstraps: int = 10) -> list:
    """Extract the per-bootstrap AUCs of one model as a list of floats.

    Args:
        df: results table whose index holds the bootstrap labels, either as
            strings (``"0"``, ``"1"``, ...) or as integers.
        col_name: model name, i.e. the column to read.
        bootstraps: number of bootstraps to read (labels ``0 .. bootstraps-1``).

    Returns:
        A list of ``bootstraps`` floats. A bootstrap whose row is missing or
        whose value cannot be converted to ``float`` yields ``np.nan`` and a
        printed notice. The list is empty when ``col_name`` is not in ``df``.
    """
    aucs = []
    if col_name not in df:
        return aucs
    column = df[col_name]
    for bootstrap in range(bootstraps):
        try:
            # Prefer the string label; fall back to the integer label so a
            # frame with an integer index is not silently read as all-NaN.
            label = str(bootstrap) if str(bootstrap) in column.index else bootstrap
            aucs.append(float(column.loc[label]))
        except (KeyError, ValueError, TypeError):
            # Only lookup and conversion failures are treated as "no AUC";
            # anything else (e.g. KeyboardInterrupt) propagates.
            aucs.append(np.nan)
            print(f"no valid auc found at bootstrap {bootstrap}")
    return aucs

In [None]:
# Map every column (model name) of the results frame to its list of float AUCs.
aucs = {
    model: get_aucs_from_column(df=df, col_name=model, bootstraps=10)
    for model in df.keys()
}

In [None]:
# The two models (columns of `df`) being compared in this cell.
model_1, model_2 = 'v14', 'v14-new-bootstraps'
models_to_plot = [model_1, model_2]

fpath_bar_plot = os.path.join(rootdir, "aucs-compare-v14.png")
plot_performance(models_to_plot, df, fpath_bar_plot, y_min=0.65, y_max=0.80)

# Related-samples t-test on the per-bootstrap AUCs of the two models.
statistic, p_val = stats.ttest_rel(aucs[model_1], aucs[model_2])
for model_name in models_to_plot:
    model_aucs = aucs[model_name]
    print(f"mean AUC = {np.mean(model_aucs):0.3f} ± {np.std(model_aucs):0.3f} from model {model_name} ")
print(f"p-value = {p_val:0.3f}")

In [None]:
# The two models (columns of `df`) being compared in this cell.
model_1, model_2 = 'shallow-sts-preop', 'deep-sts-preop-v8'
models_to_plot = [model_1, model_2]

fpath_bar_plot = os.path.join(rootdir, "aucs-compare-preop.png")
plot_performance(models_to_plot, df, fpath_bar_plot, y_min=0.75, y_max=0.95)

# Related-samples t-test on the per-bootstrap AUCs of the two models.
statistic, p_val = stats.ttest_rel(aucs[model_1], aucs[model_2])
for model_name in models_to_plot:
    model_aucs = aucs[model_name]
    print(f"mean AUC = {np.mean(model_aucs):0.3f} ± {np.std(model_aucs):0.3f} from model {model_name} ")
print(f"p-value = {p_val:0.3f}")