In [None]:
# Default input parameter values, which will be overridden by the papermill library through its Python API.
# This cell must carry the tag "parameters" in its metadata.
data_folder = "./"
# NOTE(review): machine-specific absolute default; presumably always overridden when run via papermill - confirm.
innereye_path = "/mnt/c/Users/dacoelh/Python/InnerEye-DeepLearning"
# NOTE(review): built from the value of `data_folder` assigned in this cell. If only `data_folder` is
# overridden, this path presumably keeps the default prefix - confirm that callers pass both parameters.
metrics_across_all_runs_file = data_folder + "MetricsAcrossAllRuns_fulldata.csv"

In [None]:
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd

# Make the InnerEye checkout importable before the `InnerEye.*` imports that follow.
# (The original `print(...)` carried a stray trailing comma, which built and discarded a tuple.)
print(f"Adding to path: {innereye_path}")
if str(innereye_path) not in sys.path:
    sys.path.append(str(innereye_path))
# Show the effective search path so that import problems are easy to diagnose.
display(sys.path)
    
from InnerEye.ML.utils.metrics_constants import LoggingColumns
from InnerEye.ML.reports.notebook_report import print_header
from InnerEye.ML.reports.classification_report import plot_pr_and_roc_curves

In [None]:
# Load the metrics CSV and keep only the rows logged at the highest epoch number.
df: pd.DataFrame = pd.read_csv(metrics_across_all_runs_file)
# Series.max() is vectorised and skips NaN, whereas the builtin max() iterates in Python and gives
# an order-dependent result when NaN values are present.
last_epoch = df[LoggingColumns.Epoch.value].max()
df = df.loc[df[LoggingColumns.Epoch.value] == last_epoch]

In [None]:
# Display the metrics rows retained for the last epoch.
df

In [None]:
# Read the dataset description that sits next to the metrics. The path is built with pathlib so that
# an overridden `data_folder` works with or without a trailing path separator.
dataset = pd.read_csv(Path(data_folder) / "dataset.csv")
dataset

In [None]:
# Attach the dataset's 'subject' and 'class_name' columns to every metrics row. The 'subject' column
# of the metrics frame is renamed to 'series' and used as the key into the dataset's 'series' column.
df_ = (
    df
    .rename(columns={'subject': 'series'})
    .join(
        dataset[['subject', 'series', 'class_name']].set_index('series'),
        on='series',
        rsuffix='_',
    )
)

In [None]:
# Count the distinct subjects that appear in each cross-validation fold.
print("Number of unique subjects per fold:")
df_['subject'].groupby(df_['cross_validation_split_index']).nunique()

In [None]:
# Cross-tabulate the fold index against (label, class name) pairs, including row and column totals.
pd.crosstab(
    index=df_['cross_validation_split_index'],
    columns=[df_['label'], df_['class_name']],
    margins=True,
)

In [None]:
import seaborn as sns

# Box plots of the model output for each label value, drawn in one panel per cross-validation fold.
sns.catplot(
    data=df,
    x='label',
    y='model_output',
    col='cross_validation_split_index',
    kind='box',
    aspect=.5,
);

In [None]:
# Overlay the normalised histograms of the model outputs for label 0 and label 1, pooled over all folds.
plt.hist(
    [df.loc[df.label == label_value, 'model_output'] for label_value in (0, 1)],
    density=True,
    bins=16,
    histtype='step',
    lw=2,
    label=['CVX0', 'CVX123'],
);
plt.legend();

In [None]:
import numpy as np
# plt.hist(df.model_output)
def logit(x):
    """Return the log-odds ``log(x) - log(1 - x)`` of `x`, evaluated element-wise with NumPy."""
    log_p = np.log(x)
    log_complement = np.log(1. - x)
    return log_p - log_complement
# Histograms of the model outputs per cross-validation fold, split by label, on shared axes.
crossval_split = LoggingColumns.CrossValidationSplitIndex.value
# nunique() ignores NaN, which matches the groups that groupby() yields below.
n_splits = df[crossval_split].nunique()
# squeeze=False always returns a 2D array of axes, so `axs.flat` also works when there is a single fold
# (by default plt.subplots returns a bare Axes object for a 1x1 grid).
_, axs = plt.subplots(1, n_splits, figsize=(15, 5), sharex=True, sharey=True, squeeze=False)
for (fold_index, fold_df), ax in zip(df.groupby(crossval_split), axs.flat):
    ax.hist([fold_df[fold_df.label==0].model_output, fold_df[fold_df.label==1].model_output], bins=16,
            density=True, histtype='step', lw=2, label=['CVX0', 'CVX123']);
    ax.set_xlabel(f"Fold {fold_index}")
plt.suptitle("Distributions of model outputs")
# plt.legend acts on the current axes, i.e. the legend is drawn once, on the last panel.
plt.legend(frameon=False);

In [None]:
from scipy.interpolate import interp1d

def quantile(x, q, axis=-1):
    """Compute linearly interpolated quantiles of `x` along `axis`.

    The sorted samples along `axis` are treated as equally spaced between quantile 0 (the minimum)
    and quantile 1 (the maximum); this is the default 'linear' definition of `np.quantile`.

    :param x: Array-like of samples, e.g. a list of equally long 1D arrays.
    :param q: Scalar or array-like of quantiles, each within [0, 1].
    :param axis: Axis of `x` along which the quantiles are computed.
    :return: Array in which `axis` of `x` is replaced by the dimensions of `q` (a scalar `q` simply
        removes that axis).
    :raises ValueError: If a value in `q` lies outside [0, 1].
    """
    x = np.asarray(x)
    q = np.asarray(q)
    # np.quantile also handles a single sample along `axis`, for which interpolating over a
    # one-point rank grid fails.
    result = np.quantile(x, q, axis=axis)
    if q.ndim > 0:
        # np.quantile puts the dimensions of `q` first; move them to the position `axis` had in `x`.
        first = axis % x.ndim
        result = np.moveaxis(result, list(range(q.ndim)), list(range(first, first + q.ndim)))
    return result

# Small worked example of `quantile`: print the row-wise sorted matrix, then show its quartiles.
# A locally seeded generator keeps the example reproducible across runs without touching NumPy's
# global random state.
x = np.random.RandomState(0).randn(3, 4)
axis = 1
print(np.sort(x, axis))
quantile(x, [.25, .5, .75], axis=axis)

In [None]:
from sklearn.metrics import roc_auc_score, roc_curve, precision_recall_curve

# For every value of the hue column (called `target` here), draw the ROC curve (left) and the
# precision-recall curve (right) of each cross-validation fold, together with the across-fold median
# curve and a band spanning the 10% and 90% quantiles.
for target, sub_df in df.groupby(LoggingColumns.Hue.value):
    fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))
    # One figure is produced per group, so label it with the group it belongs to.
    fig.suptitle(str(target))

    # Common grid onto which the per-fold curves are resampled so that they can be aggregated.
    grid = np.linspace(0, 1, 101)
    tprs, prcs = [], []

    for fold_index, fold_df in sub_df.groupby(LoggingColumns.CrossValidationSplitIndex.value):
        labels = fold_df[LoggingColumns.Label.value]
        model_outputs = fold_df[LoggingColumns.ModelOutput.value]
        fpr, tpr, thr = roc_curve(labels, model_outputs)
        prc, rec, thr_pr = precision_recall_curve(labels, model_outputs)
        # Operating point that maximises TPR - FPR on the ROC curve.
        opt_idx = np.argmax(tpr - fpr)
        # Point of the PR curve whose threshold is closest to the threshold of that ROC operating point.
        opt_idx_pr = np.argmin(np.abs(thr_pr - thr[opt_idx]))
        auc = roc_auc_score(labels, model_outputs)
        plot_pr_and_roc_curves(labels, model_outputs, axs, plot_kwargs=dict(
            label=f"Fold {fold_index} (AUC={auc:.2f})", lw=1))
        tprs.append(np.interp(grid, fpr, tpr))
        # Both arrays are reversed because np.interp needs increasing x-coordinates
        # (presumably recall is returned in decreasing order - see the scikit-learn documentation).
        prcs.append(np.interp(grid, rec[::-1], prc[::-1]))

        # Mark the selected operating point on both curves.
        # NOTE(review): the colour 'C{fold_index}' presumably matches the fold's curve only if fold
        # indices are 0, 1, 2, ... and the curves follow the default colour cycle - confirm.
        axs[0].plot(fpr[opt_idx], tpr[opt_idx], 'o', c=f'C{fold_index}', zorder=1000)
        axs[1].plot(rec[opt_idx_pr], prc[opt_idx_pr], 'o', c=f'C{fold_index}', zorder=1000)
        print("P(y = 1) =", labels.mean())
        print("P(y_hat > .5) =", (model_outputs>.5).mean())
        print("Accuracy at threshold .5 =", (labels == (model_outputs>.5)).mean())

    # Aggregate the resampled curves across folds (axis 0 indexes the folds).
    tpr_lo, tpr_mid, tpr_hi = quantile(tprs, [.1, .5, .9], axis=0)
    prc_lo, prc_mid, prc_hi = quantile(prcs, [.1, .5, .9], axis=0)
    h1 = axs[0].fill_between(grid, tpr_lo, tpr_hi, color='k', alpha=.2, lw=0)
    h2, = axs[0].plot(grid, tpr_mid, 'k', lw=2)
    # Combine band and median line into a single legend entry, appended after the per-fold entries.
    ax0handles, ax0labels = axs[0].get_legend_handles_labels()
    ax0handles.append((h1, h2))
    ax0labels.append("Median, 80% CI")

    axs[1].fill_between(grid, prc_lo, prc_hi, color='k', alpha=.2, lw=0)
    axs[1].plot(grid, prc_mid, 'k', lw=2)

    # Reference lines: the diagonal in the ROC panel and the mean label value in the PR panel.
    prevalence = sub_df[LoggingColumns.Label.value].mean()
    axs[0].plot([0, 1], [0, 1], ':k', zorder=-1)
    axs[1].axhline(prevalence, ls=':', c='k', zorder=-1)
    axs[0].set_xlabel("False positive rate")
    axs[0].set_ylabel("True positive rate")
    axs[1].set_xlabel("Recall")
    axs[1].set_ylabel("Precision")
    axs[0].legend(ax0handles, ax0labels, frameon=False)