In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

%matplotlib inline

In [2]:
from data import load_dataset, build_target_df
from features import build_feature_df


def propogate_labels(annotations, merge_eyes=True):
    """Fold fine-grained annotation flags into their parent flags.

    `flag_brain` is replaced by the row-wise OR of `flag_brain`, `flag_alpha`
    and `flag_mu`. When `merge_eyes` is true, `flag_eyes` is likewise replaced
    by the OR of `flag_eyes`, `flag_eyes_h` and `flag_eyes_v`, and the two
    directional eye columns are dropped. `flag_line_noise` is dropped in both
    modes.

    Returns a new DataFrame; `annotations` itself is left untouched.
    """
    replaced = {}
    discarded = ['flag_line_noise']

    if merge_eyes:
        eye_columns = ['flag_eyes', 'flag_eyes_h', 'flag_eyes_v']
        replaced['flag_eyes'] = annotations[eye_columns].any(axis=1)
        discarded = ['flag_eyes_v', 'flag_eyes_h'] + discarded

    brain_columns = ['flag_brain', 'flag_alpha', 'flag_mu']
    replaced['flag_brain'] = annotations[brain_columns].any(axis=1)

    return annotations.assign(**replaced).drop(columns=discarded)


# Dataset directory handed to load_dataset in the next cell.
# NOTE(review): this name shadows the builtin `dir()` for the rest of the kernel
# session; renaming it requires updating the load_dataset call as well.
dir = 'dataset_Kids'

In [3]:
# Load the recordings and their raw annotations, then derive the feature matrix
# and the merged annotation flags.
data, raw_annotations = load_dataset(dir)
features = build_feature_df(data, default=True, custom_features={})
annotations = propogate_labels(raw_annotations)

# Default targets use the 'mean' strategy with a 0.33 threshold; the three flags
# below are then overwritten with targets built using the 'majority' strategy.
targets = build_target_df(annotations, weights='uniform', strategy='mean', threshold=0.33)
targets[['flag_alpha', 'flag_mu', 'flag_heart']] = build_target_df(annotations, ['flag_alpha', 'flag_mu', 'flag_heart'], strategy='majority')

# Keep only flags whose positive rate lies strictly between 5% and 95%.
# Both bounds must be checked on the column mean: `.all(axis=0)` yields booleans,
# so comparing it with 0.95 only rejected columns positive in every single row.
positive_rate = targets.mean(axis=0)
targets = targets.loc[:, (positive_rate > 0.05) & (positive_rate < 0.95)]

In [4]:
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, roc_auc_score, f1_score, RocCurveDisplay, precision_recall_curve


# Candidate classifiers, keyed by the short name used to label result rows.
models = dict(
    LR=LogisticRegression(solver='liblinear', penalty='l2', C=1.0),
    XGB=XGBClassifier(learning_rate=0.3, max_depth=4, n_estimators=30, eval_metric='logloss'),
    SVM=SVC(kernel='linear', probability=True),
)
# Number of random train/test splits evaluated per model and target.
n_repeats = 50

In [8]:
def plot_roc_curve(ax, y_true, y_pred, title=None, lw=2, color='darkorange', **kwargs):
    """Draw the ROC curve of scores `y_pred` against labels `y_true` on `ax`.

    `title` is applied to the axes, `color` and `lw` style the curve, and any
    extra keyword arguments are forwarded to `RocCurveDisplay.plot`. The AUC
    is computed with `roc_auc_score` and handed to the display object.
    """
    false_pos_rate, true_pos_rate, _ = roc_curve(y_true, y_pred)
    area = roc_auc_score(y_true, y_pred)

    ax.set_title(title)
    display = RocCurveDisplay(fpr=false_pos_rate, tpr=true_pos_rate, roc_auc=area)
    display.plot(ax=ax, color=color, lw=lw, **kwargs)


def plot_f1_curve(ax, y_true, y_pred, title=None, lw=2, color='darkorange', **kwargs):
    """Plot the F1 score against the decision threshold on `ax`.

    Precision and recall come from `precision_recall_curve(y_true, y_pred)`;
    F1 is their harmonic mean. `title` is applied to the axes, both axis
    limits are fixed to [-0.05, 1.05], and `color`, `lw` and any extra keyword
    arguments are forwarded to `ax.plot`.
    """
    pr, rec, thresholds = precision_recall_curve(y_true, y_pred)
    denom = pr + rec
    # Precision and recall are both 0 wherever no true positive is retrieved;
    # define F1 as 0 there instead of letting 0/0 produce NaN gaps and warnings.
    f1 = np.divide(2 * pr * rec, denom, out=np.zeros_like(denom, dtype=float), where=denom > 0)
    ax.set_title(title)
    ax.set_xlim(-0.05, 1.05)
    ax.set_ylim(-0.05, 1.05)
    # precision/recall carry one more point than `thresholds`; a leading 0 on
    # the x axis makes the two sequences the same length.
    ax.plot([0] + list(thresholds), f1, color=color, lw=lw, **kwargs)


def repeated_train_test(features, target, model, n_repeats, test_size=0.3, axes=None, random_state=57):
    """Fit and evaluate `model` on `n_repeats` random stratified train/test splits.

    Parameters
    ----------
    features, target
        Inputs forwarded to `train_test_split`; `target` is also used for
        stratification.
    model
        Estimator with `fit` and `predict` (and `predict_proba` when `axes` is
        given). It is refit in place on every split.
    n_repeats
        Number of splits to draw.
    test_size
        Forwarded to `train_test_split`.
    axes
        Optional pair `(roc_ax, f1_ax)`. When given, each split's ROC curve and
        F1-vs-threshold curve are overlaid on these axes with low alpha, and
        the axes are decorated once all splits are done. When None, nothing is
        plotted.
    random_state
        Seed applied to NumPy's global random generator before splitting.

    Returns
    -------
    (y_true, y_preds)
        `np.array` of the per-split test labels and of the per-split predicted
        labels, one entry per repeat.
    """
    y_true = []
    y_preds = []
    # train_test_split is called without its own random_state, so it draws from
    # NumPy's global generator; seeding that generator makes the splits repeatable.
    np.random.seed(random_state)

    for _ in range(n_repeats):
        X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=test_size, stratify=target)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_true.append(y_test)
        y_preds.append(y_pred)

        # Previously `axes` was accepted but ignored, leaving callers that
        # passed axes with blank plots. Probabilities are only needed here.
        if axes is not None:
            y_proba = model.predict_proba(X_test)[:, 1]
            plot_roc_curve(axes[0], y_test, y_proba, alpha=.1)
            plot_f1_curve(axes[1], y_test, y_proba, alpha=.1)

    if axes is not None:
        for ax in axes:
            ax.set_xticks([])
            ax.set_yticks([])
            ax.set_xlabel('')
            ax.set_ylabel('')
        # Reference lines: chance diagonal on the ROC axes, 0.5 threshold on the F1 axes.
        axes[0].plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
        axes[1].axvline(x=0.5, color='navy', lw=2, linestyle='--')
        # One legend entry per split would be unreadable; drop the legend.
        axes[0].legend().remove()

    return np.array(y_true), np.array(y_preds)

def plot_curves_grid(features, targets, models):
    """Build two figures of axes grids, one row per model and one column per target.

    For every (model, target column) pair the matching axes of both grids are
    passed to `repeated_train_test` via its `axes` argument. The number of
    repeats is read from the module-level `n_repeats`. Rows are labelled with
    the model name and the top row is titled with the target name minus its
    first five characters.

    Returns
    -------
    (fig_auc, fig_f1)
        The two matplotlib figures.
    """
    n_rows, n_cols = len(models), len(targets.columns)
    figsize = (4 * n_cols, 3 * n_rows)
    # squeeze=False keeps the axes arrays 2-D even with a single model or a
    # single target, so the row/column iteration below always works.
    fig_auc, axes_auc = plt.subplots(n_rows, n_cols, figsize=figsize, squeeze=False)
    fig_f1, axes_f1 = plt.subplots(n_rows, n_cols, figsize=figsize, squeeze=False)

    for (model_name, model), row_auc, row_f1 in zip(models.items(), axes_auc, axes_f1):
        for flag, ax_auc, ax_f1 in zip(targets, row_auc, row_f1):
            repeated_train_test(features, targets[flag], model, n_repeats, axes=(ax_auc, ax_f1))
            ax_auc.set_xlabel('')
            ax_f1.set_xlabel('')
        row_auc[0].set_ylabel(model_name, fontsize=30, labelpad=16)
        row_f1[0].set_ylabel(model_name, fontsize=30, labelpad=16)
    for target_name, ax_auc, ax_f1 in zip(targets, axes_auc[0], axes_f1[0]):
        # target_name[5:] drops the first five characters (the 'flag_' prefix
        # on the target columns used in this notebook).
        ax_auc.set_title(target_name[5:].capitalize(), fontsize=30, pad=16)
        ax_f1.set_title(target_name[5:].capitalize(), fontsize=30, pad=16)

    fig_auc.subplots_adjust(wspace=0.05, hspace=0.05)
    fig_f1.subplots_adjust(wspace=0.05, hspace=0.05)

    return fig_auc, fig_f1

In [10]:
# Per-repeat test labels and predicted labels for logistic regression on the
# `flag_eyes` target.
true, preds = repeated_train_test(features, targets['flag_eyes'], models['LR'], n_repeats)

In [20]:
from itertools import product
# Per-repeat confusion counts. product([0, 1], repeat=2) yields the
# (true label, predicted label) pairs (0,0), (0,1), (1,0), (1,1),
# i.e. TN, FP, FN, TP in that order.
TN, FP, FN, TP = [
    np.sum((true == actual) & (preds == predicted), axis=1)
    for actual, predicted in product([0, 1], repeat=2)
]

In [22]:
# Average number of true positives per repeat for the LR / flag_eyes run.
TP.mean()

11.22

In [36]:
from itertools import product

# Confusion-matrix cells in the order product([0, 1], repeat=2) yields the
# (true label, predicted label) pairs: (0,0), (0,1), (1,0), (1,1).
outcome_names = ['TN', 'FP', 'FN', 'TP']

# One row per model: mean per-repeat count of each outcome on `flag_eyes`.
# Rows are collected first and the frame is built once, and the loop no longer
# binds the name `type`, which used to shadow the builtin for later cells.
contingency_rows = []
for model_name, model in models.items():
    true, preds = repeated_train_test(features, targets['flag_eyes'], model, n_repeats)
    contingency_rows.append([
        np.sum((true == i) & (preds == j), axis=1).mean()
        for i, j in product([0, 1], repeat=2)
    ])

contingency_eyes = pd.DataFrame(contingency_rows, index=list(models), columns=outcome_names, dtype=float)



In [37]:
# Mean TN / FP / FN / TP counts per repeat on `flag_eyes`, one row per model.
contingency_eyes

Unnamed: 0,TN,FP,FN,TP
LR,161.38,1.62,11.78,11.22
XGB,159.44,3.56,10.4,12.6
SVM,161.94,1.06,11.76,11.24


In [29]:
# NOTE(review): `TPs` is not assigned in any cell visible in this notebook; it
# appears to rely on kernel state from a removed cell and would fail on
# Restart & Run All.
TPs

Unnamed: 0,flag_brain,flag_alpha,flag_mu,flag_eyes,flag_muscles,flag_heart,flag_ch_noise
LR,TP: 127.72,TP: 6.38,TP: 5.54,TP: 11.22,TP: 21.98,TP: 5.74,TP: 0.00
XGB,TP: 124.68,TP: 7.12,TP: 8.92,TP: 12.60,TP: 25.16,TP: 25.14,TP: 1.70
SVM,TP: 127.00,TP: 4.98,TP: 3.38,TP: 11.24,TP: 24.82,TP: 0.22,TP: 0.00


In [30]:
# NOTE(review): `TNs` is not assigned in any cell visible in this notebook; it
# appears to rely on kernel state from a removed cell and would fail on
# Restart & Run All.
TNs

Unnamed: 0,flag_brain,flag_alpha,flag_mu,flag_eyes,flag_muscles,flag_heart,flag_ch_noise
LR,TN: 37.24,TN: 167.38,TN: 155.84,TN: 161.38,TN: 141.46,TN: 110.02,TN: 171.64
XGB,TN: 37.62,TN: 165.16,TN: 149.90,TN: 159.44,TN: 138.38,TN: 87.08,TN: 169.50
SVM,TN: 38.34,TN: 167.58,TN: 157.12,TN: 161.94,TN: 141.64,TN: 116.48,TN: 172.00


In [31]:
# NOTE(review): `FPs` is not assigned in any cell visible in this notebook; it
# appears to rely on kernel state from a removed cell and would fail on
# Restart & Run All.
FPs

Unnamed: 0,flag_brain,flag_alpha,flag_mu,flag_eyes,flag_muscles,flag_heart,flag_ch_noise
LR,FP: 13.76,FP: 0.62,FP: 2.16,FP: 1.62,FP: 4.08,FP: 6.98,FP: 0.36
XGB,FP: 13.38,FP: 2.84,FP: 8.10,FP: 3.56,FP: 7.20,FP: 29.92,FP: 2.50
SVM,FP: 12.66,FP: 0.42,FP: 0.88,FP: 1.06,FP: 3.90,FP: 0.52,FP: 0.00


In [32]:
# NOTE(review): `FNs` is not assigned in any cell visible in this notebook; it
# appears to rely on kernel state from a removed cell and would fail on
# Restart & Run All.
FNs

Unnamed: 0,flag_brain,flag_alpha,flag_mu,flag_eyes,flag_muscles,flag_heart,flag_ch_noise
LR,FN: 7.28,FN: 11.62,FN: 22.46,FN: 11.78,FN: 18.48,FN: 63.26,FN: 14.00
XGB,FN: 10.32,FN: 10.88,FN: 19.08,FN: 10.40,FN: 15.26,FN: 43.86,FN: 12.30
SVM,FN: 8.00,FN: 13.02,FN: 24.62,FN: 11.76,FN: 15.64,FN: 68.78,FN: 14.00
