## Imports and settings

In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys
import json
import pprint

import matplotlib.pyplot as plt
from matplotlib import colors
import numpy as np
import ipywidgets as widgets
from matplotlib import gridspec
import pprint

# Root of the detector project, given relative to the notebook's working directory.
detector_path = '..'
# Folder under the project root from which the stored predictions are read.
results_path = os.path.join(detector_path, 'results')
# Put the project root on the import path; the project imports that follow
# (utils, sleep, evaluation) presumably live there -- TODO confirm.
sys.path.append(detector_path)

from utils import constants
from utils import errors
from sleep.mass import MASS
from sleep.inta import INTA
from sleep import postprocessing
from evaluation import metrics
from evaluation import data_manipulation

SEED = 123

%matplotlib inline
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

## Load database and predicted probabilities

In [None]:
# ---- Settings
# Database to evaluate
dataset_name = constants.INTA_NAME

# Checkpoint folder of the predictions to evaluate. Best ones:
# MASS: ckpt_folder = os.path.join('20190325_v2bn_fixed_loading_train_mass', 'bsf_avg')
# INTA: ckpt_folder = os.path.join('20190328_v2bn_fixed_inta_train_inta', 'bsf_avg')
ckpt_folder = os.path.join('20190328_v2bn_fixed_inta_train_inta', 'bsf_avg')
verbose = False

# ---- Dataset
# Marks are binary sequences for each page, 200 fs resolution
errors.check_valid_value(
    dataset_name, 'dataset_name',
    [constants.MASS_NAME, constants.INTA_NAME])
dataset_class = MASS if dataset_name == constants.MASS_NAME else INTA
dataset = dataset_class(load_checkpoint=True)

# Training ids, split once more to obtain the validation set
print('Loading train set and splitting train/val')
all_train_ids = dataset.train_ids
train_ids, val_ids = data_manipulation.split_ids_list(
    all_train_ids, seed=SEED)
print('Training set IDs:', train_ids)
print('Validation set IDs:', val_ids)
# Test ids
print('Loading test set')
test_ids = dataset.test_ids
print('Testing set IDs:', test_ids)

# ---- Signals, marks and pages of every subset
# Marks come from the expert used for training (which_expert=1)
set_list = ['train', 'val', 'test']
ids_by_set = dict(zip(set_list, [train_ids, val_ids, test_ids]))
x = {}
y = {}
pages = {}
print('Loading signals and marks')
for set_name in set_list:
    x[set_name], y[set_name] = dataset.get_subset_data(
        ids_by_set[set_name], which_expert=1, verbose=verbose)
print('Loading pages')
for set_name in set_list:
    pages[set_name] = dataset.get_subset_pages(
        ids_by_set[set_name], verbose=verbose)

# ---- Expert labels turned into marks
print('Preparing labels', flush=True)
y_stamps = {
    set_name: postprocessing.generate_mark_intervals_with_list(
        y[set_name], pages[set_name], 200, 200, thr=None, postprocess=False)
    for set_name in set_list
}

# ---- Predictions
# Probability vectors for each page, 200/factor resolution (default factor 8)
ckpt_path = os.path.abspath(os.path.join(results_path, 'predictions_%s' % dataset_name, ckpt_folder))
print('Loading predictions from %s' % ckpt_path)
y_pred = {}
for set_name in set_list:
    pred_file = os.path.join(ckpt_path, 'y_pred_%s.npy' % set_name)
    subset_pred = np.load(pred_file, allow_pickle=True)
    # Keep only class 1 probability
    y_pred[set_name] = [subject_pred[..., 1] for subject_pred in subset_pred]
print('Done')

## Performance: F1 vs IoU curve

In [None]:
# Performance settings
chosen_set = 'test'
thr = 0.5

# ---------------- Compute performance
print('Using thr %1.4f and %s set' % (thr, chosen_set))

# Expert marks of the chosen set (already prepared in y_stamps)
y_thr = y_stamps[chosen_set]
n_subjects = len(y_thr)

# Model predictions of the chosen set, prepared with the chosen thr
print('Preparing predictions', flush=True)
y_pred_thr = postprocessing.generate_mark_intervals_with_list(
    y_pred[chosen_set], pages[chosen_set], 200//8, 200, thr=thr)

# One F1 curve per subject, evaluated on several IoU values
print('Computing F1 Curve', flush=True)
iou_list = np.arange(1, 10) * 0.1
all_f1_list = []
for this_y, this_y_pred in zip(y_thr, y_pred_thr):
    all_f1_list.append(metrics.f1_vs_iou(this_y, this_y_pred, iou_list))
# Per-subject results are stacked along axis 1, so statistics across
# subjects are taken over that axis
all_f1_list = np.stack(all_f1_list, axis=1)
mean_f1 = all_f1_list.mean(axis=1)
std_f1 = all_f1_list.std(axis=1)

# Two-column tables: [IoU value, statistic]
model_f1_mean = np.stack([iou_list, mean_f1], axis=1)
model_f1_std = np.stack([iou_list, std_f1], axis=1)
for table_title, table in (('Mean F1', model_f1_mean), ('Std F1', model_f1_std)):
    print(table_title)
    pprint.pprint(table)

In [None]:
# Saving settings
save_f1_iou_result = False
ckpt_id = 'avg_2019mar'

# Comparison settings
comparison_folder = 'comparison_data'
compare_expert = True
compare_chambon = True
show_set_std = False
alpha = 0.2
# NOTE: this dict must not be named `colors`. That name is bound to the
# `matplotlib.colors` module by the notebook imports, and the subject
# visualization cell needs the module for `colors.ListedColormap`.
curve_colors = {'model': '#c62828', 'expert': '#455a64', 'chambon': '#0277bd'}
zoom_xlim = [0.1, 0.7]
zoom_ylim = [0.6, 0.85]
linewidth_model = 2.5
markersize_model = 12
linewidth_others = 1.5
markersize_others = 8

# --------------- Optional: Save F1 data
if save_f1_iou_result:
    filename = os.path.join(
        comparison_folder, '%s_f1_vs_iou_model_%s.csv' % (dataset_name, ckpt_id))
    np.savetxt(filename, model_f1_mean, delimiter=",")


# -------------------- P L O T ----------------------
# Comparison data
if compare_expert:
    expert_f1_curve_mean = np.loadtxt(os.path.join(comparison_folder, 'f1_vs_iou_expert_mean.csv'), delimiter=',')
    expert_f1_curve_std = np.loadtxt(os.path.join(comparison_folder, 'f1_vs_iou_expert_std.csv'), delimiter=',')
if compare_chambon:
    chambon_f1_curve = np.loadtxt(os.path.join(comparison_folder, 'mass_f1_vs_iou_Chambon.csv'), delimiter=',')


def _plot_f1_curve(axis, mean_curve, label, color, linewidth, markersize,
                   band_alpha, std_curve=None):
    """Draw one F1 vs IoU curve on `axis`, optionally with a +/- std band.

    `mean_curve` and `std_curve` are 2D arrays whose column 0 holds the IoU
    threshold and whose column 1 holds the value. The band is drawn only
    when `std_curve` is given, using `band_alpha` as its transparency.
    """
    axis.plot(
        mean_curve[:, 0], mean_curve[:, 1],
        linewidth=linewidth, markersize=markersize, marker='.',
        label=label, color=color)
    if std_curve is not None:
        axis.fill_between(
            mean_curve[:, 0],
            mean_curve[:, 1] - std_curve[:, 1],
            mean_curve[:, 1] + std_curve[:, 1],
            alpha=band_alpha, facecolor=color)


# Both panels show the same curves; only the legend labels differ
# (short ones on the complete plot, detailed ones on the zoomed plot).
panel_labels = [
    {'expert': 'Expert Performance',
     'chambon': 'Chambon et al.',
     'model': 'Proposed Model'},
    {'expert': 'Expert Performance\nPrivate Database\nWarby et al. 2014',
     'chambon': 'ConvNet\nMASS Database\nChambon et al. 2018',
     'model': 'Proposed Model\n%s Database' % dataset_name.upper()},
]

fig, ax = plt.subplots(1, 2, figsize=(9, 4), dpi=150)

for this_ax, labels in zip(ax, panel_labels):
    if compare_expert:
        # The first row of the expert files is not plotted
        _plot_f1_curve(
            this_ax, expert_f1_curve_mean[1:], labels['expert'],
            curve_colors['expert'], linewidth_others, markersize_others,
            alpha, std_curve=expert_f1_curve_std[1:])
    if compare_chambon:
        _plot_f1_curve(
            this_ax, chambon_f1_curve, labels['chambon'],
            curve_colors['chambon'], linewidth_others, markersize_others,
            alpha)
    _plot_f1_curve(
        this_ax, model_f1_mean, labels['model'],
        curve_colors['model'], linewidth_model, markersize_model,
        alpha, std_curve=model_f1_std if show_set_std else None)

# Complete plot
# Raw string: `\m` is not a valid escape sequence in a regular string literal
ax[0].set_title(r'Performance with $\mu=%1.1f$ (%s)' % (thr, chosen_set), fontsize=11)
ax[0].set_xlim([0, 1])
ax[0].set_ylim([0, 1])
ax[0].set_yticks([0.1*i for i in range(1, 10)])
ax[0].set_xticks([0.1*i for i in range(1, 10)])
ax[0].tick_params(labelsize=8.5)
ax[0].set_xlabel('IoU Threshold', fontsize=8.5)
ax[0].set_ylabel('F1-score', fontsize=8.5)
ax[0].grid()

# Zoom plot
ax[1].set_title('Zoom in', fontsize=11)
ax[1].set_xlim(zoom_xlim)
ax[1].set_ylim(zoom_ylim)
ax[1].set_xlabel('IoU Threshold', fontsize=8.5)
ax[1].tick_params(labelsize=8.5)
# The legend is placed outside the axes, to the right of the zoomed panel
ax[1].legend(loc='lower left', bbox_to_anchor=(1.05, 0.15), labelspacing=3, fontsize=8.5)
ax[1].grid()

plt.show()

## Performance: Precision-Recall plot, separated subjects

In [None]:
# Settings: thr is used to prepare the predictions, iou_thr is handed to
# metrics.by_event_confusion
thr = 0.5
iou_thr = 0.3

# By-event statistics of every subject, stored per subset
be_stats = {}
for set_name in set_list:
    print('Preparing predictions for %s set' % set_name, flush=True)
    y_pred_thr = postprocessing.generate_mark_intervals_with_list(
        y_pred[set_name], pages[set_name], 200//8, 200, thr=thr)
    subject_stats = []
    for this_y, this_y_pred in zip(y_stamps[set_name], y_pred_thr):
        subject_stats.append(
            metrics.by_event_confusion(this_y, this_y_pred, iou_thr=iou_thr))
    be_stats[set_name] = subject_stats
    print('Done')

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(4, 4), dpi=150)
text_space = 0.01
markersize = 15

# F1 score levels: contours of 2*x*y/(x+y), with x=recall and y=precision
delta = 0.01
x_ = np.arange(1, 100) * delta
y_ = np.arange(1, 100) * delta
X, Y = np.meshgrid(x_, y_)
Z = 2 * X * Y / (X + Y)
CS = ax.contour(X, Y, Z, colors='k', alpha=0.3, levels=[0.6, 0.7, 0.8, 0.9])
ax.clabel(CS, fontsize=7.5, fmt='%1.2f')

# Scatter plots of each subject
color_list = {'train':'#43a047', 'val':'#0288d1', 'test': '#c62828'}
# Each subset follows the order of its own id list, so every point has to be
# annotated with the ids of its own subset (not always the training ids).
ids_per_set = {'train': train_ids, 'val': val_ids, 'test': test_ids}
for set_name in set_list:
    set_ids = ids_per_set[set_name]
    for i, stats in enumerate(be_stats[set_name]):
        # Only the first point of each subset gets a label, so the legend
        # shows a single entry per subset
        label_kwargs = {'label': set_name.capitalize()} if i == 0 else {}
        ax.scatter(
            stats['recall'], stats['precision'],
            c=color_list[set_name], s=markersize, zorder=10, **label_kwargs)
        ax.annotate(
            set_ids[i],
            (stats['recall']+text_space, stats['precision']+text_space),
            fontsize=7, color='#1b2631', zorder=20)

# Raw string: `\m` is not a valid escape sequence in a regular string literal
ax.set_title(r'Performance with $\mu=%1.1f$ and IoU$>$%1.1f' % (thr, iou_thr), fontsize=11)
ax.set_xlabel('Recall', fontsize=8.5)
ax.set_ylabel('Precision', fontsize=8.5)
ax.set_xlim([0.5, 1])
ax.set_ylim([0.5, 1])
ax.legend(loc='lower left', fontsize=8.5)
ax.tick_params(labelsize=8.5)
ax.grid()
plt.show()

## Performance: Precision-Recall curve, average per set

In [None]:
# The mean of each set is drawn, for several thr

iou_thr = 0.3
thr_list = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95]

# Prepare model predictions
pr_curve = {}
n_thr = len(thr_list)
for set_name in set_list:
    print('Processing %s set' % set_name, flush=True)
    pr_curve[set_name] = np.zeros((n_thr, 2))   # Columns are [x: recall, y: precision]
    # The loop variable is deliberately not called `thr`: that notebook-level
    # name is printed in the titles of the other performance plots, and
    # overwriting it here would make them report the last value of thr_list
    # when those cells are run again.
    for i, this_thr in enumerate(thr_list):
        print('Processing threshold %1.2f' % this_thr, flush=True)
        y_pred_thr = postprocessing.generate_mark_intervals_with_list(
            y_pred[set_name], pages[set_name], 200//8, 200, thr=this_thr)
        this_stats = [metrics.by_event_confusion(this_y, this_y_pred, iou_thr=iou_thr)
                      for (this_y, this_y_pred) in zip(y_stamps[set_name], y_pred_thr)]
        # Average over the subjects of the set
        this_recall = np.mean([m['recall'] for m in this_stats])
        this_precision = np.mean([m['precision'] for m in this_stats])
        pr_curve[set_name][i, 0] = this_recall
        pr_curve[set_name][i, 1] = this_precision
    print('Done', flush=True)

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(4, 4), dpi=150)
markersize = 8

# F1 score levels, drawn as background contours of 2*x*y/(x+y)
delta = 0.01
x_ = y_ = np.arange(1, 100) * delta
X, Y = np.meshgrid(x_, y_)
Z = 2 * X * Y / (X + Y)
CS = ax.contour(X, Y, Z, colors='k', alpha=0.3, levels=[0.6, 0.7, 0.8, 0.9])
ax.clabel(CS, fontsize=7.5, fmt='%1.2f')

color_list = {'train':'#43a047', 'val':'#0288d1', 'test': '#c62828'}
# Row of pr_curve that corresponds to threshold 0.5
half_thr_idx = thr_list.index(0.5)

# One curve per subset; columns of pr_curve are [recall, precision]
for order, set_name in enumerate(set_list, start=1):
    recall, precision = pr_curve[set_name].T
    ax.plot(recall, precision,
            label=set_name.capitalize(), markersize=markersize, marker='.',
            linewidth=1.5, color=color_list[set_name], zorder=10*order)
# Highlight threshold=0.5
for order, set_name in enumerate(set_list, start=1):
    half_recall, half_precision = pr_curve[set_name][half_thr_idx]
    ax.scatter(half_recall, half_precision,
               s=100, c=color_list[set_name], zorder=10*order)

ax.set_title('Performance with IoU$>$%1.1f' % iou_thr, fontsize=11)
ax.set_xlim([0.5, 1])
ax.set_ylim([0.5, 1])
ax.set_xlabel('Recall', fontsize=8.5)
ax.set_ylabel('Precision', fontsize=8.5)
ax.tick_params(labelsize=8.5)
ax.legend(loc='lower left', fontsize=8.5)
ax.grid()
plt.show()

## Visualization of subject

In [None]:
# Subject to visualize: subset it belongs to and its id
subset_name = 'train'
subject_id = 10

# -----
# Position of the subject inside its subset; signals, marks, pages and
# predictions of a subset are all indexed with this same position
idx_dict = {'train': train_ids, 'val': val_ids, 'test': test_ids}
idx_subject = idx_dict[subset_name].index(subject_id)
this_signal = x[subset_name][idx_subject]
this_marks_1 = y[subset_name][idx_subject]
this_n2_pages = pages[subset_name][idx_subject]
this_prob = y_pred[subset_name][idx_subject]
if dataset_name == constants.MASS_NAME:
    # Marks of the second expert are loaded only for MASS
    _, this_marks_2 = dataset.get_subject_data(subject_id, which_expert=2, verbose=True)
    channel_name = 'C3-CLE'
else:
    channel_name = 'F4-C4'

# make a color map of fixed colors
# The class is imported by name so that this cell does not depend on the
# notebook-level name `colors`, which is easy to rebind by accident in
# plotting cells (e.g. to a dictionary of plot colors).
from matplotlib.colors import ListedColormap
cmap = ListedColormap(['white', '#c62828'])

def plot_page(page_idx):
    """Plot one page of the selected subject.

    Stacked panels show the signal, the marks of the first expert and the
    model probability. For the MASS database a fourth panel shows the marks
    of the second expert.

    The function reads the notebook-level variables prepared by the subject
    selection cell (`this_signal`, `this_marks_1`, `this_prob`,
    `this_n2_pages`, `this_marks_2` for MASS, `cmap`, ...).

    Args:
        page_idx: 1-based position of the page among the rows of
            `this_signal`; it is converted to a 0-based index internally.
    """
    if dataset_name == constants.MASS_NAME:
        fig = plt.figure(figsize=(10, 5), dpi=150)
        gs = gridspec.GridSpec(4, 1, height_ratios=[4, 1, 1, 1])
    else:
        fig = plt.figure(figsize=(10, 4), dpi=150)
        gs = gridspec.GridSpec(3, 1, height_ratios=[4, 1, 1])

    # The slider counts pages from 1; arrays are indexed from 0
    page_idx = page_idx - 1

    segment_signal = this_signal[page_idx, :]
    segment_marks_1 = this_marks_1[page_idx, :]
    segment_prob = this_prob[page_idx, :]
    # Time of each sample of the page, in units of 1/dataset.fs
    time_axis = np.arange(this_signal.shape[1]) / dataset.fs

    if dataset_name == constants.MASS_NAME:
        segment_marks_2 = this_marks_2[page_idx, :]

    gs_idx = 0

    # Signal
    ax = fig.add_subplot(gs[gs_idx])
    gs_idx = gs_idx + 1
    ax.plot(time_axis, segment_signal, linewidth=1, color='#455a64')
    ax.set_yticks([])
    # NOTE(review): the fixed limits assume pages of 20 s -- confirm
    ax.set_xlim([0, 20])
    ax.set_ylim([-10, 10])
    ax.set_title('Subject %d (%s-%s). %s EEG channel. Page in record: %d (intervals of 0.5s are shown)'
                 % (subject_id, dataset_name.upper(), subset_name.capitalize(), channel_name, this_n2_pages[page_idx]), fontsize=10)
    ax.set_xticks([0, 5, 10, 15, 20])
    ax.set_xticks(np.arange(0, 20, 0.5), minor=True)
    # The on/off flag is passed positionally: its keyword was renamed from
    # `b` to `visible` in Matplotlib 3.5, and `b=` is rejected by newer
    # releases, while the positional form works on every version.
    ax.grid(True, axis='x', which='minor')
    ax.tick_params(labelsize=8.5)

    # Expert mark
    ax = fig.add_subplot(gs[gs_idx])
    gs_idx = gs_idx + 1
    ax.imshow(segment_marks_1[np.newaxis, :], interpolation=None, aspect='auto', cmap=cmap)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title('Expert ground truth (red indicates event)', fontsize=10)

    # Neural net
    ax = fig.add_subplot(gs[gs_idx])
    gs_idx = gs_idx + 1
    ax.plot(segment_prob, linewidth=1.5, color='#c62828')
    ax.set_xticks([])
    ax.set_ylim([-0.1, 1.1])
    # NOTE(review): 500 presumably is the number of probability samples per
    # page (20 s at 200/8 Hz) -- confirm
    ax.set_xlim([0, 500])
    ax.set_yticks([0, 1])
    # A single minor tick at 0.5 draws the threshold line through the grid
    ax.set_yticks([0.5], minor=True)
    ax.grid(True, axis='y', which='minor')
    ax.tick_params(labelsize=8.5)
    ax.set_title('Model probability prediction (0.5 threshold is shown)', fontsize=10)

    if dataset_name == constants.MASS_NAME:
        ax = fig.add_subplot(gs[gs_idx])
        ax.imshow(segment_marks_2[np.newaxis, :], interpolation=None, aspect='auto', cmap=cmap)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_title('Second expert, not used for training (red indicates event)', fontsize=10)

    plt.tight_layout()
    plt.show()

In [None]:
# Slider over the pages of the selected subject; values start at 1 because
# plot_page converts its argument to a 0-based index
page_slider = widgets.IntSlider(
    value=1, min=1, max=this_n2_pages.shape[0], step=1,
    continuous_update=False)
widgets.interact(plot_page, page_idx=page_slider);