In [1]:
import json
import numpy as np
import os
import random
import pandas as pd
import pickle
import seaborn as sns
import typing 

from IPython.display import clear_output
from misc import _get_usleep_token
from matplotlib import pyplot as plt
from matplotlib.patches import Polygon
from pprint import pprint
from scipy import io, special
from scipy.stats import mode

from sklearn import metrics, model_selection

# Import project utils
from utils import *

In [2]:
# GLOBALS
# Notebook-wide constants and display options.

# Class name -> integer code for the two-class Wake / microsleep (MS) problem.
MS_MAPPING = {"Wake": 0, "MS": 1}
# Class name -> integer code for the five AASM sleep stages.
AASM_MAPPING = {"Wake": 0, "N1": 1, "N2": 2, "N3": 3, "REM": 4}

# Print floats inside numpy arrays with two decimals.
float_formatter = "{:.2f}".format
np.set_printoptions(formatter={'float_kind':float_formatter})

# IPython magic: interactive (ipympl) matplotlib figures; only valid inside Jupyter/IPython.
%matplotlib widget

## Manuscript for preliminary U-Sleep evaluation on Bern data
The comparison will be made with the results from Skorucak et al., 2020 (RF, SVM, LSTM classifiers).
The evaluation used in the reference study:
- bMSE vs Wake (also some other classification problems e.g. bMSE vs {Wake, uMSE, uMSEc, uED})
- True labels were converted from 200 Hz to 5 Hz (200 ms resolution) using a 9 second median filter with 200 ms step size.
- Predictions from the RF and SVM were converted to 5 Hz resolution using same median filter.
- After resampling, MS predictions shorter than 1 second were excluded.
- Sensitivity, specificity, accuracy, precision, and Cohen's kappa were calculated.
    
In this analysis, we will use the same dev/test split (53/23) and adapt the U-Sleep output to fit the same evaluation scheme as used in the reference paper.
The hyperparameters of the U-Sleep model are:
* Data per prediction (prediction rate): 1, 2, 4, 8, 16
* Post-processing of probabilities: y_argmax (no tunable threshold), y_max_sleep, and y_sum_sleep
* MS Threshold: 0.025:0.025:1.0

This gives 5 x 3 = 15 models.
Since the U-Sleep model is pre-trained, the only "training" part is the threshold tuning. The optimal threshold will be determined by the highest F1-score, analogous to Brink-Kjær. The optimal model will be found using 5-fold cross-validation, where a model is trained (i.e., its threshold is tuned) on K-1 folds and validated against the remaining fold. The model with the highest F1-score will be chosen and re-trained on the entire dev set before evaluating it on the test set.

---
## Update (09/10/2020)
* Reproduce methods by Skorucak et al., 2020
* Comparison between U-Sleep and Bern with their definition
* Comparison between U-Sleep and Bern with our duration criteria of microsleep



In [125]:
def get_all_probs(rec):
    """Load the class probabilities of a recording and derive two Wake/sleep views.

    Parameters
    ----------
    rec
        Passed unchanged to ``get_probs`` (defined outside this cell).

    Returns
    -------
    tuple
        ``(probs, probs_sum, probs_max)`` where ``probs`` is whatever
        ``get_probs`` returned, and the other two are two-column arrays:
        column 0 is ``probs[:, 0]``; column 1 is the row-wise sum
        (``probs_sum``) or row-wise maximum (``probs_max``) of columns 1-4.
        NOTE(review): columns presumably follow AASM_MAPPING
        (Wake, N1, N2, N3, REM) -- confirm against ``get_probs``.
    """
    probs = get_probs(rec)

    first_col = probs[:, 0]
    other_cols = probs[:, 1:5]

    summed = np.sum(other_cols, axis=1)
    peak = np.max(other_cols, axis=1)

    probs_sum = np.column_stack([first_col, summed])
    probs_max = np.column_stack([first_col, peak])
    return probs, probs_sum, probs_max


def psuedo_resample(y_org, first_last):
    """Reduce ``y_org`` to one median value per index window.

    Parameters
    ----------
    y_org : array
        Either 1-D (one series) or 2-D (one series per row).
    first_last : iterable of index pairs
        Each item provides ``item[0]`` (slice start) and ``item[1]``
        (slice stop, exclusive) into the last axis of ``y_org``.

    Returns
    -------
    numpy.ndarray
        For 1-D input: one median per window.
        For 2-D input: shape ``(rows, windows)``, the per-row median of
        every window.
    """
    is_single_series = len(y_org.shape) <= 1

    medians = []
    for window in first_last:
        start, stop = window[0], window[1]
        if is_single_series:
            medians.append(np.median(y_org[start:stop]))
        else:
            medians.append(np.median(y_org[:, start:stop], 1))

    stacked = np.array(medians)
    # Windows were collected along axis 0; flip so rows of y_org stay rows.
    return stacked if is_single_series else stacked.T
    
def make_first_last(time_pos, hz):
    """Convert per-window time stamps into integer index pairs at rate ``hz``.

    Parameters
    ----------
    time_pos : iterable of sequences
        One sequence of time stamps per window; only the first and last
        entry of each sequence are used.
        NOTE(review): presumably seconds, as returned by
        ``BernLabels.apply_rolling_func`` -- confirm.
    hz : number
        Multiplier that turns a time stamp into a sample index.

    Returns
    -------
    numpy.ndarray of int
        One ``[first, last]`` row per window, with
        ``first = floor(t_first * hz)`` and ``last = ceil(t_last * hz) + 1``.
    """
    return np.array(
        [[np.floor(x[0] * hz), np.ceil(x[-1] * hz) + 1] for x in time_pos],
        dtype=int,
    )

# Label grouping and flag handed to BernLabels(lab, _map, _uni) in the processing cell.
# NOTE(review): the integer codes are presumably the raw Bern label values -- confirm in utils.
_map = {"Wake": [0,2,3], "MS": [1]}
_uni = False

# Threshold grid: tstart, tstart + tstep, ..., tmax (inclusive).
tstep = 0.025
tstart = 0.025
tmax = 1.0
# Number of grid points; rounded because the float division is not exact.
tnum = ((tmax - tstart) / tstep) + 1
# Use tmax as the endpoint so the grid stays consistent with tnum when tmax changes.
thresholds = np.linspace(tstart, tmax, int(np.round(tnum)))

In [148]:
## PROCESSING PRELIM ANALYSIS
# For every prediction rate, build a per-recording info table (paths, split,
# "contains MS" flags), write it to prelim_data/corrected_{hz}_info_df.csv,
# and collect the 200 ms labels plus the index windows into the predictions.
# Relies on names defined outside this cell: all_names, all_edf_files,
# all_labels_files, splits, BernLabels.

HZ = [8, 16, 32, 64, 128]

# to_csv does not create missing directories.
os.makedirs("prelim_data", exist_ok=True)

for hz in HZ:
    resampled_labels = dict.fromkeys(all_names)
    resampled_first_last = dict.fromkeys(all_names)
    entries = []

    print(f"Dataframe creation for {hz} Hz")
    for edf, lab in zip(all_edf_files, all_labels_files):

        _id = edf.replace(".edf","")
        # Anything not listed in splits["train"] is treated as test.
        _type = "train" if _id in splits["train"] else "test"

        _edf = os.path.join("edf_data",edf)
        _labels = os.path.join("labels",lab)
        _preds = os.path.join("predictions", f"{hz}_hz",f"{_id}.npy")

        # NOTE(review): BernLabels receives the bare file name `lab`, not the
        # joined path `_labels` -- confirm that it resolves the directory itself.
        _tmp = BernLabels(lab, _map, _uni)
        # NOTE(review): "> 1" requires at least two positive samples; confirm
        # that "> 0" was not intended.
        _any_ms = np.sum(_tmp.labels) > 1

        _ms_200, _time_pos = _tmp.apply_rolling_func(win=0.2, step=0.2)
        # Resolve median ties (0.5) in favour of MS. The previous code used
        # `==` here, which only compared and discarded the result, so ties
        # were truncated to 0 by the integer cast below.
        _ms_200 = np.where(np.asarray(_ms_200) == 0.5, 1, _ms_200)
        _any_ms_200 = np.sum(_ms_200) > 1

        entry = {"type": _type, "id": _id, "edf": _edf, "labels": _labels, "preds": _preds,
                 "ms": _any_ms, "ms_200": _any_ms_200}
        entries.append(entry)

        fixed_resampled_labels = _ms_200

        resampled_labels[_id] = np.array(fixed_resampled_labels, dtype=int)
        resampled_first_last[_id] = make_first_last(_time_pos, hz)

    df = pd.DataFrame.from_records(entries)
    df.to_csv(f"prelim_data/corrected_{hz}_info_df.csv")

# Disabled: resampling of the predictions and writing of the per-rate pickle.
# NOTE(review): the cross-validation cell loads
# "prelim_data/{hz}_processed_recs2.pickle", while the code below writes
# "{hz}_processed_recs2.pickle" without the directory -- confirm how the
# pickles on disk were produced before re-enabling.

#     processed_recs = dict.fromkeys(all_names)
#     print(f"Processing recs for {hz} Hz")

#     for i, row in df.iterrows():
#         print(f"{i+1}/{df.shape[0]}")
#         p1,p2,p3 = get_all_probs(row.preds)
#         fl = resampled_first_last[row.id]

#         # Argmax
#         preds_argmax = aasm_to_wake_sleep(np.argmax(p1,axis=1))
#         resampled_preds_argmax=psuedo_resample(preds_argmax, fl)
#         resampled_preds_argmax[resampled_preds_argmax==0.5] = 1

#         # Sum
#         preds_sum = np.array([p2[:,1] > t for t in thresholds])*1
#         resampled_preds_sum=psuedo_resample(preds_sum, fl)
#         resampled_preds_sum[resampled_preds_sum==0.5] = 1

#         # Max
#         preds_max = np.array([p3[:,1] > t for t in thresholds])*1
#         resampled_preds_max=psuedo_resample(preds_max, fl)
#         resampled_preds_max[resampled_preds_max==0.5] = 1

#         # Store
#         entry = {"preds_argmax": resampled_preds_argmax,
#                  "preds_sum": resampled_preds_sum,
#                  "preds_max": resampled_preds_max,
#                  "labels": resampled_labels[row.id]}
#         processed_recs[row.id] = entry
#         clear_output(wait=True)


#     pickle_file = f'{hz}_processed_recs2.pickle'
#     write_to_pickle_file(processed_recs, pickle_file)
    


Dataframe creation for 8 Hz
Dataframe creation for 16 Hz
Dataframe creation for 32 Hz
Dataframe creation for 64 Hz
Dataframe creation for 128 Hz


In [None]:
# Load the recording info table written for 8 Hz and split it on the "type"
# column: rows marked "train" form the dev set, rows marked "test" the test set.
_df = pd.read_csv("prelim_data/corrected_8_info_df.csv")
dev_df, test_df = (
    _df.loc[_df["type"] == split_name].reset_index(drop=True)
    for split_name in ("train", "test")
)


In [191]:

# Stratified k-fold evaluation on the dev set for every prediction rate and
# post-processing method. One result row is produced per (hz, method, fold);
# the accumulated table is checkpointed to corrected_prelim2_df.csv after
# each method. Relies on names defined outside this cell: dev_df,
# load_pickle_from_file, my_collector, Tracker.

seed = 42
k = 5
skf = model_selection.StratifiedKFold(k, shuffle=True, random_state=seed)

pred_keys = ["preds_argmax","preds_sum","preds_max"]
HZ = [128, 64, 32, 16, 8]

fold_frames = []  # one single-row frame per evaluated fold
i = 0             # running row index across all (hz, method, fold) combinations
for hz in HZ:
    processed_recs_file = f"prelim_data/{hz}_processed_recs2.pickle"
    # NOTE(review): presumably unpickles the file -- only load pickles from a trusted source.
    processed_recs = load_pickle_from_file(processed_recs_file)
    for pk in pred_keys:
        # The fold index gets its own name so that it no longer overwrites
        # the number of folds `k` defined above.
        for fold, (train_idx, val_idx) in enumerate(skf.split(dev_df.index, dev_df.ms)):

            print(f"K: {fold} - Hz: {hz} - Method: {pk}")
            # train_idx / val_idx are positions, so select positionally.
            train_id = dev_df["id"].iloc[train_idx].values
            val_id = dev_df["id"].iloc[val_idx].values

            train_yhat, train_y = my_collector(collection = processed_recs, ids = train_id, key=pk, rm = True)
            val_yhat, val_y     = my_collector(collection = processed_recs, ids = val_id, key=pk, rm = True)

            k_tracker = Tracker(fold, train_y, train_yhat, val_y, val_yhat)
            k_dict = k_tracker.to_dict()
            k_dict["method"] = pk
            k_dict["hz"] = hz

            fold_frames.append(pd.DataFrame(k_dict, index=[i]))

            clear_output(wait=True)
            i += 1

        # Concatenate once per checkpoint instead of re-growing the frame on every fold.
        k_df = pd.concat(fold_frames)
        k_df.to_csv("corrected_prelim2_df.csv")


K: 4 - Hz: 8 - Method: preds_max


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
