In [3]:
%matplotlib widget
import sys
import os

# Both variables are set before TensorFlow is imported so they are in place when it initialises.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ["CUDA_VISIBLE_DEVICES"] = "2"  # Set the GPU you wish to use here
import tensorflow as tf
gpus = tf.config.experimental.list_physical_devices('GPU')
# Only configure memory growth when a GPU is actually visible; indexing an empty
# list would otherwise abort the whole notebook with an IndexError on CPU-only hosts.
if gpus:
    tf.config.experimental.set_memory_growth(gpus[0], enable=True)

from tensorflow.python.keras import Input
from tensorflow.keras.models import Sequential, Model
from tensorflow.python.keras.layers import Conv1D, GlobalAveragePooling1D, AveragePooling1D, MaxPool1D, Flatten, Dense, \
    Reshape, Dropout, TimeDistributed, Add, LSTM, GRU, Bidirectional
from tensorflow.python.keras.layers import LeakyReLU
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

from sklearn.utils import class_weight
from sklearn.model_selection import train_test_split

from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from scipy import interpolate

from multiprocessing import Manager, Pool
from joblib import Parallel, delayed

# Change the working directory before the `src.*` imports below — presumably so the
# project packages resolve relative to it (TODO confirm). The path is machine-specific.
os.chdir('/home/kkotzen/research/PPG_sleepstaging/')
from src.models.model_utils import plot_hypnogram
from src.parsing.MESAParser import MESAParser

# Sleep Stage Settings
# Integer class ids used as training targets.
WAKE, LIGHT, DEEP, REM = 0, 1, 2, 3
# Raw stage code -> stage name (codes 0..4 only; any other code raises KeyError on lookup).
sleep_string_encoding = {0: 'wake', 1: 'n1', 2: 'n2', 3: 'n3', 4: 'rem'}
# Stage name -> class id; n1 and n2 are merged into LIGHT.
sleep_encoding = {'wake': WAKE, 'n1': LIGHT, 'n2': LIGHT, 'n3': DEEP, 'rem': REM}

# Sequence Settings
# NOTE(review): load_one_xy re-evaluates the signal on a 60/128 s grid (128/60 Hz) while the
# epoch length below assumes 2 Hz — confirm which rate is intended.
ihr_fs = 2  # samples per second; the alternative 128/60 was left disabled
samples_per_epoch = 30 * ihr_fs  # 30 seconds per epoch
total_epochs = 1200  # fixed number of epochs per recording
total_samples = int(samples_per_epoch*total_epochs)  # fixed number of signal samples per recording

def comp_google_ihr(patient, dl):
    """Build a z-scored instantaneous heart-rate series sampled every 0.5 units of time.

    Args:
        patient: Identifier forwarded to ``dl.load_annotation``.
        dl: Loader exposing ``load_annotation(patient, signal=, annotator=, annotation=)``
            and an ``ecg_fs`` attribute by which the loaded peak positions are divided
            (presumably sample indices and Hz, giving seconds — TODO confirm).

    Returns:
        1-D array: the normalised heart rate linearly interpolated onto a grid running
        from 0 up to (excluding) the last retained beat midpoint, in steps of 0.5.
    """
    peak_times = dl.load_annotation(patient, signal='EKG', annotator='epltd0', annotation='Peaks')
    peak_times = peak_times / dl.ecg_fs

    # Inter-beat intervals, each time-stamped at the midpoint of the two beats that span it.
    intervals = np.diff(peak_times)
    midpoints = peak_times[:-1] + (peak_times[1:] - peak_times[:-1]) / 2

    # Keep only intervals strictly inside the 1st..99th percentile band.
    high_cut = np.percentile(intervals, 99)
    low_cut = np.percentile(intervals, 1)
    keep = (intervals < high_cut) & (intervals > low_cut)
    intervals = intervals[keep]
    midpoints = midpoints[keep]

    # Rate is the reciprocal interval, standardised to zero mean and unit variance.
    rate = 1 / intervals
    rate = (rate - np.mean(rate)) / np.std(rate)

    grid = np.arange(0, midpoints[-1], 1/2)
    return np.interp(grid, midpoints, rate)


def _time_series_subsequences(ts, window, hop=1):
    assert len(ts.shape) == 1
    shape = (int(int(ts.size - window) / hop + 1), window)
    strides = ts.strides[0] * hop, ts.strides[0]
    return np.lib.stride_tricks.as_strided(ts, shape=shape, strides=strides)


def _batch_subsequenced_padded(X, win_len, hop):
    """Cut every sequence of a batch into zero-padded, overlapping windows.

    Each sequence is zero-padded by ``win_len - hop`` samples in total so that exactly
    ``X.shape[1] // hop`` windows of length ``win_len`` are produced, one per hop.

    Args:
        X: Array of shape (batch, samples, channels). Each sequence is flattened before
            windowing, so this assumes a single channel — TODO confirm for other inputs.
        win_len: Window length in samples; must be >= ``hop``.
        hop: Step between consecutive windows; must divide ``X.shape[1]``.

    Returns:
        Array of shape (batch, samples // hop, win_len).

    Raises:
        AssertionError: If ``X.shape[1]`` is not a multiple of ``hop``.
        ValueError: If ``win_len`` is smaller than ``hop``.
    """
    assert X.shape[1]%hop == 0
    if win_len < hop:
        raise ValueError("win_len must be greater than or equal to hop")

    n_samples = X.shape[1]
    # Split the padding so left + right == win_len - hop even when that difference is odd;
    # the left part equals the original int(win_len/2 - hop/2).
    pad_total = win_len - hop
    pad_left = pad_total // 2

    padded = np.zeros((X.shape[0], n_samples + pad_total, X.shape[2]))
    # Explicit end index: a `-padding` slice would be empty when the padding is zero.
    padded[:, pad_left:pad_left + n_samples, :] = X

    windows = np.zeros((X.shape[0], n_samples // hop, win_len))
    for i in range(X.shape[0]):
        windows[i] = _time_series_subsequences(padded[i].flatten(), win_len, hop)
    return windows


def load_one_xy(patient):
    """Load the heart-rate signal and encoded sleep stages for one patient.

    Args:
        patient: Patient number; converted to a string zero-padded to four characters.

    Returns:
        Tuple ``(X, Y)``: the re-interpolated heart-rate series truncated to at most
        ``total_samples`` values, and the class ids truncated to at most ``total_epochs``.
    """
    parser = MESAParser()
    patient = str(patient).zfill(4)

    signal = comp_google_ihr(patient, parser)

    # Re-evaluate the series on a 60/128-spaced grid with the same number of points.
    n_points = signal.shape[0]
    grid_in = np.arange(0, n_points/2, 1/2)
    grid_out = np.arange(0, n_points/(128/60), 60/128 )
    signal = interpolate.interp1d(grid_in, signal)(grid_out)

    # Raw stage codes -> names -> class ids; codes above 5 are first reset to 0.
    stages = parser.load_sleep(patient)
    stages[stages > 5] = 0
    stages = np.array([sleep_encoding[sleep_string_encoding[code]] for code in stages])
    # Kept from the original; has no effect while every encoded class id is below 10.
    stages[stages > 9] = 0

    # Truncate both outputs to the fixed lengths used for batching.
    X = signal[0:min(signal.shape[0], total_samples)]
    Y = stages[0:min(stages.shape[0], total_epochs)]

    return X, Y


def load_XY(patients, parallel=False, n_cores=16):
    """Load signals and labels for many patients into fixed-size, zero-padded arrays.

    Args:
        patients: Sequence of patient identifiers accepted by ``load_one_xy``.
        parallel: When True, load patients in a ``multiprocessing.Pool``.
        n_cores: Number of worker processes used when ``parallel`` is True.

    Returns:
        Tuple ``(X, Y)`` with shapes (n_patients, total_samples, 1) and
        (n_patients, total_epochs). Rows shorter than the fixed length stay zero-filled.
    """
    batch_size = len(patients)
    X = np.zeros((batch_size, total_samples))
    Y = np.zeros((batch_size, total_epochs))

    def _store(results):
        # tqdm wraps the results (not enumerate) and gets a total so the bar shows real progress.
        for i, (x, y) in enumerate(tqdm(results, total=batch_size)):
            X[i, 0:x.shape[0]] = x
            Y[i, 0:y.shape[0]] = y

    if parallel:
        # The context manager tears the pool down even when a worker raises,
        # so no worker processes are left behind.
        with Pool(n_cores) as pool:
            _store(pool.imap(load_one_xy, patients))
    else:
        _store(map(load_one_xy, patients))

    return np.expand_dims(X,axis=2), Y

def calc_class_sample_weights(ty, weight_adjustment=[]):
    """Compute balanced class weights and the matching per-sample weights.

    Args:
        ty: Array of class labels of any shape.
        weight_adjustment: Optional multipliers, one per class in sorted label order.
            An empty sequence (the default, never mutated here) means no adjustment.

    Returns:
        Tuple ``(class_weights, sample_weights)``: a dict mapping each class label to its
        weight, and an array shaped like ``ty`` holding the weight of every sample.

    Raises:
        ValueError: If ``weight_adjustment`` is non-empty and its length differs from the
            number of distinct classes.
    """
    y = ty.reshape(-1)
    classes = np.unique(y)

    if len(weight_adjustment) > 0 and len(weight_adjustment) != len(classes):
        raise ValueError("The weight adjustment variable needs to have as many values as there are classes")
    elif len(weight_adjustment) == 0:
        weight_adjustment = np.ones(len(classes))

    weights = class_weight.compute_class_weight('balanced', classes=classes, y=y) * weight_adjustment

    def _label_key(label):
        # Use plain ints for integral labels so the dict looks as before ({0: ..., 1: ...});
        # anything else is kept as-is.
        try:
            as_float = float(label)
        except (TypeError, ValueError):
            return label
        return int(as_float) if as_float.is_integer() else label

    # Key by the actual label, not its position: with labels such as {0, 1, 3} a positional
    # key would leave label 3 without a weight and produce None in the sample weights.
    class_weights = {_label_key(c): w for c, w in zip(classes, weights)}
    sample_weights = np.vectorize(class_weights.get)(ty)
    return class_weights, sample_weights

def duplicate(x):
    """Return a flat array in which every element of ``x`` appears twice in a row."""
    doubled_rows = [[item, item] for item in x]
    return np.array(doubled_rows).flatten()

def hold_line(t, x):
    """Convert edges ``t`` and per-interval values ``x`` into step-plot coordinates.

    ``t`` must hold one more entry than ``x``. If both have the same length, one extra
    edge is appended by repeating the last spacing of ``t``.

    Returns:
        Tuple ``(t_, x_)`` of equal-length arrays where every value is held constant
        between its two edges.

    Raises:
        ValueError: If ``t`` is not exactly one element longer than ``x`` after the
            optional extension.
    """
    edges = list(t)
    values = list(x)

    if len(edges) == len(values):
        # Extrapolate the closing edge from the last two edges.
        edges.append(edges[-1]+edges[-1]-edges[-2])
    if len(edges) - len(values) != 1:
        raise ValueError()

    # Interior edges appear twice (end of one step, start of the next); the outer two once.
    t_ = duplicate(edges)[1:-1]
    x_ = duplicate(values)
    return t_, x_

In [4]:
dl = MESAParser()
train_patients, test_patients= dl.get_train_test_patients_from_file()
# Carve a validation set of the same size as the test set out of the training patients.
train_patients, validate_patients = train_test_split(train_patients, test_size = len(test_patients), random_state=6668)
# Label each count with its own split (all three were previously printed as "Train").
print(f'Train: {len(train_patients)}')
print(f'Validate: {len(validate_patients)}')
print(f'Test: {len(test_patients)}')

Train: 1576
Train: 196
Train: 196


In [5]:
# One-off generation of the cached arrays (left disabled; re-enable to rebuild the .npz files):
# train_X, train_Y = load_XY(train_patients, parallel=True, n_cores=8)
# validate_X, validate_Y = load_XY(validate_patients, parallel=True, n_cores=8)
# test_X, test_Y = load_XY(test_patients, parallel=True, n_cores=8)

# np.savez("train.npz", train_X=train_X, train_Y=train_Y)
# np.savez("validate.npz", validate_X=validate_X, validate_Y=validate_Y)
# np.savez("test.npz", test_X=test_X, test_Y=test_Y)

# np.load returns an NpzFile that keeps the archive open; the context managers close it
# once the arrays have been read into memory.
with np.load('train.npz') as train:
    train_X, train_Y = train['train_X'], train['train_Y']
with np.load('validate.npz') as validate:
    validate_X, validate_Y = validate['validate_X'], validate['validate_Y']
with np.load('test.npz') as test:
    test_X, test_Y = test['test_X'], test['test_Y']

# Relabel any epoch stored with code 9 as class 0.
train_Y[train_Y==9] = 0
validate_Y[validate_Y==9] = 0
test_Y[test_Y==9] = 0

print(train_X.shape, train_Y.shape)
print(validate_X.shape, validate_Y.shape)
print(test_X.shape, test_Y.shape)

(1576, 72000, 1) (1576, 1200)
(196, 72000, 1) (196, 1200)
(196, 72000, 1) (196, 1200)


In [6]:
# Visual check of one training recording (index 400): labels on top, signal below.
plt.close('all')
# plt.plot(train_X[1])
# 1201 edges spaced 30 apart give one step per label row entry (1200 epochs).
t, y = hold_line(np.arange(0,1201*30, 30),train_Y[400])
ax1 = plt.subplot(2,1,1)
plt.plot(t, y)
# Time axis for the signal assumes 0.5 between samples (2 Hz) — see ihr_fs.
t, y = np.arange(0,train_X.shape[1]/2, 0.5), train_X[400]
# Share the x axis so zooming one panel moves the other.
ax2=plt.subplot(2,1,2, sharex=ax1)
plt.plot(t, y)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x7fa3ce90dca0>]

In [7]:
# Cut every recording into one window of win_len samples per epoch (hop = samples_per_epoch).
win_len = 256
samples_per_epoch = 30 * ihr_fs  # same value as defined in the settings cell
    
# Labels are passed through unchanged; the trailing-underscore names refer to the same arrays.
train_X_, train_Y_ = _batch_subsequenced_padded(train_X, win_len, samples_per_epoch), train_Y
validate_X_, validate_Y_ = _batch_subsequenced_padded(validate_X, win_len, samples_per_epoch), validate_Y
test_X_, test_Y_ = _batch_subsequenced_padded(test_X, win_len, samples_per_epoch), test_Y

print(train_X_.shape, train_Y_.shape)
print(validate_X_.shape, validate_Y_.shape)
print(test_X_.shape, test_Y_.shape)

(1576, 1200, 256) (1576, 1200)
(196, 1200, 256) (196, 1200)
(196, 1200, 256) (196, 1200)


In [8]:
plt.close('all')
plt.figure(figsize=(10,5))
# Top panel: the full signal of recording 1.
plt.subplot(4,1,1)
plt.plot(train_X[1])
# Panels 2-4: its first three windows.
for panel, window_idx in enumerate(range(3), start=2):
    plt.subplot(4,1,panel)
    plt.plot(train_X_[1, window_idx, :])
plt.tight_layout()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [None]:
import pandas as pd
# Normalised histogram over every training label, to inspect the class balance.
pd.DataFrame(train_Y.flatten()).hist(density=True)

In [17]:
# Reset Keras' global state before (re)building the model in this cell.
tf.keras.backend.clear_session()

def dilated_residual_convolution(X_in, kernel, dilations, dropout, reg):
    """Stack dilated convolutions and add the block input back as a residual.

    Args:
        X_in: Input tensor; it is summed with the 128-filter convolution output, so its
            shape must be compatible with that sum.
        kernel: Kernel size of every convolution.
        dilations: Iterable of dilation rates, applied one after another.
        dropout: Dropout rate applied after the convolutions; skipped when <= 0.
        reg: Kernel regulariser passed to every convolution.

    Returns:
        Tensor ``X_in + f(X_in)`` where ``f`` is the dilated stack (plus dropout).
    """
    # Chain the layers: each convolution consumes the previous output. Feeding X_in to
    # every layer would discard all but the last dilation.
    X = X_in
    for dilation in dilations:
        X = TimeDistributed(Conv1D(128, kernel_size=kernel,  activation='relu', dilation_rate=dilation, padding='same',  kernel_regularizer=reg))(X)
    if dropout > 0:
        # Honour the requested rate instead of a fixed 0.2.
        X = TimeDistributed(Dropout(dropout))(X)
    X = Add()([X, X_in])
    return X

def residual_convolution(X_in, kernel, filt, reg):
    """Two convolutions plus max-pooling, summed with a pooled 1x1-convolved shortcut.

    Args:
        X_in: Input tensor.
        kernel: Kernel size of the two main-branch convolutions.
        filt: Number of filters of the two main-branch convolutions.
        reg: Kernel regulariser for the main-branch convolutions.

    Returns:
        Sum of the main branch and the shortcut branch, both halved in length by pooling.
    """
    # Shortcut: pool to the main branch's length, then a 1x1 convolution.
    # NOTE(review): this convolution has a single filter while the main branch has `filt`;
    # confirm whether `filt` filters were intended here.
    shortcut = TimeDistributed(MaxPool1D(2, strides=2))(X_in)
    shortcut = TimeDistributed(Conv1D(1, 1, activation='relu', padding='same'))(shortcut)

    # Main branch: two same-padded convolutions followed by stride-2 max pooling.
    main = X_in
    for _ in range(2):
        main = TimeDistributed(Conv1D(filt, kernel, activation='relu', padding='same',  kernel_regularizer=reg))(main)
    main = TimeDistributed(MaxPool1D(2, strides=2))(main)

    return Add()([main, shortcut])


def convolution(X_in, kernel, filt, reg):
    """Apply two same-padded ReLU convolutions followed by stride-2 max pooling.

    Args:
        X_in: Input tensor.
        kernel: Kernel size of both convolutions.
        filt: Number of filters of both convolutions.
        reg: Kernel regulariser for both convolutions.

    Returns:
        The pooled feature tensor (no residual connection is added here).
    """
    features = X_in
    for _ in range(2):
        features = TimeDistributed(Conv1D(filt, kernel, activation='relu', padding='same',  kernel_regularizer=reg))(features)
    return TimeDistributed(MaxPool1D(2, strides=2))(features)

reg = 'l1'

# One sample = one recording: (epochs, window length, 1 channel).
inputs = Input(shape=(validate_X_.shape[1], validate_X_.shape[2], 1))

# Input Convolution
X = TimeDistributed(Conv1D(8, 1, activation='relu', padding='same', kernel_regularizer=reg))(inputs)
X = convolution(X, kernel=3, filt=16, reg=reg)
X = convolution(X, kernel=3, filt=32, reg=reg)
X = convolution(X, kernel=3, filt=64, reg=reg)
# Per-epoch feature vector.
X = TimeDistributed(Flatten())(X)
X = TimeDistributed(Dense(units=256, activation='relu'))(X)
# Disabled alternatives kept from earlier experiments:
# X = Reshape(target_shape=(1, X.shape[1], X.shape[2]))(X)
# X = dilated_residual_convolution(X, kernel=7, dilations=[2,4,8,16,32], dropout=0, reg=reg)
# X = dilated_residual_convolution(X, kernel=7, dilations=[2,4,8,16,32], dropout=0, reg=reg)
# X = TimeDistributed(Conv1D(128,32,  activation='relu'))(X)
# out = TimeDistributed(Conv1D(4,1,  activation='softmax'))(X)
# X = TimeDistributed(Dense(64, activation='relu'))(X)
# Sequence model across epochs.
X = Bidirectional(LSTM(128, return_sequences=True, kernel_regularizer=None, recurrent_regularizer=None))(X)
# Softmax output: SparseCategoricalCrossentropy() defaults to from_logits=False and therefore
# expects class probabilities; a ReLU output is neither normalised nor bounded.
out = TimeDistributed(Dense(4,  activation='softmax'))(X)

model = Model(inputs=inputs, outputs=out)

# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
model.compile( optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
               loss=tf.keras.losses.SparseCategoricalCrossentropy(),
               metrics=['accuracy'],
               sample_weight_mode="temporal")

model.summary(line_length = 100)
# Snapshot of the initial weights so training can be restarted from the same state.
model.save_weights('untrained_model.h5')

# dot_img_file = '/home/kkotzen/tmp/figures/model_definition.png'
# tf.keras.utils.plot_model(model, to_file=dot_img_file, show_shapes=True)

Model: "model"
____________________________________________________________________________________________________
Layer (type)                                 Output Shape                            Param #        
input_1 (InputLayer)                         [(None, 1200, 256, 1)]                  0              
____________________________________________________________________________________________________
time_distributed (TimeDistributed)           (None, 1200, 256, 8)                    16             
____________________________________________________________________________________________________
time_distributed_1 (TimeDistributed)         (None, 1200, 256, 16)                   400            
____________________________________________________________________________________________________
time_distributed_2 (TimeDistributed)         (None, 1200, 256, 16)                   784            
____________________________________________________________________________

In [None]:
# Start every training run from the saved initial weights.
model.load_weights('untrained_model.h5')

# Stop after 20 epochs without val_loss improvement; keep only the best checkpoint on disk.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=20)
mc = ModelCheckpoint('trained_model.h5', monitor='val_loss', mode='min', save_best_only=True)
callbacks = [es, mc]

# Per-epoch sample weights counteract the class imbalance of the labels.
class_weights, sample_weights = calc_class_sample_weights(train_Y)
print('Weightings are assigned as followed: ', class_weights)
history = model.fit(
    train_X_,
    train_Y_,
    epochs=40,
    batch_size=16,
    validation_data=(validate_X_, validate_Y_),
    validation_batch_size=16,
    verbose=1,
    callbacks=callbacks,
    sample_weight=sample_weights,
    shuffle=True,
)

Weightings are assigned as followed:  {0: 0.6149643937176861, 1: 0.5883773081489262, 2: 4.1078751650795855, 3: 2.3209122638221418}
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40

In [None]:
# list all data in history
plt.close("all")
print(history.history.keys())

# One panel per metric on explicit axes, so the loss curves are never drawn on top of the
# accuracy curves when the backend keeps the current figure open after plt.show().
fig, (ax_acc, ax_loss) = plt.subplots(2, 1, sharex=True)
for ax, metric in ((ax_acc, 'accuracy'), (ax_loss, 'loss')):
    ax.plot(history.history[metric])
    ax.plot(history.history['val_' + metric])
    ax.set_title('model ' + metric)
    ax.set_ylabel(metric)
    ax.set_xlabel('epoch')
    # The second curve is the validation split passed to fit(), not the test set.
    ax.legend(['train', 'validation'], loc='upper left')
fig.tight_layout()
plt.show()

In [None]:
def plot_hypnogram(label, prediction, patient, ticks):
    """Plot predicted and labelled sleep stages of one recording on a shared axis.

    Args:
        label: Sequence of reference stage ids, one per epoch.
        prediction: Sequence of predicted stage ids, one per epoch.
        patient: Identifier shown in the figure title.
        ticks: Mapping from stage id (y position) to the tick label to display.

    Returns:
        The created matplotlib figure.
    """
    fig, axis = plt.subplots(1, 1, figsize=(15, 10))

    # Prediction in solid black, reference as a translucent red overlay.
    axis.plot(prediction, color='black', label='Predicted sleep stage')
    axis.plot(label, color='red', alpha=0.25, label='Labeled sleep stage')

    axis.set_xlabel('Time (30s Epochs)')
    axis.set_ylabel('Sleep Stage')

    tick_positions = list(ticks.keys())
    tick_names = list(ticks.values())
    axis.set_yticks(tick_positions)
    axis.set_yticklabels(tick_names, rotation='vertical')

    axis.legend(loc="upper right")
    fig.suptitle(f"Hypnogram for patient {patient}", fontsize=14)
    return fig

In [None]:
# Restore the checkpoint written by ModelCheckpoint during training (machine-specific absolute path).
model.load_weights('/home/kkotzen/research/PPG_sleepstaging/trained_model.h5')
print("Model loaded. Starting prediction")

# Predict on the windowed test tensor, then pick the highest-scoring class along the last axis.
probs = model.predict(test_X_)
print(probs.shape)
preds = np.argmax(probs, axis=2)
print(preds.shape)

# Here `patient` is a row index into the test arrays, not a patient id.
patient = 4
label = test_Y_[patient]
prediction = preds[patient]
hypnograms = plot_hypnogram(label, prediction, patient, ticks={0: 'Wake', 1: 'Light', 2:'Deep', 3:'REM'})


In [None]:
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, cohen_kappa_score

# NOTE(review): the star import supplies prediction_from_probability / flatten_labels
# (presumably) and also re-binds any same-named notebook definitions — prefer explicit names.
from src.models.model_utils import *
from utils.figures import make_confusion_matrix

labels = {0: 'Wake', 1: 'Light', 2:'Deep', 3:'REM'}

# Predict on the windowed tensor the model was trained on; the raw series (test_X) does not
# match the model's input shape.
test_predictions_probs = model.predict(test_X_, batch_size=32, verbose=1)
test_predictions = prediction_from_probability(test_predictions_probs)
test_lables = flatten_labels(test_Y_)

# Confusion Matrix
conf = confusion_matrix(test_lables, test_predictions, labels=None, sample_weight=None, normalize=None)
conf_norm = confusion_matrix(test_lables, test_predictions, labels=None, sample_weight=None, normalize='true')
conf_matrix_dict = {'matrix': conf, 'labels': list(labels)}
conf_matrix_figure = make_confusion_matrix(y_reference=test_lables, y_predicted=test_predictions, categories=list(labels))

# Evaluation Metrics
auc = ""  # tf.keras.metrics.AUC(multi_label=True)(self.dense_to_sparse(test_lables), test_predictions_probs)
cr = pd.DataFrame(classification_report(test_lables, test_predictions, output_dict=True)).T
acc = accuracy_score(test_lables, test_predictions)
kappa = cohen_kappa_score(test_lables, test_predictions)

conf = pd.DataFrame(conf, columns=labels, index=labels)
conf_norm = pd.DataFrame(conf_norm, columns=labels, index=labels)
print(conf)
print(conf_norm)
# Single print with a real newline (the nested print emitted "None" and used an invalid escape).
print('\nClassification Report')
print(cr)
# Report the weighted average under the "Weighted F1" label (the macro average was printed before).
print('\nAccuracy:\t', acc, '\nAUC', auc, '\nKappa\t\t', kappa, '\nWeighted F1\t',
      cr['f1-score']['weighted avg'])

metrics = {'Weighted F1': cr['f1-score']['weighted avg'], 'Accuracy:': acc, 'AUC': auc,
           'Kappa': kappa, 'Macro F1': cr['f1-score']['macro avg']}
print(metrics)

In [None]:

# NOTE(review): `constant_values` is not defined anywhere in the visible notebook, so this cell
# presumably raises NameError on a fresh run — looks like leftover scratch; confirm and remove.
constant_values


In [None]:
def _time_series_subsequences(ts, window, hop=1):
    assert len(ts.shape) == 1
    shape = (int(int(ts.size - window) / hop + 1), window)
    strides = ts.strides[0] * hop, ts.strides[0]
    return np.lib.stride_tricks.as_strided(ts, shape=shape, strides=strides)


def _batch_subsequenced_padded(X, win_len, hop):
    """Cut every sequence of a batch into zero-padded, overlapping windows.

    Each sequence is zero-padded by ``win_len - hop`` samples in total, which yields
    ``X.shape[1] // hop`` windows of length ``win_len`` per sequence.

    Args:
        X: Array of shape (batch, samples, channels). Each sequence is flattened before
            windowing, so this assumes a single channel — TODO confirm for other inputs.
        win_len: Window length in samples; must be >= ``hop``.
        hop: Step between consecutive windows.

    Returns:
        Array of shape (batch, samples // hop, win_len).

    Raises:
        ValueError: If ``win_len`` is smaller than ``hop``.
    """
    if win_len < hop:
        raise ValueError("win_len must be greater than or equal to hop")

    n_samples = X.shape[1]
    # Split the padding so left + right == win_len - hop even when that difference is odd;
    # the left part equals the original int(win_len/2 - hop/2).
    pad_total = win_len - hop
    pad_left = pad_total // 2

    padded = np.zeros((X.shape[0], n_samples + pad_total, X.shape[2]))
    # Explicit end index: a `-padding` slice would be empty when the padding is zero.
    padded[:, pad_left:pad_left + n_samples, :] = X

    # Derive the window count from the input instead of hard-coding it for one example.
    windows = np.zeros((X.shape[0], n_samples // hop, win_len))
    for i in range(X.shape[0]):
        windows[i] = _time_series_subsequences(padded[i].flatten(), win_len, hop)
    return windows


# Small sanity check: 10 sequences of 120 ones, windows of 14 samples every 10 samples.
# Zeros in the displayed result mark the padded positions at both ends.
d = np.ones((10,120,1))
dd = _batch_subsequenced_padded(d, 14, 10)
dd