In [5]:
import sys
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ["CUDA_VISIBLE_DEVICES"] = "2"  # Set the GPU you wish to use here
import tensorflow as tf
gpus = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(gpus[0], enable=True)

from tensorflow.python.keras import Input
from tensorflow.keras.models import Sequential, Model
from tensorflow.python.keras.layers import Conv1D, GlobalAveragePooling1D, AveragePooling1D, MaxPool1D, Flatten, Dense, \
    Reshape, Dropout, TimeDistributed, Add, LSTM, GRU, Bidirectional
from tensorflow.python.keras.layers import LeakyReLU
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

from sklearn.utils import class_weight
from sklearn.model_selection import train_test_split

from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from scipy import interpolate

os.chdir('/home/kkotzen/research/PPG_sleepstaging/')
from src.models.model_utils import plot_hypnogram
from src.parsing.MESAParser import MESAParser


def comp_google_ihr(patient, dl):
    beats = dl.load_annotation(patient, signal='EKG', annotator='epltd0', annotation='Peaks')
    beats = beats/dl.ecg_fs
    ibi = np.diff(beats)
    beats = beats[:-1]+(beats[1:] - beats[:-1])/2
    upper = np.percentile(ibi, 99)
    lower = np.percentile(ibi, 1)
    ibi_filt_idx = np.where((ibi < upper) & (ibi > lower))
    ibi = ibi[ibi_filt_idx]
    beats = beats[ibi_filt_idx]
    ihr = 1/ibi
    ihr = (ihr - np.mean(ihr))/np.std(ihr)

    x_2hz = np.arange(0, beats[-1], 1/2)
    ihr_2hz = np.interp(x_2hz, beats, ihr)
    return ihr_2hz

def _time_series_subsequences(ts, window, hop=1):
    assert len(ts.shape) == 1
    shape = (int(int(ts.size - window) / hop + 1), window)
    strides = ts.strides[0] * hop, ts.strides[0]
    return np.lib.stride_tricks.as_strided(ts, shape=shape, strides=strides)

def load_XY(dl, patients):

    ihr_fs = 2
    ihr_resampled_fs = 128/60
    samples_per_epoch = 64
    total_epochs = 1200
    total_samples = int(samples_per_epoch*1200)
    batch_size = len(patients)
    sleep_string_encoding = {0:'zero', 1:'one', 2:'two', 3:'three', 4:'four', 5: "five"}
    sleep_encoding = {'zero': 0, 'one': 1, 'two': 1, 'three': 2, 'four': 9, 'five': 3}

    X = np.zeros((batch_size, total_samples))
    Y = np.zeros((batch_size, total_epochs))

    for i, patient in tqdm(enumerate(patients)):
        patient = str(patient).zfill(4)
        
#         ihr = dl.load_ihr(patient, signal='EKG', filtered=True)
#         ihr = (ihr - np.mean(ihr))/np.std(ihr)
        
        ihr = comp_google_ihr(patient, dl)
        
        x = np.arange(0, ihr.shape[0]/2, 1/2)
        x_new = np.arange(0, ihr.shape[0]/(128/60), 60/128 )
        f = interpolate.interp1d(x, ihr)
        ihr = f(x_new)        

        sleep = dl.load_sleep(patient)
        sleep[sleep > 5] = 0
        sleep = [sleep_string_encoding[s] for s in sleep]
        sleep = np.array([sleep_encoding[s] for s in sleep])
        sleep[sleep > 9] = 0

        L = ihr.shape[0]
        if L > total_samples:
            L = total_samples
        X[i, 0:L] = ihr[0:L]
        
        L = sleep.shape[0]
        if L > total_epochs:
            L = total_epochs
        Y[i, 0:L] = sleep[0:L]

    return np.expand_dims(X,axis=2), Y

def calc_class_sample_weights(ty, weight_adjustment=[]):
    if len(ty.shape) == 1:
        y = ty.reshape(ty.shape[0])
    else:
        y = ty.reshape(ty.shape[0] * ty.shape[1])
    classes = np.unique(y)

    if len(weight_adjustment) > 0 and len(weight_adjustment) != len(classes):
        raise ValueError("The weight adjustment variable needs to have as many values as there are classes")
    elif len(weight_adjustment) == 0:
        weight_adjustment = np.ones(len(classes))

    class_weights = class_weight.compute_class_weight('balanced', classes=classes, y=y) * weight_adjustment
    class_weights = {i: class_weights[i] for i in range(len(class_weights))}
    sample_weights = np.vectorize(class_weights.get)(ty)
    return class_weights, sample_weights

def duplicate(x):
    return np.array([[x_]*2 for x_ in x]).flatten()

def hold_line(t, x):
    t = [i for i in t]
    x = [i for i in x]
    
    if len(t) - len(x) == 0:
            t.append(t[-1]+t[-1]-t[-2])
    if len(t)-len(x) != 1:
            raise ValueError()
            
    t_ = duplicate(t)[1:-1]
    x_ = duplicate(x)
    return t_, x_

In [6]:
dl = MESAParser()
train_patients, test_patients= dl.get_train_test_patients_from_file()
train_patients, validate_patients = train_test_split(train_patients, test_size = len(test_patients), random_state=6668)
    # train_X, train_Y = load_XY(dl, train_patients)
    # validate_X, validate_Y = load_XY(dl, validate_patients)
    # test_X, test_Y = load_XY(dl, test_patients)

    # np.savez("train.npz", train_X=train_X, train_Y=train_Y)
    # np.savez("validate.npz", validate_X=validate_X, validate_Y=validate_Y)
    # np.savez("test.npz", test_X=test_X, test_Y=test_Y)

train = np.load('train.npz')
train_X, train_Y = train['train_X'], train['train_Y']
validate = np.load('validate.npz')
validate_X, validate_Y = validate['validate_X'], validate['validate_Y']
test = np.load('test.npz')
test_X, test_Y = test['test_X'], test['test_Y']

train_Y[train_Y==9] = 0
validate_Y[validate_Y==9] = 0
test_Y[test_Y==9] = 0

print(train_X.shape, train_Y.shape)
print(validate_X.shape, validate_Y.shape)
print(test_X.shape, test_Y.shape)

(1133, 76800, 1) (1133, 1200)
(141, 76800, 1) (141, 1200)
(141, 76800, 1) (141, 1200)


In [8]:
%matplotlib widget
import matplotlib.pyplot as plt

I = 300
print("Patient: ", train_patients[I])
X, Y = train_X[I,:], train_Y[I]

plt.close("all")
fig, (ax1, ax2) = plt.subplots(2,1,figsize=(12, 5), sharex=True)
x = np.arange(0, (len(X)/(128/60))/30, (60/128)/30)
y = np.arange(0, len(Y), 1)
ax1.plot(x,X)


y_, Y_ = hold_line(y, Y)
ax2.plot(y_,Y_)

Patient:  0288


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x7f1207480be0>]

In [4]:
import pandas as pd
pd.DataFrame(train_Y.flatten()).hist(density=True)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

array([[<AxesSubplot:title={'center':'0'}>]], dtype=object)

In [26]:
def dilated_residual_convolution(X_in, kernel, dilations, reg):
    for dilation in dilations:
        X = TimeDistributed(Conv1D(128, kernel_size=kernel,  activation='relu', dilation_rate=dilation, padding='same',  kernel_regularizer=reg))(X_in)
#     X = TimeDistributed(Dropout(0.2))(X)
    X = Add()([X, X_in])
    return X

reg = None

inputs = Input(shape=(validate_X.shape[1], validate_X.shape[2]))

# Input Convolution
X = Conv1D(8, 1, activation='relu', padding='same', kernel_regularizer=reg)(inputs)
# Conv Block 1 of 3
X = Conv1D(16, 3, activation='relu', padding='same',  kernel_regularizer=reg)(X)
X = MaxPool1D(2, strides=2)(X)

# Conv Block 2 of 3
X = Conv1D(32, 3, activation='relu', padding='same',  kernel_regularizer=reg)(X)
X = MaxPool1D(2, strides=2)(X)

# Conv Block 3 of 3
X = Conv1D(64, 3, activation='relu', padding='same',  kernel_regularizer=reg)(X)
X = MaxPool1D(2, strides=2)(X)

# Conv Block 4 of 3
X = Conv1D(128, 3, activation='relu', padding='same',  kernel_regularizer=reg)(X)
X = MaxPool1D(2, strides=2)(X)

X = Reshape(target_shape=(1200, 4, 128))(X)

# Flatten + Dense
# X = TimeDistributed(AveragePooling1D())(X)
X = TimeDistributed(Flatten())(X)
X = TimeDistributed(Dense(units=128, activation='relu'))(X)
X = TimeDistributed(Dense(units=16, activation='relu'))(X)
X = TimeDistributed(Dense(units=4, activation='relu'))(X)
X = Bidirectional(LSTM(128, return_sequences=True, kernel_regularizer=None, recurrent_regularizer=None))(X)
X = Bidirectional(LSTM(128, return_sequences=True, kernel_regularizer=None, recurrent_regularizer=None))(X)
X = TimeDistributed(Dense(128, activation='relu', kernel_regularizer=None))(X)
X = TimeDistributed(Dense(64, activation='relu', kernel_regularizer=None))(X)
out = TimeDistributed(Dense(4, activation='softmax'))(X)
        
# Time Dilated
# X = Reshape(target_shape=(1, X.shape[1], X.shape[2]))(X)
# Dilated Residual Connections
# X = dilated_residual_convolution(X, kernel=7, dilations=[2,4,8,16,32], reg=reg)
# X = dilated_residual_convolution(X, kernel=7, dilations=[2,4,8,16,32], reg=reg)
# X = TimeDistributed(Dense(64, activation='relu'))(X)
# out = TimeDistributed(Conv1D(4,1,  activation='softmax'))(X)

model = Model(inputs=inputs, outputs=out)

model.compile( optimizer=tf.keras.optimizers.Adam(lr=0.001),
               loss=tf.keras.losses.SparseCategoricalCrossentropy(),
               metrics=['accuracy'],
               sample_weight_mode="temporal")

model.summary(line_length = 100)
model.save_weights('untrained_model.h5')


Model: "model_1"
____________________________________________________________________________________________________
Layer (type)                                 Output Shape                            Param #        
input_2 (InputLayer)                         [(None, 76800, 1)]                      0              
____________________________________________________________________________________________________
conv1d_5 (Conv1D)                            (None, 76800, 8)                        16             
____________________________________________________________________________________________________
conv1d_6 (Conv1D)                            (None, 76800, 16)                       400            
____________________________________________________________________________________________________
max_pooling1d_4 (MaxPooling1D)               (None, 38400, 16)                       0              
__________________________________________________________________________

In [27]:
model.load_weights('untrained_model.h5')

es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=20)
mc = ModelCheckpoint('trained_model.h5', monitor='val_loss', mode='min', save_best_only=True)
callbacks = [es, mc]

class_weights, sample_weights = calc_class_sample_weights(train_Y)
print('Weightings are assigned as followed: ', class_weights)
history = model.fit(train_X, train_Y, epochs = 40, batch_size=16, validation_data=(validate_X, validate_Y), validation_batch_size=16, verbose=1, callbacks=callbacks, sample_weight=sample_weights, shuffle=True)

Weightings are assigned as followed:  {0: 0.6480988908528249, 1: 0.5737938764933581, 2: 3.8472857336895006, 3: 2.201111240626336}
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [None]:
# list all data in history
plt.close("all")
print(history.history.keys())
# summarize history for accuracy
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

In [6]:
def plot_hypnogram(label, prediction, patient, ticks):
    f, (ax1) = plt.subplots(1, 1, figsize=(15, 10))
    ax1.plot(prediction, color='black', label='Predicted sleep stage')
    ax1.plot(label, color='red', alpha=0.25, label='Labeled sleep stage')
    ax1.set_xlabel('Time (30s Epochs)')
    ax1.set_ylabel('Sleep Stage')
    ax1.set_yticks(list(ticks.keys()))
    ax1.set_yticklabels(list(ticks.values()), rotation='vertical')
    ax1.legend(loc="upper right")
    f.suptitle(f"Hypnogram for patient {patient}", fontsize=14)
    return f

In [10]:
model.load_weights('/home/kkotzen/research/PPG_sleepstaging/trained_model.h5')
print("Model loaded. Starting prediction")

probs = model.predict(test_X)
print(probs.shape)
preds = np.argmax(probs, axis=2)
print(preds.shape)

patient = 2
label = test_Y[patient]
prediction = preds[patient]
hypnograms = plot_hypnogram(label, prediction, patient, ticks={0: 'Wake', 1: 'Light', 2:'Deep', 3:'REM'})


Model loaded. Starting prediction


In [21]:
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, cohen_kappa_score

from src.models.model_utils import *
from utils.figures import make_confusion_matrix

labels = {0: 'Wake', 1: 'Light', 2:'Deep', 3:'REM'}

test_predictions_probs = model.predict(test_X, batch_size=32, verbose=1)
test_predictions = prediction_from_probability(test_predictions_probs)
test_lables = flatten_labels(test_Y)

# Confusion Matrix
conf = confusion_matrix(test_lables, test_predictions, labels=None, sample_weight=None, normalize=None)
conf_norm = confusion_matrix(test_lables, test_predictions, labels=None, sample_weight=None, normalize='true')
conf_matrix_dict = {'matrix': conf, 'labels': list(labels)}
conf_matrix_figure = make_confusion_matrix(y_reference=test_lables, y_predicted=test_predictions, categories=list(labels))

# Evaluation Metrics
auc = ""  # tf.keras.metrics.AUC(multi_label=True)(self.dense_to_sparse(test_lables), test_predictions_probs)
cr = pd.DataFrame(classification_report(test_lables, test_predictions, output_dict=True)).T
acc = accuracy_score(test_lables, test_predictions)
kappa = cohen_kappa_score(test_lables, test_predictions)

conf = pd.DataFrame(conf, columns=labels, index=labels)
conf_norm = pd.DataFrame(conf_norm, columns=labels, index=labels)
print(conf)
print(conf_norm)
print(print(f'\Classification Report', 'green'))
print(cr)
print('\nAccuracy:\t', acc, '\nAUC', auc, '\nKappa\t\t', kappa, '\nWeighted F1\t',
      cr['f1-score']['macro avg'])

metrics = {'Weighted F1': cr['f1-score']['weighted avg'], 'Accuracy:': acc, 'AUC': auc,
           'Kappa': kappa, 'Macro F1': cr['f1-score']['macro avg']}
print(metrics)



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

       0      1      2      3
0  51184   9479   1962   4531
1   6615  38111  18593   8599
2    417   2241   7779    438
3   1660   3032    757  13802
          0         1         2         3
0  0.762166  0.141149  0.029216  0.067470
1  0.091980  0.529923  0.258531  0.119567
2  0.038345  0.206069  0.715310  0.040276
3  0.086229  0.157498  0.039323  0.716950
\Classification Report green
None
              precision    recall  f1-score        support
0.0            0.854833  0.762166  0.805844   67156.000000
1.0            0.720939  0.529923  0.610846   71918.000000
2.0            0.267402  0.715310  0.389281   10875.000000
3.0            0.504275  0.716950  0.592094   19251.000000
accuracy       0.655296  0.655296  0.655296       0.655296
macro avg      0.586862  0.681087  0.599516  169200.000000
weighted avg   0.720281  0.655296  0.671867  169200.000000

Accuracy:	 0.6552955082742317 
AUC  
Kappa		 0.505653088152623 
Weighted F1	 0.5995162449310348
{'Weighted F1': 0.6718672304652197, '