In [1]:
#import statements
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from mne.io import read_raw_edf
import tensorflow as tf
import keras
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from keras import layers
from keras import Sequential

In [2]:
#read in and preprocess data
#load the training recording and pull the full signal matrix out of it
data = read_raw_edf('sleep-cassette/SC4001E0-PSG.edf')
raw_data = data.get_data()

#rows 0, 1 and 2 of the matrix each become an (n_samples, 1) column
#NOTE(review): the names suggest two EEG derivations and one EOG channel - confirm against the EDF header
eeg_cz, eeg_oz, eog = (raw_data[row].reshape((raw_data[0].shape[0], 1)) for row in range(3))

#side-by-side columns give one row per time sample, three features wide
X = np.concatenate((eeg_cz, eeg_oz, eog), axis = 1)

#standardise each column; the fitted scaler is reused later for the test recording
scaler = StandardScaler()
X = scaler.fit_transform(X)

Extracting EDF parameters from C:\Users\Tyler\Documents\CSCI 5622 Final Project\sleep-cassette\SC4001E0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


In [3]:
#create sequence data for forward prediction
def forward_split_sequences_multivariate(data, seq_len, forecast_len):
    """Cut a series into (input window, following window) training pairs.

    For every start index ``i`` the input is ``data[i:i + seq_len]`` and the
    target is the ``forecast_len`` rows that come immediately after it.

    Parameters
    ----------
    data : array sliced along its first axis (must expose ``.shape``).
    seq_len : number of rows in each input window.
    forecast_len : number of rows in each target window.

    Returns
    -------
    (x, y) : pair of ``np.ndarray``. For 2-D ``data`` of shape ``(n, f)`` they
        have shapes ``(count, seq_len, f)`` and ``(count, forecast_len, f)``
        with ``count = n - seq_len - forecast_len``. The start index
        ``n - seq_len - forecast_len`` would still fit inside ``data`` but is
        not produced. When ``count <= 0`` both results are empty arrays of
        shape ``(0,)``.
    """
    window_count = data.shape[0] - seq_len - forecast_len
    starts = range(window_count)

    inputs = [data[start:start + seq_len] for start in starts]
    targets = [data[start + seq_len:start + seq_len + forecast_len] for start in starts]

    return np.array(inputs), np.array(targets)

#create sequence data for forward and backward prediction
def bimodal_split_sequences_multivariate(data, seq_len, forecast_len):
    """Cut a series into (past + future context, gap between them) pairs.

    For every start index ``i`` the target is the ``forecast_len`` rows
    ``data[i + seq_len:i + seq_len + forecast_len]``. The input joins the
    ``seq_len`` rows before that gap with the ``seq_len`` rows after it,
    placed side by side along axis 1, so the "before" columns come first and
    the "after" columns follow.

    Parameters
    ----------
    data : 2-D array sliced along its first axis (1-D input fails in the
        axis-1 concatenation).
    seq_len : number of rows taken on each side of the gap.
    forecast_len : number of rows in the gap, i.e. in each target.

    Returns
    -------
    (x, y) : pair of ``np.ndarray``. For ``data`` of shape ``(n, f)`` they
        have shapes ``(count, seq_len, 2 * f)`` and ``(count, forecast_len, f)``
        with ``count = n - 2 * seq_len - forecast_len``. The start index
        ``n - 2 * seq_len - forecast_len`` would still fit inside ``data`` but
        is not produced. When ``count <= 0`` both results are empty arrays of
        shape ``(0,)``.
    """
    window_count = data.shape[0] - seq_len - forecast_len - seq_len
    contexts = []
    targets = []

    for start in range(window_count):
        gap_start = start + seq_len
        gap_end = gap_start + forecast_len

        before_gap = data[start:gap_start]
        after_gap = data[gap_end:gap_end + seq_len]

        contexts.append(np.concatenate((before_gap, after_gap), axis = 1))
        targets.append(data[gap_start:gap_end])

    return np.array(contexts), np.array(targets)

In [5]:
#choose sequence length and forecast length
#20 rows of history go in, 10 rows of forecast come out
seq_len, forecast_len = 20, 10

#use bimodal, makes graphing easier
#the bimodal windows are six columns wide: the three "before" columns first, then the
#three "after" columns; only the first three are fed to the forward model
x, y = bimodal_split_sequences_multivariate(X, seq_len, forecast_len)
x_train, y_train = x[:, :, :3], y

#feature counts are read off the last axis of the 3-D window arrays
n_features_x, n_features_y = x_train.shape[2], y_train.shape[2]


In [6]:
#build the model
#seed Python, NumPy and TensorFlow before any layer is created so that weight
#initialisation and batch shuffling repeat on Restart & Run All (as far as the
#backend is deterministic)
keras.utils.set_random_seed(500)

#one LSTM layer reads each (seq_len, n_features_x) window; the dense layer emits every
#forecast value in one flat vector and Reshape restores the (forecast_len, n_features_y)
#layout of y_train so the mean-squared-error loss compares like with like
model = Sequential()
model.add(layers.LSTM(32, input_shape = (seq_len,n_features_x)))
model.add(layers.Dense(forecast_len*n_features_y))
model.add(layers.Reshape([forecast_len, n_features_y]))
model.compile(optimizer = 'adam', loss = 'mse')

#save checkpoints
#weights-only checkpoints; Keras fills {epoch:04d} with the epoch number when saving
checkpoint_path = "checkpoints_forward/cp-{epoch:04d}.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
cp_callback = keras.callbacks.ModelCheckpoint(filepath = checkpoint_path, save_weights_only = True, verbose = 1)

In [7]:
#fit model
#20 passes over the training windows in mini-batches of 32; cp_callback writes a
#checkpoint file as training progresses
history = model.fit(
    x = x_train,
    y = y_train,
    batch_size = 32,
    epochs = 20,
    callbacks = [cp_callback],
)

Epoch 1/20
Epoch 1: saving model to checkpoints_forward\cp-0001.ckpt
Epoch 2/20
Epoch 2: saving model to checkpoints_forward\cp-0002.ckpt
Epoch 3/20
Epoch 3: saving model to checkpoints_forward\cp-0003.ckpt
Epoch 4/20
Epoch 4: saving model to checkpoints_forward\cp-0004.ckpt
Epoch 5/20
Epoch 5: saving model to checkpoints_forward\cp-0005.ckpt
Epoch 6/20
Epoch 6: saving model to checkpoints_forward\cp-0006.ckpt
Epoch 7/20
Epoch 7: saving model to checkpoints_forward\cp-0007.ckpt
Epoch 8/20
Epoch 8: saving model to checkpoints_forward\cp-0008.ckpt
Epoch 9/20
Epoch 9: saving model to checkpoints_forward\cp-0009.ckpt
Epoch 10/20
Epoch 10: saving model to checkpoints_forward\cp-0010.ckpt
Epoch 11/20
Epoch 11: saving model to checkpoints_forward\cp-0011.ckpt
Epoch 12/20
Epoch 12: saving model to checkpoints_forward\cp-0012.ckpt
Epoch 13/20
Epoch 13: saving model to checkpoints_forward\cp-0013.ckpt
Epoch 14/20
Epoch 14: saving model to checkpoints_forward\cp-0014.ckpt
Epoch 15/20
Epoch 15: sa

In [8]:
#load the weights saved at epoch 20, the last epoch of the fit above
#(no metric is used here to compare checkpoints against each other)
model.load_weights(filepath = "checkpoints_forward/cp-0020.ckpt")

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x261a3e2a340>

In [9]:
#read in and preprocess test data
#load the held-out recording and pull the full signal matrix out of it
test_data = read_raw_edf('sleep-cassette/SC4002E0-PSG.edf')
raw_test_data = test_data.get_data()

#rows 0, 1 and 2 of the matrix each become an (n_samples, 1) column, mirroring the training cell
eeg_cz_test, eeg_oz_test, eog_test = (
    raw_test_data[row].reshape((raw_test_data[0].shape[0], 1)) for row in range(3)
)
X_test = np.concatenate((eeg_cz_test, eeg_oz_test, eog_test), axis = 1)

#apply the scaler fitted on the training recording; it is not refitted on test data
X_test = scaler.transform(X_test)

Extracting EDF parameters from C:\Users\Tyler\Documents\CSCI 5622 Final Project\sleep-cassette\SC4002E0-PSG.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


In [11]:
#process test data
#same window sizes as used for the training windows
seq_len, forecast_len = 20, 10

#use bimodal, makes graphing easier
#x_test_full keeps all six columns (before-gap and after-gap context);
#x_test keeps only the first three, which is what the forward model takes as input
x_test_full, y_test = bimodal_split_sequences_multivariate(X_test, seq_len, forecast_len)
x_test = x_test_full[:, :, :3]


(8489950, 20, 3) (8489950, 10, 3)


In [12]:
#score the model on the test windows; it was compiled with loss = 'mse'
test_mse = model.evaluate(x = x_test, y = y_test)



In [22]:
#make sure same seed as other notebook
rng = np.random.default_rng(500)

#prepare predictions for graph in other notebook
#draw two window indices; seed, bounds and size are unchanged, so the draw is unchanged
random_sample = rng.integers(low = 0, high = x_test.shape[0], size = 2)
x_test_full_sample = x_test_full[random_sample]
#the forward model only takes the first three (before-gap) columns of the bimodal windows
x_test_sample = x_test_full_sample[:,:,0:3]

y_predict_sample = model.predict(x_test_sample)
print(y_predict_sample.shape)
#np.savetxt only accepts 1-D or 2-D arrays, so stack the sampled forecasts row-wise.
#The shape is derived from the prediction instead of the former hard-coded (20,3), so it
#still works if the sample count, forecast_len or the number of features changes.
y_predict_sample = y_predict_sample.reshape((-1, y_predict_sample.shape[-1]))
np.savetxt('forward_predict.txt', y_predict_sample)


(2, 10, 3)
