In [6]:
import glob

import numpy as np
import scipy as sp
import scipy.io
import scipy.signal
import time
import pandas as pd
import math
from utils.data_utils import *

from scipy.io import loadmat
import matplotlib.pyplot as plt
from matplotlib import figure
from scipy.signal import savgol_filter
from scipy.signal import butter, lfilter
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

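# Import model libs (currently disabled: the model-building and plot_model cells
# further down use these names, so re-enable these imports before running them)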
# from keras.models import Sequential
# from keras.layers import Dense, Conv1D, Dropout, Flatten, MaxPooling1D, BatchNormalization, LSTM
# from keras import optimizers
# from keras.utils import to_categorical
# from keras.utils.vis_utils import plot_model


# Signal and windowing configuration shared by the cells below.
fs = 125                    # second argument to bandpass_filter; presumably the sampling rate in Hz - confirm
minBPM, maxBPM = 40, 240    # heart-rate bounds; not referenced by any visible cell
window_length = fs * 8      # samples per analysis window (1000)
window_shift = fs * 2       # hop between consecutive window starts (250)

In [7]:
# Locate the recordings and their matching reference files.
data_dir = "datasets/troika/training_data"
data_fls, ref_fls = LoadTroikaDataset(data_dir)
errs = []
confs = []

# This cell only looks at the first recording, as a quick shape check.
data_fl, ref_fl = data_fls[0], ref_fls[0]

# Raw channels: PPG plus the three accelerometer axes.
ppg, accx, accy, accz = LoadTroikaDataFile(data_fl)

# Run every channel through the same band-pass helper (same order as before).
ppg, accx, accy, accz = (
    bandpass_filter(channel, fs) for channel in (ppg, accx, accy, accz)
)

# Collapse the three accelerometer axes into one magnitude signal.
acc = calculate_magnitude(accx, accy, accz)

# Z-score both signals over the whole recording.
ppg_mean, ppg_std = np.mean(ppg), np.std(ppg)
ppg = (ppg - ppg_mean) / ppg_std
acc_mean, acc_std = np.mean(acc), np.std(acc)
acc = (acc - acc_mean) / acc_std

# Reference values stored under the 'BPM0' key, flattened to 1-D.
ground_truth = loadmat(ref_fl)['BPM0'].reshape(-1)

# Slide a window_length-sample window over the PPG in steps of window_shift.
window_starts = range(0, len(ppg) - window_length + 1, window_shift)
X1 = []
for start in window_starts:
    stop = start + window_length
    ppg_window = ppg[start:stop]
    acc_window = acc[start:stop]  # sliced alongside the PPG but not stored

    X1.append(ppg_window)

X1 = np.array(X1)
y1 = ground_truth
print(X1.shape)
print(y1.shape)


(148, 1000)
(148,)


In [None]:
# Build the full dataset: one row of X per PPG window and one entry of y per
# reference value, concatenated over every recording in data_fls.
X = []
y_parts = []  # per-recording label arrays, joined once after the loop

print(data_fls)
for data_fl, ref_fl in zip(data_fls, ref_fls):
    # load data using LoadTroikaDataFile
    ppg, accx, accy, accz = LoadTroikaDataFile(data_fl)

    # bandpass filter the signals
    ppg = bandpass_filter(ppg, fs)
    accx = bandpass_filter(accx, fs)
    accy = bandpass_filter(accy, fs)
    accz = bandpass_filter(accz, fs)

    # Consider only magnitude of acceleration.
    # NOTE: acc is computed and standardized but is not used as a model input yet.
    acc = calculate_magnitude(accx, accy, accz)

    # Standardization (per recording)
    ppg = (ppg - np.mean(ppg)) / np.std(ppg)
    acc = (acc - np.mean(acc)) / np.std(acc)

    # loading the reference file
    ground_truth = sp.io.loadmat(ref_fl)['BPM0'].reshape(-1)

    # Slide a window_length-sample window over the PPG in steps of window_shift.
    # A dedicated name is used for the window start so the file loop is not shadowed.
    windows = [
        ppg[start:start + window_length]
        for start in range(0, len(ppg) - window_length + 1, window_shift)
    ]

    # Keep X and y aligned per recording: a count mismatch in one file would
    # otherwise shift the labels of every later file.
    # Assumes label k belongs to window k - TODO confirm against the dataset description.
    n_common = min(len(windows), len(ground_truth))
    if len(windows) != len(ground_truth):
        print("Window/label count mismatch in", data_fl, "-",
              len(windows), "windows vs", len(ground_truth),
              "labels; keeping the first", n_common)

    X.extend(windows[:n_common])
    y_parts.append(ground_truth[:n_common])


X = np.array(X)
# Single concatenation instead of np.append inside the loop.
y = np.concatenate(y_parts) if y_parts else np.array([])
assert len(X) == len(y), "X and y must contain the same number of samples"
print(X.shape)
print(y.shape)


In [None]:
# Train/test split of the windowed dataset
seed = 42
def split(X, y, train_size=0.8, random_state=None):
    """Split X and y into train and test partitions with train_test_split.

    Parameters
    ----------
    X, y : array-like
        Samples and targets; passed straight through to train_test_split.
    train_size : float, default 0.8
        Forwarded to train_test_split as ``train_size``.
    random_state : int or None, default None
        Seed forwarded to train_test_split. ``None`` means "use the
        module-level ``seed``", which keeps the original behavior.

    Returns
    -------
    X_train, X_test, y_train, y_test

    NOTE(review): windows are window_length = 8 * fs samples long but start only
    window_shift = 2 * fs samples apart, so neighbouring windows share most of
    their samples. A shuffled split therefore places near-duplicate segments in
    both partitions; a per-recording split would avoid that - confirm which
    evaluation is intended.
    """
    if random_state is None:
        random_state = seed
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=train_size, random_state=random_state
    )
    return X_train, X_test, y_train, y_test

x_train, x_test, y_train, y_test = split(X, y)

# Add a trailing single-channel axis so the arrays are 3-D, as required by the
# Keras layers used below (the first Conv1D takes input_shape=(timesteps, features)).
# The window size comes from window_length rather than a repeated literal 1000.
# reshape only changes the array's shape, not its values.
# NOTE: X itself is overwritten with the 3-D version; later cells rely on that.
x_train = x_train.reshape(x_train.shape[0], window_length, 1)
x_test = x_test.reshape(x_test.shape[0], window_length, 1)
X = X.reshape(X.shape[0], window_length, 1)

# astype does change the stored values if the arrays are wider than float32
# (presumably float64 here - TODO confirm): they are rounded to single precision.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
X = X.astype('float32')

# Targets are left as raw values: the model below is a regression
# (linear output, mse loss), so no one-hot encoding is applied.

print('x_train shape:', x_train.shape)
print('Number of segments in x_train', x_train.shape[0])
print('Number of segments in x_test', x_test.shape[0])

# Show a small sample of the targets instead of dumping the whole array.
print('y_train shape:', y_train.shape)
print('y_train (first 10):', y_train[:10])


In [None]:
# Build, train and evaluate the Conv1D + LSTM regressor.
# NOTE: Sequential, Conv1D, BatchNormalization, MaxPooling1D, Dropout, LSTM,
# Flatten and Dense come from the keras imports that are commented out in the
# imports cell; those imports must be enabled for this cell to run.
trainX, trainy, testX, testy = x_train, y_train, x_test, y_test
verbose, epochs, batch_size = 1, 1000, 25 # Batch size used to be 32
# Read the feature count from the data instead of hard-coding it.
n_timesteps, n_features, n_outputs = trainX.shape[1], trainX.shape[2], 1

model = Sequential()
# Two identical conv stages: conv -> batch norm -> pool by 4 -> light dropout.
model.add(Conv1D(filters=32, kernel_size=40, activation='relu', input_shape=(n_timesteps, n_features)))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=4))
model.add(Dropout(0.1))
# input_shape is only meaningful on the first layer, so it is not repeated here.
model.add(Conv1D(filters=32, kernel_size=40, activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=4))
model.add(Dropout(0.1))
# Both LSTMs return full sequences; Flatten then feeds every timestep to the dense head.
model.add(LSTM(128, return_sequences=True))
model.add(LSTM(128, return_sequences=True))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
# Single linear output unit: this is a regression, not a classification.
model.add(Dense(n_outputs, activation='linear'))
# 'accuracy' is meaningless for a continuous target; track mean absolute error instead.
model.compile(loss='mse', optimizer='adam', metrics=['mae'])

# fit network
train_start = time.time()
model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)
print("training time:")
print(time.time() - train_start)

# evaluate model (the timer covers evaluate plus both predict calls, as before)
test_start = time.time()
test_mse, test_mae_keras = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)

# MAE on the held-out split
predictions = model.predict(testX)
testMAE = np.absolute(np.subtract(testy, predictions[:, 0])).mean()

# MAE over every window, train and test together; `predictions` is left
# holding these full-dataset predictions for the cells below.
predictions = model.predict(X)
totalMAE = np.absolute(np.subtract(y, predictions[:, 0])).mean()

print("Test, Total MAE:", str(testMAE), str(totalMAE))
print("testing time:")
print(time.time() - test_start)
model.summary()

# Notes:
#   - Flatten, Dense 100, rmsprop, Test, Total MAE: 33.09671329490874 33.18895348192425
#   - Flatten, Dense 100, adam, Test, Total MAE: 27.908568967205614 28.388384894643007
#   - Flatten, Dense 50, then final neuron, Test, Total RMSE: 134.23943523918422 134.3048856681949
#   - Just found out I had metrics wrong: Train error came down to 0.735 bpm
#   - Test, Total MAE: 2.3047275488171093 1.3646295197764875

In [None]:
# Inspect the array left by the most recent model.predict call:
# its shape first, then its column 0 on the following line.
first_column = predictions[:, 0]
print(predictions.shape, first_column, sep="\n")
# print(first_column - y)

In [None]:
# The network still needs work before it can be scaled to the entire dataset:
# it is too noisy even on its own data.
fig = plt.figure(figsize=(40, 6))

# Predict over every window (train and test together) and recompute the overall MAE.
predictions = model.predict(X)
totalMAE = np.absolute(np.subtract(y, predictions[:, 0])).mean()
print("Test, Total MAE:", str(testMAE), str(totalMAE))

# Overlay the predictions on the reference values, in dataset order.
# (Optional smoothing idea from earlier experiments: savgol_filter(predictions[:, 0], 71, 3).)
plt.plot(predictions[:, 0], label='predicted')
plt.plot(y, label='reference')
plt.legend()
# plt.show must be called; the bare attribute `plt.show` does nothing.
plt.show()


In [None]:
# The model has a single output unit (n_outputs = 1), so predictions has one
# column; index 0 is the only valid column (index 1 raised IndexError).
print(predictions[:, 0].shape)

# plot_model comes from the keras.utils import that is commented out in the
# imports cell; it must be enabled for this call to work.
# `proj_dir` is not defined in any cell of this notebook, so fall back to the
# current directory when it is missing instead of raising NameError.
plot_dir = globals().get('proj_dir', '')
plot_model(model, to_file = plot_dir + 'model_plot.png', show_shapes = True, show_layer_names=True)

# Show one input window with its reference value as the title.
t = 1200  # arbitrary sample index; must be smaller than len(X)
plt.plot(X[t])
plt.title(y[t])
plt.show()
