<a href="https://colab.research.google.com/github/kjspring/stress-detection-wearable-devices/blob/main/modeling_WESAD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
# Load pickled data

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

import os

#! cd /content/drive/MyDrive/stress-prediction/ # Main file directory for this notebook on Google Drive
os.chdir('/content/drive/MyDrive/stress-prediction')
# absolute path of data directory
PATH = os.path.join(os.path.abspath(os.getcwd()), 'data') # Path of data folder on Google Drive

import joblib
data = joblib.load(f"{PATH}/pickle/WESAD_data_model.pickle") # read pickle file
labels = joblib.load(f"{PATH}/pickle/WESAD_labels_model.pickle") # read pickle file

Mounted at /content/drive


In [17]:
# Subsample the data and features
subsample_rate = 175  # subsample rate, e.g. to reduce from 700Hz to 4Hz

subsampled_data = data[::subsample_rate]
subsampled_labels = labels[::subsample_rate]

print(len(data))  # 2742499
print(len(subsampled_data))  # 15672

2742499
15672


In [56]:
# Try with a Datagenerator to reduce the RAM use
from keras.preprocessing.sequence import TimeseriesGenerator
random_state = 42
Hz = 4
sampling_rate = 5 # keep one data point out of 5
duration = 1 # how many minutes in the future the target after the end of the sequence
sequence_length = Hz * 60 * duration # observations will go back duration minutues
delay = sampling_rate*(sequence_length + duration*60*Hz - 1) # the target for a
                                                             # sequence will be
                                                             # duration (min)
                                                             # after the end of
                                                             # the sequence
batch_size = 32
shuffle = True

# Train Test Split
from sklearn.model_selection import train_test_split

X_dat, X_val, y_dat, y_val = train_test_split(subsampled_data, subsampled_labels, 
                                                 test_size = 0.2,
                                                 random_state=random_state)

X_train, X_test, y_train, y_test = train_test_split(X_dat, y_dat,
                                                    test_size = 0.2,
                                                    random_state = random_state)


# Normalize the data
from sklearn.preprocessing import StandardScaler
# create the StandardScaler object
scaler = StandardScaler()
# fit the scaler on the training data
X_train_scaled = scaler.fit_transform(X_train.values.reshape(-1,1))
# transform the validation data
X_val_scaled = scaler.transform(X_val.values.reshape(-1,1))
# transform the test data
X_test_scaled = scaler.transform(X_test.values.reshape(-1,1))

# Data Generator
train_data_gen = TimeseriesGenerator(X_train_scaled, y_train, length=sequence_length, batch_size=batch_size)
val_data_gen = TimeseriesGenerator(X_val_scaled, y_val, length=sequence_length, batch_size=batch_size)
test_data_gen = TimeseriesGenerator(X_test_scaled, y_test, length=sequence_length, batch_size=batch_size)

In [57]:
print(train_data_gen[0][0].shape[0]) # prints the batch size of the first entry

32


In [62]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam, RMSprop
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Define the LSTM model
model = Sequential()
#model.add(Dense(batch_size, activation='relu', input_shape=(sequence_length, 1)))
model.add(LSTM(64, 
               activation='relu', 
               stateful = True, # To save RAM use,
               batch_input_shape=(batch_size, sequence_length, 1))) # Broke after adding Dense
               #unroll=True, # unroll the dropout to speed runtime
               #recurrent_dropout=0.5)) # Add dropout
model.add(Dropout(0.5)) # Dropout
#model.add(Dense(units=32, activation='relu'))
model.add(Dense(units=1, activation='sigmoid'))

# Callback and Early Stopping
callbacks = [EarlyStopping(monitor='val_loss', patience=3),
             ModelCheckpoint('models/LSTMmodel_best.keras',
                  save_best_only=True)
]

# Compile
model.compile(loss='binary_crossentropy', 
              optimizer=RMSprop(learning_rate=0.01), 
              metrics=['binary_accuracy'])

# Train the model using the TimeSeriesGenerator
# train the model
epochs = 10
#total_epochs = 0
#for i in range(epochs):
#    for j in range(len(train_data_gen)):
#      print('Epoch', total_epochs+1, '/', epochs)
#      model.fit(train_data_gen, epochs=1, validation_data=val_data_gen, 
#                verbose=1, shuffle=False, callbacks=callbacks)
#      model.reset_states() # Need to reset since using LSTM stateful
#      total_epochs += 1

model.fit(train_data_gen, validation_data=val_data_gen,
          shuffle=False, callbacks=callbacks)





InvalidArgumentError: ignored

In [None]:
# Evaluate the model on the validation and test dataset

score = model.evaluate(val_data_gen)
print('Validation loss:', score[0])
print('Validation accuracy:', score[1])


test_loss = model.evaluate(test_data_gen)
print('Test loss:', test_loss)
print('Test accuracy:', score[1])