In [None]:
pip install dataManager

In [None]:
import pickle
import numpy as np
import os
import datetime
import tensorflow as tf
from pathlib import Path

import sklearn
from  sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

from tensorflow.keras.optimizers import RMSprop
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.models import load_model

class DataManager:
    """Load WESAD chest recordings, derive windowed features and train an LSTM.

    All data containers (``BASELINE_DATA``, ``STRESS_DATA``, ``FEATURES``,
    ``STRESS_FEATURES``) are class attributes: they are shared by every
    instance and are only ever appended to, so repeating a load/compute call
    accumulates duplicate entries.

    ``compute_features`` and ``compute_features_stress`` look a subject ``s``
    up at ``data[s - 2]``; this matches the order produced by ``load_all()``
    with the default ``SUBJECTS`` list.
    """

    # Raw string: "\W" is not a valid escape sequence. The value is unchanged;
    # get_subject_path() converts the backslash to "/" when building a path.
    ROOT_PATH = r"dataset\WESAD"
    FILE_EXT = ".pkl"
    # Resolved against the working directory at class-definition time.
    MODELS_DIR = os.path.join(Path().absolute(), 'src', 'models')
    SUBJECTS = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
    # Values of data['label'] that select the two conditions of interest.
    BASELINE = 1
    STRESS = 2

    # Statistic names returned by get_stats() / get_features_for_acc().
    FEATURE_KEYS = ['max', 'min', 'mean', 'range', 'std']
    FEATURE_ACC_KEYS = ['maxx', 'maxy', 'maxz', 'mean', 'std']

    RAW_SENSOR_VALUES = ['ACC', 'EDA', 'Temp']

    # Key order defines the column order of the training matrix.
    FEATURES = {
        'a_mean': [], 'a_std': [], 'a_maxx': [], 'a_maxy': [], 'a_maxz': [],
        'e_max': [], 'e_min': [], 'e_mean': [], 'e_range': [], 'e_std': [],
        't_max': [], 't_min': [], 't_mean': [], 't_range': [], 't_std': [],
    }
    STRESS_FEATURES = {
        'a_mean': [], 'a_std': [], 'a_maxx': [], 'a_maxy': [], 'a_maxz': [],
        'e_max': [], 'e_min': [], 'e_mean': [], 'e_range': [], 'e_std': [],
        't_max': [], 't_min': [], 't_mean': [], 't_range': [], 't_std': [],
    }

    BASELINE_DATA = []
    STRESS_DATA = []

    # File name written by the most recent save_model() call ('' = none yet).
    last_saved = ''

    def __init__(self, ignore_empatica=True, ignore_additional_signals=True):
        """Store the loading options.

        Args:
            ignore_empatica: when true, the 'wrist' signals are dropped from
                every loaded recording.
            ignore_additional_signals: stored for callers; no method of this
                class consults it.
        """
        self.ignore_empatica = ignore_empatica
        self.ignore_additional_signals = ignore_additional_signals

    def get_subject_path(self, subject):
        """Return the pickle path of ``subject`` (e.g. ``2`` -> ``.../S2/S2.pkl``).

        Raises:
            Exception: if no file exists at the computed path.
        """
        subject_id = 'S' + str(subject)
        path = os.path.join(DataManager.ROOT_PATH, subject_id,
                            subject_id + DataManager.FILE_EXT).replace("\\", "/")
        print('Loading data for ' + subject_id)
        print('Path=' + path)
        if not os.path.isfile(path):
            raise Exception('Invalid subject: ' + str(subject)
                            + ' (no file at ' + path + ')')
        return path

    def load(self, subject):
        """Read one subject's pickle and return ``(baseline, stress)`` signal dicts."""
        # SECURITY: pickle.load can execute arbitrary code; only point this at
        # dataset files obtained from a trusted source.
        with open(self.get_subject_path(subject), 'rb') as file:
            data = pickle.load(file, encoding='latin1')
        return self.extract_and_reform(data, subject)

    def load_all(self, subjects=SUBJECTS):
        """Load every subject in ``subjects`` (appends to the class-level lists)."""
        for subject in subjects:
            self.load(subject)

    def extract_and_reform(self, data, subject):
        """Split the chest signals of one recording by condition.

        Args:
            data: mapping with ``data['label']`` (array of condition codes) and
                ``data['signal']['chest'][name]`` for each name in
                ``RAW_SENSOR_VALUES``.
            subject: subject id (not used by the extraction itself).

        Returns:
            ``(base, stress)``: two dicts keyed by sensor name holding the
            samples whose label equals ``BASELINE`` / ``STRESS``. Both dicts
            are also appended to ``BASELINE_DATA`` / ``STRESS_DATA``.
        """
        if self.ignore_empatica:
            # pop() instead of del: a recording without wrist data is fine.
            data['signal'].pop('wrist', None)

        labels = data['label']
        baseline_indices = np.nonzero(labels == DataManager.BASELINE)[0]
        stress_indices = np.nonzero(labels == DataManager.STRESS)[0]

        chest = data['signal']['chest']
        base = dict()
        stress = dict()
        for value in DataManager.RAW_SENSOR_VALUES:
            base[value] = chest[value][baseline_indices]
            stress[value] = chest[value][stress_indices]

        DataManager.BASELINE_DATA.append(base)
        DataManager.STRESS_DATA.append(stress)

        return base, stress

    @staticmethod
    def _window_starts(num_samples, window_size, window_shift):
        """Start offsets of the sliding windows.

        The upper bound is exclusive, so a window starting exactly at
        ``num_samples - window_size`` is not produced and a signal no longer
        than one window yields no windows at all.
        """
        return range(0, num_samples - window_size, window_shift)

    def get_stats(self, values, window_size=42000, window_shift=175):
        """Sliding-window max / min / mean / range / std of a signal.

        Args:
            values: numpy array; windows are taken along the first axis and
                ``values.size`` is used as the number of samples.
            window_size: samples per window.
            window_shift: samples between consecutive window starts.

        Returns:
            Dict keyed by ``FEATURE_KEYS`` with one list entry per window.
        """
        features = {key: [] for key in DataManager.FEATURE_KEYS}
        for start in DataManager._window_starts(values.size, window_size, window_shift):
            window = values[start:start + window_size]
            highest = np.amax(window)
            lowest = np.amin(window)
            features['max'].append(highest)
            features['min'].append(lowest)
            features['mean'].append(np.mean(window))
            features['range'].append(highest - lowest)
            features['std'].append(np.std(window))
        return features

    def get_features_for_acc(self, values, window_size=42000, window_shift=175):
        """Sliding-window features of a three-column signal.

        Args:
            values: 2-D numpy array; columns 0, 1 and 2 are read as x, y, z.
            window_size: samples per window.
            window_shift: samples between consecutive window starts.

        Returns:
            Dict with keys 'mean', 'std', 'maxx', 'maxy', 'maxz'. 'mean' and
            'std' hold the SUM of the three per-axis means / standard
            deviations of each window; 'max?' hold the per-axis maxima.
        """
        features = {'mean': [], 'std': [], 'maxx': [], 'maxy': [], 'maxz': []}
        for start in DataManager._window_starts(values.shape[0], window_size, window_shift):
            stop = start + window_size
            axes = [values[start:stop, column] for column in range(3)]

            features['mean'].append(sum(np.mean(axis) for axis in axes))
            features['std'].append(sum(np.std(axis) for axis in axes))
            features['maxx'].append(np.amax(axes[0]))
            features['maxy'].append(np.amax(axes[1]))
            features['maxz'].append(np.amax(axes[2]))
        return features

    def _accumulate_features(self, target, subjects, data, window_size, window_shift):
        """Append the windowed features of each subject to the ``target`` dict.

        Every statistic is stored under its own name ('a_' + ACC key,
        'e_' + EDA key, 't_' + Temp key). The earlier positional bookkeeping
        paired FEATURE_ACC_KEYS with the dict's key order, which filed the ACC
        maxima under 'a_mean'/'a_std'/'a_maxx' and the mean/std under
        'a_maxy'/'a_maxz'.
        """
        for subject in subjects:
            print("\tsubject:", subject)
            # Position of the subject in ``data``: subjects start at 2.
            signals = data[subject - 2]

            acc = self.get_features_for_acc(signals['ACC'], window_size, window_shift)
            for feature in DataManager.FEATURE_ACC_KEYS:
                target['a_' + feature].extend(acc[feature])

            eda = self.get_stats(signals['EDA'], window_size, window_shift)
            for feature in DataManager.FEATURE_KEYS:
                target['e_' + feature].extend(eda[feature])

            temp = self.get_stats(signals['Temp'], window_size, window_shift)
            for feature in DataManager.FEATURE_KEYS:
                target['t_' + feature].extend(temp[feature])
        return target

    def compute_features(self, subjects=SUBJECTS, data=BASELINE_DATA, window_size=42000, window_shift=175):
        """Extend ``FEATURES`` with the windowed features of ``subjects`` and return it."""
        print('Computing features..')
        return self._accumulate_features(DataManager.FEATURES, subjects, data,
                                         window_size, window_shift)

    def compute_features_stress(self, subjects=SUBJECTS, data=STRESS_DATA, window_size=42000, window_shift=175):
        """Extend ``STRESS_FEATURES`` with the windowed features of ``subjects`` and return it."""
        print('computing features..')
        return self._accumulate_features(DataManager.STRESS_FEATURES, subjects, data,
                                         window_size, window_shift)

    @staticmethod
    def _feature_matrix(features):
        """Stack the per-feature lists into an array of shape (windows, 15)."""
        return np.column_stack([features[name] for name in DataManager.FEATURES.keys()])

    def get_train_and_test_data(self):
        """Build the labelled data set and split it 75 % / 25 %.

        Rows from ``FEATURES`` are labelled 0, rows from ``STRESS_FEATURES``
        are labelled 1. The split uses ``random_state=42``.

        Returns:
            ``(X_train, X_test, y_train, y_test)``.
        """
        X1 = DataManager._feature_matrix(DataManager.FEATURES)
        X2 = DataManager._feature_matrix(DataManager.STRESS_FEATURES)

        X = np.concatenate((X1, X2), axis=0)
        y = np.concatenate((np.zeros(len(X1), dtype=int),
                            np.ones(len(X2), dtype=int)), axis=0)
        X_train, X_test, y_train, y_test = \
            train_test_split(X, y, test_size=0.25, random_state=42)
        return (X_train, X_test, y_train, y_test)

    def normalize(self, data):
        """Min-max scale ``data`` to [0, 1] with a scaler fitted on ``data`` itself."""
        scaler = MinMaxScaler(feature_range=(0, 1))
        return scaler.fit_transform(data)

    def scale_data(self, X_train, X_test, y_train, y_test):
        """Scale the features and reshape them to (samples, 1, features).

        The scaler is fitted on ``X_train`` only and then applied to
        ``X_test``, so both splits share one mapping. If ``X_train`` or
        ``X_test`` is ``None`` the split is recomputed with
        ``get_train_and_test_data()``.

        Returns:
            ``(X_train, X_test, y_train, y_test)`` with the labels unchanged.
        """
        print("Scaling the data...")
        if X_train is None or X_test is None:
            (X_train, X_test, y_train, y_test) = self.get_train_and_test_data()
        scaler = MinMaxScaler(feature_range=(0, 1))
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
        X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))
        return (X_train, X_test, y_train, y_test)

    def build_model(self):
        """Create and compile the network: LSTM(15) -> LSTM(15) -> Dense(1, sigmoid)."""
        num_neurons = 15
        num_features = 15

        print('Building the LSTM NN...')

        model = Sequential()
        model.add(LSTM(num_neurons, input_shape=(1, num_features), return_sequences=True))
        # Only the first layer needs an input shape.
        model.add(LSTM(num_neurons, return_sequences=False))
        model.add(Dense(1, activation='sigmoid'))

        model = self.configure_learning(model)
        # summary() prints by itself and returns None.
        model.summary()
        return model

    def configure_learning(self, model):
        """Compile ``model`` with SGD (learning rate 0.05) and binary cross-entropy."""
        # SGD is not among the notebook's top-level imports, so import it here.
        from tensorflow.keras.optimizers import SGD

        # ``learning_rate`` is the supported keyword; ``lr`` is a deprecated alias.
        opt = SGD(learning_rate=0.05)
        model.compile(loss='binary_crossentropy', optimizer=opt,
                      metrics=['accuracy'])
        return model

    def train_model(self, model, X_train, X_test, y_train, y_test,\
                    batch_size=32, epochs=5):
        """Fit ``model`` (validating on the test split) and evaluate it.

        Returns:
            ``(model, score, acc)`` where score/acc come from ``model.evaluate``.
        """
        print('Training network...')
        model.fit(X_train, y_train,
                  batch_size=batch_size,
                  epochs=epochs,
                  validation_data=(X_test, y_test))
        score, acc = model.evaluate(X_test, y_test, batch_size=batch_size)
        return (model, score, acc)

    def load_model(self, file_name=None):
        """Load a saved model from ``MODELS_DIR``.

        Args:
            file_name: file to load; when empty or ``None`` the file written
                by the most recent ``save_model()`` call is used. (The value
                is looked up at call time - a ``file_name=last_saved`` default
                would be frozen to '' when the class is defined.)

        Raises:
            ValueError: if no file name is given and nothing has been saved.
        """
        if not file_name:
            file_name = DataManager.last_saved
        if not file_name:
            raise ValueError('No model file name given and no model has been saved yet')
        print("Loading model:", file_name)
        file = os.path.join(DataManager.MODELS_DIR, file_name)
        return load_model(file)

    def save_model(self, model):
        """Save ``model`` as ``model-<timestamp>.h5`` in ``MODELS_DIR``.

        The file name is remembered in ``DataManager.last_saved``.
        """
        now = datetime.datetime.now()
        DataManager.last_saved = str("model-" + \
                                     str(now.replace(microsecond=0)) +\
                                     ".h5").replace(" ", "").replace(":", "_")
        # The target directory may not exist on a fresh checkout.
        os.makedirs(DataManager.MODELS_DIR, exist_ok=True)
        model.save(os.path.join(DataManager.MODELS_DIR, DataManager.last_saved))
        print("Saved model to disc:",\
              DataManager.last_saved)

    def get_model_results(self, model, X_train, X_test, y_train, y_test,\
                          batch_size=32):
        """Print loss/accuracy, a classification report and a confusion matrix
        for the test split. ``X_train`` and ``y_train`` are accepted but unused.
        """
        print('batch_size = ', batch_size)
        print('Model results from model.evaluate() test data')
        score, acc = model.evaluate(X_test , y_test, batch_size=batch_size)
        print('score:', score, 'accuracy:', acc)

        probabilities = model.predict(X_test, batch_size=batch_size, verbose=1)
        # Threshold the sigmoid output at 0.5 into integer class labels.
        y_pred = (probabilities > 0.5).astype(int).ravel()
        print("_________________________________________________________________")
        print('\nClassification report from model.predict with test data')
        print(classification_report(y_test, y_pred))
        print("_________________________________________________________________")
        print('\nConfusion matrix from model.predict with test data')
        print(confusion_matrix(y_test, y_pred))
        print("_________________________________________________________________")

    def create_network(self, epochs=5, batch_size=32):
        """Split, scale, build, train and save in one call.

        Returns:
            ``(model, X_train, X_test, y_train, y_test)`` with scaled features.
        """
        (X_train, X_test, y_train, y_test) = self.get_train_and_test_data()
        (X_train, X_test, y_train, y_test) = \
            self.scale_data(X_train, X_test, y_train, y_test)
        model = self.build_model()
        (model, score, acc) = self.train_model(model, X_train, X_test, y_train, y_test,\
                                 batch_size, epochs)
        self.save_model(model)
        return (model, X_train, X_test, y_train, y_test)

In [None]:
# Create the manager with its default options
# (ignore_empatica=True, ignore_additional_signals=True).
manager = DataManager()

In [None]:
# Load every subject listed in DataManager.SUBJECTS. Each load appends to the
# class-level BASELINE_DATA / STRESS_DATA lists, so re-running this cell adds
# the same recordings again.
manager.load_all()

In [None]:
# Fill manager.FEATURES (baseline) and manager.STRESS_FEATURES (stress).
# Only the final statement needs the trailing ';' to keep the returned
# dictionary out of the cell output.
manager.compute_features()
manager.compute_features_stress();

In [None]:
# Sanity check: number of subjects and number of windows per baseline feature.
print("We have", len(manager.SUBJECTS), " subjects")

for feature, values in manager.FEATURES.items():
    print("there are ", len(values), " values for ", feature)

In [None]:
# Column order of the feature matrices (one row per window).
feature_columns = [
    'a_mean', 'a_std', 'a_maxx', 'a_maxy', 'a_maxz',
    'e_max', 'e_min', 'e_mean', 'e_range', 'e_std',
    't_max', 't_min', 't_mean', 't_range', 't_std',
]

# Baseline windows.
X1 = [
    [manager.FEATURES[column][row] for column in feature_columns]
    for row in range(len(manager.FEATURES['a_mean']))
]
print(np.shape(X1))

# Stress windows.
X2 = [
    [manager.STRESS_FEATURES[column][row] for column in feature_columns]
    for row in range(len(manager.STRESS_FEATURES['a_mean']))
]
print(np.shape(X2))

In [None]:
import numpy as np
from numpy import concatenate
from matplotlib import pyplot
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
import sklearn
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
import tensorflow as tf
import keras
from keras.preprocessing import sequence

from keras.models import Sequential

from keras.layers import Dense, Embedding, Flatten

from keras.layers import LSTM

In [None]:
# Class labels: 0 for every baseline window, 1 for every stress window.
y1, y2 = [0] * len(X1), [1] * len(X2)

In [None]:
# Number of samples per class.
for class_labels in (y1, y2):
    print(len(class_labels))

In [None]:
# Stack both classes into one data set; rows of X and entries of y stay aligned
# because both are concatenated in the same (baseline, stress) order.
X = np.concatenate([X1, X2], axis=0)
y = np.concatenate([y1, y2], axis=0)

print(np.shape(X))
print(np.shape(y))

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25 % of the windows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [None]:
# Shapes of the four split arrays, in the order X_train, X_test, y_train, y_test.
for split_array in (X_train, X_test, y_train, y_test):
    print(np.shape(split_array))

In [None]:
def normalize(data):
    """Min-max scale ``data`` to [0, 1] using a scaler fitted on ``data`` itself."""
    scaler = MinMaxScaler(feature_range=(0, 1))
    return scaler.fit_transform(data)


# Fit the scaler on the training split only and reuse it for the test split.
# Scaling each split with its own, independently fitted scaler maps the same
# raw value to different inputs in train and test.
feature_scaler = MinMaxScaler(feature_range=(0, 1))
X_train = feature_scaler.fit_transform(X_train)
X_test = feature_scaler.transform(X_test)

# Show a few rows rather than dumping the whole array.
print(X_train[:5])

In [None]:
# The LSTM expects (samples, timesteps, features); every window is one timestep.
n_train, n_train_features = X_train.shape
X_train = X_train.reshape(n_train, 1, n_train_features)

n_test, n_test_features = X_test.shape
X_test = X_test.reshape(n_test, 1, n_test_features)

In [None]:
# Shapes after the reshape, followed by the scaled training inputs.
for split_array in (X_train, X_test, y_train, y_test):
    print(np.shape(split_array))
print(X_train)

In [None]:
# LSTM units per layer and number of input features per timestep.
num_neurons, num_features = 15, 15

In [None]:
# Two stacked LSTM layers followed by a single sigmoid unit for the binary
# baseline-vs-stress decision.
model = Sequential()
# return_sequences=True so the second LSTM receives the full sequence.
# (A stray trailing backslash here used to join this line with the next one,
# which is a SyntaxError.)
model.add(LSTM(num_neurons, input_shape=(1, num_features), return_sequences=True))
model.add(LSTM(num_neurons, input_shape=(1, num_features), return_sequences=False))
model.add(Dense(1, activation='sigmoid'))


In [None]:
# Compare the shapes the model declares with the shapes of the training data.
model.summary()

for caption, shape in (
    ("inputs: ", model.input_shape),
    ("outputs: ", model.output_shape),
    ("actual inputs: ", np.shape(X_train)),
    ("actual outputs: ", np.shape(y_train)),
):
    print(caption, shape)

In [None]:
from tensorflow.keras.optimizers import RMSprop
# RMSprop with learning rate 0.05; binary cross-entropy matches the sigmoid output.
opt = RMSprop(learning_rate=0.05)
model.compile(
    optimizer=opt,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 5 epochs, validating on the held-out split after each epoch, then
# record the final loss ("score") and accuracy on that split.
batch_size = 2

print('Training LSTM...')
model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=5,
    validation_data=(X_test, y_test),
)

score, acc = model.evaluate(X_test, y_test, batch_size=batch_size)

In [None]:
# Loss and accuracy returned by model.evaluate on the test split.
for caption, value in (('score:', score), ('accuracy:', acc)):
    print(caption, value)

In [None]:
# Persist the architecture (JSON) and the weights (HDF5) separately.
# The local names avoid shadowing the standard-library `json` module, and the
# `with` block already closes the file, so no explicit close() is needed.
model_json = model.to_json()
with open("model.json", "w") as json_out:
    json_out.write(model_json)
model.save_weights("model.h5")
print("Saved model to disc")

In [None]:
# Additionally store the complete model in a single file.
model.save("LSTM_model.h5")
print("Model saved to disk")

In [None]:
from keras.models import model_from_json
# Load the model of interest: rebuild the architecture from JSON, then restore
# the weights. The context manager closes the file even if reading fails.
with open('model.json', 'r') as json_file:
    model_json = json_file.read()
model_from_disc = model_from_json(model_json)
model_from_disc.load_weights("model.h5")

In [None]:
# A model rebuilt from JSON must be compiled again before it can be evaluated.
# `learning_rate` is the supported keyword (`lr` is a deprecated alias that
# newer Keras releases reject) and matches the optimizer used for training.
opt = RMSprop(learning_rate=0.05)
model_from_disc.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

score, acc = model_from_disc.evaluate(X_test, y_test, batch_size=batch_size)
print('score:', score)
print('accuracy:', acc)

In [None]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
# Predict on the test split, reusing the notebook's batch_size instead of
# repeating its value as a literal.
y_pred = model_from_disc.predict(X_test, batch_size=batch_size, verbose=1)

In [None]:
# Raw outputs of model_from_disc.predict on the test split.
print(y_pred)

In [None]:
# Turn the model outputs into hard class labels (in place) with a 0.5 cut-off,
# then summarise the test-set performance.
above_threshold = y_pred > 0.5
y_pred[above_threshold] = 1
y_pred[y_pred <= 0.5] = 0

for report in (classification_report, confusion_matrix):
    print(report(y_test, y_pred))