## This Notebook shows how a raw Dataset gets processed for further machine learning steps

In [1]:
# Imports
import os, sys
import numpy as np
from typing import Tuple, List, Dict
import pandas as pd
import io

# to enable local imports
module_path = os.path.abspath('code')
print(module_path)
if module_path not in sys.path:
    sys.path.append(module_path)
    
from machine_learning_data_generation import loadPickeldDataset
from consts import DEVICES_NEUROSCAN

D:\Masterthesis\thesis_eeg\code


## Save a dataset to pickel files
With this function  a dataset gets processed and the output will be saved to pickel files

In [2]:
PROCESS_DATA = False

if PROCESS_DATA:
    # Process the online EEG Data
    processRawDatasetToPickleFiles(datasetDirPath = "D:/Masterthesis/EEG_Data/eeg_data_online",
                              device = DEVICES_NEUROSCAN,
                              awakeFileName = None,
                              fatigueFileName = "Fatigue_state_256hz.csv",
                              normalFileName = "Normal_state_256hz.csv",
                              unlabeledFileName = None)
else:
    print ("Already processed the EEG Online Data")

Already processed the EEG Online Data


### Function to create a Machine Learning Dataset
With this functions you can create a X and y Dataset from a given EEG Dataset (use the function 'processRawDatasetToPickleFiles').
It creates a X & y for the EEG Signals and the frequency features

In [None]:
from machine_learning_data_generation import createAndSafeMlDataset
from consts import TARGET_FATIGUE, TARGET_NORMAL

CREATE_ML_DATA = False

TARGET_LABEL_DICT = {TARGET_NORMAL : 0,
                     TARGET_FATIGUE : 1}

if CREATE_ML_DATA:
    
    eegDataset = loadPickeldDataset("D:/Masterthesis/EEG_Data/eeg_data_online")
    
    createAndSafeMlDataset(eegDataset=eegDataset,
                           targetLabelDict=TARGET_LABEL_DICT,
                           dirPath="D:/Masterthesis/EEG_Data/eeg_data_online")
else:
    print("Already created ML Data")


Load Subject 1 Data...
Load Subject 10 Data...
Load Subject 11 Data...
Load Subject 12 Data...
Load Subject 2 Data...
Load Subject 3 Data...
Load Subject 4 Data...
Load Subject 5 Data...
Load Subject 6 Data...
Load Subject 7 Data...
Load Subject 8 Data...
Load Subject 9 Data...
Creating Machine Learning Dataset!
Processing Subject 1 - Target: NORMAL ...


### Example - Load  the online EEG Dataset

In [None]:
y_eeg = np.load(os.path.join("D:/Masterthesis/EEG_Data/eeg_data_online", 'y_eegData.npy'))

In [None]:
to_categorical(y_eeg, num_classes=2)

In [10]:
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
    
def loadOnlineEEGdata(test_size=0.3, shuffle=False) -> ((), ()):
    dirPath = "D:/Masterthesis/EEG_Data/eeg_data_online"
    
    print("Loading Online EEG Data from {} ...".format(dirPath))
    
    # load array
    X_eeg = np.load(os.path.join(dirPath, 'X_eegData.npy'))
    y_eeg = np.load(os.path.join(dirPath, 'y_eegData.npy'))
    
    X_freq = np.load(os.path.join(dirPath, 'X_frequencyData.npy'), allow_pickle=True)
    y_freq = np.load(os.path.join(dirPath, 'y_frequencyData.npy'))
    
    # load target labels
    
    # load feature names
    
    # Split dataset into training set and test set
    # EEG Data
    X_eeg_train, X_eeg_test, y_eeg_train, y_eeg_test = train_test_split(X_eeg, y_eeg, test_size=test_size,random_state=109, shuffle=shuffle) # 70% training and 30% test    
    y_eeg_train = to_categorical(y_eeg_train)
    y_eeg_test = to_categorical(y_eeg_test)
    eegData = (X_eeg_train, y_eeg_train, X_eeg_test, y_eeg_test)
    print("EEG Data Shape:")
    print(X_eeg_train.shape, y_eeg_train.shape, X_eeg_test.shape, y_eeg_test.shape)
    
    # Frequency Data
    X_freq_train, X_freq_test, y_freq_train, y_freq_test = train_test_split(X_freq, y_freq, test_size=test_size,random_state=109, shuffle=shuffle) # 70% training and 30% test    
    y_freq_train = to_categorical(y_freq_train)
    y_freq_test = to_categorical(y_freq_test)
    freqData = (X_freq_train, y_freq_train, X_freq_test, y_freq_test)
    print("Freq Data Shape:")
    print(X_freq_train.shape, y_freq_train.shape, X_freq_test.shape, y_freq_test.shape)
    
    return (eegData, freqData)

In [11]:
eegData, freqData = loadOnlineEEGdata()

Loading Online EEG Data from D:/Masterthesis/EEG_Data/eeg_data_online ...
EEG Data Shape:
(5024, 512, 40) (5024, 3) (2154, 512, 40) (2154, 3)
Freq Data Shape:
(1008, 1, 1200) (1008, 3) (432, 1, 1200) (432, 3)


In [24]:
# Import train_test_split function
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import LSTM

In [33]:
# fit and evaluate a model
def evaluate_model(trainX, trainy, testX, testy):
    verbose, epochs, batch_size = 0, 15, 64
    
    n_timesteps, n_features, n_outputs = trainX.shape[1], trainX.shape[2], trainy.shape[1]
        
    #print("n_timesteps: {} - n_features: {} - n_outputs: {}".format(n_timesteps, n_features, n_outputs))
    model = Sequential()
    
    model.add(LSTM(100, input_shape=(n_timesteps,n_features)))
    model.add(Dropout(0.5))
    model.add(Dense(100, activation='relu'))
    model.add(Dense(n_outputs, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # fit network
    model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)
    # evaluate model
    _, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)
    return accuracy

# summarize scores
def summarize_results(scores):
    print(scores)
    m, s = np.mean(scores), np.std(scores)
    print('Accuracy: %.3f%% (+/-%.3f)' % (m, s))

# run an experiment
def run_experiment(repeats=10, channel=None):
    # load data
    trainX, trainy, testX, testy = loadOnlineEEGdata()
    
    if channel != None:
        print("Using only Channel: {}".format(channel))
        trainX = np.dstack(np.dstack(trainX))[channel]
        testX = np.dstack(np.dstack(testX))[channel]
    
    print("Loaded the Dataset")
    # repeat experiment
    scores = list()
    for r in range(repeats):
        score = evaluate_model(trainX, trainy, testX, testy)
        score = score * 100.0
        print('>#%d: %.3f' % (r+1, score))
        scores.append(score)
    # summarize results
    summarize_results(scores)


In [64]:
#run_experiment(repeats=1, channel=5)

In [11]:
trainX, trainy, testX, testy = loadOnlineEEGdata()

Loading Online EEG Data from D:/Masterthesis/EEG_Data/eeg_data_online ...
(5024, 512, 40) (5024, 2) (2154, 512, 40) (2154, 2)


In [12]:
channel = 6
#trainX = np.dstack(np.dstack(trainX))[channel]
#testX = np.dstack(np.dstack(testX))[channel]
trainX = trainX[:, :, channel:channel+1]
testX = testX[:, :, channel:channel+1]


In [27]:
trainX, trainy, testX, testy  = eegData

In [28]:
trainX, trainy, testX, testy  = freqData

In [31]:
print("Shape train X: {}".format(trainX.shape))
print("Shape train y: {}".format(trainy.shape))
print("Shape test X: {}".format(testX.shape))
print("Shape test y: {}".format(testy.shape))


Shape train X: (1008, 1, 1200)
Shape train y: (1008, 3)
Shape test X: (432, 1, 1200)
Shape test y: (432, 3)


In [34]:
repeats = 1
# repeat experiment
scores = list()
for r in range(repeats):
    break
    score = evaluate_model(trainX, trainy, testX, testy)
    score = score * 100.0
    print('>#%d: %.3f' % (r+1, score))
    scores.append(score)
# summarize results
summarize_results(scores)

[]
Accuracy: nan% (+/-nan)


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  keepdims=keepdims)
  arrmean, rcount, out=arrmean, casting='unsafe', subok=False)
  ret = ret.dtype.type(ret / rcount)
