## Process a raw Dataset
This notebook shows the complete pipeline for turning a raw dataset into machine-learning-ready data.

In [2]:
# Imports
import os, sys
import numpy as np
from typing import Tuple, List, Dict
import pandas as pd
import io

# Make the local 'code' directory importable: resolve it to an absolute path,
# show it, and register it on sys.path only if it is not already listed.
module_path = os.path.abspath('code')
print(module_path)
already_registered = any(entry == module_path for entry in sys.path)
if not already_registered:
    sys.path.append(module_path)
    
from machine_learning_data_generation import loadPickeldDataset, processRawDatasetToPickleFiles, loadOnlineEEGdata
from consts import DEVICES_NEUROSCAN, DEVICES_MUSE_LSL_OPEN_VIBE

D:\Masterthesis\thesis_eeg\code


Using TensorFlow backend.


## Save a dataset to pickle files
This function processes a dataset and saves the output to pickle files.

***Careful! Takes about 1 hour!***

In [3]:
# Switch for the (slow) raw-data processing step; set to False to skip it.
PROCESS_DATA = True


if PROCESS_DATA:
    # Process the online EEG Data
    # (disabled: kept as an inert string so the call can be re-enabled easily)
    '''
    processRawDatasetToPickleFiles(datasetDirPath = "D:/Masterthesis/EEG_Data/eeg_data_online",
                              device = DEVICES_NEUROSCAN,
                              awakeFileName = None,
                              fatigueFileName = "Fatigue_state_256hz.csv",
                              normalFileName = "Normal_state_256hz.csv",
                              unlabeledFileName = None)
    '''
    
    # Process the Muse Data
    
    # Todo add parameter to optionally add en extra name/id to the generated data. E.g. for where it came from (driving, p300, reaction_game, ...)
    # NOTE(review): the recorded run of this cell aborted with
    # "StatisticsError: variance requires at least two data points" while
    # processing subject_5 — confirm that subject's recording before re-running.
    processRawDatasetToPickleFiles(datasetDirPath = "D:/Masterthesis/EEG_Data/muse_data",
                                   device = DEVICES_MUSE_LSL_OPEN_VIBE,
                                   awakeFileName = None,
                                   fatigueFileName = None,
                                   normalFileName = None,
                                   unlabeledFileName = "reaction_game_complete.csv", # or 'driving_complete.csv'
                                   skipDirs=['openVibe', 'subject_1', 'subject_6']) # faulty or empty dirs
    
    
else:
    print ("Already processed the EEG Online Data")

Loading the config file for muse_lsl_with_open_vibe
Skipping openVibe
Skipping subject_1
#############################################
Process Subject subject_10 Data...
---------------------------------------------
Starting to process D:/Masterthesis/EEG_Data/muse_data\subject_10\reaction_game_complete.csv...
Creating sliding windows...
Converting 3d Numpy Array to a series of Df's
Normalizing data...
Deleting Nan's...
Frequenccy Bands: [(0.5, 4, 'Delta'), (4, 8, 'Theta'), (8, 12, 'Alpha'), (12, 30, 'Beta'), (30, 50, 'Gamma')]


  .format(nperseg, input_length))


Creating bandpower, lower & upper envelope dictionary...
Creating statistics bandpower dict...
Creating a nice feature dataframe...
###
Extracting Entropy Features...
Created Numpy Array - Shape: (298, 1, 20)


  se = -np.multiply(psd_norm, np.log2(psd_norm)).sum()
  se = -np.multiply(psd_norm, np.log2(psd_norm)).sum()
  se = -np.multiply(psd_norm, np.log2(psd_norm)).sum()
  se = -np.multiply(psd_norm, np.log2(psd_norm)).sum()


Normalizing the entropy features...
Saving a feature list to: 'D:/Masterthesis/EEG_Data/muse_data\subject_10\features_channel_names.txt'
Saving a feature list to: 'D:/Masterthesis/EEG_Data/muse_data\subject_10\features_frequency_df.txt'
Saving a feature list to: 'D:/Masterthesis/EEG_Data/muse_data\subject_10\features_entropy.txt'
#############################################
Process Subject subject_2 Data...
---------------------------------------------
Starting to process D:/Masterthesis/EEG_Data/muse_data\subject_2\reaction_game_complete.csv...
Creating sliding windows...
Converting 3d Numpy Array to a series of Df's
Normalizing data...
Deleting Nan's...
Frequenccy Bands: [(0.5, 4, 'Delta'), (4, 8, 'Theta'), (8, 12, 'Alpha'), (12, 30, 'Beta'), (30, 50, 'Gamma')]


  .format(nperseg, input_length))


Creating bandpower, lower & upper envelope dictionary...
Creating statistics bandpower dict...
Creating a nice feature dataframe...
###
Extracting Entropy Features...
Created Numpy Array - Shape: (305, 1, 20)
Normalizing the entropy features...
Saving a feature list to: 'D:/Masterthesis/EEG_Data/muse_data\subject_2\features_channel_names.txt'
Saving a feature list to: 'D:/Masterthesis/EEG_Data/muse_data\subject_2\features_frequency_df.txt'
Saving a feature list to: 'D:/Masterthesis/EEG_Data/muse_data\subject_2\features_entropy.txt'
#############################################
Process Subject subject_3 Data...
---------------------------------------------
Starting to process D:/Masterthesis/EEG_Data/muse_data\subject_3\reaction_game_complete.csv...
Creating sliding windows...
Converting 3d Numpy Array to a series of Df's
Normalizing data...
Deleting Nan's...
Frequenccy Bands: [(0.5, 4, 'Delta'), (4, 8, 'Theta'), (8, 12, 'Alpha'), (12, 30, 'Beta'), (30, 50, 'Gamma')]


  .format(nperseg, input_length))


Creating bandpower, lower & upper envelope dictionary...
Creating statistics bandpower dict...
Creating a nice feature dataframe...
###
Extracting Entropy Features...
Created Numpy Array - Shape: (313, 1, 20)


  se = -np.multiply(psd_norm, np.log2(psd_norm)).sum()
  se = -np.multiply(psd_norm, np.log2(psd_norm)).sum()


Normalizing the entropy features...
Saving a feature list to: 'D:/Masterthesis/EEG_Data/muse_data\subject_3\features_channel_names.txt'
Saving a feature list to: 'D:/Masterthesis/EEG_Data/muse_data\subject_3\features_frequency_df.txt'
Saving a feature list to: 'D:/Masterthesis/EEG_Data/muse_data\subject_3\features_entropy.txt'
#############################################
Process Subject subject_4 Data...
---------------------------------------------
Starting to process D:/Masterthesis/EEG_Data/muse_data\subject_4\reaction_game_complete.csv...
Creating sliding windows...
Converting 3d Numpy Array to a series of Df's
Normalizing data...
Deleting Nan's...
Frequenccy Bands: [(0.5, 4, 'Delta'), (4, 8, 'Theta'), (8, 12, 'Alpha'), (12, 30, 'Beta'), (30, 50, 'Gamma')]


  .format(nperseg, input_length))


Creating bandpower, lower & upper envelope dictionary...
Creating statistics bandpower dict...
Creating a nice feature dataframe...
###
Extracting Entropy Features...
Created Numpy Array - Shape: (310, 1, 20)


  se = -np.multiply(psd_norm, np.log2(psd_norm)).sum()
  se = -np.multiply(psd_norm, np.log2(psd_norm)).sum()
  se = -np.multiply(psd_norm, np.log2(psd_norm)).sum()
  se = -np.multiply(psd_norm, np.log2(psd_norm)).sum()


Normalizing the entropy features...
Saving a feature list to: 'D:/Masterthesis/EEG_Data/muse_data\subject_4\features_channel_names.txt'
Saving a feature list to: 'D:/Masterthesis/EEG_Data/muse_data\subject_4\features_frequency_df.txt'
Saving a feature list to: 'D:/Masterthesis/EEG_Data/muse_data\subject_4\features_entropy.txt'
#############################################
Process Subject subject_5 Data...
---------------------------------------------
Starting to process D:/Masterthesis/EEG_Data/muse_data\subject_5\reaction_game_complete.csv...
Creating sliding windows...
Converting 3d Numpy Array to a series of Df's
Normalizing data...
Deleting Nan's...
Frequenccy Bands: [(0.5, 4, 'Delta'), (4, 8, 'Theta'), (8, 12, 'Alpha'), (12, 30, 'Beta'), (30, 50, 'Gamma')]


  .format(nperseg, input_length))


Creating bandpower, lower & upper envelope dictionary...
Creating statistics bandpower dict...


StatisticsError: variance requires at least two data points

### Function to create a Machine Learning Dataset
With this function you can create an X and y dataset from a given EEG dataset (process the raw data first with the function 'processRawDatasetToPickleFiles').
It creates an X & y for the EEG signals and the frequency features.

In [4]:
from machine_learning_data_generation import createAndSafeMlDataset
from consts import TARGET_FATIGUE, TARGET_NORMAL

# Switch for building the machine learning dataset; set to False to skip it.
CREATE_ML_DATA = True

# Label value handed to createAndSafeMlDataset for each target state.
TARGET_LABEL_DICT = {
    TARGET_NORMAL: 1,
    TARGET_FATIGUE: 0,
}

if not CREATE_ML_DATA:
    print("Already created ML Data")
else:
    # Load the previously pickled dataset, then build and store the ML dataset
    # in the same directory.
    eegDataset = loadPickeldDataset("D:/Masterthesis/EEG_Data/eeg_data_online")
    createAndSafeMlDataset(
        eegDataset=eegDataset,
        targetLabelDict=TARGET_LABEL_DICT,
        dirPath="D:/Masterthesis/EEG_Data/eeg_data_online",
    )


Load Subject 1 Data...
Load Subject 10 Data...
Load Subject 11 Data...
Load Subject 12 Data...
Load Subject 2 Data...
Load Subject 3 Data...
Load Subject 4 Data...
Load Subject 5 Data...
Load Subject 6 Data...
Load Subject 7 Data...
Load Subject 8 Data...
Load Subject 9 Data...
Creating Machine Learning Dataset!
Processing Subject 1 - Target: NORMAL ...
Processing Subject 1 - Target: FATIGUE ...
Processing Subject 10 - Target: NORMAL ...
Processing Subject 10 - Target: FATIGUE ...
Processing Subject 11 - Target: NORMAL ...
Processing Subject 11 - Target: FATIGUE ...
Processing Subject 12 - Target: NORMAL ...
Processing Subject 12 - Target: FATIGUE ...
Processing Subject 2 - Target: NORMAL ...
Processing Subject 2 - Target: FATIGUE ...
Processing Subject 3 - Target: NORMAL ...
Processing Subject 3 - Target: FATIGUE ...
Processing Subject 4 - Target: NORMAL ...
Processing Subject 4 - Target: FATIGUE ...
Processing Subject 5 - Target: NORMAL ...
Processing Subject 5 - Target: FATIGUE ...


### Example - Load the online EEG Dataset

In [5]:
# Load the online EEG dataset without shuffling and without a train/test split.
# The call is unpacked into three results, named here for the EEG, frequency
# and entropy data.
eegData, freqData, entropyData = loadOnlineEEGdata(shuffle=False, splitData=False)

Loading Online EEG Data from D:/Masterthesis/EEG_Data/eeg_data_online ...
Data does not get splitted into train and test!
EEG Data Shape:
(7178, 512, 40) (7178,)
Freq Data Shape:
(1440, 1, 1200) (1440,)
Entropy Data Shape:
(7178, 1, 200) (7178,)


In [5]:
# Print the shapes of the train/test arrays.
# NOTE(review): trainX, trainy, testX and testy are not assigned in any cell
# visible in this notebook, and the recorded run of this cell raised
# "NameError: name 'trainX' is not defined". Presumably they should come from
# a load with splitting enabled — confirm and define them before this cell.
print("Shape train X: {}".format(trainX.shape))
print("Shape train y: {}".format(trainy.shape))
print("Shape test X: {}".format(testX.shape))
print("Shape test y: {}".format(testy.shape))


NameError: name 'trainX' is not defined

array([[[ 0.58857225, -0.05947701,  0.75340673, ...,  0.        ,
          0.        ,  0.        ]],

       [[ 0.77181281,  0.53140419,  0.83882233, ...,  0.        ,
          0.        ,  0.        ]],

       [[ 0.60697763,  0.44422637,  0.88404084, ...,  0.        ,
          0.        ,  0.        ]],

       ...,

       [[ 0.73472731,  0.54010546,  0.80864854, ...,  0.        ,
          0.        ,  0.        ]],

       [[ 0.50687363,  0.31490547,  0.60680156, ...,  0.        ,
          0.        ,  0.        ]],

       [[ 0.53433675,  0.39765327,  0.74250543, ...,  0.        ,
          0.        ,  0.        ]]])

In [28]:
# Run the evaluation several times, printing and collecting each run's score
# scaled to percent, then hand all scores to summarize_results.
# NOTE(review): evaluate_model, summarize_results and the train/test arrays
# are not defined in the visible cells — confirm they exist before this cell.
repeats = 5
scores = []
for r in range(repeats):
    score = evaluate_model(trainX, trainy, testX, testy) * 100.0
    print('>#%d: %.3f' % (r+1, score))
    scores.append(score)

summarize_results(scores)

>#1: 51.620
>#2: 51.620
>#3: 51.620
>#4: 51.620
>#5: 51.620
[51.62037014961243, 51.62037014961243, 51.62037014961243, 51.62037014961243, 51.62037014961243]
Accuracy: 51.620% (+/-0.000)
