## This notebook shows how a raw dataset is processed for further machine learning steps

In [3]:
# Imports
import os, sys

# Make the modules inside the local 'code' directory importable:
# resolve the directory to an absolute path and register it once on sys.path
module_path = os.path.abspath('code')
if module_path not in sys.path:
    sys.path.append(module_path)

In [54]:
from typing import Dict, Tuple

import pandas as pd

from utils import readFileCSV, loadConfigFile
from pipelines import (filter_signal, pre_process_signal, feature_extraction)

def processRawFileWithPipeline(filepath : str, yamlConfig) -> Tuple[pd.Series, pd.DataFrame]:
    ''' Process a given filepath with the current pipelines

    The raw CSV file is read and then passed, in this order, through
    'filter_signal', 'pre_process_signal' and 'feature_extraction'.

    This creates two different data objects:
        epochSeries: This is a pandas.Series which contains dataframes. Each index of the series represents one epoch
        frequencyFeatureDf: This is a dataframe of the frequency features of the epochSeries. The index represents the epochs. The features are the columns

    @param str filepath: path to the raw csv file which gets processed
    @param yamlConfig: A loaded yaml config file for processing the data
    @return: the tuple (epochSeries, frequencyFeatureDf)
    @raises FileNotFoundError: if filepath does not point to an existing file
    '''
    # Fail early with a specific exception type; FileNotFoundError is still an
    # Exception, so callers that catch the generic type keep working
    if not os.path.isfile(filepath):
        raise FileNotFoundError("The file '{}' does not exists!".format(filepath))

    df = readFileCSV(filepath)
    df = filter_signal(df=df, config=yamlConfig, starttime=None) # general filtering
    epochSeries = pre_process_signal(df=df, config=yamlConfig)   # pre-processing
    frequencyFeatureDf = feature_extraction(epochSeries=epochSeries, config=yamlConfig) # extract features

    return epochSeries, frequencyFeatureDf

def safeAndProcessRawFileWithPipeline(rawFilePath : str, fileDir : str, label : str, yamlConfig):
    ''' Run the processing pipeline on one raw file and store both results as pickle files

    The processing itself is delegated to 'processRawFileWithPipeline()'. Its two returned data objects
    are written into fileDir as 'epochSeries_<label>.pkl' and 'frequencyFeaturesDf_<label>.pkl'.

    @param str rawFilePath: path to the file which gets processed
    @param str fileDir: Directory where the data objects should be stored
    @param str label: A label to know which data we process, e.g. fatigue, normal or awake data
    @param yamlConfig: A loaded yaml config file for processing the data
    '''
    print ("Starting to process {}...".format(rawFilePath))

    epochSeries, frequencyFeatureDf = processRawFileWithPipeline(filepath=rawFilePath, yamlConfig=yamlConfig)

    # Each data object is pickled next to the other one; the label ends up in the file name
    picklePlan = ((epochSeries, 'epochSeries_{}.pkl'),
                  (frequencyFeatureDf, 'frequencyFeaturesDf_{}.pkl'))
    for dataObject, fileNameTemplate in picklePlan:
        dataObject.to_pickle(os.path.join(fileDir, fileNameTemplate.format(label)))

def processRawDatasetToPickleFiles(datasetDirPath : str, device : str, awakeFileName : str,
                                   fatigueFileName : str, normalFileName : str, unlabeledFileName : str):
    ''' Process the raw files of every subject directory of a dataset and save the results as pickle files

    The yaml config for the given device is loaded once. Then, for every directory found while walking
    datasetDirPath, each given raw file is handed to 'safeAndProcessRawFileWithPipeline()', which stores
    the results inside that same directory.

    @param str datasetDirPath: Path where the directory of the dataset is
    @param str device: name of the device, to load the correct yaml file for processing

    Depending on the dataset there might be awake, normal, fatigue or unlabeled data.
    @param awakeFileName: filename of the awake data or None then it will be ignored
    @param fatigueFileName: filename of the fatigue data or None then it will be ignored
    @param normalFileName: filename of the normal data or None then it will be ignored
    @param unlabeledFileName: filename of the unlabeled data or None then it will be ignored

    @raises NotADirectoryError: if datasetDirPath is not an existing directory
    '''

    if not os.path.isdir(datasetDirPath):
        raise NotADirectoryError("The given dir path '{}' does not exist!".format(datasetDirPath))

    # Load the yaml config file for the processing
    yamlConfig = loadConfigFile(device)

    # (label, filename) pairs in processing order; pairing each label with its own
    # filename in one place prevents one label from being processed with another
    # label's file
    labeledFileNames = (("awake", awakeFileName),
                        ("fatigue", fatigueFileName),
                        ("normal", normalFileName),
                        ("unlabeled", unlabeledFileName))

    # NOTE(review): os.walk descends into every level, so nested directories are
    # treated as subjects as well - confirm that the dataset layout is flat
    for root, dirs, files in os.walk(datasetDirPath):
        for subjectDir in dirs:
            subjectPath = os.path.join(root, subjectDir)

            print("#############################################")
            print("Process Subject {} Data...".format(subjectDir))
            print("---------------------------------------------")

            for label, fileName in labeledFileNames:
                if fileName is None:
                    continue  # this kind of data does not exist in the dataset

                safeAndProcessRawFileWithPipeline(rawFilePath=os.path.join(subjectPath, fileName),
                                                  fileDir=subjectPath,
                                                  label=label,
                                                  yamlConfig=yamlConfig)

    print("#######################################")
    print("Done processing and saving a complete Dataset!")

def loadPickeldData(dataDir : str, label : str):
    ''' Load the pickled epoch series and frequency feature dataframe of one label

    The files 'epochSeries_<label>.pkl' and 'frequencyFeaturesDf_<label>.pkl' are read from dataDir.
    Loading is best effort: if reading one of them fails for any reason, None is returned in its place.

    @param str dataDir: Directory where the data is
    @param str label: decides which pickle files get loaded (it is part of the file names)
    @return: the tuple (epochSeries, frequencyFeatureDf), each entry may be None
    '''
    loadedObjects = []

    for fileNameTemplate in ('epochSeries_{}.pkl', 'frequencyFeaturesDf_{}.pkl'):
        try:
            dataObject = pd.read_pickle(os.path.join(dataDir, fileNameTemplate.format(label)))
        except Exception:
            # any failure (e.g. the file is missing) is mapped to None
            dataObject = None
        loadedObjects.append(dataObject)

    return loadedObjects[0], loadedObjects[1]

def loadPickeldDataset(datasetDirPath : str) -> Dict:
    ''' This function loads a complete dataset into a dict

    Each subject directory found while walking datasetDirPath becomes one key of the dict.
    Each Subject contains a dict with 'awake', 'normal', 'fatigue' and 'unlabeled' entry.
    Each entry contains the tuple (epochSeries, frequencyFeatureDf) as returned by 'loadPickeldData()'.

    @param str datasetDirPath: Path where the directory of the dataset is
    @return: dict which maps the subject directory name to its per-label data;
             an empty dict if no directory was found
    '''

    datasetDict = {}

    for root, dirs, files in os.walk(datasetDirPath):
        for subjectDir in dirs:
            print("Load Subject {} Data...".format(subjectDir))

            # Build the path from 'root' (the directory currently walked), not from
            # datasetDirPath: os.walk also yields nested directories and those do
            # not live directly below datasetDirPath
            subjectPath = os.path.join(root, subjectDir)

            datasetDict[subjectDir] = {label : loadPickeldData(dataDir=subjectPath, label=label)
                                       for label in ("awake", "normal", "fatigue", "unlabeled")}
    return datasetDict


def convertEpochSeriesToMachineLearningData():
    ''' Placeholder - not implemented yet, it does nothing and returns None

    TODO - Convert it to 3d Numpy Array [Samples, TimeSteps, Features]
    '''
    return None

def convertFrequencyFeatureDftoMachineLearningData():
    ''' Placeholder - not implemented yet, it does nothing and returns None

    TODO - Convert it to 3d Numpy Array [Samples, TimeSteps, Features]
    '''
    return None
        
    

In [49]:
from consts import DEVICES_NEUROSCAN

# Switch for the raw data processing: only when it is True the raw dataset
# gets processed and the pickle files are written
PROCESS_DATA = False

if not PROCESS_DATA:
    print ("Already processed the EEG Online Data")
else:
    # Process the online EEG Data (it is called without awake and unlabeled files)
    processRawDatasetToPickleFiles(
        datasetDirPath = "D:/Masterthesis/EEG_Data/eeg_data_online",
        device = DEVICES_NEUROSCAN,
        awakeFileName = None,
        fatigueFileName = "Fatigue_state_256hz.csv",
        normalFileName = "Normal_state_256hz.csv",
        unlabeledFileName = None,
    )

Already processed the EEG Online Data


In [55]:
# Load the pickled data of all subject directories of the online EEG dataset into one dict
eegDataset = loadPickeldDataset(datasetDirPath= "D:/Masterthesis/EEG_Data/eeg_data_online")

Load Subject 1 Data...
Load Subject 10 Data...
Load Subject 11 Data...
Load Subject 12 Data...
Load Subject 2 Data...
Load Subject 3 Data...
Load Subject 4 Data...
Load Subject 5 Data...
Load Subject 6 Data...
Load Subject 7 Data...
Load Subject 8 Data...
Load Subject 9 Data...


In [61]:
# Inspect the 'awake' entry of subject '2'; both objects are None here,
# presumably because no awake pickle files exist for this dataset - TODO confirm
eegDataset['2']['awake']

(None, None)

In [102]:
# Index 1 of an entry is the frequencyFeatureDf (index 0 is the epochSeries)
test = eegDataset['2']['normal'][1]

In [103]:

def crea


Unnamed: 0,channel_1_Delta_mean_bandpower_list,channel_1_Delta_mean_bandpower_lower_envelope_list,channel_1_Delta_mean_bandpower_upper_envelope_list,channel_1_Delta_std_dev_bandpower_list,channel_1_Delta_std_dev_bandpower_lower_envelope_list,channel_1_Delta_std_dev_bandpower_upper_envelope_list,channel_1_Theta_mean_bandpower_list,channel_1_Theta_mean_bandpower_lower_envelope_list,channel_1_Theta_mean_bandpower_upper_envelope_list,channel_1_Theta_std_dev_bandpower_list,channel_1_Theta_std_dev_bandpower_lower_envelope_list,channel_1_Theta_std_dev_bandpower_upper_envelope_list,channel_1_Alpha_mean_bandpower_list,channel_1_Alpha_mean_bandpower_lower_envelope_list,channel_1_Alpha_mean_bandpower_upper_envelope_list,channel_1_Alpha_std_dev_bandpower_list,channel_1_Alpha_std_dev_bandpower_lower_envelope_list,channel_1_Alpha_std_dev_bandpower_upper_envelope_list,channel_1_Beta_mean_bandpower_list,channel_1_Beta_mean_bandpower_lower_envelope_list,channel_1_Beta_mean_bandpower_upper_envelope_list,channel_1_Beta_std_dev_bandpower_list,channel_1_Beta_std_dev_bandpower_lower_envelope_list,channel_1_Beta_std_dev_bandpower_upper_envelope_list,channel_1_Gamma_mean_bandpower_list,channel_1_Gamma_mean_bandpower_lower_envelope_list,channel_1_Gamma_mean_bandpower_upper_envelope_list,channel_1_Gamma_std_dev_bandpower_list,channel_1_Gamma_std_dev_bandpower_lower_envelope_list,channel_1_Gamma_std_dev_bandpower_upper_envelope_list,channel_2_Delta_mean_bandpower_list,channel_2_Delta_mean_bandpower_lower_envelope_list,channel_2_Delta_mean_bandpower_upper_envelope_list,channel_2_Delta_std_dev_bandpower_list,channel_2_Delta_std_dev_bandpower_lower_envelope_list,channel_2_Delta_std_dev_bandpower_upper_envelope_list,channel_2_Theta_mean_bandpower_list,channel_2_Theta_mean_bandpower_lower_envelope_list,channel_2_Theta_mean_bandpower_upper_envelope_list,channel_2_Theta_std_dev_bandpower_list,channel_2_Theta_std_dev_bandpower_lower_envelope_list,channel_2_Theta_std_dev_bandpower_upper_envelope_list,
channel_2_Alpha_mean_bandpower_list,channel_2_Alpha_mean_bandpower_lower_envelope_list,channel_2_Alpha_mean_bandpower_upper_envelope_list,channel_2_Alpha_std_dev_bandpower_list,channel_2_Alpha_std_dev_bandpower_lower_envelope_list,channel_2_Alpha_std_dev_bandpower_upper_envelope_list,channel_2_Beta_mean_bandpower_list,channel_2_Beta_mean_bandpower_lower_envelope_list,channel_2_Beta_mean_bandpower_upper_envelope_list,channel_2_Beta_std_dev_bandpower_list,channel_2_Beta_std_dev_bandpower_lower_envelope_list,channel_2_Beta_std_dev_bandpower_upper_envelope_list,channel_2_Gamma_mean_bandpower_list,channel_2_Gamma_mean_bandpower_lower_envelope_list,channel_2_Gamma_mean_bandpower_upper_envelope_list,channel_2_Gamma_std_dev_bandpower_list,channel_2_Gamma_std_dev_bandpower_lower_envelope_list,channel_2_Gamma_std_dev_bandpower_upper_envelope_list,...,channel_39_Delta_mean_bandpower_list,channel_39_Delta_mean_bandpower_lower_envelope_list,channel_39_Delta_mean_bandpower_upper_envelope_list,channel_39_Delta_std_dev_bandpower_list,channel_39_Delta_std_dev_bandpower_lower_envelope_list,channel_39_Delta_std_dev_bandpower_upper_envelope_list,channel_39_Theta_mean_bandpower_list,channel_39_Theta_mean_bandpower_lower_envelope_list,channel_39_Theta_mean_bandpower_upper_envelope_list,channel_39_Theta_std_dev_bandpower_list,channel_39_Theta_std_dev_bandpower_lower_envelope_list,channel_39_Theta_std_dev_bandpower_upper_envelope_list,channel_39_Alpha_mean_bandpower_list,channel_39_Alpha_mean_bandpower_lower_envelope_list,channel_39_Alpha_mean_bandpower_upper_envelope_list,channel_39_Alpha_std_dev_bandpower_list,channel_39_Alpha_std_dev_bandpower_lower_envelope_list,channel_39_Alpha_std_dev_bandpower_upper_envelope_list,channel_39_Beta_mean_bandpower_list,channel_39_Beta_mean_bandpower_lower_envelope_list,channel_39_Beta_mean_bandpower_upper_envelope_list,channel_39_Beta_std_dev_bandpower_list,channel_39_Beta_std_dev_bandpower_lower_envelope_list,channel_39_Beta_std_dev_bandpower_upper_env
elope_list,channel_39_Gamma_mean_bandpower_list,channel_39_Gamma_mean_bandpower_lower_envelope_list,channel_39_Gamma_mean_bandpower_upper_envelope_list,channel_39_Gamma_std_dev_bandpower_list,channel_39_Gamma_std_dev_bandpower_lower_envelope_list,channel_39_Gamma_std_dev_bandpower_upper_envelope_list,channel_40_Delta_mean_bandpower_list,channel_40_Delta_mean_bandpower_lower_envelope_list,channel_40_Delta_mean_bandpower_upper_envelope_list,channel_40_Delta_std_dev_bandpower_list,channel_40_Delta_std_dev_bandpower_lower_envelope_list,channel_40_Delta_std_dev_bandpower_upper_envelope_list,channel_40_Theta_mean_bandpower_list,channel_40_Theta_mean_bandpower_lower_envelope_list,channel_40_Theta_mean_bandpower_upper_envelope_list,channel_40_Theta_std_dev_bandpower_list,channel_40_Theta_std_dev_bandpower_lower_envelope_list,channel_40_Theta_std_dev_bandpower_upper_envelope_list,channel_40_Alpha_mean_bandpower_list,channel_40_Alpha_mean_bandpower_lower_envelope_list,channel_40_Alpha_mean_bandpower_upper_envelope_list,channel_40_Alpha_std_dev_bandpower_list,channel_40_Alpha_std_dev_bandpower_lower_envelope_list,channel_40_Alpha_std_dev_bandpower_upper_envelope_list,channel_40_Beta_mean_bandpower_list,channel_40_Beta_mean_bandpower_lower_envelope_list,channel_40_Beta_mean_bandpower_upper_envelope_list,channel_40_Beta_std_dev_bandpower_list,channel_40_Beta_std_dev_bandpower_lower_envelope_list,channel_40_Beta_std_dev_bandpower_upper_envelope_list,channel_40_Gamma_mean_bandpower_list,channel_40_Gamma_mean_bandpower_lower_envelope_list,channel_40_Gamma_mean_bandpower_upper_envelope_list,channel_40_Gamma_std_dev_bandpower_list,channel_40_Gamma_std_dev_bandpower_lower_envelope_list,channel_40_Gamma_std_dev_bandpower_upper_envelope_list
0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,0.715521,,,0.284726,,,0.033844,,,0.039797,,,0.019249,,,0.019798,,,0.117896,,,0.117098,,,0.1128,,,0.115116,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,0.483559,,,0.226123,,,0.064049,,,0.044977,,,0.03443,,,0.021958,,,0.192122,,,0.101326,,,0.219852,,,0.092676,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,0.661985,,,0.140738,,,0.032869,,,0.022091,,,0.011631,,,0.004392,,,0.12102,,,0.047653,,,0.17524,,,0.084282,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,0.407027,,,0.156789,,,0.038607,,,0.012818,,,0.039793,,,0.029859,,,0.229002,,,0.075551,,,0.281925,,,0.063768,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,0.537865,,,0.260056,,,0.039577,,,0.031019,,,0.016671,,,0.007917,,,0.224551,,,0.164944,,,0.182543,,,0.083486,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6,0.420442,,,0.16791,,,0.067612,,,0.038211,,,0.030474,,,0.016093,,,0.216019,,,0.060698,,,0.262724,,,0.08081,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,0.591587,,,0.130688,,,0.051195,,,0.027447,,,0.019055,,,0.011772,,,0.165345,,,0.067569,,,0.172391,,,0.072928,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,0.7916,,,0.109538,,,0.023141,,,0.006692,,,0.017307,,,0.008449,,,0.08778,,,0.050223,,,0.083264,,,0.052456,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,0.588384,,,0.168443,,,0.052107,,,0.022175,,,0.023737,,,0.012407,,,0.17875,,,0.09489,,,0.154022,,,0.076281,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [76]:
def 

nan

In [74]:
import numpy as np
# Apply np.dstack twice to `test` and look at the shape of the resulting array
np.dstack(np.dstack(test)).shape

(1200, 60, 1)