In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import BatchNormalization

from scikeras.wrappers import KerasClassifier

from keras import metrics

In [2]:
# Read the comma-separated feature table from csv/out_gameemo.csv and preview
# its first rows.
gameemo_csv = 'csv/out_gameemo.csv'
init_df = pd.read_csv(gameemo_csv, sep=',')
init_df.head()

Unnamed: 0,lag1_mean_0,lag1_mean_1,lag1_mean_2,lag1_mean_3,lag1_mean_4,lag1_mean_5,lag1_mean_6,lag1_mean_7,lag1_mean_8,lag1_mean_9,...,freq_669_13,freq_679_13,freq_689_13,freq_699_13,freq_709_13,freq_720_13,freq_730_13,freq_740_13,freq_750_13,Label
0,-2.934765,0.267884,-2.144542,-2.533547,-3.066073,0.328303,-1.131894,5.42983,-0.836372,8.041636,...,0.007022,0.009016,0.002528,0.00436,0.002872,0.002023,0.004519,0.003357,0.004046,4.0
1,-6.458215,5.005935,-0.725545,8.512712,-17.244226,20.164538,-0.70751,1.786132,-1.28716,-1.528923,...,0.007777,0.003482,0.00331,0.001004,0.001927,0.004033,0.00163,0.002139,0.00255,1.0
2,-1.055457,-0.677152,0.24156,0.079234,-1.38614,-0.207097,-0.201138,-5.107441,1.374121,-0.398227,...,0.006512,0.004879,0.004725,0.003842,0.003871,0.002725,0.002234,0.002157,0.000697,1.0
3,1.104418,0.758306,0.877612,1.910334,5.005314,3.530473,-1.477243,0.118954,-0.91684,-1.358008,...,0.006753,0.008789,0.005344,0.000313,0.002932,0.003106,0.004528,0.003814,0.003689,2.0
4,1.543269,-0.106916,0.183276,-0.624663,0.278018,-0.392274,-0.944821,1.830702,-0.935904,2.064984,...,0.003381,0.001809,0.002519,0.00278,0.010865,0.003386,0.002724,0.007896,0.007865,4.0


In [3]:
# Re-code the class labels 1.0-4.0 as the zero-based integers 0-3 used as
# sparse class indices by the models below.
map_labels = {1.0: 0, 2.0: 1, 3.0: 2, 4.0: 3}

# `Series.map` silently turns every value that is not a key of the dict into
# NaN.  That happens for unexpected labels and also when this cell is run a
# second time (an already-mapped 0 is not a key), so fail loudly instead of
# corrupting the target column.
unknown_labels = set(init_df["Label"].unique()) - set(map_labels)
if unknown_labels:
    raise ValueError(
        "Unexpected values in 'Label' (was this cell already run?): %r"
        % sorted(unknown_labels, key=str)
    )

init_df["Label"] = init_df["Label"].map(map_labels)

In [4]:
# Features: every column but the last.  Target: the last column, sliced (not
# indexed) so that it stays a one-column DataFrame.
last_col = init_df.shape[1] - 1
X = init_df.iloc[:, :last_col]
y = init_df.iloc[:, last_col:]

# 70 % of the rows go to training; the split is fixed by random_state.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.7,
    random_state=0,
)

In [5]:
# Report the shape of the full table and of the training features / labels.
for caption, table in (
    ('Shape of data: ', init_df),
    ('train features.shape: ', X_train),
    ('train label.shape: ', y_train),
):
    print(caption, table.shape)

Shape of data:  (40071, 3739)
train features.shape:  (28049, 3738)
train label.shape:  (28049, 1)


# First, decide on the best number of nodes.

In [28]:
def create_model(hl_1_nodes=16,hl_2_nodes=0,hl_3=False):
    """Build and compile a dense classifier for the layer-size grid search.

    Architecture:
    Input -> BatchNormalization -> Dense(hl_1_nodes, relu)
    -> [Dense(hl_2_nodes, relu)] -> [Dense(8, relu)] -> Dense(4, softmax)

    Parameters
    ----------
    hl_1_nodes : int
        Number of units in the first hidden layer.
    hl_2_nodes : int
        Number of units in the optional second hidden layer; the layer is
        omitted when this is 0 (or negative).
    hl_3 : bool
        When truthy, a fixed 8-unit hidden layer is added before the output.

    Returns
    -------
    A compiled ``Sequential`` model (sparse categorical cross-entropy loss,
    SGD optimizer, accuracy metric).

    Notes
    -----
    The input width is read from the notebook-level ``X_train``.
    """
    model = Sequential()
    # Declare the input with an explicit Input object rather than the legacy
    # `input_dim` keyword on the first layer.
    model.add(tf.keras.Input(shape=(X_train.shape[1],)))
    model.add(BatchNormalization())
    model.add(Dense(units=hl_1_nodes,activation='relu'))

    if hl_2_nodes > 0:
        model.add(Dense(units=hl_2_nodes,activation='relu'))

    if hl_3:
        model.add(Dense(units=8,activation='relu'))

    # Four output units, one per class.
    model.add(Dense(units=4,activation='softmax'))
    model.compile(loss='sparse_categorical_crossentropy',optimizer='SGD',metrics=['accuracy'])

    return model

# Seed for reproducibility.  `tf.random.set_seed` only seeds this process;
# with n_jobs=-1 the individual fits run in joblib workers (separate processes
# with the default backend), so the seed is also handed to the wrapper through
# `random_state`, which scikeras applies for every fit.
seed = 0
tf.random.set_seed(seed)

model = KerasClassifier(model=create_model,epochs=10,batch_size=60,hl_1_nodes=16,hl_2_nodes=0,hl_3=False,random_state=seed,verbose=0)

# Candidate sizes for the first / second hidden layer and whether to add the
# fixed third hidden layer.
hl_1_nodes = [16,32,64]
hl_2_nodes = [0,16,32,64]
hl_3 = [True,False]
param_grid = dict(hl_1_nodes=hl_1_nodes, hl_2_nodes=hl_2_nodes, hl_3=hl_3)

# Exhaustive search over the grid with 3-fold cross-validation.
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X_train,y_train)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.814325 using {'hl_1_nodes': 64, 'hl_2_nodes': 16, 'hl_3': False}
0.780919 (0.009826) with: {'hl_1_nodes': 16, 'hl_2_nodes': 0, 'hl_3': True}
0.792328 (0.005924) with: {'hl_1_nodes': 16, 'hl_2_nodes': 0, 'hl_3': False}
0.778673 (0.008512) with: {'hl_1_nodes': 16, 'hl_2_nodes': 16, 'hl_3': True}
0.785340 (0.000157) with: {'hl_1_nodes': 16, 'hl_2_nodes': 16, 'hl_3': False}
0.769118 (0.014151) with: {'hl_1_nodes': 16, 'hl_2_nodes': 32, 'hl_3': True}
0.788156 (0.000654) with: {'hl_1_nodes': 16, 'hl_2_nodes': 32, 'hl_3': False}
0.782773 (0.007986) with: {'hl_1_nodes': 16, 'hl_2_nodes': 64, 'hl_3': True}
0.791258 (0.001270) with: {'hl_1_nodes': 16, 'hl_2_nodes': 64, 'hl_3': False}
0.801205 (0.004299) with: {'hl_1_nodes': 32, 'hl_2_nodes': 0, 'hl_3': True}
0.807515 (0.003628) with: {'hl_1_nodes': 32, 'hl_2_nodes': 0, 'hl_3': False}
0.789226 (0.007043) with: {'hl_1_nodes': 32, 'hl_2_nodes': 16, 'hl_3': True}
0.801918 (0.002263) with: {'hl_1_nodes': 32, 'hl_2_nodes': 16, 'hl_3': False}
0

## All of the top three performing combinations had 64 nodes in the first hidden layer. In almost all cases, adding a third hidden layer did not improve the model. Let's try more combinations with at most two hidden layers.

In [29]:
def create_model(hl_1_nodes=16,hl_2_nodes=0):
    """Build and compile a dense classifier with one or two hidden layers.

    Architecture:
    Input -> BatchNormalization -> Dense(hl_1_nodes, relu)
    -> [Dense(hl_2_nodes, relu)] -> Dense(4, softmax)

    Parameters
    ----------
    hl_1_nodes : int
        Number of units in the first hidden layer.
    hl_2_nodes : int
        Number of units in the optional second hidden layer; the layer is
        omitted when this is 0 (or negative).

    Returns
    -------
    A compiled ``Sequential`` model (sparse categorical cross-entropy loss,
    SGD optimizer, accuracy metric).

    Notes
    -----
    The input width is read from the notebook-level ``X_train``.
    """
    model = Sequential()
    # Declare the input with an explicit Input object rather than the legacy
    # `input_dim` keyword on the first layer.
    model.add(tf.keras.Input(shape=(X_train.shape[1],)))
    model.add(BatchNormalization())
    model.add(Dense(units=hl_1_nodes,activation='relu'))

    if hl_2_nodes > 0:
        model.add(Dense(units=hl_2_nodes,activation='relu'))

    # Four output units, one per class.
    model.add(Dense(units=4,activation='softmax'))
    model.compile(loss='sparse_categorical_crossentropy',optimizer='SGD',metrics=['accuracy'])

    return model

# Seed for reproducibility.  `tf.random.set_seed` only seeds this process;
# with n_jobs=-1 the individual fits run in joblib workers (separate processes
# with the default backend), so the seed is also handed to the wrapper through
# `random_state`, which scikeras applies for every fit.
seed = 0
tf.random.set_seed(seed)

model = KerasClassifier(model=create_model,epochs=10,batch_size=60,hl_1_nodes=16,hl_2_nodes=0,random_state=seed,verbose=0)

# Wider range of sizes for the first and the (optional) second hidden layer.
hl_1_nodes = [64,128,256,512]
hl_2_nodes = [0,16,32,64,128,256,512,1024]
param_grid = dict(hl_1_nodes=hl_1_nodes, hl_2_nodes=hl_2_nodes)

# Exhaustive search over the grid with 3-fold cross-validation.
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X_train,y_train)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.819459 using {'hl_1_nodes': 512, 'hl_2_nodes': 64}
0.812614 (0.001232) with: {'hl_1_nodes': 64, 'hl_2_nodes': 0}
0.814717 (0.006604) with: {'hl_1_nodes': 64, 'hl_2_nodes': 16}
0.805162 (0.000714) with: {'hl_1_nodes': 64, 'hl_2_nodes': 32}
0.815216 (0.004121) with: {'hl_1_nodes': 64, 'hl_2_nodes': 64}
0.811829 (0.005375) with: {'hl_1_nodes': 64, 'hl_2_nodes': 128}
0.808835 (0.001660) with: {'hl_1_nodes': 64, 'hl_2_nodes': 256}
0.804414 (0.001604) with: {'hl_1_nodes': 64, 'hl_2_nodes': 512}
0.808228 (0.001763) with: {'hl_1_nodes': 64, 'hl_2_nodes': 1024}
0.808300 (0.003057) with: {'hl_1_nodes': 128, 'hl_2_nodes': 0}
0.815822 (0.000639) with: {'hl_1_nodes': 128, 'hl_2_nodes': 16}
0.816072 (0.002363) with: {'hl_1_nodes': 128, 'hl_2_nodes': 32}
0.813719 (0.001402) with: {'hl_1_nodes': 128, 'hl_2_nodes': 64}
0.813362 (0.002595) with: {'hl_1_nodes': 128, 'hl_2_nodes': 128}
0.810902 (0.003531) with: {'hl_1_nodes': 128, 'hl_2_nodes': 256}
0.814432 (0.003603) with: {'hl_1_nodes': 128, 'h

## The combination of hl_1_nodes = 512 and hl_2_nodes = 64 did the best.

# Find the best optimization algorithm and batch size. 

In [30]:
def create_model(optimizer='SGD'):
    """Build and compile the 512/64 dense classifier with a chosen optimizer.

    Architecture:
    Input -> BatchNormalization -> Dense(512, relu) -> Dense(64, relu)
    -> Dense(4, softmax)

    Parameters
    ----------
    optimizer : str or Keras optimizer
        Passed straight to ``model.compile``.

    Returns
    -------
    A compiled ``Sequential`` model (sparse categorical cross-entropy loss,
    accuracy metric).

    Notes
    -----
    The input width is read from the notebook-level ``X_train``.
    """
    model = Sequential()
    # Declare the input with an explicit Input object rather than the legacy
    # `input_dim` keyword on the first layer.
    model.add(tf.keras.Input(shape=(X_train.shape[1],)))
    model.add(BatchNormalization())
    model.add(Dense(units=512,activation='relu'))
    model.add(Dense(units=64,activation='relu'))

    # Four output units, one per class.
    model.add(Dense(units=4,activation='softmax'))
    model.compile(loss='sparse_categorical_crossentropy',optimizer=optimizer,metrics=['accuracy'])

    return model

# Seed for reproducibility.  `tf.random.set_seed` only seeds this process;
# with n_jobs=-1 the individual fits run in joblib workers (separate processes
# with the default backend), so the seed is also handed to the wrapper through
# `random_state`, which scikeras applies for every fit.
seed = 0
tf.random.set_seed(seed)

model = KerasClassifier(model=create_model,epochs=10,batch_size=60,optimizer='SGD',random_state=seed,verbose=0)

# Candidate mini-batch sizes and optimizers.
batch_size = [30,60,120,240]
optimizer = ['SGD','Adam']
param_grid = dict(batch_size=batch_size, optimizer=optimizer)

# Exhaustive search over the grid with 3-fold cross-validation.
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X_train,y_train)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.839709 using {'batch_size': 30, 'optimizer': 'Adam'}
0.832115 (0.014021) with: {'batch_size': 30, 'optimizer': 'SGD'}
0.839709 (0.004691) with: {'batch_size': 30, 'optimizer': 'Adam'}
0.817177 (0.001327) with: {'batch_size': 60, 'optimizer': 'SGD'}
0.819530 (0.003172) with: {'batch_size': 60, 'optimizer': 'Adam'}
0.779992 (0.001742) with: {'batch_size': 120, 'optimizer': 'SGD'}
0.785875 (0.003426) with: {'batch_size': 120, 'optimizer': 'Adam'}
0.722878 (0.002540) with: {'batch_size': 240, 'optimizer': 'SGD'}
0.720311 (0.001632) with: {'batch_size': 240, 'optimizer': 'Adam'}


## A batch size of 30 with the Adam optimizer performed best.

# Build the model and train it for many epochs. Then evaluate it on the test data and the DREAMER data.

In [25]:
def build_model_a():
    """Return the compiled network used for the final training run.

    Layers, in order: BatchNormalization, Dense(512, relu), Dense(64, relu),
    Dense(4, softmax).  The model is compiled with sparse categorical
    cross-entropy, a default-constructed Adam optimizer and the accuracy
    metric.  No input shape is declared here.
    """
    model = Sequential()
    for layer in (
        BatchNormalization(),
        Dense(units=512, activation='relu'),
        Dense(units=64, activation='relu'),
        Dense(units=4, activation='softmax'),
    ):
        model.add(layer)

    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=tf.keras.optimizers.Adam(),
        metrics=['accuracy'],
    )
    return model

# Train the final network: 60 epochs, mini-batches of 30, validation_split=0.1.
model = build_model_a()

fit_log = model.fit(
    x=X_train,
    y=y_train,
    epochs=60,
    batch_size=30,
    validation_split=0.1,
)

# Per-epoch metrics as a DataFrame.
history = pd.DataFrame(fit_log.history)
display(history)

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


Unnamed: 0,loss,accuracy,val_loss,val_accuracy
0,0.719902,0.704088,0.498141,0.811052
1,0.330808,0.873039,0.379804,0.862032
2,0.201142,0.926359,0.344822,0.886275
3,0.14278,0.948225,0.300497,0.895544
4,0.1098,0.961733,0.295537,0.909804
5,0.089323,0.969141,0.259276,0.916221
6,0.071164,0.976509,0.286394,0.915508
7,0.069079,0.977143,0.263242,0.923351
8,0.05827,0.980748,0.264065,0.926203
9,0.047308,0.983877,0.25429,0.929055


In [26]:
# Loss and accuracy of the trained model on the held-out test split.
model.evaluate(x=X_test, y=y_test)



[0.2958739697933197, 0.9539178013801575]

In [21]:
# Read the comma-separated feature table from out_dreamer.csv and preview its
# first rows.
dreamer_csv = 'out_dreamer.csv'
dream_df = pd.read_csv(dreamer_csv, sep=',')
dream_df.head()

Unnamed: 0,lag1_mean_0,lag1_mean_1,lag1_mean_2,lag1_mean_3,lag1_mean_4,lag1_mean_5,lag1_mean_6,lag1_mean_7,lag1_mean_8,lag1_mean_9,...,freq_669_13,freq_679_13,freq_689_13,freq_699_13,freq_709_13,freq_720_13,freq_730_13,freq_740_13,freq_750_13,Label
0,4395.309365,4105.013935,4175.248049,4425.6466,4322.146042,4367.742475,4460.53233,3951.243032,4310.554627,4289.464883,...,0.003496,0.003562,0.00331,0.002777,0.003207,0.003174,0.003147,0.003079,0.00295,4.0
1,4386.211466,4095.978104,4169.008931,4417.412849,4333.40824,4366.058773,4440.651109,3943.298761,4311.426102,4273.362143,...,0.000691,0.000187,0.000287,8.9e-05,5.6e-05,0.000134,8.7e-05,0.000163,0.000147,2.0
2,4390.169539,4111.878941,4194.308533,4410.931764,4319.355471,4391.43898,4459.854281,3964.752697,4333.582738,4298.761384,...,0.00016,0.000615,6.7e-05,0.000433,0.000297,0.000114,0.000169,0.000115,5e-05,2.0
3,4373.919067,4097.929275,4165.580265,4410.059546,4317.934135,4366.195163,4454.081905,3933.556933,4297.768866,4204.851136,...,0.000925,0.001757,0.000949,0.000445,0.000891,0.003211,0.001094,0.001721,0.002623,3.0
4,4385.213675,4091.960298,4169.947615,4412.194651,4216.468156,4370.512821,4436.228288,3926.377171,4298.296112,4244.778053,...,0.000302,0.000235,0.000631,0.00037,0.000171,1.9e-05,5.1e-05,0.00016,0.000142,4.0


In [22]:
# Re-code the class labels 1.0-4.0 as the zero-based integers 0-3 used as
# sparse class indices by the model.
map_labels = {1.0: 0, 2.0: 1, 3.0: 2, 4.0: 3}

# `Series.map` silently turns every value that is not a key of the dict into
# NaN.  That happens for unexpected labels and also when this cell is run a
# second time (an already-mapped 0 is not a key), so fail loudly instead of
# corrupting the target column.
unknown_dream_labels = set(dream_df["Label"].unique()) - set(map_labels)
if unknown_dream_labels:
    raise ValueError(
        "Unexpected values in 'Label' (was this cell already run?): %r"
        % sorted(unknown_dream_labels, key=str)
    )

dream_df["Label"] = dream_df["Label"].map(map_labels)

In [23]:
# Features: every column but the last.  Target: the last column, sliced (not
# indexed) so that it stays a one-column DataFrame.
dream_last_col = dream_df.shape[1] - 1
dream_X = dream_df.iloc[:, :dream_last_col]
dream_y = dream_df.iloc[:, dream_last_col:]

In [24]:
# Loss and accuracy of the trained model on the DREAMER features.
model.evaluate(dream_X, dream_y)



[20.458141326904297, 0.26743581891059875]

# Making predictions with the raw data. 

In [None]:
import os, sys
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D


In [None]:
# Folder that holds the raw recordings, one CSV file per recording.
path="original_data_with_timestamps"

# os.listdir returns entries in arbitrary order and includes everything in the
# folder.  Keep only the CSV files and sort them so the order in which the
# recordings are read does not change between runs or machines.
dirs = sorted(entry for entry in os.listdir(path) if entry.lower().endswith('.csv'))

In [None]:
# Preview one raw recording: drop the timestamp column and put the channel
# columns into a fixed order.
channel_order = ['O1','P7','T7','F7','FC5','F3','AF3','AF4','F4','FC6','F8','T8','P8','O2']
testdf = (
    pd.read_csv('original_data_with_timestamps/S03G2AllChannels.csv')
    .drop(columns='timestamps')
    .reindex(columns=channel_order)
)
testdf.head()

In [None]:
# Cut every recording into 42 consecutive windows of 896 rows each (only the
# first 42 * 896 = 37632 rows of a file are used).  Every window is labelled
# with the digit at index 4 of the file name (e.g. the `2` in
# `S03G2AllChannels.csv`), shifted to be zero-based.
N_WINDOWS = 42
WINDOW_LEN = 896
N_ROWS = N_WINDOWS * WINDOW_LEN  # 37632

windows = []
labels = []
for file in dirs:
    if not file.lower().endswith('.csv'):
        # Ignore stray directory entries that are not recordings.
        continue

    df = pd.read_csv(os.path.join(path, file)).drop(columns='timestamps')
    if len(df) < N_ROWS:
        # A short file would otherwise yield windows of unequal length and a
        # ragged array further down.
        raise ValueError(
            "%s has only %d rows; at least %d are required" % (file, len(df), N_ROWS)
        )

    # Plain-array reshape into (windows, rows per window, channels); this is
    # the same contiguous split as cutting the rows into 42 equal pieces.
    samples = df.to_numpy()[:N_ROWS]
    windows.append(samples.reshape(N_WINDOWS, WINDOW_LEN, samples.shape[1]))
    labels.extend([int(file[4]) - 1] * N_WINDOWS)

if not windows:
    raise FileNotFoundError("no CSV recordings found in %r" % path)

X_7_sec = np.concatenate(windows, axis=0)
y = np.array(labels)

In [None]:
# 70 % of the raw windows go to training; the split is fixed by random_state.
Xr_train, Xr_test, yr_train, yr_test = train_test_split(
    X_7_sec,
    y,
    train_size=0.7,
    random_state=0,
)

In [None]:
# Report the shape of the raw training windows and their labels.
for caption, array in (
    ('train features.shape: ', Xr_train),
    ('train label.shape: ', yr_train),
):
    print(caption, array.shape)

In [None]:
def build_model_raw():
    """Return the compiled convolutional network for the raw windows.

    Layers, in order: Conv2D (8 filters, 128x1 kernel, 32x1 stride, relu,
    'valid' padding, declared input shape (896, 14, 1)), Flatten,
    BatchNormalization, Dense(4, softmax).  The model is compiled with sparse
    categorical cross-entropy, SGD at learning rate 0.2 and the accuracy
    metric.
    """
    model = Sequential()
    model.add(
        Conv2D(
            filters=8,
            kernel_size=(128, 1),
            strides=(32, 1),
            activation='relu',
            padding='valid',
            input_shape=(896, 14, 1),
        )
    )
    model.add(Flatten())
    # (An extra BatchNormalization + Dense(256, relu) stage was previously
    # commented out at this point.)
    model.add(BatchNormalization())
    model.add(Dense(units=4, activation='softmax'))

    sgd = tf.keras.optimizers.SGD(learning_rate=0.2)
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=sgd,
        metrics=['accuracy'],
    )
    return model

In [None]:
# Train the raw-signal network: 10 epochs, mini-batches of 60,
# validation_split=0.1.
model = build_model_raw()

raw_fit_log = model.fit(
    x=Xr_train,
    y=yr_train,
    epochs=10,
    batch_size=60,
    validation_split=0.1,
)

# Per-epoch metrics as a DataFrame.
history = pd.DataFrame(raw_fit_log.history)
display(history)