In [1]:
# Imports
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from pyeeg.spectrum import bin_power
import matplotlib.pyplot as plt
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix, classification_report
import tsfresh
import pandas as pd
from utils.split import split_features_and_labels

In [2]:
# Recording constants for the GAMEEMO dataset.
# Names of the 14 EEG channels (presumably in the same order as the electrode
# axis of the feature array -- TODO confirm against the dataset loader).
electrodes = [
    'AF3', 'AF4', 'F3', 'F4', 'F7', 'F8', 'FC5',
    'FC6', 'O1', 'O2', 'P7', 'P8', 'T7', 'T8',
]
# Sampling rate handed to the window-splitting helper and to bin_power as Fs.
sample_rate = 128

In [3]:
# Load the pre-extracted GAMEEMO arrays from the working directory.
# The feature array holds raw EEG voltages. Its printed shape is (112, 14, 38252),
# presumably (subjects*trials, electrodes, samples per recording) -- TODO confirm.
# At this point the recordings are still whole; they are not yet cut into
# fixed-length windows (that is presumably what split_features_and_labels does).
# The label array presumably carries the valence/arousal ratings for each
# recording -- confirm against the script that produced gameemo_labels.npy.
eeg_splitted_features, eeg_splitted_labels = (
    np.load('gameemo_features.npy'),
    np.load('gameemo_labels.npy'),
)
print(eeg_splitted_features.shape)

(112, 14, 38252)


In [4]:
# Presumably cuts every recording into fixed-length windows and returns labels
# aligned with those windows -- confirm in utils.split.
# NOTE(review): the last argument (1) looks like the window length in seconds;
# the first dimension goes from 112 to 33376 (= 112 * 298), which matches
# 1-second windows at 128 samples each.
# Caution: this overwrites its own inputs, so re-run the loading cell before
# re-running this one.
eeg_splitted_features, eeg_splitted_labels = split_features_and_labels(
    eeg_splitted_features,
    eeg_splitted_labels,
    sample_rate,
    1,
)

In [5]:
# Classification target: the last label column, cast to 32-bit integer class ids.
# No rescaling is applied here; the classification reports show classes 1-4.
# NOTE(review): the last column is presumably arousal -- confirm against the
# label layout produced by split_features_and_labels.
labels = eeg_splitted_labels[:, -1].astype(np.int32)

In [6]:

# Band-power features.
# Every (window, electrode) voltage trace is treated as its own 1-D signal:
#   1. flatten the first two axes into a 2-D array of traces,
#   2. run bin_power on each trace,
#   3. restore the electrode axis -> (n windows, n electrodes, n bands),
#   4. flatten electrodes x bands into one feature vector per window.
n_windows, n_electrodes, n_samples = eeg_splitted_features.shape
band_edges = [0.5, 3, 7, 10, 30]  # five edges -> four adjacent bands

reshaped_features = np.reshape(eeg_splitted_features, [n_windows * n_electrodes, n_samples])
# Only element 0 of each bin_power result is kept (presumably the absolute band
# power, with the power ratio at index 1 -- confirm in the pyeeg documentation).
extracted_powers = np.asarray([
    bin_power(X=trace, Band=band_edges, Fs=sample_rate)[0]
    for trace in reshaped_features
])
n_bands = extracted_powers.shape[1]
extracted_powers = np.reshape(extracted_powers, [n_windows, n_electrodes, n_bands])
print(extracted_powers.shape)

# Final design matrix: one row per window, n_electrodes * n_bands columns.
final_features = np.reshape(extracted_powers, [n_windows, n_electrodes * n_bands])

(33376, 14, 4)


In [7]:
# K-fold cross-validation with 29 contiguous folds (no shuffling): each fold holds
# out roughly 1/29 (~3.4%) of the windows, trains on the remaining windows, and is
# evaluated on the held-out part. Every window is predicted exactly once, and the
# out-of-fold predictions are collected in all_predictions_arousal.
# The recorded fold-0 report shows clear overfitting: train accuracy is 1.00 while
# test accuracy is about 0.53.
# NOTE(review): folds are formed over individual windows, so windows cut from the
# same recording can presumably end up in both train and test. Consider a grouped
# split (e.g. sklearn's GroupKFold keyed on subject/recording) -- confirm.
N_FOLDS = 29
RANDOM_SEED = 42  # seeds the forest's random sampling so reruns reproduce the reports

kf = KFold(n_splits=N_FOLDS, shuffle=False)

# np.zeros defaults to float64, so the integer class predictions are stored as floats.
all_predictions_arousal = np.zeros((final_features.shape[0], ))
for fold_number, (train_index, test_index) in enumerate(kf.split(final_features)):
    x_train, y_train = final_features[train_index], labels[train_index]
    x_test, y_test = final_features[test_index], labels[test_index]

    # A fresh model per fold so nothing learned on one fold carries over to the next.
    clf = RandomForestClassifier(random_state=RANDOM_SEED)
    clf.fit(x_train, y_train)

    print('Fold number ' + str(fold_number))

    print('Train:')
    print(classification_report(y_train, clf.predict(x_train)))

    y_pred = clf.predict(x_test)
    print('Test: ')
    print(classification_report(y_test, y_pred))
    print(y_pred)
    print(y_test)
    all_predictions_arousal[test_index] = y_pred

Fold number 0
Train:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00      8056
           2       1.00      1.00      1.00      8056
           3       1.00      1.00      1.00      8056
           4       1.00      1.00      1.00      8057

    accuracy                           1.00     32225
   macro avg       1.00      1.00      1.00     32225
weighted avg       1.00      1.00      1.00     32225

Test: 
              precision    recall  f1-score   support

           1       0.54      0.66      0.60       288
           2       0.55      0.51      0.53       288
           3       0.52      0.52      0.52       288
           4       0.49      0.43      0.46       287

    accuracy                           0.53      1151
   macro avg       0.53      0.53      0.52      1151
weighted avg       0.53      0.53      0.52      1151

[3 1 1 ... 1 2 3]
[1 2 3 ... 1 2 3]
