In [1]:
import numpy as np
from pyeeg import bin_power

import matplotlib.pyplot as plt
from xgboost import XGBClassifier

from sklearn.model_selection import KFold
from sklearn.metrics import classification_report, f1_score
import pandas as pd
import pickle
import os
from IPython import display

In [2]:
from sys import path
path.append('../')
from utils.average_classifier import *
from utils.split_features_and_labels import split_features_and_labels_interfaces

In [3]:
channels = "Fp1,Fp2,Fz,Cz,T3,T4,Pz,Oz".split(',')
sampling_rate = 250
seconds_to_predict = 5

In [4]:
EEG = np.load('INTERFACES/EEG_no_ICA.npy')
label_arousal = np.load('INTERFACES/label/arousal.npy')
label_valence = np.load('INTERFACES/label/valence.npy')

In [5]:
bands = ['3-7','8-13','14-29','30-47']
feature_names = np.asarray([[str(bands[x]) + '|' + e for x in range(len(bands))] for e in channels], dtype='object')
feature_names = np.ravel(feature_names)
print(feature_names)

['3-7|Fp1' '8-13|Fp1' '14-29|Fp1' '30-47|Fp1' '3-7|Fp2' '8-13|Fp2'
 '14-29|Fp2' '30-47|Fp2' '3-7|Fz' '8-13|Fz' '14-29|Fz' '30-47|Fz' '3-7|Cz'
 '8-13|Cz' '14-29|Cz' '30-47|Cz' '3-7|T3' '8-13|T3' '14-29|T3' '30-47|T3'
 '3-7|T4' '8-13|T4' '14-29|T4' '30-47|T4' '3-7|Pz' '8-13|Pz' '14-29|Pz'
 '30-47|Pz' '3-7|Oz' '8-13|Oz' '14-29|Oz' '30-47|Oz']


In [6]:
importance_dfs = []

In [7]:
use_last_npy = False
feature_dump_file = 'interfaces_features_8.npy'

splitted_features, splitted_labels_arousal = split_features_and_labels_interfaces(EEG, label_arousal, sampling_rate, seconds_to_predict)
splitted_features, splitted_labels_valence = split_features_and_labels_interfaces(EEG, label_valence, sampling_rate, seconds_to_predict)

if use_last_npy and os.path.exists(feature_dump_file):
    final_features = np.load(feature_dump_file)
else:
    temp_reshaped = np.reshape(splitted_features, [splitted_features.shape[0]*splitted_features.shape[1]*splitted_features.shape[2], splitted_features.shape[3]])
    powers = []
    for i, sample in enumerate(temp_reshaped):
        if i % 3000 == 0:
            print('Progress: %s' % (str(np.round(i/len(temp_reshaped), 2))))
        powers.append(bin_power(sample, [0, 100], Fs=sampling_rate)[0])
    powers = np.asarray(powers)
    powers = np.reshape(powers, splitted_features.shape[:3])
#    powers = powers / np.expand_dims(np.sum(powers, axis=-1), axis=-1) # normalize by total band power in each electrode
    powers = np.reshape(powers, [powers.shape[0], powers.shape[1] * powers.shape[2]])
    final_features = powers

    final_features.dump(feature_dump_file)


Progress: 0.0
Progress: 0.01
Progress: 0.03
Progress: 0.04
Progress: 0.05
Progress: 0.07
Progress: 0.08
Progress: 0.09
Progress: 0.11
Progress: 0.12
Progress: 0.13
Progress: 0.15
Progress: 0.16
Progress: 0.17
Progress: 0.18
Progress: 0.2
Progress: 0.21
Progress: 0.22
Progress: 0.24
Progress: 0.25
Progress: 0.26
Progress: 0.28
Progress: 0.29
Progress: 0.3
Progress: 0.32
Progress: 0.33
Progress: 0.34
Progress: 0.36
Progress: 0.37
Progress: 0.38
Progress: 0.4
Progress: 0.41
Progress: 0.42
Progress: 0.44
Progress: 0.45
Progress: 0.46
Progress: 0.48
Progress: 0.49
Progress: 0.5
Progress: 0.52
Progress: 0.53
Progress: 0.54
Progress: 0.55
Progress: 0.57
Progress: 0.58
Progress: 0.59
Progress: 0.61
Progress: 0.62
Progress: 0.63
Progress: 0.65
Progress: 0.66
Progress: 0.67
Progress: 0.69
Progress: 0.7
Progress: 0.71
Progress: 0.73
Progress: 0.74
Progress: 0.75
Progress: 0.77
Progress: 0.78
Progress: 0.79
Progress: 0.81
Progress: 0.82
Progress: 0.83
Progress: 0.85
Progress: 0.86
Progress: 0.87
P

In [8]:
for classification_type in ['valence', 'arousal']:
    print('-------------------------------------------------------------------------')
    print(classification_type)
    final_features = powers
    if classification_type == 'valence':
        labels=splitted_labels_valence
    elif classification_type == 'arousal':
        labels=splitted_labels_arousal
    kf = KFold(n_splits=10, shuffle=True) 
    i = 0
    regressors = []
    importances =[]
    all_predictions_arousal = np.zeros((final_features.shape[0], ))
    for train_index, test_index in kf.split(final_features):
        x_train = final_features[train_index]
        y_train = labels[train_index]
        x_test = final_features[test_index]
        y_test = labels[test_index]
    
        xgb =  XGBClassifier(eval_metric='mlogloss')
        xgb.fit(x_train, y_train)    

        print('Fold number ' + str(i))
        y_pred = xgb.predict(x_train)
        print('Train F1: ' + str(f1_score(y_train, y_pred)))
        y_pred = xgb.predict(x_test)
        print('Test F1: ' + str(f1_score(y_test, y_pred)))
        importances.append(xgb.feature_importances_)
        all_predictions_arousal[test_index] = y_pred
        regressors.append(xgb)
        i += 1
    print(classification_report(labels,all_predictions_arousal))


    df = pd.DataFrame()
    df['feature_name'] = feature_names
    df['mean_importances'] = np.sum(np.asarray(importances), axis=0)
    df['electrode'] = [x.split('|')[1] for x in df['feature_name']]
    importance_dfs.append(df)


-------------------------------------------------------------------------
valence




Fold number 0
Train F1: 0.9974928366762178
Test F1: 0.7349177330895794
Fold number 1
Train F1: 0.9983639338302127
Test F1: 0.7549467275494673
Fold number 2
Train F1: 0.998371040723982
Test F1: 0.7781456953642385
Fold number 3
Train F1: 0.9974774774774775
Test F1: 0.7389830508474577
Fold number 4
Train F1: 0.9994542477715118
Test F1: 0.7596153846153846
Fold number 5
Train F1: 0.9992800575953923
Test F1: 0.7266435986159169
Fold number 6
Train F1: 0.9981824790985095
Test F1: 0.7916666666666666
Fold number 7
Train F1: 0.9974756581319871
Test F1: 0.7733333333333333
Fold number 8
Train F1: 0.9985406785844582
Test F1: 0.7617504051863858
Fold number 9
Train F1: 0.9985486211901307
Test F1: 0.7248764415156507
              precision    recall  f1-score   support

           0       0.81      0.83      0.82      4026
           1       0.77      0.74      0.75      3069

    accuracy                           0.79      7095
   macro avg       0.79      0.79      0.79      7095
weighted avg       

In [9]:
importance_dfs[0].groupby('electrode').sum().sort_values('mean_importances', ascending=False)

Unnamed: 0_level_0,mean_importances
electrode,Unnamed: 1_level_1
T4,1.423414
Fp2,1.405843
Fp1,1.367895
Fz,1.272086
Oz,1.147124
T3,1.128919
Pz,1.127692
Cz,1.127026


In [10]:
importance_dfs[1].groupby('electrode').sum().sort_values('mean_importances', ascending=False)

Unnamed: 0_level_0,mean_importances
electrode,Unnamed: 1_level_1
Fp2,1.410435
Fp1,1.365693
Cz,1.309614
Fz,1.232991
Pz,1.231916
T3,1.230231
Oz,1.18961
T4,1.029509


Select Fp1, Fp2, Fz, Cz