# DEAP

In [21]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Activation, AvgPool1D, Dense, Conv1D, Flatten, Dropout, Input, BatchNormalization, GlobalMaxPool1D, MaxPool1D, SpatialDropout1D, GlobalAvgPool1D
from tensorflow.keras.optimizers import Adam
from keras.utils import np_utils
from sklearn.preprocessing import StandardScaler                                                      
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix 
from scipy import signal
import pickle as pkl
import gc

In [22]:
sampling_rate = 128
window_size = 1280
overlap = 256
channel_len = 32
classes=2

bands = {'delta': [0.5/(sampling_rate/2), 4/(sampling_rate/2)], 'theta': [4/(sampling_rate/2), 8/(sampling_rate/2)], \
         'alpha': [8/(sampling_rate/2), 14/(sampling_rate/2)], 'beta': [14/(sampling_rate/2), 30/(sampling_rate/2)], \
         'gamma': [30/(sampling_rate/2), 50/(sampling_rate/2)], 'all': [0.5/(sampling_rate/2), 50/(sampling_rate/2)]} # FIXED: 75 -> 50

## Loading Data

In [23]:
def load_data(eeg_band = None):

  eeg_signal = []
  valence = []
  arousal = []
  dominance = []
  subject = {} # FIXED: Add subject dict

  for i in range(32):
    subject[i] = "{0:02d}".format(i+1)

  for person in range(32):
    print('Person No.' + str(person))
    
    # EEG files address
    address = './eeg_data/DEAP/s'+subject[person]+'.dat'

    with open(address, 'rb') as file:
      data = pkl.load(file, encoding = 'latin1')

    eeg = data['data']
    label = data['labels']
    
    # Assigning classes
    label[label<5.5] = 0
    label[label>=5.5] = 1     

    val = label.T[0] # Valence label
    aro = label.T[1] # Arousal label
    dom = label.T[2] # Dominance label

    del data, label
    

    for i in range(40): # Iterating through 40 vidoes/trials

      sig = eeg[i].T
      v = val[i]
      a = aro[i]
      d = dom[i]

      sig = sig[:, :32] # Considering all 32 EEG channels
      
      num, den = signal.butter(4, bands[eeg_band], 'band') # Butterworth filter of order N = 4
      band_signal = signal.filtfilt(num, den, sig, axis=0)
      sig = band_signal
      del band_signal, num, den
      
      scaler = StandardScaler().fit(sig)
      scaled_sig = scaler.transform(sig)

      del sig
        
      # Segmenting into 10 seconds (1280 timesteps) windows with 2 seconds (256 timesteps) overlap
      start = 0
      while start + window_size < scaled_sig.shape[0]:
        eeg_signal.append(scaled_sig[start:start+window_size, :])

        valence.append(v)
        arousal.append(a)
        dominance.append(d)
        start += window_size - overlap # NOTE: Isn't 'window_size - overlap' right?

      del scaled_sig


  eeg_signal = np.asarray(eeg_signal, dtype = np.float32) # Using 32 bit floating point value to save memory
  valence = np.asarray(valence, dtype = np.int8)
  arousal = np.asarray(arousal, dtype = np.int8)
  dominance = np.asarray(dominance, dtype = np.int8)

  print(valence.shape, valence[valence == 0].shape, valence[valence == 1].shape)
  print(arousal.shape, arousal[arousal == 0].shape, arousal[arousal == 1].shape)
  print(dominance.shape, dominance[dominance == 0].shape, dominance[dominance == 1].shape)

  valence = np_utils.to_categorical(valence)
  arousal = np_utils.to_categorical(arousal)
  dominance = np_utils.to_categorical(dominance)

    
  return (eeg_signal, valence, arousal, dominance)

In [24]:
FOLD = 1
eeg_band = 'all'#'gamma' # EEG band name

data, valence, arousal, dominance = load_data(eeg_band) # Loading processed data

nb_samples = data.shape[0] # Number of samples
factor = nb_samples//FOLD # Kth fold by this factor
factor_ho = 7*nb_samples//10

shuffler = np.random.permutation(nb_samples) # Shuffling data

data = data[shuffler]
valence = valence[shuffler]
arousal = arousal[shuffler]
dominance = dominance[shuffler]

Person No.0
Person No.1
Person No.2
Person No.3
Person No.4
Person No.5
Person No.6
Person No.7
Person No.8
Person No.9
Person No.10
Person No.11
Person No.12
Person No.13
Person No.14
Person No.15
Person No.16
Person No.17
Person No.18
Person No.19
Person No.20
Person No.21
Person No.22
Person No.23
Person No.24
Person No.25
Person No.26
Person No.27
Person No.28
Person No.29
Person No.30
Person No.31
(8960,) (4851,) (4109,)
(8960,) (4606,) (4354,)
(8960,) (4599,) (4361,)


## CNN Model

In [25]:
eeg_input = Input(shape = (window_size, channel_len), name='eeg_input') # Input layer

# CNN model
def get_CNN_design():
  x = Conv1D(filters = 32, kernel_size = 1, strides = 2, padding = 'valid', activation='relu', name='conv1')(eeg_input)
  x = Conv1D(filters = 32, kernel_size = 5, strides = 2, padding = 'valid', activation='relu', name='conv2')(x)
  x = AvgPool1D(pool_size=2, name='avg_pool1')(x)
  x = BatchNormalization(name='batch_norm1')(x)
  x = SpatialDropout1D(rate=0.0625, name = 'spatial_dropout1')(x)
  x = Conv1D(filters = 64, kernel_size = 5, strides = 2, padding = 'valid', activation='relu', name='conv3')(x)
  x = Conv1D(filters = 64, kernel_size = 5, strides = 2, padding = 'valid', activation='relu', name='conv4')(x)
  x = AvgPool1D(pool_size=2, name='avg_pool2')(x)
  x = BatchNormalization(name='batch_norm2')(x)
  x = SpatialDropout1D(rate=0.125, name = 'spatial_dropout2')(x)
  x = Conv1D(filters = 128, kernel_size = 3, strides = 1, padding = 'valid', activation='relu', name='conv5')(x)
  x = Conv1D(filters = 128, kernel_size = 3, strides = 1, padding = 'valid', activation='relu', name='conv6')(x)
  x = AvgPool1D(pool_size=2, name='avg_pool3')(x)
  x = BatchNormalization(name='batch_norm3')(x)
  x = SpatialDropout1D(rate=0.25, name = 'spatial_dropout3')(x)
  x = Conv1D(filters = 256, kernel_size = 3, strides = 1, padding = 'valid', activation='relu', name='conv7')(x)
  x = Conv1D(filters = 256, kernel_size = 3, strides = 1, padding = 'valid', activation='relu', name='conv8')(x)
  x = GlobalAvgPool1D(name='global_pool1')(x)
  x = BatchNormalization(name='batch_norm4')(x)
  x = Dropout(0.2)(x)
  x = Dense(64)(x)
  x = Activation('tanh')(x)
  x = Dense(8)(x)
  x = Activation('tanh')(x)
  x = Dropout(0.1)(x)
  return x

def get_model():
  x = get_CNN_design()

  out = Dense(classes, activation='softmax', name = 'output')(x) # Output layer

  model = Model(inputs=eeg_input, outputs=out) # Creating a model instance
  
  adam = Adam(lr=1e-3,decay=1e-5) # Adam optimizer
  
  model.compile(optimizer=adam,loss='categorical_crossentropy',metrics=['categorical_accuracy']) # Compiling model
  model.summary()
  
  return model

## Training

### Valence

In [26]:
val_res = {'accuracy': [], 'confusion_matrix': []}

gc.collect() # Garbage collecter
for i in range(FOLD):
  print('Fold: '+str(i))

  X_train = data[0 : factor_ho] # Training data
  X_test = data[factor_ho : nb_samples] # Testing data
  val_train = valence[0 : factor_ho] # Valence training labels
  val_test = valence[factor_ho : nb_samples] # Valence testing labels

  gc.collect() # Garbage collecter
  model = get_model()
  model.fit(X_train, val_train, epochs = 100, batch_size = 1024, shuffle = True)

  acc = model.evaluate(X_test, val_test)
  print(acc)

  val_res['accuracy'].append(acc)

  pred = model.predict(X_test)
  val_res['confusion_matrix'].append(confusion_matrix(val_test.argmax(1), pred.argmax(1)))
  gc.collect() # Garbage collecter


# Dumping valence results
file = './eeg_data/DEAP_valence_' + eeg_band + '.pkl'

with open(file, 'wb') as f:
  pkl.dump(val_res, f)


Fold: 0
Model: "model_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
eeg_input (InputLayer)       [(None, 1280, 32)]        0         
_________________________________________________________________
conv1 (Conv1D)               (None, 640, 32)           1056      
_________________________________________________________________
conv2 (Conv1D)               (None, 318, 32)           5152      
_________________________________________________________________
avg_pool1 (AveragePooling1D) (None, 159, 32)           0         
_________________________________________________________________
batch_norm1 (BatchNormalizat (None, 159, 32)           128       
_________________________________________________________________
spatial_dropout1 (SpatialDro (None, 159, 32)           0         
_________________________________________________________________
conv3 (Conv1D)               (None, 78, 64)        

  "The `lr` argument is deprecated, use `learning_rate` instead.")


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [27]:
for i in val_res['accuracy']:
  print(round(i[1]*100, 2)) # Rounding off to two decimal places

53.98


In [28]:
for i in val_res['confusion_matrix']:
  print(i)

[[829 600]
 [637 622]]


### Arousal

In [29]:
aro_res = {'accuracy': [], 'confusion_matrix': []}

gc.collect() # Garbage collecter
for i in range(FOLD):
  print('Fold: '+str(i))
  
  X_train = data[0 : factor_ho] # Training data
  X_test = data[factor_ho : nb_samples] # Testing data
  aro_train = arousal[0 : factor_ho] # Valence training labels
  aro_test = arousal[factor_ho : nb_samples] # Valence testing labels

  gc.collect() # Garbage collecter
  model = get_model()
  model.fit(X_train, aro_train, epochs = 100, batch_size = 1024, shuffle = True)

  acc = model.evaluate(X_test, aro_test)
  print(acc)

  aro_res['accuracy'].append(acc)

  pred = model.predict(X_test)
  aro_res['confusion_matrix'].append(confusion_matrix(aro_test.argmax(1), pred.argmax(1)))
  gc.collect() # Garbage collecter


# Dumping arousal results
file = './eeg_data/DEAP_arousal_' + eeg_band + '.pkl'

with open(file, 'wb') as f:
  pkl.dump(aro_res, f)


Fold: 0
Model: "model_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
eeg_input (InputLayer)       [(None, 1280, 32)]        0         
_________________________________________________________________
conv1 (Conv1D)               (None, 640, 32)           1056      
_________________________________________________________________
conv2 (Conv1D)               (None, 318, 32)           5152      
_________________________________________________________________
avg_pool1 (AveragePooling1D) (None, 159, 32)           0         
_________________________________________________________________
batch_norm1 (BatchNormalizat (None, 159, 32)           128       
_________________________________________________________________
spatial_dropout1 (SpatialDro (None, 159, 32)           0         
_________________________________________________________________
conv3 (Conv1D)               (None, 78, 64)        

  "The `lr` argument is deprecated, use `learning_rate` instead.")


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [30]:
for i in aro_res['accuracy']:
  print(round(i[1]*100, 2)) # Rounding off to two decimal places

59.6


In [31]:
for i in aro_res['confusion_matrix']:
  print(i)

[[843 554]
 [532 759]]


### Dominance

In [32]:
dom_res = {'accuracy':[], 'confusion_matrix':[]}

gc.collect() # Garbage collecter
for i in range(FOLD):
  print('Fold: '+str(i))

  X_train = data[0 : factor_ho] # Training data
  X_test = data[factor_ho : nb_samples] # Testing data
  dom_train = dominance[0 : factor_ho] # Valence training labels
  dom_test = dominance[factor_ho : nb_samples] # Valence testing labels

  gc.collect() # Garbage collecter
  model = get_model()
  model.fit(X_train, dom_train, epochs = 100, batch_size = 1024, shuffle = True)

  acc = model.evaluate(X_test, dom_test)
  print(acc)
    
  dom_res['accuracy'].append(acc)

  pred = model.predict(X_test)
  dom_res['confusion_matrix'].append(confusion_matrix(dom_test.argmax(1), pred.argmax(1)))
  gc.collect() # Garbage collecter


# Dumping dominance results
file = './eeg_data/DEAP_dominance_' + eeg_band + '.pkl'

with open(file, 'wb') as f:
  pkl.dump(dom_res, f)

Fold: 0
Model: "model_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
eeg_input (InputLayer)       [(None, 1280, 32)]        0         
_________________________________________________________________
conv1 (Conv1D)               (None, 640, 32)           1056      
_________________________________________________________________
conv2 (Conv1D)               (None, 318, 32)           5152      
_________________________________________________________________
avg_pool1 (AveragePooling1D) (None, 159, 32)           0         
_________________________________________________________________
batch_norm1 (BatchNormalizat (None, 159, 32)           128       
_________________________________________________________________
spatial_dropout1 (SpatialDro (None, 159, 32)           0         
_________________________________________________________________
conv3 (Conv1D)               (None, 78, 64)        

  "The `lr` argument is deprecated, use `learning_rate` instead.")


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [33]:
for i in dom_res['accuracy']:
  print(round(i[1]*100, 2)) # Rounding off to two decimal places

57.4


In [34]:
for i in dom_res['confusion_matrix']:
  print(i)

[[ 436  923]
 [ 222 1107]]
