# Valence value regression based on the DEAP dataset

## 0. This notebook is based on the DEAP database

Anyone using this notebook should first refer to, and cite, the DEAP team's paper:

@article{koelstra2012deap,
  title={Deap: A database for emotion analysis; using physiological signals},
  author={Koelstra, Sander and Muhl, Christian and Soleymani, Mohammad and Lee, Jong-Seok and Yazdani, Ashkan and Ebrahimi, Touradj and Pun, Thierry and Nijholt, Anton and Patras, Ioannis},
  journal={IEEE Transactions on Affective Computing},
  volume={3},
  number={1},
  pages={18--31},
  year={2012},
  publisher={IEEE}
}

## 1. Dependency
* numpy
* pyEEG
* scikit-learn

In [2]:
import os
import pickle as pickle

import numpy as np
import pandas as pd
import pyeeg as pe

## 2. Global Variables setup
File name: data\sXX.dat, XX \in [01,32] (one file per subject)
* data: 40 x 40 x 8064: trial x channel x data
* label: 40 x 4: video/trial x label (valence, arousal, dominance, liking)

Channel indices: {
* 1 : AF3; 2: F3; 3: F7; 4: FC5; 7: T7; 11: P7; 13: O1
* 17: AF4; 19: F4; 20: F8; 21: FC6; 25: T8; 29: P8; 31: O2 }

In [48]:
channel = [1,2,3,4,6,11,13,17,19,20,21,25,29,31] #14 Channels chosen to fit Emotiv Epoch+
band = [4,8,12,16,25,45] #5 bands
window_size = 256 #Averaging band power of 2 sec
step_size = 16 #Each 0.125 sec update once
sample_rate = 128 #Sampling rate of 128 Hz
subjectList = ['01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31','32']
#List of subjects

## 3. FFT with pyeeg
* [4-8]: theta band
* [8-12]: alpha band
* [12-16]: low beta band 
* [16-25]: high beta band
* [25-45]: gamma band

In [62]:
def FFT_Processing (sub, channel, band, window_size, step_size, sample_rate):
    '''
    Compute sliding-window band-power features for one subject and save them.

    Loads the pickled subject file data/s<sub>.dat, slides a window of
    window_size samples over every trial in steps of step_size samples,
    computes the band power of each selected channel with pyeeg.bin_power and
    writes the result to out/s<sub>.npy.

    arguments:  string subject id (e.g. '01')
                list channel indice
                list band (band edges passed to pyeeg.bin_power)
                int window size for FFT (in samples)
                int step size for FFT (in samples, must be > 0)
                int sample rate for FFT
    return:     None. The saved array has shape (n_windows, 2) and dtype
                object: column 0 holds the feature vector of one window
                (len(channel) * (len(band) - 1) values), column 1 holds the
                label row of the trial the window came from.
    raises:     ValueError if step_size is not positive.
    '''
    if step_size <= 0:
        # A non-positive step would never advance the window (infinite loop).
        raise ValueError('step_size must be a positive number of samples')

    # NOTE: pickle.load can execute arbitrary code; only load trusted dataset files.
    with open(os.path.join('data', 's' + sub + '.dat'), 'rb') as file:
        subject = pickle.load(file, encoding='latin1') #resolve the python 2 data problem by encoding : latin1

    rows = [] #one (feature vector, labels) pair per window
    for i in range(len(subject["data"])):
        # loop over all trials of the subject
        data = subject["data"][i]
        labels = subject["labels"][i]
        start = 0

        # NOTE(review): the strict '<' means the window that ends exactly on
        # the last sample is not emitted; kept so the window count is unchanged.
        while start + window_size < data.shape[1]:
            meta_data = [] #meta vector for analysis
            for j in channel:
                X = data[j][start : start + window_size] #Slice raw data of one window for channel j
                Y = pe.bin_power(X, band, sample_rate) #FFT band power of channel j, one value per band
                meta_data.extend(Y[0]) #first returned element is used as the per-band feature values
            rows.append((np.array(meta_data), labels))
            start += step_size

    # Features and labels have different lengths, so they are stored in an
    # explicit object array; letting np.array infer a ragged shape raises
    # ValueError on recent NumPy releases.
    meta = np.empty((len(rows), 2), dtype=object)
    for k, (features, trial_labels) in enumerate(rows):
        meta[k, 0] = features
        meta[k, 1] = trial_labels

    os.makedirs('out', exist_ok=True) #make sure the output folder exists
    np.save(os.path.join('out', 's' + sub), meta, allow_pickle=True, fix_imports=True)

# Run the feature extraction once for every subject id in subjectList,
# using the global configuration defined above.
for subjects in subjectList:
    FFT_Processing(
        sub=subjects,
        channel=channel,
        band=band,
        window_size=window_size,
        step_size=step_size,
        sample_rate=sample_rate,
    )

## 4. Segmentation of preprocessed data
* training dataset: 75% (6 of every 8 windows)
* validation dataset: 12.5%
* testing dataset: 12.5%

Algorithm pool:
* Support Vector Machine (which kernel?)
* AdaBoost

Best practice can be found in this paper: 

@article{alarcao2017emotions,
  title={Emotions recognition using EEG signals: A survey},
  author={Alarcao, Soraia M and Fonseca, Manuel J},
  journal={IEEE Transactions on Affective Computing},
  year={2017},
  publisher={IEEE}
}

In [54]:
# Split the per-subject feature files into training / testing / validation sets.
# The window index modulo 8 selects the destination:
#   0 -> testing (12.5%), 1 -> validation (12.5%), 2..7 -> training (75%).
# NOTE(review): consecutive windows overlap heavily (window_size 256 vs
# step_size 16 in the configuration cell), so this interleaved split places
# near-duplicate windows in different sets; consider splitting by trial or by
# subject before reporting test scores.
data_training = []
label_training = []
data_testing = []
label_testing = []
data_validation = []
label_validation = []

for subjects in subjectList:

    # The saved arrays have dtype object (feature vector + label row per
    # window), so allow_pickle=True is required; np.load refuses them by default.
    # These files are produced by this notebook itself, so they are trusted.
    with open(os.path.join('out', 's' + subjects + '.npy'), 'rb') as file:
        sub = np.load(file, allow_pickle=True)

    for i in range(sub.shape[0]):
        if i % 8 == 0:
            data_testing.append(sub[i][0])
            label_testing.append(sub[i][1])
        elif i % 8 == 1:
            data_validation.append(sub[i][0])
            label_validation.append(sub[i][1])
        else:
            data_training.append(sub[i][0])
            label_training.append(sub[i][1])

# Save every split and report its shape (same order and messages as before).
for split_name, split_data, split_label in (
    ('training', data_training, label_training),
    ('testing', data_testing, label_testing),
    ('validation', data_validation, label_validation),
):
    split_data_array = np.array(split_data)
    split_label_array = np.array(split_label)
    np.save(os.path.join('out', 'data_' + split_name), split_data_array, allow_pickle=True, fix_imports=True)
    np.save(os.path.join('out', 'label_' + split_name), split_label_array, allow_pickle=True, fix_imports=True)
    print(split_name + " dataset:", split_data_array.shape, split_label_array.shape)

training dataset: (468480, 70) (468480, 4)
testing dataset: (78080, 70) (78080, 4)
validation dataset: (78080, 70) (78080, 4)
