<a href="https://colab.research.google.com/github/mykolesiko/eeg_investigation/blob/diplom/MADE_DE_features_version2_3em_sub.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## In this notebook, we will show how to read DE features with python

- Author: Wei Liu
- Affiliation: [BCMI lab, Shanghai Jiao Tong University, Shanghai, China](http://bcmi.sjtu.edu.cn)
- Date: May, 11, 2021

In [None]:
import numpy as np
import pickle

# check the version of these modules
print(np.__version__)
print(pickle.format_version)

1.19.5
4.0


In [None]:
import os
os.chdir("/content/drive/MyDrive/MADE/Project/seed/EEG_DE_features")

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# load DE features named '1_123.npz'
data_seed = []
labels_seed = []
for i in range(1, 16):
    data_npz = np.load(f'{i}_123.npz')
    data_seed.append(pickle.loads(data_npz['data']))
    labels_seed.append(pickle.loads(data_npz['label']))
    #print(data_npz.files)

In [None]:
# As we can see, there are 45 keys in both 'data' and 'label'.
# Each participant took part in our experiments for 3 sessions, and he/she watched 15 movie clips (i.e. 15 trials) during each session.
# Therefore, we could extract 3 * 15 = 45 DE feature matrices.

# The key indexes [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] belong to Session 1.
# The key indexes [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] belong to Session 2.
# The key indexes [30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44] belong to Session 3.

# We will print the emotion labels for each trial.
# label_dict = {0:'Disgust', 1:'Fear', 2:'Sad', 3:'Neutral', 4:'Happy'}
# for i in range(45):
#     print('Session {} -- Trial {} -- EmotionLabel : {}'.format(i//15+1, i%15+1, label_dict[label[i][0]]))

The above emotion labels should be the same as labels in file "emotion_label_and_stimuli_order.xlsx"

In [None]:
from sklearn.model_selection import StratifiedKFold , KFold
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix
from sklearn.naive_bayes import  GaussianNB
from sklearn.model_selection import StratifiedKFold 
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
from sklearn import svm
import lightgbm as lgbm
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# s = 0
# indexes = []
 
# for i in range(15):
#   n = len(label[i])
#   indexes.append(np.arange(n) + s)
#   s += n
#   if i == 0:
#     features_all = data[i]
#     labels_all = label[0]
#   else:
#     features_all = np.vstack((features_all, data[i]))  
#     labels_all = np.hstack((labels_all, label[i]))

# features_all = (features_all - features_all.mean(axis = 1,  keepdims = True))#/features_all.std(axis = 1,  keepdims = True) + 0.5
# features_all = (features_all)/features_all.std(axis = 1,  keepdims = True)
# features_all = features_all + 0.5

In [None]:
def get_label_valence(l):
  if (l == 0) or (l == 1) or (l == 2):
    return 0
  elif (l == 3):
    return 1
  else:
    return 2 

# def get_label_valence(l):
#    if (l == 0) or (l == 1):
#       return 0
#    elif (l == 2):
#       return 1
#    elif (l == 3):
#       return 2
#    else:
#       return 3  
# def get_label_valence(l):
#   return int(l)

f = np.vectorize(get_label_valence)    

In [None]:

features_sub  = []
labels_sub = []
for sub in range(15):
      label = labels_seed[sub]
      data = data_seed[sub]
      s = 0
      indexes = []
      for i in range(45):
        n = len(label[i])
        indexes.append(np.arange(n) + s)
        s += n
        if i == 0:
          features_all = data[i]
          labels_all = (f(label[0]))
        else:
          features_all = np.vstack((features_all, data[i]))  
          labels_all = np.hstack((labels_all, f(label[i])))

      features_all = (features_all - features_all.mean(axis = 1,  keepdims = True))#/features_all.std(axis = 1,  keepdims = True) + 0.5
      features_all = (features_all)/features_all.std(axis = 1,  keepdims = True)
      features_all = features_all + 0.5
      features_sub.append(features_all)
      labels_sub.append(labels_all)


In [None]:
for i in range(15):
  #print(features_sub[i].shape)
  print(labels_seed[i][2])

In [None]:
from collections import Counter

for sub in range(15):
    cnt = Counter()
    cnt.update(labels_sub[sub])
    print(cnt)

Counter({0: 1155, 1: 367, 2: 301})
Counter({0: 1155, 1: 367, 2: 301})
Counter({0: 1155, 1: 367, 2: 301})
Counter({0: 1155, 1: 367, 2: 301})
Counter({0: 1155, 1: 367, 2: 301})
Counter({0: 1155, 1: 367, 2: 301})
Counter({0: 1155, 1: 367, 2: 301})
Counter({0: 1155, 1: 367, 2: 301})
Counter({0: 1155, 1: 367, 2: 301})
Counter({0: 1155, 1: 367, 2: 301})
Counter({0: 1155, 1: 367, 2: 301})
Counter({0: 1155, 1: 367, 2: 301})
Counter({0: 1155, 1: 367, 2: 301})
Counter({0: 1155, 1: 367, 2: 301})
Counter({0: 1155, 1: 367, 2: 301})


In [None]:
k = 3
class CVsplitter:
  def __init__(self, n_splits):#, *, shuffle, random_state):
    self.n_splits = n_splits
  def  split(self, X_in=None, y=None, groups=None):
      #print(X_in.shape[0])
      X = np.arange(15)
      train_indexes = []
      test_indexes = []
      train_indexes.append([])    
      test_indexes.append([]) 
      #skf = StratifiedKFold(n_splits=k, random_state=None, shuffle=True)
      #kf = KFold(n_splits=k, random_state=123, shuffle=True)
      #for fold, (train_index, test_index) in enumerate(skf.split(X, y)):
      #for fold, (train_index, test_index) in enumerate(kf.split(X)):
      for fold in range(self.n_splits):
            test_index = np.arange(5) + fold * 5
            test_indexes[-1].append(test_index)
            train_index = np.array(list(set(range(15)).difference(set(test_index))))
            train_indexes[-1].append(train_index)
            print(train_index, test_index)
      for fold in range(self.n_splits):
                  train = []
                  test = []
                  
                  #print(train_indexes[0][fold], test_indexes[0][fold])
                  for nvideo in train_indexes[0][fold]:
                      indexes1 = indexes[nvideo]
                      indexes2 = indexes[nvideo + 15]
                      indexes3 = indexes[nvideo + 30]
                      #print(indexes1, indexes2, indexes3)
                      train.extend(indexes1)
                      train.extend(indexes2)
                      train.extend(indexes3)
                  for nvideo in test_indexes[0][fold]:
                      indexes1 = indexes[nvideo]
                      indexes2 = indexes[nvideo + 15]
                      indexes3 = indexes[nvideo + 30]
                      test.extend(indexes1)
                      test.extend(indexes2)
                      test.extend(indexes3)
                  print(len(train), len(test))    
                  print(list(set(train) & set(test)))

                  yield (train,  test)

  def  get_n_splits(self, X=None, y=None, groups=None):
      return(self.n_splits)


In [42]:
params = []
for sub in range(15):
      features_all = features_sub[sub]
      labels_all = labels_sub[sub]
      cv = CVsplitter(3)
      pows2 = [2.0 ** n for n in np.arange(-10, 10, 1)]
      parameters = {'kernel':(['linear']), 'C': pows2, 'class_weight': ['balanced']}
      #parameters = {'kernel':(['linear']), 'C': pows2}
                            # parameters = {'kernel':(['rbf']), 'C': pows2, 'gamma' : pows2, 'class_weight': (['balanced'])}
        #print(clf.best_score_, clf.best_params_)
      #svc = svm.SVC()# C = 0.0009765625, kernel = 'linear')
      #clf = GridSearchCV(svc, parameters, scoring='f1_macro', cv=cv, n_jobs=-1)
      #parameters = {'C': pows2, 'class_weight': ['balanced'], 'max_iter': [5000], 'dual': [True, False]}
      #parameters = {'kernel':(['linear']), 'C': pows2}
                           # parameters = {'kernel':(['rbf']), 'C': pows2, 'gamma' : pows2, 'class_weight': (['balanced'])}
        #print(clf.best_score_, clf.best_params_)
      svc = svm.SVC()# C = 0.0009765625, kernel = 'linear')
      #clf = GridSearchCV(svc, parameters, scoring='f1_macro', cv=cv, n_jobs=-1)
      #svc = svm.LinearSVC()


      clf = GridSearchCV(svc, parameters, scoring='accuracy', cv=cv, n_jobs=-1)
      clf.fit(features_all, labels_all)
      print(clf.best_score_, clf.best_params_)
      params.append(clf.best_params_)

[ 5  6  7  8  9 10 11 12 13 14] [0 1 2 3 4]
[ 0  1  2  3  4 10 11 12 13 14] [5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9] [10 11 12 13 14]
1291 532
[]
1156 667
[]
1199 624
[]
0.911052059038921 {'C': 0.03125, 'class_weight': 'balanced', 'kernel': 'linear'}
[ 5  6  7  8  9 10 11 12 13 14] [0 1 2 3 4]
[ 0  1  2  3  4 10 11 12 13 14] [5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9] [10 11 12 13 14]
1291 532
[]
1156 667
[]
1199 624
[]
0.865833765448103 {'C': 1.0, 'class_weight': 'balanced', 'kernel': 'linear'}
[ 5  6  7  8  9 10 11 12 13 14] [0 1 2 3 4]
[ 0  1  2  3  4 10 11 12 13 14] [5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9] [10 11 12 13 14]
1291 532
[]
1156 667
[]
1199 624
[]
0.8246011886576472 {'C': 0.03125, 'class_weight': 'balanced', 'kernel': 'linear'}
[ 5  6  7  8  9 10 11 12 13 14] [0 1 2 3 4]
[ 0  1  2  3  4 10 11 12 13 14] [5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9] [10 11 12 13 14]
1291 532
[]
1156 667
[]
1199 624
[]
0.7048447713197575 {'C': 0.0078125, 'class_weight': 'balanced', 'kernel': 'linear'}
[ 5  6  7  8  9 10 11 12

In [44]:
k = 3
f1_sub = []
acc_sub = []
for sub in range(15):
        print(sub)

        #data = data_seed[sub]
        #label = (labels_seed[sub])

        train_indexes = []
        test_indexes = []
        train_indexes.append([])    
        test_indexes.append([]) 
        #skf = StratifiedKFold(n_splits=k, random_state=None, shuffle=True)
        #kf = KFold(n_splits=k, random_state=None, shuffle=True)
        splitter = CVsplitter(n_splits=k)#, random_state=123, shuffle=True)
        #for fold, (train_index, test_index) in enumerate(skf.split(X, y)):
        for fold, (train_index, test_index) in enumerate(splitter.split(data, label)):
              train_indexes[-1].append(train_index)
              test_indexes[-1].append(test_index)

        f1_all = []
        acc_all = []
        for fold in range(k):
            features_train = np.array(features_sub[sub])[np.array(train_indexes[0][fold])]
            features_test = np.array(features_sub[sub])[np.array(test_indexes[0][fold])]
            labels_train = np.array(labels_sub[sub])[np.array(train_indexes[0][fold])]
            labels_test = np.array(labels_sub[sub])[np.array(test_indexes[0][fold])]

          
            model = svm.SVC(C = params[sub]['C'], kernel = 'linear', class_weight = 'balanced')
            #model = KNeighborsClassifier(n_neighbors=5)
            #model = svc = svm.LinearSVC(penalty = "l1", C = params[sub]['C'], class_weight = 'balanced', dual = params[sub]['dual'], max_iter = 5000)
            model.fit(features_train, labels_train)                   
            predicted = model.predict(features_test)
            f1 = (f1_score(predicted, labels_test, average = 'macro'))
            acc =  (accuracy_score(predicted, labels_test))
            f1_all.append(f1)
            acc_all.append(acc)
            print(confusion_matrix(predicted, labels_test))  
                              
          #print(f1, acc)
        print("**********************")   
        print(np.mean(f1_all), np.mean(acc_all))   
        acc_sub.append(np.mean(acc_all))
        f1_sub.append(np.mean(f1_all))
print(np.mean(acc_sub))        
print(np.mean(f1_sub))        



0
[ 5  6  7  8  9 10 11 12 13 14] [0 1 2 3 4]
[ 0  1  2  3  4 10 11 12 13 14] [5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9] [10 11 12 13 14]
1291 532
[]
1156 667
[]
1199 624
[]
[[337   0   0]
 [ 16 116   0]
 [  0   0  63]]
[[403   0   0]
 [ 35  92   0]
 [  0   0 137]]
[[269  20   0]
 [  0 139   0]
 [ 95   0 101]]
**********************
0.9053079241041404 0.911052059038921
1
[ 5  6  7  8  9 10 11 12 13 14] [0 1 2 3 4]
[ 0  1  2  3  4 10 11 12 13 14] [5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9] [10 11 12 13 14]
1291 532
[]
1156 667
[]
1199 624
[]
[[353  41   0]
 [  0  75   0]
 [  0   0  63]]
[[346   0   0]
 [  0  92   0]
 [ 92   0 137]]
[[301  54   0]
 [  0 105   0]
 [ 63   0 101]]
**********************
0.8618593035202596 0.865833765448103
2
[ 5  6  7  8  9 10 11 12 13 14] [0 1 2 3 4]
[ 0  1  2  3  4 10 11 12 13 14] [5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9] [10 11 12 13 14]
1291 532
[]
1156 667
[]
1199 624
[]
[[332  59  13]
 [  0  57   0]
 [ 21   0  50]]
[[365   0  47]
 [ 73  92   0]
 [  0   0  90]]
[[320  63   0]
 [

In [None]:
k = 3
f1_sub = []
acc_sub = []

f1_all = []
acc_all = []
for sub in range(15):
        print(sub)

        #data = data_seed[sub]
        #label = (labels_seed[sub])

        train_indexes = []
        test_indexes = []
        train_indexes.append([])    
        test_indexes.append([]) 
        #skf = StratifiedKFold(n_splits=k, random_state=None, shuffle=True)
        #kf = KFold(n_splits=k, random_state=None, shuffle=True)
        splitter = CVsplitter(n_splits=k)#, random_state=123, shuffle=True)
        #for fold, (train_index, test_index) in enumerate(skf.split(X, y)):
        for fold, (train_index, test_index) in enumerate(splitter.split(data, label)):
              train_indexes[-1].append(train_index)
              test_indexes[-1].append(test_index)

        for fold in range(k):
            features_train = np.array(features_sub[sub])[np.array(train_indexes[0][fold])]
            features_test = np.array(features_sub[sub])[np.array(test_indexes[0][fold])]
            labels_train = np.array(labels_sub[sub])[np.array(train_indexes[0][fold])]
            labels_test = np.array(labels_sub[sub])[np.array(test_indexes[0][fold])]

          
            model = svm.SVC(C = params[sub]['C'], kernel = 'linear', class_weight = 'balanced')
            #model = KNeighborsClassifier(n_neighbors=5)
            model.fit(features_train, labels_train)                   
            predicted = model.predict(features_test)
            f1 = (f1_score(predicted, labels_test, average = 'macro'))
            acc =  (accuracy_score(predicted, labels_test))
            f1_all.append(f1)
            acc_all.append(acc)
            print(confusion_matrix(predicted, labels_test))  
                              
          #print(f1, acc)
        #print("**********************")   
        #print(np.mean(f1_all), np.mean(acc_all))   
        #acc_sub.append(np.mean(acc_all))
        #f1_sub.append(np.mean(f1_all))
print(np.mean(acc_all))        
print(np.mean(f1_all))        



0
[ 5  6  7  8  9 10 11 12 13 14] [0 1 2 3 4]
[ 0  1  2  3  4 10 11 12 13 14] [5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9] [10 11 12 13 14]
1291 532
[]
1156 667
[]
1199 624
[]
[[337   0   0]
 [ 16 116   0]
 [  0   0  63]]
[[403   0   0]
 [ 35  92   0]
 [  0   0 137]]
[[269  20   0]
 [  0 139   0]
 [ 95   0 101]]
1
[ 5  6  7  8  9 10 11 12 13 14] [0 1 2 3 4]
[ 0  1  2  3  4 10 11 12 13 14] [5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9] [10 11 12 13 14]
1291 532
[]
1156 667
[]
1199 624
[]
[[353  41   0]
 [  0  75   0]
 [  0   0  63]]
[[346   0   0]
 [  0  92   0]
 [ 92   0 137]]
[[301  54   0]
 [  0 105   0]
 [ 63   0 101]]
2
[ 5  6  7  8  9 10 11 12 13 14] [0 1 2 3 4]
[ 0  1  2  3  4 10 11 12 13 14] [5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9] [10 11 12 13 14]
1291 532
[]
1156 667
[]
1199 624
[]
[[332  59  13]
 [  0  57   0]
 [ 21   0  50]]
[[365   0  47]
 [ 73  92   0]
 [  0   0  90]]
[[320  63   0]
 [  0  96   0]
 [ 44   0 101]]
3
[ 5  6  7  8  9 10 11 12 13 14] [0 1 2 3 4]
[ 0  1  2  3  4 10 11 12 13 14] [5 6 7 8 9]
[

In [None]:
print(labels_train[0:28])

[2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 0 0 0 0 0 0 0 0 0 0]


In [None]:
print(len(train_indexes[0][fold]))
print(len(features_sub[0]))

1291
1823
