## 1D CNN Model - Benchmarking

In [None]:
"""
@authors: Maxime Tchibozo, Donggeun Kim
@affiliation: NYSPI, Columbia University
@date: Oct 2018 - Oct 2020
@overview: Comparing the performance of different Machine Learning algorithms on the Experiment A task.
@input: Hyperaligned voxels with labels as numpy arrays (X_hyp_v2.csv and Y_hyp_v2.csv).
@output: Metrics for different classical algorithms.
"""

In [1]:
import numpy as np
import pandas as pd
import keras
from keras import backend as K
from keras.callbacks import History
import pickle
from keras import initializers
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot, plot_model

from keras.models import Sequential
from keras.layers.core import Flatten, Dense, Dropout, Activation
from keras.layers.convolutional import Conv1D, MaxPooling1D, ZeroPadding1D
from keras.layers.convolutional import Conv2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
import glob
import os
from keras.callbacks import ModelCheckpoint
from keras.callbacks import ReduceLROnPlateau

import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure, show
from matplotlib.ticker import MaxNLocator
import matplotlib as mpl
from sklearn import preprocessing
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
from xgboost import XGBClassifier

from sklearn.metrics import roc_curve,roc_auc_score



In [3]:
folder_root = '/content/drive/My Drive/BrainStateClassification/code/'#Update to your main folder, this folder must contain a data and a result subfolder
folder_data = folder_root + 'data/' #folder containing all the .mat files
folder_result = folder_root + 'data/plateau' #folder where your models and logs will be stored


In [4]:
os.chdir(folder_data)

In [5]:
Y = np.loadtxt('Y_hyp_v2.csv',delimiter=",")
X = np.loadtxt('X_hyp_v2.csv',delimiter=",")

In [6]:
from collections import Counter
Counter(Y)

Counter({0.0: 2200, 1.0: 1100, 2.0: 1100})

In [None]:
np.shape(X)

(4400, 300)

## Comparison with other models

Here we compare the 1DCNN performance with SVM, XGBoost

In [None]:
rbf_svm_scores = []

tmp = list(range(0,4800,400))
X_Hold_out = [] #Holdout = testing | Max
X_CV=[]
Y_Hold_out = []
Y_CV=[]
for i in range(len(tmp)-1):
    X_Hold_out+=[X[tmp[i]:tmp[i+1]]]
    Y_Hold_out+=[Y[tmp[i]:tmp[i+1]]]
    X_CV += [np.concatenate((X[0:tmp[i]],X[tmp[i+1]:]))]
    Y_CV += [np.concatenate((Y[0:tmp[i]],Y[tmp[i+1]:]))]
N_Leave_One_Subject = len(tmp)-1


for leave_one_idx in range(N_Leave_One_Subject): 
    print(leave_one_idx)
    X_train = X_CV[leave_one_idx] # 300 refers to N_features
    Y_train = Y_CV[leave_one_idx].reshape(-1,1)
    X_test = X_Hold_out[leave_one_idx]
    Y_test = Y_Hold_out[leave_one_idx]

    scaler = preprocessing.StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train).reshape(-1,300,1)
    X_test = scaler.transform(X_test).reshape(-1,300,1)

    X_train, Y_train = RandomOverSampler(random_state=0).fit_resample(X_train.reshape(-1,300),Y_train.flatten())
    X_test, Y_test = RandomOverSampler(random_state=0).fit_resample(X_test.reshape(-1,300),Y_test.flatten())

    svm_rbf = SVC(kernel='rbf')
    svm_rbf.fit(X_train,Y_train)
    rbf_svm_scores.append(svm_rbf.score(X_test,Y_test))
    print(rbf_svm_scores[-1])




0




0.7666666666666667
1




0.83
2




0.7766666666666666
3




0.7666666666666667
4




0.7066666666666667
5




0.8216666666666667
6




0.7966666666666666
7




0.8316666666666667
8




0.7516666666666667
9




0.7666666666666667
10




0.8383333333333334


In [None]:
linear_svm_scores = []


for leave_one_idx in range(N_Leave_One_Subject): 
    print(leave_one_idx)
    X_train = X_CV[leave_one_idx] # 300 refers to N_features
    Y_train = Y_CV[leave_one_idx].reshape(-1,1)
    X_test = X_Hold_out[leave_one_idx]
    Y_test = Y_Hold_out[leave_one_idx]

    scaler = preprocessing.StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train).reshape(-1,300,1)
    X_test = scaler.transform(X_test).reshape(-1,300,1)

    X_train, Y_train = RandomOverSampler(random_state=0).fit_resample(X_train.reshape(-1,300),Y_train.flatten())
    X_test, Y_test = RandomOverSampler(random_state=0).fit_resample(X_test.reshape(-1,300),Y_test.flatten())

    svm_linear = LinearSVC()
    svm_linear.fit(X_train,Y_train)
    linear_svm_scores.append(svm_linear.score(X_test,Y_test))
    print(linear_svm_scores[-1])

0




0.7633333333333333
1




0.7633333333333333
2




0.7533333333333333
3




0.7666666666666667
4




0.715
5




0.7583333333333333
6




0.715
7




0.7733333333333333
8




0.7466666666666667
9




0.75
10
0.715




In [None]:
random_forest_scores = []


for leave_one_idx in range(N_Leave_One_Subject): 
    print(leave_one_idx)
    X_train = X_CV[leave_one_idx] # 300 refers to N_features
    Y_train = Y_CV[leave_one_idx].reshape(-1,1)
    X_test = X_Hold_out[leave_one_idx]
    Y_test = Y_Hold_out[leave_one_idx]

    scaler = preprocessing.StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train).reshape(-1,300,1)
    X_test = scaler.transform(X_test).reshape(-1,300,1)

    X_train, Y_train = RandomOverSampler(random_state=0).fit_resample(X_train.reshape(-1,300),Y_train.flatten())
    X_test, Y_test = RandomOverSampler(random_state=0).fit_resample(X_test.reshape(-1,300),Y_test.flatten())

    random_forest = RandomForestClassifier()
    random_forest.fit(X_train,Y_train)
    random_forest_scores.append(random_forest.score(X_test,Y_test))
    print(random_forest_scores[-1])

0




0.7233333333333334
1




0.765
2




0.75
3




0.6966666666666667
4




0.6866666666666666
5




0.7533333333333333
6




0.7566666666666667
7




0.7433333333333333
8




0.7116666666666667
9




0.7283333333333334
10




0.82


In [None]:

xgb_scores = []


for leave_one_idx in range(N_Leave_One_Subject): 
    print(leave_one_idx)
    X_train = X_CV[leave_one_idx] # 300 refers to N_features
    Y_train = Y_CV[leave_one_idx].reshape(-1,1)
    X_test = X_Hold_out[leave_one_idx]
    Y_test = Y_Hold_out[leave_one_idx]

    scaler = preprocessing.StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train).reshape(-1,300,1)
    X_test = scaler.transform(X_test).reshape(-1,300,1)

    X_train, Y_train = RandomOverSampler(random_state=0).fit_resample(X_train.reshape(-1,300),Y_train.flatten())
    X_test, Y_test = RandomOverSampler(random_state=0).fit_resample(X_test.reshape(-1,300),Y_test.flatten())

    xgb_model = XGBClassifier()
    xgb_model.fit(X_train,Y_train)
    xgb_scores.append(xgb_model.score(X_test,Y_test))
    print(xgb_scores[-1])


0




0.7183333333333334
1




0.76
2




0.7366666666666667
3




0.7333333333333333
4




0.7
5




0.765
6




0.7533333333333333
7




0.7433333333333333
8




0.7783333333333333
9




0.7216666666666667
10




0.815


In [None]:

lda_scores = []


for leave_one_idx in range(N_Leave_One_Subject): 
    print(leave_one_idx)
    X_train = X_CV[leave_one_idx] # 300 refers to N_features
    Y_train = Y_CV[leave_one_idx].reshape(-1,1)
    X_test = X_Hold_out[leave_one_idx]
    Y_test = Y_Hold_out[leave_one_idx]

    scaler = preprocessing.StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train).reshape(-1,300,1)
    X_test = scaler.transform(X_test).reshape(-1,300,1)

    X_train, Y_train = RandomOverSampler(random_state=0).fit_resample(X_train.reshape(-1,300),Y_train.flatten())
    X_test, Y_test = RandomOverSampler(random_state=0).fit_resample(X_test.reshape(-1,300),Y_test.flatten())

    lda_model = LinearDiscriminantAnalysis()
    lda_model.fit(X_train,Y_train)
    lda_scores.append(lda_model.score(X_test,Y_test))
    print(lda_scores[-1])


0




0.7516666666666667
1




0.735
2




0.735
3




0.7716666666666666
4




0.705
5




0.7783333333333333
6




0.735
7




0.79
8




0.7383333333333333
9




0.7516666666666667
10




0.7416666666666667


In [None]:
cnn_scores = [0.868,0.853,0.857,0.820,0.747,0.873,0.850,0.868,0.870,0.863,0.870]


In [None]:
pd.DataFrame({'CNN test acc':cnn_scores,'SVM(Linear) test acc':linear_svm_scores,'SVM(rbf) test acc':rbf_svm_scores,'Random Forest test acc':random_forest_scores,'XGBoost test acc':xgb_scores,'LDA test acc':lda_scores})

Unnamed: 0,CNN test acc,SVM(Linear) test acc,SVM(rbf) test acc,Random Forest test acc,XGBoost test acc,LDA test acc
0,0.868,0.763333,0.766667,0.723333,0.718333,0.751667
1,0.853,0.763333,0.83,0.765,0.76,0.735
2,0.857,0.753333,0.776667,0.75,0.736667,0.735
3,0.82,0.766667,0.766667,0.696667,0.733333,0.771667
4,0.747,0.715,0.706667,0.686667,0.7,0.705
5,0.873,0.758333,0.821667,0.753333,0.765,0.778333
6,0.85,0.715,0.796667,0.756667,0.753333,0.735
7,0.868,0.773333,0.831667,0.743333,0.743333,0.79
8,0.87,0.746667,0.751667,0.711667,0.778333,0.738333
9,0.863,0.75,0.766667,0.728333,0.721667,0.751667


In [None]:
#**Comparison to the Hanson et al (2004) neural net**
#each subject, we created a 10-hidden node, eight-waymulti-class NN classifier. We used the hyperbolic tangent activa-tion transfer function for its hidden nodes, softmax activation

import keras
def create_model_2():
    model = Sequential()
    model.add(Dense(10, activation='tanh', input_shape= (300,1)))
    model.add(Flatten())
    model.add(Dense(3, activation='softmax')) 
    return model
model = create_model_2()
model.summary()

Model: "sequential_38"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_75 (Dense)             (None, 300, 10)           20        
_________________________________________________________________
flatten_22 (Flatten)         (None, 3000)              0         
_________________________________________________________________
dense_76 (Dense)             (None, 3)                 9003      
Total params: 9,023
Trainable params: 9,023
Non-trainable params: 0
_________________________________________________________________


In [None]:
NN_2004_acc = []

batch_size = 64

tmp = list(range(0,4800,400))
X_Hold_out = [] #Holdout = testing | Max
X_CV=[]
Y_Hold_out = []
Y_CV=[]
for i in range(len(tmp)-1):
    X_Hold_out+=[X[tmp[i]:tmp[i+1]]]
    Y_Hold_out+=[Y[tmp[i]:tmp[i+1]]]
    X_CV += [np.concatenate((X[0:tmp[i]],X[tmp[i+1]:]))]
    Y_CV += [np.concatenate((Y[0:tmp[i]],Y[tmp[i+1]:]))]
N_Leave_One_Subject = len(tmp)-1


for learn_rate in [0.0045]:
    for leave_one_idx in range(N_Leave_One_Subject): 
        print(leave_one_idx)
        X_train = X_CV[leave_one_idx] # 300 refers to N_features
        Y_train = Y_CV[leave_one_idx].reshape(-1,1)
        X_test = X_Hold_out[leave_one_idx]
        Y_test = Y_Hold_out[leave_one_idx]
        
        scaler = preprocessing.StandardScaler().fit(X_train)
        X_train = scaler.transform(X_train).reshape(-1,300,1)
        X_test = scaler.transform(X_test).reshape(-1,300,1)

#Y_..._onehot contains one hot encoded Y_... i.e, if Y[i] = 0, Y_onehot[i] = [1,0,0] 

        X_train, Y_train = RandomOverSampler(random_state=0).fit_resample(X_train.reshape(-1,300),Y_train.flatten())
        X_train = X_train.reshape(-1,300,1)
        X_test, Y_test = RandomOverSampler(random_state=0).fit_resample(X_test.reshape(-1,300),Y_test.flatten())
        X_test = X_test.reshape(-1,300,1)

        Y_train_onehot = np.eye(3)[Y_train.astype(int)].reshape(-1,3)
        Y_test_onehot = np.eye(3)[Y_test.astype(int)].reshape(-1,3)

        print("to create")
        model = create_model_2()
        model.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=keras.optimizers.Adam(lr=learn_rate),
                      metrics=['accuracy'])

        batch_size = 32
        epochs = 50 #20 epochs now

        print("Before Fit")
        model.fit(X_train, Y_train_onehot,
                  batch_size=batch_size,
                  epochs=epochs,
                  verbose=1,
                  validation_data=(X_test, Y_test_onehot))
        score, acc = model.evaluate(X_test, Y_test_onehot, batch_size=batch_size,verbose=0)
        print(f'Test Accuracy for learning rate {learn_rate} CV index {leave_one_idx} is :', "{:.3f}".format(acc))
        NN_2004_acc.append(acc)


0
to create
Before Fit
Epoch 1/50




Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test Accuracy for learning rate 0.0045 CV index 0 is : 0.740
1
to create
Before Fit
Epoch 1/50




Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test Accuracy for learning rate 0.0045 CV index 1 is : 0.725
2
to create
Before Fit
Epoch 1/50




Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test Accuracy for learning rate 0.0045 CV index 2 is : 0.732
3
to create
Before Fit
Epoch 1/50




Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test Accuracy for learning rate 0.0045 CV index 3 is : 0.753
4
to create
Before Fit
Epoch 1/50




Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test Accuracy for learning rate 0.0045 CV index 4 is : 0.713
5
to create
Before Fit
Epoch 1/50




Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
 25/188 [==>...........................] - ETA: 0s - loss: 0.3338 - accuracy: 0.8637

KeyboardInterrupt: ignored

In [None]:
np.std([0.735, 0.768, 0.740, 0.747, 0.672, 0.742, 0.743, 0.763, 0.778, 0.745, 0.760])

0.02635015329200945