In [1]:
import warnings, pickle, math, random, numpy, time
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from deap import creator, base, tools, algorithms, GA
from scoop import futures
from sklearn.feature_selection import mutual_info_classif
from sklearn.utils import shuffle
warnings.filterwarnings("ignore")

In [2]:
# This list contains all the eeg channels used in Deap dataset 
subject_names = ['s01', 's02', 's03', 's04', 's05', 's06', 's07', 's08', 's09', 's10', 's11', 's12', 
                 's13', 's14', 's15', 's16', 's17', 's18', 's19', 's20', 's21',
                 's22', 's23', 's24', 's25', 's26', 's27', 's28', 's29', 's30', 's31', 's32']
eeg_channels = np.array(['Fp1', 'AF3', 'F3', 'F7', 'FC5', 'FC1', 'C3', 'T7', 'CP5', 'CP1', 'P3', 
                         'P7', 'PO3', 'O1', 'Oz', 'Pz', 'Fp2', 'AF4', 'Fz', 'F4', 'F8', 'FC6', 
                         'FC2', 'Cz', 'C4', 'T8', 'CP6', 'CP2', 'P4', 'P8', 'PO4', 'O2'])

In [3]:
def getFitness(individual):
    global x_train, x_test, y_train, y_test
    total_features = int(x.shape[1])
    all_features_name = list(x.columns)
    if(len(set(individual)) == 1 and list(set(individual))[0] == 0):
        # If all gene values are 0 then return 0
        return 0
    features = []
    for i in range(0, len(individual)):
        if(individual[i]==1):
            features.append(all_features_name[i])
    no_sel_features = len(features)
    _classifier = SVC(kernel = 'poly')
    new_x_train = x_train[features].copy()
    new_x_test = x_test[features].copy()
    _classifier.fit(new_x_train, y_train)
    predictions = _classifier.predict(new_x_test)
    accuracy = accuracy_score(y_true = y_test, y_pred = predictions)
    my_fitness = alpha*accuracy + (1-alpha)*((total_features - no_sel_features)/total_features)
    return (my_fitness,)
def get_final_report(individual):
    total_features = int(x.shape[1])
    all_features_name = list(x.columns)
    if(len(set(individual)) == 1 and list(set(individual))[0] == 0):
        # If all gene values are 0 then return 0
        return 0, 0, 0, 0
    features = []
    for i in range(0, len(individual)):
        if(individual[i]==1):
            features.append(all_features_name[i])
    no_sel_features = len(features)
    _classifier = SVC(kernel = 'poly')
    new_x_train = x_train[features].copy()
    new_x_test = x_test[features].copy()
    _classifier.fit(new_x_train, y_train)
    predictions = _classifier.predict(new_x_test)
    accuracy = accuracy_score(y_true = y_test, y_pred = predictions)
    prec = precision_score(predictions, y_test)
    recall = recall_score(predictions, y_test)
    f1 = f1_score(predictions, y_test)
    return accuracy, prec, recall, f1

In [4]:
def kfold(x, y):
    # do the scalling
    names = x.columns
    scaler = MinMaxScaler()
    x = scaler.fit_transform(x)
    x = pd.DataFrame(x, columns=names)
    feature_vectors = list(x.columns)
    skf = StratifiedKFold(n_splits=10)
    x = np.array(x)
    y = np.array(y)
    skf.get_n_splits(x, y)
    test_data, train_data, train_label, test_label = [], [], [], []
    for train_index, test_index in skf.split(x, y):
        X_train, X_test = x[train_index], x[test_index]
        y_train, y_test = y[train_index], y[test_index]
        # convert into dataframe
        X_train = pd.DataFrame.from_records(X_train)
        X_train.columns = feature_vectors
        X_test = pd.DataFrame.from_records(X_test)
        X_test.columns = feature_vectors
        train_data.append(X_train)
        test_data.append(X_test)
        train_label.append(y_train)
        test_label.append(y_test)
    return train_data, test_data, train_label, test_label

# GA

In [5]:
def getHof(popu):
    global toolbox
    pop = popu
    hof = tools.HallOfFame(numPop * numGen)
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", numpy.mean)
    stats.register("std", numpy.std)
    stats.register("min", numpy.min)
    stats.register("max", numpy.max)

    # Launch genetic algorithm, change the crossover and mutation probability
    pop, log = algorithms.eaSimple(pop, toolbox, cxpb = 0.65, mutpb = 0.3,\
                                   ngen=numGen, stats=stats, halloffame=hof, verbose=False)
    return hof, log # Return the hall of fame

# Drive Code

In [6]:
def get_channels_list(best_individual):
    channel_list = []
    eeg_channels = np.array(optimal_channels)
    for i in range(0, len(best_individual)):
        if(best_individual[i]==1):
            # add that channel in the optimal set of channels
            channel_list.append(optimal_channels[i])
    return channel_list

In [7]:
def drive_code(inputdata, inputlabel, numPop, numGen):
    global toolbox, x, x_train, x_test, y_train, y_test
    #========================         Data opening       ==============================
    data = inputdata.copy()
    features_name = data.columns
    total_features = len(data.columns)-1
    x, y = data[data.columns[:total_features]], inputlabel.copy()
    
    # drop constant features
    x = x.loc[:,x.apply(pd.Series.nunique) != 1]
    
    #============================      Train-Test splitting      ======================
    x, y = shuffle(x, y, random_state = 40)
    train_data, test_data, train_label, test_label = kfold(x.copy(), y.copy())
    #==================================================================================
    
    
    creator.create('FitnessMax', base.Fitness, weights = (1.0,))
    creator.create('Individual', list, fitness = creator.FitnessMax)
    toolbox = base.Toolbox() # Create Toolbox
    toolbox.register('attr_bool', random.randint, 0, 1)
    toolbox.register('individual', tools.initRepeat, creator.Individual, toolbox.attr_bool, int(x.shape[1]))
    toolbox.register('population', tools.initRepeat, list, toolbox.individual)
    initial_population = toolbox.population(numPop)
    
    #=============================     GA based feature selection  ==================
    toolbox.register('evaluate', getFitness)
    toolbox.register('mate', tools.cxOnePoint)
    toolbox.register('mutate', tools.mutFlipBit, indpb = 0.1)
    toolbox.register('select', tools.selTournament, tournsize = 7)
    # taing optimal channels
    acc_cross, prec_cross, recall_cross, f1_score_cross = [], [], [], []
    print('Accuracy\tPre\tRecall\tF1')
    for i in range(0, 10):
        x_train, x_test, y_train, y_test = train_data[i], test_data[i], train_label[i], test_label[i]
        initial_population = toolbox.population(numPop)
        hof, log = getHof(initial_population)
        best_individual = list(hof)[0]
        acc, prec, recall, f1_score = get_final_report(best_individual)
        acc_cross.append(acc)
        prec_cross.append(prec)
        recall_cross.append(recall)
        f1_score_cross.append(f1_score)
        print(float('{:.3f}'.format(acc)), '\t\t', float('{:.3f}'.format(prec)), '\t', float('{:.3f}'.format(recall)),
                    '\t', float('{:.3f}'.format(f1_score)))
    acc_cross, prec_cross = np.array(acc_cross), np.array(prec_cross)
    recall_cross, f1_score_cross = np.array(recall_cross), np.array(f1_score_cross)
    acc_mean, prec_mean = np.mean(acc_cross), np.mean(prec_cross)
    recall_mean, f1_mean = np.mean(recall_cross), np.mean(f1_score_cross)
    print('-'*43)
    print(float('{:.3f}'.format(acc_mean)), '\t\t', float('{:.3f}'.format(prec_mean)), '\t', float('{:.3f}'.format(recall_mean)), '\t', float('{:.3f}'.format(f1_mean)))

In [8]:
subject_arousal_channels = [['P7', 'Fp2', 'AF4', 'F8', 'Fp1']
,['P7', 'F3', 'FC2', 'Pz', 'C4', 'CP2', 'T8', 'P3', 'FC5', 'F8', 'P4', 'CP5', 'CP1', 'O2', 'FC6', 'Cz', 'PO3', 'P8', 'O1', 'CP6', 'AF4', 'Oz', 'F4']
,['AF4', 'Fp1']
,['T8', 'FC2', 'Fz', 'Pz', 'CP6', 'P3']
,['PO3', 'Pz', 'P3', 'CP5', 'C3', 'Fp2', 'FC5', 'PO4', 'T7', 'F8', 'FC2', 'FC6']
,['C3', 'F3', 'AF4', 'AF3', 'FC2', 'F4', 'T8', 'Oz', 'FC5', 'F7', 'FC1', 'Cz', 'Fp2', 'O2', 'PO3', 'CP6', 'PO4']
,['FC1', 'O1', 'FC5', 'PO3', 'P7', 'Fp2', 'CP1', 'CP6', 'Cz']
,['Fz', 'AF4', 'C4', 'AF3', 'F4', 'FC1', 'Fp1', 'CP1', 'O1', 'P4', 'F3']
,['AF4', 'Oz', 'T8', 'T7', 'P8', 'AF3', 'Fp1', 'Fz', 'Pz', 'P3', 'CP6', 'CP1', 'FC2', 'CP2', 'P7', 'FC6', 'P4', 'CP5', 'F3', 'PO4', 'FC5', 'O2']
,['PO4', 'Fz']
,['P4', 'Fz', 'Pz', 'P8', 'Oz', 'O2', 'PO3', 'F4', 'C3', 'PO4', 'O1', 'Cz', 'T8', 'C4', 'CP1', 'CP6', 'AF4', 'F7', 'CP2', 'P3', 'F8', 'FC1', 'F3', 'P7', 'FC5', 'FC2', 'FC6', 'Fp2', 'Fp1']
,['FC6', 'F7', 'CP5', 'FC5']
,['T8', 'CP1', 'P3', 'C4']
,['AF3', 'Oz', 'FC1', 'Fp1', 'F8', 'C4', 'AF4', 'PO3']
,['FC1', 'P3', 'FC2', 'CP6', 'P8', 'O1', 'C3', 'C4']
,['O2', 'FC1', 'P8', 'Fz', 'Cz']
,['P4', 'Pz', 'PO4', 'O1', 'PO3', 'C4', 'C3']
,['FC5', 'AF4', 'C4', 'PO3']
,['CP5', 'F8', 'P8', 'Oz', 'O2', 'CP6']
,['Fp1', 'P7', 'F3', 'AF3', 'F8', 'FC5', 'CP5', 'AF4', 'T7', 'C3']
,['P7', 'AF3']
,['Oz', 'AF3', 'FC2', 'FC1']
,['Fp2', 'C3', 'CP5', 'CP2', 'FC2', 'FC5', 'P4', 'AF4', 'T8']
,['O2', 'P8', 'CP2', 'CP1', 'PO3', 'FC5', 'Cz', 'FC2', 'C3', 'PO4', 'FC1', 'P7', 'T7']
,['F8', 'C4', 'PO3', 'FC5', 'AF4', 'F4', 'CP2', 'T7', 'Fp1', 'C3', 'AF3', 'Oz', 'P8', 'FC2', 'PO4', 'FC6']
,['CP5', 'AF4', 'F7', 'Oz', 'FC2', 'AF3', 'Cz', 'F4', 'P4', 'P3', 'PO4']
,['Oz', 'O2', 'Pz', 'CP1', 'FC5', 'AF4', 'F8', 'CP5', 'FC2', 'P4', 'Cz', 'F4', 'Fp1']
,['PO3', 'F4', 'FC6', 'C4', 'Cz', 'CP1', 'CP5', 'C3']
,['PO4', 'P4', 'O2', 'FC2', 'F8', 'Cz', 'C4', 'CP2', 'Pz']
,['PO4', 'Fz', 'P3', 'AF3', 'P4', 'FC2', 'CP1', 'FC6', 'Pz', 'F3', 'CP2', 'CP5', 'C4', 'FC5', 'P8', 'Fp2', 'CP6', 'T7', 'T8', 'F4', 'Fp1', 'O2', 'AF4']
,['C4', 'PO4']
,['Cz', 'AF4', 'F3']]

In [9]:
def getData(sub):
    fs_vector = []
    mypath = '/Users/shyammarjit/Desktop/Brain Computer Interface/Hybrid Sequential Forward channel selection (HSFCS)/Subject Independent/data files/'
    datapath = mypath + sub + '_arousal.csv'
    data = pd.read_csv(datapath)
    label = data[data.columns[-1]]
    optimal_arousal_channels = subject_arousal_channels[subject_names.index(sub)]
    features_list = ['theta_mean', 'theta_var', 'theta_mode', 'theta_median', 'theta_skew', 'theta_std', 'theta_kurtosis', 
                   'theta_f_d', 'theta_nfd', 'theta_s_d', 'theta_nsd', 'alpha_mean', 'alpha_var', 'alpha_mode', 'alpha_median',
                   'alpha_skew', 'alpha_std', 'alpha_kurtosis', 'alpha_f_d', 'alpha_nfd', 'alpha_s_d', 'alpha_nsd',
                   'beta_mean', 'beta_var', 'beta_mode', 'beta_median', 'beta_skew', 'beta_std', 'beta_kurtosis', 
                   'beta_f_d', 'beta_nfd', 'beta_s_d', 'beta_nsd', 'gamma_mean', 'gamma_var', 'gamma_mode', 'gamma_median',
                   'gamma_skew', 'gamma_std', 'gamma_kurtosis', 'gamma_f_d', 'gamma_nfd', 'gamma_s_d', 'gamma_nsd', 'theta_energy',
                   'alpha_energy', 'beta_energy', 'gamma_energy', 'theta_avg_power', 'alpha_avg_power', 'beta_avg_power',
                   'gamma_avg_power', 'theta_rms', 'alpha_rms', 'beta_rms', 'gamma_rms',
                   'theta_ShEn', 'alpha_ShEn', 'beta_ShEn', 'gamma_ShEn', 'theta_aentropy', 'alpha_aentropy',
                   'beta_aentropy', 'gamma_aentropy', 'theta_pentropy', 'alpha_pentropy', 'beta_pentropy', 'gamma_pentropy', 
                   'theta_wpe', 'alpha_wpe', 'theta_wpe', 'gamma_wpe', 'H_theta', 'c_theta', 'H_alpha', 'c_alpha', 'H_beta',
                   'c_beta', 'H_gamma', 'c_gamma', 'higuchi_theta', 'petrosian_theta', 'higuchi_alpha', 'petrosian_alpha', 'higuchi_beta',
                   'petrosian_beta', 'higuchi_gamma', 'petrosian_gamma', 'aic_theta_ar',
                   'hqic_theta_ar', 'bic_theta_ar', 'llf_theta_ar', 'aic_alpha_ar', 'hqic_alpha_ar', 'bic_alpha_ar', 'llf_alpha_ar', 
                   'aic_beta_ar', 'hqic_beta_ar', 'bic_beta_ar', 'llf_beta_ar', 'aic_gamma_ar', 'hqic_gamma_ar', 'bic_gamma_ar', 
                   'llf_gamma_ar', 'aic_theta_arma', 'hqic_theta_arma', 'bic_theta_arma', 'llf_theta_arma', 'aic_alpha_arma', 
                   'hqic_alpha_arma', 'bic_alpha_arma', 'llf_alpha_arma', 'aic_beta_arma', 'hqic_beta_arma', 'bic_beta_arma', 
                   'llf_beta_arma', 'aic_gamma_arma', 'hqic_gamma_arma', 'bic_gamma_arma', 'llf_gamma_arma']
    for i in optimal_arousal_channels:
        for j in features_list:
            fs_vector.append(i + '_' + j)
    data = data[fs_vector]
    return data, label

In [28]:
#=======================        Hyperparameters value      =========================
alpha = 0.90
numPop, numGen = 100, 50
#===================================================================================

for sub in subject_names[31:32]:
    indepdata, label = getData(sub)
    print('='*97)
    print(" "*45, sub, " "*45)
    print('='*97,"\n")
    drive_code(indepdata, label, numPop, numGen)
    break

                                              s32                                              

Accuracy	Pre	Recall	F1
0.5 		 0.5 	 0.5 	 0.5
0.5 		 1.0 	 0.5 	 0.667
1.0 		 1.0 	 1.0 	 1.0
0.75 		 0.667 	 1.0 	 0.8
1.0 		 1.0 	 1.0 	 1.0
1.0 		 1.0 	 1.0 	 1.0
1.0 		 1.0 	 1.0 	 1.0
1.0 		 1.0 	 1.0 	 1.0
1.0 		 1.0 	 1.0 	 1.0
1.0 		 1.0 	 1.0 	 1.0
-------------------------------------------
0.875 		 0.917 	 0.9 	 0.897


In [11]:
'''
=================================================================================================
                                              s01                                              
================================================================================================= 

Accuracy	Pre	Recall	F1
0.75 		 0.667 	 1.0 	 0.8
1.0 		 1.0 	 1.0 	 1.0
0.75 		 1.0 	 0.75 	 0.857
1.0 		 1.0 	 1.0 	 1.0
1.0 		 1.0 	 1.0 	 1.0
1.0 		 1.0 	 1.0 	 1.0
0.75 		 1.0 	 0.667 	 0.8
1.0 		 1.0 	 1.0 	 1.0
1.0 		 1.0 	 1.0 	 1.0
1.0 		 1.0 	 1.0 	 1.0
-------------------------------------------
0.925 		 0.967 	 0.942 	 0.946
=================================================================================================
                                              s02                                              
================================================================================================= 

Accuracy	Pre	Recall	F1
0.5 		 0.333 	 1.0 	 0.5
1.0 		 1.0 	 1.0 	 1.0
1.0 		 1.0 	 1.0 	 1.0
0.5 		 0.667 	 0.667 	 0.667
0.75 		 1.0 	 0.667 	 0.8
0.5 		 1.0 	 0.5 	 0.667
0.75 		 1.0 	 0.667 	 0.8
1.0 		 1.0 	 1.0 	 1.0
1.0 		 1.0 	 1.0 	 1.0
0.75 		 1.0 	 0.667 	 0.8
-------------------------------------------
0.775 		 0.9 	 0.817 	 0.823
=================================================================================================
                                              s03                                              
================================================================================================= 

Accuracy	Pre	Recall	F1
1.0 		 0.0 	 0.0 	 0.0
1.0 		 0.0 	 0.0 	 0.0
1.0 		 1.0 	 1.0 	 1.0
0.75 		 1.0 	 0.5 	 0.667
0.75 		 0.0 	 0.0 	 0.0
0.75 		 0.0 	 0.0 	 0.0
0.75 		 0.0 	 0.0 	 0.0
1.0 		 1.0 	 1.0 	 1.0
1.0 		 1.0 	 1.0 	 1.0
1.0 		 1.0 	 1.0 	 1.0
-------------------------------------------
0.9 		 0.5 	 0.45 	 0.467
=================================================================================================
                                              s04                                              
================================================================================================= 

Accuracy	Pre	Recall	F1
1.0 		 1.0 	 1.0 	 1.0
1.0 		 1.0 	 1.0 	 1.0
1.0 		 1.0 	 1.0 	 1.0
1.0 		 1.0 	 1.0 	 1.0
1.0 		 1.0 	 1.0 	 1.0
0.75 		 1.0 	 0.667 	 0.8
1.0 		 1.0 	 1.0 	 1.0
1.0 		 1.0 	 1.0 	 1.0
1.0 		 1.0 	 1.0 	 1.0
0.75 		 0.5 	 1.0 	 0.667
-------------------------------------------
0.95 		 0.95 	 0.967 	 0.947
'''

