In [1]:
import warnings, pickle, math, random, numpy, time
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from deap import creator, base, tools, algorithms, GA
from scoop import futures
from sklearn.feature_selection import mutual_info_classif
from sklearn.utils import shuffle
warnings.filterwarnings("ignore")

In [2]:
# This list contains all the eeg channels used in Deap dataset 
subject_names = ['s01', 's02', 's03', 's04', 's05', 's06', 's07', 's08', 's09', 's10', 's11', 's12', 
                 's13', 's14', 's15', 's16', 's17', 's18', 's19', 's20', 's21',
                 's22', 's23', 's24', 's25', 's26', 's27', 's28', 's29', 's30', 's31', 's32']
eeg_channels = np.array(['Fp1', 'AF3', 'F3', 'F7', 'FC5', 'FC1', 'C3', 'T7', 'CP5', 'CP1', 'P3', 
                         'P7', 'PO3', 'O1', 'Oz', 'Pz', 'Fp2', 'AF4', 'Fz', 'F4', 'F8', 'FC6', 
                         'FC2', 'Cz', 'C4', 'T8', 'CP6', 'CP2', 'P4', 'P8', 'PO4', 'O2'])

In [3]:
def getFitness(individual):
    global x_train, x_test, y_train, y_test
    total_features = int(x.shape[1])
    all_features_name = list(x.columns)
    if(len(set(individual)) == 1 and list(set(individual))[0] == 0):
        # If all gene values are 0 then return 0
        return 0
    features = []
    for i in range(0, len(individual)):
        if(individual[i]==1):
            features.append(all_features_name[i])
    no_sel_features = len(features)
    _classifier = SVC(kernel = 'poly')
    new_x_train = x_train[features].copy()
    new_x_test = x_test[features].copy()
    _classifier.fit(new_x_train, y_train)
    predictions = _classifier.predict(new_x_test)
    accuracy = accuracy_score(y_true = y_test, y_pred = predictions)
    my_fitness = alpha*accuracy + (1-alpha)*((total_features - no_sel_features)/total_features)
    return (my_fitness,)
def get_final_report(individual):
    total_features = int(x.shape[1])
    all_features_name = list(x.columns)
    if(len(set(individual)) == 1 and list(set(individual))[0] == 0):
        # If all gene values are 0 then return 0
        return 0, 0, 0, 0
    features = []
    for i in range(0, len(individual)):
        if(individual[i]==1):
            features.append(all_features_name[i])
    no_sel_features = len(features)
    _classifier = SVC(kernel = 'poly')
    new_x_train = x_train[features].copy()
    new_x_test = x_test[features].copy()
    _classifier.fit(new_x_train, y_train)
    predictions = _classifier.predict(new_x_test)
    accuracy = accuracy_score(y_true = y_test, y_pred = predictions)
    prec = precision_score(predictions, y_test, average = 'macro')
    recall = recall_score(predictions, y_test, average = 'macro')
    f1 = f1_score(predictions, y_test, average = 'macro')
    return accuracy, prec, recall, f1

In [4]:
def kfold(x, y):
    # do the scalling
    names = x.columns
    scaler = MinMaxScaler()
    x = scaler.fit_transform(x)
    x = pd.DataFrame(x, columns=names)
    feature_vectors = list(x.columns)
    skf = StratifiedKFold(n_splits=10)
    x = np.array(x)
    y = np.array(y)
    skf.get_n_splits(x, y)
    test_data, train_data, train_label, test_label = [], [], [], []
    for train_index, test_index in skf.split(x, y):
        X_train, X_test = x[train_index], x[test_index]
        y_train, y_test = y[train_index], y[test_index]
        # convert into dataframe
        X_train = pd.DataFrame.from_records(X_train)
        X_train.columns = feature_vectors
        X_test = pd.DataFrame.from_records(X_test)
        X_test.columns = feature_vectors
        train_data.append(X_train)
        test_data.append(X_test)
        train_label.append(y_train)
        test_label.append(y_test)
    return train_data, test_data, train_label, test_label

# GA

In [5]:
def getHof(popu):
    global toolbox
    pop = popu
    hof = tools.HallOfFame(numPop * numGen)
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", numpy.mean)
    stats.register("std", numpy.std)
    stats.register("min", numpy.min)
    stats.register("max", numpy.max)

    # Launch genetic algorithm, change the crossover and mutation probability
    pop, log = algorithms.eaSimple(pop, toolbox, cxpb = 0.65, mutpb = 0.3,\
                                   ngen=numGen, stats=stats, halloffame=hof, verbose=False)
    return hof, log # Return the hall of fame

# Drive Code

In [6]:
def get_channels_list(best_individual):
    channel_list = []
    eeg_channels = np.array(optimal_channels)
    for i in range(0, len(best_individual)):
        if(best_individual[i]==1):
            # add that channel in the optimal set of channels
            channel_list.append(optimal_channels[i])
    return channel_list

In [7]:
def drive_code(inputdata, inputlabel, numPop, numGen):
    global toolbox, x, x_train, x_test, y_train, y_test
    #========================         Data opening       ==============================
    data = inputdata.copy()
    features_name = data.columns
    total_features = len(data.columns)-1
    x, y = data[data.columns[:total_features]], inputlabel.copy()
    
    # drop constant features
    x = x.loc[:,x.apply(pd.Series.nunique) != 1]
    
    #============================      Train-Test splitting      ======================
    x, y = shuffle(x, y, random_state = 40)
    train_data, test_data, train_label, test_label = kfold(x.copy(), y.copy())
    #==================================================================================
    
    
    creator.create('FitnessMax', base.Fitness, weights = (1.0,))
    creator.create('Individual', list, fitness = creator.FitnessMax)
    toolbox = base.Toolbox() # Create Toolbox
    toolbox.register('attr_bool', random.randint, 0, 1)
    toolbox.register('individual', tools.initRepeat, creator.Individual, toolbox.attr_bool, int(x.shape[1]))
    toolbox.register('population', tools.initRepeat, list, toolbox.individual)
    initial_population = toolbox.population(numPop)
    
    #=============================     GA based feature selection  ==================
    toolbox.register('evaluate', getFitness)
    toolbox.register('mate', tools.cxOnePoint)
    toolbox.register('mutate', tools.mutFlipBit, indpb = 0.1)
    toolbox.register('select', tools.selTournament, tournsize = 7)
    # taing optimal channels
    acc_cross, prec_cross, recall_cross, f1_score_cross = [], [], [], []
    print('Accuracy\tPre\tRecall\tF1')
    for i in range(0, 10):
        x_train, x_test, y_train, y_test = train_data[i], test_data[i], train_label[i], test_label[i]
        initial_population = toolbox.population(numPop)
        hof, log = getHof(initial_population)
        best_individual = list(hof)[0]
        acc, prec, recall, f1_score = get_final_report(best_individual)
        acc_cross.append(acc)
        prec_cross.append(prec)
        recall_cross.append(recall)
        f1_score_cross.append(f1_score)
        print(float('{:.3f}'.format(acc)), '\t\t', float('{:.3f}'.format(prec)), '\t', float('{:.3f}'.format(recall)),
                    '\t', float('{:.3f}'.format(f1_score)))
    acc_cross, prec_cross = np.array(acc_cross), np.array(prec_cross)
    recall_cross, f1_score_cross = np.array(recall_cross), np.array(f1_score_cross)
    acc_mean, prec_mean = np.mean(acc_cross), np.mean(prec_cross)
    recall_mean, f1_mean = np.mean(recall_cross), np.mean(f1_score_cross)
    print('-'*43)
    print(float('{:.3f}'.format(acc_mean)), '\t\t', float('{:.3f}'.format(prec_mean)), '\t', float('{:.3f}'.format(recall_mean)), '\t', float('{:.3f}'.format(f1_mean)))

In [8]:
def getData(sub):
    fs_vector = []
    mypath = '/Users/shyammarjit/Desktop/Brain Computer Interface/Hybrid Sequential Forward channel selection (HSFCS)/Subject Independent/data files/'
    datapath = mypath + sub + '_all.csv'
    data = pd.read_csv(datapath)
    label = data[data.columns[-1]]
    optimal_all_channels = ['T7', 'C3', 'Fp1', 'FC6', 'P4', 'C4', 'AF3']
    features_list = ['theta_mean', 'theta_var', 'theta_mode', 'theta_median', 'theta_skew', 'theta_std', 'theta_kurtosis', 
                   'theta_f_d', 'theta_nfd', 'theta_s_d', 'theta_nsd', 'alpha_mean', 'alpha_var', 'alpha_mode', 'alpha_median',
                   'alpha_skew', 'alpha_std', 'alpha_kurtosis', 'alpha_f_d', 'alpha_nfd', 'alpha_s_d', 'alpha_nsd',
                   'beta_mean', 'beta_var', 'beta_mode', 'beta_median', 'beta_skew', 'beta_std', 'beta_kurtosis', 
                   'beta_f_d', 'beta_nfd', 'beta_s_d', 'beta_nsd', 'gamma_mean', 'gamma_var', 'gamma_mode', 'gamma_median',
                   'gamma_skew', 'gamma_std', 'gamma_kurtosis', 'gamma_f_d', 'gamma_nfd', 'gamma_s_d', 'gamma_nsd', 'theta_energy',
                   'alpha_energy', 'beta_energy', 'gamma_energy', 'theta_avg_power', 'alpha_avg_power', 'beta_avg_power',
                   'gamma_avg_power', 'theta_rms', 'alpha_rms', 'beta_rms', 'gamma_rms',
                   'theta_ShEn', 'alpha_ShEn', 'beta_ShEn', 'gamma_ShEn', 'theta_aentropy', 'alpha_aentropy',
                   'beta_aentropy', 'gamma_aentropy', 'theta_pentropy', 'alpha_pentropy', 'beta_pentropy', 'gamma_pentropy', 
                   'theta_wpe', 'alpha_wpe', 'theta_wpe', 'gamma_wpe', 'H_theta', 'c_theta', 'H_alpha', 'c_alpha', 'H_beta',
                   'c_beta', 'H_gamma', 'c_gamma', 'higuchi_theta', 'petrosian_theta', 'higuchi_alpha', 'petrosian_alpha', 'higuchi_beta',
                   'petrosian_beta', 'higuchi_gamma', 'petrosian_gamma', 'aic_theta_ar',
                   'hqic_theta_ar', 'bic_theta_ar', 'llf_theta_ar', 'aic_alpha_ar', 'hqic_alpha_ar', 'bic_alpha_ar', 'llf_alpha_ar', 
                   'aic_beta_ar', 'hqic_beta_ar', 'bic_beta_ar', 'llf_beta_ar', 'aic_gamma_ar', 'hqic_gamma_ar', 'bic_gamma_ar', 
                   'llf_gamma_ar', 'aic_theta_arma', 'hqic_theta_arma', 'bic_theta_arma', 'llf_theta_arma', 'aic_alpha_arma', 
                   'hqic_alpha_arma', 'bic_alpha_arma', 'llf_alpha_arma', 'aic_beta_arma', 'hqic_beta_arma', 'bic_beta_arma', 
                   'llf_beta_arma', 'aic_gamma_arma', 'hqic_gamma_arma', 'bic_gamma_arma', 'llf_gamma_arma']
    for i in optimal_all_channels:
        for j in features_list:
            fs_vector.append(i + '_' + j)
    data = data[fs_vector]
    return data, label

In [10]:
#=======================        Hyperparameters value      =========================
alpha = 0.90
numPop, numGen = 100, 50
#===================================================================================

for sub in subject_names[0:4]:
    indepdata, label = getData(sub)
    print('='*97)
    print(" "*45, sub, " "*45)
    print('='*97,"\n")
    drive_code(indepdata, label, numPop, numGen)

                                              s01                                              

Accuracy	Pre	Recall	F1
0.75 		 0.667 	 0.556 	 0.6
1.0 		 1.0 	 1.0 	 1.0
0.5 		 0.5 	 0.444 	 0.389
1.0 		 1.0 	 1.0 	 1.0
0.75 		 0.75 	 0.625 	 0.667
0.75 		 0.75 	 0.625 	 0.667
0.5 		 0.5 	 0.25 	 0.333
0.5 		 0.5 	 0.375 	 0.417
0.75 		 0.75 	 0.625 	 0.667
0.75 		 0.667 	 0.5 	 0.556
-------------------------------------------
0.725 		 0.708 	 0.6 	 0.629
                                              s02                                              

Accuracy	Pre	Recall	F1
0.5 		 0.333 	 0.167 	 0.222
0.5 		 0.333 	 0.167 	 0.222
0.75 		 0.667 	 0.556 	 0.6
0.5 		 0.5 	 0.5 	 0.444
0.75 		 0.667 	 0.556 	 0.6
0.75 		 0.667 	 0.5 	 0.556
0.5 		 0.5 	 0.333 	 0.375
1.0 		 1.0 	 1.0 	 1.0
0.5 		 0.5 	 0.375 	 0.417
0.75 		 0.75 	 0.625 	 0.667
-------------------------------------------
0.65 		 0.592 	 0.478 	 0.51
                                              s03                       

In [11]:
#=======================        Hyperparameters value      =========================
alpha = 0.90
numPop, numGen = 100, 50
#===================================================================================

for sub in subject_names[4:5]:
    indepdata, label = getData(sub)
    print('='*97)
    print(" "*45, sub, " "*45)
    print('='*97,"\n")
    drive_code(indepdata, label, numPop, numGen)

                                              s05                                              

Accuracy	Pre	Recall	F1
0.5 		 0.5 	 0.375 	 0.417
0.5 		 0.5 	 0.333 	 0.375
0.75 		 0.75 	 0.625 	 0.667
0.5 		 0.5 	 0.25 	 0.333
0.5 		 0.5 	 0.333 	 0.375
0.5 		 0.5 	 0.333 	 0.375
0.75 		 0.75 	 0.625 	 0.667
0.5 		 0.5 	 0.5 	 0.5
1.0 		 1.0 	 1.0 	 1.0
0.75 		 0.5 	 0.5 	 0.5
-------------------------------------------
0.625 		 0.6 	 0.487 	 0.521


In [12]:
#=======================        Hyperparameters value      =========================
alpha = 0.90
numPop, numGen = 100, 50
#===================================================================================

for sub in subject_names[5:32]:
    indepdata, label = getData(sub)
    print('='*97)
    print(" "*45, sub, " "*45)
    print('='*97,"\n")
    drive_code(indepdata, label, numPop, numGen)

                                              s06                                              

Accuracy	Pre	Recall	F1
1.0 		 1.0 	 1.0 	 1.0
1.0 		 1.0 	 1.0 	 1.0
0.75 		 0.5 	 0.5 	 0.5
0.5 		 0.333 	 0.222 	 0.267
0.5 		 0.5 	 0.25 	 0.333
1.0 		 1.0 	 1.0 	 1.0
0.25 		 0.167 	 0.111 	 0.133
0.5 		 0.5 	 0.333 	 0.375
0.5 		 0.5 	 0.333 	 0.375
0.75 		 0.75 	 0.625 	 0.667
-------------------------------------------
0.675 		 0.625 	 0.537 	 0.565
                                              s07                                              

Accuracy	Pre	Recall	F1
0.5 		 0.5 	 0.333 	 0.375
0.5 		 0.5 	 0.375 	 0.417
0.75 		 0.667 	 0.556 	 0.6
0.5 		 0.333 	 0.167 	 0.222
0.75 		 0.667 	 0.556 	 0.6
0.75 		 0.5 	 0.5 	 0.5
1.0 		 1.0 	 1.0 	 1.0
0.75 		 0.5 	 0.5 	 0.5
0.5 		 0.333 	 0.333 	 0.333
1.0 		 1.0 	 1.0 	 1.0
-------------------------------------------
0.7 		 0.6 	 0.532 	 0.555
                                              s08                                          

0.5 		 0.5 	 0.333 	 0.375
0.5 		 0.5 	 0.333 	 0.375
0.25 		 0.125 	 0.25 	 0.167
1.0 		 1.0 	 1.0 	 1.0
0.75 		 0.5 	 0.5 	 0.5
1.0 		 1.0 	 1.0 	 1.0
1.0 		 1.0 	 1.0 	 1.0
0.75 		 0.667 	 0.556 	 0.6
1.0 		 1.0 	 1.0 	 1.0
-------------------------------------------
0.75 		 0.704 	 0.66 	 0.668
                                              s19                                              

Accuracy	Pre	Recall	F1
1.0 		 1.0 	 1.0 	 1.0
0.5 		 0.333 	 0.333 	 0.333
0.5 		 0.5 	 0.333 	 0.389
0.5 		 0.5 	 0.5 	 0.444
0.75 		 0.833 	 0.833 	 0.778
1.0 		 1.0 	 1.0 	 1.0
0.5 		 0.5 	 0.333 	 0.375
0.5 		 0.5 	 0.333 	 0.375
0.75 		 0.667 	 0.556 	 0.6
0.75 		 0.667 	 0.556 	 0.6
-------------------------------------------
0.675 		 0.65 	 0.578 	 0.589
                                              s20                                              

Accuracy	Pre	Recall	F1
0.75 		 0.75 	 0.625 	 0.667
0.5 		 0.5 	 0.333 	 0.375
0.5 		 0.5 	 0.333 	 0.375
1.0 		 1.0 	 1.0 	 1.0
0.75 		 0.75 

FileNotFoundError: [Errno 2] No such file or directory: '/Users/shyammarjit/Desktop/Brain Computer Interface/Hybrid Sequential Forward channel selection (HSFCS)/Subject Independent/data files/s30_all.csv'

In [13]:
#=======================        Hyperparameters value      =========================
alpha = 0.90
numPop, numGen = 100, 50
#===================================================================================

for sub in subject_names[29:32]:
    indepdata, label = getData(sub)
    print('='*97)
    print(" "*45, sub, " "*45)
    print('='*97,"\n")
    drive_code(indepdata, label, numPop, numGen)

                                              s30                                              

Accuracy	Pre	Recall	F1
1.0 		 1.0 	 1.0 	 1.0
0.75 		 0.75 	 0.625 	 0.667
0.75 		 0.75 	 0.625 	 0.667
1.0 		 1.0 	 1.0 	 1.0
1.0 		 1.0 	 1.0 	 1.0
1.0 		 1.0 	 1.0 	 1.0
1.0 		 1.0 	 1.0 	 1.0
1.0 		 1.0 	 1.0 	 1.0
0.75 		 0.667 	 0.556 	 0.6
0.5 		 0.333 	 0.167 	 0.222
-------------------------------------------
0.875 		 0.85 	 0.797 	 0.816
                                              s31                                              

Accuracy	Pre	Recall	F1
0.75 		 0.667 	 0.556 	 0.6
0.5 		 0.5 	 0.333 	 0.389
1.0 		 1.0 	 1.0 	 1.0
0.75 		 0.833 	 0.833 	 0.778
0.75 		 0.667 	 0.556 	 0.6
0.75 		 0.667 	 0.556 	 0.6
0.5 		 0.5 	 0.444 	 0.389
0.5 		 0.375 	 0.375 	 0.333
0.75 		 0.75 	 0.625 	 0.667
0.75 		 0.75 	 0.625 	 0.667
-------------------------------------------
0.7 		 0.671 	 0.59 	 0.602


FileNotFoundError: [Errno 2] No such file or directory: '/Users/shyammarjit/Desktop/Brain Computer Interface/Hybrid Sequential Forward channel selection (HSFCS)/Subject Independent/data files/s32_all.csv'

In [15]:
#=======================        Hyperparameters value      =========================
alpha = 0.90
numPop, numGen = 100, 50
#===================================================================================

for sub in subject_names[31:32]:
    indepdata, label = getData(sub)
    print('='*97)
    print(" "*45, sub, " "*45)
    print('='*97,"\n")
    drive_code(indepdata, label, numPop, numGen)

                                              s32                                              

Accuracy	Pre	Recall	F1
0.75 		 0.667 	 0.556 	 0.6
0.25 		 0.125 	 0.125 	 0.125
0.5 		 0.25 	 0.25 	 0.25
0.75 		 0.667 	 0.5 	 0.556
0.75 		 0.833 	 0.833 	 0.778
1.0 		 1.0 	 1.0 	 1.0
0.75 		 0.833 	 0.833 	 0.778
0.5 		 0.5 	 0.333 	 0.375
0.75 		 0.75 	 0.625 	 0.667
0.75 		 0.75 	 0.625 	 0.667
-------------------------------------------
0.675 		 0.637 	 0.568 	 0.579
