In [None]:
import time
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Dropout, LSTM

In [None]:
from ipynb.fs.full.ClassificationPerformanceIndexes import classificationPerformanceIndexes, printClassificationPerformanceIndexes

## Feature Classification

### SVM

In [None]:
def SVM(X_train, y_train, X_test, y_test, results):
    """Fit an RBF-kernel SVC on the training split and score it on the test split.

    The fit + predict wall time (seconds, rounded to 2 decimals) is measured,
    the predictions are passed to ``classificationPerformanceIndexes`` and the
    resulting indexes are stored in ``results.loc['SVM', :]`` and printed.

    Parameters
    ----------
    X_train, y_train : training features and labels handed to ``SVC.fit``.
    X_test, y_test : held-out features (predicted) and labels (scored against).
    results : table indexed by method name; only ``results.loc[label, :] = row``
        is used and the object is mutated in place.
    """
    print('Implementing SVM method...')
    start = time.time()
    clf = SVC(C = 1.0, kernel = 'rbf', gamma = 100)
    svm_ind = clf.fit(X_train, y_train).predict(X_test)
    end = time.time()
    t = round(end - start, 2)
    # The helper echoes the time it was given; the locally measured value is stored instead.
    acc, snv, spc, ppv, f1, mcc, kappa, _ = classificationPerformanceIndexes(y_test, svm_ind, t)
    results.loc['SVM', :] = acc, snv, spc, ppv, f1, mcc, kappa, t
    # Report under the SVM label (the original printed these numbers as 'KNN').
    printClassificationPerformanceIndexes('SVM', acc, snv, spc, ppv, f1, mcc, kappa)
    print('SVM finished in', t, 'sec\n')

In [None]:
def SVM_Kfold(X, kf, cols, results):
    """Cross-validate the RBF-kernel SVC over the folds produced by ``kf.split(X)``.

    Every column of ``X`` except the last is used as a feature and the label is
    read from the ``'seizure'`` column. For each fold the classifier is refit,
    the test rows are predicted and ``classificationPerformanceIndexes`` is
    evaluated; the per-fold indexes are averaged and stored in
    ``results.loc['SVM Kfold', :]`` together with the total wall time.

    Parameters
    ----------
    X : pandas DataFrame holding features and the ``'seizure'`` label column.
    kf : splitter exposing ``split(X)`` that yields ``(train, test)`` row positions.
    cols : column names for the per-fold score table (one per returned index).
    results : table indexed by method name; mutated in place via ``.loc``.
    """
    print('Implementing SVM k-fold...')
    start = time.time()
    clf = SVC(C = 1.0, kernel = 'rbf', gamma = 100)
    features = X.iloc[:, :X.shape[1] - 1]
    labels = X['seizure']
    fold_scores = []
    for train, test in kf.split(X):
        # The fold indices are row positions, so labels must be selected with
        # .iloc as well; .loc would treat them as index labels and fail or
        # misalign whenever X does not carry a default 0..n-1 index.
        svm_ind = clf.fit(features.iloc[train], labels.iloc[train]).predict(features.iloc[test])
        fold_scores.append(classificationPerformanceIndexes(labels.iloc[test], svm_ind, 0))
    end = time.time()
    t = round(end - start, 2)
    # Build the score table once instead of growing it row by row.
    acc, snv, spc, ppv, f1, mcc, kappa, _ = pd.DataFrame(fold_scores, columns = cols).mean(axis = 0).to_numpy()
    results.loc['SVM Kfold', :] = acc, snv, spc, ppv, f1, mcc, kappa, t
    # Report under the SVM label (the original printed these numbers as 'KNN Kfold').
    printClassificationPerformanceIndexes('SVM Kfold', acc, snv, spc, ppv, f1, mcc, kappa)
    print('SVM k-fold finished in', t, 'sec\n')

In [None]:
def CompleteSVM(train_dat, test_dat, train_ind, test_ind, results, features, kf, perfInd):
    """Run the SVM hold-out evaluation followed by the SVM k-fold evaluation.

    ``train_dat``/``train_ind`` and ``test_dat``/``test_ind`` are forwarded to
    ``SVM`` as the training and test features/labels; ``features``, ``kf`` and
    ``perfInd`` are forwarded to ``SVM_Kfold`` as its data, splitter and score
    column names. Both calls write into ``results``.
    """
    SVM(
        X_train = train_dat,
        y_train = train_ind,
        X_test = test_dat,
        y_test = test_ind,
        results = results,
    )
    SVM_Kfold(X = features, kf = kf, cols = perfInd, results = results)

### K-NN

In [None]:
def KNN(X_train, y_train, X_test, y_test, experiment,results):
    """Fit a 3-neighbour, distance-weighted, Manhattan-metric KNN and score it.

    The fit + predict wall time (seconds, rounded to 2 decimals) and the
    indexes returned by ``classificationPerformanceIndexes`` are stored in
    ``results.loc['KNN', :]`` and printed.

    Parameters
    ----------
    X_train, y_train : training features and labels.
    X_test, y_test : held-out features (predicted) and labels (scored against).
    experiment : accepted for signature parity with the other methods; not used here.
    results : table indexed by method name; mutated in place via ``.loc``.
    """
    print('Implementing KNN...')
    t0 = time.time()
    knn = KNeighborsClassifier(n_neighbors = 3, weights = 'distance', metric = 'manhattan', n_jobs = -1)
    knn.fit(X_train, y_train)
    predicted = knn.predict(X_test)
    elapsed = round(time.time() - t0, 2)
    acc, snv, spc, ppv, f1, mcc, kappa, _ = classificationPerformanceIndexes(y_test, predicted, elapsed)
    metrics = (acc, snv, spc, ppv, f1, mcc, kappa)
    results.loc['KNN', :] = metrics + (elapsed,)
    printClassificationPerformanceIndexes('KNN', *metrics)
    print('KNN finished in', elapsed, 'sec\n')

In [None]:
def KNN_Kfold(X, experiment, kf, cols, results):
    """Cross-validate the KNN classifier over the folds produced by ``kf.split(X)``.

    Every column of ``X`` except the last is used as a feature and the label is
    read from the ``'seizure'`` column. Per-fold indexes from
    ``classificationPerformanceIndexes`` are averaged and stored in
    ``results.loc['KNN Kfold', :]`` together with the total wall time.

    Parameters
    ----------
    X : pandas DataFrame holding features and the ``'seizure'`` label column.
    experiment : accepted for signature parity with the other methods; not used here.
    kf : splitter exposing ``split(X)`` that yields ``(train, test)`` row positions.
    cols : column names for the per-fold score table (one per returned index).
    results : table indexed by method name; mutated in place via ``.loc``.
    """
    print('Implementing KNN k-fold...')
    start = time.time()
    clf = KNeighborsClassifier(n_neighbors = 3, weights = 'distance', metric = 'manhattan', n_jobs = -1)
    features = X.iloc[:, :X.shape[1] - 1]
    labels = X['seizure']
    fold_scores = []
    for train, test in kf.split(X):
        # Fold indices are row positions: select labels with .iloc too, since
        # .loc would look them up as index labels and break on a non-default index.
        knn_ind = clf.fit(features.iloc[train], labels.iloc[train]).predict(features.iloc[test])
        fold_scores.append(classificationPerformanceIndexes(labels.iloc[test], knn_ind, 0))
    end = time.time()
    t = round(end - start, 2)
    # Build the score table once instead of growing it row by row.
    acc, snv, spc, ppv, f1, mcc, kappa, _ = pd.DataFrame(fold_scores, columns = cols).mean(axis = 0).to_numpy()
    printClassificationPerformanceIndexes('KNN Kfold', acc, snv, spc, ppv, f1, mcc, kappa)
    results.loc['KNN Kfold', :] = acc, snv, spc, ppv, f1, mcc, kappa, t
    print('KNN k-fold finished in', t, 'sec\n')

In [None]:
def CompleteKNN(train_dat, test_dat, train_ind, test_ind, results, experiment, features, kf, perfInd):
    """Run the KNN hold-out evaluation followed by the KNN k-fold evaluation.

    The train/test features and labels go to ``KNN``; ``features``, ``kf`` and
    ``perfInd`` go to ``KNN_Kfold`` as its data, splitter and score column
    names. ``experiment`` is passed through to both, and both write into ``results``.
    """
    KNN(
        X_train = train_dat,
        y_train = train_ind,
        X_test = test_dat,
        y_test = test_ind,
        experiment = experiment,
        results = results,
    )
    KNN_Kfold(X = features, experiment = experiment, kf = kf, cols = perfInd, results = results)

### Naive Bayes

In [None]:
def NaiveBayes(X_train, y_train, X_test, y_test, results):
    """Fit a Gaussian Naive Bayes classifier and score it on the test split.

    The fit + predict wall time (seconds, rounded to 2 decimals) and the
    indexes returned by ``classificationPerformanceIndexes`` are stored in
    ``results.loc['Naive Bayes', :]`` and printed.

    Parameters
    ----------
    X_train, y_train : training features and labels.
    X_test, y_test : held-out features (predicted) and labels (scored against).
    results : table indexed by method name; mutated in place via ``.loc``.
    """
    print('Implementing Naive Bayes...')
    t0 = time.time()
    model = GaussianNB()
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)
    elapsed = round(time.time() - t0, 2)
    acc, snv, spc, ppv, f1, mcc, kappa, _ = classificationPerformanceIndexes(y_test, predicted, elapsed)
    metrics = (acc, snv, spc, ppv, f1, mcc, kappa)
    results.loc['Naive Bayes', :] = metrics + (elapsed,)
    printClassificationPerformanceIndexes('Naive Bayes', *metrics)
    print('Naive Bayes finished in', elapsed, 'sec\n')

In [None]:
def NaiveBayes_Kfold(X, kf, cols, results):
    """Cross-validate Gaussian Naive Bayes over the folds produced by ``kf.split(X)``.

    Every column of ``X`` except the last is used as a feature and the label is
    read from the ``'seizure'`` column. Per-fold indexes from
    ``classificationPerformanceIndexes`` are averaged and stored in
    ``results.loc['Naive Bayes Kfold', :]`` together with the total wall time.

    Parameters
    ----------
    X : pandas DataFrame holding features and the ``'seizure'`` label column.
    kf : splitter exposing ``split(X)`` that yields ``(train, test)`` row positions.
    cols : column names for the per-fold score table (one per returned index).
    results : table indexed by method name; mutated in place via ``.loc``.
    """
    print('Implementing Naive Bayes k-fold...')
    start = time.time()
    clf = GaussianNB()
    features = X.iloc[:, :X.shape[1] - 1]
    labels = X['seizure']
    fold_scores = []
    for train, test in kf.split(X):
        # Fold indices are row positions: select labels with .iloc too, since
        # .loc would look them up as index labels and break on a non-default index.
        nb_ind = clf.fit(features.iloc[train], labels.iloc[train]).predict(features.iloc[test])
        fold_scores.append(classificationPerformanceIndexes(labels.iloc[test], nb_ind, 0))
    end = time.time()
    t = round(end - start, 2)
    # Build the score table once instead of growing it row by row.
    acc, snv, spc, ppv, f1, mcc, kappa, _ = pd.DataFrame(fold_scores, columns = cols).mean(axis = 0).to_numpy()
    results.loc['Naive Bayes Kfold', :] = acc, snv, spc, ppv, f1, mcc, kappa, t
    printClassificationPerformanceIndexes('Naive Bayes k-fold', acc, snv, spc, ppv, f1, mcc, kappa)
    print('Naive Bayes k_fold finished in', t, 'sec\n')

In [None]:
def CompleteNB(train_dat, test_dat, train_ind, test_ind, results, features, kf, perfInd):
    """Run the Naive Bayes hold-out evaluation followed by its k-fold evaluation.

    The train/test features and labels go to ``NaiveBayes``; ``features``,
    ``kf`` and ``perfInd`` go to ``NaiveBayes_Kfold`` as its data, splitter and
    score column names. Both calls write into ``results``.
    """
    NaiveBayes(
        X_train = train_dat,
        y_train = train_ind,
        X_test = test_dat,
        y_test = test_ind,
        results = results,
    )
    NaiveBayes_Kfold(X = features, kf = kf, cols = perfInd, results = results)

### Decision Trees

In [None]:
def DecisionTrees(X_train, y_train, X_test, y_test, experiment, results):
    """Fit a decision tree with experiment-specific settings and score it.

    ``experiment`` (case-insensitive) selects ``(criterion, max_depth,
    min_samples_split)``: ``'AVERAGE'`` -> ``('gini', 62, 2)``, ``'LEFTRIGHT'``
    -> ``('entropy', 82, 2)``, anything else -> ``('entropy', 42, 12)``.
    The fit + predict wall time and the indexes returned by
    ``classificationPerformanceIndexes`` are stored in
    ``results.loc['Decision Trees', :]`` and printed.

    Parameters
    ----------
    X_train, y_train : training features and labels.
    X_test, y_test : held-out features (predicted) and labels (scored against).
    experiment : string naming the experiment; must support ``.upper()``.
    results : table indexed by method name; mutated in place via ``.loc``.
    """
    print('Implementing Decision Trees...')
    start = time.time()
    presets = {'AVERAGE': ('gini', 62, 2), 'LEFTRIGHT': ('entropy', 82, 2)}
    c, md, mss = presets.get(experiment.upper(), ('entropy', 42, 12))
    # Only the configured tree is built; the original also created a
    # default-parameter tree first that was immediately overwritten.
    clf = DecisionTreeClassifier(criterion = c, max_depth = md, min_samples_split = mss, splitter = 'best')
    dt_ind = clf.fit(X_train, y_train).predict(X_test)
    end = time.time()
    t = round(end - start, 2)
    acc, snv, spc, ppv, f1, mcc, kappa, _ = classificationPerformanceIndexes(y_test, dt_ind, t)
    results.loc['Decision Trees', :] = acc, snv, spc, ppv, f1, mcc, kappa, t
    printClassificationPerformanceIndexes('Decision Trees', acc, snv, spc, ppv, f1, mcc, kappa)
    print('Decision Trees finished in', t, 'sec\n')

In [None]:
def DecisionTrees_Kfold(X, kf, cols, experiment, results):
    """Cross-validate the experiment-specific decision tree over ``kf.split(X)``.

    ``experiment`` (case-insensitive) selects ``(criterion, max_depth,
    min_samples_split)``: ``'AVERAGE'`` -> ``('gini', 62, 2)``, ``'LEFTRIGHT'``
    -> ``('entropy', 82, 2)``, anything else -> ``('entropy', 42, 12)``.
    Every column of ``X`` except the last is used as a feature and the label is
    read from the ``'seizure'`` column. Per-fold indexes are averaged and
    stored in ``results.loc['Decision Trees Kfold', :]`` with the total wall time.

    Parameters
    ----------
    X : pandas DataFrame holding features and the ``'seizure'`` label column.
    kf : splitter exposing ``split(X)`` that yields ``(train, test)`` row positions.
    cols : column names for the per-fold score table (one per returned index).
    experiment : string naming the experiment; must support ``.upper()``.
    results : table indexed by method name; mutated in place via ``.loc``.
    """
    print('Implementing Decision Trees k-fold...')
    start = time.time()
    presets = {'AVERAGE': ('gini', 62, 2), 'LEFTRIGHT': ('entropy', 82, 2)}
    c, md, mss = presets.get(experiment.upper(), ('entropy', 42, 12))
    clf = DecisionTreeClassifier(criterion = c, max_depth = md, min_samples_split = mss, splitter = 'best')
    features = X.iloc[:, :X.shape[1] - 1]
    labels = X['seizure']
    fold_scores = []
    for train, test in kf.split(X):
        # Fold indices are row positions: select labels with .iloc too, since
        # .loc would look them up as index labels and break on a non-default index.
        dt_ind = clf.fit(features.iloc[train], labels.iloc[train]).predict(features.iloc[test])
        fold_scores.append(classificationPerformanceIndexes(labels.iloc[test], dt_ind, 0))
    end = time.time()
    t = round(end - start, 2)
    # Build the score table once instead of growing it row by row.
    acc, snv, spc, ppv, f1, mcc, kappa, _ = pd.DataFrame(fold_scores, columns = cols).mean(axis = 0).to_numpy()
    results.loc['Decision Trees Kfold', :] = acc, snv, spc, ppv, f1, mcc, kappa, t
    printClassificationPerformanceIndexes('Decision Trees Kfold', acc, snv, spc, ppv, f1, mcc, kappa)
    print('Decision Trees k-fold finished in', t, 'sec\n')

In [None]:
def CompleteDT(train_dat, test_dat, train_ind, test_ind, results, features, kf, experiment, perfInd):
    """Run the decision-tree hold-out evaluation followed by its k-fold evaluation.

    The train/test features and labels go to ``DecisionTrees``; ``features``,
    ``kf`` and ``perfInd`` go to ``DecisionTrees_Kfold`` as its data, splitter
    and score column names. ``experiment`` is passed to both, and both write
    into ``results``.
    """
    DecisionTrees(
        X_train = train_dat,
        y_train = train_ind,
        X_test = test_dat,
        y_test = test_ind,
        experiment = experiment,
        results = results,
    )
    DecisionTrees_Kfold(X = features, kf = kf, cols = perfInd, experiment = experiment, results = results)

### Random Forest

In [None]:
def RandomForest(X_train, y_train, X_test, y_test, experiment, results):
    """Fit a random forest with experiment-specific settings and score it.

    ``experiment`` (case-insensitive) selects ``(criterion, max_depth,
    min_samples_split, n_estimators)``: ``'AVERAGE'`` -> ``('gini', 62, 2, 10)``,
    ``'LEFTRIGHT'`` -> ``('gini', 82, 2, 70)``, anything else ->
    ``('entropy', 42, 2, 50)``. The fit + predict wall time and the indexes
    returned by ``classificationPerformanceIndexes`` are stored in
    ``results.loc['Random Forest', :]`` and printed.

    Parameters
    ----------
    X_train, y_train : training features and labels.
    X_test, y_test : held-out features (predicted) and labels (scored against).
    experiment : string naming the experiment; must support ``.upper()``.
    results : table indexed by method name; mutated in place via ``.loc``.
    """
    print('Implementing Random Forest...')
    t0 = time.time()
    presets = {
        'AVERAGE': ('gini', 62, 2, 10),
        'LEFTRIGHT': ('gini', 82, 2, 70),
    }
    criterion, depth, min_split, n_trees = presets.get(experiment.upper(), ('entropy', 42, 2, 50))
    forest = RandomForestClassifier(
        n_estimators = n_trees,
        criterion = criterion,
        max_depth = depth,
        min_samples_split = min_split,
        n_jobs = -1,
    )
    forest.fit(X_train, y_train)
    predicted = forest.predict(X_test)
    elapsed = round(time.time() - t0, 2)
    acc, snv, spc, ppv, f1, mcc, kappa, _ = classificationPerformanceIndexes(y_test, predicted, elapsed)
    metrics = (acc, snv, spc, ppv, f1, mcc, kappa)
    results.loc['Random Forest', :] = metrics + (elapsed,)
    printClassificationPerformanceIndexes('Random Forest', *metrics)
    print('Random Forest finished in', elapsed, 'sec\n')

In [None]:
def RandomForest_Kfold(X, experiment, kf, cols, results):
    """Cross-validate the experiment-specific random forest over ``kf.split(X)``.

    ``experiment`` (case-insensitive) selects ``(criterion, max_depth,
    min_samples_split, n_estimators)``: ``'AVERAGE'`` -> ``('gini', 62, 2, 10)``,
    ``'LEFTRIGHT'`` -> ``('gini', 82, 2, 70)``, anything else ->
    ``('entropy', 42, 2, 50)``. Every column of ``X`` except the last is used as
    a feature and the label is read from the ``'seizure'`` column. Per-fold
    indexes are averaged and stored in ``results.loc['Random Forest Kfold', :]``
    with the total wall time.

    Parameters
    ----------
    X : pandas DataFrame holding features and the ``'seizure'`` label column.
    experiment : string naming the experiment; must support ``.upper()``.
    kf : splitter exposing ``split(X)`` that yields ``(train, test)`` row positions.
    cols : column names for the per-fold score table (one per returned index).
    results : table indexed by method name; mutated in place via ``.loc``.
    """
    print('Implementing Random Forest k-fold...')
    start = time.time()
    presets = {'AVERAGE': ('gini', 62, 2, 10), 'LEFTRIGHT': ('gini', 82, 2, 70)}
    c, md, mss, est = presets.get(experiment.upper(), ('entropy', 42, 2, 50))
    clf = RandomForestClassifier(n_estimators = est, criterion = c, max_depth = md, min_samples_split = mss, n_jobs = -1)
    features = X.iloc[:, :X.shape[1] - 1]
    labels = X['seizure']
    fold_scores = []
    for train, test in kf.split(X):
        # Fold indices are row positions: select labels with .iloc too, since
        # .loc would look them up as index labels and break on a non-default index.
        rf_ind = clf.fit(features.iloc[train], labels.iloc[train]).predict(features.iloc[test])
        fold_scores.append(classificationPerformanceIndexes(labels.iloc[test], rf_ind, 0))
    end = time.time()
    t = round(end - start, 2)
    # Build the score table once instead of growing it row by row.
    acc, snv, spc, ppv, f1, mcc, kappa, _ = pd.DataFrame(fold_scores, columns = cols).mean(axis = 0).to_numpy()
    results.loc['Random Forest Kfold', :] = acc, snv, spc, ppv, f1, mcc, kappa, t
    printClassificationPerformanceIndexes('Random Forest Kfold', acc, snv, spc, ppv, f1, mcc, kappa)
    print('Random Forest k-fold finished in', t, 'sec\n')

In [None]:
def CompleteRF(train_dat, test_dat, train_ind, test_ind, results, rf_estimators, features, kf, perfInd):
    """Run the random-forest hold-out evaluation followed by its k-fold evaluation.

    The train/test features and labels go to ``RandomForest``; ``features``,
    ``kf`` and ``perfInd`` go to ``RandomForest_Kfold`` as its data, splitter
    and score column names. Both calls write into ``results``.

    Note: despite its name, ``rf_estimators`` is forwarded as the ``experiment``
    argument of both functions, which call ``.upper()`` on it, so it must be
    the experiment name string rather than a number of trees.
    """
    RandomForest(
        X_train = train_dat,
        y_train = train_ind,
        X_test = test_dat,
        y_test = test_ind,
        experiment = rf_estimators,
        results = results,
    )
    RandomForest_Kfold(X = features, experiment = rf_estimators, kf = kf, cols = perfInd, results = results)

### LDA

In [None]:
def LDA(X_train, y_train, X_test, y_test, experiment, results):
    """Fit an ``lsqr`` Linear Discriminant Analysis classifier and score it.

    Shrinkage is ``'auto'`` when ``experiment`` (case-insensitive) is
    ``'LEFTRIGHT'`` and ``0`` otherwise. The fit + predict wall time and the
    indexes returned by ``classificationPerformanceIndexes`` are stored in
    ``results.loc['LDA', :]`` and printed.

    Parameters
    ----------
    X_train, y_train : training features and labels.
    X_test, y_test : held-out features (predicted) and labels (scored against).
    experiment : string naming the experiment; must support ``.upper()``.
    results : table indexed by method name; mutated in place via ``.loc``.
    """
    print('Implementing LDA...')
    t0 = time.time()
    shrinkage = 'auto' if experiment.upper() == 'LEFTRIGHT' else 0
    lda = LinearDiscriminantAnalysis(solver = 'lsqr', shrinkage = shrinkage, tol = 1e-4)
    lda.fit(X_train, y_train)
    predicted = lda.predict(X_test)
    elapsed = round(time.time() - t0, 2)
    acc, snv, spc, ppv, f1, mcc, kappa, _ = classificationPerformanceIndexes(y_test, predicted, elapsed)
    metrics = (acc, snv, spc, ppv, f1, mcc, kappa)
    results.loc['LDA', :] = metrics + (elapsed,)
    printClassificationPerformanceIndexes('LDA', *metrics)
    print('LDA finished in', elapsed, 'sec\n')

In [None]:
def LDA_Kfold(X, kf, experiment,cols, results):
    """Cross-validate the ``lsqr`` LDA classifier over ``kf.split(X)``.

    Shrinkage is ``'auto'`` when ``experiment`` (case-insensitive) is
    ``'LEFTRIGHT'`` and ``0`` otherwise. Every column of ``X`` except the last
    is used as a feature and the label is read from the ``'seizure'`` column.
    Per-fold indexes are averaged and stored in ``results.loc['LDA Kfold', :]``
    with the total wall time.

    Parameters
    ----------
    X : pandas DataFrame holding features and the ``'seizure'`` label column.
    kf : splitter exposing ``split(X)`` that yields ``(train, test)`` row positions.
    experiment : string naming the experiment; must support ``.upper()``.
    cols : column names for the per-fold score table (one per returned index).
    results : table indexed by method name; mutated in place via ``.loc``.
    """
    print('Implementing LDA k-fold...')
    start = time.time()
    shr = 'auto' if experiment.upper() == 'LEFTRIGHT' else 0
    clf = LinearDiscriminantAnalysis(solver = 'lsqr', shrinkage = shr, tol = 1e-4)
    features = X.iloc[:, :X.shape[1] - 1]
    labels = X['seizure']
    fold_scores = []
    for train, test in kf.split(X):
        # Fold indices are row positions: select labels with .iloc too, since
        # .loc would look them up as index labels and break on a non-default index.
        lda_ind = clf.fit(features.iloc[train], labels.iloc[train]).predict(features.iloc[test])
        fold_scores.append(classificationPerformanceIndexes(labels.iloc[test], lda_ind, 0))
    end = time.time()
    t = round(end - start, 2)
    # Build the score table once instead of growing it row by row.
    acc, snv, spc, ppv, f1, mcc, kappa, _ = pd.DataFrame(fold_scores, columns = cols).mean(axis = 0).to_numpy()
    results.loc['LDA Kfold', :] = acc, snv, spc, ppv, f1, mcc, kappa, t
    printClassificationPerformanceIndexes('LDA Kfold', acc, snv, spc, ppv, f1, mcc, kappa)
    print('LDA k-fold finished in', t, 'sec\n')

In [None]:
def CompleteLDA(train_dat, test_dat, train_ind, test_ind, results, experiment, features, kf, perfInd):
    """Run the LDA hold-out evaluation followed by the LDA k-fold evaluation.

    The train/test features and labels go to ``LDA``; ``features``, ``kf`` and
    ``perfInd`` go to ``LDA_Kfold`` as its data, splitter and score column
    names. ``experiment`` is passed to both, and both write into ``results``.
    """
    LDA(
        X_train = train_dat,
        y_train = train_ind,
        X_test = test_dat,
        y_test = test_ind,
        experiment = experiment,
        results = results,
    )
    LDA_Kfold(X = features, kf = kf, experiment = experiment, cols = perfInd, results = results)

### Logistic Regression

In [None]:
def LogReg(X_train, y_train, X_test, y_test, results, experiment):
    """Fit an L1-penalised ``liblinear`` logistic regression (C = 0.01) and score it.

    ``experiment`` (case-insensitive) selects ``max_iter``: ``'AVERAGE'`` ->
    500, ``'LEFTRIGHT'`` -> 100, anything else -> 1000. The fit + predict wall
    time and the indexes returned by ``classificationPerformanceIndexes`` are
    stored in ``results.loc['Logistic Regression', :]`` and printed.

    Parameters
    ----------
    X_train, y_train : training features and labels.
    X_test, y_test : held-out features (predicted) and labels (scored against).
    results : table indexed by method name; mutated in place via ``.loc``.
    experiment : string naming the experiment; must support ``.upper()``.
    """
    print('Implementing Logistic Regression...')
    t0 = time.time()
    iterations = {'AVERAGE': 500, 'LEFTRIGHT': 100}.get(experiment.upper(), 1000)
    logreg = LogisticRegression(penalty = 'l1', C = 0.01, max_iter = iterations, solver = 'liblinear')
    logreg.fit(X_train, y_train)
    predicted = logreg.predict(X_test)
    elapsed = round(time.time() - t0, 2)
    acc, snv, spc, ppv, f1, mcc, kappa, _ = classificationPerformanceIndexes(y_test, predicted, elapsed)
    metrics = (acc, snv, spc, ppv, f1, mcc, kappa)
    results.loc['Logistic Regression', :] = metrics + (elapsed,)
    printClassificationPerformanceIndexes('Logistic Regression', *metrics)
    print('Logistic Regression finished in', elapsed, 'sec\n')

In [None]:
def LogReg_Kfold(X, kf, cols, results, experiment):
    """Cross-validate the L1 ``liblinear`` logistic regression over ``kf.split(X)``.

    ``experiment`` (case-insensitive) selects ``max_iter``: ``'AVERAGE'`` ->
    500, ``'LEFTRIGHT'`` -> 100, anything else -> 1000. Every column of ``X``
    except the last is used as a feature and the label is read from the
    ``'seizure'`` column. Per-fold indexes are averaged and stored in
    ``results.loc['Logistic Regression Kfold', :]`` with the total wall time.

    Parameters
    ----------
    X : pandas DataFrame holding features and the ``'seizure'`` label column.
    kf : splitter exposing ``split(X)`` that yields ``(train, test)`` row positions.
    cols : column names for the per-fold score table (one per returned index).
    results : table indexed by method name; mutated in place via ``.loc``.
    experiment : string naming the experiment; must support ``.upper()``.
    """
    print('Implementing Logistic Regression k-fold...')
    start = time.time()
    it = {'AVERAGE': 500, 'LEFTRIGHT': 100}.get(experiment.upper(), 1000)
    clf = LogisticRegression(penalty = 'l1', C = 0.01, max_iter = it, solver = 'liblinear')
    features = X.iloc[:, :X.shape[1] - 1]
    labels = X['seizure']
    fold_scores = []
    for train, test in kf.split(X):
        # Fold indices are row positions: select labels with .iloc too, since
        # .loc would look them up as index labels and break on a non-default index.
        lr_ind = clf.fit(features.iloc[train], labels.iloc[train]).predict(features.iloc[test])
        fold_scores.append(classificationPerformanceIndexes(labels.iloc[test], lr_ind, 0))
    end = time.time()
    t = round(end - start, 2)
    # Build the score table once instead of growing it row by row.
    acc, snv, spc, ppv, f1, mcc, kappa, _ = pd.DataFrame(fold_scores, columns = cols).mean(axis = 0).to_numpy()
    results.loc['Logistic Regression Kfold', :] = acc, snv, spc, ppv, f1, mcc, kappa, t
    printClassificationPerformanceIndexes('Logistic Regression Kfold', acc, snv, spc, ppv, f1, mcc, kappa)
    print('Logistic Regression k-fold finished in', t, 'sec\n')

In [None]:
def CompleteLR(train_dat, test_dat, train_ind, test_ind, results, experiment, features, kf, perfInd):
    """Run the logistic-regression hold-out evaluation followed by its k-fold evaluation.

    The train/test features and labels go to ``LogReg``; ``features``, ``kf``
    and ``perfInd`` go to ``LogReg_Kfold`` as its data, splitter and score
    column names. ``experiment`` is passed to both, and both write into ``results``.
    """
    LogReg(
        X_train = train_dat,
        y_train = train_ind,
        X_test = test_dat,
        y_test = test_ind,
        results = results,
        experiment = experiment,
    )
    LogReg_Kfold(X = features, kf = kf, cols = perfInd, results = results, experiment = experiment)

### LSTM

In [None]:
def LstmModel (size, lstm_units, dense_units, dropout_percentage, loss_function, metric):
    """Build and compile the LSTM -> Dropout -> Dense(relu) -> Dropout -> Dense(sigmoid) model.

    Parameters
    ----------
    size : accepted but not used by the model definition.
    lstm_units : number of units of the LSTM layer.
    dense_units : number of units of the hidden Dense layer.
    dropout_percentage : rate of the first Dropout; the second uses half of it.
    loss_function : passed to ``compile`` as ``loss``.
    metric : passed to ``compile`` as ``metrics``.

    Returns
    -------
    The compiled ``Sequential`` model (Adam optimizer, learning rate 1e-3).
    """
    layer_stack = [
        LSTM(
            lstm_units,
            recurrent_regularizer = l2(1e-2),
            activity_regularizer = l2(1e-4),
            bias_regularizer = l2(1e-6),
        ),
        Dropout(dropout_percentage),
        Dense(dense_units, activation = 'relu', kernel_regularizer = l2(1e-3), bias_regularizer = l2(1e-2)),
        Dropout(dropout_percentage/2),
        Dense(1, activation = 'sigmoid', kernel_regularizer = l2(1e-3), bias_regularizer = l2(1e-2)),
    ]
    network = Sequential()
    for layer in layer_stack:
        network.add(layer)
    network.compile(optimizer = Adam(learning_rate = 1e-3), loss = loss_function, metrics = metric)
    return network

In [None]:
def LSTM_method (model, X_train, y_train, X_test, y_test, batch, epochs, results):
    """Train ``model`` on the training split and score its thresholded predictions.

    The model is fitted with early stopping on ``val_loss`` (patience 5, best
    weights restored). Predictions on ``X_test`` are thresholded at 0.5,
    flattened to one value per sample and passed to
    ``classificationPerformanceIndexes``; the indexes and the fit + predict
    wall time are stored in ``results.loc['LSTM', :]`` and printed.

    Parameters
    ----------
    model : compiled Keras model; it is trained in place.
    X_train, y_train : training inputs and labels passed to ``model.fit``.
    X_test, y_test : inputs and labels used for validation, prediction and scoring.
    batch : batch size for ``fit`` and ``predict``.
    epochs : maximum number of training epochs.
    results : table indexed by method name; mutated in place via ``.loc``.
    """
    print('Implementing LSTM...')
    start = time.time()
    es = EarlyStopping(monitor = 'val_loss', min_delta = 0, patience = 5, mode = 'auto', restore_best_weights = True, verbose = 0)
    # NOTE(review): validation_data is the test split, so the early-stopping
    # epoch and restored weights are selected on the same data that is scored
    # below -- confirm this is intended.
    # Keras documents `callbacks` as a list of callbacks, so wrap the single instance.
    model.fit(X_train, y_train, batch_size = batch, epochs = epochs, validation_data = (X_test, y_test), callbacks = [es], verbose = 0)
    lstm_ind = (model.predict(X_test, batch_size = batch) >= 0.5).astype('int')
    end = time.time()
    t = round(end - start, 2)
    acc, snv, spc, ppv, f1, mcc, kappa, _ = classificationPerformanceIndexes(y_test, np.reshape(lstm_ind, lstm_ind.shape[0]), t)
    results.loc['LSTM', :] = acc, snv, spc, ppv, f1, mcc, kappa, t
    printClassificationPerformanceIndexes('LSTM', acc, snv, spc, ppv, f1, mcc, kappa)
    print('LSTM finished in', t, 'sec\n')

In [None]:
def LSTM_method_Kfold(X, kf, cols, model, batch, epochs, results):
    """Cross-validate the LSTM architecture of ``model`` over ``kf.split(X)``.

    Every column of ``X`` except the last is used as a feature (reshaped to
    ``(rows, 1, n_features)``) and the integer label is read from the
    ``'seizure'`` column. For every fold a freshly initialised copy of
    ``model`` is built from its config, compiled with a new optimizer of the
    same configuration and the same loss, trained with early stopping on
    ``val_loss`` and scored on the fold's test rows (predictions thresholded
    at 0.5). Per-fold indexes are averaged and stored in
    ``results.loc['LSTM Kfold', :]`` with the total wall time.

    Parameters
    ----------
    X : pandas DataFrame holding features and the ``'seizure'`` label column.
    kf : splitter exposing ``split(X)`` that yields ``(train, test)`` row positions.
    cols : column names for the per-fold score table (one per returned index).
    model : compiled Keras model used as the architecture template. It must be
        rebuildable via ``type(model).from_config(model.get_config())``; it is
        not trained by this function.
    batch : batch size for ``fit`` and ``predict``.
    epochs : maximum number of training epochs per fold.
    results : table indexed by method name; mutated in place via ``.loc``.
    """
    print('Implementing LSTM k-fold...')
    start = time.time()
    es = EarlyStopping(monitor = 'val_loss', min_delta = 0, patience = 5, mode = 'auto', restore_best_weights = True, verbose = 0)
    features = X.iloc[:, :X.shape[1] - 1].values
    # Fold indices are row positions, so labels are taken positionally as well
    # (label-based .loc breaks on a non-default index).
    labels = X['seizure'].values.astype(int)
    fold_scores = []
    for train, test in kf.split(X):
        X_train = np.reshape(features[train], (len(train), 1, features.shape[1]))
        y_train = labels[train]
        X_test = np.reshape(features[test], (len(test), 1, features.shape[1]))
        y_test = labels[test]
        # Refitting the same Keras model continues from its current weights, so
        # later folds (and any earlier hold-out training) would have seen the
        # rows now used for testing. Start every fold from a fresh copy.
        # Compile metrics are not carried over: only val_loss and the
        # predictions are used here.
        fold_model = type(model).from_config(model.get_config())
        fold_model.compile(optimizer = type(model.optimizer).from_config(model.optimizer.get_config()), loss = model.loss)
        # NOTE(review): the fold's test rows double as validation data for
        # early stopping -- confirm this is intended.
        fold_model.fit(X_train, y_train, batch_size = batch, epochs = epochs, validation_data = (X_test, y_test), callbacks = [es], verbose = 0)
        lstm_ind = (fold_model.predict(X_test, batch_size = batch) >= 0.5).astype('int')
        fold_scores.append(classificationPerformanceIndexes(y_test, np.reshape(lstm_ind, lstm_ind.shape[0]), 0))
    end = time.time()
    t = round(end - start, 2)
    # Build the score table once instead of growing it row by row.
    acc, snv, spc, ppv, f1, mcc, kappa, _ = pd.DataFrame(fold_scores, columns = cols).mean(axis = 0).to_numpy()
    results.loc['LSTM Kfold', :] = acc, snv, spc, ppv, f1, mcc, kappa, t
    printClassificationPerformanceIndexes('LSTM Kfold', acc, snv, spc, ppv, f1, mcc, kappa)
    # Distinguish this message from the hold-out run's 'LSTM finished in'.
    print('LSTM k-fold finished in', t, 'sec\n')

In [None]:
def CompleteLSTM (train_dat, test_dat, train_ind, test_ind, results, ft, kf, perfInd, epochs, batch, lstm_units, dense_units, dropout_percentage, loss_function, metric):
    """Build the LSTM model, then run its hold-out and k-fold evaluations.

    ``train_dat`` and ``test_dat`` are reshaped from ``(rows, features)`` to
    ``(rows, 1, features)`` and the label series are cast to ``int`` before
    being passed to ``LSTM_method``. The model built by ``LstmModel`` is then
    also handed to ``LSTM_method_Kfold`` together with ``ft``, ``kf`` and
    ``perfInd`` (its data, splitter and score column names). Both evaluations
    write into ``results``.
    """
    def _as_sequences(frame):
        # (rows, features) -> (rows, 1, features): one time step per sample.
        n_rows, n_features = frame.shape[0], frame.shape[1]
        return np.reshape(frame.values, (n_rows, 1, n_features))

    X_train = _as_sequences(train_dat)
    y_train = train_ind.values.astype(int)
    X_test = _as_sequences(test_dat)
    y_test = test_ind.values.astype(int)

    lstm_model = LstmModel(
        size = train_dat.shape[1],
        lstm_units = lstm_units,
        dense_units = dense_units,
        dropout_percentage = dropout_percentage,
        loss_function = loss_function,
        metric = metric,
    )
    LSTM_method(
        model = lstm_model,
        X_train = X_train,
        y_train = y_train,
        X_test = X_test,
        y_test = y_test,
        batch = batch,
        epochs = epochs,
        results = results,
    )
    LSTM_method_Kfold(
        X = ft,
        kf = kf,
        cols = perfInd,
        model = lstm_model,
        batch = batch,
        epochs = epochs,
        results = results,
    )