In [1]:
#Importing libraries 
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
%matplotlib inline

from sklearn.model_selection import train_test_split 

#ML Classifiers
from sklearn.neighbors import KNeighborsClassifier #K-Neighbors classifier
from sklearn.tree import DecisionTreeClassifier #Decision Tree classifier
from sklearn.svm import SVC # Support Vector Machines classifier
#from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.naive_bayes import GaussianNB

#Boosting classifiers
import xgboost as xgb
from sklearn.ensemble import *

#Evaluation metrics
from sklearn import metrics #to check for error and accuracy of the model




**Data Uploading**

In [None]:
#If using google colab
#Upload data from local drive


#from google.colab import files
#uploaded = files.upload()

Saving data.csv to data.csv


In [2]:
# Read the CSV named by eegdatafile into a DataFrame; row 0 supplies the column names.

eegdatafile = "data.csv"

data = pd.read_csv(filepath_or_buffer=eegdatafile, header=0)
    
   

In [3]:
# Preview the first rows of the freshly loaded frame (labels in y are still the raw values).
data.head()

Unnamed: 0.1,Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,...,X170,X171,X172,X173,X174,X175,X176,X177,X178,y
0,X21.V1.791,135,190,229,223,192,125,55,-9,-33,...,-17,-15,-31,-77,-103,-127,-116,-83,-51,4
1,X15.V1.924,386,382,356,331,320,315,307,272,244,...,164,150,146,152,157,156,154,143,129,1
2,X8.V1.1,-32,-39,-47,-37,-32,-36,-57,-73,-85,...,57,64,48,19,-12,-30,-35,-35,-36,5
3,X16.V1.60,-105,-101,-96,-92,-89,-95,-102,-100,-87,...,-82,-81,-80,-77,-85,-77,-72,-69,-65,5
4,X20.V1.54,-9,-65,-98,-102,-78,-48,-16,0,-21,...,4,2,-12,-32,-41,-65,-83,-89,-73,5


In [4]:
# Collapse the label column into a binary target:
#   1 -> epileptic seizure (kept as-is)
#   0 -> control (replaces the original classes 2, 3, 4 and 5, i.e. no seizure)

data["y"] = data["y"].mask(data["y"] > 1, 0)

In [5]:
# Preview the first rows again to confirm that y now only holds 0 and 1.
data.head()

Unnamed: 0.1,Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,...,X170,X171,X172,X173,X174,X175,X176,X177,X178,y
0,X21.V1.791,135,190,229,223,192,125,55,-9,-33,...,-17,-15,-31,-77,-103,-127,-116,-83,-51,0
1,X15.V1.924,386,382,356,331,320,315,307,272,244,...,164,150,146,152,157,156,154,143,129,1
2,X8.V1.1,-32,-39,-47,-37,-32,-36,-57,-73,-85,...,57,64,48,19,-12,-30,-35,-35,-36,0
3,X16.V1.60,-105,-101,-96,-92,-89,-95,-102,-100,-87,...,-82,-81,-80,-77,-85,-77,-72,-69,-65,0
4,X20.V1.54,-9,-65,-98,-102,-78,-48,-16,0,-21,...,4,2,-12,-32,-41,-65,-83,-89,-73,0


In [6]:
# Summarise the frame: row/column counts, dtypes and memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11500 entries, 0 to 11499
Columns: 180 entries, Unnamed: 0 to y
dtypes: int64(179), object(1)
memory usage: 15.8+ MB


**Preprocessing**

In [7]:
# Drop the first column ("Unnamed: 0"), the only non-numeric column in the frame
# (presumably a record identifier - confirm against the data source).
# errors="ignore" keeps this cell idempotent: re-running it after the column is
# already gone is a no-op instead of raising KeyError.

data = data.drop(columns="Unnamed: 0", errors="ignore")




In [8]:
# Independent variables: every column except the last one (the label y), as a NumPy array.

X = data.to_numpy()[:, :-1]

In [9]:
# Standardise the feature matrix with a single scalar mean and a single scalar
# standard deviation taken over all entries of X (no per-column statistics).
# NOTE(review): these statistics are computed before the train/validation/test
# split below, so held-out rows contribute to them - confirm this is intended.

X = np.divide(X - np.mean(X), np.std(X))
X.shape

(11500, 178)

In [10]:
# Dependent variable: the binary label column as a 1-D NumPy array.

y = data["y"].to_numpy()


In [11]:
# Carve the data into train / validation / test sets (80% / 10% / 10%),
# stratifying on the label at both steps so class proportions are preserved.

# Step 1: keep 80% for training; the remaining 20% is held back.
X_train, X_re, y_train, y_re = train_test_split(
    X,
    y,
    train_size=0.80,
    stratify=y,
    random_state=1,
)

# Step 2: halve the held-back portion into validation and test sets.
X_val, X_test, y_val, y_test = train_test_split(
    X_re,
    y_re,
    test_size=0.5,
    stratify=y_re,
    random_state=1,
)

In [12]:
# Sanity check: feature matrix and label vector must have the same number of rows.
print(np.shape(X), np.shape(y))

(11500, 178) (11500,)


In [13]:
#Create Function to fit and predict model

def fitPredictModel(model):
    """Fit ``model`` on the training split and print its validation scores.

    Reads the notebook-level arrays ``X_train``, ``y_train``, ``X_val`` and
    ``y_val``. Prints two lines: accuracy first, then the F1 score.

    Parameters
    ----------
    model : object
        Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is fitted in
        place, so the caller's object is trained after this call.

    Returns
    -------
    None
    """
    model.fit(X_train, y_train)
    prediction = model.predict(X_val)
    # scikit-learn metrics expect (y_true, y_pred) in that order. Accuracy and
    # binary F1 give the same number either way, but asymmetric metrics such as
    # precision or recall would be silently swapped with the arguments reversed.
    print(metrics.accuracy_score(y_val, prediction))
    print(metrics.f1_score(y_val, prediction))
 

**Standard Classifiers**

In [14]:
# Train models
# Support vector classifier with an RBF kernel

SVC_model = SVC(kernel="rbf", random_state=0)
fitPredictModel(SVC_model)




0.9678260869565217
0.9164785553047404


In [15]:
#Random Forest Classifier
# random_state is fixed so the forest (bootstrap samples and feature subsets)
# and therefore the printed scores are reproducible across runs.

RF_model = RandomForestClassifier(n_estimators=100, random_state=0)
fitPredictModel(RF_model)

0.9660869565217391
0.9146608315098468


In [16]:
# K-nearest-neighbours classifier using only the single closest training sample

KNN_model = KNeighborsClassifier(n_neighbors=1)
fitPredictModel(KNN_model)

0.9495652173913044
0.8557213930348259


In [17]:
#Decision Tree Classifier
# Depth is capped at 10; random_state is fixed so tie-breaking between equally
# good splits does not change the printed scores from run to run.
DT_model = DecisionTreeClassifier(max_depth=10, random_state=0)
fitPredictModel(DT_model)


0.9260869565217391
0.8054919908466819


In [18]:
#Naive Bayes Classifier
# Gaussian Naive Bayes with default settings, scored on the validation split.
gnb_model = GaussianNB()
fitPredictModel(gnb_model)

0.957391304347826
0.8941684665226782


**Boosting Algorithms**

In [20]:
#XGBoost
# Gradient-boosted trees with default hyper-parameters, scored on the validation split.
# NOTE(review): use_label_encoder is reportedly deprecated in newer XGBoost
# releases - confirm against the installed version before keeping it.
xgb_boost = xgb.XGBClassifier(use_label_encoder=False)
fitPredictModel(xgb_boost)

0.9617391304347827
0.8995433789954338


In [21]:
#Adaptive boosting
# random_state is fixed so the boosted ensemble and the printed scores are
# reproducible across runs.

ada_boost = AdaBoostClassifier(random_state=0)
fitPredictModel(ada_boost)

0.9426086956521739
0.8506787330316742
