# Drowsiness Detection using EEG Sensor Data


# Importing Libraries

In [None]:
import numpy as np 
import pandas as pd 
import seaborn as sns 
import matplotlib.pyplot as plt
plt.style.use('ggplot')
from sklearn.model_selection import train_test_split
from keras.models import Model
from sklearn.preprocessing import MinMaxScaler
from keras.layers import LSTM, Activation, Dense, Dropout, Input, Conv1D, Embedding
from tensorflow.keras.optimizers import RMSprop
from keras.preprocessing import sequence
from keras.models import Sequential
from sklearn.metrics import f1_score
from keras.layers import Conv1D, GlobalMaxPooling1D
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Flatten,Conv1D,BatchNormalization,Dropout
from prettytable import PrettyTable
from sklearn.feature_selection import chi2
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVC
from keras.preprocessing.text import Tokenizer
from sklearn.feature_selection import f_classif
from sklearn.ensemble import ExtraTreesClassifier 
from sklearn.feature_selection import SelectKBest
from mlxtend.plotting import plot_confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectFromModel
from sklearn.feature_selection import mutual_info_classif

import warnings
warnings.filterwarnings("ignore")

# Loading the data

In [None]:
# Load the EEG recordings from a CSV file located in the working directory.
data = pd.read_csv("Dataset_EEG.csv")

#### First five records of the data

In [None]:
# Preview the first five rows.
data.head(n=5)

#### Last five records of the data

In [None]:
# Preview the last five rows.
data.tail(n=5)

#### Columns/features in data

In [None]:
# Column labels of the loaded frame (features plus the 'label' target used later).
data.columns

#### Length of data

In [None]:
# Report the number of rows in the frame.
print('lenght of data is', data.shape[0])

#### Shape of data

In [None]:
# (rows, columns) of the frame.
data.shape

#### Data information

In [None]:
# Print pandas' per-column summary of the frame.
data.info()

#### Data types of all columns

In [None]:
# dtype of every column.
data.dtypes

#### Checking Null values / missing values

In [None]:
# Number of rows that contain at least one missing value.
data.isnull().any(axis=1).sum()

#### Counts of missing values in each column

In [None]:
# Missing-value count for each column.
data.isna().sum()

#### Rows and columns in the dataset

In [None]:
# Number of columns, taken from the frame's shape.
print('Count of columns in the data is:  ', data.shape[1])

In [None]:
# Number of rows, taken from the frame's shape.
print('Count of rows in the data is:  ', data.shape[0])

#### Data Description

In [None]:
# Summary statistics for the numeric columns.
data.describe()

## Univariate Analysis 

#### Numeric features distribution

In [None]:
# Draw one histogram per numeric column. DataFrame.hist produces a grid of
# subplots, so the caption is attached to the figure with suptitle; plt.title
# would only label the last subplot that was drawn.
data.hist(figsize=(20, 20), bins=20, color="#107009AA")
plt.suptitle("Numeric Features Distribution")
plt.show()

# Keep only the rows labelled 0 or 1, with every label-0 row placed before the
# label-1 rows.
# NOTE(review): a later cell takes data.head(1280); with this ordering that
# slice starts with the label-0 rows - confirm both classes are represented.
a = data[data['label'] == 0]
b = data[data['label'] == 1]
data = pd.concat([a, b])

### The histograms above show the value range of each feature. Every feature has a different distribution of values.

# Data Preparation

### Deleting the duplicate rows

In [None]:
# Remember the row count so the number of removed duplicates can be reported.
current = data.shape[0]
print('Rows of data before Delecting ', current)

In [None]:
# Remove exact duplicate rows, keeping the first occurrence of each.
data = data.drop_duplicates(keep='first')

In [None]:
# Row count once duplicates have been removed. The message now says "after":
# this cell runs after drop_duplicates, so "before" mislabelled the number.
now = len(data)
print('Rows of data after Delecting ', now)

In [None]:
# Number of rows removed by drop_duplicates (count before minus count after).
diff=current-now
print('Duplicated rows deleted ', diff)

# Feature Selection

### Univariate feature selection
- using the f_classif technique to extract the most useful features from the data

In [None]:
# Feature selection is run on the first 1280 rows only.
d = data.head(1280)
y = d['label']
X = d.drop(columns=['label'])

# Keep the 5 features with the highest f_classif scores.
FCF_M = SelectKBest(score_func=f_classif, k=5)
FCF_features = FCF_M.fit(X, y).transform(X)

In [None]:
# Rebuild a labelled DataFrame from the selected columns.
feature_names = X.columns[FCF_M.get_support(indices=True)].tolist()
FCF_features = pd.DataFrame(FCF_features, columns=feature_names)
FCF_features.head()

In [None]:
FCF_features = pd.DataFrame(data=FCF_features)
len(FCF_features.columns)  # Features count

### Univariate feature selection
- using chi2 technique to extract the most useful features from the data

In [None]:
# Keep the 5 features with the highest chi2 scores.
# NOTE(review): chi2 requires non-negative feature values - confirm for this data.
C_M = SelectKBest(score_func=chi2, k=5)
C_features = C_M.fit(X, y).transform(X)

In [None]:
# Rebuild a labelled DataFrame from the selected columns.
feature_names = X.columns[C_M.get_support(indices=True)].tolist()
C_features = pd.DataFrame(C_features, columns=feature_names)
C_features.head()

In [None]:
C_features = pd.DataFrame(data=C_features)
len(C_features.columns)  # Features count

### Feature selection using SelectFromModel
- L1-based feature selection

In [None]:
# Fit an L1-penalised linear SVC and wrap the fitted model in a selector.
L_svc = SelectFromModel(
    LinearSVC(penalty="l1", C=0.01, dual=False).fit(X, y),
    prefit=True,
)
L_svc_Features = L_svc.transform(X)

In [None]:
# Rebuild a labelled DataFrame from the selected columns.
feature_names = X.columns[L_svc.get_support(indices=True)].tolist()
L_svc_Features = pd.DataFrame(L_svc_Features, columns=feature_names)
L_svc_Features.head()

In [None]:
# Names of the features kept by the L1-based selector.
L_svc_Features.columns.tolist()

In [None]:
len(L_svc_Features.columns)  # Features count

### Feature selection using SelectFromModel
- Decision Tree-based feature selection

In [None]:
# Fit a decision tree on all features and wrap the fitted model in a selector.
# The tree is seeded so the selected feature set is the same on every run;
# the seed matches the one used for the train/test split in this notebook.
ETC_M = DecisionTreeClassifier(random_state=5)
ETC_M.fit(X, y)
ETC_M = SelectFromModel(ETC_M, prefit=True)
ETC_M_Features = ETC_M.transform(X)

In [None]:
# Rebuild a labelled DataFrame from the selected columns.
feature_names = X.columns[ETC_M.get_support(indices=True)].tolist()
ETC_M_Features = pd.DataFrame(ETC_M_Features, columns=feature_names)
ETC_M_Features.head()

In [None]:
# Names of the features kept by the tree-based selector.
ETC_M_Features.columns.tolist()

In [None]:
len(ETC_M_Features.columns)  # Features count

- We are going to combine all the features that were selected using the different techniques

In [None]:
# Place the four selected-feature frames side by side.
Hybrid_Features = pd.concat(
    [FCF_features, C_features, L_svc_Features, ETC_M_Features],
    axis="columns",
)
# A feature picked by several selectors appears several times; keep only the
# first column for each name.
# NOTE(review): the later cells in this notebook scale and split X, not
# Hybrid_Features1 - confirm whether the hybrid set was meant to be used.
Hybrid_Features1 = Hybrid_Features.loc[:, ~Hybrid_Features.columns.duplicated(keep='first')]

In [None]:
len(Hybrid_Features1.columns)  # Features count

### Split the data into 70% training and 30% testing

In [None]:
# Split first, then learn the min/max scaling from the training rows only.
# Fitting the scaler on all rows (as before) lets test-set statistics leak
# into training. Same test_size and random_state as before.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=5)

scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
# Test values can fall outside the training min/max; clip them so every
# scaled feature stays inside [0, 1], the range the previous code guaranteed.
X_test = np.clip(scaler.transform(X_test), 0.0, 1.0)
# X is still rebound to a scaled array, as the original cell did.
X = np.clip(scaler.transform(X), 0.0, 1.0)

# Model Building

# Implementing CNN Model

In [None]:
# 1-D CNN binary classifier over the scaled feature vector.
# Local import: Reshape is not among the layers imported at the top of the file.
from tensorflow.keras.layers import Reshape

n_features = X_train.shape[1]

CNN_Model = Sequential()
# The inputs are continuous scaled values, not vocabulary indices. An
# Embedding layer would truncate them to integer indices and discard almost
# all information, so each sample is reshaped to (steps, 1) and the
# convolution sees the real values.
CNN_Model.add(Reshape((n_features, 1), input_shape=(n_features,)))

CNN_Model.add(Conv1D(256, 3, padding='valid', activation='relu', strides=2))
CNN_Model.add(GlobalMaxPooling1D())
# Single sigmoid unit: probability of the positive class.
CNN_Model.add(Dense(1, activation='sigmoid'))

CNN_Model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
CNN_Model.summary()

# Training and validating 

In [None]:
# Train silently for 200 epochs, holding out 20% of the training rows for validation.
CNN_Model.fit(
    x=X_train,
    y=y_train,
    epochs=200,
    batch_size=100,
    validation_split=0.2,
    verbose=False,
)

# Testing the Trained CNN_Model with test data

In [None]:
# evaluate returns [loss, accuracy] for the compiled metrics.
accr = CNN_Model.evaluate(x=X_test, y=y_test, verbose=False)

In [None]:
# Threshold the predicted probabilities at 0.5 to obtain class labels.
y_pred = CNN_Model.predict(X_test) > 0.5
# Index 1 of the evaluate() result is the accuracy metric.
cnn_model_acc = accr[1]
print('Accuracy: {:0.2f}'.format(cnn_model_acc))

In [None]:
# Weighted-average F1 on the test set. f1_score returns a fraction in [0, 1],
# so it is printed without the misleading '%' suffix.
cnn_model_f1 = f1_score(y_test, y_pred, average='weighted')
print('F1', round(cnn_model_f1, 2))

# Implementing CRF Model

In [None]:
# Two-block Conv1D classifier with batch normalisation and dropout.
# Conv1D needs a trailing channel axis, so each sample becomes (features, 1).
x_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
x_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

crf_model = Sequential()
crf_model.add(Conv1D(filters=32, kernel_size=2, activation='relu', input_shape=x_train[0].shape))
crf_model.add(BatchNormalization())
crf_model.add(Dropout(0.5))
crf_model.add(Conv1D(filters=64, kernel_size=2, activation='relu'))
crf_model.add(BatchNormalization())
crf_model.add(Dropout(0.5))
crf_model.add(Flatten())
crf_model.add(Dense(64, activation='relu'))
crf_model.add(Dropout(0.5))
# binary_crossentropy expects a probability in [0, 1]. The previous relu
# output was unbounded above and could be exactly 0, so sigmoid is used.
crf_model.add(Dense(1, activation='sigmoid'))
crf_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
crf_model.summary()

# Training and validating 

In [None]:
# Validate on a slice of the training data rather than on the test set, so
# the test rows stay unseen until the final evaluation. This matches how the
# other models in this notebook are validated.
crf_model.fit(x_train, y_train, epochs=20, validation_split=0.2, verbose=False)

# Testing the Trained crf_model with test data

In [None]:
# evaluate returns [loss, accuracy] for the compiled metrics.
accr = crf_model.evaluate(x=x_test, y=y_test, verbose=False)

In [None]:
# Threshold the model outputs at 0.5 to obtain class labels.
y_pred = crf_model.predict(x_test) > 0.5
# Index 1 of the evaluate() result is the accuracy metric.
crf_model_acc = accr[1]
print('Accuracy: {:0.4f}'.format(crf_model_acc))

In [None]:
# Use the same 'weighted' averaging as the CNN model so the F1 comparison
# table compares like with like. f1_score returns a fraction in [0, 1], so it
# is printed without the '%' suffix.
crf_model_f1 = f1_score(y_test, y_pred, average='weighted')
print('F1', round(crf_model_f1, 3))

# Implementing LSTM Model

In [None]:
def LSTM_Model():
    """Build the (uncompiled) LSTM binary classifier.

    Each sample is a vector of 14 feature values. It is treated as a 14-step
    sequence with one value per step, passed through an LSTM and a small
    dense head, and mapped to a single sigmoid probability.

    Returns:
        A Keras ``Model`` taking inputs of shape (14,) and producing one
        probability per sample.
    """
    # Local import: Reshape is not among the layers imported at the top of the file.
    from keras.layers import Reshape

    inputs = Input(name='inputs', shape=[14])
    # The features are continuous values, not vocabulary indices. An Embedding
    # lookup would truncate them to integers, so they are reshaped to
    # (timesteps, 1) and fed to the LSTM directly.
    layer = Reshape((14, 1))(inputs)
    layer = LSTM(100)(layer)
    layer = Dense(16, name='FC1')(layer)
    layer = Activation('relu')(layer)
    layer = Dropout(0.1)(layer)
    layer = Dense(1, name='out_layer')(layer)
    # binary_crossentropy (used when this model is compiled) expects a
    # probability, so the output uses sigmoid rather than relu.
    layer = Activation('sigmoid')(layer)
    model = Model(inputs=inputs, outputs=layer)
    return model

In [None]:
# Build the LSTM network, show its layers, then attach loss/optimizer/metric.
model = LSTM_Model()
model.summary()
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Training and validating 

In [None]:
# Train silently for 10 epochs, holding out 10% of the training rows for validation.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    batch_size=80,
    validation_split=0.1,
    verbose=False,
)

# Testing the Trained model with test data

In [None]:
# evaluate returns [loss, accuracy] for the compiled metrics.
accr = model.evaluate(x=X_test, y=y_test, verbose=False)

In [None]:
# Threshold the model outputs at 0.5 to obtain class labels.
y_pred = model.predict(X_test) > 0.5
# Index 1 of the evaluate() result is the accuracy metric.
lstm_model_acc = accr[1]
print('Accuracy: {:0.4f}'.format(lstm_model_acc))

In [None]:
# Use the same 'weighted' averaging as the CNN model so the F1 comparison
# table compares like with like. f1_score returns a fraction in [0, 1], so it
# is printed without the '%' suffix.
lstm_model_f1 = f1_score(y_test, y_pred, average='weighted')
print('F1', round(lstm_model_f1, 4))

# Comparison of all Models on F1

In [None]:
# F1 scores of the three models side by side. Every row uses the same
# rounding (4 decimals) so the figures are directly comparable; previously
# each row was rounded to a different precision.
x = PrettyTable()
print('\n')
x.field_names = ["Model", "F1"]
x.add_row(["CNN Model", round(cnn_model_f1, 4)])
x.add_row(["CNN CRF Model", round(crf_model_f1, 4)])
x.add_row(["CNN LSTM Model", round(lstm_model_f1, 4)])

print(x)
print('\n')

# Comparison of all Models on Accuracy

In [None]:
# Test accuracies of the three models side by side. Every row uses the same
# rounding (4 decimals) so the figures are directly comparable; previously
# each row was rounded to a different precision.
x = PrettyTable()
print('\n')
x.field_names = ["Model", "Accuracy"]
x.add_row(["CNN Model", round(cnn_model_acc, 4)])
x.add_row(["CNN CRF Model", round(crf_model_acc, 4)])
x.add_row(["CNN LSTM Model", round(lstm_model_acc, 4)])

print(x)
print('\n')