# Import Libraries

In [1]:
# Import necessary libraries to get started
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()

# Data Preprocessing

In [2]:
# Import the dataset
data = pd.read_csv("eeg_data.csv")

In [3]:
# data exploration
data.head()

Unnamed: 0,# mean_0_a,mean_1_a,mean_2_a,mean_3_a,mean_4_a,mean_d_0_a,mean_d_1_a,mean_d_2_a,mean_d_3_a,mean_d_4_a,...,fft_741_b,fft_742_b,fft_743_b,fft_744_b,fft_745_b,fft_746_b,fft_747_b,fft_748_b,fft_749_b,label
0,4.62,30.3,-356.0,15.6,26.3,1.07,0.411,-15.7,2.06,3.15,...,23.5,20.3,20.3,23.5,-215.0,280.0,-162.0,-162.0,280.0,NEGATIVE
1,28.8,33.1,32.0,25.8,22.8,6.55,1.68,2.88,3.83,-4.82,...,-23.3,-21.8,-21.8,-23.3,182.0,2.57,-31.6,-31.6,2.57,NEUTRAL
2,8.9,29.4,-416.0,16.7,23.7,79.9,3.36,90.2,89.9,2.03,...,462.0,-233.0,-233.0,462.0,-267.0,281.0,-148.0,-148.0,281.0,POSITIVE
3,14.9,31.6,-143.0,19.8,24.3,-0.584,-0.284,8.82,2.3,-1.97,...,299.0,-243.0,-243.0,299.0,132.0,-12.4,9.53,9.53,-12.4,POSITIVE
4,28.3,31.3,45.2,27.3,24.5,34.8,-5.79,3.06,41.4,5.52,...,12.0,38.1,38.1,12.0,119.0,-17.6,23.9,23.9,-17.6,NEUTRAL


In [4]:
eeg_data_columns = data.columns.values.tolist()

In [5]:
print(data["label"].value_counts())
cleanup_rules = ({'NEUTRAL': 0, 'POSITIVE': 1, 'NEGATIVE': 2} )
eeg_data_encoded = data.replace(cleanup_rules, inplace=False)
print(eeg_data_encoded['label'].value_counts())

label
NEUTRAL     716
NEGATIVE    708
POSITIVE    708
Name: count, dtype: int64
label
0    716
2    708
1    708
Name: count, dtype: int64


In [6]:
y = eeg_data_encoded.label

##eeg_data_features = ['mean_1_a', 'mean_2_a', 'mean_3_a', 'mean_4_a','mean_d_0_a', 'mean_d_1_a', 'mean_d_2_a', 'mean_d_3_a', 'mean_d_4_a']
eeg_data_features = eeg_data_columns[1:10]
x=eeg_data_encoded[eeg_data_features]

In [7]:
# verify that outputs are selected correctly
y.head()

0    2
1    0
2    1
3    1
4    0
Name: label, dtype: int64

# Modelling, Training and Test

In [8]:
#import sklearn's model selection and split the data set into %80 training and %20 test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.20)

In [9]:
#import GaussianNB and perform Gaussian Naive Bayes on the training dataset
from sklearn.naive_bayes import GaussianNB
estimator = GaussianNB()
estimator.fit(X_train, y_train)

In [10]:
#apply the trained estimator on the test dataset
estimator.score(X_test, y_test)
y_pred = estimator.predict(X_test)

In [11]:
# output the predicted values
y_pred

array([2, 0, 0, 1, 2, 0, 1, 1, 1, 0, 0, 1, 2, 0, 2, 2, 2, 2, 2, 1, 0, 0,
       2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 2, 0, 2, 2, 2, 2, 0, 0, 2, 0, 2, 2,
       2, 0, 0, 0, 0, 1, 0, 1, 0, 2, 0, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2,
       2, 1, 1, 2, 1, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 0, 1, 0, 2, 2, 0, 0,
       1, 0, 2, 2, 0, 2, 2, 2, 0, 1, 2, 0, 0, 0, 2, 2, 2, 0, 1, 0, 1, 1,
       2, 2, 2, 1, 2, 0, 2, 2, 1, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 0, 0, 2,
       1, 2, 2, 0, 0, 2, 0, 2, 0, 2, 0, 2, 1, 0, 2, 2, 2, 0, 0, 1, 2, 2,
       2, 1, 0, 2, 2, 2, 2, 2, 2, 2, 0, 1, 0, 2, 2, 2, 0, 1, 0, 0, 2, 0,
       2, 2, 0, 0, 2, 2, 0, 2, 2, 2, 0, 1, 2, 2, 2, 2, 0, 1, 2, 1, 2, 1,
       1, 1, 2, 2, 1, 0, 2, 0, 0, 2, 0, 0, 0, 2, 2, 0, 1, 2, 1, 0, 0, 2,
       2, 2, 2, 0, 0, 0, 0, 0, 0, 2, 1, 2, 0, 0, 2, 2, 2, 2, 0, 0, 2, 2,
       1, 2, 2, 0, 2, 1, 0, 0, 2, 1, 0, 2, 0, 2, 2, 1, 0, 2, 0, 2, 0, 0,
       1, 0, 2, 2, 1, 2, 2, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 2, 0, 0,
       0, 2, 1, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 0,

In [12]:
# import relevant metrics and print the confusion matrix and classification report
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

[[126   2   6]
 [ 18  67  58]
 [  0   1 149]]
              precision    recall  f1-score   support

           0       0.88      0.94      0.91       134
           1       0.96      0.47      0.63       143
           2       0.70      0.99      0.82       150

    accuracy                           0.80       427
   macro avg       0.84      0.80      0.79       427
weighted avg       0.84      0.80      0.78       427

