# Importing the libraries

In [None]:
import numpy as np
import pandas as pd
import math 
import os
import glob

# Loading the Data

In [None]:
# Read the raw dataset. header=None tells pandas the file has no header row,
# so the columns receive integer labels.
data = pd.read_csv(
    'alldata.csv',
    header=None,
)

In [None]:
# Display the (rows, columns) shape of the loaded frame as the cell output.
data.shape

(1023, 31)

In [None]:
# Column-wise split: the final column holds the class label, every other
# column is a feature.
target_column = data.columns[-1]
X = data.drop(columns=target_column)
y = data[target_column]

In [None]:
# Show the target Series as the cell output.
y

0       1
1       1
2       1
3       1
4       1
       ..
1018    0
1019    0
1020    0
1021    0
1022    0
Name: 30, Length: 1023, dtype: int64

# Splitting the data into train and test sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

In [None]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

sc_X = StandardScaler()
le = LabelEncoder()

# Learn the scaling statistics and the label mapping from the training split
# only, then apply them to that same split. The features are standardised;
# the target is label-encoded (not scaled).
X_train = sc_X.fit(X_train).transform(X_train)
y_train = le.fit(y_train).transform(y_train)

# Re-use the already-fitted transformers on the held-out split so no
# information from the test rows influences the preprocessing.
X_test = sc_X.transform(X_test)
y_test = le.transform(y_test)

## SVM 

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC  # non-linear

# Base estimator whose hyper-parameters are tuned below.
classifier = SVC()

# Candidate values explored exhaustively by the grid search.
parameters = [{
    'C': [0.25, 0.5, 0.75, 1],
    'kernel': ['rbf', 'linear', 'poly', 'sigmoid'],
    'gamma': ['scale', 'auto'],
}]

grid_search = GridSearchCV(
    estimator=classifier,
    param_grid=parameters,
    scoring='accuracy',  # candidates are ranked by accuracy
    cv=5,                # five folds for cross validation
    n_jobs=-1,           # n_jobs=-1 uses all the available processors
)

# Run the search on the training split only.
grid_search.fit(X_train, y_train)

# Report the winning configuration and its cross-validated score.
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
print("Best Accuracy: {:.2f} %".format(best_accuracy*100))
print("Best Parameters:",best_parameters)

In [None]:
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    precision_score,
    recall_score,
)
from sklearn.svm import SVC  # non-linear

# Train the final SVM on the full training split.
# NOTE(review): these hyper-parameters are hard-coded; confirm they match the
# "Best Parameters" printed by the grid-search cell.
classifier = SVC(kernel='rbf', C=0.75, gamma='scale')
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)

cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", cm)
print("Accuracy", accuracy_score(y_test, y_pred))
print("Test:     ", y_test)
print("Predicted:", y_pred)

# Report precision/recall for BOTH classes, using the same labels as the KNN
# cell, so the numbers are comparable across models. Previously this cell
# printed class-1 metrics under the bare "Recall"/"Precision" labels, while
# the Naive Bayes and KNN cells use those labels for class 0.
# Class 1 is the "Non dyslexic" class (per the KNN cell's labels); class 0 is
# presumably the dyslexic class - TODO confirm.
print("Recall", recall_score(y_test, y_pred, pos_label=0, average='binary'))
print("Precision", precision_score(y_test, y_pred, pos_label=0, average='binary'))
print("Recall for Non dyslexic:", recall_score(y_test, y_pred, pos_label=1, average='binary'))
print("Precision for Non dyslexic:", precision_score(y_test, y_pred, pos_label=1, average='binary'))

Confusion Matrix:
 [[  2  84]
 [  2 117]]
Accuracy 0.5804878048780487
Test:      [1 1 0 1 1 0 0 0 1 0 0 0 1 1 0 1 1 1 1 1 0 0 1 0 0 1 1 0 0 1 0 1 0 0 1 0 0
 1 1 0 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 0 1 0 0 0 1 1 0 1 1 1 0 0 1 0 1 0
 0 1 1 1 1 0 1 0 0 0 0 0 1 1 0 1 0 0 1 1 1 1 0 1 0 0 1 0 0 1 1 1 0 1 0 1 0
 0 0 1 1 1 0 0 1 1 1 1 1 0 0 1 1 0 0 0 1 1 0 0 0 1 1 0 0 1 1 1 0 0 1 1 1 1
 1 1 0 0 0 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 0 1 0 0 1 1 1 1 1 0 0
 0 0 0 1 0 1 1 1 1 1 0 1 1 1 1 0 0 1 0 1]
Predicted: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1]
Recall 0.9831932773109243
Precision 0.582089552238806


## Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Base estimator for the search. random_state is fixed (same seed as the
# train/test split) so the bootstrap samples and feature sub-sampling - and
# therefore the reported best score and parameters - repeat across runs.
classifier = RandomForestClassifier(random_state=1)

# Candidate values explored exhaustively by the grid search.
parameters = [{
    'n_estimators': [100, 150, 250, 300, 350, 400, 450],
    'criterion': ['gini', 'entropy', 'log_loss'],
}]

grid_search = GridSearchCV(estimator=classifier,
                           param_grid=parameters,
                           scoring='accuracy',  # candidates are ranked by accuracy
                           cv=5,                # five folds for cross validation
                           n_jobs=-1)           # n_jobs=-1 uses all the available processors

# Run the search on the training split only.
grid_search.fit(X_train, y_train)

best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
print("Best Accuracy: {:.2f} %".format(best_accuracy*100))
print("Best Parameters:",best_parameters)

Best Accuracy: 74.81 %
Best Parameters: {'criterion': 'gini', 'n_estimators': 100}


In [None]:
from sklearn.ensemble import RandomForestClassifier

# Train the final forest on the full training split. random_state is fixed
# (same seed as the train/test split) so the metrics below are reproducible.
# NOTE(review): hyper-parameters are hard-coded; confirm they match the
# "Best Parameters" printed by the grid-search cell.
classifier = RandomForestClassifier(criterion='gini', n_estimators=100, random_state=1)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)


cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", cm)

print("Accuracy", accuracy_score(y_test, y_pred))

# Report precision/recall for BOTH classes, using the same labels as the KNN
# cell, so the numbers are comparable across models. Previously this cell
# printed class-1 metrics under the bare "Recall"/"Precision" labels, while
# the Naive Bayes and KNN cells use those labels for class 0.
# Class 1 is the "Non dyslexic" class (per the KNN cell's labels); class 0 is
# presumably the dyslexic class - TODO confirm.
print("Recall", recall_score(y_test, y_pred, pos_label=0, average='binary'))
print("Precision", precision_score(y_test, y_pred, pos_label=0, average='binary'))
print("Recall for Non dyslexic:", recall_score(y_test, y_pred, pos_label=1, average='binary'))
print("Precision for Non dyslexic:", precision_score(y_test, y_pred, pos_label=1, average='binary'))

print("Test:     ", y_test)
print("Predicted:", y_pred)

Confusion Matrix:
 [[ 41  45]
 [ 11 108]]
Accuracy 0.7268292682926829
Recall  0.907563025210084
Precision  0.7058823529411765
Test:      [1 1 0 1 1 0 0 0 1 0 0 0 1 1 0 1 1 1 1 1 0 0 1 0 0 1 1 0 0 1 0 1 0 0 1 0 0
 1 1 0 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 0 1 0 0 0 1 1 0 1 1 1 0 0 1 0 1 0
 0 1 1 1 1 0 1 0 0 0 0 0 1 1 0 1 0 0 1 1 1 1 0 1 0 0 1 0 0 1 1 1 0 1 0 1 0
 0 0 1 1 1 0 0 1 1 1 1 1 0 0 1 1 0 0 0 1 1 0 0 0 1 1 0 0 1 1 1 0 0 1 1 1 1
 1 1 0 0 0 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 0 1 0 0 1 1 1 1 1 0 0
 0 0 0 1 0 1 1 1 1 1 0 1 1 1 1 0 0 1 0 1]
Predicted: [1 1 0 0 1 1 1 0 1 1 1 0 0 1 1 1 1 1 0 1 1 1 1 0 0 1 1 0 0 1 1 1 1 0 1 1 0
 1 1 0 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 0 1 0 1 1 1 0 1 1 1 0 0 0 0 1 1 1
 0 1 1 1 1 1 1 1 0 0 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1
 0 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 0 1 1 1 0 1 1 0 1 1 1 1 0 0 1 1 1 1
 1 1 1 0 0 1 1 0 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1
 0 0 1 1 1 1 0 1 1 1 0 1 0 1 1 1 1 1 0 1]


## Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB

# Gaussian Naive Bayes has no hyper-parameters tuned here, so it is fitted
# directly on the training split (fit returns the estimator itself).
classifier = GaussianNB().fit(X_train, y_train)
y_pred = classifier.predict(X_test)

cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", cm)

print("Accuracy", accuracy_score(y_test, y_pred))

# Recall and precision are both computed with class 0 as the positive label.
for label, metric in (("Recall", recall_score), ("Precision", precision_score)):
    print(label, metric(y_test, y_pred, pos_label=0, average='binary'))

print("Test:     ", y_test)
print("Predicted:", y_pred)

Confusion Matrix:
 [[ 16  70]
 [ 18 101]]
Accuracy 0.5707317073170731
Recall 0.18604651162790697
Precision 0.47058823529411764
Test:      [1 1 0 1 1 0 0 0 1 0 0 0 1 1 0 1 1 1 1 1 0 0 1 0 0 1 1 0 0 1 0 1 0 0 1 0 0
 1 1 0 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 0 1 0 0 0 1 1 0 1 1 1 0 0 1 0 1 0
 0 1 1 1 1 0 1 0 0 0 0 0 1 1 0 1 0 0 1 1 1 1 0 1 0 0 1 0 0 1 1 1 0 1 0 1 0
 0 0 1 1 1 0 0 1 1 1 1 1 0 0 1 1 0 0 0 1 1 0 0 0 1 1 0 0 1 1 1 0 0 1 1 1 1
 1 1 0 0 0 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 0 1 0 0 1 1 1 1 1 0 0
 0 0 0 1 0 1 1 1 1 1 0 1 1 1 1 0 0 1 0 1]
Predicted: [1 1 1 1 1 1 1 0 0 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 1
 0 1 1 1 0 1 1 1 0 1 1 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1
 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 0 1 1 1
 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 0 1 1 1 1 0 1 0 1 1 1 1 1 1 0 1
 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 1 1 1 0 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 0 0 1 0 1]


## KNN

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# Base estimator whose hyper-parameters are tuned below.
classifier = KNeighborsClassifier()

# Candidate values explored exhaustively by the grid search.
parameters = [{
    'n_neighbors': [3, 5, 7, 9, 11, 13, 15],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
}]

grid_search = GridSearchCV(
    estimator=classifier,
    param_grid=parameters,
    scoring='accuracy',  # candidates are ranked by accuracy
    cv=5,                # five folds for cross validation
    n_jobs=-1,           # n_jobs=-1 uses all the available processors
)

# Run the search on the training split only.
grid_search.fit(X_train, y_train)

# Report the winning configuration and its cross-validated score.
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
print("Best Accuracy: {:.2f} %".format(best_accuracy*100))
print("Best Parameters:",best_parameters)

Best Accuracy: 57.58 %
Best Parameters: {'algorithm': 'auto', 'n_neighbors': 3}


In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Train the final KNN model on the full training split.
# NOTE(review): hyper-parameters are hard-coded; confirm they match the
# "Best Parameters" printed by the grid-search cell.
classifier = KNeighborsClassifier(n_neighbors=3, algorithm='auto')
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)

cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", cm)

print("Accuracy", accuracy_score(y_test, y_pred))

# (printed label, metric function, class treated as positive), in print order.
metric_reports = (
    ("Recall", recall_score, 0),
    ("Precision", precision_score, 0),
    ("Recall for Non dyslexic:", recall_score, 1),
    ("Precision for Non dyslexic:", precision_score, 1),
)
for label, metric, positive_class in metric_reports:
    print(label, metric(y_test, y_pred, pos_label=positive_class, average='binary'))

print("Test:     ", y_test)
print("Predicted:", y_pred)


Confusion Matrix:
 [[28 58]
 [28 91]]
Accuracy 0.5804878048780487
Recall 0.32558139534883723
Precision 0.5
Recall for Non dyslexic: 0.7647058823529411
Precision for Non dyslexic: 0.610738255033557
Test:      [1 1 0 1 1 0 0 0 1 0 0 0 1 1 0 1 1 1 1 1 0 0 1 0 0 1 1 0 0 1 0 1 0 0 1 0 0
 1 1 0 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 0 1 0 0 0 1 1 0 1 1 1 0 0 1 0 1 0
 0 1 1 1 1 0 1 0 0 0 0 0 1 1 0 1 0 0 1 1 1 1 0 1 0 0 1 0 0 1 1 1 0 1 0 1 0
 0 0 1 1 1 0 0 1 1 1 1 1 0 0 1 1 0 0 0 1 1 0 0 0 1 1 0 0 1 1 1 0 0 1 1 1 1
 1 1 0 0 0 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 0 1 0 0 1 1 1 1 1 0 0
 0 0 0 1 0 1 1 1 1 1 0 1 1 1 1 0 0 1 0 1]
Predicted: [0 0 1 1 1 0 1 1 0 0 1 1 0 1 1 1 1 1 1 1 0 1 1 0 0 0 1 1 1 1 1 1 0 1 0 1 1
 1 1 0 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 0 1 1 1 0
 0 1 1 1 1 1 0 1 1 0 0 0 0 1 1 1 1 0 1 1 1 1 1 1 0 1 1 0 0 0 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 0 1 1 1 1 1 0 0 1 0 0 1 1 1 1 0 1 0 1 1 0 1 1 0 1 0 1 1 0
 0 1 1 1 0 1 1 0 1 0 1 1 1 0 1 0 1 1 1 1 1 1 1 0 1 0 1 1 0 1 1 0

In [None]:
import tensorflow as tf

# Seed the Python, NumPy and TensorFlow random generators so weight
# initialisation and batch shuffling - and therefore the trained model -
# repeat across runs (same seed as the train/test split).
tf.keras.utils.set_random_seed(1)

# Small fully-connected binary classifier: two 3-unit ReLU hidden layers
# followed by a single sigmoid unit that outputs the probability of class 1.
ann = tf.keras.models.Sequential()
ann.add(tf.keras.layers.Dense(units=3, activation='relu'))
ann.add(tf.keras.layers.Dense(units=3, activation='relu'))
ann.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# during training for monitoring only.
ann.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
ann.fit(X_train, y_train, batch_size=10, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x2bf38c97010>

In [None]:
# Predicted probability of class 1 for every test row; the single sigmoid
# output unit yields one column per row.
y_prob = ann.predict(X_test)
print(y_prob)

# Convert probabilities to hard 0/1 labels. 0.5 is the same cut-off that the
# 'accuracy' metric applies during training; the previous 0.9 cut-off
# disagreed with it and pushed almost every prediction to class 0.
# The result is flattened to a 1-D integer array so it has the same shape
# and dtype as y_test.
DECISION_THRESHOLD = 0.5
y_pred = (y_prob > DECISION_THRESHOLD).astype(int).ravel()

# Side-by-side view: column 0 = predicted label, column 1 = true label.
print(np.column_stack((y_pred, y_test)))

cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", cm)

print("Accuracy", accuracy_score(y_test, y_pred))

# Report precision/recall for BOTH classes, using the same labels as the KNN
# cell, so the numbers are comparable across models. Previously this cell
# printed class-1 metrics under the bare "Recall"/"Precision" labels, while
# the Naive Bayes and KNN cells use those labels for class 0.
# Class 1 is the "Non dyslexic" class (per the KNN cell's labels); class 0 is
# presumably the dyslexic class - TODO confirm.
print("Recall", recall_score(y_test, y_pred, pos_label=0, average='binary'))
print("Precision", precision_score(y_test, y_pred, pos_label=0, average='binary'))
print("Recall for Non dyslexic:", recall_score(y_test, y_pred, pos_label=1, average='binary'))
print("Precision for Non dyslexic:", precision_score(y_test, y_pred, pos_label=1, average='binary'))

[[0.8077032 ]
 [0.6952673 ]
 [0.50684017]
 [0.7363948 ]
 [0.6882054 ]
 [0.48348758]
 [0.7074843 ]
 [0.45947737]
 [0.57451415]
 [0.50684017]
 [0.81008774]
 [0.50684017]
 [0.735671  ]
 [0.43731803]
 [0.50684017]
 [0.6738617 ]
 [0.6988024 ]
 [0.64617634]
 [0.48660505]
 [0.6528033 ]
 [0.6445096 ]
 [0.70006716]
 [0.50684017]
 [0.6964738 ]
 [0.728823  ]
 [0.47514334]
 [0.70555824]
 [0.6952673 ]
 [0.50684017]
 [0.48677096]
 [0.6952673 ]
 [0.7115991 ]
 [0.59531224]
 [0.6952673 ]
 [0.50684017]
 [0.56314087]
 [0.6946469 ]
 [0.6952673 ]
 [0.7310371 ]
 [0.50684017]
 [0.55375564]
 [0.4037719 ]
 [0.5431387 ]
 [0.6571834 ]
 [0.6952673 ]
 [0.49441198]
 [0.6952673 ]
 [0.47919175]
 [0.75432235]
 [0.6099343 ]
 [0.7126796 ]
 [0.5832602 ]
 [0.73313504]
 [0.49078932]
 [0.6952673 ]
 [0.7010431 ]
 [0.4616346 ]
 [0.4089528 ]
 [0.6644103 ]
 [0.70515424]
 [0.6952673 ]
 [0.7734013 ]
 [0.7881098 ]
 [0.50684017]
 [0.62497985]
 [0.6386456 ]
 [0.64565855]
 [0.8800036 ]
 [0.47970164]
 [0.6450759 ]
 [0.7119973 ]
 [0.69