<a href="https://colab.research.google.com/github/khushimehta24/ipd-project/blob/ML-model/IPD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import pickle
from sklearn.metrics import classification_report
%matplotlib inline

In [3]:
#In the DyslexiaML notebook, we found that RandomForest with GridSearch is best fit for the given dataset.
#This model gives the most accurate predictions.
#In this notebook, we will create only a RandomForest model, which will then be used to make final predictions.

In [4]:
#Reading the dataset
data=pd.read_csv('/content/gdrive/MyDrive/labeled_dysx.csv')
#Value to be predicted by the model.
y=data.Label 
#Input taken by the model.
X=data.drop(['Label'],axis=1) 
data

Unnamed: 0,Language_vocab,Memory,Speed,Visual_discrimination,Audio_Discrimination,Survey_Score,Label
0,0.5,0.6,0.5,0.8,0.6,0.7,1
1,0.6,0.7,0.8,0.9,0.5,0.8,2
2,0.6,0.4,0.3,0.3,0.4,0.6,1
3,0.3,0.5,0.2,0.1,0.3,0.5,0
4,0.7,0.6,0.7,0.8,0.9,0.5,2
...,...,...,...,...,...,...,...
495,0.4,0.3,0.1,0.4,0.2,0.5,0
496,0.4,0.6,0.5,0.6,0.5,0.4,1
497,0.7,0.5,0.8,0.9,0.8,0.5,1
498,0.3,0.5,0.4,0.6,0.6,0.6,1


In [5]:
#In the given data, the label is the indication for whether the person has dislexia or not.
#Label = 0 means that there is a high chance that the person has dislexia.
#Label = 1 means that there is a moderte chance that the person has dislexia.
#Label = 2 means that there is a low chance that the person has dislexia.
#The Survey_Score is calculated on the basis of the answers to the quiz given by the applicant.

In [6]:
#Creating the test and train data sets for the given data.
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.8,random_state=10)

In [7]:
#StandardScalar is used for preprocessing of data.
#'copy' is False, which means copies are avoid and inplace scaling is done instead.
sc=StandardScaler(copy=False)
sc.fit_transform(X_train)
sc.transform(X_test)
print(sc.transform(X_test)[3])

[-9.90170051 -8.2165588  -3.82517639 -4.24534156 -2.83972647 -0.14702108]


In [8]:
#Creating a list of possible n_estimators.
n_est = {'n_estimators' : [10,100,500,1000]}
#Creating a RandomForest model using the value of n_estimators given by GridSearch for best result.
model = GridSearchCV(RandomForestClassifier(random_state=0),n_est,scoring='f1_macro')
#Training the model
model.fit(X_train, y_train)
#Making predictions using the model.
predictions = model.predict(X_test)
#Printing the value of n_estimator used in the model.
#This value provides the most accurate predictions for our dataset.
print('Best value of n_estimator for RandomForest model is:')
print(model.best_params_)

Best value of n_estimator for RandomForest model is:
{'n_estimators': 100}


In [9]:
#Giving user description of model
print("Our model uses RandomForestClassifier with GridSearchCV to predict values from the given data.")
print("(Our model makes precitions with an error rate of 5.80%.)\n")

Our model uses RandomForestClassifier with GridSearchCV to predict values from the given data.
(Our model makes precitions with an error rate of 5.80%.)



In [10]:
#Getting input from user
name = input("Enter name of appllicant: ")
print("\nThe scores of all the tests in quiz as well as survey need to be entered.")
print("All the values lie in the range 0 to 1.\n")
lang_vocab = float(input("Enter the score of Language Vocab test: "))
memory = float(input("Enter the score of Memory test: "))
speed = float(input("Enter the score of Speed test: "))
visual = float(input("Enter the score of Visual Discrimination test: "))
audio = float(input("Enter the score of Audio Discrimination test: "))
survey = float(input("Enter the score obtained from Survey: "))

Enter name of appllicant: 0.26

The scores of all the tests in quiz as well as survey need to be entered.
All the values lie in the range 0 to 1.

Enter the score of Language Vocab test: 0.1
Enter the score of Memory test: 0.3
Enter the score of Speed test: 0.4
Enter the score of Visual Discrimination test: 0.1
Enter the score of Audio Discrimination test: 0.1
Enter the score obtained from Survey: 0.0


In [11]:
#Creating a 2D array from the given scores.
#Input taken by our model is a 2D array.
#get_result function can be run to get result from the survey.
def get_result(lang_vocab, memory, speed, visual, audio, survey):
    #2D numpy array created with the values input by the user.
    array = np.array([[lang_vocab, memory, speed, visual, audio, survey]])
    #The output given by model is converted into an int and stored in label.
    label = int(model.predict(array))
    #Giving final output to user depending upon the model prediction.
    if(label == 0):
        output = "There is a high chance of the applicant to have dyslexia."
    elif(label == 1):
        output = "There is a moderate chance of the applicant to have dyslexia."
    else:
        output = "There is a low chance of the applicant to have dyslexia."
    return output

In [12]:
get_result(lang_vocab, memory, speed, visual, audio, survey)
# get_result(0.40516053, 0.25200256, 1, 0.32694813,  0.01971041,  0.54374235)





'There is a moderate chance of the applicant to have dyslexia.'