<b>Grid Search </b> is a way to find out the best hyperparameter for our model by iteratively going through all possible combination of given model parameters

In [2]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

In [4]:
titanic_df = pd.read_csv("./datasets/titanic_processed.csv")
titanic_df.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked_C,Embarked_Q,Embarked_S
0,0,3,1,18.0,1,1,7.8542,0,0,1
1,0,3,1,14.0,4,1,39.6875,0,0,1
2,0,3,1,20.0,0,0,8.05,0,0,1
3,0,1,1,37.0,0,1,29.7,1,0,0
4,1,3,1,3.0,1,1,15.9,0,0,1


In [6]:
X = titanic_df.drop(['Survived'],axis=1)
Y = titanic_df['Survived']

In [7]:
x_train,x_test,y_train,y_test = train_test_split(X,Y,test_size=0.2)

In [9]:
from sklearn.model_selection import GridSearchCV

parameters = {'max_depth':[2,4,5,7,9,10]}

grid_search = GridSearchCV(DecisionTreeClassifier(),parameters,cv=3,return_train_score=True)
grid_search.fit(x_train,y_train)

#cv = 3 means 3 fold cross validation

grid_search.best_params_

{'max_depth': 5}

In [10]:
for i in range(6):
    print("parameters: ", grid_search.cv_results_['params'][i])
    print("Mean Test score: ", grid_search.cv_results_['mean_test_score'][i])    
    print("Rank: ", grid_search.cv_results_['rank_test_score'][i])    

parameters:  {'max_depth': 2}
Mean Test score:  0.804920913884007
Rank:  3
parameters:  {'max_depth': 4}
Mean Test score:  0.81195079086116
Rank:  2
parameters:  {'max_depth': 5}
Mean Test score:  0.8154657293497364
Rank:  1
parameters:  {'max_depth': 7}
Mean Test score:  0.7855887521968365
Rank:  4
parameters:  {'max_depth': 9}
Mean Test score:  0.7820738137082601
Rank:  5
parameters:  {'max_depth': 10}
Mean Test score:  0.7785588752196837
Rank:  6


In [12]:
decision_tree_classifier = DecisionTreeClassifier(max_depth = grid_search.best_params_['max_depth']).fit(x_train,y_train)

In [14]:
y_pred = decision_tree_classifier.predict(x_test)

In [16]:
print("accuracy: ",accuracy_score(y_pred,y_test))
print("precision: ",precision_score(y_pred,y_test))
print("recall: ",recall_score(y_pred,y_test))

accuracy:  0.7482517482517482
precision:  0.6307692307692307
recall:  0.7735849056603774


Example for Logistic regression

In [23]:
parameters = {"penalty":['l1','l2'],"C":[0.5,1,2,3,4,5]}
grid_search = GridSearchCV(LogisticRegression(),parameters,cv=3,return_train_score=True)
grid_search.fit(x_train,y_train)
grid_search.best_params_

{'C': 4, 'penalty': 'l1'}

In [29]:
logistic_regression_model = LogisticRegression(C=4,penalty='l1')
logistic_regression_model.fit(x_train,y_train)
y_pred = logistic_regression_model.predict(x_test)
print("accuracy: ",accuracy_score(y_pred,y_test))
print("precision: ",precision_score(y_pred,y_test))
print("recall: ",recall_score(y_pred,y_test))

accuracy:  0.7412587412587412
precision:  0.6461538461538462
recall:  0.75
