# Grid Search CV

## Use GridSearchCV to try different combinations of hyperparameters and report the best model, its parameters, and its score.

### Import the necessary packages

In [1]:
import pyodbc 
import pandas as pd
import numpy as np


from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingRegressor 
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV


### Fetch the data from SQL Server that will be used for training

In [2]:
# ODBC connection string for the SQL Server instance that hosts the data.
# NOTE(review): the server name is hard-coded to one machine; consider moving
# it to a configuration cell or an environment variable.
conn_str = ('Driver={SQL Server};'
            'Server=LAPTOP-HVRMUNPF;'
            'PORT=1433;'
            'Database=RENTERS_STP;'
            'Trusted_Connection=yes;'
            )

# Pull every column of the classification table; the target column is
# separated from the features in a later cell.
query = 'SELECT  * from [RENTERS_STP].[dbo].[DR_DetailedRequest_classification]'

# Open the connection only for as long as the read takes, and always release
# it -- even if the query fails. The connection is closed explicitly because
# pyodbc's `with conn:` block manages the transaction, not the connection
# lifetime.
conn = pyodbc.connect(conn_str)
try:
    df = pd.read_sql(query, conn)
finally:
    conn.close()

### Get the features and labels from the dataset, then split it into training and testing sets

In [3]:
# Seed constant defined in this cell; note that the split below pins its own
# random_state of 42 rather than reading this value.
SEED = 1

# Separate the target column from the predictor columns.
target_col = 'PREDICTION_VALUE_Y_BOOL'
y = df[target_col]

# Dummy-encode the predictors with pandas, then replace any remaining missing
# values with 0.
features = df.drop(columns=[target_col])
X = pd.get_dummies(features).fillna(0)

# Hold out 30% of the rows for testing, stratified on the target.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

### Define parameters for Grid search

In [4]:
# Hyperparameter grid for the decision tree: every combination of the values
# below (3 depths x 4 leaf sizes) is evaluated by the grid search.
# The min_samples_leaf entries are floats, so scikit-learn interprets them as
# a fraction of the training samples rather than an absolute count.
params_dt = dict(
    max_depth=(2, 3, 4),
    min_samples_leaf=(0.12, 0.14, 0.16, 0.18),
)

### Instantiate the classifier and the grid search

In [5]:
# Instantiate dt
# random_state comes from the notebook's SEED constant instead of a repeated
# literal, so the seed is controlled from one place.
# The max_depth given here is only a starting value: the grid search sets
# max_depth from params_dt on each candidate it evaluates.
dt = DecisionTreeClassifier(max_depth=2, random_state=SEED)

# Instantiate grid_dt
# 5-fold cross-validation over params_dt, candidates ranked by ROC AUC;
# n_jobs=-1 uses all available CPU cores.
grid_dt = GridSearchCV(estimator=dt,
                       param_grid=params_dt,
                       scoring='roc_auc',
                       cv=5,
                       n_jobs=-1)


### Fit and train the model

In [6]:
# Run the cross-validated search on the training split only; the call is left
# as the cell's last expression so the fitted search object is displayed.
grid_dt.fit(X=X_train, y=y_train)


GridSearchCV(cv=5, error_score=nan,
             estimator=DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None,
                                              criterion='gini', max_depth=2,
                                              max_features=None,
                                              max_leaf_nodes=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              presort='deprecated',
                                              random_state=1, splitter='best'),
             iid='deprecated', n_jobs=-1,
             param_grid={'max_depth': (2, 3, 4),
                         'min_samples_leaf': (0.12, 0.14, 0.16, 0.18)},
           

### Model Evaluation

In [7]:
# Predicted class probabilities for the held-out rows; column 1 is kept as the
# positive-class probability.
proba_matrix = grid_dt.predict_proba(X_test)
y_pred_proba = proba_matrix[:, 1]

# Area under the ROC curve on the test split.
test_roc_auc = roc_auc_score(y_true=y_test, y_score=y_pred_proba)

# Report the score to six decimal places.
report = 'Test set ROC AUC score: {:.6f}'.format(test_roc_auc)
print(report)

Test set ROC AUC score: 0.843606


### Display the best parameters

In [8]:
# Report the winning hyperparameter combination and keep a handle on the
# corresponding estimator selected by the search.
best_params_msg = 'Best parameters: {}'.format(grid_dt.best_params_)
best_model = grid_dt.best_estimator_
print(best_params_msg)

Best parameters: {'max_depth': 3, 'min_samples_leaf': 0.12}
