<a href="https://colab.research.google.com/github/mvince33/Coding-Dojo/blob/main/week07/gridsearchcv.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
# Imports
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV

In [10]:
# Load the data
iris = load_iris()

X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 42)

In [15]:
# Make a dictionary of parameters to tune
param_grid = {'max_depth': [1, 2, 3],
              'min_samples_leaf': [3, 15, 20],
              'min_samples_split': [2, 10, 100]}

[1, 2, 3]

In [17]:
# Tune a model using a for loop
scores = {}

for d in param_grid['max_depth']:
  for l in param_grid['min_samples_leaf']:
    for s in param_grid['min_samples_split']:
      model = DecisionTreeClassifier(max_depth = d,
                                     min_samples_leaf = l,
                                     min_samples_split = s)
      model.fit(X_train, y_train)
      score = model.score(X_test, y_test)
      scores[f'max_depth {d}, min_samples_leaf {l}, min_samples_split {s}, accuracy'] = score.round(6) * 100
scores

{'max_depth 1, min_samples_leaf 15, min_samples_split 10, accuracy': 68.4211,
 'max_depth 1, min_samples_leaf 15, min_samples_split 100, accuracy': 68.4211,
 'max_depth 1, min_samples_leaf 15, min_samples_split 2, accuracy': 68.4211,
 'max_depth 1, min_samples_leaf 20, min_samples_split 10, accuracy': 68.4211,
 'max_depth 1, min_samples_leaf 20, min_samples_split 100, accuracy': 68.4211,
 'max_depth 1, min_samples_leaf 20, min_samples_split 2, accuracy': 68.4211,
 'max_depth 1, min_samples_leaf 3, min_samples_split 10, accuracy': 68.4211,
 'max_depth 1, min_samples_leaf 3, min_samples_split 100, accuracy': 68.4211,
 'max_depth 1, min_samples_leaf 3, min_samples_split 2, accuracy': 68.4211,
 'max_depth 2, min_samples_leaf 15, min_samples_split 10, accuracy': 97.3684,
 'max_depth 2, min_samples_leaf 15, min_samples_split 100, accuracy': 68.4211,
 'max_depth 2, min_samples_leaf 15, min_samples_split 2, accuracy': 97.3684,
 'max_depth 2, min_samples_leaf 20, min_samples_split 10, accuracy'

In [22]:
# Tune the model using GridSearchCV
model = DecisionTreeClassifier()

param_grid = {'max_depth': [1, 2, 3],
              'min_samples_leaf': [3, 15, 20],
              'min_samples_split': [2, 10, 100]}

dt_grid_search = GridSearchCV(model, param_grid)
dt_grid_search.fit(X_train, y_train)

GridSearchCV(estimator=DecisionTreeClassifier(),
             param_grid={'max_depth': [1, 2, 3],
                         'min_samples_leaf': [3, 15, 20],
                         'min_samples_split': [2, 10, 100]})

In [25]:
# Find the best parameters
dt_grid_search.best_params_

{'max_depth': 3, 'min_samples_leaf': 3, 'min_samples_split': 2}

In [27]:
# Tune the model again
param_grid2 = {'max_depth': [3, 5, 10],
               'min_samples_leaf': [1, 2, 3],
               'min_samples_split': [2, 4, 7]}
dt_grid_search2 = GridSearchCV(model, param_grid2)
dt_grid_search2.fit(X_train, y_train)
dt_grid_search2.best_params_

{'max_depth': 5, 'min_samples_leaf': 3, 'min_samples_split': 7}

In [32]:
# Get the best model
best_model = dt_grid_search2.best_estimator_
# Train the model
best_model.fit(X_train, y_train)
# Score the model on the test set
best_model.score(X_test, y_test)

1.0

In [37]:
# Using a pipelin with GridSearchCV
knn_pipe = make_pipeline(StandardScaler(), KNeighborsClassifier())
knn_pipe.get_params()

{'kneighborsclassifier': KNeighborsClassifier(),
 'kneighborsclassifier__algorithm': 'auto',
 'kneighborsclassifier__leaf_size': 30,
 'kneighborsclassifier__metric': 'minkowski',
 'kneighborsclassifier__metric_params': None,
 'kneighborsclassifier__n_jobs': None,
 'kneighborsclassifier__n_neighbors': 5,
 'kneighborsclassifier__p': 2,
 'kneighborsclassifier__weights': 'uniform',
 'memory': None,
 'standardscaler': StandardScaler(),
 'standardscaler__copy': True,
 'standardscaler__with_mean': True,
 'standardscaler__with_std': True,
 'steps': [('standardscaler', StandardScaler()),
  ('kneighborsclassifier', KNeighborsClassifier())],
 'verbose': False}

In [38]:
# Make the parameters grid
pipe_param_grid = {'kneighborsclassifier__n_neighbors': range(1, 10),
                   'kneighborsclassifier__p': range(1, 5),
                   'kneighborsclassifier__weights': ['distance', 'uniform']}

In [41]:
# Tune the model
knn_pipe_gs = GridSearchCV(knn_pipe, pipe_param_grid)
knn_pipe_gs.fit(X_train, y_train)
print('Best KNN parameters')
print(knn_pipe_gs.best_params_)
best_pipe = knn_pipe_gs.best_estimator_
print(f'Accuracy of the best model is: {best_pipe.score(X_test, y_test)}')

Best KNN parameters
{'kneighborsclassifier__n_neighbors': 9, 'kneighborsclassifier__p': 2, 'kneighborsclassifier__weights': 'distance'}
Accuracy of the best model is: 1.0
