<a href="https://colab.research.google.com/github/suumino/DataAlanysis/blob/main/Part_04_Grid_Searches_for_Hyper_Parameters.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Grid Searches
=================

Grid search with built-in cross-validation

In [2]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

In [3]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

# Load the digits dataset bundled with scikit-learn.
digits = load_digits()

# Hold out a test split. Fixing random_state makes the split identical on every
# run, so the results of this notebook can be reproduced after a kernel restart.
X_train, X_test, y_train, y_test = train_test_split(
    digits.data, digits.target, random_state=0
)

Define parameter grid:

In [4]:
import numpy as np

# Candidate values are powers of ten: C over 1e-3..1e2, gamma over 1e-5..1e-1.
param_grid = {
    'C': np.power(10., np.arange(-3, 3)),
    'gamma': np.power(10., np.arange(-5, 0)),
}

# Show small floats in fixed-point form instead of scientific notation.
np.set_printoptions(suppress=True)
print(param_grid)

{'C': array([  0.001,   0.01 ,   0.1  ,   1.   ,  10.   , 100.   ]), 'gamma': array([0.00001, 0.0001 , 0.001  , 0.01   , 0.1    ])}


In [5]:
# Search every combination in `param_grid` for an SVC; verbose=3 prints one
# log line per cross-validation fit.
grid_search = GridSearchCV(estimator=SVC(), param_grid=param_grid, verbose=3)

A GridSearchCV object behaves just like a normal classifier.

In [6]:
# Run the cross-validated search on the training split only.
grid_search.fit(X=X_train, y=y_train)

Fitting 5 folds for each of 30 candidates, totalling 150 fits
[CV 1/5] END ..............C=0.001, gamma=1e-05;, score=0.107 total time=   0.2s
[CV 2/5] END ..............C=0.001, gamma=1e-05;, score=0.107 total time=   0.2s
[CV 3/5] END ..............C=0.001, gamma=1e-05;, score=0.104 total time=   0.2s
[CV 4/5] END ..............C=0.001, gamma=1e-05;, score=0.108 total time=   0.2s
[CV 5/5] END ..............C=0.001, gamma=1e-05;, score=0.108 total time=   0.2s
[CV 1/5] END .............C=0.001, gamma=0.0001;, score=0.107 total time=   0.2s
[CV 2/5] END .............C=0.001, gamma=0.0001;, score=0.107 total time=   0.2s
[CV 3/5] END .............C=0.001, gamma=0.0001;, score=0.104 total time=   0.2s
[CV 4/5] END .............C=0.001, gamma=0.0001;, score=0.108 total time=   0.2s
[CV 5/5] END .............C=0.001, gamma=0.0001;, score=0.108 total time=   0.2s
[CV 1/5] END ..............C=0.001, gamma=0.001;, score=0.107 total time=   0.2s
[CV 2/5] END ..............C=0.001, gamma=0.001

GridSearchCV(estimator=SVC(),
             param_grid={'C': array([  0.001,   0.01 ,   0.1  ,   1.   ,  10.   , 100.   ]),
                         'gamma': array([0.00001, 0.0001 , 0.001  , 0.01   , 0.1    ])},
             verbose=3)

In [7]:
# Predict labels for the held-out test samples with the fitted search object.
grid_search.predict(X=X_test)

array([4, 7, 6, 3, 2, 8, 1, 1, 5, 1, 0, 6, 9, 9, 4, 2, 8, 3, 1, 7, 1, 7,
       7, 3, 8, 4, 8, 0, 4, 1, 9, 6, 4, 8, 0, 9, 6, 4, 0, 5, 9, 8, 4, 8,
       0, 0, 9, 0, 8, 3, 0, 9, 5, 3, 4, 2, 2, 8, 0, 2, 5, 3, 0, 0, 9, 0,
       9, 7, 7, 1, 5, 5, 5, 9, 8, 3, 5, 2, 4, 4, 3, 3, 0, 0, 8, 3, 9, 6,
       7, 9, 7, 7, 3, 4, 5, 8, 1, 5, 7, 7, 3, 3, 3, 9, 6, 4, 8, 7, 2, 0,
       0, 2, 3, 8, 5, 8, 3, 0, 9, 3, 9, 1, 6, 2, 6, 0, 2, 4, 5, 4, 4, 8,
       2, 1, 4, 4, 7, 9, 1, 4, 9, 7, 7, 6, 3, 3, 2, 2, 4, 7, 3, 7, 1, 7,
       1, 0, 8, 1, 1, 7, 3, 5, 7, 6, 4, 2, 8, 8, 1, 3, 2, 9, 0, 6, 3, 3,
       9, 9, 2, 3, 6, 2, 2, 8, 4, 2, 2, 8, 6, 9, 0, 9, 9, 9, 6, 8, 1, 7,
       2, 0, 1, 2, 4, 7, 4, 2, 8, 2, 1, 6, 4, 4, 7, 1, 4, 4, 0, 9, 1, 8,
       5, 9, 8, 3, 4, 0, 6, 2, 5, 6, 5, 0, 3, 8, 7, 1, 9, 4, 6, 3, 7, 6,
       4, 6, 3, 8, 3, 8, 5, 1, 3, 4, 0, 0, 6, 2, 9, 6, 1, 1, 5, 8, 0, 7,
       0, 8, 6, 8, 8, 8, 7, 8, 1, 0, 8, 2, 6, 3, 7, 7, 4, 5, 2, 3, 8, 0,
       4, 3, 4, 9, 7, 3, 7, 3, 3, 8, 4, 0, 4, 0, 9,

In [8]:
# Evaluate the fitted search object on the held-out test split.
grid_search.score(X=X_test, y=y_test)

0.9911111111111112

In [9]:
# Parameter combination selected by the grid search.
grid_search.best_params_

{'C': 10.0, 'gamma': 0.001}

In [30]:
# We extract just the scores
%matplotlib inline
import matplotlib.pyplot as plt

scores = [x[1] for x in grid_search.scoring]
scores = np.array(scores).reshape(6, 5)

plt.matshow(scores)
plt.xlabel('gamma')
plt.ylabel('C')
plt.colorbar()
plt.xticks(np.arange(5), param_grid['gamma'])
plt.yticks(np.arange(6), param_grid['C']);

TypeError: ignored