In [1]:
from sklearn import datasets 
iris = datasets.load_iris()

X = iris.data[:,2:]
y = iris.target

In [2]:
from sklearn.model_selection import train_test_split, cross_val_score 
X_train, X_test, y_train, y_test = train_test_split(X,y,stratify=y,random_state=7)


In [3]:
#Instantiate a nearest neighbour classifier 
from sklearn.neighbors import KNeighborsClassifier 
knn_classifier = KNeighborsClassifier()

In [4]:
#Preparing a parameter grid - a dictionary with the parameters setting to try with grid search

param_grid = {'n_neighbors': list(range(3,9,1))}

In [5]:
#instantiating a grid search 
from sklearn.model_selection import GridSearchCV 
grid_search = GridSearchCV(knn_classifier, param_grid, cv=10)

In [6]:
grid_search.fit(X_train,y_train)

GridSearchCV(cv=10, estimator=KNeighborsClassifier(),
             param_grid={'n_neighbors': [3, 4, 5, 6, 7, 8]})

In [7]:
#View the results 
grid_search.best_params_

{'n_neighbors': 3}

In [8]:
grid_search.cv_results_['mean_test_score']

array([0.95530303, 0.93787879, 0.95530303, 0.95530303, 0.95530303,
       0.95530303])

##### Another way - brute force methods (time consuming)

In [10]:
all_scores = []

for n_neighbors in range(3,9,1):
    knn_classifier = KNeighborsClassifier(n_neighbors=n_neighbors)
    all_scores.append((n_neighbors, cross_val_score(knn_classifier, X_train, y_train, cv=10).mean()))

sorted(all_scores, key=lambda x:x[1], reverse=True)


[(3, 0.9553030303030303),
 (5, 0.9553030303030303),
 (6, 0.9553030303030303),
 (7, 0.9553030303030303),
 (8, 0.9553030303030303),
 (4, 0.9378787878787879)]

#### Randomized Search
##### `RandomizedSearchCV` is more useful with medium amount of data or with a model involving a few parameters, as `GridSearchCV` can be computationally expensive

In [11]:
from sklearn import datasets 
iris = datasets.load_iris()
X = iris.data[:,2:]
y = iris.target 

from sklearn.model_selection import train_test_split 

X_train,X_test,y_train,y_test = train_test_split(X,y,stratify=y,random_state=7)

In [12]:
from sklearn.neighbors import KNeighborsClassifier 
knn_clf = KNeighborsClassifier()

In [13]:
#Parameter distribution 

param_dist = {'n_neighbors':list(range(3,9,1))}

In [14]:
from sklearn.model_selection import RandomizedSearchCV 
rs = RandomizedSearchCV(knn_clf, param_dist, cv=10, n_iter=6)


In [15]:
#fitting the randomized grid search estimator
rs.fit(X_train, y_train)

RandomizedSearchCV(cv=10, estimator=KNeighborsClassifier(), n_iter=6,
                   param_distributions={'n_neighbors': [3, 4, 5, 6, 7, 8]})

In [16]:
#Results 
rs.best_params_

{'n_neighbors': 3}

In [27]:
zip(rs.cv_results_['params'],rs.cv_results_['mean_test_score'])

<zip at 0x23621996cc0>

In [28]:
#If we scan a larger parameter space with RandomizedSearchCV

param_dist = {'n_neighbors': list(range(3,50,1))}
rs = RandomizedSearchCV(knn_clf, param_dist, cv=10, n_iter=15)
rs.fit(X_train, y_train)
rs.best_params_

{'n_neighbors': 15}

##### Compare timing of RandomizedSearchCV and GridSearchCV

In [29]:
#RandomizedSearchCV
%time rs.fit(X_train, y_train)

CPU times: total: 141 ms
Wall time: 210 ms


RandomizedSearchCV(cv=10, estimator=KNeighborsClassifier(), n_iter=15,
                   param_distributions={'n_neighbors': [3, 4, 5, 6, 7, 8, 9, 10,
                                                        11, 12, 13, 14, 15, 16,
                                                        17, 18, 19, 20, 21, 22,
                                                        23, 24, 25, 26, 27, 28,
                                                        29, 30, 31, 32, ...]})

In [30]:
#GridSearchCV
from sklearn.model_selection import GridSearchCV 
param_grid = {'n_neighbors':list(range(3,50,1))}
gs = GridSearchCV(knn_clf, param_grid, cv=10)

In [31]:
#timing grid search
%time gs.fit(X_train, y_train)

CPU times: total: 172 ms
Wall time: 522 ms


GridSearchCV(cv=10, estimator=KNeighborsClassifier(),
             param_grid={'n_neighbors': [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
                                         14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
                                         24, 25, 26, 27, 28, 29, 30, 31, 32, ...]})

In [32]:
gs.best_params_

{'n_neighbors': 11}