In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Build a k-nearest-neighbours classifier with every constructor argument left at its default.
neigh = KNeighborsClassifier()

In [None]:
# Show the estimator's current hyperparameters as the cell output.
neigh.get_params(deep=True)

{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

In [None]:
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [None]:
# Load the digits dataset bundled with scikit-learn, then pull out the
# feature matrix (X) and the label vector (y) in one step.
digits = datasets.load_digits()
X, y = digits.data, digits.target

In [None]:
# Hold out 10% of the samples for testing; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=1,
)

In [None]:
# Unfitted scaler; it is fitted on the training split in a later cell.
ss = StandardScaler()

In [None]:
# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so no test-set statistics reach the scaler.
ss.fit(X_train)
X_train_std, X_test_std = ss.transform(X_train), ss.transform(X_test)

In [None]:
# Baseline model: default hyperparameters, trained and scored on the raw splits.
# NOTE(review): X_train_std / X_test_std are computed in the previous cell but
# are not used here; this model sees the unscaled features - confirm that is intended.
kNN = KNeighborsClassifier().fit(X_train, y_train)

# score() is evaluated on each split and printed next to its label.
print('Train Score', kNN.score(X_train, y_train))
print('Test Score', kNN.score(X_test, y_test))

Train Score 0.9894867037724181
Test Score 1.0


In [None]:
# Predicted labels for every row of the held-out split, shown in full as the cell output.
kNN.predict(X_test)

array([1, 5, 0, 7, 1, 0, 6, 1, 5, 4, 9, 2, 7, 8, 4, 6, 9, 3, 7, 4, 7, 1,
       8, 6, 0, 9, 6, 1, 3, 7, 5, 9, 8, 3, 2, 8, 8, 1, 1, 0, 7, 9, 0, 0,
       8, 7, 2, 7, 4, 3, 4, 3, 4, 0, 4, 7, 0, 5, 5, 5, 2, 1, 7, 0, 5, 1,
       8, 3, 3, 4, 0, 3, 7, 4, 3, 4, 2, 9, 7, 3, 2, 5, 3, 4, 1, 5, 5, 2,
       5, 2, 2, 2, 2, 7, 0, 8, 1, 7, 4, 2, 3, 8, 2, 3, 3, 0, 2, 9, 9, 2,
       3, 2, 8, 1, 1, 9, 1, 2, 0, 4, 8, 5, 4, 4, 7, 6, 7, 6, 6, 1, 7, 5,
       6, 3, 8, 3, 7, 1, 8, 5, 3, 4, 7, 8, 5, 0, 6, 0, 6, 3, 7, 6, 5, 6,
       2, 2, 2, 3, 0, 7, 6, 5, 6, 4, 1, 0, 6, 0, 6, 4, 0, 9, 3, 8, 1, 2,
       3, 1, 9, 0])

In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate values per hyperparameter; the grid search tries every combination
# (2 * 5 * 4 = 40 candidates).
# NOTE(review): leaf_size is documented as a tree construction/query speed
# setting - confirm it is worth including in an accuracy search.
grid_params = dict(
    weights=['uniform', 'distance'],
    leaf_size=[5, 10, 15, 20, 50],
    n_neighbors=[5, 10, 20, 50],
)

gs = GridSearchCV(estimator=KNeighborsClassifier(), param_grid=grid_params)

# Later cells read best_score_, best_estimator_ and best_params_ from the
# object returned by fit().
gs_results = gs.fit(X_train, y_train)

In [None]:
gs_results.best_score_

0.9857814470817567

In [None]:
gs_results.best_estimator_

KNeighborsClassifier(algorithm='auto', leaf_size=5, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='distance')

In [None]:
gs_results.best_params_

{'leaf_size': 5, 'n_neighbors': 5, 'weights': 'distance'}

In [None]:
from sklearn.model_selection import RandomizedSearchCV

# Same candidate values as the grid search above, so both searches draw from
# the identical 40-combination space.
randomized_params = {
    'weights' : ['uniform', 'distance'],
    'leaf_size' : [5,10,15,20,50],
    'n_neighbors' : [5,10,20,50],
}

rs = RandomizedSearchCV(
    KNeighborsClassifier(),
    randomized_params,
    # Number of sampled candidates, spelled out so the search budget is visible
    # (10 is the library default).
    n_iter=10,
    # Fixed seed: without it a different subset of candidates is sampled on
    # every run, so best_params_ / best_score_ are not reproducible.
    # Uses the same seed value as the train/test split cell.
    random_state=1,
)

# Later cells read best_score_, best_estimator_ and best_params_ from the
# object returned by fit().
rs_results = rs.fit(X_train, y_train)

In [None]:
rs_results.best_score_

0.9802125138554446

In [None]:
rs_results.best_estimator_

KNeighborsClassifier(algorithm='auto', leaf_size=15, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=10, p=2,
                     weights='distance')

In [None]:
rs_results.best_params_

{'leaf_size': 15, 'n_neighbors': 10, 'weights': 'distance'}



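*   The randomized search and the grid search explore exactly the same 
parameter space. The resulting parameter settings are quite similar, while the 
run time of the randomized search is drastically lower, because it evaluates 
only a random sample of the combinations (10 by default) instead of all 
2 × 5 × 4 = 40 of them.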

*   The performance may be slightly worse for the randomized search; this is likely due to noise and would probably not carry over to a held-out test set.




*   Grid search is exhaustive and will find the best-scoring combination in the grid based on the training data. However, it does have some flaws: (1) it is time-consuming, depending on the size of your dataset and the number of hyperparameters; (2) it can lead to overfitting of the training set, resulting in a less viable model in the long run.
*   Randomized search evaluates a random sample of hyperparameter combinations, which reduces the danger of overfitting and is likely to provide more accurate long-term results — especially when only a small number of the hyperparameters are significant.