In [1]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import KFold , cross_val_score
from sklearn.datasets import load_iris
iris = load_iris()
X = iris.data
y = iris.target

In [2]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.3,random_state = 14)

# Manual Tuning

In [3]:
k_value = list(range(2,11))
algorithm = ['auto','ball_tree','kd_tree','brute']
scores = []
best_comb = []
kfold = KFold(n_splits=5)

for algo in algorithm:
  for k in k_value:
    knn = KNeighborsClassifier(n_neighbors=k,algorithm=algo)
    results = cross_val_score(knn,X_train,y_train,cv = kfold)

    print(f'Score:{round(results.mean(),4)} with algo = {algo} , K = {k}')
    scores.append(results.mean())
    best_comb.append((k,algo))

best_param = best_comb[scores.index(max(scores))]
print(f'\nThe Best Score : {max(scores)}')
print(f"['algorithm': {best_param[1]} ,'n_neighbors': {best_param[0]}]")


Score:0.9238 with algo = auto , K = 2
Score:0.9524 with algo = auto , K = 3
Score:0.9524 with algo = auto , K = 4
Score:0.9714 with algo = auto , K = 5
Score:0.9714 with algo = auto , K = 6
Score:0.9429 with algo = auto , K = 7
Score:0.9524 with algo = auto , K = 8
Score:0.9714 with algo = auto , K = 9
Score:0.9619 with algo = auto , K = 10
Score:0.9238 with algo = ball_tree , K = 2
Score:0.9524 with algo = ball_tree , K = 3
Score:0.9524 with algo = ball_tree , K = 4
Score:0.9714 with algo = ball_tree , K = 5
Score:0.9714 with algo = ball_tree , K = 6
Score:0.9429 with algo = ball_tree , K = 7
Score:0.9524 with algo = ball_tree , K = 8
Score:0.9714 with algo = ball_tree , K = 9
Score:0.9619 with algo = ball_tree , K = 10
Score:0.9238 with algo = kd_tree , K = 2
Score:0.9524 with algo = kd_tree , K = 3
Score:0.9524 with algo = kd_tree , K = 4
Score:0.9714 with algo = kd_tree , K = 5
Score:0.9714 with algo = kd_tree , K = 6
Score:0.9429 with algo = kd_tree , K = 7
Score:0.9524 with algo 

# GridSearchCV

In [5]:
from sklearn.model_selection import GridSearchCV

knn = KNeighborsClassifier()
grid_param = { 'n_neighbors' : list(range(2,11)) , 'algorithm' : ['auto','ball_tree','kd_tree','brute'] }
grid = GridSearchCV(knn,grid_param)
grid.fit(X_train,y_train)

GridSearchCV(cv=None, error_score=nan,
             estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                            metric='minkowski',
                                            metric_params=None, n_jobs=None,
                                            n_neighbors=5, p=2,
                                            weights='uniform'),
             iid='deprecated', n_jobs=None,
             param_grid={'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
                         'n_neighbors': [2, 3, 4, 5, 6, 7, 8, 9, 10]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [5]:
grid.best_params_

{'algorithm': 'auto', 'n_neighbors': 6}

In [6]:
grid.best_score_

0.9714285714285715

In [7]:
grid.cv_results_['params']

[{'algorithm': 'auto', 'n_neighbors': 2},
 {'algorithm': 'auto', 'n_neighbors': 3},
 {'algorithm': 'auto', 'n_neighbors': 4},
 {'algorithm': 'auto', 'n_neighbors': 5},
 {'algorithm': 'auto', 'n_neighbors': 6},
 {'algorithm': 'auto', 'n_neighbors': 7},
 {'algorithm': 'auto', 'n_neighbors': 8},
 {'algorithm': 'auto', 'n_neighbors': 9},
 {'algorithm': 'auto', 'n_neighbors': 10},
 {'algorithm': 'ball_tree', 'n_neighbors': 2},
 {'algorithm': 'ball_tree', 'n_neighbors': 3},
 {'algorithm': 'ball_tree', 'n_neighbors': 4},
 {'algorithm': 'ball_tree', 'n_neighbors': 5},
 {'algorithm': 'ball_tree', 'n_neighbors': 6},
 {'algorithm': 'ball_tree', 'n_neighbors': 7},
 {'algorithm': 'ball_tree', 'n_neighbors': 8},
 {'algorithm': 'ball_tree', 'n_neighbors': 9},
 {'algorithm': 'ball_tree', 'n_neighbors': 10},
 {'algorithm': 'kd_tree', 'n_neighbors': 2},
 {'algorithm': 'kd_tree', 'n_neighbors': 3},
 {'algorithm': 'kd_tree', 'n_neighbors': 4},
 {'algorithm': 'kd_tree', 'n_neighbors': 5},
 {'algorithm': 'k

In [8]:
grid.cv_results_['mean_test_score']

array([0.93333333, 0.96190476, 0.95238095, 0.97142857, 0.97142857,
       0.94285714, 0.96190476, 0.97142857, 0.97142857, 0.93333333,
       0.96190476, 0.95238095, 0.97142857, 0.97142857, 0.94285714,
       0.96190476, 0.97142857, 0.97142857, 0.93333333, 0.96190476,
       0.95238095, 0.97142857, 0.97142857, 0.94285714, 0.96190476,
       0.97142857, 0.97142857, 0.93333333, 0.96190476, 0.94285714,
       0.96190476, 0.96190476, 0.94285714, 0.96190476, 0.97142857,
       0.96190476])

## RandomizedSearchCV

In [45]:
from sklearn.model_selection import RandomizedSearchCV

knn = KNeighborsClassifier()
grid_param = { 'n_neighbors' : list(range(2,11)) , 'algorithm' : ['auto','ball_tree','kd_tree','brute'] }
rand_ser = RandomizedSearchCV(knn,grid_param,n_iter=10)
rand_ser.fit(X_train,y_train)

RandomizedSearchCV(cv=None, error_score=nan,
                   estimator=KNeighborsClassifier(algorithm='auto',
                                                  leaf_size=30,
                                                  metric='minkowski',
                                                  metric_params=None,
                                                  n_jobs=None, n_neighbors=5,
                                                  p=2, weights='uniform'),
                   iid='deprecated', n_iter=10, n_jobs=None,
                   param_distributions={'algorithm': ['auto', 'ball_tree',
                                                      'kd_tree', 'brute'],
                                        'n_neighbors': [2, 3, 4, 5, 6, 7, 8, 9,
                                                        10]},
                   pre_dispatch='2*n_jobs', random_state=None, refit=True,
                   return_train_score=False, scoring=None, verbose=0)

In [46]:
rand_ser.best_score_

0.9714285714285715

In [47]:
rand_ser.best_params_

{'algorithm': 'kd_tree', 'n_neighbors': 9}

In [48]:
rand_ser.cv_results_['params']

[{'algorithm': 'ball_tree', 'n_neighbors': 4},
 {'algorithm': 'kd_tree', 'n_neighbors': 10},
 {'algorithm': 'kd_tree', 'n_neighbors': 9},
 {'algorithm': 'ball_tree', 'n_neighbors': 5},
 {'algorithm': 'auto', 'n_neighbors': 6},
 {'algorithm': 'auto', 'n_neighbors': 7},
 {'algorithm': 'auto', 'n_neighbors': 10},
 {'algorithm': 'kd_tree', 'n_neighbors': 7},
 {'algorithm': 'brute', 'n_neighbors': 2},
 {'algorithm': 'kd_tree', 'n_neighbors': 5}]

In [49]:
rand_ser.cv_results_['mean_test_score']

array([0.95238095, 0.97142857, 0.97142857, 0.97142857, 0.97142857,
       0.94285714, 0.97142857, 0.94285714, 0.93333333, 0.97142857])

# Optuna

In [None]:
!pip install optuna

In [50]:
import optuna

def objective(trial):

    optimizer = trial.suggest_categorical('algorithm', ['auto','ball_tree','kd_tree','brute'])
    rf_max_depth = trial.suggest_int("k_n_neighbors", 2, 10, log=True)
    knn = KNeighborsClassifier(n_neighbors=rf_max_depth,algorithm=optimizer)

    score = cross_val_score(knn, X_train,y_train, n_jobs=-1, cv=3)
    accuracy = score.mean()
    return accuracy


if __name__ == "__main__":
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=10)
    print(study.best_trial)

[I 2020-09-03 06:20:41,695] Trial 0 finished with value: 0.9619047619047619 and parameters: {'algorithm': 'brute', 'k_n_neighbors': 3}. Best is trial 0 with value: 0.9619047619047619.
[I 2020-09-03 06:20:41,725] Trial 1 finished with value: 0.9619047619047619 and parameters: {'algorithm': 'brute', 'k_n_neighbors': 3}. Best is trial 0 with value: 0.9619047619047619.
[I 2020-09-03 06:20:41,747] Trial 2 finished with value: 0.9523809523809522 and parameters: {'algorithm': 'kd_tree', 'k_n_neighbors': 5}. Best is trial 0 with value: 0.9619047619047619.
[I 2020-09-03 06:20:41,770] Trial 3 finished with value: 0.9714285714285714 and parameters: {'algorithm': 'auto', 'k_n_neighbors': 3}. Best is trial 3 with value: 0.9714285714285714.
[I 2020-09-03 06:20:41,794] Trial 4 finished with value: 0.9428571428571427 and parameters: {'algorithm': 'kd_tree', 'k_n_neighbors': 4}. Best is trial 3 with value: 0.9714285714285714.
[I 2020-09-03 06:20:41,816] Trial 5 finished with value: 0.9619047619047619 a

FrozenTrial(number=3, value=0.9714285714285714, datetime_start=datetime.datetime(2020, 9, 3, 6, 20, 41, 755980), datetime_complete=datetime.datetime(2020, 9, 3, 6, 20, 41, 770617), params={'algorithm': 'auto', 'k_n_neighbors': 3}, distributions={'algorithm': CategoricalDistribution(choices=('auto', 'ball_tree', 'kd_tree', 'brute')), 'k_n_neighbors': IntLogUniformDistribution(high=10, low=2, step=1)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=3, state=TrialState.COMPLETE)


In [51]:
study.best_params

{'algorithm': 'auto', 'k_n_neighbors': 3}

In [52]:
study.best_value

0.9714285714285714

# TuneSearchCV Bayesian Optimization

In [None]:
!pip install tune-sklearn "ray[tune]"
!pip install scikit-optimize

In [57]:
from tune_sklearn import TuneSearchCV
from ray.tune.schedulers import MedianStoppingRule
import numpy as np
import warnings
warnings.filterwarnings("ignore") 

knn = KNeighborsClassifier()
grid_param = { 'n_neighbors' : list(range(2,11)) , 'algorithm' : ['auto','ball_tree','kd_tree','brute'] }

# scheduler = MedianStoppingRule(grace_period=10.0)

tune_search = TuneSearchCV(knn,grid_param,search_optimization="bayesian",max_iters=10)
tune_search.fit(X_train,y_train)

# pred = tune_search.predict(X_test)
# accuracy = np.count_nonzero(np.array(pred) == np.array(y_test)) / len(pred)
# print(accuracy)

The dashboard on node 5d53b37e9bc6 failed with the following error:
Traceback (most recent call last):
  File "/usr/lib/python3.6/asyncio/base_events.py", line 1062, in create_server
    sock.bind(sa)
OSError: [Errno 99] Cannot assign requested address

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/ray/dashboard/dashboard.py", line 961, in <module>
    dashboard.run()
  File "/usr/local/lib/python3.6/dist-packages/ray/dashboard/dashboard.py", line 576, in run
    aiohttp.web.run_app(self.app, host=self.host, port=self.port)
  File "/usr/local/lib/python3.6/dist-packages/aiohttp/web.py", line 433, in run_app
    reuse_port=reuse_port))
  File "/usr/lib/python3.6/asyncio/base_events.py", line 484, in run_until_complete
    return future.result()
  File "/usr/local/lib/python3.6/dist-packages/aiohttp/web.py", line 359, in _run_app
    await site.start()
  File "/usr/local/lib/python3.6

TuneSearchCV(cv=None, early_stopping=None, error_score=nan,
             estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                            metric='minkowski',
                                            metric_params=None, n_jobs=None,
                                            n_neighbors=5, p=2,
                                            weights='uniform'),
             local_dir='~/ray_results', max_iters=1, n_iter=None, n_jobs=None,
             param_distributions={'algorithm': ['auto', 'ball_tree', 'kd_tree',
                                                'brute'],
                                  'n_neighbors': [2, 3, 4, 5, 6, 7, 8, 9, 10]},
             random_state=None, refit=True, return_train_score=False,
             scoring=<function _passthrough_scorer at 0x7fe01e2f0f28>,
             search_optimization='bayesian', sk_n_jobs=-1, use_gpu=False,
             verbose=0)

In [58]:
tune_search.best_params_

{'algorithm': 'auto', 'n_neighbors': 6}

In [59]:
tune_search.best_score_

0.9714285714285716

In [60]:
tune_search.cv_results_['params']

[{'algorithm': 'auto', 'n_neighbors': 6},
 {'algorithm': 'brute', 'n_neighbors': 6},
 {'algorithm': 'auto', 'n_neighbors': 6},
 {'algorithm': 'auto', 'n_neighbors': 3},
 {'algorithm': 'brute', 'n_neighbors': 4},
 {'algorithm': 'auto', 'n_neighbors': 2},
 {'algorithm': 'ball_tree', 'n_neighbors': 5},
 {'algorithm': 'ball_tree', 'n_neighbors': 4},
 {'algorithm': 'kd_tree', 'n_neighbors': 4},
 {'algorithm': 'kd_tree', 'n_neighbors': 10}]

## Bayesian Optimization

In [None]:
!pip install scikit-optimize

In [7]:
from skopt import BayesSearchCV
import warnings
warnings.filterwarnings("ignore") 
# parameter ranges are specified by one of below
from skopt.space import Real, Categorical, Integer
knn = KNeighborsClassifier()
grid_param = { 'n_neighbors' : list(range(2,11)) , 'algorithm' : ['auto','ball_tree','kd_tree','brute'] }
Bayes = BayesSearchCV(knn,grid_param,n_iter=32,random_state=0)
Bayes.fit(X_train,y_train)


BayesSearchCV(cv=None, error_score='raise',
              estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                             metric='minkowski',
                                             metric_params=None, n_jobs=None,
                                             n_neighbors=5, p=2,
                                             weights='uniform'),
              fit_params=None, iid=True, n_iter=32, n_jobs=1, n_points=1,
              optimizer_kwargs=None, pre_dispatch='2*n_jobs', random_state=0,
              refit=True, return_train_score=False, scoring=None,
              search_spaces={'algorithm': ['auto', 'ball_tree', 'kd_tree',
                                           'brute'],
                             'n_neighbors': [2, 3, 4, 5, 6, 7, 8, 9, 10]},
              verbose=0)

In [8]:
Bayes.best_params_

OrderedDict([('algorithm', 'ball_tree'), ('n_neighbors', 10)])

In [9]:
Bayes.best_score_

0.9714285714285714

In [12]:
Bayes.cv_results_['params']

[OrderedDict([('algorithm', 'kd_tree'), ('n_neighbors', 8)]),
 OrderedDict([('algorithm', 'ball_tree'), ('n_neighbors', 10)]),
 OrderedDict([('algorithm', 'kd_tree'), ('n_neighbors', 2)]),
 OrderedDict([('algorithm', 'auto'), ('n_neighbors', 9)]),
 OrderedDict([('algorithm', 'auto'), ('n_neighbors', 4)]),
 OrderedDict([('algorithm', 'brute'), ('n_neighbors', 10)]),
 OrderedDict([('algorithm', 'brute'), ('n_neighbors', 8)]),
 OrderedDict([('algorithm', 'kd_tree'), ('n_neighbors', 6)]),
 OrderedDict([('algorithm', 'auto'), ('n_neighbors', 10)]),
 OrderedDict([('algorithm', 'ball_tree'), ('n_neighbors', 5)]),
 OrderedDict([('algorithm', 'ball_tree'), ('n_neighbors', 7)]),
 OrderedDict([('algorithm', 'auto'), ('n_neighbors', 3)]),
 OrderedDict([('algorithm', 'auto'), ('n_neighbors', 6)]),
 OrderedDict([('algorithm', 'auto'), ('n_neighbors', 5)]),
 OrderedDict([('algorithm', 'ball_tree'), ('n_neighbors', 9)]),
 OrderedDict([('algorithm', 'ball_tree'), ('n_neighbors', 6)]),
 OrderedDict([('a