In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv')

In [3]:
df.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [4]:
X = df.iloc[:,:-1]
y = df.iloc[:,-1]

In [5]:
from sklearn.model_selection import cross_val_score,KFold
from sklearn.neighbors import KNeighborsRegressor

In [6]:
knn = KNeighborsRegressor()

In [7]:
kfold = KFold(n_splits=5, shuffle=True, random_state=1)
scores = cross_val_score(knn, X, y, cv=kfold, scoring='r2')

In [8]:
scores.mean()

0.4761976351913221

### GridSearchCV

In [9]:
from sklearn.model_selection import GridSearchCV

In [10]:
knn = KNeighborsRegressor()

In [12]:
param_grid = {
    'n_neighbors':[1,3,5,7,9,11,13,15,17,19,21,23,25],
    'weights':['uniform','distance'],
    'algorithm':['ball_tree', 'kd_tree', 'brute'],
    'p':[1,2]
}

In [13]:
gcv = GridSearchCV(knn, param_grid, scoring='r2', refit=True, cv=kfold, verbose=2)

In [14]:
gcv.fit(X,y)

Fitting 5 folds for each of 156 candidates, totalling 780 fits
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=2, weights=uniform; total

In [15]:
gcv.best_params_

{'algorithm': 'ball_tree', 'n_neighbors': 5, 'p': 1, 'weights': 'distance'}

In [16]:
gcv.best_score_

0.6117139367845081

In [17]:
gcv.cv_results_

{'mean_fit_time': array([0.00217366, 0.00120344, 0.0009006 , 0.00120268, 0.00130076,
        0.00120201, 0.00100064, 0.00109992, 0.00091333, 0.00099745,
        0.00111551, 0.00110097, 0.00140085, 0.00120621, 0.0007956 ,
        0.00119996, 0.00130196, 0.00130177, 0.00110354, 0.00130076,
        0.00100093, 0.00098505, 0.00120101, 0.00059762, 0.00110388,
        0.00111814, 0.00146422, 0.00119901, 0.00160666, 0.00130405,
        0.00151472, 0.0012856 , 0.00098171, 0.00099988, 0.00130396,
        0.00098643, 0.00098667, 0.00150099, 0.00130167, 0.00119929,
        0.00121422, 0.00099959, 0.0009923 , 0.00090213, 0.00128827,
        0.00150156, 0.00140171, 0.0009017 , 0.00130367, 0.00111852,
        0.00120401, 0.00110183, 0.00110345, 0.00128636, 0.00138884,
        0.00150428, 0.00140128, 0.00130076, 0.00111451, 0.00140171,
        0.00110426, 0.00139518, 0.00120335, 0.00156102, 0.00084615,
        0.00127697, 0.00098844, 0.00152287, 0.00120053, 0.00180697,
        0.00100074, 0.00151548,

In [18]:
pd.DataFrame(gcv.cv_results_)[['param_algorithm',	'param_n_neighbors',	'param_p', 'param_weights', 'mean_test_score']].sort_values('mean_test_score',ascending=False)

Unnamed: 0,param_algorithm,param_n_neighbors,param_p,param_weights,mean_test_score
61,kd_tree,5,1,distance,0.611714
9,ball_tree,5,1,distance,0.611714
113,brute,5,1,distance,0.611714
65,kd_tree,7,1,distance,0.605716
117,brute,7,1,distance,0.605716
...,...,...,...,...,...
54,kd_tree,1,2,uniform,0.331522
2,ball_tree,1,2,uniform,0.331522
107,brute,1,2,distance,0.331522
3,ball_tree,1,2,distance,0.331522


In [None]:
# gcv.predict(new_data)

### RandomizedSearchCV

In [19]:
from sklearn.model_selection import RandomizedSearchCV

In [20]:
rcv = RandomizedSearchCV(knn, param_grid, scoring='r2', refit=True, cv=kfold, verbose=2)

In [21]:
rcv.fit(X,y)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] END algorithm=ball_tree, n_neighbors=13, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=13, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=13, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=13, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=13, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=23, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=23, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=23, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=23, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=23, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=3, p=2, weights=unifor

In [22]:
rcv.best_score_

0.5868988743550128

In [23]:
rcv.best_params_

{'weights': 'distance', 'p': 1, 'n_neighbors': 9, 'algorithm': 'ball_tree'}