In [1]:
import numpy as np
import pandas as pd

In [6]:
column_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
df = pd.read_csv('housing.csv', header=None, delimiter=r"\s+", names=column_names)


In [7]:
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


In [8]:
X = df.iloc[:,:-1]
y = df.iloc[:,-1]

In [9]:
from sklearn.model_selection import cross_val_score,KFold
from sklearn.neighbors import KNeighborsRegressor

In [10]:
knn = KNeighborsRegressor()

In [11]:
kfold = KFold(n_splits=5, shuffle=True, random_state=1)
scores = cross_val_score(knn, X, y, cv=kfold, scoring='r2')

In [12]:
scores.mean()

0.4761976351913221

# GridSearchCV

In [13]:
from sklearn.model_selection import GridSearchCV

In [14]:
knn = KNeighborsRegressor()

In [15]:
param_grid = {
    'n_neighbors':[1,3,5,7,10,12,15,17,20],
    'weights':['uniform','distance'],
    'algorithm':['ball_tree', 'kd_tree', 'brute'],
    'p':[1,2]
}

In [16]:
gcv = GridSearchCV(knn, param_grid, scoring='r2', refit=True, cv=kfold, verbose=2)

In [17]:
gcv.fit(X,y)

Fitting 5 folds for each of 108 candidates, totalling 540 fits
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=2, weights=uniform; total

[CV] END algorithm=ball_tree, n_neighbors=15, p=2, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=15, p=2, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=15, p=2, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=15, p=2, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=15, p=2, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=17, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=17, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=17, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=17, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=17, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=17, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neigh

[CV] END algorithm=kd_tree, n_neighbors=10, p=2, weights=uniform; total time=   0.0s
[CV] END algorithm=kd_tree, n_neighbors=10, p=2, weights=uniform; total time=   0.0s
[CV] END algorithm=kd_tree, n_neighbors=10, p=2, weights=distance; total time=   0.0s
[CV] END algorithm=kd_tree, n_neighbors=10, p=2, weights=distance; total time=   0.0s
[CV] END algorithm=kd_tree, n_neighbors=10, p=2, weights=distance; total time=   0.0s
[CV] END algorithm=kd_tree, n_neighbors=10, p=2, weights=distance; total time=   0.0s
[CV] END algorithm=kd_tree, n_neighbors=10, p=2, weights=distance; total time=   0.0s
[CV] END algorithm=kd_tree, n_neighbors=12, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=kd_tree, n_neighbors=12, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=kd_tree, n_neighbors=12, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=kd_tree, n_neighbors=12, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=kd_tree, n_neighbors=12, p=1, weights=uni

[CV] END algorithm=brute, n_neighbors=1, p=2, weights=uniform; total time=   0.0s
[CV] END algorithm=brute, n_neighbors=1, p=2, weights=uniform; total time=   0.0s
[CV] END algorithm=brute, n_neighbors=1, p=2, weights=uniform; total time=   0.0s
[CV] END algorithm=brute, n_neighbors=1, p=2, weights=distance; total time=   0.0s
[CV] END algorithm=brute, n_neighbors=1, p=2, weights=distance; total time=   0.0s
[CV] END algorithm=brute, n_neighbors=1, p=2, weights=distance; total time=   0.0s
[CV] END algorithm=brute, n_neighbors=1, p=2, weights=distance; total time=   0.0s
[CV] END algorithm=brute, n_neighbors=1, p=2, weights=distance; total time=   0.0s
[CV] END algorithm=brute, n_neighbors=3, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=brute, n_neighbors=3, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=brute, n_neighbors=3, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=brute, n_neighbors=3, p=1, weights=uniform; total time=   0.0s
[CV] END al

[CV] END algorithm=brute, n_neighbors=12, p=2, weights=uniform; total time=   0.0s
[CV] END algorithm=brute, n_neighbors=12, p=2, weights=uniform; total time=   0.0s
[CV] END algorithm=brute, n_neighbors=12, p=2, weights=uniform; total time=   0.0s
[CV] END algorithm=brute, n_neighbors=12, p=2, weights=distance; total time=   0.0s
[CV] END algorithm=brute, n_neighbors=12, p=2, weights=distance; total time=   0.0s
[CV] END algorithm=brute, n_neighbors=12, p=2, weights=distance; total time=   0.0s
[CV] END algorithm=brute, n_neighbors=12, p=2, weights=distance; total time=   0.0s
[CV] END algorithm=brute, n_neighbors=12, p=2, weights=distance; total time=   0.0s
[CV] END algorithm=brute, n_neighbors=15, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=brute, n_neighbors=15, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=brute, n_neighbors=15, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=brute, n_neighbors=15, p=1, weights=uniform; total time=   0.0s

In [18]:
gcv.best_params_

{'algorithm': 'ball_tree', 'n_neighbors': 5, 'p': 1, 'weights': 'distance'}

In [19]:
gcv.best_score_

0.6117139367845081

In [20]:
gcv.cv_results_

{'mean_fit_time': array([0.00445738, 0.00160003, 0.00140467, 0.00199623, 0.00140076,
        0.0017962 , 0.00200067, 0.00139999, 0.00120006, 0.00335259,
        0.00179148, 0.00199671, 0.00237617, 0.00159993, 0.00199881,
        0.00160007, 0.00140018, 0.00260348, 0.00201812, 0.00120344,
        0.00150275, 0.00139985, 0.00139389, 0.00178313, 0.00181522,
        0.00200839, 0.00200901, 0.00249085, 0.00219846, 0.0015976 ,
        0.00140281, 0.00139585, 0.00159259, 0.00160036, 0.00221691,
        0.0018188 , 0.00251231, 0.00220733, 0.00199423, 0.0018002 ,
        0.0016027 , 0.00159993, 0.00220771, 0.00181546, 0.00141187,
        0.00181127, 0.00260057, 0.00183201, 0.00182486, 0.0020093 ,
        0.00280862, 0.00231748, 0.00203619, 0.00199237, 0.00241394,
        0.00315824, 0.00219007, 0.00258865, 0.00261254, 0.00200014,
        0.00236073, 0.00332417, 0.00241046, 0.00224252, 0.00260611,
        0.00254879, 0.00260682, 0.00260339, 0.00222564, 0.00219364,
        0.00220962, 0.00179992,

In [22]:
pd.DataFrame(gcv.cv_results_)[['param_algorithm','param_n_neighbors','param_p', 'param_weights', 'mean_test_score']].sort_values('mean_test_score',ascending=False)

Unnamed: 0,param_algorithm,param_n_neighbors,param_p,param_weights,mean_test_score
81,brute,5,1,distance,0.611714
45,kd_tree,5,1,distance,0.611714
9,ball_tree,5,1,distance,0.611714
49,kd_tree,7,1,distance,0.605716
85,brute,7,1,distance,0.605716
...,...,...,...,...,...
38,kd_tree,1,2,uniform,0.331522
2,ball_tree,1,2,uniform,0.331522
75,brute,1,2,distance,0.331522
39,kd_tree,1,2,distance,0.331522


In [None]:
gcv.predict(new_data)