# Boston dataset

In [1]:
from operator import itemgetter
from sklearn.datasets import load_boston
from sklearn.preprocessing import scale
from sklearn.neighbors import KNeighborsRegressor
from sklearn.cross_validation import cross_val_score
from sklearn.cross_validation import KFold
import numpy as np

In [2]:
# Load the Boston housing dataset that ships with scikit-learn.
boston = load_boston()
# Show the feature matrix as the cell output (the target lives in boston.target).
boston.data

array([[  6.32000000e-03,   1.80000000e+01,   2.31000000e+00, ...,
          1.53000000e+01,   3.96900000e+02,   4.98000000e+00],
       [  2.73100000e-02,   0.00000000e+00,   7.07000000e+00, ...,
          1.78000000e+01,   3.96900000e+02,   9.14000000e+00],
       [  2.72900000e-02,   0.00000000e+00,   7.07000000e+00, ...,
          1.78000000e+01,   3.92830000e+02,   4.03000000e+00],
       ..., 
       [  6.07600000e-02,   0.00000000e+00,   1.19300000e+01, ...,
          2.10000000e+01,   3.96900000e+02,   5.64000000e+00],
       [  1.09590000e-01,   0.00000000e+00,   1.19300000e+01, ...,
          2.10000000e+01,   3.93450000e+02,   6.48000000e+00],
       [  4.74100000e-02,   0.00000000e+00,   1.19300000e+01, ...,
          2.10000000e+01,   3.96900000e+02,   7.88000000e+00]])

In [3]:
# Standardise the feature columns before computing distances between samples.
x = scale(boston.data)
# Shuffled 5-fold split with a fixed seed; the same folds are reused for every p.
kfold = KFold(len(x), n_folds=5, shuffle=True, random_state=42)

# Collect (p, mean cross-validation score) for Minkowski exponents p = 1..9,
# using a distance-weighted 5-nearest-neighbours regressor each time.
scores = []
for p in range(1, 10):
    fold_scores = cross_val_score(
        KNeighborsRegressor(n_neighbors=5, weights='distance', p=p),
        x,
        boston.target,
        cv=kfold,
        scoring='mean_squared_error'
    )
    scores.append((p, np.average(fold_scores)))

# Best first: the reported scores are negated MSE, so larger (closer to zero) is better.
sorted(scores, key=itemgetter(1), reverse=True)

[(1, -16.050208508436157),
 (2, -17.33663788425967),
 (3, -18.091449770368897),
 (4, -19.208265576653694),
 (5, -19.780326282248048),
 (6, -20.155055370291404),
 (7, -20.683609264152977),
 (9, -21.01444326765608),
 (8, -21.064284354419261)]

# Wine dataset

In [4]:
import pandas
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cross_validation import KFold
from sklearn.cross_validation import cross_val_score
from sklearn.preprocessing import scale

In [5]:
# Read the wine data from a local CSV file. header=None tells pandas there is
# no header row, so the columns receive integer labels (0..13 in the preview).
wine = pandas.read_csv("wine.data", header=None)
# Preview the first rows as the cell output.
wine.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


In [6]:
# Column 0 is used as the class label; columns 1..13 are the features.
y = wine[0]
x = wine[list(range(1, 14))]
# Shuffled 5-fold split with a fixed seed, shared by every value of k below.
kfold = KFold(len(wine), n_folds=5, shuffle=True, random_state=42)

# Collect (k, mean fold accuracy) on the raw, unscaled features for k = 1..49.
accs = []
for i in range(1, 50):
    fold_accuracies = cross_val_score(
        KNeighborsClassifier(n_neighbors=i), x, y, cv=kfold, scoring='accuracy'
    )
    accs.append((i, sum(fold_accuracies) / len(fold_accuracies)))

# Best (k, accuracy) pair; on ties the smallest k wins, because max() keeps
# the first maximal entry and accs is built in increasing k.
max(accs, key=itemgetter(1))

(1, 0.7304761904761905)

In [7]:
# Repeat the k = 1..49 search on standardised features, reusing the same
# folds (kfold) so the result is directly comparable with the unscaled run.
x_scaled = scale(x)

# Collect (k, mean fold accuracy) for each neighbourhood size.
accs_scaled = []
for i in range(1, 50):
    fold_accuracies = cross_val_score(
        KNeighborsClassifier(n_neighbors=i), x_scaled, y, cv=kfold, scoring='accuracy'
    )
    accs_scaled.append((i, sum(fold_accuracies) / len(fold_accuracies)))

# Best (k, accuracy) pair; on ties the smallest k wins, because max() keeps
# the first maximal entry and accs_scaled is built in increasing k.
max(accs_scaled, key=itemgetter(1))

(29, 0.9776190476190475)