In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris

In [2]:
# Load the bundled Iris dataset, then pull out the feature matrix (x)
# and the class-label vector (y) that the later cells work with.
iris = load_iris()
x, y = iris.data, iris.target

In [4]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the samples for testing. Stratifying on y keeps the class
# proportions equal across both splits; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=233,
    stratify=y,
)
# Show the shape of every split as a quick sanity check.
tuple(part.shape for part in (x_train, x_test, y_train, y_test))

((105, 4), (45, 4), (105,), (45,))

In [5]:
from sklearn.neighbors import KNeighborsClassifier

In [6]:
# Exhaustive search over the neighbour count, the vote weighting scheme and
# the Minkowski power p, keeping the best-scoring combination.
# NOTE(review): candidates are ranked by their score on the test split, so the
# winning score is chosen on the same data it is reported on and is likely
# optimistic.
best_score, best_n, best_weight, best_p = -1, -1, '', -1
for n in range(1, 20):
    for weight in ['uniform', 'distance']:
        for p in range(1, 7):
            # fit() hands back the estimator itself, so build-and-train can be chained.
            knn = KNeighborsClassifier(n_neighbors=n, weights=weight, p=p).fit(x_train, y_train)
            score = knn.score(x_test, y_test)
            # Strict comparison: on a tie the first combination encountered is kept.
            if score > best_score:
                best_score, best_n, best_weight, best_p = score, n, weight, p
print('n_neighbors:', best_n)
print('weights:', best_weight)
print('p:', best_p)
print('score:', best_score)

n_neighbors: 5
weights: uniform
p: 2
score: 1.0


In [7]:
from sklearn.model_selection import GridSearchCV

In [8]:
# Search space for the grid search: neighbour count, vote weighting scheme,
# and the power p of the Minkowski distance.
params = dict(
    n_neighbors=range(1, 20),
    weights=['uniform', 'distance'],
    p=range(1, 7),
)

In [9]:
# Cross-validated search over every combination in `params`, run on all
# available CPU cores (n_jobs=-1). Only the training split is used here.
grid = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=params,
    n_jobs=-1,
)
# fit() returns the fitted search object, so `model` and `grid` are the same instance.
model = grid.fit(x_train, y_train)

In [10]:
# Hyperparameter combination selected by the grid search.
model.best_params_

{'n_neighbors': 9, 'p': 2, 'weights': 'uniform'}

In [11]:
# Mean cross-validated score achieved by the selected combination.
model.best_score_

0.961904761904762

In [12]:
# Score on the held-out test split, which took no part in the search above.
model.score(x_test, y_test)

0.9555555555555556

### Cross Validation

In [14]:
from sklearn.model_selection import cross_val_score

# Baseline: 5-fold cross-validation of a default-parameter KNN classifier,
# run on the training split only.
neigh = KNeighborsClassifier()
cv_scores = cross_val_score(estimator=neigh, X=x_train, y=y_train, cv=5)
# One score per fold.
cv_scores

array([0.95238095, 1.        , 0.95238095, 0.85714286, 1.        ])

In [15]:
# Exhaustive search over the neighbour count, the vote weighting scheme and
# the Minkowski power p. Each candidate is ranked by its mean 5-fold
# cross-validation score on the training split; the test split is not touched.
best_score, best_n, best_weight, best_p = -1, -1, '', -1
best_cv_score = None  # per-fold scores of the current best candidate
for n in range(1, 20):
    for weight in ['uniform', 'distance']:
        for p in range(1, 7):
            knn = KNeighborsClassifier(n_neighbors=n, weights=weight, p=p)
            cv_scores = cross_val_score(estimator=knn, X=x_train, y=y_train, cv=5)
            score = cv_scores.mean()
            # Strict comparison: on a tie the first combination encountered is kept.
            if score > best_score:
                best_score, best_n, best_weight, best_p = score, n, weight, p
                best_cv_score = cv_scores

print('n_neighbors:', best_n)
print('weights:', best_weight)
print('p:', best_p)
print('score:', best_score)
print('cv_scores:', best_cv_score)

n_neighbors: 9
weights: uniform
p: 2
score: 0.961904761904762
cv_scores: [1.         1.         0.95238095 0.85714286 1.        ]


In [16]:
# Mean of the winning combination's per-fold scores; this is the same
# quantity the search loop stored as best_score.
np.mean(best_cv_score)

0.961904761904762