In [16]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import neighbors, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [17]:
iris = datasets.load_iris()
iris_X = iris.data
iris_y = iris.target

print('Number of classes:', len(np.unique(iris_y)))
print('Number of data points:', len(iris_y))

print('\nSamples from class 0:\n', iris_X[iris_y == 0][:5, :])
print('\nSamples from class 1:\n', iris_X[iris_y == 1][:5, :])
print('\nSamples from class 2:\n', iris_X[iris_y == 2][:5, :])

Number of classes: 3
Number of data points: 150

Samples from class 0:
 [[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]]

Samples from class 1:
 [[7.  3.2 4.7 1.4]
 [6.4 3.2 4.5 1.5]
 [6.9 3.1 4.9 1.5]
 [5.5 2.3 4.  1.3]
 [6.5 2.8 4.6 1.5]]

Samples from class 2:
 [[6.3 3.3 6.  2.5]
 [5.8 2.7 5.1 1.9]
 [7.1 3.  5.9 2.1]
 [6.3 2.9 5.6 1.8]
 [6.5 3.  5.8 2.2]]


In [18]:
X_train, X_test, y_train, y_test = train_test_split(iris_X, iris_y, test_size=50)

print("Training size:", len(y_train))
print("Test size    :", len(y_test))

Training size: 100
Test size    : 50


In [19]:
knn = neighbors.KNeighborsClassifier(n_neighbors=1, p=2)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

print("Predict labels:", y_pred[:20])
print("Ground truth:", y_test[:20])
print(f"Accuracy of 1NN: {accuracy_score(y_test, y_pred)*100:.2f} %")

Predict labels: [2 0 1 1 1 2 0 2 0 1 2 1 2 0 2 1 2 1 0 0]
Ground truth: [1 0 1 1 1 2 0 2 0 1 2 1 2 0 2 1 2 1 0 0]
Accuracy of 1NN: 96.00 %


In [20]:
knn = neighbors.KNeighborsClassifier(n_neighbors=10, p=2, weights='distance')
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

print("Predict labels:", y_pred[:20])
print("Ground truth:", y_test[:20])
print(f"ccuracy of 10NN (1/distance weights): {accuracy_score(y_test, y_pred)*100:.2f} %")

Predict labels: [2 0 1 1 1 2 0 2 0 1 2 1 2 0 2 1 2 1 0 0]
Ground truth: [1 0 1 1 1 2 0 2 0 1 2 1 2 0 2 1 2 1 0 0]
ccuracy of 10NN (1/distance weights): 96.00 %


In [None]:
knn = neighbors.KNeighborsClassifier(n_neighbors=10, p=2, weights=lambda distances: np.exp(-distances**2/0.4))
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
print("Predict labels:", y_pred)
print("Ground truth:", y_test)
print(f"Accuracy of 10NN (customized weights): {accuracy_score(y_test, y_pred)*100:.2f} %")

In [22]:
from mnist.loader import MNIST
import time

mndata = MNIST('./MNIST/')
mndata.load_testing()
mndata.load_training()
X_test = mndata.test_images
X_train = mndata.train_images
y_test = np.asarray(mndata.test_labels)
y_train = np.asarray(mndata.train_labels)


start_time = time.time()
clf = neighbors.KNeighborsClassifier(n_neighbors = 1, p = 2)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
end_time = time.time()
print("Accuracy of 1NN for MNIST: %.2f %%" %(100*accuracy_score(y_test, y_pred)))
print("Running time: %.2f (s)" % (end_time - start_time))

KeyboardInterrupt: 