In [106]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets, neighbors, model_selection

In [107]:
def normalize(a):
    """Rescale ``a`` to the interval [0, 1] using reversed min-max scaling.

    The largest value of ``a`` maps to 0 and the smallest maps to 1, i.e. the
    result is ``(max - a) / (max - min)``.

    Parameters
    ----------
    a : array_like
        Numeric values to rescale (non-empty).

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``a``. If every value in ``a`` is
        identical, an array of zeros is returned instead of dividing by zero.
    """
    a = np.asarray(a)
    top = np.max(a)
    span = top - np.min(a)
    if span == 0:
        # Constant input: the numerator (top - a) is zero everywhere, so return
        # zeros rather than the NaNs that 0 / 0 would produce.
        return np.zeros_like(a, dtype=float)
    return (top - a) / span

def accuracy(true, pred):
    """Return the percentage (0-100) of positions where ``pred`` equals ``true``.

    Parameters
    ----------
    true : array_like
        Reference labels.
    pred : array_like
        Predicted labels; must have the same shape as ``true``.

    Returns
    -------
    numpy.float64
        Share of matching elements, multiplied by 100.

    Raises
    ------
    ValueError
        If ``true`` and ``pred`` do not have the same shape.
    """
    # Coerce to arrays so that plain Python sequences are compared element by
    # element (list == list would collapse to a single bool).
    true = np.asarray(true)
    pred = np.asarray(pred)
    if true.shape != pred.shape:
        # Broadcasting mismatched shapes would silently produce a wrong score.
        raise ValueError(
            f"shape mismatch: true has shape {true.shape}, pred has shape {pred.shape}"
        )
    return np.mean(true == pred) * 100

def knn_predict(x_test, y_test, x_train, y_train, k=5):
    """Classify each row of ``x_test`` by majority vote of its ``k`` nearest
    training rows (Euclidean distance).

    Parameters
    ----------
    x_test : array_like, 2-D
        Rows to classify, one sample per row.
    y_test : array_like or None
        Not used for prediction; accepted only so existing calls keep working.
    x_train : array_like, 2-D
        Training samples, one per row, with the same number of columns as
        ``x_test``.
    y_train : array_like, 1-D
        Label of each training row.
    k : int, default 5
        Number of neighbours that vote; must satisfy ``1 <= k <= len(x_train)``.

    Returns
    -------
    numpy.ndarray
        1-D array with one predicted label per row of ``x_test``, using the
        dtype of ``y_train``. When several labels tie for the most votes, the
        smallest label (in ``np.unique`` order) is returned.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1 or larger than the number of training rows.
    """
    x_test = np.asarray(x_test)
    x_train = np.asarray(x_train)
    y_train = np.asarray(y_train)

    n_train = len(x_train)
    if not 1 <= k <= n_train:
        raise ValueError(
            f"k must be between 1 and the number of training samples ({n_train}), got {k}"
        )

    # Size the output from x_test and keep the label dtype, so predictions do
    # not depend on the ground-truth labels and are not cast to float.
    y_pred = np.empty(len(x_test), dtype=y_train.dtype)
    for i, sample in enumerate(x_test):
        # Squared distances rank neighbours exactly like true distances, so the
        # square root is skipped.
        sq_dist = np.sum((sample - x_train) ** 2, axis=1)
        # Partitioning around position k - 1 puts the k smallest distances in
        # the first k slots and stays valid when k == n_train.
        nearest = np.argpartition(sq_dist, k - 1)[:k]
        values, counts = np.unique(y_train[nearest], return_counts=True)
        y_pred[i] = values[np.argmax(counts)]
    return y_pred

In [108]:
# Load the breast-cancer dataset as a (features, labels) pair.
x, y = datasets.load_breast_cancer(return_X_y=True)
# Rescale each feature column independently (axis 0 applies `normalize` per column).
# NOTE(review): the scaling statistics come from the full dataset, i.e. they are
# computed before the split, so test rows influence how training rows are scaled.
x_norm = np.apply_along_axis(normalize, 0, x)
# Hold out 30% of the rows for testing; the fixed seed makes the split, and
# therefore the reported accuracies, reproducible across runs.
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    x_norm, y, test_size=0.3, random_state=42
)

In [109]:
# Reference model: scikit-learn's KNeighborsClassifier constructed without
# arguments, fitted on the training split and scored on both splits.
knn = neighbors.KNeighborsClassifier()
knn.fit(x_train, y_train)
y_train_pred, y_test_pred = knn.predict(x_train), knn.predict(x_test)
print("Training accuracy", accuracy(y_train, y_train_pred))
print("Testing accuracy:", accuracy(y_test, y_test_pred))

Training accuracy 97.98994974874373
Testing accuracy: 97.6608187134503


In [110]:
# Same evaluation with the hand-written k-NN (default k): predict the training
# rows and the held-out rows, both against the training set.
y_train_pred, y_test_pred = (
    knn_predict(x_train, y_train, x_train, y_train),
    knn_predict(x_test, y_test, x_train, y_train),
)
print("Training accuracy", accuracy(y_train, y_train_pred))
print("Testing accuracy:", accuracy(y_test, y_test_pred))

Training accuracy 97.98994974874373
Testing accuracy: 97.6608187134503
