In [81]:
from collections import defaultdict
import numpy as np
from numpy.linalg import norm


class KNN_classifier:
    """Brute-force k-nearest-neighbours classifier.

    Distances between a query point and every stored training row are
    computed with ``numpy.linalg.norm(..., ord=p, axis=1)``; the label that
    occurs most often among the ``n_neighbors`` closest rows is predicted.

    Attributes:
        K: number of neighbours that vote for each prediction.
        p: value forwarded as ``ord`` to ``numpy.linalg.norm``.
        x: training samples stored by ``fit`` (``None`` until fitted).
        y: training labels stored by ``fit`` (``None`` until fitted).
    """

    def __init__(self, n_neighbors: int, p: int = 2):
        """Store the hyper-parameters.

        Args:
            n_neighbors: how many nearest training rows vote; must be >= 1.
            p: order of the norm used as distance (2 by default).

        Raises:
            ValueError: if ``n_neighbors`` is smaller than 1.
        """
        # A non-positive K would either leave no voters (K == 0) or, through
        # negative slicing, silently vote over almost the whole training set.
        if n_neighbors < 1:
            raise ValueError(
                f"n_neighbors must be a positive integer, got {n_neighbors!r}"
            )
        self.K = n_neighbors
        self.p = p
        self.x = None
        self.y = None

    def fit(self, x: np.ndarray, y: np.ndarray):
        """Memorise the training set.

        Args:
            x: training samples, one row per sample.
            y: labels, one entry per row of ``x``.

        Returns:
            The fitted classifier itself, so calls can be chained.

        Raises:
            ValueError: if ``x`` is empty or ``x`` and ``y`` differ in length.
        """
        # Accept any array-like (lists, tuples, ...) - the distance
        # computation needs real arrays for broadcasting.
        x = np.asarray(x)
        y = np.asarray(y)
        if x.shape[0] == 0:
            raise ValueError("cannot fit on an empty training set")
        if x.shape[0] != y.shape[0]:
            raise ValueError(
                f"x and y must have the same length, "
                f"got {x.shape[0]} and {y.shape[0]}"
            )
        self.x = x
        self.y = y
        return self

    def predict(self, x: np.ndarray):
        """Predict a label for every row of ``x``.

        Args:
            x: query samples, one row per sample.

        Returns:
            A list with one predicted label per query row.

        Raises:
            RuntimeError: if called before ``fit``.
        """
        if self.x is None or self.y is None:
            raise RuntimeError("KNN_classifier.predict() called before fit()")

        predicted = []
        for point in np.asarray(x):
            # Stable sort keeps equidistant training rows in their original
            # order, so the neighbour set is deterministic.
            nearest = np.argsort(self.__norm(point), kind="stable")[: self.K]

            # Labels are inserted in order of increasing distance.
            votes = {}
            for idx in nearest:
                label = self.y[idx]
                votes[label] = votes.get(label, 0) + 1

            # max() returns the first key holding the highest count, so a tie
            # goes to the class that appeared earliest among the nearest
            # neighbours rather than to the one seen last.
            predicted.append(max(votes, key=votes.get))

        return predicted

    def __norm(self, point: np.ndarray):
        """Return the float64 distances from ``point`` to every training row."""
        distances = norm(self.x - point, ord=self.p, axis=1)
        return distances.astype(np.float64, copy=False)

In [85]:
import sklearn
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Load the breast-cancer dataset as a (features, labels) pair.
X, y = load_breast_cancer(return_X_y=True)

# Hold out 30% of the samples for evaluation; the fixed random_state keeps
# the shuffled split reproducible between runs.
split = train_test_split(X, y, test_size=0.3, shuffle=True, random_state=42)
X_train, x_test, y_train, y_test = split

# Fit a 7-neighbour classifier with p=2 on the training portion.
clf = KNN_classifier(n_neighbors=7, p=2)
clf.fit(X_train, y_train)

# Score the predictions for the held-out samples against the true labels.
predictions = clf.predict(x_test)
accuracy = accuracy_score(y_test, predictions)
print(accuracy)

0.9649122807017544


In [83]:
%%timeit -n1
# Time a full prediction pass over x_test; -n1 executes the body once per run.
predictions = clf.predict(x_test)

15 ms ± 1.63 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
