In [77]:
import numpy as np
import numpy.typing as npt
from typing import Self

In [78]:
class KNeighborsClassifier:
    def __init__(self, n_neighbors: int = 5, p: int = 2):
        self.n_neighbors = n_neighbors
        self.p = p
        return

    def fit(self, X_train: npt.NDArray[np.float64], y_train: npt.NDArray[np.int64]) -> Self:
        self.X_train = X_train
        self.y_train = y_train
        return self

    def __minkowski_distance(self, X_test: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
        absolute_dimension_wise_differences = np.abs(self.X_train - X_test[:, np.newaxis, :])
        return np.power(np.sum(np.power(absolute_dimension_wise_differences, self.n_neighbors), axis=-1), 1 / self.n_neighbors)

    def __plurality_voting(self, classes_of_closest_neighbors: npt.NDArray[np.int64]) -> npt.NDArray[np.int64]:
        y_pred = []
        for classes in classes_of_closest_neighbors:
            unique, unique_counts = np.unique(classes, return_counts=True)
            prediction = unique[np.argmax(unique_counts)]
            y_pred.append(prediction)

        return np.array(y_pred)

    def predict(self, X_test: npt.NDArray[np.float64]) -> npt.NDArray[np.int64]:
        distances = self.__minkowski_distance(X_test)

        indices_of_nearest_neighbors = np.argpartition(a=distances, kth=self.n_neighbors, axis=-1)
        classes_of_closest_neighbors = self.y_train[indices_of_nearest_neighbors][:, :self.n_neighbors]

        return self.__plurality_voting(classes_of_closest_neighbors)

    def score(self, X_test: npt.NDArray[np.float64], y_test: npt.NDArray[np.int64]) -> float:
        y_pred = self.predict(X_test)
        is_correct = y_test == y_pred

        accuracy_score = is_correct.sum() / is_correct.size
        return accuracy_score.item()

In [79]:
from sklearn.datasets import load_iris

In [80]:
# Load the iris dataset and pull the feature matrix and the target labels
# out of the returned container.
data = load_iris()
X = data["data"]
y = data["target"]

In [81]:
# Sanity check: X and y should have the same number of rows.
print(X.shape)
print(y.shape)

(150, 4)
(150,)


In [82]:
from sklearn.model_selection import train_test_split

In [83]:
# Hold out 33% of the samples for testing. The fixed random_state makes the
# shuffled split reproducible; stratify=None means the split is not stratified
# by class label.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42, shuffle=True, stratify=None
)

In [84]:
# Confirm the sizes of the train and test partitions.
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(100, 4) (100,)
(50, 4) (50,)


In [85]:
# Build the classifier with n_neighbors=3 and p=2 and store the training set.
# fit() returns the estimator itself, so the cell displays its repr.
clf = KNeighborsClassifier(n_neighbors=3, p=2)
clf.fit(X_train, y_train)

<__main__.KNeighborsClassifier at 0x70eaad588440>

In [86]:
# Accuracy on the held-out test set (fraction of correctly predicted labels).
clf.score(X_test, y_test)

0.98