In [34]:
import numpy as np
import numpy.typing as npt
from typing import Self

In [35]:
class KNeighborsClassifier:
    def __init__(
        self,
        n_neighbors: int = 5,
        p: int = 2,
        metric: str = "minkowski",
    ):
        self.n_neighbors = n_neighbors
        self.p = p
        self.metric = metric

        self.__distance_metrics = {
            "minkowski": self.__minkowski_distance,
            "cosine": self.__cosine_distance,
        }
        return

    def fit(
        self,
        X_train: npt.NDArray[np.float64],
        y_train: npt.NDArray[np.int64],
    ) -> Self:
        self.X_train = X_train
        self.y_train = y_train
        return self

    def __minkowski_distance(
        self,
        X_test: npt.NDArray[np.float64],
    ) -> npt.NDArray[np.float64]:
        absolute_dimension_wise_differences = np.abs(
            self.X_train - X_test[:, np.newaxis, :]
        )
        return np.power(
            np.sum(
                np.power(absolute_dimension_wise_differences, self.n_neighbors), axis=-1
            ),
            1 / self.n_neighbors,
        )

    def __cosine_distance(
        self,
        X_test,
    ):
        dimension_wise_products = self.X_train * X_test[:, np.newaxis, :]
        dot_products = np.sum(dimension_wise_products, axis=-1)

        norms_of_train = np.sqrt(np.sum(np.square(self.X_train), axis=-1))
        norms_of_test = np.sqrt(np.sum(np.square(X_test), axis=-1))

        cosine_similarity = dot_products / norms_of_train / norms_of_test[:, np.newaxis]
        return 1 - cosine_similarity

    def __plurality_voting(
        self,
        classes_of_closest_neighbors: npt.NDArray[np.int64],
    ) -> npt.NDArray[np.int64]:
        y_pred = []
        for classes in classes_of_closest_neighbors:
            unique, unique_counts = np.unique(classes, return_counts=True)
            prediction = unique[np.argmax(unique_counts)]
            y_pred.append(prediction)

        return np.array(y_pred)

    def predict(
        self,
        X_test: npt.NDArray[np.float64],
    ) -> npt.NDArray[np.int64]:
        distance_metric = self.__distance_metrics.get(self.metric)
        distances = distance_metric(X_test)

        indices_of_nearest_neighbors = np.argpartition(
            a=distances, kth=self.n_neighbors, axis=-1
        )
        classes_of_closest_neighbors = self.y_train[indices_of_nearest_neighbors][
            :, : self.n_neighbors
        ]

        return self.__plurality_voting(classes_of_closest_neighbors)

    def score(
        self,
        X_test: npt.NDArray[np.float64],
        y_test: npt.NDArray[np.int64],
    ) -> float:
        y_pred = self.predict(X_test)
        is_correct = y_test == y_pred

        accuracy_score = is_correct.sum() / is_correct.size
        return accuracy_score.item()

In [36]:
from sklearn.datasets import load_iris

In [37]:
# Load the Iris dataset and split it into the feature matrix and the labels.
data = load_iris()
X, y = data["data"], data["target"]

In [38]:
# Show the shapes of the feature matrix and the label vector, one per line.
print(X.shape, y.shape, sep="\n")

(150, 4)
(150,)


In [39]:
from sklearn.model_selection import train_test_split

In [40]:
# Hold out 33% of the samples for testing. The fixed random_state makes the
# shuffled split reproducible; stratify=None means the split is not stratified
# by class label.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42, shuffle=True, stratify=None
)

In [41]:
# Sanity check: features and labels of each split must have matching lengths.
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(100, 4) (100,)
(50, 4) (50,)


In [42]:
# Fit the hand-written classifier with 3 neighbours and the cosine metric.
# fit() returns the estimator itself, which is why the cell displays its repr.
clf = KNeighborsClassifier(n_neighbors=3, metric="cosine")
clf.fit(X_train, y_train)

<__main__.KNeighborsClassifier at 0x7329d8f91b20>

In [43]:
# Accuracy of the hand-written classifier on the held-out split.
clf.score(X_test, y_test)

0.98

In [44]:
from sklearn.neighbors import KNeighborsClassifier as KNN

In [45]:
# Reference implementation from scikit-learn with the same hyper-parameters.
# NOTE(review): `clf` is rebound here and no longer refers to the custom
# classifier fitted earlier; a distinct name would keep both available.
# NOTE(review): `p` is presumably irrelevant with metric="cosine" - confirm
# against the scikit-learn documentation.
clf = KNN(n_neighbors=3, p=2, metric="cosine")
clf.fit(X_train, y_train)

In [46]:
# Accuracy of the scikit-learn reference model on the same held-out split.
clf.score(X_test, y_test)

0.98

In [47]:
# List the attributes of the custom class. Methods defined with a leading
# double underscore appear under their name-mangled form
# `_KNeighborsClassifier__<name>`.
dir(KNeighborsClassifier)

['_KNeighborsClassifier__cosine_distance',
 '_KNeighborsClassifier__minkowski_distance',
 '_KNeighborsClassifier__plurality_voting',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'fit',
 'predict',
 'score']

In [48]:
from pprint import pprint

In [49]:
# Pretty-print the class namespace: only attributes defined directly on the
# custom class, mapped to the underlying function objects.
pprint(KNeighborsClassifier.__dict__)

mappingproxy({'_KNeighborsClassifier__cosine_distance': <function KNeighborsClassifier.__cosine_distance at 0x7329d8f75620>,
              '_KNeighborsClassifier__minkowski_distance': <function KNeighborsClassifier.__minkowski_distance at 0x7329d8f75580>,
              '_KNeighborsClassifier__plurality_voting': <function KNeighborsClassifier.__plurality_voting at 0x7329d8f756c0>,
              '__dict__': <attribute '__dict__' of 'KNeighborsClassifier' objects>,
              '__doc__': None,
              '__init__': <function KNeighborsClassifier.__init__ at 0x7329d8f754e0>,
              '__module__': '__main__',
              '__weakref__': <attribute '__weakref__' of 'KNeighborsClassifier' objects>,
              'fit': <function KNeighborsClassifier.fit at 0x7329d8f753a0>,
              'predict': <function KNeighborsClassifier.predict at 0x7329d8f75760>,
              'score': <function KNeighborsClassifier.score at 0x7329d8f75800>})


In [50]:
# Build a {metric name: function} mapping from the class namespace by keeping
# the attributes whose name ends in "distance" and stripping the name-mangling
# prefix and the "_distance" suffix from each key.
{
    attribute_name.split("__")[-1].replace("_distance", ""): attribute
    for attribute_name, attribute in vars(KNeighborsClassifier).items()
    if attribute_name.endswith("distance")
}

{'minkowski': <function __main__.KNeighborsClassifier.__minkowski_distance(self, X_test: numpy.ndarray[typing.Any, numpy.dtype[numpy.float64]]) -> numpy.ndarray[typing.Any, numpy.dtype[numpy.float64]]>,
 'cosine': <function __main__.KNeighborsClassifier.__cosine_distance(self, X_test)>}