In [36]:
import numpy as np

In [37]:
class KNeighborsClassifier:
    """Brute-force k-nearest-neighbours classifier.

    Each query is labelled with the most frequent class among its
    ``n_neighbors`` closest training samples, where closeness is the
    Euclidean distance. The public methods (``fit``, ``predict``,
    ``score``) follow the scikit-learn estimator naming.

    Parameters
    ----------
    n_neighbors : int
        Number of training samples that vote on each prediction. Must be
        between 1 and the number of training samples (inclusive) by the
        time ``predict`` is called.
    """

    def __init__(self, n_neighbors):
        self.n_neighbors = n_neighbors

    def fit(self, X_train, y_train):
        """Memorise the training set.

        Parameters
        ----------
        X_train : array-like, one row per sample
        y_train : array-like, one label per row of ``X_train``

        Returns
        -------
        self
            So that construction and fitting can be chained.
        """
        # Convert once here so predict() can rely on broadcasting and on
        # positional integer-array indexing, whatever sequence type was given.
        self.X_train = np.asarray(X_train)
        self.y_train = np.asarray(y_train)
        return self

    def __euclidean_distance(self, a, b):
        """Euclidean distance between ``a`` and ``b``, reduced over the last axis.

        Two 1-D vectors give a scalar; a 2-D ``a`` against a 1-D ``b`` gives
        one distance per row of ``a``.
        """
        return np.sqrt(np.sum(np.square(a - b), axis=-1))

    def __plurality_voting(self, classes_of_closest_neighbors):
        """Return the most frequent label among the given neighbour labels.

        ``np.unique`` returns labels in sorted order and ``np.argmax`` returns
        the first maximum, so ties are resolved in favour of the smallest label.
        """
        unique, unique_counts = np.unique(classes_of_closest_neighbors, return_counts=True)
        return unique[np.argmax(unique_counts)]

    def predict(self, X_test):
        """Predict a label for every row of ``X_test``.

        Parameters
        ----------
        X_test : array-like, one row per query

        Returns
        -------
        numpy.ndarray
            Predicted labels, one per query, in the same order.

        Raises
        ------
        ValueError
            If ``n_neighbors`` is smaller than 1 or larger than the number of
            training samples.
        """
        n_samples = len(self.X_train)
        if not 1 <= self.n_neighbors <= n_samples:
            raise ValueError(
                f"n_neighbors must be between 1 and the number of training "
                f"samples ({n_samples}), got {self.n_neighbors}"
            )

        y_pred = []
        for query in np.asarray(X_test):
            # One broadcasted operation gives the distance to every training row.
            distances = self.__euclidean_distance(self.X_train, query)

            # kth is a zero-based position: partitioning around n_neighbors - 1
            # puts the n_neighbors smallest distances in the leading slots and
            # remains valid when n_neighbors equals the training-set size.
            indices_of_closest_neighbors = np.argpartition(
                distances, kth=self.n_neighbors - 1
            )[:self.n_neighbors]
            classes_of_closest_neighbors = self.y_train[indices_of_closest_neighbors]

            y_pred.append(self.__plurality_voting(classes_of_closest_neighbors))

        return np.array(y_pred)

    def score(self, X_test, y_test):
        """Return the accuracy of ``predict(X_test)`` against ``y_test``.

        Returns
        -------
        float
            Fraction of queries whose predicted label equals the true label.
        """
        y_pred = self.predict(X_test)
        is_correct = (np.asarray(y_test) == y_pred)

        accuracy_score = is_correct.sum() / is_correct.size
        return accuracy_score.item()

In [38]:
from sklearn.datasets import load_iris

In [39]:
# Load the Iris dataset and unpack its feature matrix and label vector.
data = load_iris()
X, y = data['data'], data['target']

In [40]:
# Sanity check: features and labels must have the same number of rows.
print(X.shape, y.shape, sep="\n")

(150, 4)
(150,)


In [41]:
from sklearn.model_selection import train_test_split

In [42]:
# Hold out 33% of the samples for evaluation; the fixed seed keeps the
# shuffled (non-stratified) split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
    shuffle=True,
    stratify=None,
)

In [43]:
# Show the (features, labels) shapes of the training and test partitions.
print(f"{X_train.shape} {y_train.shape}")
print(f"{X_test.shape} {y_test.shape}")

(100, 4) (100,)
(50, 4) (50,)


In [44]:
# fit() returns the estimator itself, so construction and fitting can be chained.
clf = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
clf

<__main__.KNeighborsClassifier at 0x70eab8ef8950>

In [45]:
# Accuracy on the held-out split: fraction of test samples whose predicted label equals y_test.
clf.score(X_test, y_test)

0.98