In [2]:
import numpy as np
from collections import Counter
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [3]:
class KNN:
    """k-nearest-neighbours classifier (Euclidean distance, majority vote).

    Parameters
    ----------
    top_k : int, default 5
        Number of nearest training samples that vote on a prediction.
        If it exceeds the number of training samples, all of them vote.

    Attributes
    ----------
    X_train, y_train : numpy.ndarray or None
        Training samples and their labels; ``None`` until ``fit`` is called.

    Raises
    ------
    TypeError
        If ``top_k`` is not an integer.
    ValueError
        If ``top_k`` is smaller than 1.
    """

    def __init__(self, top_k=5):
        if not isinstance(top_k, (int, np.integer)):
            raise TypeError(f"top_k must be an integer, got {type(top_k).__name__}")
        if top_k < 1:
            # A negative value would silently drop neighbours through
            # the `[:top_k]` slice, and zero leaves nobody to vote.
            raise ValueError(f"top_k must be >= 1, got {top_k}")
        self.top_k = top_k
        self.X_train = None
        self.y_train = None

    def fit(self, X, y):
        """Memorise the training set.

        Parameters
        ----------
        X : array-like, first axis indexes samples
        y : array-like, one label per sample

        Raises
        ------
        ValueError
            If the training set is empty or ``X`` and ``y`` differ in length.
        """
        # Coerce to arrays so that lists work and labels are always looked
        # up by position (a pandas Series would otherwise use index labels).
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim == 0 or y.ndim == 0 or len(X) == 0:
            raise ValueError("fit() needs at least one training sample")
        if len(X) != len(y):
            raise ValueError(
                f"X and y have inconsistent lengths: {len(X)} != {len(y)}"
            )
        self.X_train = X
        self.y_train = y

    @staticmethod
    def distance(x1, x2):
        """Return the Euclidean distance between two samples."""
        return np.linalg.norm(np.asarray(x1) - np.asarray(x2))

    def predict_batch(self, X):
        """Predict a label for every sample in ``X``.

        Returns
        -------
        list
            One predicted label per sample, in input order.
        """
        # Objects exposing ``__array__`` (e.g. DataFrames) may iterate over
        # something other than rows, so convert them to an array first.
        samples = np.asarray(X) if hasattr(X, "__array__") else X
        return [self.predict(x) for x in samples]

    def predict(self, x):
        """Predict the label of a single sample ``x``.

        The ``top_k`` closest training samples vote; on a tied vote the
        label that appears first among the nearest neighbours wins.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.X_train is None or self.y_train is None:
            raise RuntimeError("KNN.predict() called before fit()")
        X_train = np.asarray(self.X_train)
        y_train = np.asarray(self.y_train)

        # Distance to every training sample in one vectorised step. Each
        # difference is flattened so multi-dimensional samples are measured
        # the same way `distance` measures them.
        diffs = (X_train - np.asarray(x)).reshape(len(X_train), -1)
        distances = np.linalg.norm(diffs, axis=1)
        # A stable sort keeps equidistant neighbours in training order, so
        # tie-breaking does not depend on the sort implementation.
        top_idx = np.argsort(distances, kind="stable")[:self.top_k]
        # Labels of the k nearest neighbours, closest first
        k_nearests = y_train[top_idx]
        # Majority vote
        label = Counter(k_nearests).most_common(1)[0][0]

        return label

In [4]:
# Fixed seed so the train/test split -- and therefore the reported
# accuracy -- is the same on every Restart & Run All.
SEED = 42

# Load iris data
data = load_iris()
# Train/test split: hold out 30% of the samples for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.3, random_state=SEED
)
# Build the model and fit it on the training split
model = KNN(top_k=5)
model.fit(X_train, y_train)
# Predict labels for the held-out samples
y_pred = model.predict_batch(X_test)
# Calculate accuracy score; the documented argument order is (y_true, y_pred)
acc = accuracy_score(y_test, y_pred)

In [5]:
# Display the accuracy score computed for the test-set predictions
acc

0.9777777777777777