In [1]:
import numpy as np

In [31]:
# implementation
class KNNClassifier:
    def __init__(self, k=3):
        self.k = k
        self.X_train = None
        self.y_train = None

    def fit(self, X, y):
        self.X_train = X
        self.y_train = y

    def _compute_distance(self, x, points) -> float:
        return np.sqrt(np.sum((points - x)**2, axis=1))
    
    def predict(self, X) -> np.array:
        # compute prediction for each data point
        predictions = []
        # for each point
        for x in X:
            print('x:', x)
            # - compute the distant to the rest
            distances = self._compute_distance(self.X_train, x)
            print('distances:', distances)
            # - get k closer points by sorting
            closer_points = np.argsort(distances)[:self.k]
            print('closer_points:', closer_points)
            # - get labels for closer points
            closer_labels = self.y_train[closer_points]
            print('closer_labels:', closer_labels)
            # - compute prediction for the point
            votes = np.bincount(closer_labels)
            print('votes:', votes)
            predition = np.argmax(votes)
            print('predition:', predition)
            # - append the prediction
            predictions.append(predition)
        # return predictions as a numpy array
        return np.array(predictions)

In [32]:
# test cases
# Test case 1: Simple 2D dataset
X_train = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
y_train = np.array([0, 0, 1, 1])
X_test = np.array([[1.5, 1.5], [2.5, 2.5]])

knn = KNNClassifier(k=3)
knn.fit(X_train, y_train)
predictions = knn.predict(X_test)
expected = np.array([0, 1])
assert np.array_equal(predictions, expected), f"Test case 1 failed. Expected {expected}, but got {predictions}"

x: [1.5 1.5]
distances: [0.70710678 0.70710678 0.70710678 1.58113883]
closer_points: [0 1 2]
closer_labels: [0 0 1]
votes: [2 1]
predition: 0
x: [2.5 2.5]
distances: [2.12132034 1.58113883 0.70710678 0.70710678]
closer_points: [2 3 1]
closer_labels: [1 1 0]
votes: [1 2]
predition: 1


In [30]:
    # Test case 2: Larger dataset
    np.random.seed(42)
    X_train = np.random.rand(100, 2)
    y_train = (X_train[:, 0] + X_train[:, 1] > 1).astype(int)
    X_test = np.random.rand(20, 2)
    y_test = (X_test[:, 0] + X_test[:, 1] > 1).astype(int)

    knn = KNNClassifier(k=5)
    knn.fit(X_train, y_train)
    predictions = knn.predict(X_test)
    accuracy = np.mean(predictions == y_test)
    assert accuracy > 0.7, f"Test case 2 failed. Accuracy {accuracy} is too low"

x: [0.64203165 0.08413996]
distances: [0.9069194  0.52232415 0.49129596 0.97599995 0.6252728  1.08202812
 0.229546   0.47079044 0.55519776 0.29499545 0.0630461  0.44952208
 0.72528147 0.61697791 0.06230856 0.09301377 1.03956283 0.79326286
 0.33768912 0.35850506 0.66283069 1.02476969 0.69383568 0.54694145
 0.13867073 0.76469738 0.86353971 0.83889589 0.56472508 0.64369887
 0.31501693 0.33041962 0.58366502 0.87562236 1.06629582 0.17344467
 0.96952332 0.64811734 0.12963239 0.28533553 0.58272142 0.31181362
 0.40950648 0.5603037  0.45903806 0.81774918 0.49169331 0.42946724
 0.36353334 0.61706979 0.82330918 0.53620218 0.31270436 0.71024932
 0.41329551 0.36061457 0.77903746 0.7873679  0.19136236 0.51958933
 0.82862835 0.32506259 0.53768283 0.79628076 0.76506197 0.26360872
 0.58043717 0.38428987 0.63103055 0.9302775  0.36162154 0.26064855
 0.36030316 0.41979646 0.62176    0.30828014 0.64142298 0.37816812
 0.67814816 0.76050574 0.61294214 0.45171474 0.93202159 0.3371705
 0.78632561 0.07632735 0.