In [1]:
import numpy as np
import pandas as pd
from collections import Counter

In [2]:
class KNearestNeighbor(object):
    """A k-nearest-neighbours classifier with multiple distance metrics.

    The classifier is a lazy learner: constructing it only stores the
    training data, and all computation happens at prediction time.
    """

    # Metric names accepted by ``compute_distances`` and ``fit_predict``.
    _DISTANCES = ("hamming", "euclidean", "manhattan")

    def __init__(self, X_train, y_train):
        """
        Store the training samples and their labels.

        Parameters:
        - X_train: array-like whose first axis indexes the training samples
        - y_train: array-like holding one label per training sample

        Raises:
        - ValueError: if X_train and y_train hold a different number of samples
        """
        # np.asarray accepts lists / pandas objects without copying ndarrays,
        # and guarantees that label lookup in predict_labels is positional
        # (a pandas Series would otherwise be indexed by its index labels).
        self.X_train = np.asarray(X_train)
        self.y_train = np.asarray(y_train)
        if len(self.X_train) != len(self.y_train):
            raise ValueError(
                "X_train and y_train must contain the same number of samples "
                f"(got {len(self.X_train)} and {len(self.y_train)})"
            )

    def fit_predict(self, X_test, k=1, distance="hamming"):
        """
        Predict labels for test data.

        There is no separate fitting step: distances to the stored training
        samples are computed and the k nearest neighbours vote.

        Parameters:
        - X_test: Test data
        - k: Number of nearest neighbors
        - distance: Distance metric ('hamming', 'euclidean', 'manhattan')

        Returns:
        - 1-D array with one predicted label per test sample
        """
        dists = self.compute_distances(X_test, distance)
        return self.predict_labels(dists, k=k)

    @staticmethod
    def _as_samples(data):
        """Return ``data`` as a 2-D (n_samples, n_features) array."""
        arr = np.asarray(data)
        # Everything after the first axis is treated as one flat feature
        # vector; 1-D input therefore becomes n samples of a single feature.
        n_features = int(np.prod(arr.shape[1:]))
        return arr.reshape(arr.shape[0], n_features)

    def compute_distances(self, X_test, distance="hamming"):
        """
        Compute the distance between each test and train point.
        Supported distances: 'hamming', 'euclidean', 'manhattan'

        Parameters:
        - X_test: Test data, first axis indexes the samples
        - distance: Name of the metric to use

        Returns:
        - float array of shape (n_test, n_train); entry [i, j] is the distance
          between test sample i and training sample j. For 'hamming' this is
          the fraction of features that differ.

        Raises:
        - ValueError: if the metric is unknown or the test and training
          samples do not have the same number of features
        """
        # Validate up front so a bad metric is reported even for empty inputs.
        if distance not in self._DISTANCES:
            raise ValueError('Unknown distance type')

        test = self._as_samples(X_test)
        train = self._as_samples(self.X_train)
        if test.shape[1] != train.shape[1]:
            raise ValueError(
                "X_test and X_train must have the same number of features "
                f"(got {test.shape[1]} and {train.shape[1]})"
            )

        if distance != 'hamming':
            # Subtract in floating point: unsigned integer inputs (e.g. uint8
            # pixels) would otherwise wrap around, and bool inputs cannot be
            # subtracted at all.
            test = test.astype(np.float64)
            train = train.astype(np.float64)

        dists = np.zeros((test.shape[0], train.shape[0]))
        # One pass per test sample, vectorised over all training samples;
        # this keeps temporaries at (n_train, n_features) in size.
        for i, sample in enumerate(test):
            if distance == 'hamming':
                dists[i] = np.mean(train != sample, axis=1)
            elif distance == 'euclidean':
                dists[i] = np.sqrt(np.sum((train - sample) ** 2, axis=1))
            else:  # 'manhattan'
                dists[i] = np.sum(np.abs(train - sample), axis=1)
        return dists

    def predict_labels(self, dists, k=1):
        """
        Predict labels based on nearest neighbors.

        Parameters:
        - dists: (n_test, n_train) distance matrix, as returned by
          compute_distances
        - k: Number of nearest neighbors that vote; values larger than the
          number of training samples use all of them

        Returns:
        - 1-D array with one predicted label per test sample. Integer and
          boolean labels yield an int array; other label types keep the
          dtype of y_train.

        Raises:
        - ValueError: if k < 1 or there are no training samples to vote
        """
        if k < 1:
            raise ValueError("k must be a positive integer")

        dists = np.asarray(dists)
        labels = np.asarray(self.y_train)
        num_test = dists.shape[0]
        if num_test and dists.shape[1] == 0:
            raise ValueError("Cannot predict without any training samples")

        # Keep non-integer labels (strings, floats) intact instead of forcing
        # them through an int array.
        out_dtype = int if labels.dtype.kind in "biu" else labels.dtype
        y_pred = np.zeros(num_test, dtype=out_dtype)

        for i in range(num_test):
            # A stable sort makes equidistant neighbours resolve by training
            # order, so results are deterministic.
            nearest_indices = np.argsort(dists[i], kind="stable")[:k]
            votes = Counter(labels[nearest_indices].tolist())
            # On a tied vote, most_common returns the label encountered
            # first, i.e. the one belonging to the closest neighbour.
            y_pred[i] = votes.most_common(1)[0][0]
        return y_pred

In [3]:
# Toy training set: four samples with three binary features each.
X_train = np.array([[1, 0, 1], [0, 1, 0], [1, 1, 1], [0, 0, 0]])
# Class label of each training row, in the same order.
y_train = np.array([0, 1, 0, 1])

# Two unseen samples to classify.
X_test = np.array([[1, 0, 0], [0, 1, 1]])

In [4]:
# Build the classifier once; construction only stores the training data.
knn = KNearestNeighbor(X_train, y_train)

# Classify the test samples with 1-NN under each metric and print the labels.
for dist in ("hamming", "euclidean", "manhattan"):
    y_pred = knn.fit_predict(X_test, distance=dist, k=1)
    print(f"Distance: {dist} -> Predictions: {y_pred}")

Distance: hamming -> Predictions: [0 1]
Distance: euclidean -> Predictions: [0 1]
Distance: manhattan -> Predictions: [0 1]
