In [44]:
import numpy as np
class KNearestNeighbours:
    """k-nearest-neighbours classifier using Euclidean distance and a majority vote.

    Training points that are exactly equal to the query point are skipped, so
    predicting on the training set itself never lets a point vote for itself.
    As a consequence, exact duplicates of the query in the training data do
    not vote either.

    Attributes:
        k: Number of neighbours that take part in the vote.
        X_training_data: Training points stored by ``fit`` (``None`` before fitting).
        y_training_labels: Labels aligned with ``X_training_data`` (``None`` before fitting).
    """

    def __init__(self, k=1):
        self.k = k
        self.X_training_data = None
        self.y_training_labels = None

    def fit(self, X, y):
        """Store the training points and their labels.

        Args:
            X: Sequence of training points (each a sequence/array of numbers).
            y: Sequence of labels, one per entry of ``X``. Labels must be hashable.

        Raises:
            ValueError: If ``k`` is smaller than 1, larger than the number of
                training points, or if ``X`` and ``y`` differ in length.
        """
        if self.k < 1:
            raise ValueError("k must be at least 1.")
        if self.k > len(X):
            raise ValueError("k must be less than or equal to the number of training data points.")
        if len(X) != len(y):
            raise ValueError("X and y must contain the same number of entries.")
        self.X_training_data = X
        self.y_training_labels = y

    def predict(self, X):
        """Return a list with the predicted label for every point in ``X``.

        Raises:
            ValueError: If the classifier has not been fitted, or a point has
                no usable neighbour in the training data.
        """
        return [self._predict_point(point) for point in X]

    def _predict_point(self, point):
        """Predict the label of a single point by majority vote of its k nearest neighbours.

        Neighbours are ranked by distance; equal distances are ordered by their
        position in the training data. If fewer than ``k`` neighbours are
        available (because identical points were skipped), only the available
        ones vote. Ties in the vote go to the label whose first supporter is
        nearest to ``point``.
        """
        if self.X_training_data is None or self.y_training_labels is None:
            raise ValueError("fit must be called before predict.")

        candidates = []  # (distance, training index) for every usable training point
        for i, training_point in enumerate(self.X_training_data):
            # array_equal also works for plain lists, where `==` yields a bare bool.
            if np.array_equal(point, training_point):  # Skip the identical point
                continue

            distance = self._euclidean_distance(point, training_point)
            if np.isnan(distance):
                # A NaN distance can never rank as "nearest"; leave it out of the vote.
                continue
            candidates.append((float(distance), i))

        if not candidates:
            raise ValueError("No neighbours available: every training point is identical to the query point.")

        # Tuple ordering sorts by distance first, then by training index.
        candidates.sort()
        k_labels = [self.y_training_labels[idx] for _, idx in candidates[:self.k]]

        # Majority vote. The dict keeps nearest-first insertion order and max()
        # returns the first maximum, so ties resolve to the nearest label.
        votes = {}
        for label in k_labels:
            votes[label] = votes.get(label, 0) + 1
        return max(votes, key=votes.get)

    def _euclidean_distance(self, p1, p2):
        """Return the Euclidean (L2) distance between two points."""
        return np.sqrt(np.sum((np.asarray(p1) - np.asarray(p2)) ** 2))

In [45]:
from sklearn.datasets import load_iris

# Load scikit-learn's bundled Iris dataset; later cells read its 'data' and 'target' entries.
iris = load_iris()
# Classifier that votes among the 3 nearest training points; it holds no training data until fit() is called.
threeNN = KNearestNeighbours(k=3)

In [50]:
%load_ext autoreload
import sys
sys.path.append("..")
from train_test_split import train_test_split
iris_X = iris['data']
iris_y = iris['target']
iris_X_train, iris_X_test, iris_y_train, iris_y_test = train_test_split(iris_X, iris_y, seed=21012)
print(iris_X_train.shape)
print(iris_X_test.shape)
print(iris_y_train.shape)
print(iris_y_train.shape)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
(75, 4)
(75, 4)
(75,)
(75,)
