In [18]:
from collections import Counter

import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.utils import Bunch

# K-Nearest Neighbours Algorithm

## Calculating Distances

In [19]:
def get_distance(test: tuple[int], point: tuple[int]) -> float:
    """Returns the Euclidean distance between the test point and a data-set point.

    Coordinate ``i`` of ``test`` is paired with ``point[i]``, so only the first
    ``len(test)`` entries of ``point`` take part in the calculation; any
    trailing entries of ``point`` are ignored.

    Args:
        test (tuple[int]): coordinates of the test point
        point (tuple[int]): coordinates of the data-set point; needs at least
            as many entries as ``test``

    Returns:
        float: square root of the summed squared coordinate differences
    """
    squared_gaps = ((value - point[axis]) ** 2 for axis, value in enumerate(test))
    return sum(squared_gaps) ** 0.5

In [20]:
# Sanity check: every coordinate differs by 3, so the result is sqrt(27).
print(get_distance(test=(1, 2, 3), point=(4, 5, 6)))

5.196152422706632


## Sorting Distances in Ascending Order

In [None]:
def sort_distances(distances: list[tuple[int]]) -> list[tuple[int]]:
    """Sorts ``(point, distance)`` pairs in place by ascending distance.

    The list is ordered with a stable merge sort keyed on element ``[1]`` of
    every pair, so pairs with equal distances keep their original relative
    order.

    Args:
        distances (list[tuple[int]]): list of ``(point, distance)`` pairs;
            the list is reordered in place

    Returns:
        list[tuple[int]]: the same list object, sorted in ascending order of
            distance
    """
    # Lists of length 0 or 1 are already sorted.
    if len(distances) <= 1:
        return distances

    mid = len(distances) // 2
    left = distances[:mid]
    right = distances[mid:]

    # The slices are copies, so each half is sorted independently of `distances`.
    sort_distances(left)
    sort_distances(right)

    i = j = k = 0
    while i < len(left) and j < len(right):
        # "<=" takes the left-half item on ties, which keeps the sort stable.
        if left[i][1] <= right[j][1]:
            distances[k] = left[i]
            i += 1
        else:
            distances[k] = right[j]
            j += 1
        k += 1

    # At most one half still has items left; copy them over in order.
    distances[k:] = left[i:] + right[j:]
    return distances

## Getting the Nearest Neighbours Relative to the Test Point

In [None]:
def get_neighbors(train: list[tuple[int]], test: tuple[int], k: int) -> list[tuple[int]]:
    """Finds the k training points that lie closest to the test point.

    Every training point is measured against ``test`` exactly once with
    ``get_distance``. Because ``get_distance`` only walks the coordinates of
    ``test``, training points may carry extra trailing entries (such as a
    class label) without affecting the distance.

    Args:
        train (list[tuple[int]]): list of points in the dataset
        test (tuple[int]): single test point to be compared to for distance
        k (int): number of nearest neighbors

    Returns:
        list: at most ``k`` training points, ordered from nearest to farthest;
            fewer are returned when ``train`` holds fewer than ``k`` points
    """
    # Pair each training point with its distance to the test point.
    distances: list[tuple] = [
        (train_point, get_distance(test, train_point)) for train_point in train
    ]

    # sort_distances reorders the list in place.
    sort_distances(distances)

    # Slicing tolerates k > len(train); max() stops a negative k from being
    # read as "all but the last |k|" by the slice.
    return [train_point for train_point, _ in distances[:max(k, 0)]]

## Getting Prediction

In [None]:
def get_prediction(neighbors: list) -> int:
    """Gets the prediction by majority vote among the neighbors.

    The label of each neighbor is read from its last element. The label that
    occurs most often wins; when several labels share the highest count, the
    one that appears first in ``neighbors`` is returned. With a single
    neighbor this is simply that neighbor's label.

    Args:
        neighbors (list): list of neighbors, each ending with its label

    Returns:
        int: the predicted label

    Raises:
        IndexError: if ``neighbors`` is empty
    """
    if len(neighbors) == 0:
        raise IndexError("cannot predict from an empty list of neighbors")

    votes = Counter(neighbor[-1] for neighbor in neighbors)
    # most_common lists equal counts in first-seen order, so ties resolve to
    # the label that shows up earliest in `neighbors`.
    return votes.most_common(1)[0][0]

## Executing the Whole Nearest Neighbour Algorithm

In [None]:
def execute(data: list[tuple[int]], test: tuple[int], k: int) -> int:
    """Runs the k-Nearest Neighbor pipeline for the given dataset, test and k.

    The neighbors are collected with ``get_neighbors`` and then handed to
    ``get_prediction``, whose result is returned unchanged.

    Args:
        data (list[tuple[int]]): list of points in the dataset
        test (tuple[int]): test point to be compared to for distance
        k (int): number of nearest neighbors

    Returns:
        int: prediction
    """
    nearest = get_neighbors(data, test, k)
    return get_prediction(nearest)

# Iris Dataset

## Loading Iris Dataset

In [None]:
# Load scikit-learn's bundled iris data set; the features and labels are read
# from iris['data'] and iris['target'] when the split is made.
iris: Bunch = load_iris()

## Splitting Iris Dataset into Training Set and Testing Set 

In [None]:
# Shuffle with a fixed seed and split: 75% training and 25% test (the default).
X_train_iris, X_test_iris, y_train_iris, y_test_iris = train_test_split(
    iris['data'], iris['target'], random_state=0
)

# Append the labels as a final column so each row reads (features..., label).
iris_train = np.column_stack((X_train_iris, y_train_iris))
iris_test = np.column_stack((X_test_iris, y_test_iris))

# Ionosphere Dataset

## Loading Ionosphere Dataset

In [None]:
# Parse the comma-separated ionosphere file into a float array (genfromtxt's default dtype).
X_ionosphere = np.genfromtxt(fname="ionosphere.txt", delimiter=",")

## Splitting Ionosphere Dataset into Training Set and Testing Set

In [None]:
# Shuffle the ionosphere rows with a fixed seed and hold out a quarter of them for testing.
X_train_ionosphere, X_test_ionosphere = train_test_split(
    X_ionosphere,
    test_size=0.25,
    random_state=42,
)

In [None]:
# Classify every held-out iris sample with k = 3 and print the predicted labels.
# `execute` takes one test point at a time, and the rows of X_test_iris hold the
# features only, so the true label never enters the distance calculation.
iris_predictions = [int(execute(iris_train, test_point, 3)) for test_point in X_test_iris]
print(iris_predictions)