In [18]:
import numpy as np

class KNeighborsClassifier:
    '''
    Brute-force k-nearest-neighbours classifier.

    A query sample is labelled with the most frequent class among the k
    training samples that are closest to it under the chosen distance metric.

    Args:
        k: number of neighbours that vote on each prediction (must be >= 1)
        dist_metric: one of 'euclidean', 'manhattan', 'chebyshev', 'hamming'.
            The older spellings 'chebychev' and 'hemming' are still accepted.
    '''

    # Older (misspelled) metric names mapped to their canonical spelling so
    # that existing callers keep working.
    _METRIC_ALIASES = {'chebychev': 'chebyshev', 'hemming': 'hamming'}

    def __init__(self, k=5, dist_metric='euclidean'):
        if k < 1:
            raise ValueError(f'k must be at least 1, got {k}')
        self.k = k
        self.dist_metric = dist_metric

    def _most_common_(self, arr):
        '''
        Get the most common element in an array

        Works for any label type that np.unique can sort (negative integers,
        strings, ...). Ties are resolved in favour of the smallest label.

        Args:
            arr: 1D array

        return: most common element in arr
        '''
        values, counts = np.unique(arr, return_counts=True)
        # np.unique returns sorted values and argmax returns the first maximum,
        # so a tie goes to the smallest label.
        return values[np.argmax(counts)]

    def _distance_along_features_(self, a, b):
        '''
        Distance between a and b, reduced over the last (feature) axis

        a and b are broadcast against each other, so the same code handles a
        single pair of points as well as one point against a whole matrix of
        samples.

        Args:
            a: array with shape (..., n_features)
            b: array broadcastable against a

        return: scalar for two 1D points, otherwise an array of distances

        raises: ValueError if self.dist_metric is not a supported metric
        '''
        metric = self._METRIC_ALIASES.get(self.dist_metric, self.dist_metric)
        if metric == 'euclidean':
            return np.sqrt(np.sum((a - b) ** 2, axis=-1))
        if metric == 'manhattan':
            return np.sum(np.abs(a - b), axis=-1)
        if metric == 'chebyshev':
            return np.max(np.abs(a - b), axis=-1)
        if metric == 'hamming':
            mismatches = a != b
            # Fraction of feature positions in which the two points differ
            return np.sum(mismatches, axis=-1) / mismatches.shape[-1]
        raise ValueError(
            f"Unknown dist_metric {self.dist_metric!r}; expected one of "
            "'euclidean', 'manhattan', 'chebyshev', 'hamming'"
        )

    def _calculate_distance_(self, points):
        '''
        Calculate distance between two points using the specified distance metric

        Args:
            points: 2D array with shape (2, n_features)

        return: distance between two points (a scalar)

        raises: ValueError if self.dist_metric is not a supported metric
        '''
        points = np.asarray(points)
        return self._distance_along_features_(points[0], points[1])

    def fit(self, X, y):
        '''
        Store the training data; all distance work is deferred to predict

        Args:
            X: training samples, shape (n_samples, n_features)
            y: training labels, shape (n_samples,)

        raises: ValueError if X and y contain a different number of samples
        '''
        # Convert to arrays so that plain Python lists can be fancy-indexed later
        X = np.asarray(X)
        y = np.asarray(y)
        if len(X) != len(y):
            raise ValueError(
                f'X and y must have the same number of samples, got {len(X)} and {len(y)}'
            )
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        '''
        Predict a label for every sample in X

        Args:
            X: samples to classify, shape (n_samples, n_features)

        return: list with one predicted label per sample
        '''
        predictions = []
        for x in np.asarray(X):
            # Distances between x and all training samples in one broadcast operation
            distances = self._distance_along_features_(x, self.X_train)
            # Indices of the k nearest samples; the stable sort makes equal
            # distances resolve to the earlier training sample
            k_nearest = np.argsort(distances, kind='stable')[:self.k]
            # Most common class among the labels of the k nearest samples
            predictions.append(self._most_common_(self.y_train[k_nearest]))
        return predictions

    def evaluate(self, X, y):
        '''
        Print and return the accuracy of the predictions for X against y

        Args:
            X: samples to classify, shape (n_samples, n_features)
            y: true labels, shape (n_samples,)

        return: fraction of samples whose predicted label equals the true label
        '''
        # Compare as arrays: comparing two plain lists with == would yield a
        # single bool instead of an element-wise result
        y_true = np.asarray(y)
        y_pred = np.asarray(self.predict(X))
        accuracy = np.sum(y_pred == y_true) / len(y_true)

        print(f'Accuracy: {accuracy}')

        return accuracy

In [19]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load the iris dataset once and unpack features and labels directly:
#   X -> 4 features: sepal length, sepal width, petal length, petal width
#   y -> 3 types of flowers: 0-setosa, 1-versicolor, 2-virginica
X, y = load_iris(return_X_y=True)

# Hold out 20% of the samples for testing; the fixed random_state keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [20]:
# Classifier that votes among the 5 nearest training samples (Euclidean distance)
model = KNeighborsClassifier(dist_metric='euclidean', k=5)

# Fitting only stores the training samples and their labels on the model
model.fit(X_train, y_train)

In [21]:
# Score the model on the held-out test split; evaluate prints the accuracy and also returns it
model.evaluate(X_test, y_test)

Accuracy: 1.0


1.0