In [1]:
import numpy as np
import pandas as pd

In [None]:
class MyKNNClf():
    """k-nearest-neighbours classifier with selectable metric and vote weights.

    Each query row is labelled by a (possibly weighted) vote among the ``k``
    closest training rows.

    Args:
        k: Number of neighbours that take part in the vote.
        metric: Distance function: 'euclidean', 'chebyshev', 'manhattan'
            or 'cosine'.
        weight: Voting scheme. 'uniform' gives every neighbour the same
            vote, 'rank' weighs a neighbour by ``1 / rank`` (the closest
            neighbour has rank 1) and 'distance' by ``1 / distance``.
    """

    def __init__(
            self, k: int = 3,
            metric: str = 'euclidean',
            weight: str = 'uniform'
            ):
        self.k = k
        self.metric = metric
        self.weight = weight
        self.train_size = None
        self.X = None
        self.y = None

    def fit(self, X: pd.DataFrame, y: pd.Series):
        """Memorise the training data.

        Copies of ``X`` and ``y`` are stored so later changes made by the
        caller do not affect the model; ``train_size`` records ``X.shape``.
        """
        self.X = X.copy()
        self.y = y.copy()
        self.train_size = X.shape

    def predict(self, X: pd.DataFrame):
        """Return the predicted label of every row of ``X`` as a numpy array.

        The label with the largest total neighbour weight wins. On a tie the
        label that sorts first is returned.
        """
        predictions = []
        for _, row in X.iterrows():
            labels, weights = self._neighbour_labels_and_weights(row)
            # Map labels to 0..n_classes-1 so the weights can be summed per
            # class regardless of the label type.
            classes, codes = np.unique(labels, return_inverse=True)
            scores = np.bincount(codes, weights=weights)
            predictions.append(classes[np.argmax(scores)])
        return np.array(predictions)

    def predict_proba(self, X: pd.DataFrame):
        """Return, per row of ``X``, the weight share of neighbours labelled 1.

        With 'uniform' weights this is the fraction of the nearest neighbours
        that belong to class 1.
        """
        probabilities = []
        for _, row in X.iterrows():
            labels, weights = self._neighbour_labels_and_weights(row)
            # Weights are normalised, so the share of class 1 is a plain sum.
            probabilities.append(weights[labels == 1].sum())
        return np.array(probabilities)

    def _neighbour_labels_and_weights(self, x):
        """Return labels and normalised vote weights of the neighbours of ``x``."""
        distances, indices = self.calculate_distances_and_indices(x)
        # ``indices`` are row positions, so index the labels positionally.
        labels = np.asarray(self.y)[indices]
        return labels, self._neighbour_weights(distances)

    def _neighbour_weights(self, distances):
        """Turn ascending neighbour distances into vote weights that sum to 1.

        Raises:
            ValueError: If ``self.weight`` is not a supported scheme.
        """
        # Use the real neighbour count: it is smaller than k when the
        # training set has fewer than k rows.
        count = len(distances)
        if self.weight == 'uniform':
            weights = np.ones(count)
        elif self.weight == 'rank':
            weights = 1.0 / np.arange(1, count + 1)
        elif self.weight == 'distance':
            exact = distances == 0
            if exact.any():
                # An exact match would need an infinite weight; let the exact
                # matches decide the vote on their own instead.
                weights = exact.astype(float)
            else:
                weights = 1.0 / distances
        else:
            raise ValueError("Invalid weight. Please choose from 'uniform', 'distance', or 'rank'.")
        return weights / weights.sum()

    def calculate_distances(self, x):
        """Return the distance from ``x`` to every training row as a numpy array.

        Raises:
            ValueError: If ``self.metric`` is not supported or ``fit`` has not
                been called yet.
        """
        if self.metric not in ('euclidean', 'chebyshev', 'manhattan', 'cosine'):
            raise ValueError("Invalid metric. Please choose from 'euclidean', 'chebyshev', 'manhattan', or 'cosine'.")
        if self.X is None:
            raise ValueError("The model is not fitted. Call fit before computing distances.")

        if self.metric == 'cosine':
            train = np.asarray(self.X, dtype=float)
            query = np.asarray(x, dtype=float)
            unit_train = train / np.linalg.norm(train, axis=1)[:, np.newaxis]
            unit_query = query / np.linalg.norm(query)
            return 1 - np.dot(unit_train, unit_query)

        # Subtract before converting so pandas still aligns a Series query
        # with the training columns; the result is a plain array, which keeps
        # all later indexing positional.
        diff = np.abs(np.asarray(self.X - x, dtype=float))
        if self.metric == 'euclidean':
            return np.linalg.norm(diff, axis=1)
        if self.metric == 'chebyshev':
            return diff.max(axis=1)
        return diff.sum(axis=1)

    def calculate_distances_and_indices(self, x):
        """Return ``(distances, indices)`` of the nearest training rows to ``x``.

        Both arrays are ordered from nearest to farthest and hold at most
        ``k`` entries; ``indices`` are row positions in the training data.
        """
        distances = np.asarray(self.calculate_distances(x), dtype=float)
        # A stable sort keeps equally distant rows in training order.
        indices = np.argsort(distances, kind='stable')[:self.k]
        return distances[indices], indices

    def __str__(self):
        return f"{__class__.__name__} class: k={self.k}, metric={self.metric}"