In [None]:
import numpy as np
# KNN class for classification and regression
class KNN():
    """Brute-force k-nearest-neighbours model for classification or regression.

    Parameters
    ----------
    task : str
        'Classification' (majority vote among the k nearest labels) or
        'Regression' (mean of the k nearest targets).
    k : int
        Number of neighbours consulted per prediction. Must be at least 1.
        If k exceeds the number of training samples, every training sample
        takes part in the vote/mean.
    distance_metric : str
        'Euclidean' or 'Manhattan'.

    Notes
    -----
    Invalid `task`, `k` or `distance_metric` values are reported with a
    ValueError when `predict`/`score` is called, not at construction time.
    """

    # Initialization
    def __init__(self,task = 'Classification', k = 3, distance_metric = 'Euclidean'):
        self.task = task
        self.k = k
        self.distance_metric = distance_metric

    # Shared validation so predict() and score() reject bad settings identically
    def _check_config(self):
        """Raise ValueError if `task` is unknown or `k` is smaller than 1."""
        if self.task not in ('Classification', 'Regression'):
            raise ValueError('Unknown task type.')
        if self.k < 1:
            raise ValueError('k must be at least 1.')

    # distance function
    def _distance(self, a, b):
        """Return the distance between two points under `self.distance_metric`.

        Raises
        ------
        ValueError
            If `self.distance_metric` is neither 'Euclidean' nor 'Manhattan'.
        """
        if self.distance_metric == 'Euclidean':
            return np.sqrt(np.sum((a - b) ** 2))
        elif self.distance_metric == 'Manhattan':
            return np.sum(np.abs(a - b))
        else:
            raise ValueError('Unknown distance metric.')

    # Fit
    def fit(self, x, y):
        """Store the training data (KNN has no training step beyond this).

        Parameters
        ----------
        x : array-like
            Training samples, one sample per row.
        y : array-like
            Labels (classification) or targets (regression), one per sample.

        Raises
        ------
        ValueError
            If there are no samples, or `x` and `y` differ in length.
        """
        # Convert up front: predict() indexes y_train with an index array,
        # which plain Python lists do not support.
        x = np.asarray(x)
        y = np.asarray(y)
        if len(x) == 0:
            raise ValueError('Cannot fit on an empty training set.')
        if len(x) != len(y):
            raise ValueError('x and y must contain the same number of samples.')
        self.X_train = x
        self.y_train = y

    # Predict
    def predict(self, X):
        """Predict a label/target for every row of `X`.

        Parameters
        ----------
        X : array-like
            Query samples, one per row, with the same features as the
            training samples.

        Returns
        -------
        numpy.ndarray
            One prediction per query row. Classification ties go to the
            smallest label value (np.unique sorts the labels and np.argmax
            returns the first maximum).

        Raises
        ------
        ValueError
            If `task`, `k` or `distance_metric` is invalid.
        AttributeError
            If called before `fit`.
        """
        # Validate before the loop so an empty X cannot hide a bad config.
        self._check_config()
        if not hasattr(self, 'X_train') or not hasattr(self, 'y_train'):
            raise AttributeError('KNN model is not fitted; call fit() before predict().')

        # Tolerates training attributes assigned directly instead of via fit().
        X_train = np.asarray(self.X_train)
        y_train = np.asarray(self.y_train)

        y_pred = []
        for x in np.asarray(X):
            # Distance from this query point to every training sample
            distances = np.array([self._distance(x, xi) for xi in X_train])
            # Stable sort: equidistant neighbours are ranked by training order,
            # making tie-breaking deterministic.
            k_indices = np.argsort(distances, kind='stable')[:self.k]
            k_labels = y_train[k_indices]

            if self.task == 'Classification':
                # Majority vote among the k nearest labels
                classes, counts = np.unique(k_labels, return_counts=True)
                y_pred.append(classes[np.argmax(counts)])
            else:
                # Regression: mean of the k nearest targets
                y_pred.append(np.mean(k_labels))

        return np.array(y_pred)

    # Score
    def score(self, X_new, y):
        """Score predictions for `X_new` against the true values `y`.

        Returns
        -------
        float
            Accuracy as a fraction in [0, 1] for classification, or the
            R^2 coefficient of determination for regression. When `y` is
            constant, R^2 is defined here as 1.0 for a perfect fit and 0.0
            otherwise, instead of dividing by zero.

        Raises
        ------
        ValueError
            If the configuration is invalid, or `y` does not contain exactly
            one value per row of `X_new`.
        """
        self._check_config()
        # Flatten so a column-vector y (m, 1) is compared element-wise with
        # the (m,) predictions instead of broadcasting to an (m, m) matrix.
        y_true = np.asarray(y).reshape(-1)
        y_pred = self.predict(X_new).reshape(-1)
        if y_true.shape[0] != y_pred.shape[0]:
            raise ValueError('y must contain one value per row of X_new.')

        # For classification, use accuracy
        if self.task == 'Classification':
            return np.mean(y_pred == y_true)

        # For regression, use R^2 score
        ss_total = np.sum((y_true - np.mean(y_true))**2)
        ss_res = np.sum((y_true - y_pred)**2)
        if ss_total == 0:
            # Constant target: the usual ratio is 0/0 or x/0.
            return 1.0 if ss_res == 0 else 0.0
        return 1 - ss_res / ss_total

In [None]:
# Sample data
# Seed NumPy's global RNG so a fresh "Restart & Run All" reproduces the same numbers
np.random.seed(42)

# Number of training rows
m = 100
# rand() draws uniformly from [0, 1); multiplying by 2 puts every value in [0, 2),
# so both features share the same scale (simulates feature scaling)
# Here we create 2 features
X1 = 2 * np.random.rand(m, 1)
X2 = 2 * np.random.rand(m, 1)
# Combine features to create the (m, 2) X matrix
X = np.column_stack((X1, X2))

# Regression target: intercept + slope * first feature + Gaussian noise
y_output = 4 + 3 * X[:, 0] + np.random.randn(m)

# Classification target: build a continuous value the same way (with its own noise draw),
# then convert it to binary classes by thresholding at its mean
y_continuous = 4 + 3 * X[:, 0] + np.random.randn(m)
threshold = np.mean(y_continuous)
y_labels = (y_continuous > threshold).astype(int)

# Test data with 3 rows.
# NOTE: [3, 4] and [4, 3] lie outside the [0, 2) range of the training features, so their
# neighbours are simply the training points closest to that corner of the feature space.
X_new = np.array([[1,2], [3,4], [4,3]])

# Apply knn with all possible tasks and distance metrics
tasks = ['Classification', 'Regression']
distance_metrics = ['Euclidean', 'Manhattan']
# Training target used for each task
targets_by_task = {'Classification': y_labels, 'Regression': y_output}

for task in tasks:
    y_train = targets_by_task[task]
    for distance_metric in distance_metrics:
        # Initialize KNN object
        knn_model = KNN(k=3, task=task, distance_metric=distance_metric)
        knn_model.fit(X, y_train)
        y_pred = np.round(knn_model.predict(X_new), 2)
        # Scored on the training data itself, so every point counts itself as a
        # neighbour; treat these numbers as optimistic.
        score_train = knn_model.score(X, y_train)
        print(f'Task: {task}, Distance Metric: {distance_metric}')
        print(f'Predictions for {X_new.tolist()}: {y_pred}')
        if task == 'Classification':
            # score() returns a fraction in [0, 1]; the % format converts it to a percentage
            print(f'Accuracy score on training data: {score_train:.2%}')
        else:
            # R^2 is a unitless coefficient, not a percentage
            print(f'R^2 score on training data: {score_train:.2f}')
        print('-' * 40)

Task: Classification, Distance Metric: Euclidean
Predictions for [[1, 2], [3, 4], [4, 3]]: [0 1 1]
Accuracy score on training data: 0.94 %
----------------------------------------
Task: Classification, Distance Metric: Manhattan
Predictions for [[1, 2], [3, 4], [4, 3]]: [0 1 1]
Accuracy score on training data: 0.93 %
----------------------------------------
Task: Regression, Distance Metric: Euclidean
Predictions for [[1, 2], [3, 4], [4, 3]]: [6.65 9.13 8.36]
R^2 score on training data: 0.83 %
----------------------------------------
Task: Regression, Distance Metric: Manhattan
Predictions for [[1, 2], [3, 4], [4, 3]]: [6.59 8.69 8.69]
R^2 score on training data: 0.83 %
----------------------------------------
