In [2]:
### SALAL ALI KHAN(202307216)

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
from tabulate import tabulate

In [3]:
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between ``x1`` and ``x2``.

    The inputs are subtracted element-wise, so they must support NumPy-style
    arithmetic (for example, two arrays of the same shape).
    """
    delta = x1 - x2
    squared_total = np.sum(delta ** 2)
    return np.sqrt(squared_total)

def manhattan_distance(x1, x2):
    """Return the Manhattan (L1) distance between ``x1`` and ``x2``.

    The inputs are subtracted element-wise, so they must support NumPy-style
    arithmetic (for example, two arrays of the same shape).
    """
    absolute_gaps = np.abs(x1 - x2)
    return np.sum(absolute_gaps)

In [4]:
class KNN:
    """Brute-force k-nearest-neighbours classifier.

    Parameters
    ----------
    k : int, default 3
        Number of nearest training samples that vote on each prediction.
        Must be at least 1; if it exceeds the number of training samples,
        every training sample votes.
    distance : {'euclidean', 'manhattan'}, default 'euclidean'
        Distance measure used to rank the training samples.
    """

    def __init__(self, k=3, distance='euclidean'):
        self.k = k
        self.distance = distance

    def fit(self, X, y):
        """Memorise the training samples ``X`` and their labels ``y``.

        Both inputs are converted with ``np.asarray`` so that lists and
        pandas objects are indexed positionally at prediction time.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` differ in length or the training set is empty.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim == 0 or y.ndim == 0 or len(X) != len(y):
            raise ValueError("X and y must contain the same number of samples.")
        if len(X) == 0:
            raise ValueError("Cannot fit KNN on an empty training set.")
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        """Return an array with one predicted label per sample in ``X``.

        Raises
        ------
        ValueError
            If ``k`` is smaller than 1 or ``distance`` is not a supported
            measure (raised when the first sample is classified).
        """
        return np.array([self._predict(x) for x in np.asarray(X)])

    def _distances(self, x):
        """Return the distance from ``x`` to every training sample."""
        if self.distance not in ('euclidean', 'manhattan'):
            raise ValueError("Invalid distance measure. Choose 'euclidean' or 'manhattan'.")

        # One vectorised subtraction against the whole training set replaces
        # a Python-level loop over individual training rows.
        deltas = x - self.X_train
        # Reduce over every axis except the leading (sample) axis.
        feature_axes = tuple(range(1, deltas.ndim))
        if self.distance == 'euclidean':
            return np.sqrt(np.sum(deltas ** 2, axis=feature_axes))
        return np.sum(np.abs(deltas), axis=feature_axes)

    def _predict(self, x):
        """Return the majority label among the ``k`` nearest training samples.

        Ties between labels are resolved in favour of the label whose first
        occurrence is closest to ``x``, which keeps the result deterministic.
        """
        if self.k < 1:
            raise ValueError("k must be a positive integer.")

        distances = self._distances(x)
        # A stable sort keeps equidistant neighbours in training order.
        nearest = np.argsort(distances, kind='stable')[:self.k]

        # Dicts preserve insertion order, so labels are recorded from the
        # nearest neighbour outwards and max() returns the first label that
        # reaches the top count.
        votes = {}
        for label in self.y_train[nearest]:
            votes[label] = votes.get(label, 0) + 1
        return max(votes, key=votes.get)


In [5]:
# Synthetic 3-class problem: 500 samples, 5 features, one cluster per class
X, y = make_classification(
    n_samples=500,
    n_features=5,
    n_classes=3,
    n_clusters_per_class=1,
    random_state=42,
)

# Show which class labels are present in the generated data
unique_classes = np.unique(y)
print("Classes:", unique_classes)

# Hold out 20% of the samples for testing (seeded so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


Classes: [0 1 2]


In [6]:
# Evaluate the KNN classifier for every (k, distance measure) combination
k_values = [3, 5]
distance_measures = ['euclidean', 'manhattan']

for k in k_values:
    for distance in distance_measures:
        # Fit on the training split, then score on the held-out test split
        knn = KNN(k=k, distance=distance)
        knn.fit(X_train, y_train)
        predictions = knn.predict(X_test)
        accuracy = accuracy_score(y_test, predictions)

        # One label/value row per reported item
        results = [
            ["Neighborhood Size (k):", k],
            ["Distance Measure:", distance],
            ["Accuracy:", f"{accuracy:.4f}"],
            ["Predictions:", predictions],
        ]

        # The extra newline leaves a blank line between consecutive reports
        print(tabulate(results, tablefmt="plain"), end="\n\n")


Neighborhood Size (k):  3
Distance Measure:       euclidean
Accuracy:               0.9500
Predictions:            [1 2 0 0 1 1 0 1 0 0 2 1 2 1 2 1 2 2 1 0 0 2 0 2 0 1 2 2 1 2 0 1 0 0 2 2 1
                         2 0 0 1 2 1 1 1 1 1 1 1 1 2 0 1 1 2 2 0 0 2 1 1 0 0 1 2 2 1 1 2 1 1 1 0 0
                         1 1 0 0 0 0 2 1 1 1 1 2 2 2 0 2 1 1 0 1 0 2 1 2 1 1]

Neighborhood Size (k):  3
Distance Measure:       manhattan
Accuracy:               0.9500
Predictions:            [1 2 0 0 1 1 0 1 0 0 2 1 2 1 2 1 2 2 1 0 0 2 0 2 0 1 2 2 1 2 0 1 0 0 2 2 1
                         2 0 0 1 2 1 1 1 1 1 0 1 1 2 0 1 1 2 2 0 0 2 1 1 0 0 1 2 2 1 1 2 0 1 1 0 0
                         0 1 0 0 0 0 2 1 1 1 1 2 2 2 0 2 1 1 2 1 0 2 1 2 1 1]

Neighborhood Size (k):  5
Distance Measure:       euclidean
Accuracy:               0.9300
Predictions:            [1 2 0 0 1 1 0 1 0 0 2 1 2 1 2 1 2 2 1 0 0 2 0 2 0 1 2 2 1 2 0 1 0 0 2 2 1
                         2 0 0 1 2 1 1 1 1 1 1 1 1 2 0 1 1 2 2 0 0 2 1 1 0