In [4]:
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import numpy as np
import pandas
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.neighbors import KNeighborsClassifier


# --- Load -------------------------------------------------------------------
# iris.txt is read with explicit column names; the last column is the
# species label, the four preceding columns are the measurements.
column_names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
data = pandas.read_csv('iris.txt', names=column_names)

# Every measurement column is used as a feature; the label column is the target.
X = data[column_names[:-1]].values
y = data[column_names[-1]].values


# --- Split ------------------------------------------------------------------
# 25% of the rows are held out for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# --- Train ------------------------------------------------------------------
# Minkowski metric with p=2 is the Euclidean distance.
knnClassifier = KNeighborsClassifier(
    n_neighbors=5,
    metric='minkowski',
    p=2,
)
knnClassifier.fit(X_train, y_train)

# --- Predict ----------------------------------------------------------------
y_pred = knnClassifier.predict(X_test)

# --- Evaluate ---------------------------------------------------------------
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print("\nClassification Report:\n", report)

Accuracy: 1.00

Classification Report:
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        15
Iris-versicolor       1.00      1.00      1.00        11
 Iris-virginica       1.00      1.00      1.00        12

       accuracy                           1.00        38
      macro avg       1.00      1.00      1.00        38
   weighted avg       1.00      1.00      1.00        38



In [11]:
# From Scratch ---------------

# KNN - Class
class KNN:
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    Training only stores the data. Prediction computes the distance from
    every query sample to every training sample, selects the ``k`` closest
    training samples and returns the most frequent label among them.

    Attributes:
        k: Number of neighbours that vote on each prediction.
        X_train: Training features, shape (n_samples, n_features).
            Only present after a successful ``fit``.
        y_train: Training labels with one entry per training sample.
            Only present after a successful ``fit``.
    """

    def __init__(self, k=3):
        """Create an unfitted classifier.

        Args:
            k: Number of neighbours to consult; must be an integer >= 1.

        Raises:
            TypeError: If ``k`` is not an integer.
            ValueError: If ``k`` is smaller than 1.
        """
        # Reject non-integers up front: a float k would otherwise only fail
        # later, inside np.argpartition, with a far less helpful message.
        if not isinstance(k, (int, np.integer)):
            raise TypeError("k must be an integer")
        if k < 1:
            raise ValueError("k must be >= 1")
        self.k = int(k)

    def fit(self, x, y):
        """Store the training data.

        Args:
            x: Array-like of shape (n_samples, n_features).
            y: Array-like of labels whose first dimension has n_samples entries.

        Returns:
            The classifier itself, so calls can be chained.

        Raises:
            ValueError: If ``x`` is not 2-D or ``x`` and ``y`` disagree on the
                number of samples.
        """
        features = np.asarray(x)
        labels = np.asarray(y)

        # Validate BEFORE touching self so that a rejected fit never leaves a
        # half-fitted classifier behind (predict relies on X_train existing
        # only after a successful fit).
        if features.ndim != 2:
            raise ValueError("x must be a 2-D array of shape (n_samples, n_features)")
        if labels.ndim == 0 or features.shape[0] != labels.shape[0]:
            raise ValueError("Number of samples in x and y must match")

        self.X_train = features
        self.y_train = labels
        return self

    def predict(self, x):
        """Predict a label for every sample in ``x``.

        Args:
            x: Array-like of shape (n_queries, n_features); a 1-D input is
                treated as a single sample.

        Returns:
            numpy array with one predicted label per query sample.

        Raises:
            ValueError: If the classifier has not been fitted, if the feature
                dimension of ``x`` differs from the training data, or if
                ``k`` exceeds the number of training samples.
        """
        if not hasattr(self, "X_train"):
            raise ValueError("Must fit classifier before calling predict")

        queries = np.asarray(x)
        if queries.ndim == 1:
            queries = queries.reshape(1, -1)
        if queries.ndim != 2 or queries.shape[1] != self.X_train.shape[1]:
            raise ValueError("Feature dimension of x does not match training data")

        n_train = self.X_train.shape[0]
        if self.k > n_train:
            raise ValueError("k cannot be larger than number of training samples")

        # Pairwise Euclidean distances, shape (n_queries, n_train), computed
        # without Python loops via broadcasting. Note that the intermediate
        # difference array has shape (n_queries, n_train, n_features).
        diffs = queries[:, None, :] - self.X_train[None, :, :]
        dists = np.sqrt((diffs ** 2).sum(axis=2))

        # Column indices of the k smallest distances in each row. argpartition
        # only guarantees that the first k entries are the k smallest; they
        # are not sorted among themselves, which is fine for majority voting.
        knn_idx = np.argpartition(dists, self.k - 1, axis=1)[:, :self.k]

        preds = []
        for neighbour_rows in knn_idx:
            neigh_labels = self.y_train[neighbour_rows]

            # np.unique returns the labels sorted, and argmax returns the
            # first maximum, so a tied vote goes to the label that sorts first.
            labels, counts = np.unique(neigh_labels, return_counts=True)
            preds.append(labels[counts.argmax()])

        return np.asarray(preds)


# --- Load -------------------------------------------------------------------
# Same data source as the scikit-learn run: four measurement columns followed
# by the species label.
column_names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
data = pandas.read_csv('iris.txt', names=column_names)

# Every measurement column is used as a feature; the label column is the target.
X = data[column_names[:-1]].values
y = data[column_names[-1]].values


# --- Split ------------------------------------------------------------------
# Identical test fraction and seed as the scikit-learn run, so both
# classifiers are evaluated on the same held-out rows.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# --- Train ------------------------------------------------------------------
knn = KNN(k=5)
knn.fit(X_train, y_train)

# --- Predict ----------------------------------------------------------------
y_pred = knn.predict(X_test)

# --- Evaluate ---------------------------------------------------------------
# Accuracy is the fraction of test labels that were predicted exactly.
matches = y_test == y_pred
accuracy = np.sum(matches) / len(y_test)
print(f"Accuracy: {accuracy:.2f}")


Accuracy: 1.00
