In [2]:
# K-Nearest Neighbors (KNN) is one of the most basic yet essential classification algorithms in machine learning. It belongs to the supervised learning domain and is widely applied in pattern recognition and data mining.

# The KNN algorithm can also be used for regression problems. The only difference is that the prediction is the average of the nearest neighbors' values rather than a majority vote among the nearest neighbors.

# The KNN algorithm makes predictions by calculating the similarity between the input sample and each training instance.
# The algorithm does not make strong assumptions about the form of the mapping function; hence it is non-parametric.
# In simple words, by not making assumptions, the algorithm is free to learn any functional form from the training data.

In [1]:
import numpy as np
from collections import Counter
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Fetch the Iris dataset and pull out the feature matrix, labels and label names
iris = load_iris()
X, y = iris.data, iris.target
class_names = iris.target_names

# Hold out 20% of the samples for testing; the fixed random_state makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [2]:
# Number of nearest neighbors whose labels are consulted for each prediction;
# passed as the `k` argument to predict_classification in the evaluation loop.
k = 3

# a) Calculate Euclidean Distance
def euclidean_distance(row1, row2):
    """Return the Euclidean (L2) distance between two points.

    Both inputs are converted to float arrays before subtracting, so plain
    Python sequences are accepted and unsigned-integer arrays cannot wrap
    around (e.g. ``uint8`` 0 - 20 would otherwise become 236).

    Args:
        row1: Array-like of numeric coordinates.
        row2: Array-like of numeric coordinates with a shape compatible
            with ``row1``.

    Returns:
        The distance as a NumPy float scalar.
    """
    diff = np.asarray(row1, dtype=float) - np.asarray(row2, dtype=float)
    return np.sqrt(np.sum(diff ** 2))

# b) Get Nearest Neighbors
def get_nearest_neighbors(train_X, train_y, test_row, k):
    """Return the labels of the ``k`` training rows closest to ``test_row``.

    Distances are computed with ``euclidean_distance``. The sort is stable,
    so rows at equal distance keep their original training order.

    Args:
        train_X: Sequence of training feature rows.
        train_y: Sequence of labels, one per row of ``train_X``.
        test_row: Feature row to find neighbors for.
        k: Number of neighbors to return; must satisfy
            ``1 <= k <= len(train_X)``.

    Returns:
        A list of ``k`` labels ordered from nearest to farthest.

    Raises:
        ValueError: If ``train_X`` and ``train_y`` differ in length, or if
            ``k`` is outside ``1..len(train_X)``.
    """
    n_train = len(train_X)
    if len(train_y) != n_train:
        raise ValueError(
            f"train_X has {n_train} rows but train_y has {len(train_y)} labels"
        )
    # Fail early with a clear message instead of an IndexError (k too large)
    # or a silently empty neighbor list (k <= 0).
    if not 1 <= k <= n_train:
        raise ValueError(f"k must be between 1 and {n_train}, got {k}")

    labelled_distances = [
        (label, euclidean_distance(test_row, row))
        for row, label in zip(train_X, train_y)
    ]
    # list.sort is stable, so ties are resolved by training order
    labelled_distances.sort(key=lambda pair: pair[1])
    return [label for label, _ in labelled_distances[:k]]

# c) Make Predictions
def predict_classification(train_X, train_y, test_row, k):
    """Predict the label of ``test_row`` by majority vote of its neighbors.

    The ``k`` nearest labels are obtained from ``get_nearest_neighbors`` and
    tallied; the label with the highest count is returned. When several
    labels share the highest count, the one encountered first in the
    neighbor list is returned.
    """
    label_votes = Counter()
    for label in get_nearest_neighbors(train_X, train_y, test_row, k):
        label_votes[label] += 1
    # most_common(1) yields a single (label, count) pair
    top_label, _vote_count = label_votes.most_common(1)[0]
    return top_label

# Classify every test sample, report it, and count the correct predictions
correct_predictions = 0
for sample_number, (features, actual) in enumerate(zip(X_test, y_test), start=1):
    prediction = predict_classification(X_train, y_train, features, k)
    print(f"Test Sample {sample_number}: Actual class = {class_names[actual]}, Predicted class = {class_names[prediction]}")
    # A matching label adds 1 to the tally, a mismatch adds 0
    correct_predictions += int(prediction == actual)

# Accuracy is the fraction of test samples whose predicted label matched the actual one
accuracy = correct_predictions / len(X_test)
print(f"\nAccuracy of k-NN classifier: {accuracy * 100:.2f}%")

Test Sample 1: Actual class = versicolor, Predicted class = versicolor
Test Sample 2: Actual class = setosa, Predicted class = setosa
Test Sample 3: Actual class = virginica, Predicted class = virginica
Test Sample 4: Actual class = versicolor, Predicted class = versicolor
Test Sample 5: Actual class = versicolor, Predicted class = versicolor
Test Sample 6: Actual class = setosa, Predicted class = setosa
Test Sample 7: Actual class = versicolor, Predicted class = versicolor
Test Sample 8: Actual class = virginica, Predicted class = virginica
Test Sample 9: Actual class = versicolor, Predicted class = versicolor
Test Sample 10: Actual class = versicolor, Predicted class = versicolor
Test Sample 11: Actual class = virginica, Predicted class = virginica
Test Sample 12: Actual class = setosa, Predicted class = setosa
Test Sample 13: Actual class = setosa, Predicted class = setosa
Test Sample 14: Actual class = setosa, Predicted class = setosa
Test Sample 15: Actual class = setosa, Predicte