In [4]:
import numpy as np
import pandas as pd

# Read the Iris table from the relative path "../iris.csv".
iris_data = pd.read_csv("../iris.csv")

# Hold out one randomly drawn row per distinct "Species" value; each entry of
# test_samples is a single-row DataFrame that still carries its label.
species_column = iris_data["Species"]
test_samples = []
for class_name in species_column.unique():
    class_data = iris_data.loc[species_column == class_name]
    random_sample = class_data.sample(n=1)
    test_samples += [random_sample]

# Compute mean vector and covariance matrix for each class.
#
# The rows held out in test_samples are removed first, so the class statistics
# are estimated only from rows the classifier is not evaluated on afterwards.
# Without this step every test sample would contribute to the mean/covariance
# of its own class and the reported predictions would be optimistic.
held_out_index = [label for sample in test_samples for label in sample.index]
train_data = iris_data.drop(index=held_out_index)

# Maps class name -> (mean vector, covariance matrix) of its training rows.
class_mean_cov = {}
for class_name in train_data["Species"].unique():
    class_data = train_data[train_data["Species"] == class_name]
    # Keep only the feature columns; the label is not part of the statistics.
    class_data = class_data.drop(columns=["Species"])
    class_mean = np.mean(class_data, axis=0)
    # rowvar=False: rows are observations and columns are variables.
    class_cov = np.cov(class_data, rowvar=False)
    class_mean_cov[class_name] = (class_mean, class_cov)

# Calculate Mahalanobis distance and classify test samples
def mahalanobis_distance(x, mean, cov_inv):
    """Return the Mahalanobis distance between ``x`` and ``mean``.

    Evaluates ``sqrt((x - mean)^T . cov_inv . (x - mean))``.

    Args:
        x: Feature vector of the point being measured.
        mean: Centre to measure from; must support ``x - mean``.
        cov_inv: Inverse covariance matrix that weights the offset.

    Returns:
        The square root of the quadratic form above.
    """
    offset = x - mean
    weighted = np.dot(offset.T, cov_inv)
    squared = np.dot(weighted, offset)
    return np.sqrt(squared)

# Invert each class covariance once up front: the inverse depends only on the
# class, not on the sample being classified, so recomputing it for every
# (sample, class) pair is wasted work.
class_mean_cov_inv = {
    name: (mean_vector, np.linalg.inv(covariance))
    for name, (mean_vector, covariance) in class_mean_cov.items()
}

# Assign every held-out sample to the class whose mean is nearest in
# Mahalanobis distance, and report it next to the true label.
for test_sample in test_samples:
    # test_sample is a one-row DataFrame; flatten its features to a 1-D vector.
    sample_features = test_sample.drop(columns=["Species"]).values.flatten()
    sample_class = test_sample["Species"].values[0]

    min_distance = float("inf")
    predicted_class = None

    for class_name, (class_mean, cov_inv) in class_mean_cov_inv.items():
        distance = mahalanobis_distance(sample_features, class_mean, cov_inv)

        # Strict '<' keeps the first class encountered when distances tie.
        if distance < min_distance:
            min_distance = distance
            predicted_class = class_name

    print(f"Original Class: {sample_class}, Predicted Class: {predicted_class}, Mahalanobis Distance: {min_distance:.3f}")


Original Class: Iris-setosa, Predicted Class: Iris-setosa, Mahalanobis Distance: 2.035
Original Class: Iris-versicolor, Predicted Class: Iris-versicolor, Mahalanobis Distance: 2.086
Original Class: Iris-virginica, Predicted Class: Iris-virginica, Mahalanobis Distance: 2.315
