In [23]:
import pandas as pd
import numpy as np
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Read the Iris CSV and discard the 'Id' column so it is not treated as a feature
df = pd.read_csv('Iris.csv').drop(columns='Id')

# Features are every remaining column except 'Species'; 'Species' is the target
X = df.drop(columns='Species').values
y = df['Species'].values  # label values, later indexed by position

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Fit the scaler on the training rows only, then apply that same fitted
# transformation to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the kNN algorithm from scratch
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between ``x1`` and ``x2``.

    The inputs are combined with ``-``, so they must support element-wise
    subtraction (e.g. NumPy arrays). Every squared difference is summed into
    a single scalar before the square root is taken.
    """
    offsets = x1 - x2
    sum_of_squares = np.sum(np.square(offsets))
    return np.sqrt(sum_of_squares)

def knn_predict(X_train, y_train, x_new, k=3):
    """Predict the label of ``x_new`` by majority vote of its k nearest neighbours.

    Distances are Euclidean and are computed against every training row in a
    single vectorized NumPy expression (the same quantity
    ``euclidean_distance`` yields for one pair of points).

    Parameters
    ----------
    X_train : array-like, shape (n_samples, n_features)
        Training feature rows. A 1-D input is treated as n_samples rows of a
        single feature.
    y_train : array-like, length n_samples
        Training labels. They are converted with ``np.asarray`` and read by
        position, so a pandas Series with an arbitrary index is handled
        correctly.
    x_new : array-like, shape (n_features,)
        The point to classify.
    k : int, default 3
        Number of neighbours that vote. If ``k`` exceeds the number of
        training rows, every training row votes.

    Returns
    -------
    The most frequent label among the k nearest training rows. When several
    labels share the top count, the one met first while walking the
    neighbours from nearest to farthest wins.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1 or the training set is empty.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    features = np.asarray(X_train, dtype=float)
    if features.shape[0] == 0:
        raise ValueError("X_train must contain at least one sample")
    # One row per training sample, whatever dimensionality was passed in.
    features = features.reshape(features.shape[0], -1)
    query = np.asarray(x_new, dtype=float).reshape(-1)

    # Positional access: avoids label-based lookup on a pandas Series.
    labels = np.asarray(y_train)

    distances = np.sqrt(np.sum((features - query) ** 2, axis=1))

    # A stable sort keeps equidistant neighbours in training order, so the
    # result does not depend on the sort implementation's tie handling.
    nearest = np.argsort(distances, kind="stable")[:k]

    votes = Counter(labels[i] for i in nearest)
    return votes.most_common(1)[0][0]

# Classify every scaled test row with the from-scratch kNN (3 neighbours)
y_pred = [
    knn_predict(X_train=X_train_scaled, y_train=y_train, x_new=sample, k=3)
    for sample in X_test_scaled
]

# Accuracy is the fraction of test rows whose prediction equals the true label
accuracy = np.mean(y_pred == y_test)
print("Accuracy:", accuracy)

# Classify one hand-entered raw measurement; it goes through the same fitted
# scaler as the training data before being compared to the scaled training rows
new_instance = [[5.1, 3.5, 1.4, 0.2]]
new_instance_scaled = scaler.transform(new_instance)
new_prediction = knn_predict(
    X_train=X_train_scaled,
    y_train=y_train,
    x_new=new_instance_scaled[0],
    k=3,
)
print("Predicted species for new instance:", new_prediction)


Accuracy: 1.0
Predicted species for new instance: Iris-setosa
