# In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

# Load Iris dataset: X is the feature matrix, y the integer class codes,
# and target_names the label array indexed by those codes.
iris = load_iris()
X = iris.data
y = iris.target
target_names = iris.target_names

# Create DataFrame for visualization.
# Species labels are looked up in target_names (indexing the label array by
# the class codes) instead of a hand-written dict, so the labels cannot drift
# from the names the dataset itself provides.
df = pd.DataFrame(X, columns=iris.feature_names)
df['Species'] = target_names[y]

# Data visualization: pairwise plots of the feature columns, colored by species
sns.pairplot(df, hue='Species', markers=['o', 's', 'D'])
plt.suptitle('Iris Dataset Feature Relationships', y=1.02)
plt.show()

# Split data first so the held-out rows play no part in preprocessing.
# stratify=y keeps the class proportions the same in both partitions;
# random_state fixes the shuffle so the split is reproducible.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Data preprocessing: fit the scaler on the training rows ONLY, then apply
# that same fitted transform to the test rows. Fitting on the full dataset
# would let test-set statistics leak into the features the model trains on.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full matrix scaled with the training-set statistics; kept so any code that
# still refers to X_scaled continues to work.
X_scaled = scaler.transform(X)

# Find optimal k value.
# Each candidate k is scored by 5-fold cross-validation on the training split
# only. Scoring candidates against X_test would tune the hyperparameter on the
# same data used for the final evaluation and bias the reported metrics upward.
k_values = range(1, 20)
accuracies = [
    cross_val_score(
        KNeighborsClassifier(n_neighbors=k), X_train, y_train, cv=5
    ).mean()
    for k in k_values
]

# Mean cross-validated accuracy for every candidate k
plt.plot(k_values, accuracies, 'bo-')
plt.xlabel('k Value')
plt.ylabel('Accuracy')
plt.title('Finding Optimal k Value')
plt.xticks(k_values)
plt.grid()
plt.show()

# Train model with the best k from the search above rather than a value read
# off the plot by hand, which silently goes stale if the data or split changes.
# np.argmax returns the first maximum, so ties resolve to the smallest k.
optimal_k = k_values[int(np.argmax(accuracies))]
knn = KNeighborsClassifier(n_neighbors=optimal_k)
knn.fit(X_train, y_train)

# Predictions on the held-out split
y_pred = knn.predict(X_test)

# Evaluation: compute both summaries up front, then print them in order
report_text = classification_report(y_test, y_pred, target_names=target_names)
cm = confusion_matrix(y_test, y_pred)

print("\nClassification Report:")
print(report_text)

print("\nConfusion Matrix:")
print(cm)

# Visualize confusion matrix as an annotated heatmap; rows are the actual
# classes and columns the predicted ones, both labeled with target_names
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=target_names,
    yticklabels=target_names,
)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

# Print correct and wrong predictions.
# Class codes are translated to labels through target_names (the same source
# the classification report and heatmap use) instead of a duplicated
# hand-written dict.
results = pd.DataFrame({
    'Actual': target_names[y_test],
    'Predicted': target_names[y_pred],
})

# One boolean mask splits the rows into hits and misses
is_correct = results['Actual'] == results['Predicted']
correct = results[is_correct]
wrong = results[~is_correct]

print("\nCorrect Predictions:")
print(correct)
print("\nNumber of correct predictions:", len(correct))
print("\nWrong Predictions:")
print(wrong)
print("\nNumber of wrong predictions:", len(wrong))