# 🍷 k-NN Classification on Wine Dataset
This notebook demonstrates how to apply the **k-Nearest Neighbors (k-NN)** algorithm to the Wine dataset from scikit-learn, including preprocessing, the elbow method, training, evaluation, and visualization.

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_wine
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

## 1. Load Wine Dataset

In [None]:
# Load the scikit-learn Wine bunch and assemble a single DataFrame:
# one column per feature, plus the class label in a 'target' column.
wine = load_wine()
df = pd.DataFrame(wine.data, columns=wine.feature_names).assign(target=wine.target)

# Preview the first rows.
df.head()

## 2. Split and Scale Data

In [None]:
# Separate the feature columns from the class label.
X = df.drop(columns='target')
y = df['target']

# Hold out 30% of the rows for testing. `stratify=y` keeps the class
# proportions the same in both splits, which matters on a small multi-class
# dataset where a purely random split can under-represent a class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Fit the scaler on the training rows only, then apply the same transform to
# the test rows so no test-set statistics leak into preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

## 3. Elbow Method to Find Optimal k

In [None]:
# Choose k with 5-fold cross-validation on the training data only, so the
# held-out test set plays no part in model selection and the accuracy
# reported on it afterwards is not optimistically biased.
k_range = range(1, 21)
error_rates = []

for k in k_range:
    # Scaling lives inside the pipeline so it is re-fit on each CV training
    # fold; the validation fold never influences the scaler statistics.
    model = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=k))
    cv_accuracy = cross_val_score(model, X_train, y_train, cv=5, scoring="accuracy")
    error_rates.append(1 - cv_accuracy.mean())

# Plot mean cross-validated error against k to look for the elbow.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(k_range, error_rates, marker='o')
ax.set_title("Elbow Method to Find Optimal k")
ax.set_xlabel("k (Number of Neighbors)")
ax.set_ylabel("Cross-Validated Error Rate")
ax.grid(True)
plt.show()

## 4. Train k-NN Classifier

In [None]:
# Number of neighbours for the final model (hard-coded here;
# presumably read off the elbow plot - confirm against that figure).
best_k = 5

# Build and fit the classifier on the scaled training data in one step.
knn = KNeighborsClassifier(n_neighbors=best_k).fit(X_train_scaled, y_train)

# Display the fitted estimator as the cell output.
knn

## 5. Evaluate the Model

In [None]:
# Predict labels for the scaled test set with the fitted classifier.
y_pred = knn.predict(X_test_scaled)

# Compute the metrics first, then print them.
test_accuracy = accuracy_score(y_test, y_pred)
report_text = classification_report(
    y_test,
    y_pred,
    target_names=wine.target_names,
)

print("✅ Accuracy:", test_accuracy)
print("\n📊 Classification Report:")
print(report_text)

## 6. PCA + Scatter Plot of Predicted Classes

In [None]:
# Project the scaled training features onto their first two principal
# components so they can be drawn in 2-D.
pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train_scaled)

# Labels the fitted classifier assigns to the training rows themselves.
train_preds = knn.predict(X_train_scaled)

# One row per training sample: its 2-D coordinates plus the predicted label.
df_viz = pd.DataFrame(X_train_pca, columns=['PCA1', 'PCA2']).assign(
    **{'Predicted Class': train_preds}
)

# Scatter plot coloured by predicted class.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    data=df_viz,
    x="PCA1",
    y="PCA2",
    hue="Predicted Class",
    palette="Set2",
    s=100,
    ax=ax,
)
ax.set_title("k-NN Predictions Visualized with PCA")
ax.grid(True)
plt.show()