In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder, label_binarize
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
from matplotlib.colors import ListedColormap

# Raw GitHub URL of the dataset; replace with your own raw link if needed.
url = 'https://raw.githubusercontent.com/mostafa-cse/Machine-Learning-Assignment/refs/heads/main/KNN%20Classification%20Assignment/youth_unemployment_global.csv'

# Prefer the hosted copy, but fall back to a CSV in the working directory when
# the download or parse fails (offline session, moved file, ...).
# `except Exception` (instead of a bare `except:`) keeps KeyboardInterrupt /
# SystemExit from being silently swallowed by the fallback, and the failure
# reason is reported so a wrong URL does not go unnoticed.
try:
    df = pd.read_csv(url)
except Exception as exc:
    print(f'Could not load dataset from URL ({exc!r}); falling back to local CSV.')
    df = pd.read_csv('youth_unemployment_global.csv')

# --- Target construction ----------------------------------------------------
# Rows without an unemployment figure cannot be binned, so they are removed.
df = df.dropna(subset=['YouthUnemployment'])

# Cut the rate into three equal-frequency (tercile) bins used as class labels.
tercile_names = ['Low', 'Medium', 'High']
df['Class'] = pd.qcut(df['YouthUnemployment'], q=3, labels=tercile_names)

# --- Feature matrix ---------------------------------------------------------
# Country names are mapped to integer codes so they can be used as a numeric
# feature next to the year.
le = LabelEncoder()
df['Country_Encoded'] = le.fit_transform(df['Country'])

feature_columns = ['Country_Encoded', 'Year']
X = df[feature_columns].values
y = df['Class'].values

# --- Hold-out split: 80 % train / 20 % test, fixed seed for repeatability ---
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# --- Standardisation --------------------------------------------------------
# Scaling statistics are learned from the training rows only and then applied
# to both partitions, so nothing from the test set leaks into the fit.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Search space: k = 1..19, both vote weightings, and two distance metrics.
param_grid = dict(
    n_neighbors=range(1, 20),
    weights=['uniform', 'distance'],
    metric=['euclidean', 'manhattan'],
)

# Exhaustive 5-fold cross-validated search, ranked by accuracy.
knn = KNeighborsClassifier()
grid_search = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
grid_search.fit(X_train_scaled, y_train)

# Evaluate the winning configuration on the held-out test rows.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test_scaled)

print(grid_search.best_params_)
print(classification_report(y_test, y_pred))

# Confusion matrix of the best model on the test set.
# Without an explicit `labels=` the rows/columns follow the sorted label order
# (High, Low, Medium) and the heatmap only shows 0/1/2 on its axes, which
# makes the plot impossible to read. Fix the order to the ordinal one and
# print the class names on both axes.
cm_labels = ['Low', 'Medium', 'High']
cm = confusion_matrix(y_test, y_pred, labels=cm_labels)

plt.figure(figsize=(6, 5))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=cm_labels,
    yticklabels=cm_labels,
)
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

# Collapse the grid-search table to one accuracy value per k by averaging the
# mean cross-validation score over every weights/metric combination.
results = pd.DataFrame(grid_search.cv_results_)
scores_by_k = results.groupby('param_n_neighbors')['mean_test_score']
mean_scores = scores_by_k.mean()

# Accuracy-versus-k curve.
elbow_fig, elbow_ax = plt.subplots(figsize=(8, 5))
elbow_ax.plot(
    mean_scores.index,
    mean_scores.values,
    marker='o',
    linestyle='dashed',
)
elbow_ax.set_title('Elbow Method')
elbow_ax.set_xlabel('K Neighbors')
elbow_ax.set_ylabel('Accuracy')
elbow_ax.grid(True)
plt.show()

# One-vs-rest ROC curves for the three classes.
classes = ['Low', 'Medium', 'High']

# Columns of y_test_bin follow the order given in `classes`.
y_test_bin = label_binarize(y_test, classes=classes)

# predict_proba orders its columns by best_model.classes_ (sorted labels,
# i.e. High/Low/Medium for these strings), NOT by `classes`. Reorder the
# columns so that y_score[:, i] is the probability of classes[i]; otherwise
# each curve would compare one class's ground truth with another's score.
model_classes = list(best_model.classes_)
column_order = [model_classes.index(name) for name in classes]
y_score = best_model.predict_proba(X_test_scaled)[:, column_order]
n_classes = y_test_bin.shape[1]

plt.figure(figsize=(8, 6))
for i in range(n_classes):
    fpr, tpr, _ = roc_curve(y_test_bin[:, i], y_score[:, i])
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label=f'Class {classes[i]} (AUC = {roc_auc:.2f})')
# Diagonal = performance of a random classifier, for reference.
plt.plot([0, 1], [0, 1], 'k--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc="lower right")
plt.title('ROC Curve')
plt.show()

# Step of the background grid, in standardised feature units.
h = .02

# Extent of the plot: the training data padded by one unit on every side.
feat0 = X_train_scaled[:, 0]
feat1 = X_train_scaled[:, 1]
x_min, x_max = feat0.min() - 1, feat0.max() + 1
y_min, y_max = feat1.min() - 1, feat1.max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

# Classify every grid node, then turn the string labels into integers
# because contourf/scatter need numeric values to colour by.
grid_points = np.column_stack((xx.ravel(), yy.ravel()))
Z = best_model.predict(grid_points)
label_map = {'Low': 0, 'Medium': 1, 'High': 2}
to_code = np.vectorize(label_map.get)
Z_num = to_code(Z).reshape(xx.shape)

# Pale colours for the predicted regions, saturated ones for training points.
region_cmap = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
point_cmap = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])

plt.figure(figsize=(10, 6))
plt.contourf(xx, yy, Z_num, cmap=region_cmap, alpha=0.3)
plt.scatter(
    feat0,
    feat1,
    c=to_code(y_train),
    cmap=point_cmap,
    edgecolor='k',
    s=20,
)
plt.title('2D Decision Boundary')
plt.show()