# Q6: KNN Classification — Student Result Prediction
Dataset: `knn_students.csv`

In [None]:
# Common imports used across notebooks
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default plot theme to every figure drawn in this notebook.
sns.set()
# Shared seed; passed to train_test_split further down so the split is reproducible.
RANDOM_STATE = 42


In [None]:
# Read the student dataset into a DataFrame and preview the first rows.
DATA_PATH = '/mnt/data/aiml/knn_students.csv'
df = pd.read_csv(DATA_PATH)
df.head()

In [None]:
# Encode the target column `result` as 1 (pass) / 0 (fail) and split the frame
# into features X and labels y.
#
# Behaviour:
#   * labels are matched case-insensitively and with surrounding whitespace
#     removed, so 'Pass', 'FAIL ' etc. are encoded too;
#   * if every non-missing label is pass/fail, the column is replaced by 0/1;
#   * if none of the labels is pass/fail, the column is left untouched
#     (the classifier can work with the original labels directly);
#   * if only some labels match, a ValueError is raised, because a column that
#     mixes numbers and strings cannot be used as a classification target.
TARGET_MAP = {'pass': 1, 'fail': 0}

result_raw = df['result']
# Cover both the classic object dtype and pandas' dedicated string dtypes.
if pd.api.types.is_object_dtype(result_raw) or pd.api.types.is_string_dtype(result_raw):
    normalized = result_raw.map(lambda v: v.strip().lower() if isinstance(v, str) else v)
    encoded = normalized.map(TARGET_MAP)
    # Non-missing labels that are not in TARGET_MAP after normalisation.
    unmapped = result_raw[result_raw.notna() & encoded.isna()]
    n_labelled = int(result_raw.notna().sum())
    if unmapped.empty:
        df['result'] = encoded
    elif len(unmapped) < n_labelled:
        raise ValueError(
            "Column 'result' mixes pass/fail with other labels: "
            f"{sorted(set(map(str, unmapped)))}"
        )
    # else: no label is pass/fail -> keep the original labels unchanged.

y = df['result']
X = df.drop(columns=['result'])

In [None]:
# If participation_level holds text categories, encode them ordinally
# (low < medium < high) so the scaler and KNN receive a numeric feature.
# Matching ignores case and surrounding whitespace, and accepts both
# 'Med' and 'Medium'. Missing values stay missing.
PARTICIPATION_ORDER = {'low': 0, 'med': 1, 'medium': 1, 'high': 2}

participation_raw = X['participation_level']
# Cover both the classic object dtype and pandas' dedicated string dtypes.
if pd.api.types.is_object_dtype(participation_raw) or pd.api.types.is_string_dtype(participation_raw):
    normalized = participation_raw.map(lambda v: v.strip().lower() if isinstance(v, str) else v)
    encoded = normalized.map(PARTICIPATION_ORDER)
    # Fail fast on unknown categories: leaving raw strings in a feature column
    # would only surface later as a conversion error inside the scaler.
    unmapped = participation_raw[participation_raw.notna() & encoded.isna()]
    if not unmapped.empty:
        raise ValueError(
            "Unexpected participation_level values: "
            f"{sorted(set(map(str, unmapped)))}; "
            f"expected one of {sorted(PARTICIPATION_ORDER)}"
        )
    X['participation_level'] = encoded


In [None]:
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Scaling is part of the pipeline, so during cross-validation the scaler is
# fitted on each training fold only and never sees the validation fold.
pipe = Pipeline([('scaler', StandardScaler()), ('knn', KNeighborsClassifier())])

# Odd k from 1 to 21 (odd values avoid ties in binary voting), with both
# uniform and distance-weighted voting.
param_grid = {
    'knn__n_neighbors': list(range(1, 22, 2)),
    'knn__weights': ['uniform', 'distance'],
}

# Hold out 20% as a test set, stratified so class proportions are preserved.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.2, random_state=RANDOM_STATE
)

# 5-fold cross-validated grid search on the training split only.
gs = GridSearchCV(pipe, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
gs.fit(X_train, y_train)
print('Best params:', gs.best_params_)

# Evaluate the refitted best pipeline on the held-out test set.
y_pred = gs.predict(X_test)
print(classification_report(y_test, y_pred))

# Confusion matrix with explicit class order so tick labels match rows/columns.
class_labels = gs.classes_
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
fig, ax = plt.subplots()
sns.heatmap(cm, annot=True, fmt='d',
            xticklabels=class_labels, yticklabels=class_labels, ax=ax)
ax.set(xlabel='Predicted label', ylabel='True label',
       title='KNN confusion matrix (test set)')
plt.show()