In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
from matplotlib.colors import ListedColormap


In [None]:

# Toy student dataset: one tuple per student, columns named explicitly.
# 'Sex' and 'FinalResult' are kept as raw strings at this stage.
dataset = pd.DataFrame(
    [
        (3, 70, 17, 'Male', 'Pass'),
        (6, 88, 18, 'Female', 'Pass'),
        (2, 60, 16, 'Male', 'Fail'),
        (8, 92, 19, 'Female', 'Pass'),
        (4, 68, 18, 'Female', 'Fail'),
        (5, 78, 17, 'Male', 'Pass'),
        (7, 85, 20, 'Female', 'Pass'),
        (1, 50, 16, 'Male', 'Fail'),
        (9, 96, 21, 'Female', 'Pass'),
        (2, 58, 17, 'Male', 'Fail'),
    ],
    columns=['HoursStudied', 'ClassAttendance', 'StudentAge', 'Sex', 'FinalResult'],
)


In [None]:

# Encode the two string columns as integers, in place on `dataset`.
# Each column gets its own encoder: refitting a single LabelEncoder would
# overwrite its `classes_`, so the 'Sex' mapping could no longer be recovered
# with inverse_transform.
# LabelEncoder assigns codes in sorted class order, so
# 'Female' -> 0, 'Male' -> 1 and 'Fail' -> 0, 'Pass' -> 1.
sex_encoder = LabelEncoder()
dataset['Sex'] = sex_encoder.fit_transform(dataset['Sex'])

# `lbl` stays bound to the encoder fitted on the target column.
lbl = LabelEncoder()
dataset['FinalResult'] = lbl.fit_transform(dataset['FinalResult'])


In [None]:

# Separate the label column from the predictors, then hold out 20% of the
# rows as a test set. random_state is fixed so the split is reproducible.
target = dataset['FinalResult']
features = dataset.drop(columns='FinalResult')

x_train, x_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=1,
)


In [None]:

# Fit both baseline classifiers on the training split.
# (scikit-learn's fit() returns the estimator, so construction and fitting
# can be chained.)
model_log = LogisticRegression().fit(x_train, y_train)
model_knn = KNeighborsClassifier(n_neighbors=3).fit(x_train, y_train)

# Predict on the held-out rows.
pred_log = model_log.predict(x_test)
pred_knn = model_knn.predict(x_test)

# Fraction of held-out rows classified correctly by each model.
acc_log = accuracy_score(y_test, pred_log)
acc_knn = accuracy_score(y_test, pred_knn)

print("LogReg Accuracy:", acc_log)
print("KNN Accuracy:", acc_knn)


In [None]:

# Sweep a few neighbourhood sizes and record the accuracy of each fitted
# KNN on the held-out split (x_test / y_test).
ks = [1, 3, 5, 7]
acc_knn_list = [
    accuracy_score(
        y_test,
        KNeighborsClassifier(n_neighbors=k).fit(x_train, y_train).predict(x_test),
    )
    for k in ks
]

# Accuracy as a function of k.
fig, ax = plt.subplots()
ax.plot(ks, acc_knn_list, marker='o')
ax.set_xlabel("K Value")
ax.set_ylabel("Accuracy")
ax.set_title("KNN Tuning")
ax.grid()
plt.show()


In [None]:

# Decision-region plot for a KNN trained on only two features, so the
# classifier's output can be drawn on a 2-D plane.
plot_data = dataset[['HoursStudied', 'ClassAttendance']]
x_vis_train, x_vis_test, y_vis_train, y_vis_test = train_test_split(
    plot_data, target, test_size=0.2, random_state=1
)

viz_model = KNeighborsClassifier(n_neighbors=3)
viz_model.fit(x_vis_train, y_vis_train)

# Grid step, in the raw units of both axes.
h = .03
# Pad the plotted area by one unit beyond the observed min/max on each axis.
x_min, x_max = plot_data.iloc[:, 0].min() - 1, plot_data.iloc[:, 0].max() + 1
y_min, y_max = plot_data.iloc[:, 1].min() - 1, plot_data.iloc[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

# The model was fitted on a DataFrame, so give the grid the same column names;
# predicting on a bare ndarray makes scikit-learn warn that X has no valid
# feature names.
grid_points = pd.DataFrame(
    np.c_[xx.ravel(), yy.ravel()],
    columns=plot_data.columns,
)
z = viz_model.predict(grid_points)
z = z.reshape(xx.shape)

plt.figure(figsize=(7,5))
# Background: predicted class per grid cell (first colour = class 0).
plt.contourf(xx, yy, z, cmap=ListedColormap(['#FFA07A','#90EE90']), alpha=0.7)
# Foreground: every row of the dataset (train and test), coloured by its label.
plt.scatter(plot_data['HoursStudied'], plot_data['ClassAttendance'], c=target, cmap=ListedColormap(['red','green']), edgecolor='k')
plt.xlabel("Hours Studied")
plt.ylabel("Class Attendance")
plt.title("KNN Decision Zone")
plt.show()


In [None]:

# Confusion matrix of the 3-NN predictions on the held-out split.
# `labels=[0, 1]` pins the matrix to 2x2 (0 = Fail, 1 = Pass after label
# encoding). Without it, a test split whose true and predicted labels all
# belong to one class yields a 1x1 matrix, which no longer matches the two
# display labels below.
cfm = confusion_matrix(y_test, pred_knn, labels=[0, 1])
ConfusionMatrixDisplay(confusion_matrix=cfm, display_labels=['Fail','Pass']).plot()
plt.title("ConfMatrix - KNN")
plt.show()


In [None]:

# Refit the 3-NN classifier without the age and sex columns, using the same
# split parameters as before, and compare its accuracy with the full-feature
# KNN.
dropped_columns = ['FinalResult', 'StudentAge', 'Sex']
features_less = dataset.drop(dropped_columns, axis=1)

x_train_l, x_test_l, y_train_l, y_test_l = train_test_split(
    features_less,
    target,
    test_size=0.2,
    random_state=1,
)

recheck_model = KNeighborsClassifier(n_neighbors=3).fit(x_train_l, y_train_l)
pred_l = recheck_model.predict(x_test_l)
acc_l = accuracy_score(y_test_l, pred_l)

print("Original KNN Accuracy:", acc_knn)
print("Accuracy after dropping age & sex:", acc_l)


In [None]:

# Probability the logistic model assigns to the second class in
# model_log.classes_ (column 1 of predict_proba), one value per held-out row.
probs = model_log.predict_proba(x_test)[:, 1]

# Plot the probabilities against the row's position in the test split.
fig, ax = plt.subplots()
ax.plot(range(len(probs)), probs, marker='o', linestyle='--')
ax.set_title("Logistic Regression Probabilities")
ax.set_xlabel("Sample")
ax.set_ylabel("Prob of Pass")
ax.grid(True)
plt.show()
