In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report,mean_absolute_error,mean_squared_error,r2_score
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
import warnings
warnings.filterwarnings('ignore')

In [2]:
data = pd.DataFrame({
    "Hours_Studied": [2, 3, 5, 6, 1, 7, 8, 4, 9, 10, 5, 3],
    "Attendance": [60, 65, 75, 80, 55, 90, 95, 70, 98, 100, 85, 68],
    "Extra_Class": ["Yes", "No", "Yes", "No", "No", "Yes", "Yes", "No", "Yes", "No", "No", "Yes"],
    "Grade": ["Fail", "Fail", "Pass", "Pass", "Fail", "Pass", "Pass", "Fail", "Pass", "Pass", "Pass", "Fail"]
})
data

Unnamed: 0,Hours_Studied,Attendance,Extra_Class,Grade
0,2,60,Yes,Fail
1,3,65,No,Fail
2,5,75,Yes,Pass
3,6,80,No,Pass
4,1,55,No,Fail
5,7,90,Yes,Pass
6,8,95,Yes,Pass
7,4,70,No,Fail
8,9,98,Yes,Pass
9,10,100,No,Pass


In [3]:
x = data[['Hours_Studied','Attendance','Extra_Class']]
y = data['Grade']

In [4]:
num_ft = ['Hours_Studied','Attendance']
cat_ft = ['Extra_Class']

num_transform = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

cat_transform = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(drop='first'))
])

In [5]:
preprocessor = ColumnTransformer(transformers=[
    ('num', num_transform, num_ft),
    ('cat', cat_transform, cat_ft)
])

pipe = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression())
])

In [6]:
models = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(),
    'K-Nearest Neighbors': KNeighborsClassifier()
}

In [7]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

pipe.fit(x_train,y_train)

In [8]:

for name , model in models.items():
    clf=Pipeline([
        ("preprocessor",preprocessor),
        ("classifier",model)
    ])
clf.fit(x_train,y_train)
y_pred=clf.predict(x_test)

accuracy=accuracy_score(y_test,y_pred)
print(f"Accuracy for {name}: {accuracy}")
cm=confusion_matrix(y_test,y_pred)
print(f"Confusion Matrix for {name}:")
print(cm)
cr=classification_report(y_test,y_pred)
print(f"Classification Report for {name}:")
print(cr)

Accuracy for K-Nearest Neighbors: 1.0
Confusion Matrix for K-Nearest Neighbors:
[[1 0]
 [0 2]]
Classification Report for K-Nearest Neighbors:
              precision    recall  f1-score   support

        Fail       1.00      1.00      1.00         1
        Pass       1.00      1.00      1.00         2

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3



In [9]:
y_pred = pipe.predict(x_test)
print(y_pred)

['Pass' 'Pass' 'Fail']


In [10]:
# Replace regression metrics with classification metrics
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", class_report)

Accuracy: 1.0
Confusion Matrix:
 [[1 0]
 [0 2]]
Classification Report:
               precision    recall  f1-score   support

        Fail       1.00      1.00      1.00         1
        Pass       1.00      1.00      1.00         2

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3

