In [48]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
from sklearn.preprocessing import LabelEncoder,OneHotEncoder,StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

#import warnings
#warnings.filterwarnings('ignore')


In [49]:
# Toy student dataset, written one row per student. Each tuple follows the
# order given in `columns`: hours studied, attendance, extra-class flag, grade.
data = pd.DataFrame(
    [
        (2, 60, "Yes", "Fail"),
        (3, 65, "No", "Fail"),
        (5, 75, "Yes", "Pass"),
        (6, 80, "No", "Pass"),
        (1, 55, "No", "Fail"),
        (7, 90, "Yes", "Pass"),
        (8, 95, "Yes", "Pass"),
        (4, 70, "No", "Fail"),
        (9, 98, "Yes", "Pass"),
        (10, 100, "No", "Pass"),
        (5, 85, "No", "Pass"),
        (3, 68, "Yes", "Fail"),
    ],
    columns=["Hours_Studied", "Attendance", "Extra_Class", "Grade"],
)
# Bare last expression so the notebook renders the frame.
data

Unnamed: 0,Hours_Studied,Attendance,Extra_Class,Grade
0,2,60,Yes,Fail
1,3,65,No,Fail
2,5,75,Yes,Pass
3,6,80,No,Pass
4,1,55,No,Fail
5,7,90,Yes,Pass
6,8,95,Yes,Pass
7,4,70,No,Fail
8,9,98,Yes,Pass
9,10,100,No,Pass


In [50]:
# Feature matrix: the three predictor columns, in this order.
X = data.loc[:, ["Hours_Studied", "Attendance", "Extra_Class"]]
# Target vector: the Grade label for each row.
y = data.loc[:, "Grade"]

In [51]:
# Column names routed to the numeric preprocessing branch.
num_features = [
    "Hours_Studied",
    "Attendance",
]
# Column names routed to the categorical preprocessing branch.
cat_features = [
    "Extra_Class",
]

In [52]:
# Numeric branch: fill missing values with the column mean, then standardize.
num_transformer = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="mean")),
        ("scaler", StandardScaler()),
    ]
)

In [53]:
# Categorical branch: fill missing values with the most frequent category,
# then one-hot encode into a dense array. Categories not seen during fit are
# ignored at transform time instead of raising.
cat_transformer = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        (
            "encoder",
            OneHotEncoder(sparse_output=False, handle_unknown="ignore"),
        ),
    ]
)

In [54]:
# Apply each branch to its own column list and concatenate the results:
# the numeric branch on `num_features`, the categorical one on `cat_features`.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", num_transformer, num_features),
        ("cat", cat_transformer, cat_features),
    ]
)

In [55]:
# Candidate classifiers keyed by the display name used in the printed reports.
# Each estimator is wrapped together with `preprocessor` in its own Pipeline by
# the evaluation loop, so no standalone single-model pipeline is built here.
models = {
    "Logistic Regression": LogisticRegression(),
    "KNN Classifier": KNeighborsClassifier(),
    # Seeded for reproducibility: when several splits are equally good the tree
    # chooses among them at random, so an unseeded tree can differ between runs.
    "Decision Tree": DecisionTreeClassifier(random_state=42),
}

In [56]:
# Hold out 20% of the rows for evaluation. The dataset is tiny, so stratify on
# the label: an unstratified draw can leave the test set with a single class,
# which makes the per-class metrics meaningless. The seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)


In [60]:
# Fit every candidate behind the same preprocessing step and print its metrics.
#
# The label set is taken from the full target so the confusion matrix and the
# report always list every class in the same (sorted) order, even when the
# small test split or the predictions happen to miss one of them.
class_labels = sorted(y.unique())

for name, model in models.items():
    # NOTE: the same `preprocessor` object is placed in every pipeline and is
    # refit in place on each iteration; all fits use the same training data.
    pipe = Pipeline([
        ("preprocessor", preprocessor),
        ("classifier", model),
    ])
    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy for {name}: {accuracy}")

    # Rows are true labels, columns are predicted labels, both in
    # `class_labels` order.
    cm = confusion_matrix(y_test, y_pred, labels=class_labels)
    print(f"Confusion Matrix for {name}:")
    print(cm)

    # zero_division=0 reports 0.0 (instead of emitting a warning) for a class
    # that has no predicted or no true samples in this split.
    cr = classification_report(
        y_test,
        y_pred,
        labels=class_labels,
        zero_division=0,
    )
    print(f"Classification Report for {name}:")
    print(cr)



Accuracy for Logistic Regression: 1.0
Confusion Matrix for Logistic Regression:
[[1 0]
 [0 2]]
Classification Report for Logistic Regression:
              precision    recall  f1-score   support

        Fail       1.00      1.00      1.00         1
        Pass       1.00      1.00      1.00         2

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3

Accuracy for KNN Classifier: 1.0
Confusion Matrix for KNN Classifier:
[[1 0]
 [0 2]]
Classification Report for KNN Classifier:
              precision    recall  f1-score   support

        Fail       1.00      1.00      1.00         1
        Pass       1.00      1.00      1.00         2

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3

Accuracy for Decision Tree: 1.0
Confusion Matrix for Decision Tree:
[[1 0]
 [

In [58]:
#Evaluation for this Model

