In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay

In [2]:
# Load the semicolon-delimited student dataset from the working directory.
data = pd.read_csv("student-mat.csv", delimiter=";")

# Preview the first five rows (rendered as the cell output).
data.head(5)

Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,...,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,GP,F,18,U,GT3,A,4,4,at_home,teacher,...,4,3,4,1,1,3,6,5,6,6
1,GP,F,17,U,GT3,T,1,1,at_home,other,...,5,3,3,1,1,3,4,5,5,6
2,GP,F,15,U,LE3,T,1,1,at_home,other,...,4,3,2,2,3,3,10,7,8,10
3,GP,F,15,U,GT3,T,4,2,health,services,...,3,2,2,1,1,5,2,15,14,15
4,GP,F,16,U,GT3,T,3,3,other,other,...,4,3,2,1,2,5,4,6,10,10


In [3]:
# Binary target: 1 when G3 is at least 10, otherwise 0.
data['pass'] = data['G3'].ge(10).astype(int)

In [4]:
# 'pass' was derived from G3, so G3 must not remain among the features;
# G1 and G2 are removed together with it.
data = data.drop(columns=['G1', 'G2', 'G3'])

In [5]:
# Integer-encode every text column so the estimators receive numeric input.
#
# Text columns are detected with the pandas type predicates rather than
# `dtype == 'object'`: newer pandas versions can store strings in a dedicated
# string dtype, which the equality check does not match (the columns would be
# left as raw strings and scaling / fitting would fail later).
#
# A fresh encoder is fitted per column and kept in `label_encoders`, so each
# column's category -> integer mapping stays available (e.g. for
# `inverse_transform`). `label_encoder` still ends up bound to the encoder of
# the last encoded column, as before.
#
# NOTE(review): LabelEncoder assigns arbitrary integer codes; for columns with
# more than two unordered categories a linear model will read those codes as an
# ordering. One-hot encoding may be preferable -- confirm before relying on the
# logistic-regression coefficients.
label_encoder = LabelEncoder()
label_encoders = {}
for column in data.columns:
    values = data[column]
    is_text = (
        pd.api.types.is_object_dtype(values)
        or pd.api.types.is_string_dtype(values)
    )
    if is_text:
        label_encoder = LabelEncoder()
        data[column] = label_encoder.fit_transform(values)
        label_encoders[column] = label_encoder

In [6]:
# Feature matrix: every column except the target.
X = data.drop(columns=['pass'])
# Target vector: the binary 'pass' column.
y = data.loc[:, 'pass']

In [7]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both partitions so no test information leaks in.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [9]:
# Logistic regression is trained and evaluated on the standardized features;
# max_iter is set to 1000 iterations.
logreg = LogisticRegression(max_iter=1000).fit(X_train_scaled, y_train)
y_pred_logreg = logreg.predict(X_test_scaled)

In [10]:
# The decision tree is trained and evaluated on the unscaled features;
# the fixed random_state keeps the fitted tree reproducible.
tree = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred_tree = tree.predict(X_test)

In [11]:
# Test-set accuracy for each model, expressed as a percentage.
acc_logreg = 100 * accuracy_score(y_test, y_pred_logreg)
acc_tree = 100 * accuracy_score(y_test, y_pred_tree)

# Confusion matrices on the same held-out rows (true labels passed first,
# predictions second).
cm_logreg = confusion_matrix(y_test, y_pred_logreg)
cm_tree = confusion_matrix(y_test, y_pred_tree)

# One report per model: accuracy line, heading, then the matrix.
print(
    f"Logistic Regression Accuracy: {acc_logreg:.2f}%",
    "Confusion Matrix (LogReg):",
    cm_logreg,
    sep="\n",
)

print(
    f"\nDecision Tree Accuracy: {acc_tree:.2f}%",
    "Confusion Matrix (Decision Tree):",
    cm_tree,
    sep="\n",
)

Logistic Regression Accuracy: 74.68%
Confusion Matrix (LogReg):
[[12 15]
 [ 5 47]]

Decision Tree Accuracy: 70.89%
Confusion Matrix (Decision Tree):
[[13 14]
 [ 9 43]]
