In [46]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import precision_score, recall_score

In [48]:
# Read the student-performance CSV from the working directory into `df`
# and show the frame as this cell's output.
DATA_PATH = 'studentperformance.csv'
df = pd.read_csv(DATA_PATH)
df

Unnamed: 0,Marital status,Application mode,Application order,Course,Daytime/evening attendance,Previous qualification,Previous qualification (grade),Nacionality,Mother's qualification,Father's qualification,...,Curricular units 2nd sem (credited),Curricular units 2nd sem (1),Curricular units 2nd sem (evaluations),Curricular units 2nd sem (approved),Curricular units 2nd sem (grade),Curricular units 2nd sem (without evaluations),Unemployment rate,Inflation rate,GDP,Target
0,1,17,5,171,1,1,122.0,1,19,12,...,0,0,0,0,0.000000,0,10.8,1.4,1.74,0
1,1,15,1,9254,1,1,160.0,1,1,3,...,0,6,6,6,13.666667,0,13.9,-0.3,0.79,1
2,1,1,5,9070,1,1,122.0,1,37,37,...,0,6,0,0,0.000000,0,10.8,1.4,1.74,0
3,1,17,2,9773,1,1,122.0,1,38,37,...,0,6,10,5,12.400000,0,9.4,-0.8,-3.12,1
4,2,39,1,8014,0,1,100.0,1,37,38,...,0,6,6,6,13.000000,0,13.9,-0.3,0.79,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4419,1,1,6,9773,1,1,125.0,1,1,1,...,0,6,8,5,12.666667,0,15.5,2.8,-4.06,1
4420,1,1,2,9773,1,1,120.0,105,1,1,...,0,6,6,2,11.000000,0,11.1,0.6,2.02,0
4421,1,1,1,9500,1,1,154.0,1,37,37,...,0,8,9,1,13.500000,0,13.9,-0.3,0.79,0
4422,1,1,1,9147,1,1,180.0,1,37,37,...,0,5,6,5,12.000000,0,9.4,-0.8,-3.12,1


In [58]:
# Separate the label from the predictors: `y` is the "Target" column and
# `X` is every remaining column of `df` (`df` itself is left untouched).
TARGET_COLUMN = "Target"
y = df[TARGET_COLUMN]
X = df.drop(columns=[TARGET_COLUMN])

In [59]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [66]:
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.3, random_state=42)

In [67]:
# Reduce dimensionality with PCA. A fractional `n_components` asks PCA to
# keep as many components as are needed to reach that share of explained
# variance. The projection is fitted on the training split and then applied
# to the test split.
EXPLAINED_VARIANCE = 0.95
pca = PCA(n_components=EXPLAINED_VARIANCE)
Xtrainpca = pca.fit_transform(Xtrain)
Xtestpca = pca.transform(Xtest)

In [68]:
# Candidate models compared in the evaluation cells below. Every estimator
# with a stochastic component (weight initialisation, feature/bootstrap
# sampling) gets a fixed `random_state` so that re-running the notebook
# reproduces the same scores; LogisticRegression and GaussianNB are left at
# their defaults.
# NOTE(review): the saved outputs show blank lines right after the
# MLPClassifier header, which may be a stripped ConvergenceWarning - if so,
# consider raising `max_iter`.
RANDOM_STATE = 42
classifiers = [
    LogisticRegression(),
    MLPClassifier(hidden_layer_sizes=(5,), random_state=RANDOM_STATE),
    DecisionTreeClassifier(random_state=RANDOM_STATE),
    RandomForestClassifier(random_state=RANDOM_STATE),
    GaussianNB()
]

In [69]:
# Baseline: fit each candidate on the original (non-PCA) training features
# and report precision and recall on the held-out test split.
for model in classifiers:
    model_name = type(model).__name__
    print(f"{model_name} (Without PCA)")
    # `fit` returns the fitted estimator, so prediction can be chained.
    predictions = model.fit(Xtrain, ytrain).predict(Xtest)
    test_precision = precision_score(ytest, predictions)
    test_recall = recall_score(ytest, predictions)
    print("Without PCA:")
    print(f"Precision: {test_precision:.4f}")
    print(f"Recall: {test_recall:.4f}\n")


LogisticRegression (Without PCA)
Without PCA:
Precision: 0.8793
Recall: 0.9448

MLPClassifier (Without PCA)




Without PCA:
Precision: 0.8802
Recall: 0.9357

DecisionTreeClassifier (Without PCA)
Without PCA:
Precision: 0.8535
Recall: 0.8410

RandomForestClassifier (Without PCA)
Without PCA:
Precision: 0.8713
Recall: 0.9312

GaussianNB (Without PCA)
Without PCA:
Precision: 0.8667
Recall: 0.8794



In [70]:
# Same comparison on the PCA-projected features: refit each candidate on the
# reduced training matrix and report precision and recall on the reduced
# test matrix.
# The names used here are the ones actually defined by the split and PCA
# cells (`Xtrainpca`, `Xtestpca`, `ytrain`, `ytest`); the previous
# `X_train_pca` / `y_train` / `X_test_pca` / `y_test` are not defined
# anywhere in this notebook and fail with NameError on a fresh kernel.
for classifier in classifiers:
    print(f"{classifier.__class__.__name__} (With PCA)")
    classifier.fit(Xtrainpca, ytrain)
    y_pred_pca = classifier.predict(Xtestpca)
    precision_pca = precision_score(ytest, y_pred_pca)
    recall_pca = recall_score(ytest, y_pred_pca)
    print(f"Precision: {precision_pca:.4f}")
    print(f"Recall: {recall_pca:.4f}")

LogisticRegression (With PCA)
Precision: 0.8582
Recall: 0.9549
MLPClassifier (With PCA)




Precision: 0.8715
Recall: 0.9402
DecisionTreeClassifier (With PCA)
Precision: 0.8225
Recall: 0.8253
RandomForestClassifier (With PCA)
Precision: 0.8511
Recall: 0.9538
GaussianNB (With PCA)
Precision: 0.8535
Recall: 0.8343
