In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.impute import SimpleImputer

# Read the CSV and discard every row that contains a missing value
df = pd.read_csv('student_dataset.csv').dropna()

# Feature columns (X) and the CGPA category label column (y)
X = df[['HSGA', 'SAT', 'GAT']]
y = df['CGPA_Category']

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Fit the scaler on the training rows only, then apply the same scaling to the test rows
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Multi-layer perceptron with two hidden layers of 100 units; fit() returns the estimator
model = MLPClassifier(
    hidden_layer_sizes=(100, 100), max_iter=500, random_state=42
).fit(X_train, y_train)

# Predicted CGPA category for each held-out row
y_pred = model.predict(X_test)

# Summary scores; precision, recall and F1 are computed with average='weighted'
print("\nMetrics:")
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
for metric_name, metric_fn in (
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1 Score", f1_score),
):
    print(f"{metric_name}: {metric_fn(y_test, y_pred, average='weighted')}")

# Per-class breakdown: confusion matrix followed by the text classification report
print("\nConfusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")



Metrics:
Accuracy: 0.988
Precision: 0.9881603563049083
Recall: 0.988
F1 Score: 0.9879785304319987

Confusion Matrix:
[[208   0   3   4]
 [  0 167   1   0]
 [  0   0 198   0]
 [  1   0   0 168]]

Classification Report:
              precision    recall  f1-score   support

     Average       1.00      0.97      0.98       215
   Excellent       1.00      0.99      1.00       168
        Good       0.98      1.00      0.99       198
        Poor       0.98      0.99      0.99       169

    accuracy                           0.99       750
   macro avg       0.99      0.99      0.99       750
weighted avg       0.99      0.99      0.99       750



In [4]:
# Decision Tree Classifier
from sklearn.tree import DecisionTreeClassifier

# Build the tree with a fixed seed and fit it on the scaled training split
dt_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predicted CGPA category for each held-out row
dt_y_pred = dt_model.predict(X_test)

# Summary scores; precision, recall and F1 are computed with average='weighted'
print("\nDecision Tree Classifier Metrics:")
print(f"Accuracy: {accuracy_score(y_test, dt_y_pred)}")
for metric_name, metric_fn in (
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1 Score", f1_score),
):
    print(f"{metric_name}: {metric_fn(y_test, dt_y_pred, average='weighted')}")

# Per-class breakdown
print("\nConfusion Matrix:", confusion_matrix(y_test, dt_y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, dt_y_pred), sep="\n")



Decision Tree Classifier Metrics:
Accuracy: 0.9826666666666667
Precision: 0.9832533234711724
Recall: 0.9826666666666667
F1 Score: 0.9827063368916729

Confusion Matrix:
[[207   0   6   2]
 [  0 164   4   0]
 [  0   1 197   0]
 [  0   0   0 169]]

Classification Report:
              precision    recall  f1-score   support

     Average       1.00      0.96      0.98       215
   Excellent       0.99      0.98      0.98       168
        Good       0.95      0.99      0.97       198
        Poor       0.99      1.00      0.99       169

    accuracy                           0.98       750
   macro avg       0.98      0.98      0.98       750
weighted avg       0.98      0.98      0.98       750



In [5]:
from sklearn.svm import SVC

# Support vector classifier with default settings, fitted on the scaled training split
svm_model = SVC(random_state=42).fit(X_train, y_train)

# Predicted CGPA category for each held-out row
svm_y_pred = svm_model.predict(X_test)

# Summary scores; precision, recall and F1 are computed with average='weighted'
print("\nSupport Vector Machine Classifier Metrics:")
print(f"Accuracy: {accuracy_score(y_test, svm_y_pred)}")
for metric_name, metric_fn in (
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1 Score", f1_score),
):
    print(f"{metric_name}: {metric_fn(y_test, svm_y_pred, average='weighted')}")

# Per-class breakdown
print("\nConfusion Matrix:", confusion_matrix(y_test, svm_y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, svm_y_pred), sep="\n")



Support Vector Machine Classifier Metrics:
Accuracy: 0.964
Precision: 0.964276667423719
Recall: 0.964
F1 Score: 0.963996451279948

Confusion Matrix:
[[203   0   6   6]
 [  0 162   6   0]
 [  4   3 191   0]
 [  2   0   0 167]]

Classification Report:
              precision    recall  f1-score   support

     Average       0.97      0.94      0.96       215
   Excellent       0.98      0.96      0.97       168
        Good       0.94      0.96      0.95       198
        Poor       0.97      0.99      0.98       169

    accuracy                           0.96       750
   macro avg       0.96      0.97      0.96       750
weighted avg       0.96      0.96      0.96       750



In [6]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes with default settings, fitted on the scaled training split
nb_model = GaussianNB().fit(X_train, y_train)

# Predicted CGPA category for each held-out row
nb_y_pred = nb_model.predict(X_test)

# Summary scores; precision, recall and F1 are computed with average='weighted'
print("\nNaive Bayes Classifier Metrics:")
print(f"Accuracy: {accuracy_score(y_test, nb_y_pred)}")
for metric_name, metric_fn in (
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1 Score", f1_score),
):
    print(f"{metric_name}: {metric_fn(y_test, nb_y_pred, average='weighted')}")

# Per-class breakdown
print("\nConfusion Matrix:", confusion_matrix(y_test, nb_y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, nb_y_pred), sep="\n")



Naive Bayes Classifier Metrics:
Accuracy: 0.9253333333333333
Precision: 0.9260243902439024
Recall: 0.9253333333333333
F1 Score: 0.9253880678708265

Confusion Matrix:
[[190   0  18   7]
 [  0 160   8   0]
 [  8   8 182   0]
 [  7   0   0 162]]

Classification Report:
              precision    recall  f1-score   support

     Average       0.93      0.88      0.90       215
   Excellent       0.95      0.95      0.95       168
        Good       0.88      0.92      0.90       198
        Poor       0.96      0.96      0.96       169

    accuracy                           0.93       750
   macro avg       0.93      0.93      0.93       750
weighted avg       0.93      0.93      0.93       750



In [7]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient boosting with a fixed seed, fitted on the scaled training split
gb_model = GradientBoostingClassifier(random_state=42).fit(X_train, y_train)

# Predicted CGPA category for each held-out row
gb_y_pred = gb_model.predict(X_test)

# Summary scores; precision, recall and F1 are computed with average='weighted'
print("\nGradient Boosting Classifier Metrics:")
print(f"Accuracy: {accuracy_score(y_test, gb_y_pred)}")
for metric_name, metric_fn in (
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1 Score", f1_score),
):
    print(f"{metric_name}: {metric_fn(y_test, gb_y_pred, average='weighted')}")

# Per-class breakdown
print("\nConfusion Matrix:", confusion_matrix(y_test, gb_y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, gb_y_pred), sep="\n")



Gradient Boosting Classifier Metrics:
Accuracy: 0.984
Precision: 0.9844646881760932
Recall: 0.984
F1 Score: 0.9840350597939058

Confusion Matrix:
[[209   0   4   2]
 [  0 163   5   0]
 [  0   1 197   0]
 [  0   0   0 169]]

Classification Report:
              precision    recall  f1-score   support

     Average       1.00      0.97      0.99       215
   Excellent       0.99      0.97      0.98       168
        Good       0.96      0.99      0.98       198
        Poor       0.99      1.00      0.99       169

    accuracy                           0.98       750
   macro avg       0.98      0.98      0.98       750
weighted avg       0.98      0.98      0.98       750



In [8]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest neighbours with default settings, fitted on the scaled training split
knn_model = KNeighborsClassifier().fit(X_train, y_train)

# Predicted CGPA category for each held-out row
knn_y_pred = knn_model.predict(X_test)

# Summary scores; precision, recall and F1 are computed with average='weighted'
print("\nk-Nearest Neighbors Classifier Metrics:")
print(f"Accuracy: {accuracy_score(y_test, knn_y_pred)}")
for metric_name, metric_fn in (
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1 Score", f1_score),
):
    print(f"{metric_name}: {metric_fn(y_test, knn_y_pred, average='weighted')}")

# Per-class breakdown
print("\nConfusion Matrix:", confusion_matrix(y_test, knn_y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, knn_y_pred), sep="\n")



k-Nearest Neighbors Classifier Metrics:
Accuracy: 0.9333333333333333
Precision: 0.9343574321623662
Recall: 0.9333333333333333
F1 Score: 0.9332188875240298

Confusion Matrix:
[[191   0  11  13]
 [  0 158  10   0]
 [  5   7 186   0]
 [  4   0   0 165]]

Classification Report:
              precision    recall  f1-score   support

     Average       0.95      0.89      0.92       215
   Excellent       0.96      0.94      0.95       168
        Good       0.90      0.94      0.92       198
        Poor       0.93      0.98      0.95       169

    accuracy                           0.93       750
   macro avg       0.93      0.94      0.93       750
weighted avg       0.93      0.93      0.93       750



In [9]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with a fixed seed, fitted on the scaled training split
rf_model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Predicted CGPA category for each held-out row
rf_y_pred = rf_model.predict(X_test)

# Summary scores; precision, recall and F1 are computed with average='weighted'
print("\nRandom Forest Classifier Metrics:")
print(f"Accuracy: {accuracy_score(y_test, rf_y_pred)}")
for metric_name, metric_fn in (
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1 Score", f1_score),
):
    print(f"{metric_name}: {metric_fn(y_test, rf_y_pred, average='weighted')}")

# Per-class breakdown
print("\nConfusion Matrix:", confusion_matrix(y_test, rf_y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, rf_y_pred), sep="\n")



Random Forest Classifier Metrics:
Accuracy: 0.9706666666666667
Precision: 0.9708452340468257
Recall: 0.9706666666666667
F1 Score: 0.970674646716542

Confusion Matrix:
[[204   0   9   2]
 [  0 166   2   0]
 [  3   3 192   0]
 [  3   0   0 166]]

Classification Report:
              precision    recall  f1-score   support

     Average       0.97      0.95      0.96       215
   Excellent       0.98      0.99      0.99       168
        Good       0.95      0.97      0.96       198
        Poor       0.99      0.98      0.99       169

    accuracy                           0.97       750
   macro avg       0.97      0.97      0.97       750
weighted avg       0.97      0.97      0.97       750



In [10]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with a fixed seed, fitted on the scaled training split
lr_model = LogisticRegression(random_state=42).fit(X_train, y_train)

# Predicted CGPA category for each held-out row
lr_y_pred = lr_model.predict(X_test)

# Summary scores; precision, recall and F1 are computed with average='weighted'
print("\nLogistic Regression Classifier Metrics:")
print(f"Accuracy: {accuracy_score(y_test, lr_y_pred)}")
for metric_name, metric_fn in (
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1 Score", f1_score),
):
    print(f"{metric_name}: {metric_fn(y_test, lr_y_pred, average='weighted')}")

# Per-class breakdown
print("\nConfusion Matrix:", confusion_matrix(y_test, lr_y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, lr_y_pred), sep="\n")



Logistic Regression Classifier Metrics:
Accuracy: 0.992
Precision: 0.9920408137715179
Recall: 0.992
F1 Score: 0.9920076394433225

Confusion Matrix:
[[212   0   3   0]
 [  0 167   1   0]
 [  1   1 196   0]
 [  0   0   0 169]]

Classification Report:
              precision    recall  f1-score   support

     Average       1.00      0.99      0.99       215
   Excellent       0.99      0.99      0.99       168
        Good       0.98      0.99      0.98       198
        Poor       1.00      1.00      1.00       169

    accuracy                           0.99       750
   macro avg       0.99      0.99      0.99       750
weighted avg       0.99      0.99      0.99       750

