In [1]:
# import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score 
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import joblib
from sklearn.preprocessing import LabelEncoder
import numpy as np  

In [2]:
# Load the cleaned student-performance dataset and preview its first rows.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs as written on a machine where this file exists at this location.
csv_path = r"C:\Users\kumar\Academic_Decision_Support_System\data\processed\student_performance_cleaned.csv"
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,avg_marks,previous_marks,study_hours,attendance_score,family_support,school_support,pass_fail
0,32.25,50,1.358103,0.866667,0,1,0
1,19.5,82,5.834504,0.266667,1,0,1
2,87.5,89,2.51499,0.133333,0,1,1
3,26.75,36,1.437883,0.6,0,1,0
4,78.75,73,3.223473,0.066667,0,0,1


In [3]:
# Split the frame into the feature matrix (every column except the label)
# and the label column the classifier is trained to predict.
target_column = "pass_fail"
X = df.drop(columns=[target_column])
y = df[target_column]


In [4]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split repeatable.
# NOTE(review): the split is not stratified on y - consider stratify=y if the
# train/test class proportions need to match exactly.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts


In [5]:
# Configure the XGBoost classifier and fit it on the training split.
xgb_params = {
    "n_estimators": 150,       # number of boosting rounds
    "max_depth": 5,            # maximum depth of each tree
    "learning_rate": 0.05,     # shrinkage applied to every boosting step
    "reg_lambda": 1.5,         # L2 regularisation term
    "reg_alpha": 0.5,          # L1 regularisation term
    "subsample": 0.8,          # fraction of training rows sampled per tree
    "colsample_bytree": 0.85,  # fraction of columns sampled per tree
}
model = XGBClassifier(**xgb_params)

# Left as the last expression so the notebook displays the fitted estimator.
model.fit(X_train, y_train)


In [6]:
# Predict labels for the held-out rows and preview the first ten of them.
y_pred = model.predict(X_test)
preview_count = 10
y_pred[:preview_count]

array([1, 1, 0, 1, 1, 1, 1, 0, 1, 1])

In [7]:
# Overall accuracy of the predictions on the held-out test set.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Model Accuracy: {accuracy:.2f}")


Model Accuracy: 0.87


In [8]:
# Per-class precision, recall, F1 and support for the test-set predictions.
report_text = classification_report(y_test, y_pred)
print(report_text)

              precision    recall  f1-score   support

           0       0.79      0.72      0.75       284
           1       0.89      0.93      0.91       716

    accuracy                           0.87      1000
   macro avg       0.84      0.82      0.83      1000
weighted avg       0.86      0.87      0.86      1000



In [9]:
# Confusion matrix for the test set (rows: actual class, columns: predicted class).
test_confusion = confusion_matrix(y_test, y_pred)
print(test_confusion)

[[204  80]
 [ 53 663]]


In [10]:
# Pair each feature column with the fitted model's importance score and
# show them from most to least important.
feature_names = X.columns
importance = pd.Series(model.feature_importances_, index=feature_names)
importance.sort_values(ascending=False)

previous_marks      0.507901
avg_marks           0.170905
study_hours         0.164694
school_support      0.055243
attendance_score    0.054504
family_support      0.046754
dtype: float32

In [11]:
# Score the model on both splits: a large train/test gap points to overfitting,
# while low accuracy on both points to underfitting.
train_acc, test_acc = (
    model.score(features, labels)
    for features, labels in ((X_train, y_train), (X_test, y_test))
)

print("Training Accuracy:", train_acc)
print("Testing Accuracy:", test_acc)


Training Accuracy: 0.9195
Testing Accuracy: 0.867


In [12]:
# Render the test-set confusion matrix as an annotated heatmap and save it to
# the reports folder. The figure is written to disk and closed, not shown inline.

# Pin the label order so the matrix is always 2x2 and lines up with the tick
# labels below (0 = Fail, 1 = Pass, the same mapping the prediction cell prints).
class_ids = [0, 1]
class_names = ["Fail (0)", "Pass (1)"]
cm = confusion_matrix(y_test, y_pred, labels=class_ids)

fig, ax = plt.subplots(figsize=(5, 4))
heatmap = ax.imshow(cm, cmap="Blues")
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")

# Without explicit ticks imshow shows fractional pixel coordinates instead of
# the class names, which makes the saved image hard to interpret.
tick_positions = list(range(len(class_names)))
ax.set_xticks(tick_positions)
ax.set_xticklabels(class_names)
ax.set_yticks(tick_positions)
ax.set_yticklabels(class_names)

# Write the count into every cell; switch the text colour on dark cells so the
# number stays legible.
dark_cell_cutoff = cm.max() / 2
for row in range(cm.shape[0]):
    for col in range(cm.shape[1]):
        ax.text(
            col,
            row,
            f"{cm[row, col]:d}",
            ha="center",
            va="center",
            color="white" if cm[row, col] > dark_cell_cutoff else "black",
        )

fig.colorbar(heatmap, ax=ax)
fig.tight_layout()  # keep the axis labels inside the saved image
fig.savefig(r"C:\Users\kumar\Academic_Decision_Support_System\reports\confusion_matrix.png")
plt.close(fig)


In [13]:
# Persist the trained model to disk. Only the model is written here; no
# encoder object is saved by this cell.
model_path = r"C:\Users\kumar\Academic_Decision_Support_System\models\pass_fail_model.pkl"
joblib.dump(model, model_path)
print("Model saved successfully.")



Model saved successfully.


In [14]:
# Reload the saved model from disk (only the model is loaded; there is no
# separate encoder file).
saved_model_path = r"C:\Users\kumar\Academic_Decision_Support_System\models\pass_fail_model.pkl"
loaded_model = joblib.load(saved_model_path)


In [15]:
# Show the class labels known to the reloaded model as a sanity check.
known_classes = loaded_model.classes_
print(known_classes)

[0 1]


In [16]:
# Score one hypothetical student with the reloaded model.
# The fields are listed in the same order as the training feature columns.
new_student = dict(
    avg_marks=33,
    previous_marks=29,
    study_hours=4,
    attendance_score=0.3,
    family_support=1,
    school_support=1,
)

# One-row frame: the model is called with a DataFrame, as it was during training.
input_df = pd.DataFrame([new_student])

prediction = loaded_model.predict(input_df)
probability = loaded_model.predict_proba(input_df)

# Column 0 of predict_proba is read as the fail (0) probability and column 1
# as the pass (1) probability.
fail_prob, pass_prob = probability[0]
verdict = "Pass" if prediction[0] == 1 else "Fail"

print("Prediction:", verdict)
print(f"Pass Probability: {float(pass_prob):.2f}")
print(f"Fail Probability: {float(fail_prob):.2f}")

Prediction: Pass
Pass Probability: 0.51
Fail Probability: 0.49
