Import libraries

In [5]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import joblib

Load data

In [6]:
# Read the CSV at ./data/cleaned_school_data.csv (path relative to the
# notebook's working directory) into the DataFrame used by the cells below.
df = pd.read_csv(filepath_or_buffer="./data/cleaned_school_data.csv")

In [7]:
# Preprocess the data, train a gradient-boosting classifier and evaluate it.
#
# Steps: encode the categorical columns, drop IQR outliers, split into
# train/test, scale the features using statistics from the training rows
# only, fit the model, and print accuracy / confusion matrix / report.

# --- Encode categorical columns ---
# Integer codes for the term label, used as a model feature.
term_mapping = {"Term 1": 0, "Term 2": 1, "Term 3": 2}
df["term_encoded"] = df["term"].map(term_mapping)

# Integer class ids for the target label.
performance_mapping = {"High": 0, "Low": 1, "Medium": 2}
df["performance_band_encoded"] = df["performance_band"].map(performance_mapping)

# --- Remove outliers with the 1.5 * IQR rule ---
numeric_cols = ["attendance_rate", "staff_performance", "parent_feedback",
                "budget_per_student", "extracurricular_count", "class_size",
                "internal_assessment", "exam_score"]

# The filter is applied one column at a time, so each column's quartiles are
# computed on the rows that survived the previous columns' filters.
# NOTE: this reassigns `df`, so re-running the cell without reloading the
# data filters the already-filtered frame again.
for col in numeric_cols:
    Q1 = df[col].quantile(0.25)
    Q3 = df[col].quantile(0.75)
    IQR = Q3 - Q1
    lower = Q1 - 1.5 * IQR
    upper = Q3 + 1.5 * IQR
    df = df[(df[col] >= lower) & (df[col] <= upper)]

# --- Build the feature matrix and target ---
X = df[numeric_cols + ["term_encoded"]]
y = df["performance_band_encoded"]

# Series.map produces NaN for any label that is not a key of the mapping.
# Fail here with a clear message instead of deep inside scikit-learn.
if X["term_encoded"].isna().any() or y.isna().any():
    raise ValueError(
        "Unmapped or missing labels in 'term' or 'performance_band'; "
        "extend term_mapping / performance_mapping or clean the data."
    )

# --- Split BEFORE scaling ---
# Splitting first keeps the test rows out of the scaler's mean/std, so the
# evaluation below is not influenced by test-set statistics. The same
# random_state / stratify on the same rows gives the same row partition as
# splitting the scaled array would.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the scaler on the training rows only, then apply it everywhere else.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept for any later cell that uses the full scaled matrix; produced with the
# train-fitted scaler rather than one fitted on all rows.
X_scaled = scaler.transform(X)

# --- Train ---
gb_model = GradientBoostingClassifier(
    n_estimators=200,
    learning_rate=0.1,
    max_depth=3,
    random_state=42
)
gb_model.fit(X_train, y_train)

# --- Evaluate on the held-out 20% ---
# NOTE(review): a perfect score on held-out data is unusual. It may mean that
# performance_band is derived directly from one of the features (for example
# exam_score) -- TODO confirm how the target column was produced.
y_pred = gb_model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy: 1.0

Confusion Matrix:
 [[32  0  0]
 [ 0 12  0]
 [ 0  0 70]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        32
           1       1.00      1.00      1.00        12
           2       1.00      1.00      1.00        70

    accuracy                           1.00       114
   macro avg       1.00      1.00      1.00       114
weighted avg       1.00      1.00      1.00       114



Save the trained model and scaler

In [8]:
# Persist the fitted classifier and the fitted scaler as two separate joblib
# files in the current working directory; both are needed to score new data.
joblib.dump(value=gb_model, filename="school_performance_gb_model.pkl")
joblib.dump(value=scaler, filename="scaler.pkl")

['scaler.pkl']