
# 🔁 Rebuilding JAMB Regression Model using Scikit-learn

This notebook recreates the best model from PyCaret using native Scikit-learn tools for transparency and control.


[EDA Report](https://mukhtarasif.github.io/EDA_Reports/ydata/JAMB_YData_Profile_Report.html)

In [1]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder
import numpy as np

# --- Configuration ---------------------------------------------------------
# Tunables collected in one place; the values are the ones this cell has
# always used, so results are unchanged.
TARGET = "JAMB_Score"
SEED = 123  # shared by the train/test split and the forest

# --- Load ------------------------------------------------------------------
# Read the raw results and drop the Student_ID column so it is excluded from
# the feature matrix.
df = pd.read_csv("jamb_exam_results.csv").drop(columns=["Student_ID"])

# --- Encode categorical features -------------------------------------------
# Every object-dtype column is replaced by integer codes from its own, freshly
# fitted LabelEncoder. The encoders are fitted on the full frame, i.e. before
# the train/test split below.
# NOTE(review): scikit-learn documents LabelEncoder as a target encoder;
# OrdinalEncoder is the feature-side counterpart. Switching may alter how
# missing values are coded, so confirm the metrics before changing it.
categorical_cols = df.select_dtypes(include="object").columns
df_encoded = df.assign(
    **{name: LabelEncoder().fit_transform(df[name]) for name in categorical_cols}
)

# --- Features / target and hold-out split ----------------------------------
X = df_encoded.drop(columns=TARGET)
y = df_encoded[TARGET]

# 80 % of the rows go to training, 20 % are held out for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=SEED,
)

# --- Fit and predict -------------------------------------------------------
# fit() returns the estimator itself, so `model` is the fitted forest.
model = RandomForestRegressor(n_estimators=100, random_state=SEED).fit(X_train, y_train)
y_pred = model.predict(X_test)

# --- Metrics ---------------------------------------------------------------
# RMSE is the square root of the mean squared error, which puts it back in
# the same units as the target.
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print("MAE:", mae)
print("RMSE:", rmse)
print("R² Score:", r2)


MAE: 31.24795
RMSE: 38.95153830210047
R² Score: 0.2562509137195814
