In [None]:
# =========================================================
# 0.  Minimal Student‑Performance Model (6 features only)
# =========================================================
import os, joblib, pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, accuracy_score
import numpy as np

# ---------------------------------------------------------
# 1. Load data
# ---------------------------------------------------------
# Semicolon-separated CSV; adjust the path if the file lives elsewhere.
df = pd.read_csv("student-mat.csv", sep=';')

# ---------------------------------------------------------
# 2. Keep only the 6 requested features + target
# ---------------------------------------------------------
FEATURES = ["health", "activities", "reason", "age", "absences", "G2"]
TARGET   = "G3"

# Fit one LabelEncoder per categorical column, then overwrite each
# column in place with its integer codes. The fitted encoders are kept
# so the same mapping can be reused at inference time.
label_encoders = {
    col: LabelEncoder().fit(df[col]) for col in ["activities", "reason"]
}
for col, le in label_encoders.items():
    df[col] = le.transform(df[col])

X, y = df[FEATURES], df[TARGET]

# ---------------------------------------------------------
# 3. Split + scale
# ---------------------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# The scaler learns its statistics from the training split only and is
# then applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled  = scaler.transform(X_test)

# ---------------------------------------------------------
# 4. Train model
# ---------------------------------------------------------
rf = RandomForestRegressor(
    n_estimators=200,
    min_samples_leaf=2,
    max_depth=None,
    random_state=42,
).fit(X_train_scaled, y_train)

# ---------------------------------------------------------
# 5. Evaluate
# ---------------------------------------------------------
y_pred = rf.predict(X_test_scaled)

mse  = mean_squared_error(y_test, y_pred)
mae  = mean_absolute_error(y_test, y_pred)
r2   = r2_score(y_test, y_pred)
rmse = np.sqrt(mse)

# Binary pass/fail accuracy: both the true and the predicted grade are
# thresholded at 10 (pass if G3 >= 10) before being compared.
acc = accuracy_score(
    (y_test >= 10).astype(int),
    (y_pred >= 10).astype(int),
)

# One print call with a newline separator emits the report line by line.
print(
    "========= Regression Accuracy =========",
    f"MSE   : {mse:.2f}",
    f"RMSE  : {rmse:.2f}",
    f"MAE   : {mae:.2f}",
    f"R²    : {r2:.2f}",
    "\n========= Pass/Fail Accuracy ==========",
    f"Accuracy: {acc*100:.2f}%",
    sep="\n",
)

# ---------------------------------------------------------
# 6. Save artefacts
# ---------------------------------------------------------
os.makedirs("artefacts", exist_ok=True)
# Model, scaler and encoders are persisted side by side so that an
# inference service can reload all three.
for artefact, path in (
    (rf,             "artefacts/rf_regressor_slim.pkl"),
    (scaler,         "artefacts/scaler_slim.pkl"),
    (label_encoders, "artefacts/label_encoders_slim.pkl"),
):
    joblib.dump(artefact, path)
print("\n✅ Saved artefacts to /artefacts")

# ---------------------------------------------------------
# 7. Tiny inference helper (ready for your Flask/FastAPI backend)
# ---------------------------------------------------------
def grade_letter(score):
    """Translate a numeric score into its letter band.

    Bands are checked from the highest lower bound downwards and the
    first one the score reaches is returned; a score that reaches none
    of them (i.e. below 10) is a fail.
    """
    bands = (
        (16, "A"),
        (14, "B"),
        (12, "C"),
        (10, "D (Pass)"),
    )
    for lower_bound, letter in bands:
        if score >= lower_bound:
            return letter
    return "F (Fail)"

def predict_slim(student: dict) -> dict:
    """
    Predict the final grade (G3) for a single student.

    student = {
        "health": 4,
        "activities": "yes",   # or "no"
        "reason": "home",      # or "course"/"other"
        "age": 16,
        "absences": 3,
        "G2": 14
    }

    The keys may be supplied in any order; keys that are not listed in
    FEATURES are ignored.

    Returns a dict of plain Python values, so it can be handed straight
    to a JSON encoder:
        "predicted_g3": float, the prediction rounded to 2 decimals
        "grade":        letter band from grade_letter() (unrounded value)
        "passed":       bool, True when the prediction is >= 10

    Raises:
        KeyError: if one or more entries of FEATURES are missing.
        ValueError: propagated from the fitted encoders/scaler, e.g. for
            a categorical value the encoders were not fitted on.
    """
    missing = [name for name in FEATURES if name not in student]
    if missing:
        raise KeyError(f"student is missing required feature(s): {missing}")

    # Select the columns explicitly in FEATURES order: the scaler was
    # fitted on X = df[FEATURES], so the input must line up with that
    # layout regardless of how the caller ordered the dict keys.
    df_in = pd.DataFrame([student])[FEATURES].copy()

    # encode categoricals with the encoders fitted at training time
    for col, le in label_encoders.items():
        df_in[col] = le.transform(df_in[col].astype(str))
    X_scaled = scaler.transform(df_in.astype(float))

    g3 = rf.predict(X_scaled)[0]
    return {
        "predicted_g3": float(round(g3, 2)),
        "grade": grade_letter(g3),
        # The comparison on a NumPy scalar yields a NumPy bool; convert
        # it so the result contains only built-in types.
        "passed": bool(g3 >= 10),
    }

# Quick sanity check: run one hand-written student through the helper
# and print the resulting prediction dict.
example_student = dict(
    health=4,
    activities="yes",
    reason="home",
    age=16,
    absences=3,
    G2=14,
)
print("\nExample prediction:", predict_slim(example_student))


MSE   : 3.85
RMSE  : 1.96
MAE   : 1.18
R²    : 0.81

Accuracy: 87.34%

✅ Saved artefacts to /artefacts

Example prediction: {'predicted_g3': 14.11, 'grade': 'B', 'passed': np.True_}
