# 🚀 Notebook 04 : Boosting & Classification

Deux approches complémentaires :
- 🔹 Régression avec modèles de boosting
- 🔸 Classification sur tranches de prix

In [4]:
# 📦 Imports
import joblib
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score, classification_report
import matplotlib.pyplot as plt
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.preprocessing import KBinsDiscretizer


## 📥 Chargement des données

In [5]:
# Load the pickled train/test feature matrices and targets from the
# relative `train_test/` directory, in the same order as the names on the left.
# NOTE(review): presumably these artifacts are written by an earlier notebook
# in this series — confirm they exist before a fresh Run All.
X_train, X_test, y_train, y_test = map(
    joblib.load,
    (
        "train_test/X_train.pkl",
        "train_test/X_test.pkl",
        "train_test/y_train.pkl",
        "train_test/y_test.pkl",
    ),
)

## 🔹 Régression avec HistGradientBoostingRegressor

In [7]:
# Fit a histogram-based gradient boosting regressor on the training split and
# score it on the held-out split. The feature matrices are converted to dense
# arrays with .toarray() before being handed to the estimator.
reg = HistGradientBoostingRegressor(random_state=42)
reg.fit(X_train.toarray(), y_train)
y_pred = reg.predict(X_test.toarray())

mae = mean_absolute_error(y_test, y_pred)
# `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6, so it
# raises TypeError on current releases. Taking the square root of the MSE
# yields the same RMSE on every scikit-learn version.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print(f"MAE  : {mae:.2f}")
print(f"RMSE : {rmse:.2f}")
print(f"R²   : {r2:.2f}")

# Persist the fitted regressor. Kept as the cell's last expression so the
# list of written file paths returned by joblib.dump is displayed.
joblib.dump(reg, "models/hgb_regressor.pkl")

MAE  : 17.25
RMSE : 49.19
R²   : -0.02


['models/hgb_regressor.pkl']

In [None]:
# Scatter of regressor predictions against the true test targets, drawn with
# the explicit Figure/Axes interface.
fig, ax = plt.subplots(figsize=(10, 5))
ax.scatter(y_test, y_pred, alpha=0.5)

# Dashed black y = x reference line spanning the range of the true targets;
# points on this line are perfect predictions.
y_min, y_max = y_test.min(), y_test.max()
ax.plot([y_min, y_max], [y_min, y_max], 'k--')

ax.set_xlabel("Valeurs réelles")
ax.set_ylabel("Prédictions")
ax.set_title("HistGradientBoostingRegressor : Prédictions vs Réel")
ax.grid(True)

fig.tight_layout()
plt.show()

## 🔸 Classification : prédire la tranche de prix

In [None]:
# Turn the continuous target into 4 ordinal classes using quantile-based bins.
# The bin edges are fitted on the training targets only and then reused for the
# test targets, so no information from the test split leaks into the binning.
binner = KBinsDiscretizer(n_bins=4, encode='ordinal', strategy='quantile')
# np.asarray makes the reshape work whether the pickled targets are NumPy
# arrays or pandas Series (a Series has no .reshape method).
y_train_bins = binner.fit_transform(np.asarray(y_train).reshape(-1, 1)).ravel()
y_test_bins = binner.transform(np.asarray(y_test).reshape(-1, 1)).ravel()

# Train the classifier. HistGradientBoosting estimators reject sparse input,
# so the feature matrices are densified exactly as in the regression cell.
clf = HistGradientBoostingClassifier(random_state=42)
clf.fit(X_train.toarray(), y_train_bins)

# Predict the price bin of each test sample and report the metrics.
y_pred_bins = clf.predict(X_test.toarray())
acc = accuracy_score(y_test_bins, y_pred_bins)
print(f"Accuracy : {acc:.2%}")
print(classification_report(y_test_bins, y_pred_bins))

# Persist the fitted classifier. Kept as the cell's last expression so the
# list of written file paths returned by joblib.dump is displayed.
# NOTE(review): the fitted `binner` is not saved; mapping predicted classes
# back to price ranges later would need it — confirm whether that matters.
joblib.dump(clf, "models/hgb_classifier.pkl")
