In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [3]:
# 📂 Chargement
input_path = "../../data/processed/dataset_clean_no_outliers.parquet"
df = pd.read_parquet(input_path)

# 🎯 Encodage du grade en score numérique
grade_map = {'A': 7, 'B': 6, 'C': 5, 'D': 4, 'E': 3, 'F': 2, 'G': 1}
df = df[df['grade'].isin(grade_map.keys())]  # filtre sécurité
df['grade_score'] = df['grade'].map(grade_map)

# ✅ Features utilisées
features = [
    'purpose', 'loan_amnt', 'monthly_income',
    'emp_length', 'home_ownership', 'job_category'
]

df = df[features + ['grade_score']].dropna()

# 🔢 Encodage des variables catégorielles
df['home_ownership_encoded'] = LabelEncoder().fit_transform(df['home_ownership'])
df['purpose_encoded'] = LabelEncoder().fit_transform(df['purpose'])
df['job_category_encoded'] = LabelEncoder().fit_transform(df['job_category'])

# 🔧 Jeu final
X = df[['loan_amnt', 'monthly_income', 'emp_length', 'home_ownership_encoded', 'purpose_encoded', 'job_category_encoded']]
y = df['grade_score']

# 🧪 Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# ⚖️ Standardisation
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 📈 Régression Ridge
model = Ridge(alpha=1.0)
model.fit(X_train_scaled, y_train)

# 🔮 Prédictions
y_pred = model.predict(X_test_scaled)

# 🧮 Évaluation
rmse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"🎯 RMSE : {rmse:.4f}")
print(f"📏 MAE : {mae:.4f}")
print(f"📊 R² Score : {r2:.4f}")

# 🔍 Affichage de quelques prédictions (optionnel)
results = pd.DataFrame({'Réel': y_test.values, 'Prédit': y_pred})
print("\n🔍 Exemple de prédictions :")
print(results.head(10))


🎯 RMSE : 1.5168
📏 MAE : 0.9992
📊 R² Score : 0.0881

🔍 Exemple de prédictions :
   Réel    Prédit
0     6  5.282015
1     5  5.558956
2     6  5.902990
3     5  4.899473
4     6  4.891242
5     5  4.767567
6     5  5.020969
7     6  5.300847
8     3  5.191247
9     2  3.928972


In [5]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

# 🌲 Modèle Random Forest
rf_model = RandomForestRegressor(n_estimators=100, max_depth=10, random_state=42)
rf_model.fit(X_train, y_train)

# 🔮 Prédictions
y_pred_rf = rf_model.predict(X_test)

# 📊 Évaluation
rmse_rf = np.sqrt(mean_squared_error(y_test, y_pred_rf))
mae_rf = mean_absolute_error(y_test, y_pred_rf)
r2_rf = r2_score(y_test, y_pred_rf)

print("📦 Random Forest Regressor")
print(f"🎯 RMSE : {rmse_rf:.4f}")
print(f"📏 MAE : {mae_rf:.4f}")
print(f"📊 R² Score : {r2_rf:.4f}")

# 🔍 Quelques exemples
print("\n🔍 Exemple de prédictions :")
print(pd.DataFrame({"Réel": y_test[:10].values, "Prédit": y_pred_rf[:10]}))


📦 Random Forest Regressor
🎯 RMSE : 1.1991
📏 MAE : 0.9693
📊 R² Score : 0.1357

🔍 Exemple de prédictions :
   Réel    Prédit
0     6  5.457993
1     5  5.412449
2     6  5.836179
3     5  5.014531
4     6  4.981928
5     5  4.651303
6     5  5.114625
7     6  5.064158
8     3  5.459291
9     2  4.064466


In [6]:
from xgboost import XGBRegressor

# ⚡ Modèle XGBoost Regressor
xgb_model = XGBRegressor(n_estimators=100, max_depth=6, learning_rate=0.1, random_state=42)
xgb_model.fit(X_train, y_train)

# 🔮 Prédictions
y_pred_xgb = xgb_model.predict(X_test)

# 📊 Évaluation
rmse_xgb = np.sqrt(mean_squared_error(y_test, y_pred_xgb))
mae_xgb = mean_absolute_error(y_test, y_pred_xgb)
r2_xgb = r2_score(y_test, y_pred_xgb)

print("\n⚡ XGBoost Regressor")
print(f"🎯 RMSE : {rmse_xgb:.4f}")
print(f"📏 MAE : {mae_xgb:.4f}")
print(f"📊 R² Score : {r2_xgb:.4f}")

# 🔍 Quelques exemples
print("\n🔍 Exemple de prédictions :")
print(pd.DataFrame({"Réel": y_test[:10].values, "Prédit": y_pred_xgb[:10]}))



⚡ XGBoost Regressor
🎯 RMSE : 1.1916
📏 MAE : 0.9622
📊 R² Score : 0.1464

🔍 Exemple de prédictions :
   Réel    Prédit
0     6  5.440286
1     5  5.399595
2     6  5.902311
3     5  5.017663
4     6  5.062352
5     5  4.759347
6     5  4.970632
7     6  5.299908
8     3  5.378512
9     2  3.864398
