In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import RobustScaler
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.svm import SVR
from sklearn.ensemble import GradientBoostingRegressor, HistGradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import TweedieRegressor
import lightgbm as lgb
import xgboost as xgb

In [2]:
# 🔹 1️⃣ Chargement des données déjà transformées (avec ACP appliqué)
train = pd.read_csv("./final_data.csv")
test = pd.read_csv("./final_test_data.csv")
sample_submission = pd.read_csv("../Housing-prices-competition/home-data-for-ml-course/sample_submission.csv")

# 🔹 2️⃣ Définition des features et de la target
y_train = train["SalePrice"]  # Variable cible
X_train = train.drop(columns=["SalePrice"])  # Features après ACP
X_test = test.copy()  # Test après ACP

# 🔹 3️⃣ Division des données en ensembles d'entraînement et de validation
X_train_split, X_val, y_train_split, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# 🔹 4️⃣ Mise à l'échelle des données pour certains modèles
scaler = RobustScaler()
X_train_scaled = scaler.fit_transform(X_train_split)
X_val_scaled = scaler.transform(X_val)

# 🔹 5️⃣ Définition des modèles à utiliser
models = {
    "Ridge": Ridge(),
    "Lasso": Lasso(),
    "ElasticNet": ElasticNet(),
    "SVR": SVR(),
    "GradientBoosting": GradientBoostingRegressor(),
    "LightGBM": lgb.LGBMRegressor(),
    "XGBoost": xgb.XGBRegressor(),
    "HistGradientBoosting": HistGradientBoostingRegressor(),
    "Tweedie": TweedieRegressor(power=0)
}

# 🔹 6️⃣ Entraînement et évaluation des modèles
for name, model in models.items():
    if name in ["Ridge", "Lasso", "ElasticNet", "SVR"]:
        model.fit(X_train_scaled, y_train_split)
        y_val_pred = model.predict(X_val_scaled)
    else:
        model.fit(X_train_split, y_train_split)
        y_val_pred = model.predict(X_val)

    # Calcul des métriques
    val_r2 = r2_score(y_val, y_val_pred)
    val_mae = mean_absolute_error(y_val, y_val_pred)
    val_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))

    # Affichage des performances du modèle
    print(f"\n📊 Performance du modèle {name}:")
    print(f"📈 Validation R² Score: {val_r2:.4f}")
    print(f"📉 Validation MAE: {val_mae:.2f}")
    print(f"📉 Validation RMSE: {val_rmse:.2f}")

# 🔹 🔟 Prédictions sur les données test avec le meilleur modèle (ici, on peut choisir par exemple XGBoost)
best_model = xgb.XGBRegressor()
best_model.fit(X_train_split, y_train_split)
y_test_pred = best_model.predict(X_test)

# 🔹 1️⃣1️⃣ Création du fichier de soumission
submission = pd.DataFrame({"Id": sample_submission["Id"], "SalePrice": y_test_pred})
submission.to_csv("Sumb_Final_Data.csv", index=False)
print("\n📁 Prédictions enregistrées dans submission_final_data_models.csv ✅")



📊 Performance du modèle Ridge:
📈 Validation R² Score: 0.8729
📉 Validation MAE: 19360.63
📉 Validation RMSE: 31221.79


  model = cd_fast.enet_coordinate_descent(



📊 Performance du modèle Lasso:
📈 Validation R² Score: 0.8741
📉 Validation MAE: 19999.18
📉 Validation RMSE: 31079.69

📊 Performance du modèle ElasticNet:
📈 Validation R² Score: 0.7586
📉 Validation MAE: 25085.82
📉 Validation RMSE: 43031.81

📊 Performance du modèle SVR:
📈 Validation R² Score: -0.0244
📉 Validation MAE: 59539.78
📉 Validation RMSE: 88641.29

📊 Performance du modèle GradientBoosting:
📈 Validation R² Score: 0.8978
📉 Validation MAE: 16969.49
📉 Validation RMSE: 27996.76
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002379 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5112
[LightGBM] [Info] Number of data points in the train set: 1168, number of used features: 125
[LightGBM] [Info] Start training from score 181441.541952

📊 Performance du modèle LightGBM:
📈 Validation R² Score: 0.8887
📉 Validation MAE: 16825.07
📉 Validation RMSE: 29214.67

📊 Performance du modèle XGBoost:
📈 Validation R² 