In [1]:
# Importation des biblioth√®ques n√©cessaires
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import xgboost as xgb
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [2]:
# 1. Chargement des donn√©es pr√©trait√©es
df = pd.read_csv('preprocessed_data.csv')

In [3]:
# 2. S√©lection des caract√©ristiques (features) et de la variable cible (target)
X = df.drop(columns=['resale_price'])  # Caract√©ristiques
y = df['resale_price']  # Variable cible

In [4]:
# 3. Gestion des colonnes non num√©riques
# Convertir les colonnes de type date en format num√©rique (par exemple, ann√©es ou jours depuis une date de r√©f√©rence)
if 'lease_commence_date' in X.columns:
    X['lease_commence_date'] = pd.to_datetime(X['lease_commence_date'], errors='coerce')
    X['lease_commence_date'] = (X['lease_commence_date'] - X['lease_commence_date'].min()) / pd.Timedelta(days=1)  # Nombre de jours depuis la premi√®re date

# Encoder les variables cat√©gorielles si n√©cessaire
for col in X.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col].astype(str))

In [5]:
# 4. Division des donn√©es en ensembles d'entra√Ænement et de test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 5. Normalisation des donn√©es
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [6]:
# 6. Mod√®le de r√©gression lin√©aire
from sklearn.linear_model import LinearRegression
lr_model = LinearRegression()
lr_model.fit(X_train_scaled, y_train)

# Pr√©dictions avec le mod√®le de r√©gression lin√©aire
y_pred_lr = lr_model.predict(X_test_scaled)

# √âvaluation du mod√®le de r√©gression lin√©aire
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
mae_lr = mean_absolute_error(y_test, y_pred_lr)
mse_lr = mean_squared_error(y_test, y_pred_lr)
r2_lr = r2_score(y_test, y_pred_lr)

print("R√©gression Lin√©aire - √âvaluation :")
print(f"MAE: {mae_lr}")
print(f"MSE: {mse_lr}")
print(f"R2: {r2_lr}")

R√©gression Lin√©aire - √âvaluation :
MAE: 72928.70521472645
MSE: 10448395308.479977
R2: 0.6326645568368452


In [7]:
# 2. Entra√Ænement du mod√®le XGBoost
model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100, learning_rate=0.1, max_depth=5)
model.fit(X_train_scaled, y_train)

# 3. Pr√©diction
y_pred = model.predict(X_test_scaled)

# 4. √âvaluation
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# 5. Affichage des r√©sultats
print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"R¬≤: {r2}")

MAE: 40151.03920628834
MSE: 3579235005.6439905
R¬≤: 0.8741644206439788


In [8]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import xgboost as xgb
from sklearn.preprocessing import StandardScaler, LabelEncoder
import mlflow
import mlflow.sklearn

In [9]:
# 6. D√©finir et configurer MLflow
mlflow.set_experiment("First experiment")
mlflow.set_tracking_uri("http://127.0.0.1:5000/")

# Mod√®le de r√©gression lin√©aire
with mlflow.start_run(run_name="Linear Regression"):
    # Entra√Ænement du mod√®le
    lr_model = LinearRegression()
    lr_model.fit(X_train_scaled, y_train)

    # Pr√©dictions
    y_pred_lr = lr_model.predict(X_test_scaled)

    # √âvaluation
    mae_lr = mean_absolute_error(y_test, y_pred_lr)
    mse_lr = mean_squared_error(y_test, y_pred_lr)
    r2_lr = r2_score(y_test, y_pred_lr)

    # Enregistrement dans MLflow
    mlflow.log_params({"model_type": "LinearRegression"})
    mlflow.log_metrics({"mae": mae_lr, "mse": mse_lr, "r2": r2_lr})
    mlflow.sklearn.log_model(lr_model, "linear_regression_model")

    # Affichage des r√©sultats
    print("R√©gression Lin√©aire - √âvaluation :")
    print(f"MAE: {mae_lr}")
    print(f"MSE: {mse_lr}")
    print(f"R2: {r2_lr}")



MlflowException: API request to http://127.0.0.1:5000/api/2.0/mlflow/runs/create failed with exception HTTPConnectionPool(host='127.0.0.1', port=5000): Max retries exceeded with url: /api/2.0/mlflow/runs/create (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x0000022E2C002B90>: Failed to establish a new connection: [WinError 10061] Aucune connexion n‚Äôa pu √™tre √©tablie car l‚Äôordinateur cible l‚Äôa express√©ment refus√©e'))

In [None]:
# Mod√®le XGBoost
with mlflow.start_run(run_name="XGBoost"):
    # Entra√Ænement du mod√®le
    xgb_model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100, learning_rate=0.1, max_depth=5)
    xgb_model.fit(X_train_scaled, y_train)

    # Pr√©dictions
    y_pred_xgb = xgb_model.predict(X_test_scaled)

    # √âvaluation
    mae_xgb = mean_absolute_error(y_test, y_pred_xgb)
    mse_xgb = mean_squared_error(y_test, y_pred_xgb)
    r2_xgb = r2_score(y_test, y_pred_xgb)

    # Enregistrement dans MLflow
    mlflow.log_params({
        "model_type": "XGBoost",
        "n_estimators": 100,
        "learning_rate": 0.1,
        "max_depth": 5
    })
    mlflow.log_metrics({"mae": mae_xgb, "mse": mse_xgb, "r2": r2_xgb})
    mlflow.sklearn.log_model(xgb_model, "xgboost_model")

    # Affichage des r√©sultats
    print("\nXGBoost - √âvaluation :")
    print(f"MAE: {mae_xgb}")
    print(f"MSE: {mse_xgb}")
    print(f"R2: {r2_xgb}")




XGBoost - √âvaluation :
MAE: 40151.03920628834
MSE: 3579235005.6439905
R2: 0.8741644206439788
üèÉ View run XGBoost at: http://127.0.0.1:5000/#/experiments/130527551246863566/runs/89863fc1667e48dd912315cf844b18f8
üß™ View experiment at: http://127.0.0.1:5000/#/experiments/130527551246863566


In [None]:
# 7. Register and log the Linear Regression model
with mlflow.start_run(run_name="Linear Regression with SMOTE"):
    # Train the model
    lr_model = LinearRegression()
    lr_model.fit(X_train_scaled, y_train)

    # Predictions and evaluation
    y_pred_lr = lr_model.predict(X_test_scaled)
    mae_lr = mean_absolute_error(y_test, y_pred_lr)
    mse_lr = mean_squared_error(y_test, y_pred_lr)
    r2_lr = r2_score(y_test, y_pred_lr)

    # Log parameters and metrics
    mlflow.log_params({"model_type": "LinearRegression"})
    mlflow.log_metrics({"mae": mae_lr, "mse": mse_lr, "r2": r2_lr})

    # Log and register the model
    mlflow.sklearn.log_model(lr_model, "linear_regression_model_with_smote")
    result = mlflow.register_model(
        "runs:/{}/linear_regression_model_with_smote".format(mlflow.active_run().info.run_id),
        "LinearRegression_SMOTE"
    )
    print("Model registered successfully with version:", result.version)

# 8. Register and log the XGBoost model
with mlflow.start_run(run_name="XGBoost with SMOTE"):
    # Train the model
    xgb_model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100, learning_rate=0.1, max_depth=5)
    xgb_model.fit(X_train_scaled, y_train)

    # Predictions and evaluation
    y_pred_xgb = xgb_model.predict(X_test_scaled)
    mae_xgb = mean_absolute_error(y_test, y_pred_xgb)
    mse_xgb = mean_squared_error(y_test, y_pred_xgb)
    r2_xgb = r2_score(y_test, y_pred_xgb)

    # Log parameters and metrics
    mlflow.log_params({
        "model_type": "XGBoost",
        "n_estimators": 100,
        "learning_rate": 0.1,
        "max_depth": 5
    })
    mlflow.log_metrics({"mae": mae_xgb, "mse": mse_xgb, "r2": r2_xgb})

    # Log and register the model
    mlflow.sklearn.log_model(xgb_model, "xgboost_model_with_smote")
    result = mlflow.register_model(
        "runs:/{}/xgboost_model_with_smote".format(mlflow.active_run().info.run_id),
        "XGBoost_SMOTE"
    )
    print("Model registered successfully with version:", result.version)


NameError: name 'X_train_scaled' is not defined