# SVR

## Imports et chargement des données depuis PostgreSQL

In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split, cross_validate, GridSearchCV, RandomizedSearchCV, KFold
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingRegressor,RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.compose import TransformedTargetRegressor
from sklearn.linear_model import ElasticNet,  Ridge, Lasso
import joblib

In [16]:
import os

import psycopg2
from dotenv import load_dotenv
from psycopg2 import sql
from sqlalchemy import create_engine
from sqlalchemy.engine import URL

In [17]:
# Load settings via python-dotenv so that the PG* variables read with
# os.getenv() in the next cell are available (presumably from a local .env
# file — confirm where the file lives for this project).
load_dotenv()

True

In [18]:
# PostgreSQL connection settings, read from the environment after the
# load_dotenv() call above.
user = os.getenv("PGUSER")
password = os.getenv("PGPASSWORD")
host = os.getenv("PGHOST")
port = os.getenv("PGPORT")
database = os.getenv("PGDATABASE")

# Build the URL from its components instead of interpolating an f-string:
# URL.create() escapes special characters (e.g. "@", ":" or "/" in a password)
# and leaves unset fields out rather than embedding the literal text "None".
engine = create_engine(
    URL.create(
        drivername="postgresql+psycopg2",
        username=user,
        password=password,
        host=host,
        port=int(port) if port else None,  # os.getenv returns a string
        database=database,
    )
)

In [19]:
def import_table_to_dataframe(table_name):
    """Load every row of a database table into a pandas DataFrame.

    Parameters
    ----------
    table_name : str
        Name of the table to read, optionally schema-qualified
        (``"schema.table"``).

    Returns
    -------
    pandas.DataFrame
        Result of ``SELECT * FROM <table_name>`` run through the module-level
        ``engine``.

    Raises
    ------
    ValueError
        If ``table_name`` is not a plain (optionally dotted) identifier.
    """
    # Identifiers cannot be sent as bound parameters, so the name is validated
    # before being interpolated into the SQL text.
    is_valid = isinstance(table_name, str) and all(
        part.isidentifier() for part in table_name.split(".")
    )
    if not is_valid:
        raise ValueError(f"Invalid table name: {table_name!r}")

    query = f"SELECT * FROM {table_name}"
    return pd.read_sql(query, engine)

# Pull the whole "immo" table into memory and print a preview of its first rows.
data_clean = import_table_to_dataframe(table_name="immo")
print(data_clean.head(n=5))

       price    city_name  salon  nb_rooms  nb_baths  surface_area  Ascenseur  \
0  2000000.0   Casablanca    1.0       2.0       2.0         168.0          1   
1  1195000.0   Casablanca    1.0       2.0       2.0          98.0          1   
2  1350000.0  Dar Bouazza    1.0       2.0       2.0          81.0          1   
3   900000.0   Casablanca    1.0       1.0       1.0          56.0          1   
4  3100000.0        Rabat    2.0       3.0       2.0         200.0          1   

   Balcon  Chauffage  Climatisation  ...  Meublé  Parking  Sécurité  Terrasse  \
0       1          0              0  ...       0        1         0         1   
1       1          1              1  ...       0        1         0         0   
2       1          1              1  ...       0        1         1         1   
3       0          1              1  ...       1        1         1         1   
4       1          0              0  ...       0        1         1         0   

   ville_encoded  ville_nb

## Sélection des variables explicatives

In [20]:
# Columns used as explanatory variables for the model.
colonnes_selectionnees = [
    'surface_area',
    'ville_encoded',
    'equipement_tot',
    'nb_tot',
    'Ascenseur',
]

## Séparation des données

In [21]:
# Feature matrix (selected columns) and target (the 'price' column).
X = data_clean[colonnes_selectionnees]
y = data_clean['price']

# Hold out 20 % of the rows for the final evaluation; the fixed seed keeps the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42
)

# Series.ravel() is deprecated in pandas and emits a FutureWarning;
# to_numpy() gives the same 1-D ndarray without the warning.
y_train = y_train.to_numpy()
X_train.shape, y_train.shape

  y_train=y_train.ravel()


((1318, 5), (1318,))

### SVR

In [22]:
# Standardise the features, then fit an SVR on the log1p-transformed target;
# predictions are mapped back to the original scale with expm1.
pipeline_svr = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        (
            'model',
            TransformedTargetRegressor(
                regressor=SVR(),
                func=np.log1p,
                inverse_func=np.expm1,
            ),
        ),
    ]
)

In [None]:
# Hyperparameter grid for the SVR wrapped inside the pipeline's 'model' step
# (1 kernel x 4 C values x 3 epsilon values x 2 gamma settings = 24 candidates).
param_grid = dict(
    model__regressor__kernel=['rbf'],
    model__regressor__C=[0.1, 1, 10, 100],
    model__regressor__epsilon=[0.01, 0.1, 0.2],
    model__regressor__gamma=['scale', 'auto'],
)

In [24]:
# Exhaustive search over param_grid with 3-fold cross-validation, ranked by R².
grid_search_svr = GridSearchCV(
    estimator=pipeline_svr,
    param_grid=param_grid,
    scoring='r2',
    cv=3,
    error_score='raise',  # let a failing fit raise instead of being scored
    verbose=2,
    n_jobs=-1,
)

In [25]:
# Fit every candidate on the training split, then report the best combination.
grid_search_svr.fit(X=X_train, y=y_train)

print("Meilleurs hyperparamètres :", grid_search_svr.best_params_)


Fitting 3 folds for each of 24 candidates, totalling 72 fits
Meilleurs hyperparamètres : {'model__regressor__C': 1, 'model__regressor__epsilon': 0.1, 'model__regressor__gamma': 'scale', 'model__regressor__kernel': 'rbf'}


In [26]:
# Evaluation on the training set, using the best pipeline found by the search.
best_model_svr = grid_search_svr.best_estimator_
y_pred_t = best_model_svr.predict(X_train)

mse_train = mean_squared_error(y_true=y_train, y_pred=y_pred_t)
rmse_train = np.sqrt(mse_train)
mae_train = mean_absolute_error(y_true=y_train, y_pred=y_pred_t)
r2_train = r2_score(y_true=y_train, y_pred=y_pred_t)

# One print call, one line per metric (same text as separate prints).
print(
    "\nPerformance sur jeu train :",
    f"MSE  : {mse_train:.3f}",
    f"RMSE : {rmse_train:.3f}",
    f"MAE  : {mae_train:.3f}",
    f"R²   : {r2_train:.3f}",
    sep="\n",
)


Performance sur jeu train :
MSE  : 345101676966.109
RMSE : 587453.553
MAE  : 327197.728
R²   : 0.440


In [27]:
# Evaluation on the held-out test set with the same best pipeline.
y_pred = best_model_svr.predict(X_test)

mse_test = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse_test = np.sqrt(mse_test)
mae_test = mean_absolute_error(y_true=y_test, y_pred=y_pred)
r2_test = r2_score(y_true=y_test, y_pred=y_pred)

# One print call, one line per metric (same text as separate prints).
print(
    "\nPerformance sur jeu test :",
    f"MSE  : {mse_test:.3f}",
    f"RMSE : {rmse_test:.3f}",
    f"MAE  : {mae_test:.3f}",
    f"R²   : {r2_test:.3f}",
    sep="\n",
)


Performance sur jeu test :
MSE  : 336356529226.751
RMSE : 579962.524
MAE  : 345829.901
R²   : 0.408
