In [1]:
import os
import pandas as pd

pd.options.plotting.backend = 'plotly'
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn import metrics
from sklearn.preprocessing import RobustScaler, StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, \
                             GradientBoostingRegressor

from Pélec_04_fonctions import *


In [2]:
write_data = True

if write_data is True:
    try:
        os.mkdir("./Figures/")
    except OSError as error:
        print(error)
    try:
        os.mkdir("./Tableaux/")
    except OSError as error:
        print(error)
else:
    print("""Visualisation uniquement dans le notebook
    pas de création de figures ni de tableaux""")


[Errno 17] File exists: './Figures/'
[Errno 17] File exists: './Tableaux/'


In [3]:
BEBNum = pd.read_csv('BEBNum.csv')

BEBNumM = BEBNum.drop(columns=['SiteEnergyUse(kBtu)', 'TotalGHGEmissions'])
SiteEnergyUse = np.array(BEBNum['SiteEnergyUse(kBtu)']).reshape(-1, 1)
TotalGHGEmissions = np.array(BEBNum.TotalGHGEmissions).reshape(-1, 1)

BEBNumM_train, BEBNumM_test, SiteEnergyUse_train, SiteEnergyUse_test = train_test_split(
    BEBNumM, SiteEnergyUse, test_size=.2)

score = 'neg_root_mean_squared_error'


In [4]:
# Scaler moins sensible aux outlier d'après la doc
scaler = RobustScaler(quantile_range=(10, 90))


 # 1. Modèle de prédiction sur la consommation énergétique
 (SiteEnergyUse) avec les données numériques uniquement
 ## 1.1 Consommation énergétique brute

 ### 1.1.1 Modèle LinearRegression

In [5]:
# modèle régression linéaire
pipeLR = make_pipeline(scaler, LinearRegression())

pipeLR.fit(BEBNumM_train, SiteEnergyUse_train)

SiteEnergyUse_predLR = pipeLR.predict(BEBNumM_test)

LRr2 = metrics.r2_score(SiteEnergyUse_test, SiteEnergyUse_predLR)
print("r2 :", LRr2)
LRrmse = metrics.mean_squared_error(SiteEnergyUse_test,
                                    SiteEnergyUse_predLR,
                                    squared=False)
print("rmse :", LRrmse)

fig = px.scatter(
    x=SiteEnergyUse_predLR.squeeze(),
    y=SiteEnergyUse_test.squeeze(),
    labels={
        'x': f'{SiteEnergyUse_predLR=}'.partition('=')[0],
        'y': f'{SiteEnergyUse_test=}'.partition('=')[0]
    },
    title=
    'Visualisation des données de consommation prédites par le modèle de régression linéaire<br>vs les données test'
)
fig.show()


r2 : 0.8452540008574219
rmse : 11371966.097141473


 ### 1.1.2 Modèle Ridge

In [6]:
# régression ridge
# réglage des paramètre pour la gridsearch
alphasridge = np.logspace(-3, 5, 1000)
param_gridRidge = {'ridge__alpha': alphasridge}

GridRidge, \
BestParametresRidge, \
ScoresRidge, \
SiteEnergyUse_predRidge, \
figRidge = reg_modelGrid(model=Ridge(),
                            scaler=scaler,
                            X_train=BEBNumM_train,
                            X_test=BEBNumM_test,
                            y_train=SiteEnergyUse_train,
                            y_test=SiteEnergyUse_test,
                            y_test_name='SiteEnergyUse_test',
                            y_pred_name='SiteEnergyUse_predRidge',
                            score=score,
                            param_grid=param_gridRidge)

print(BestParametresRidge)
print(ScoresRidge)
figRidge.show()

      paramètre      Ridge()
0  ridge__alpha  1175.087131
               R²          RMSE           MAE
Ridge()  0.826684  1.203498e+07  3.217855e+06


In [7]:
# graph visualisation RMSE Ridge pour tout les paramètres de GridSearchCV
FigRMSEGRidRidge = visuRMSEGrid(Ridge(), 'Ridge', alphasridge, 'alpha',
                                GridRidge)
FigRMSEGRidRidge.show()
if write_data is True:
    FigRMSEGRidRidge.write_image('./Figures/ConsoGraphRMSERidge.pdf')


 ### 1.1.3 Modèle Lasso

In [8]:
# régression lasso
# réglage des paramètre pour la gridsearch
alphaslasso = np.linspace(0.1, 1, 5)
param_gridLasso = {'lasso__alpha': alphaslasso}

GridLasso, \
BestParametresLasso, \
ScoresLasso, \
SiteEnergyUse_predLasso, \
figLasso = reg_modelGrid(model=Lasso(),
                            scaler=RobustScaler(quantile_range=(10, 90)),
                            X_train=BEBNumM_train,
                            X_test=BEBNumM_test,
                            y_train=SiteEnergyUse_train,
                            y_test=SiteEnergyUse_test,
                            y_test_name='SiteEnergyUse_test',
                            y_pred_name='SiteEnergyUse_predLasso',
                            score=score,
                            param_grid=param_gridLasso)

print(BestParametresLasso)
print(ScoresLasso)
figLasso.show()


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


      paramètre  Lasso()
0  lasso__alpha      1.0
               R²          RMSE           MAE
Lasso()  0.845165  1.137523e+07  3.431214e+06


In [9]:
# graph visualisation RMSE Lasso pour tout les paramètres de GridSearchCV
FigRMSEGRidLasso = visuRMSEGrid(Lasso(), 'Lasso', alphaslasso, 'alpha',
                                GridLasso, None, None)
FigRMSEGRidLasso.show()
if write_data is True:
    FigRMSEGRidLasso.write_image('./Figures/ConsoGraphRMSELasso.pdf')


 ### 1.1.4 Modèle ElasticNet

In [10]:
# régression elasticnet
# réglage des paramètre pour la gridsearch
alphasEN = np.logspace(-3, 3, 200)
l1ratioEN = np.linspace(0, 1, 6)
param_gridEN = {
    'elasticnet__alpha': alphasEN,
    'elasticnet__l1_ratio': l1ratioEN
}

GridEN, \
BestParametresEN, \
ScoresEN, \
SiteEnergyUse_predEN, \
figEN = reg_modelGrid(model=ElasticNet(),
                         scaler=scaler,
                         X_train=BEBNumM_train,
                         X_test=BEBNumM_test,
                         y_train=SiteEnergyUse_train,
                         y_test=SiteEnergyUse_test,
                         y_test_name='SiteEnergyUse_test',
                         y_pred_name='SiteEnergyUse_predEN',
                         score=score,
                         param_grid=param_gridEN)

print(BestParametresEN)
print(ScoresEN)
figEN.show()


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

              paramètre  ElasticNet()
0     elasticnet__alpha      0.276829
1  elasticnet__l1_ratio      0.000000
                    R²          RMSE           MAE
ElasticNet()  0.819662  1.227637e+07  3.230115e+06


In [11]:
# graph visualisation RMSE ElasticNet pour tout le meilleur paramètre l1 ratio
FigRMSEGRidEN = visuRMSEGrid(ElasticNet(), 'EN', alphasEN, 'alpha', GridEN,
                             BestParametresEN, 'elasticnet__l1_ratio')
FigRMSEGRidEN.show()
if write_data is True:
    FigRMSEGRidEN.write_image('./Figures/ConsoGraphRMSEEN.pdf')


 ### 1.1.5 Modèle kNeighborsRegressor

In [12]:
# modèle kNN
# réglage des paramètre pour la gridsearch
n_neighbors = np.linspace(1, 100, dtype=int)
param_gridkNN = {'kneighborsregressor__n_neighbors': n_neighbors}


GridkNN, \
BestParametreskNN, \
ScoreskNN, \
SiteEnergyUse_predkNN, \
figkNN = reg_modelGrid(model=KNeighborsRegressor(),
                         scaler=scaler,
                         X_train=BEBNumM_train,
                         X_test=BEBNumM_test,
                         y_train=SiteEnergyUse_train,
                         y_test=SiteEnergyUse_test,
                         y_test_name='SiteEnergyUse_test',
                         y_pred_name='SiteEnergyUse_predkNN',
                         score=score,
                         param_grid=param_gridkNN)

print(BestParametreskNN)
print(ScoreskNN)
figkNN.show()


                          paramètre  KNeighborsRegressor()
0  kneighborsregressor__n_neighbors                     15
                             R²          RMSE           MAE
KNeighborsRegressor()  0.304837  2.410289e+07  3.513263e+06


In [13]:
# graph visualisation RMSE kNN pour tout les paramètres de GridSearchCV
FigRMSEGRidkNN = visuRMSEGrid(KNeighborsRegressor(), 'kNN', n_neighbors,
                              'n neighbors', GridkNN)
FigRMSEGRidkNN.show()
if write_data is True:
    FigRMSEGRidkNN.write_image('./Figures/ConsoGraphRMSEkNN.pdf')


 ### 1.1.6 Modèle RandomForestRegressor

In [14]:
# modèle RandomForestRegressor
# réglage des paramètre pour la gridsearch
n_estimatorsRF = np.logspace(0, 3, 10, dtype=int)
param_gridRF = {
    'randomforestregressor__n_estimators': n_estimatorsRF,
    'randomforestregressor__max_features': ['auto', 'sqrt', 'log2'],
}

GridRF, \
BestParametresRF, \
ScoresRF, \
SiteEnergyUse_predRF, \
figRF = reg_modelGrid(model=RandomForestRegressor(),
                         scaler=scaler,
                         X_train=BEBNumM_train,
                         X_test=BEBNumM_test,
                         y_train=SiteEnergyUse_train.ravel(),
                         y_test=SiteEnergyUse_test,
                         y_test_name='SiteEnergyUse_test',
                         y_pred_name='SiteEnergyUse_predRF',
                         score=score,
                         param_grid=param_gridRF)

print(BestParametresRF)
print(ScoresRF)
figRF.show()


                             paramètre RandomForestRegressor()
0  randomforestregressor__n_estimators                     215
1  randomforestregressor__max_features                    sqrt
                               R²          RMSE           MAE
RandomForestRegressor()  0.459721  2.124882e+07  2.905579e+06


In [15]:
# graph visualisation RMSE RandomForestRegressor
# pour le meilleur paramètre max features
FigRMSEGRidRF = visuRMSEGrid(RandomForestRegressor(), 'RF', n_estimatorsRF,
                             'n estimators', GridRF, BestParametresRF,
                             'randomforestregressor__max_features')
FigRMSEGRidRF.show()
if write_data is True:
    FigRMSEGRidRF.write_image('./Figures/ConsoGraphRMSERF.pdf')


 ### 1.1.7 Modèle AdaboostRegressor

In [16]:
# modèle AdaBoostRegressor
# réglage des paramètre pour la gridsearch
n_estimatorsAB = np.logspace(0, 2, 30, dtype=int)
param_gridAB = {
    'adaboostregressor__n_estimators': n_estimatorsAB,
    'adaboostregressor__loss': ['linear', 'square', 'exponential']
}

GridAB, \
BestParametresAB, \
ScoresAB, \
SiteEnergyUse_predAB, \
figAB = reg_modelGrid(model=AdaBoostRegressor(),
                         scaler=scaler,
                         X_train=BEBNumM_train,
                         X_test=BEBNumM_test,
                         y_train=SiteEnergyUse_train.ravel(),
                         y_test=SiteEnergyUse_test,
                         y_test_name='SiteEnergyUse_test',
                         y_pred_name='SiteEnergyUse_predAB',
                         score=score,
                         param_grid=param_gridAB)

print(BestParametresAB)
print(ScoresAB)
figAB.show()


                         paramètre AdaBoostRegressor()
0  adaboostregressor__n_estimators                  28
1          adaboostregressor__loss              square
                           R²          RMSE           MAE
AdaBoostRegressor()  0.475406  2.093811e+07  4.403702e+06


In [17]:
# graph visualisation RMSE AdaBoostRegressor
# pour le meilleur paramètre loss
FigRMSEGRidAB = visuRMSEGrid(AdaBoostRegressor(), 'AB', n_estimatorsAB,
                             'n estimators', GridAB, BestParametresAB,
                             'adaboostregressor__loss')
FigRMSEGRidAB.show()
if write_data is True:
    FigRMSEGRidAB.write_image('./Figures/ConsoGraphRMSEAB.pdf')


 ## 1.2 Consommation énergétique au log

In [18]:
SiteEnergyUse_train_log = np.log2(1 + SiteEnergyUse_train)
SiteEnergyUse_test_log = np.log2(1 + SiteEnergyUse_test)


 ### 1.2.1 Modèle LinearRegression

In [19]:
# modèle régression linéaire
pipeLR = make_pipeline(scaler, LinearRegression())

pipeLR.fit(BEBNumM_train, SiteEnergyUse_train_log)

SiteEnergyUse_pred_logLR = pipeLR.predict(BEBNumM_test)

LRr2_log = metrics.r2_score(SiteEnergyUse_test_log, SiteEnergyUse_pred_logLR)
print("r2 :", LRr2)
LRrmse_log = metrics.mean_squared_error(SiteEnergyUse_test_log,
                                        SiteEnergyUse_pred_logLR,
                                        squared=False)
print("rmse :", LRrmse)

fig = px.scatter(
    x=SiteEnergyUse_pred_logLR.squeeze(),
    y=SiteEnergyUse_test_log.squeeze(),
    labels={
        'x': f'{SiteEnergyUse_pred_logLR=}'.partition('=')[0],
        'y': f'{SiteEnergyUse_test_log=}'.partition('=')[0]
    },
    title=
    'Visualisation des données de consommation prédites par le modèle de régression linéaire<br>vs les données test'
)
fig.show()

r2 : 0.8452540008574219
rmse : 11371966.097141473


 ### 1.2.2 Modèle Ridge

In [20]:
# régression ridge
# réglage des paramètre pour la gridsearch
alphasridge_log = np.logspace(-3, 5, 1000)
param_gridRidge_log = {'ridge__alpha': alphasridge_log}

GridRidge_log, \
BestParametresRidge_log, \
ScoresRidge_log, \
SiteEnergyUse_pred_logRidge_log, \
figRidge_log = reg_modelGrid(model=Ridge(),
                            scaler=scaler,
                            X_train=BEBNumM_train,
                            X_test=BEBNumM_test,
                            y_train=SiteEnergyUse_train_log,
                            y_test=SiteEnergyUse_test_log,
                            y_test_name='SiteEnergyUse_test_log',
                            y_pred_name='SiteEnergyUse_pred_logRidge',
                            score=score,
                            param_grid=param_gridRidge_log)

print(BestParametresRidge_log)
print(ScoresRidge_log)
figRidge_log.show()


      paramètre      Ridge()
0  ridge__alpha  1052.015218
               R²      RMSE       MAE
Ridge() -0.169007  2.392635  1.124331


In [21]:
# graph visualisation RMSE Ridge pour tout les paramètres de GridSearchCV
FigRMSEGRidRidge_log = visuRMSEGrid(Ridge(), 'Ridge', alphasridge_log, 'alpha',
                                    GridRidge_log)
FigRMSEGRidRidge_log.show()
if write_data is True:
    FigRMSEGRidRidge_log.write_image('./Figures/ConsoGraphRMSERidge_log.pdf')


 ### 1.2.3 Modèle Lasso

In [22]:
# régression lasso
# réglage des paramètre pour la gridsearch
alphaslasso_log = np.linspace(0.1, 1, 5)
param_gridLasso_log = {'lasso__alpha': alphaslasso_log}

GridLasso_log, \
BestParametresLasso_log, \
ScoresLasso_log, \
SiteEnergyUse_pred_logLasso_log, \
figLasso_log = reg_modelGrid(model=Lasso(),
                            scaler=RobustScaler(quantile_range=(10, 90)),
                            X_train=BEBNumM_train,
                            X_test=BEBNumM_test,
                            y_train=SiteEnergyUse_train_log,
                            y_test=SiteEnergyUse_test_log,
                            y_test_name='SiteEnergyUse_test_log',
                            y_pred_name='SiteEnergyUse_pred_logLasso',
                            score=score,
                            param_grid=param_gridLasso_log)

print(BestParametresLasso_log)
print(ScoresLasso_log)
figLasso_log.show()


      paramètre  Lasso()
0  lasso__alpha      0.1
               R²      RMSE       MAE
Lasso() -0.136205  2.358827  1.130725


In [23]:
# graph visualisation RMSE Lasso pour tout les paramètres de GridSearchCV
FigRMSEGRidLasso_log = visuRMSEGrid(Lasso(), 'Lasso', alphaslasso_log, 'alpha',
                                    GridLasso_log, None, None)
FigRMSEGRidLasso_log.show()
if write_data is True:
    FigRMSEGRidLasso_log.write_image('./Figures/ConsoGraphRMSELasso_log.pdf')


 ### 1.2.4 Modèle ElasticNet

In [24]:
# régression elasticnet
# réglage des paramètre pour la gridsearch
alphasEN_log = np.logspace(-1, 3, 200)
l1ratioEN_log = np.linspace(0, 1, 6)
param_gridEN_log = {
    'elasticnet__alpha': alphasEN_log,
    'elasticnet__l1_ratio': l1ratioEN_log
}

GridEN_log, \
BestParametresEN_log, \
ScoresEN_log, \
SiteEnergyUse_pred_logEN, \
figEN_log = reg_modelGrid(model=ElasticNet(),
                         scaler=scaler,
                         X_train=BEBNumM_train,
                         X_test=BEBNumM_test,
                         y_train=SiteEnergyUse_train_log,
                         y_test=SiteEnergyUse_test_log,
                         y_test_name='SiteEnergyUse_test_log',
                         y_pred_name='SiteEnergyUse_pred_logEN',
                         score=score,
                         param_grid=param_gridEN_log)

print(BestParametresEN_log)
print(ScoresEN_log)
figEN_log.show()


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

              paramètre  ElasticNet()
0     elasticnet__alpha      0.252354
1  elasticnet__l1_ratio      0.000000
                    R²      RMSE       MAE
ElasticNet() -0.149984  2.373087  1.125546



Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 7.649e+03, tolerance: 2.051e+00 Linear regression models with null weight for the l1 regularization term are more efficiently fitted using one of the solvers implemented in sklearn.linear_model.Ridge/RidgeCV instead.



In [25]:
# graph visualisation RMSE ElasticNet pour tout le meilleur paramètre l1 ratio
FigRMSEGRidEN_log = visuRMSEGrid(ElasticNet(), 'EN', alphasEN_log, 'alpha',
                                 GridEN_log, BestParametresEN_log,
                                 'elasticnet__l1_ratio')
FigRMSEGRidEN_log.show()
if write_data is True:
    FigRMSEGRidEN_log.write_image('./Figures/ConsoGraphRMSEEN_log.pdf')


 ### 1.2.5 Modèle kNeighborsRegressor

In [26]:
# modèle kNN
# réglage des paramètre pour la gridsearch
n_neighbors_log = np.linspace(1, 100, dtype=int)
param_gridkNN_log = {'kneighborsregressor__n_neighbors': n_neighbors_log}


GridkNN_log, \
BestParametreskNN_log, \
ScoreskNN_log, \
SiteEnergyUse_pred_logkNN_log, \
figkNN_log = reg_modelGrid(model=KNeighborsRegressor(),
                         scaler=scaler,
                         X_train=BEBNumM_train,
                         X_test=BEBNumM_test,
                         y_train=SiteEnergyUse_train_log,
                         y_test=SiteEnergyUse_test_log,
                         y_test_name='SiteEnergyUse_test_log',
                         y_pred_name='SiteEnergyUse_pred_logkNN',
                         score=score,
                         param_grid=param_gridkNN_log)

print(BestParametreskNN_log)
print(ScoreskNN_log)
figkNN_log.show()


                          paramètre  KNeighborsRegressor()
0  kneighborsregressor__n_neighbors                     37
                             R²      RMSE       MAE
KNeighborsRegressor()  0.349275  1.785116  0.845344


In [27]:
# graph visualisation RMSE kNN pour les paramètres de GridSearchCV
FigRMSEGRidkNN_log = visuRMSEGrid(KNeighborsRegressor(), 'kNN',
                                  n_neighbors_log, 'n neighbors', GridkNN_log)
FigRMSEGRidkNN_log.show()
if write_data is True:
    FigRMSEGRidkNN_log.write_image('./Figures/ConsoGraphRMSEkNN_log.pdf')


 ### 1.2.6 Modèle RandomForestRegressor

In [28]:
# modèle RandomForestRegressor
# réglage des paramètre pour la gridsearch
n_estimatorsRF_log = np.logspace(0, 3, 10, dtype=int)
param_gridRF_log = {
    'randomforestregressor__n_estimators': n_estimatorsRF_log,
    'randomforestregressor__max_features': ['auto', 'sqrt', 'log2'],
}

GridRF_log, \
BestParametresRF_log, \
ScoresRF_log, \
SiteEnergyUse_pred_logRF_log, \
figRF_log = reg_modelGrid(model=RandomForestRegressor(),
                         scaler=scaler,
                         X_train=BEBNumM_train,
                         X_test=BEBNumM_test,
                         y_train=SiteEnergyUse_train_log.ravel(),
                         y_test=SiteEnergyUse_test_log,
                         y_test_name='SiteEnergyUse_test_log',
                         y_pred_name='SiteEnergyUse_pred_log_logRF',
                         score=score,
                         param_grid=param_gridRF)

print(BestParametresRF_log)
print(ScoresRF_log)
figRF_log.show()


                             paramètre RandomForestRegressor()
0  randomforestregressor__n_estimators                     464
1  randomforestregressor__max_features                    sqrt
                               R²     RMSE       MAE
RandomForestRegressor()  0.349607  1.78466  0.782007


In [29]:
# graph visualisation RMSE RandomForestRegressor
# pour le meilleur paramètre max features
FigRMSEGRidRF_log = visuRMSEGrid(RandomForestRegressor(), 'RF',
                                 n_estimatorsRF_log, 'n estimators',
                                 GridRF_log, BestParametresRF_log,
                                 'randomforestregressor__max_features')
FigRMSEGRidRF_log.show()
if write_data is True:
    FigRMSEGRidRF_log.write_image('./Figures/ConsoGraphRMSERF_log.pdf')


 ### 1.2.7 Modèle AdaboostRegressor

In [30]:
# modèle AdaBoostRegressor
# réglage des paramètre pour la gridsearch
n_estimatorsAB_log = np.logspace(0, 2, 30, dtype=int)
param_gridAB_log = {
    'adaboostregressor__n_estimators': n_estimatorsAB_log,
    'adaboostregressor__loss': ['linear', 'square', 'exponential']
}

GridAB_log, \
BestParametresAB_log, \
ScoresAB_log, \
SiteEnergyUse_pred_logAB, \
figAB_log = reg_modelGrid(model=AdaBoostRegressor(),
                         scaler=scaler,
                         X_train=BEBNumM_train,
                         X_test=BEBNumM_test,
                         y_train=SiteEnergyUse_train_log.ravel(),
                         y_test=SiteEnergyUse_test_log,
                         y_test_name='SiteEnergyUse_test_log',
                         y_pred_name='SiteEnergyUse_predAB_log',
                         score=score,
                         param_grid=param_gridAB_log)

print(BestParametresAB_log)
print(ScoresAB_log)
figAB_log.show()


                         paramètre AdaBoostRegressor()
0  adaboostregressor__n_estimators                   1
1          adaboostregressor__loss              square
                           R²      RMSE       MAE
AdaBoostRegressor()  0.339668  1.798246  0.879046


In [31]:
# graph visualisation RMSE AdaBoostRegressor
# pour le meilleur paramètre loss
FigRMSEGRidAB_log = visuRMSEGrid(AdaBoostRegressor(), 'AB', n_estimatorsAB_log,
                                 'n estimators', GridAB_log,
                                 BestParametresAB_log,
                                 'adaboostregressor__loss')
FigRMSEGRidAB_log.show()
if write_data is True:
    FigRMSEGRidAB_log.write_image('./Figures/ConsoGraphRMSEAB_log.pdf')


In [32]:
Scores = ScoresLasso.append(
    [ScoresRidge, ScoresEN, ScoreskNN, ScoresRF, ScoresAB])
Scores


Unnamed: 0,R²,RMSE,MAE
Lasso(),0.845165,11375230.0,3431214.0
Ridge(),0.826684,12034980.0,3217855.0
ElasticNet(),0.819662,12276370.0,3230115.0
KNeighborsRegressor(),0.304837,24102890.0,3513263.0
RandomForestRegressor(),0.459721,21248820.0,2905579.0
AdaBoostRegressor(),0.475406,20938110.0,4403702.0


In [33]:
ScoresLog = ScoresLasso_log.append(
    [ScoresRidge_log, ScoresEN_log, ScoreskNN_log, ScoresRF_log,
     ScoresAB_log]).rename('{}_log'.format)
ScoresLog


Unnamed: 0,R²,RMSE,MAE
Lasso()_log,-0.136205,2.358827,1.130725
Ridge()_log,-0.169007,2.392635,1.124331
ElasticNet()_log,-0.149984,2.373087,1.125546
KNeighborsRegressor()_log,0.349275,1.785116,0.845344
RandomForestRegressor()_log,0.349607,1.78466,0.782007
AdaBoostRegressor()_log,0.339668,1.798246,0.879046


In [34]:
CompareScores = Scores.append(ScoresLog)
if write_data is True:
    CompareScores.to_latex('./Tableaux/ConsoScoresModèles.tex')
CompareScores


Unnamed: 0,R²,RMSE,MAE
Lasso(),0.845165,11375230.0,3431214.0
Ridge(),0.826684,12034980.0,3217855.0
ElasticNet(),0.819662,12276370.0,3230115.0
KNeighborsRegressor(),0.304837,24102890.0,3513263.0
RandomForestRegressor(),0.459721,21248820.0,2905579.0
AdaBoostRegressor(),0.475406,20938110.0,4403702.0
Lasso()_log,-0.136205,2.358827,1.130725
Ridge()_log,-0.169007,2.392635,1.124331
ElasticNet()_log,-0.149984,2.373087,1.125546
KNeighborsRegressor()_log,0.349275,1.785116,0.8453436


In [35]:
fig = make_subplots(3,
                    2,
                    column_titles=("Consommation brute", "Consommation log2"),
                    row_titles=('R²', 'RMSE', 'MAE'),
                    shared_xaxes=True)
fig.add_trace(go.Bar(x=Scores.index, y=Scores['R²']), row=1, col=1)
fig.add_trace(go.Bar(x=Scores.index, y=Scores['RMSE']), row=2, col=1)
fig.add_trace(go.Bar(x=Scores.index, y=Scores['MAE']), row=3, col=1)
fig.add_trace(go.Bar(x=ScoresLog.index, y=ScoresLog['R²']), row=1, col=2)
fig.add_trace(go.Bar(x=ScoresLog.index, y=ScoresLog['RMSE']), row=2, col=2)
fig.add_trace(go.Bar(x=ScoresLog.index, y=ScoresLog['MAE']), row=3, col=2)
fig.update_layout(
    title_text="Comparaison des scores des modèles de consommation",
    showlegend=False)
fig.show()


# 2. Modèle de prédiction sur la consommation énergétique (SiteEnergyUse) avec les données catégorielles

In [36]:
BEBCat = pd.read_csv('BEBCat.csv')