# Projet de Machine Learning  UMONS 2024-2025

### Thème : Prédiction du score de Macron aux 2nd Tour des éléctions 2022

----

# Configuration et Installation des Dépendances

----

In [1]:
from sys import modules as sys_modules
from os.path import join
# chemin vers le dossier contenant toute les données à utiliser
if 'google.colab' in sys_modules :
  from google.colab import drive
  drive.mount('/content/drive')
  data_path = "/content/drive/MyDrive/Colab_Notebooks/ProjetML/src/datasets"
else:
  data_path = "datasets"


In [2]:
if 'google.colab' in sys_modules:
      bib = ["openpyxl",
             "xlrd",
             "optuna",
             "lightgbm",
             "xgboost"
            ]
      for b in bib:
        %pip install {b}

## a. Import des Bibliothèques/dépendances


In [3]:
# @title manipulation des vecteurs
import pandas as pd
import numpy as np


In [4]:
# @title création des graphiques
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.ticker as mtick
from tqdm import tqdm

In [5]:
# @title prétraitement des données en masse
from sklearn.preprocessing import OneHotEncoder, RobustScaler , FunctionTransformer
from sklearn.compose import ColumnTransformer , make_column_transformer
from sklearn.compose import make_column_selector
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer


In [6]:
# @title selection des features
from collections import defaultdict
from sklearn.feature_selection import SelectFromModel
from functools import lru_cache
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LassoCV

In [7]:
# @title sélection du meilleur modèle
import optuna
from optuna.pruners import MedianPruner
from optuna.exceptions import TrialPruned
from sklearn.model_selection import train_test_split, KFold, cross_val_score , RepeatedKFold
from typing import Dict, Any, Tuple , List
from sklearn.base import clone
from sklearn.model_selection import GridSearchCV

In [8]:
# @title fonction de score et evaluation
from scipy import stats
from sklearn.metrics import mean_squared_error, r2_score ,mean_absolute_error
from sklearn.metrics import confusion_matrix , ConfusionMatrixDisplay

In [9]:
# @title initialisation des modèles
from sklearn.linear_model import ElasticNet , Lasso
from xgboost import XGBRegressor


In [10]:
# @title options sytèmes
from sys import modules as sys_modules
import os
import sys
import joblib
from joblib import Parallel , delayed
import json
import warnings
import os
import glob
import re
from IPython.display import Markdown, display




## b. Configuration


In [11]:
#Pour ignorer les warnings
warnings.filterwarnings('ignore')

# configuration des graphiques
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams['figure.figsize'] = (12, 8)
sns.set_palette('Set2')

# Pour une meilleure lisibilité dans le notebook
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.float_format', '{:.3f}'.format)

print("chargement des bibliothèques terminé")

chargement des bibliothèques terminé


## c. Constantes

In [12]:
# @title Paramètres globaux
CORRTHRESHOLD = 60 # @param {"type":"integer"}
RANDOM_STATE = 42 # @param {"type":"integer"}
IDs= [] # @param {type:"raw"}

## d. Fonctions utilitaires

In [13]:
# @title Utilitaires

def sep(lg=90):
    """Affiche une ligne de séparation"""
    print("\n" + "-"*lg + "\n")

def sub(l1,l2):
  """Retourne la liste des éléments de l1 qui ne sont pas dans l2"""
  return [x for x in l1 if x not in l2]

def get_columns_above_missing_threshold(df, threshold:int=CORRTHRESHOLD):
    """
    Identifie les colonnes ayant un pourcentage de valeurs manquantes supérieur au seuil spécifié.

    Args:
        df (pandas.DataFrame): Le DataFrame à analyser
        threshold (float): Le seuil en pourcentage (entre 0 et 100) au-delà duquel une colonne est considérée
                          comme ayant trop de valeurs manquantes. Par défaut : 50

    Returns:
        list: Liste des noms de colonnes dont le pourcentage de valeurs manquantes dépasse le seuil,
              triée par pourcentage décroissant
    """
    # Vérification que le seuil est valide
    if not 0 <= threshold <= 100:
        raise ValueError("Le seuil doit être compris entre 0 et 100")

    # Calcul du pourcentage de valeurs manquantes par colonne
    missing_percentages = (df.isnull().sum() / len(df)) * 100

    # Sélection des colonnes dépassant le seuil
    columns_above_threshold = missing_percentages[missing_percentages > threshold]

    # Tri par pourcentage décroissant
    columns_above_threshold = columns_above_threshold.sort_values(ascending=False)

    # Création d'un DataFrame avec les colonnes et leurs pourcentages
    """result_df = pd.DataFrame({
        'Colonne': columns_above_threshold.index,
        'Pourcentage de valeurs manquantes': columns_above_threshold.values
    })"""

    return  columns_above_threshold.index.to_list() , pd.DataFrame(columns_above_threshold)

def visualise(df):
  print(f"forme : {df.shape}")

def all_columns(df, res=True):
  all_col = df.columns.tolist()
  print(f"les colonnes sont : \n{df.columns}")
  print(f" il y'a {len(all_col)} colonnes dans le dataframe")
  sep()
  if res:
    return all_col
  else:
    return None

def get_analyse(data , id , col_to_drop , res =True):

    df= data.copy()
    all_col = df.columns.tolist()
    col_to_keep = sub(all_col,col_to_drop)
    to_holes,_ = get_columns_above_missing_threshold(df[col_to_keep])
    col_to_keep2 = sub(col_to_keep,to_holes)
    IDs.append(id)
    if res:
      return col_to_keep2 , to_holes
    else:
      return None

def write_markdown_conclusion(id_colonne, colonnes_manquantes, colonnes_supprimer, colonnes_conserver):
    """
    Génère une section Markdown pour la conclusion de l'analyse d'un DataFrame.

    Args:
        id_colonne (str): Nom de la colonne d'identification.
        colonnes_manquantes (list): Liste des colonnes avec un fort taux de valeurs manquantes.
        colonnes_supprimer (list): Liste des colonnes à supprimer.
        colonnes_conserver (list): Liste des colonnes à conserver.
    """

    markdown_text = f"""
#### Conclusion

* _Identifiant_ : ``'{id_colonne}'``

* _Colonnes avec plus de {CORRTHRESHOLD}% de valeurs manquantes_ : ``{colonnes_manquantes}``

* _Colonnes à supprimer_ : ``{colonnes_supprimer}``

* _Colonnes à conserver_ : ``{colonnes_conserver}``

----
"""
    display(Markdown(markdown_text))

def display_correlation_matrix(df , save = False,name=None , table=False , target=None, threshold=0.5):
    """
    Affiche la matrice de corrélation entre les colonnes numériques du DataFrame et la colonne cible.

    Args:
    ----
    df : pd.DataFrame
        Le DataFrame à analyser.
    target : str
        Le nom de la colonne cible.
    threshold : float
        Le seuil de corrélation au-delà duquel les colonnes sont considérées comme corrélées.
    """
    df = df.copy()
    t_in = (target in df.columns)

    num_cols = df.select_dtypes(include=[np.number]).columns
    # Calculer la matrice de corrélation

    if len(num_cols) == 0:
        raise ValueError("Aucune colonne numérique trouvée dans le DataFrame.")

    if (target is not None) and t_in:
        num_cols = [target] + [col for col in num_cols if col != target]

    corr_matrix = df[num_cols].corr()

    # Masquer la moitié inférieure
    upper = corr_matrix.where(np.tril(np.ones(corr_matrix.shape), k=1).astype(bool))

    # filter les correlations dépassant le seuil
    filtered_corr = upper[
                          (upper.abs() > threshold) &
                          (upper != 1.0)
                          ].dropna(how='all',axis=0).dropna(axis=1, how='all').sort_values(by=target, ascending=False) if t_in else upper[ (upper.abs() > threshold) & (upper != 1.0)].dropna(how='all',axis=0).dropna(axis=1, how='all').sort_values(ascending=False)

    if t_in:
        corr_with_target=  corr_matrix[target].drop(target).sort_values(ascending=False)


        if table :
            filt_title = f"Tableau de corrélation entre les colonnes dépassnt  {threshold} : \n"
            display(Markdown(filt_title))
            display(filtered_corr)
            display(Markdown("-"*50))
            with_ta_title = f"Tableau de corrélation entre les colonnes et la colonne {target} : \n"
            display(Markdown(with_ta_title))
            display(corr_with_target)

        # Afficher la matrice de corrélation
        fig , (ax1 , ax2) = plt.subplots( 1 , 2 , figsize=(12, 8))
        sns.heatmap( filtered_corr , ax= ax1 , annot=True, fmt=".2f", cmap='coolwarm', square=True)
        ax1.set_title(f"Matrice de corrélation (seuil : {threshold})")
        sns.heatmap( corr_with_target , ax= ax2 , annot=True, fmt=".2f", cmap='coolwarm', square=True)
        ax2.set_title(f"Matrice de corrélation avec {target}")
        plt.legend(loc='upper right', fontsize=10)
        plt.tight_layout()

        if save:
            if name is None:
                plt.savefig("corr_matrix.png")
            else:
                plt.savefig(f"corr_matrix_{name}.png")

        plt.show()
    else:
        if table :
            display(Markdown(f"La colonne cible '{target}' n'existe pas dans le DataFrame."))
            display(Markdown("-"*50))
            filt_title = f"Tableau de corrélation entre les colonnes dépassnt  {threshold} : \n"
            display(Markdown(filt_title))
            display(filtered_corr)

        # Afficher la matrice de corrélation
        plt.figsize=(12, 8)
        sns.heatmap( filtered_corr  , annot=True, fmt=".2f", cmap='coolwarm', square=True)
        plt.title(f"Matrice de corrélation (seuil : {threshold})")
        plt.tight_layout()

        if save:
            if name is None:
                plt.savefig("corr_matrix.png")
            else:
                plt.savefig(f"corr_matrix_{name}.png")

        plt.show()



# 1. EXPLORATION DES DONNEES

----


Le but ici c'est d'essayer de comprendre  les données , c'est pouvoir repondre aux questions :
* Quelles sont les données visiblement non-pertinentes ?
* Detecter les outliers ?
* Vérifier le taux de valeurs manquantes
* regrouper les informations en indices synthétiques

----

## 1.1 Chargement des données

In [14]:

print("debut du chargement des données ... .. ... ..")

# Chargement des données de d'entrainement et de test
result_train = pd.read_csv( os.path.join(data_path,"results_train.csv") , sep = ',',encoding='utf-8')
result_test = pd.read_csv( os.path.join(data_path,"results_test.csv") , sep = ',',encoding='utf-8')

res_train_df = result_train.copy()
res_test_df = result_test.copy()

#-------------------------
# Données additionnelles
#------------------------

#  Niveau de vie
niveau_vie = pd.read_excel(os.path.join(data_path, "Niveau_de_vie_2013_a_la_commune.xlsx"))
niveau_vie_df = niveau_vie.copy()

#  Communes de France
communes_france = pd.read_csv(os.path.join(data_path, "communes-france-2022.csv"), sep=',', encoding='utf-8')
communes_df = communes_france.copy()

#  Données d'âge
age_insee = pd.read_excel(os.path.join(data_path, "age-insee-2020.xlsx"))
age_df = age_insee.copy()

# Données diverses INSEE
insee_divers = pd.read_excel(os.path.join(data_path, "MDB-INSEE-V2.xls"))
insee_divers_df = insee_divers.copy()

print("chargement des données terminé !! ")

debut du chargement des données ... .. ... ..
chargement des données terminé !! 


## 1.2 Pré-Analyse  et Pré-Selection  des Features (colonnes)

In [15]:
# @title colonne cible
target = '% Voix/Ins' # @param {type:"string"}

### results_train

In [16]:
# @title visualisation
visualise(res_train_df)

res_train_df.head(3)

forme : (20892, 32)


Unnamed: 0,CodeINSEE,Libellé du département,Libellé de la commune,Etat saisie,Inscrits,Abstentions,% Abs/Ins,Votants,% Vot/Ins,Blancs,% Blancs/Ins,% Blancs/Vot,Nuls,% Nuls/Ins,% Nuls/Vot,Exprimés,% Exp/Ins,% Exp/Vot,N°Panneau,Sexe,Nom,Prénom,Voix,% Voix/Ins,% Voix/Exp,Unnamed: 26,Unnamed: 27,Unnamed: 28,Unnamed: 29,Unnamed: 30,Unnamed: 31,Unnamed: 32
0,1006,Ain,Ambléon,Complet,103,19,18.45,84,81.55,12,11.65,14.29,0,0.0,0.0,72,69.9,85.71,1,M,MACRON,Emmanuel,45,43.69,62.5,2,F,LE PEN,Marine,27,26.21,37.5
1,1009,Ain,Andert-et-Condon,Complet,280,73,26.07,207,73.93,22,7.86,10.63,3,1.07,1.45,182,65.0,87.92,1,M,MACRON,Emmanuel,102,36.43,56.04,2,F,LE PEN,Marine,80,28.57,43.96
2,1010,Ain,Anglefort,Complet,792,185,23.36,607,76.64,31,3.91,5.11,8,1.01,1.32,568,71.72,93.57,1,M,MACRON,Emmanuel,227,28.66,39.96,2,F,LE PEN,Marine,341,43.06,60.04


In [17]:
# @title affichage des colonnes
all_col_train = all_columns(res_train_df)

les colonnes sont : 
Index(['CodeINSEE', 'Libellé du département', 'Libellé de la commune',
       'Etat saisie', 'Inscrits', 'Abstentions', '% Abs/Ins', 'Votants',
       '% Vot/Ins', 'Blancs', '% Blancs/Ins', '% Blancs/Vot', 'Nuls',
       '% Nuls/Ins', '% Nuls/Vot', 'Exprimés', '% Exp/Ins', '% Exp/Vot',
       'N°Panneau', 'Sexe', 'Nom', 'Prénom', 'Voix', '% Voix/Ins',
       '% Voix/Exp', 'Unnamed: 26', 'Unnamed: 27', 'Unnamed: 28',
       'Unnamed: 29', 'Unnamed: 30', 'Unnamed: 31', 'Unnamed: 32'],
      dtype='object')
 il y'a 32 colonnes dans le dataframe

------------------------------------------------------------------------------------------



In [18]:
# présélection
# colonnes clairement non informatives à supprimer
col_to_drop_train = ['Unnamed: 27' ,'Unnamed: 26' , 'Unnamed: 28' ,
                'Unnamed: 29' , 'Unnamed: 30','Unnamed: 31' ,
                'Unnamed: 32' ,'Prénom','Sexe','Nom','N°Panneau','Libellé de la commune']

id_train = 'CodeINSEE'

col_to_keep1 , to_holes_train = get_analyse(res_train_df,id_train,col_to_drop_train)

In [19]:
# @title conclusion
write_markdown_conclusion(id_train, to_holes_train, col_to_drop_train, col_to_keep1)


#### Conclusion

* _Identifiant_ : ``'CodeINSEE'``

* _Colonnes avec plus de 60% de valeurs manquantes_ : ``[]``

* _Colonnes à supprimer_ : ``['Unnamed: 27', 'Unnamed: 26', 'Unnamed: 28', 'Unnamed: 29', 'Unnamed: 30', 'Unnamed: 31', 'Unnamed: 32', 'Prénom', 'Sexe', 'Nom', 'N°Panneau', 'Libellé de la commune']``

* _Colonnes à conserver_ : ``['CodeINSEE', 'Libellé du département', 'Etat saisie', 'Inscrits', 'Abstentions', '% Abs/Ins', 'Votants', '% Vot/Ins', 'Blancs', '% Blancs/Ins', '% Blancs/Vot', 'Nuls', '% Nuls/Ins', '% Nuls/Vot', 'Exprimés', '% Exp/Ins', '% Exp/Vot', 'Voix', '% Voix/Ins', '% Voix/Exp']``

----


### niveau de vie

In [20]:
# @title visualisation
visualise(niveau_vie_df)

niveau_vie_df.head(3)

forme : (36572, 4)


Unnamed: 0,Code Commune,Nom Commune,Niveau de vie Commune,Niveau de vie Département
0,5047,Éourres,10021.25,19202.516
1,26142,Glandage,10215.0,19300.504
2,11317,Rodome,10908.5,17599.733


In [21]:
all_col_niveau_vie = all_columns(niveau_vie_df)


les colonnes sont : 
Index(['Code Commune', 'Nom Commune', 'Niveau de vie Commune',
       'Niveau de vie Département'],
      dtype='object')
 il y'a 4 colonnes dans le dataframe

------------------------------------------------------------------------------------------



In [22]:
# @title présélection
# colonnes clairement non informatives à supprimer
col_to_drop_niveau = []
id_niveau = 'Code Commune'

col_to_keep2 , to_holes_niveau = get_analyse(niveau_vie_df,id_niveau,col_to_drop_niveau)

In [23]:
# @title conclusion
write_markdown_conclusion(id_niveau, to_holes_niveau, col_to_drop_niveau, col_to_keep2)


#### Conclusion

* _Identifiant_ : ``'Code Commune'``

* _Colonnes avec plus de 60% de valeurs manquantes_ : ``[]``

* _Colonnes à supprimer_ : ``[]``

* _Colonnes à conserver_ : ``['Code Commune', 'Nom Commune', 'Niveau de vie Commune', 'Niveau de vie Département']``

----


### communes de frances

In [24]:
# @title visualisation
visualise(communes_df)

communes_df.head(3)

forme : (35010, 39)


Unnamed: 0.1,Unnamed: 0,code_insee,nom_standard,nom_sans_pronom,nom_a,nom_de,nom_sans_accent,nom_standard_majuscule,typecom,typecom_texte,reg_code,reg_nom,dep_code,dep_nom,canton_code,canton_nom,epci_code,epci_nom,academie_code,academie_nom,code_postal,codes_postaux,zone_emploi,code_insee_centre_zone_emploi,population,superficie_hectare,superficie_km2,densite,altitude_moyenne,altitude_minimale,altitude_maximale,latitude_mairie,longitude_mairie,latitude_centre,longitude_centre,grille_densite,gentile,url_wikipedia,url_villedereve
0,0,1001,L'Abergement-Clémenciat,Abergement-Clémenciat,à Abergement-Clémenciat,de l'Abergement-Clémenciat,l-abergement-clemenciat,L'ABERGEMENT-CLÉMENCIAT,COM,commune,84,Auvergne-Rhône-Alpes,1,Ain,108,Châtillon-sur-Chalaronne,200069193,CC de la Dombes,10,Lyon,1400,01400,8405.0,1053,779,1565,16,48.7,242,206.0,272.0,46.153,4.926,46.153,4.926,Rural à habitat dispersé,,https://fr.wikipedia.org/wiki/fr:L'Abergement-...,https://villedereve.fr/ville/01001-l-abergemen...
1,1,1002,L'Abergement-de-Varey,Abergement-de-Varey,à Abergement-de-Varey,de l'Abergement-de-Varey,l-abergement-de-varey,L'ABERGEMENT-DE-VAREY,COM,commune,84,Auvergne-Rhône-Alpes,1,Ain,101,Ambérieu-en-Bugey,240100883,CC de la Plaine de l'Ain,10,Lyon,1640,01640,8405.0,1053,256,912,9,27.1,483,290.0,748.0,46.009,5.428,46.009,5.428,Rural à habitat dispersé,"Abergementais, Abergementaises",https://fr.wikipedia.org/wiki/fr:L'Abergement-...,https://villedereve.fr/ville/01002-l-abergemen...
2,2,1004,Ambérieu-en-Bugey,Ambérieu-en-Bugey,à Ambérieu-en-Bugey,d'Ambérieu-en-Bugey,amberieu-en-bugey,AMBÉRIEU-EN-BUGEY,COM,commune,84,Auvergne-Rhône-Alpes,1,Ain,101,Ambérieu-en-Bugey,240100883,CC de la Plaine de l'Ain,10,Lyon,1500,"01500, 01501, 01504, 01503, 01502, 01505, 01506",8405.0,1053,14134,2448,24,570.5,379,237.0,753.0,45.961,5.373,45.961,5.373,Centres urbains intermédiaires,"Ambarrois, Ambarroises",https://fr.wikipedia.org/wiki/fr:Ambérieu-en-B...,https://villedereve.fr/ville/01004-amberieu-en...


In [25]:
# @title all colonnes
all_col_commune = all_columns(communes_df)

les colonnes sont : 
Index(['Unnamed: 0', 'code_insee', 'nom_standard', 'nom_sans_pronom', 'nom_a',
       'nom_de', 'nom_sans_accent', 'nom_standard_majuscule', 'typecom',
       'typecom_texte', 'reg_code', 'reg_nom', 'dep_code', 'dep_nom',
       'canton_code', 'canton_nom', 'epci_code', 'epci_nom', 'academie_code',
       'academie_nom', 'code_postal', 'codes_postaux', 'zone_emploi',
       'code_insee_centre_zone_emploi', 'population', 'superficie_hectare',
       'superficie_km2', 'densite', 'altitude_moyenne', 'altitude_minimale',
       'altitude_maximale', 'latitude_mairie', 'longitude_mairie',
       'latitude_centre', 'longitude_centre', 'grille_densite', 'gentile',
       'url_wikipedia', 'url_villedereve'],
      dtype='object')
 il y'a 39 colonnes dans le dataframe

------------------------------------------------------------------------------------------



A première vu , on n'a pas besoin des
* url vers les sites internet des communes  c'est à dire ``url_wikipedia`` , ``url_ville``
* ``typecom`` et ``typecom_texte`` sont des colonnes constantes : on est toujours censé avoir à faire à des communes
* pas besoin de tous les type de noms de la communes , un seul suffira `nom_standard` , mais il est aussi renseigné dans `Niveau de vie`
* unamed ici represente un index donc inutile aussi
* on peut regrouper ` 'altitude_moyenne'`, `'altitude_minimale'`,
       `'altitude_maximale'` en  un ou deux indices d'altitude , idem pour les ``latitude...`` et ``longitute...``
* supprimer les infos d'identification (x_code , x_nom) sur les départements et régions , leurs codes sont déjà fourni dans ``MDB-insee-divers`` ce dernier ayant plus d'échantillons

In [26]:
# @title présélection
# colonnes jugées non informatives à supprimer

col_to_drop_commune = ['url_wikipedia','url_villedereve',
                       'typecom','typecom_texte',
                       'nom_standard','nom_a','nom_de',
                       'nom_sans_pronom','gentile',
                       'nom_sans_accent','nom_standard_majuscule',
                       'superficie_hectare','Unnamed: 0',
                       'academie_nom','codes postaux',
                       'longitude_centre','latitude_centre',
                       'reg_nom','dep_nom','canton_nom','epci_nom']
id_commune = 'code_insee'

col_to_keep3 , to_holes_commune = get_analyse(communes_df,id_commune,col_to_drop_commune)

In [27]:
# @title conclusion
write_markdown_conclusion(id_commune, to_holes_commune, col_to_drop_commune, col_to_keep3)


#### Conclusion

* _Identifiant_ : ``'code_insee'``

* _Colonnes avec plus de 60% de valeurs manquantes_ : ``[]``

* _Colonnes à supprimer_ : ``['url_wikipedia', 'url_villedereve', 'typecom', 'typecom_texte', 'nom_standard', 'nom_a', 'nom_de', 'nom_sans_pronom', 'gentile', 'nom_sans_accent', 'nom_standard_majuscule', 'superficie_hectare', 'Unnamed: 0', 'academie_nom', 'codes postaux', 'longitude_centre', 'latitude_centre', 'reg_nom', 'dep_nom', 'canton_nom', 'epci_nom']``

* _Colonnes à conserver_ : ``['code_insee', 'reg_code', 'dep_code', 'canton_code', 'epci_code', 'academie_code', 'code_postal', 'codes_postaux', 'zone_emploi', 'code_insee_centre_zone_emploi', 'population', 'superficie_km2', 'densite', 'altitude_moyenne', 'altitude_minimale', 'altitude_maximale', 'latitude_mairie', 'longitude_mairie', 'grille_densite']``

----


### age-insee

#### première analyse visuelle

In [28]:
# @title visualisation
visualise(age_df)

age_df.head(3)

forme : (34980, 26)


Unnamed: 0,INSEE,NOM,EPCI,DEP,REG,F0-2,F3-5,F6-10,F11-17,F18-24,F25-39,F40-54,F55-64,F65-79,F80+,Unnamed: 15,H0-2,H3-5,H6-10,H11-17,H18-24,H25-39,H40-54,H55-64,H65-79,H80+
0,1001,L'Abergement-Clémenciat,200069193,D1,R84,13.414,12.509,19.214,37.182,14.062,70.119,84.512,60.032,64.311,20.164,,18.07,14.403,34.54,40.257,14.231,72.498,81.849,61.039,55.24,18.353
1,1002,L'Abergement-de-Varey,240100883,D1,R84,2.994,6.05,12.232,11.869,5.202,20.498,33.975,12.365,12.419,7.027,,2.994,6.116,6.953,22.349,6.394,19.54,37.479,10.977,15.687,8.879
2,1004,Ambérieu-en-Bugey,240100883,D1,R84,294.668,245.153,382.801,599.105,680.831,1451.111,1268.502,903.062,1064.992,517.133,,256.304,289.985,485.793,613.182,669.385,1542.699,1238.12,782.771,750.04,252.364


In [29]:
# @title Présélection
# visuellement
col_to_drop_age = ['NOM']
id_age = 'INSEE'
all_col_age = age_df.columns.tolist()
col_to_keep4 , to_holes_age = get_analyse(age_df,id_age,col_to_drop_age)


In [30]:
col_to_drop_age += to_holes_age
print(col_to_drop_age)


['NOM', 'Unnamed: 15']


A defaut d'avoir toutes les tranches d'âge pour chaque sexe , on regroupe les nombres d'hommes et femmes de chaque tranche pour avoir moins de variables:
* Sur le plan social  :
    * `% Mineurs` (0 - 17),
    * `% Adultes`(18-54) ,
    * `% Seniors`(55-79) ,
    * `% Tres_seniors`(80+)
* sur le plan économique :
    *`% Travailleurs` ceux qui ont l'age de potentiellement travailler
    * `% Retraites`  ceux qu'on a jugés ne plus pouvoir travailler car ont plus de `64` ans

#### Regroupement des âges

In [31]:
age_groups = age_df[age_df.columns[:5]]

# population total
col_genre = sub(sub(all_col_age,col_to_drop_age),age_df.columns[:5].tolist())
col_hom = col_genre[10:]
age_groups['Population'] = age_df[col_genre].sum(axis=1)


In [32]:
#--------------------------------
# Regroupement par cycle de vie
#-------------------------------

# les mineurs
age_groups['% Mineurs'] = (age_df['F0-2'] + age_df['F3-5'] +age_df['F6-10']+age_df['F11-17'] + age_df['H0-2'] + age_df['H3-5'] + age_df['H0-2']+age_df['H6-10']+age_df['F11-17'] ) / age_groups['Population'] * 100

# les adultes
age_groups['% Adultes'] = (age_df['F18-24'] + age_df['F25-39'] + age_df['F40-54']+age_df['H18-24']+age_df['H25-39'] + age_df['H40-54']) / age_groups['Population'] * 100


# les agés
age_groups['% Seniors'] = (age_df['F55-64'] + age_df['F65-79'] + age_df['H55-64'] + age_df['H65-79']) / age_groups['Population'] * 100


# les très agés
age_groups['% Tres_Seniors'] = (age_df['F80+'] + age_df['H80+']) / age_groups['Population'] * 100

# les retraités
age_groups['% Retraites'] = (age_df['F65-79'] + age_df['H65-79'] + age_df['F80+'] + age_df['H80+']) / age_groups['Population'] * 100

# travailleurs potentiels
age_groups['% Travailleurs'] = (age_groups['% Adultes'] + age_groups['% Seniors'] + age_groups['% Tres_Seniors'] - age_groups['% Retraites']) / age_groups['Population'] * 100

# ratio hommes/femmes
age_groups['rH/F'] = ( age_df[col_hom].sum(axis=1)  / age_df[col_genre[:10]].sum(axis=1) )

age_groups.to_csv("age_groups.csv",index=False)
age_groups.head(3)


Unnamed: 0,INSEE,NOM,EPCI,DEP,REG,Population,% Mineurs,% Adultes,% Seniors,% Tres_Seniors,% Retraites,% Travailleurs,rH/F
0,1001,L'Abergement-Clémenciat,200069193,D1,R84,806.0,25.383,41.845,29.854,4.779,19.611,7.055,1.038
1,1002,L'Abergement-de-Varey,240100883,D1,R84,262.0,24.455,46.98,19.637,6.071,16.798,21.332,1.102
2,1004,Ambérieu-en-Bugey,240100883,D1,R84,14288.0,23.861,47.947,24.502,5.386,18.089,0.418,0.929


#### mise à jour et conclusion

In [33]:
col_to_drop_age = []
colt_to_keep4 = sub(all_col_age,col_to_drop_age)


### MDB-INSEE-Divers

#### première analyse visuelle

In [34]:
# @title visualisation
visualise(insee_divers_df)

insee_divers_df.head(3)

forme : (36677, 101)


Unnamed: 0,CODGEO,Nb Pharmacies et parfumerie,Dynamique Entrepreneuriale,Dynamique Entrepreneuriale Service et Commerce,Synergie Médicale COMMUNE,Orientation Economique,Indice Fiscal Partiel,Score Fiscal,Indice Evasion Client,Score Evasion Client,Indice Synergie Médicale,Score Synergie Médicale,SEG Croissance POP,LIBGEO,REG,DEP,Nb Omnipraticiens BV,Nb Infirmiers Libéraux BV,Nb dentistes Libéraux BV,Nb pharmaciens Libéraux BV,Densité Médicale BV,Score équipement de santé BV,Indice Démographique,Score Démographique,Indice Ménages,Score Ménages,Population,Evolution Population,Evolution Pop %,Nb Ménages,Nb Résidences Principales,Nb propriétaire,Nb Logement,Nb Résidences Secondaires,Nb Log Vacants,Nb Occupants Résidence Principale,Nb Femme,Nb Homme,Nb Mineurs,Nb Majeurs,Nb Etudiants,Nb Entreprises Secteur Services,Nb Entreprises Secteur Commerce,Nb Entreprises Secteur Construction,Nb Entreprises Secteur Industrie,Nb Création Enteprises,Nb Création Industrielles,Nb Création Construction,Nb Création Commerces,Nb Création Services,Moyenne Revenus Fiscaux Départementaux,Moyenne Revenus Fiscaux Régionaux,Dep Moyenne Salaires Horaires,Dep Moyenne Salaires Cadre Horaires,Dep Moyenne Salaires Prof Intermédiaire Horaires,Dep Moyenne Salaires Employé Horaires,Dep Moyenne Salaires Ouvrié Horaires,Reg Moyenne Salaires Horaires,Reg Moyenne Salaires Cadre Horaires,Reg Moyenne Salaires Prof Intermédiaire Horaires,Reg Moyenne Salaires Employé Horaires,Reg Moyenne Salaires Ouvrié Horaires,Valeur ajoutée régionale,Urbanité Ruralité,Score Urbanité,Nb Atifs,Nb Actifs Salariés,Nb Actifs Non Salariés,Nb Logement Secondaire et Occasionnel,Nb Hotel,Capacité Hotel,Nb Camping,Capacité Camping,Dynamique Démographique BV,Taux étudiants,Taux Propriété,Dynamique Démographique INSEE,Capacité Fisc,Capacité Fiscale,Moyenne Revnus fiscaux,Taux Evasion Client,"Nb Education, santé, action sociale",Nb Services personnels et domestiques,"Nb Santé, action sociale",Nb Industries des biens intermédiaires,Nb de Commerce,Nb de Services aux particuliers,"Nb institution de Education, santé, action sociale, administration",PIB Régionnal,SEG Environnement Démographique Obsolète,Score Croissance Population,Score Croissance Entrepreneuriale,Score VA Région,Score PIB,Environnement Démographique,Fidélité,SYN MEDICAL,Seg Cap Fiscale,Seg Dyn Entre,DYN SetC,CP
0,1001,0.0,57.0,23.0,114,Bassin Industriel,101.939,59.041,0.0,0.0,114.567,0.135,en croissance démographique,L' Abergement-Clémenciat,82,1,9,14,7,7,0.093,4,44.198,0.034,37.22,0.022,725,16,2,247,248,196,289,32,9,728,694,714,909,499,51,7.0,11.0,2.0,2.0,4.0,0.0,2.0,1.0,1.0,12509,10458,11.41,21.964,12.559,8.743,9.269,11.873,21.788,12.704,8.783,9.301,86957.458,Com rurale < 2 000 m habts,0.0,295.0,254.0,41.0,32.0,0.0,0.0,0.0,0.0,1.Accroissement par excédent naturel et migrat...,0,67,-1,117,117,11483.5,0,3.0,1.0,0.0,9364,9350,3372,15105,173681,Zone rurale en croissance démographique,72.131,0.016,32.426,33.838,Bassin Industriel en croissance démographique,Pop Sédentaire,Synergie Médicale,Fiscalité moyenne,Faible dynamique,Faible Dynamique Serv et Com,1
1,1002,0.0,45.0,4.0,143,Bassin Résidentiel,101.939,59.041,0.0,0.0,143.711,0.174,en croissance démographique,L' Abergement-de-Varey,82,1,31,36,18,18,0.099,4,10.181,0.008,10.096,0.006,167,4,2,67,67,61,142,71,4,168,162,164,202,124,5,4.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,12509,10458,11.41,21.964,12.559,8.743,9.269,11.873,21.788,12.704,8.783,9.301,86957.458,Com rurale < 2 000 m habts,0.0,57.0,49.0,8.0,71.0,0.0,0.0,0.0,0.0,1.Accroissement par excédent naturel et migrat...,0,42,0,110,110,11483.5,0,0.0,0.0,0.0,9364,9350,3372,15105,173681,Zone rurale en croissance démographique,72.131,0.002,32.426,33.838,Bassin Résidentiel en croissance démographique,Pop Sédentaire,Forte Synergie Médicale,Fiscalité moyenne,Faible dynamique,Faible Dynamique Serv et Com,1
2,1004,0.0,634.0,828.0,366,Bassin Résidentiel,101.939,59.041,248.455,0.106,367.821,0.471,en croissance démographique,Ambérieu-en-Bugey,82,1,31,36,18,18,0.099,4,696.921,0.538,699.199,0.418,11432,512,4,4640,4635,1968,5184,135,414,11015,11350,10878,13624,8604,904,342.0,301.0,58.0,108.0,83.0,4.0,14.0,27.0,38.0,12509,10458,11.41,21.964,12.559,8.743,9.269,11.873,21.788,12.704,8.783,9.301,86957.458,Com < 50 m habts,37.5,4556.0,4203.0,353.0,135.0,2.0,52.0,0.0,0.0,1.Accroissement par excédent naturel et migrat...,0,37,-55,250,250,11483.5,2,113.0,41.0,118.0,9364,9350,3372,15105,173681,Zone rurale en croissance démographique,72.951,0.385,32.426,33.838,Bassin Résidentiel en croissance démographique,Pop Mobile,Forte Synergie Médicale,Fiscalité moyenne,Dynamique Economique,Bonne Dynamique Entreprise Serv et Com,1


In [35]:
# @title affichage des colonnes
all_col_insee_divers = all_columns(insee_divers_df)

les colonnes sont : 
Index(['CODGEO', 'Nb Pharmacies et parfumerie', 'Dynamique Entrepreneuriale',
       'Dynamique Entrepreneuriale Service et Commerce',
       'Synergie Médicale COMMUNE', 'Orientation Economique',
       'Indice Fiscal Partiel', 'Score Fiscal', 'Indice Evasion Client',
       'Score Evasion Client',
       ...
       'Score Croissance Entrepreneuriale', 'Score VA Région', 'Score PIB',
       'Environnement Démographique', 'Fidélité', 'SYN MEDICAL',
       'Seg Cap Fiscale', 'Seg Dyn Entre', 'DYN SetC', 'CP'],
      dtype='object', length=101)
 il y'a 101 colonnes dans le dataframe

------------------------------------------------------------------------------------------



#### regroupement

In [36]:
# @title présélection
id_mdb = 'CODGEO'
# colonnes jugées non informatives ou trop difficile à manipuler
col_to_drop_mdb = ['Nb Résidences Principales','Nb Log Vacants' ,
                   'Nb Résidences Secondaires',
                   'Score démographique' , 'Score Ménages' ,
                   'Evolution Pop %','Score Fiscal',
                   'Score Evasion Client','Score Synergie Médicale',
                   'Population','Nb Logement Secondaires',
                   'Capacité Camping', 'Capacité Hotel',
                   'Dynamique Démographique BV', 'Capacité Fisc',
                   'CP' , 'Urbanité Ruralité' , 'SEG Environnement Démographique Obsolète']
# les colonnes qui on été regroupées
HF = ['Nb Homme' , 'Nb Femme']

mM = ['Nb Mineurs' , 'Nb Majeurs']

Nb_entreprises = ['Nb Entreprises Secteur Commerce' ,
                  'Nb Entreprises Secteur Construction',
                  'Nb Entreprises Secteur Industrie' ,
                  'Nb Entreprises Secteur Services']

Nb_medecins = ['Nb Omnipraticiens BV' ,
             'Nb Infirmiers Libéraux BV',
             'Nb dentistes Libéraux BV',
             'Nb pharmaciens Libéraux BV']

Indice_creation_eco = ['Nb Création Enteprises' ,
                    'Nb Création Industrielles',
                    'Nb Création Construction',
                    'Nb Création Commerces',
                    'Nb création Services']

Indice_fiscal = ['Moyenne Revenus Fiscaux Départementaux',
                 'Moyenne Revenus Fiscaux Régionaux']

Indice_social =['Nb Education, santé, action sociale',
                'Nb Santé, action sociale',
                'Nb institution de Education, santé, action sociale, administration']

Indice_services = ['Nb Services personnels et domestiques',
                   'Nb Industries des biens intermédiaires',
                   'Nb de Commerce',
                   'Nb de Services aux particuliers']

Indice_salaire_global = ['Dep Moyenne Salaires Horaires',
                         'Reg Moyenne Salaires Horaires',]

indice_salaire_csp = ['Dep Moyenne Salaires Cadre Horaires',
                      'Reg Moyenne Salaires Cadre Horaires',
                      'Dep Moyenne Salaires Prof Intermédiaire Horaires',
                      'Reg Moyenne Salaires Prof Intermédiaire Horaires',
                      'Dep Moyenne Salaires Employé Horaires',
                      'Reg Moyenne Salaires Employé Horaires',
                      'Dep Moyenne Salaires Ouvrié Horaires',
                      'Reg Moyenne Salaires Ouvrié Horaires']

dict_indice_salaire = {
    'indice_salaire_global': Indice_salaire_global,
    **{'indice_salaire_'+str(i) : indice_salaire_csp[i:i+2] for i in range(0, len(indice_salaire_csp), 2)}
}

col_to_drop_mdb += HF + mM + Nb_entreprises + Nb_medecins + Indice_creation_eco + Indice_fiscal + Indice_salaire_global + indice_salaire_csp + Indice_services + Indice_social

col_to_keep5 = sub(all_col_insee_divers,col_to_drop_mdb)

# ...existing code...

# Dictionnaire de correspondance pour les nouveaux noms
nouveaux_noms = {
    'indice_salaire_0': 'indice_salaire_cadre',
    'indice_salaire_2': 'indice_salaire_prof',
    'indice_salaire_4': 'indice_salaire_employé',
    'indice_salaire_6': 'indice_salaire_ouvrié'
}

for old_key, new_key in nouveaux_noms.items():
    if old_key in dict_indice_salaire:
        dict_indice_salaire[new_key] = dict_indice_salaire.pop(old_key)

# Vérification
for k, v in dict_indice_salaire.items():
    print(f"{k} : {v}")
# ...existing code...

indice_salaire_global : ['Dep Moyenne Salaires Horaires', 'Reg Moyenne Salaires Horaires']
indice_salaire_cadre : ['Dep Moyenne Salaires Cadre Horaires', 'Reg Moyenne Salaires Cadre Horaires']
indice_salaire_prof : ['Dep Moyenne Salaires Prof Intermédiaire Horaires', 'Reg Moyenne Salaires Prof Intermédiaire Horaires']
indice_salaire_employé : ['Dep Moyenne Salaires Employé Horaires', 'Reg Moyenne Salaires Employé Horaires']
indice_salaire_ouvrié : ['Dep Moyenne Salaires Ouvrié Horaires', 'Reg Moyenne Salaires Ouvrié Horaires']


In [37]:
mdb_df = insee_divers_df[col_to_keep5].copy()

# nombre d'entreprises
mdb_df['Nb Entreprises'] = insee_divers_df[Nb_entreprises].sum(axis=1)

# nombre de médecins
mdb_df['Nb Medecins'] = insee_divers_df[Nb_medecins].sum(axis=1)
mdb_df.head(3)

# ratio du nombre d'hommes par rapport au femmes
mdb_df['rH/F'] = insee_divers_df[HF[0]] / insee_divers_df[HF[1]]

# ratio du nombre de mineurs par par rapport au majeurs
mdb_df['rm/M'] = insee_divers_df[mM[0]] / insee_divers_df[mM[1]]

# Indice salariax
for k , v in dict_indice_salaire.items():
    mdb_df[k] = insee_divers_df[v[0]]/insee_divers_df[v[1]]

# indice social
mdb_df['indice_social'] = insee_divers_df[Indice_social].sum(axis=1)

# indice services
mdb_df['indice_services'] = insee_divers_df[Indice_services].sum(axis=1)

mdb_df.to_csv("MDB_groupe.csv",index=False)
mdb_df.head(3)

Unnamed: 0,CODGEO,Nb Pharmacies et parfumerie,Dynamique Entrepreneuriale,Dynamique Entrepreneuriale Service et Commerce,Synergie Médicale COMMUNE,Orientation Economique,Indice Fiscal Partiel,Indice Evasion Client,Indice Synergie Médicale,SEG Croissance POP,LIBGEO,REG,DEP,Densité Médicale BV,Score équipement de santé BV,Indice Démographique,Score Démographique,Indice Ménages,Evolution Population,Nb Ménages,Nb propriétaire,Nb Logement,Nb Occupants Résidence Principale,Nb Etudiants,Nb Création Services,Valeur ajoutée régionale,Score Urbanité,Nb Atifs,Nb Actifs Salariés,Nb Actifs Non Salariés,Nb Logement Secondaire et Occasionnel,Nb Hotel,Nb Camping,Taux étudiants,Taux Propriété,Dynamique Démographique INSEE,Capacité Fiscale,Moyenne Revnus fiscaux,Taux Evasion Client,PIB Régionnal,Score Croissance Population,Score Croissance Entrepreneuriale,Score VA Région,Score PIB,Environnement Démographique,Fidélité,SYN MEDICAL,Seg Cap Fiscale,Seg Dyn Entre,DYN SetC,Nb Entreprises,Nb Medecins,rH/F,rm/M,indice_salaire_global,indice_salaire_cadre,indice_salaire_prof,indice_salaire_employé,indice_salaire_ouvrié,indice_social,indice_services
0,1001,0.0,57.0,23.0,114,Bassin Industriel,101.939,0.0,114.567,en croissance démographique,L' Abergement-Clémenciat,82,1,0.093,4,44.198,0.034,37.22,16,247,196,289,728,51,1.0,86957.458,0.0,295.0,254.0,41.0,32.0,0.0,0.0,0,67,-1,117,11483.5,0,173681,72.131,0.016,32.426,33.838,Bassin Industriel en croissance démographique,Pop Sédentaire,Synergie Médicale,Fiscalité moyenne,Faible dynamique,Faible Dynamique Serv et Com,22.0,37,1.029,1.822,0.961,1.008,0.989,0.995,0.997,15108.0,22087.0
1,1002,0.0,45.0,4.0,143,Bassin Résidentiel,101.939,0.0,143.711,en croissance démographique,L' Abergement-de-Varey,82,1,0.099,4,10.181,0.008,10.096,4,67,61,142,168,5,1.0,86957.458,0.0,57.0,49.0,8.0,71.0,0.0,0.0,0,42,0,110,11483.5,0,173681,72.131,0.002,32.426,33.838,Bassin Résidentiel en croissance démographique,Pop Sédentaire,Forte Synergie Médicale,Fiscalité moyenne,Faible dynamique,Faible Dynamique Serv et Com,5.0,103,1.012,1.629,0.961,1.008,0.989,0.995,0.997,15105.0,22086.0
2,1004,0.0,634.0,828.0,366,Bassin Résidentiel,101.939,248.455,367.821,en croissance démographique,Ambérieu-en-Bugey,82,1,0.099,4,696.921,0.538,699.199,512,4640,1968,5184,11015,904,38.0,86957.458,37.5,4556.0,4203.0,353.0,135.0,2.0,0.0,0,37,-55,250,11483.5,2,173681,72.951,0.385,32.426,33.838,Bassin Résidentiel en croissance démographique,Pop Mobile,Forte Synergie Médicale,Fiscalité moyenne,Dynamique Economique,Bonne Dynamique Entreprise Serv et Com,809.0,103,0.958,1.583,0.961,1.008,0.989,0.995,0.997,15336.0,22127.0


* pas besoin du `Nb Résidences Principales` et `Nb Résidences Secondaires` , `Nb log Vacants`  car leur somme = `Nb Logement`
* on peut regrouper les colonnes `Moyenne Revenus Fiscaux Départementaux` à `Moyenne Revenus Fiscaux Régionaux` en `indice fiscal` par  Indice_fiscal_dep $= \frac{Moyenne Revenus Fiscaux Départementaux}{Moyenne Revenus Fiscaux Régionaux}$

* regrouper  `Dep Moyenne Salaires Horaires` à `Reg Moyenne Salaires Horaires`  en indice salariaux -> utiliser la méthodes ACP apprament qui permet de les réduire à 2 indices synthétiques `CP1` et `CP2`

## 1.3 Fusion des sources fournies et nettoyage

### resultat de la préanalyse

In [38]:
# @title colonnes à supprimer , avec trop de valeurs manquantes et à conserver

cols_to_drop = col_to_drop_train + col_to_drop_niveau + col_to_drop_commune + col_to_drop_age + col_to_drop_mdb
print(f"{len(cols_to_drop)} colonnes(s) à supprimer")

cols_to_keep = col_to_keep1 + col_to_keep2 + col_to_keep3 + col_to_keep4 + col_to_keep5
print(f"{len(cols_to_keep)} colonne(s) à conserver y compris les identifiants")


87 colonnes(s) à supprimer
117 colonne(s) à conserver y compris les identifiants


### fusion et extration des données

In [39]:
def safe_feature_selection(X, selected_features):
    """
    Retourne les colonnes de X correspondant aux selected_features valides.
    """
    return X[[feat for feat in selected_features if feat in X.columns]]

merge_list = [(niveau_vie_df , id_niveau),
              (communes_df , id_commune),
              (age_groups , id_age),
              (mdb_df , id_mdb)]

#fonction de fussion
def prepare_datasets(train_data,
                     test_data,
                     merge_list ,
                     base_id = id_train,
                     cols_to_drop = cols_to_drop,
                     verbose=False):
    """
    Fonction pour préparer et fusionner les datasets pour la modélisation
    """
    #Extraire les données de Macron
    train_features = train_data[train_data['Nom']=='MACRON'].copy()
    test_features = test_data.copy()

    # harmoniser les colonnes d'identification
    train_features['CodeINSEE'] = train_features['CodeINSEE'].astype(str).str.zfill(5)
    test_features['CodeINSEE'] = test_features['CodeINSEE'].astype(str).str.zfill(5)

    for i , (df , id_col) in enumerate(merge_list):
        # Vérifier si la colonne d'identification est présente dans le DataFrame
        if id_col not in df.columns:
            raise ValueError(f"La colonne d'identification '{id_col}' n'est pas présente dans le DataFrame à l'index {i}.")

        # Assurer que la colonne d'identification est au format string avec padding
        df[id_col] = df[id_col].astype(str).str.zfill(5)

        # nombre d'échantillons avant la fusion
        n_samples_before = train_features.shape[0]

        # Fusionner les DataFrames sur la colonne d'identification
        train_features = pd.merge(train_features, df, left_on=base_id, right_on=id_col, how='left')

        test_features = pd.merge(test_features, df, left_on=base_id, right_on=id_col, how='left')

        # nombre d'échantillons après la fusion
        n_samples_after = train_features.shape[0]

        assert n_samples_before == n_samples_after, f"Erreur de fusion : le nombre d'échantillons a changé après la fusion avec {id_col}."

        # supprimer l'identifiant
        if id_col != base_id:
            train_features = train_features.drop(columns=[id_col], axis=1, errors='ignore')
            test_features = test_features.drop(columns=[id_col], axis=1, errors='ignore')
    #-----------------------------------------------------------------
    # Supprimer les colonnes non informatives
    train_features = train_features.drop(columns=cols_to_drop, axis=1, errors='ignore')
    test_features = test_features.drop(columns=cols_to_drop, axis=1, errors='ignore')
    #-----------------------------------------------------------------
    # Supprimer les colonnes d'identification
    train_features = train_features.drop(columns=IDs, axis=1 , errors='ignore')

    if verbose :
        missing1 = ((train_data.isnull().sum()/ train_data.shape[0]).sum()/train_data.shape[1] )* 100
        missing2 = ((train_features.isnull().sum()/ train_features.shape[0]).sum()/train_features.shape[1] )* 100
        #-------------------------------------------------------------
        print(f" {train_features.shape[1]} colonnes vs {train_data.shape[1]} avant la fusion\n")
        print(f" {len(cols_to_drop) + len(merge_list) + 1} colonnes supprimées pendant la fusion \n")
        print(f" \n{train_features.shape[0]} lignes vs {train_data.shape[0]} avant la fusion\n")
        print(f" \n{missing2}% de valeurs manquantes vs {missing1}% avant la fusion")


    return train_features, test_features


In [40]:
train_data , test_data = prepare_datasets(res_train_df, res_test_df,
                                          merge_list,
                                          base_id = id_train,
                                          cols_to_drop = cols_to_drop,
                                          verbose=True)

print(f"forme du train_data : {train_data.shape}")
print(f"forme du test_data : {test_data.shape}")
test_data.head(3)

 111 colonnes vs 32 avant la fusion

 92 colonnes supprimées pendant la fusion 

 
20892 lignes vs 20892 avant la fusion

 
0.13070221283891595% de valeurs manquantes vs 0.0% avant la fusion
forme du train_data : (20892, 111)
forme du test_data : (13928, 96)


Unnamed: 0,CodeINSEE,Libellé du département,Etat saisie,Inscrits,Nom Commune,Niveau de vie Commune,Niveau de vie Département,reg_code,dep_code,canton_code,epci_code,academie_code,code_postal,codes_postaux,zone_emploi,code_insee_centre_zone_emploi,population,superficie_km2,densite,altitude_moyenne,altitude_minimale,altitude_maximale,latitude_mairie,longitude_mairie,grille_densite,NOM,EPCI,DEP_x,REG_x,% Mineurs,% Adultes,% Seniors,% Tres_Seniors,% Retraites,% Travailleurs,rH/F_x,Nb Pharmacies et parfumerie,Dynamique Entrepreneuriale,Dynamique Entrepreneuriale Service et Commerce,Synergie Médicale COMMUNE,Orientation Economique,Indice Fiscal Partiel,Indice Evasion Client,Indice Synergie Médicale,SEG Croissance POP,LIBGEO,REG_y,DEP_y,Densité Médicale BV,Score équipement de santé BV,Indice Démographique,Score Démographique,Indice Ménages,Evolution Population,Nb Ménages,Nb propriétaire,Nb Logement,Nb Occupants Résidence Principale,Nb Etudiants,Nb Création Services,Valeur ajoutée régionale,Score Urbanité,Nb Atifs,Nb Actifs Salariés,Nb Actifs Non Salariés,Nb Logement Secondaire et Occasionnel,Nb Hotel,Nb Camping,Taux étudiants,Taux Propriété,Dynamique Démographique INSEE,Capacité Fiscale,Moyenne Revnus fiscaux,Taux Evasion Client,PIB Régionnal,Score Croissance Population,Score Croissance Entrepreneuriale,Score VA Région,Score PIB,Environnement Démographique,Fidélité,SYN MEDICAL,Seg Cap Fiscale,Seg Dyn Entre,DYN SetC,Nb Entreprises,Nb Medecins,rH/F_y,rm/M,indice_salaire_global,indice_salaire_cadre,indice_salaire_prof,indice_salaire_employé,indice_salaire_ouvrié,indice_social,indice_services
0,1001,Ain,Complet,643,L'Abergement-Clémenciat,22130.0,22343.575,84,1,108,200069193,10,1400,01400,8405.0,1053,779,16,48.7,242,206.0,272.0,46.153,4.926,Rural à habitat dispersé,L'Abergement-Clémenciat,200069193,D1,R84,25.383,41.845,29.854,4.779,19.611,7.055,1.038,0.0,57.0,23.0,114.0,Bassin Industriel,101.939,0.0,114.567,en croissance démographique,L' Abergement-Clémenciat,82.0,1,0.093,4.0,44.198,0.034,37.22,16.0,247.0,196.0,289.0,728.0,51.0,1.0,86957.458,0.0,295.0,254.0,41.0,32.0,0.0,0.0,0.0,67.0,-1.0,117.0,11483.5,0.0,173681.0,72.131,0.016,32.426,33.838,Bassin Industriel en croissance démographique,Pop Sédentaire,Synergie Médicale,Fiscalité moyenne,Faible dynamique,Faible Dynamique Serv et Com,22.0,37.0,1.029,1.822,0.961,1.008,0.989,0.995,0.997,15108.0,22087.0
1,1002,Ain,Complet,213,L'Abergement-de-Varey,23213.0,22343.575,84,1,101,240100883,10,1640,01640,8405.0,1053,256,9,27.1,483,290.0,748.0,46.009,5.428,Rural à habitat dispersé,L'Abergement-de-Varey,240100883,D1,R84,24.455,46.98,19.637,6.071,16.798,21.332,1.102,0.0,45.0,4.0,143.0,Bassin Résidentiel,101.939,0.0,143.711,en croissance démographique,L' Abergement-de-Varey,82.0,1,0.099,4.0,10.181,0.008,10.096,4.0,67.0,61.0,142.0,168.0,5.0,1.0,86957.458,0.0,57.0,49.0,8.0,71.0,0.0,0.0,0.0,42.0,0.0,110.0,11483.5,0.0,173681.0,72.131,0.002,32.426,33.838,Bassin Résidentiel en croissance démographique,Pop Sédentaire,Forte Synergie Médicale,Fiscalité moyenne,Faible dynamique,Faible Dynamique Serv et Com,5.0,103.0,1.012,1.629,0.961,1.008,0.989,0.995,0.997,15105.0,22086.0
2,1004,Ain,Complet,8763,Ambérieu-en-Bugey,19554.0,22343.575,84,1,101,240100883,10,1500,"01500, 01501, 01504, 01503, 01502, 01505, 01506",8405.0,1053,14134,24,570.5,379,237.0,753.0,45.961,5.373,Centres urbains intermédiaires,Ambérieu-en-Bugey,240100883,D1,R84,23.861,47.947,24.502,5.386,18.089,0.418,0.929,0.0,634.0,828.0,366.0,Bassin Résidentiel,101.939,248.455,367.821,en croissance démographique,Ambérieu-en-Bugey,82.0,1,0.099,4.0,696.921,0.538,699.199,512.0,4640.0,1968.0,5184.0,11015.0,904.0,38.0,86957.458,37.5,4556.0,4203.0,353.0,135.0,2.0,0.0,0.0,37.0,-55.0,250.0,11483.5,2.0,173681.0,72.951,0.385,32.426,33.838,Bassin Résidentiel en croissance démographique,Pop Mobile,Forte Synergie Médicale,Fiscalité moyenne,Dynamique Economique,Bonne Dynamique Entreprise Serv et Com,809.0,103.0,0.958,1.583,0.961,1.008,0.989,0.995,0.997,15336.0,22127.0


In [41]:
X = train_data.drop(columns=[target], axis=1,errors='ignore')
y = train_data[target]

print(X.info())
print(f"forme des prédicteurs : {X.shape}")
print(f"forme de la cible : {y.shape}")


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20892 entries, 0 to 20891
Columns: 110 entries, Libellé du département to indice_services
dtypes: float64(74), int64(13), object(23)
memory usage: 17.5+ MB
None
forme des prédicteurs : (20892, 110)
forme de la cible : (20892,)


# 2. Prétraitement , Pipeline et Selection automatique de features

----

## 2.1 Création du pipelines de prétraitement

In [42]:
def get_preprocessor(data,features=None):
    """
    Crée un préprocesseur complet avec gestion des valeurs infinies, manquantes et aberrantes
    """
    X=data.copy()
    # Remplacer les valeurs infinies par NaN
    X = X.replace([np.inf, -np.inf], np.nan)

    if features is not None:
        X = X[features]

    numeric_features = X.select_dtypes(include=['int64', 'float64',np.number]).columns
    categorical_features = X.select_dtypes(include=['object', 'category','bool']).columns

    # conversion des colonnes catégorielles en type str pour éviter les type mixte
    X[categorical_features] = X[categorical_features].astype(str)


    # Pipeline pour les variables numériques
    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', RobustScaler())
    ] ,   memory='cache_directory')

    # Pipeline pour les variables catégorielles
    categorical_transformer = Pipeline(
        steps=[
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('onehot', OneHotEncoder( sparse_output=False, handle_unknown='ignore'))
        ])

    # Combinaison des transformateurs
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numeric_features),
            ('cat', categorical_transformer, categorical_features)
        ],
        remainder = 'drop',
        verbose_feature_names_out=False,
        n_jobs=-1)

    return preprocessor


## 2.2 Selection automatique des features

### 2.2.1 Selection grossière automatique avec xgboost


In [43]:
def remove_quasi_constant_features(df, threshold=0.01):
    """
    Supprime les colonnes avec une variance inférieure à un seuil donné : elle sont quasi contantes"""

    # Traitement des variables numériques
    numerical_df = df.select_dtypes(include=[np.number, 'bool'])
    variances = numerical_df.var()
    low_variance_cols = variances[variances < threshold].index.tolist()

    # Traitement des variables catégorielles (en regardant le ratio du mode)
    categorical_df = df.select_dtypes(include=['object', 'category'])
    cat_low_variance = []

    for col in categorical_df.columns:
        # Calcul du ratio de la valeur la plus fréquente
        value_counts = categorical_df[col].value_counts(normalize=True)
        if len(value_counts) > 0 and value_counts.iloc[0] > 1 - threshold:
            cat_low_variance.append(col)

    # Combinaison des résultats
    all_low_variance = low_variance_cols + cat_low_variance

    keep = sub(df.columns.tolist(), all_low_variance)

    return all_low_variance, keep

drop , keep = remove_quasi_constant_features(X)
print(f"nombre de colonnes jugées quasi constantes : {len(drop)}")


nombre de colonnes jugées quasi constantes : 7


In [44]:

def fast_feature_selection(x_data, y_data, n_features=20,sort=False):
    """
    Selection automatique des ``n_features`` meilleures features avec  XGBoost
    Args:
        X (pd.DataFrame): DataFrame contenant les caractéristiques
        y (pd.Series): Série contenant la cible
        n_features (int): Nombre de features/colonnes à sélectionner
        sort (bool): Si True, trie les features sélectionnées par importance décroissante
    Returns:
        list: Liste des noms des features sélectionnées
    """
    # Créer une copie de X pour éviter les modifications sur l'original
    X = x_data.copy()
    # Supprimer les colonnes avec une variance inférieure à 0.01
    drop, keep = remove_quasi_constant_features(X)

    X = X[keep]
    # Remplacer les valeurs infinies par NaN
    X = X.replace([np.inf, -np.inf], np.nan)

    # identification des colonnes numériques et catégorielles
    num_col = X.select_dtypes(include=[np.number,'float64','int64']).columns
    cat_col = X.select_dtypes(include=['object', 'category','bool']).columns

    # transformer tous les colonnes catégorielles en type str
    # pour éviter les types mixtes
    X[cat_col] = X[cat_col].astype(str)

    # Préprocessing light
    preprocessor = make_column_transformer(
        (SimpleImputer(strategy='median'), num_col),
        (Pipeline([
            ('imputer', SimpleImputer(strategy='most_frequent')),
            ('encoder', OneHotEncoder(handle_unknown='ignore'))
            ])              ,            cat_col ),
        remainder='drop',
        verbose_feature_names_out=False,
        n_jobs=-1
    )

    # Modèle unique mais robuste
    model = Pipeline([
        ('preprocessor', preprocessor),
        ('selector', SelectFromModel(
            XGBRegressor(
                tree_method='hist',
                n_estimators=50,
                max_depth=6),
            max_features=n_features
        )),
        ('estimator', XGBRegressor())
    ])

    model.fit(X, y_data)

    transformed_features_names = model.named_steps['preprocessor'].get_feature_names_out(input_features=keep)

    if sort:

        importances = model.named_steps['estimator'].feature_importances_

        feature_importance = list(zip(transformed_features_names, importances))

        feature_importance.sort(key=lambda x: x[1], reverse=True)

        selected_features = [name for name, _ in feature_importance[:n_features]]

    selected_indices = model.named_steps['selector'].get_support(indices=True)

    selected_features = [transformed_features_names[i] for i in selected_indices]

    return list(selected_features)


In [None]:
def combined_feature_selection(X_data, y, n_features=20):
    X = X_data.copy()
    drop, keep = remove_quasi_constant_features(X)
    X = X[keep]
    X = X.replace([np.inf, -np.inf], np.nan)

    num_col = X.select_dtypes(include=[np.number, 'float64','int64']).columns
    cat_col = X.select_dtypes(include=['object', 'category', 'bool']).columns

    X[cat_col] = X[cat_col].astype(str)

    num_pipeline = Pipeline([
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', RobustScaler())
    ])

    cat_pipeline = Pipeline([
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('encoder', OneHotEncoder(handle_unknown='ignore'))
    ])

    preprocessor = make_column_transformer(
        (num_pipeline, num_col),
        (cat_pipeline, cat_col),
        remainder='drop',
        verbose_feature_names_out=False,
        n_jobs=-1
    )

    # Apply preprocessing
    X_transformed = preprocessor.fit_transform(X)
    feature_names = preprocessor.get_feature_names_out(input_features=keep)

    # Define models
    xgb = XGBRegressor(tree_method='hist',
                       n_estimators=50,
                       max_depth=6,
                       max_features=n_features,
                       random_state=RANDOM_STATE)
    lasso = LassoCV(cv=5)
    rf = RandomForestRegressor(n_estimators=50,
                               max_depth=6,
                               max_features=n_features,
                               random_state=RANDOM_STATE)

    # Fit models
    xgb.fit(X_transformed, y)
    lasso.fit(X_transformed, y)
    rf.fit(X_transformed, y)

    # Get importances or coefficients
    xgb_importance = xgb.feature_importances_
    lasso_coef = np.abs(lasso.coef_)
    rf_importance = rf.feature_importances_

    # Normalize
    def normalize(arr):
        return arr / np.max(arr) if np.max(arr) != 0 else arr

    # scores nomalisés
    scores_norm = [
        normalize(xgb_importance),
        normalize(lasso_coef) ,
        normalize(rf_importance)
    ]
    scores = np.vstack(scores_norm)
    scores = np.median(scores, axis=0)

    # Select top features
    top_indices = np.argsort(scores)[::-1][:n_features]
    selected_features = [feature_names[i] for i in top_indices]

    return selected_features



In [None]:

selected_features_20 = fast_feature_selection(X, y, n_features=40)
with open("40_best_features.json" , 'w') as file:
    json.dump(selected_features_20 , file)


### 2.2.2 Selection plus fine pour un modèle donné

In [48]:

def compute_rmse(pipeline , X_sub, y,cv=None,
                 message="Début du calcul...",
                 error="une erreur s'est produite")->float:
    """
    Function to compute cross-validated RMSE
    """
    if cv is None:
        cv = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)
    if message is not None:
        print(message)
    try :
        pipeline.fit(X_sub, y)
        # Calculer le RMSE avec validation croisée
        scores = cross_val_score(pipeline, X_sub, y,
                                scoring='neg_root_mean_squared_error',
                                cv=cv,n_jobs=-1)
        return -scores.mean()
    except Exception as e:
        if error is not None:
            print(error)
            print(f"Erreur : {e}")
        # Si une erreur se produit, on retourne une valeur élevée pour forcer la suppression de la feature
        return float('inf')

def make_pipeline(model , X,features=None):

    preprocessor = get_preprocessor(X , features)
    return Pipeline([
        ('preprocessor', preprocessor),
        ('model', model)
    ])


def backward_stepwise(model, X, y,
                       initial_features:List[str]=None,
                       min_features:int=3,
                       rtol:float=0.005,
                       max_degrad:float=0.005,
                       cv=None,
                       logg:bool=True)->Tuple[List[str] , float]:
    """
    Backward stepwise feature selection algorithm.

    Parameters:
    - model: The predictive model (must implement fit and predict).
    - X: DataFrame of predictors.
    - y: Target variable.
    - initial_features: Initial subset of features to test. If None, uses all features.
    - min_features: Minimum number of features to keep.
    - max_degrad: Maximum allowed degradation in RMSE to consider a feature removable.
    - rtol: Relative tolerance for improvement in RMSE. If improvement < rtol, stops early.

    Returns:
    - best_features: List of selected features.
    - best_score: Best RMSE score achieved.
    """


    all_features = X.columns.tolist()
    if initial_features is None:
        current_features = all_features.copy()
    else:
        current_features = [f for f in initial_features if f in all_features]

    if min_features is None:
        min_features = max(5, int(0.1 * len(current_features)))

    if cv is None:
        cv = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)

    pipeline = make_pipeline(model, X ,current_features)

    # Compute initial RMSE
    best_score = compute_rmse(pipeline,
                              X[current_features], y,cv=cv , message="calcul du score initial..." if logg else None,
                              error="Erreur lors de l'évaluation initiale" if logg else None)

    if logg:
        print("début des suppressions...\nInitial RMSE: {best_score:.4f}")
    features_removed = []
    improve = True
    while improve and len(current_features) > min_features:
        worst_feature = None
        current_rmse = best_score
        # essai de suppression de chaque feature
        for feature in current_features:
            trial_features = [f for f in current_features if f != feature]
            trial_pipeline = make_pipeline(model, X,trial_features)
            # Compute RMSE without the feature
            trial_rmse = compute_rmse(trial_pipeline,
                                      X[trial_features], y,cv=cv,message=f"Test sans '{feature}'"if logg else None,error=f"Erreur en testant sans {feature} "if logg else None)

            relative_change = (best_score-trial_rmse) / best_score
            # If removing the feature improves RMSE beyond rtol
            if relative_change >= rtol and trial_rmse < current_rmse:
                    current_rmse = trial_rmse
                    worst_feature = feature
            elif relative_change < -max_degrad:
                if logg:
                    print(f" suppression de {feature} rejetée : dégradation de {-relative_change*100:.2f} %")

        # Remove the worst feature if found
        if worst_feature is not None:
            current_features.remove(worst_feature)
            features_removed.append(worst_feature)
            best_score = current_rmse
            improve = True
            if logg:
                print(f" '{worst_feature}' supprimée  , nouvelle RMSE: {best_score:.4f},  {len(current_features)} features restante")
        else:
            improve = False

    if logg:
        print(f"selction terminée {len(current_features)} Features conservées. RMSE final: {best_score:.4f}")
    return current_features, best_score



# 3. MODELISATION

### Separation X_train , y_train ,....

In [49]:
train_data , test_data = prepare_datasets(res_train_df, res_test_df,
                                          merge_list,
                                          base_id = id_train,
                                          cols_to_drop = cols_to_drop,
                                          verbose=True)

 111 colonnes vs 32 avant la fusion

 92 colonnes supprimées pendant la fusion 

 
20892 lignes vs 20892 avant la fusion

 
0.13070221283891595% de valeurs manquantes vs 0.0% avant la fusion


In [50]:
X = train_data.drop(columns=[target], axis=1,errors='ignore')
y = train_data[target]

#selection des 20 meilleures features
selected_features = fast_feature_selection(X, y, n_features=20)

In [51]:
X_train , X_test , y_train , y_test = train_test_split(safe_feature_selection(X,selected_features),y, test_size=0.2, random_state=RANDOM_STATE)

### Fonction d'optimisation des hyperparamètres et des features pour un moèle


In [None]:
def quick_gridsearch(model, param_grid, X, y, cv=3, scoring='neg_root_mean_squared_error', verbose=0):
    """
    Effectue une GridSearchCV rapide pour évaluer rapidement un modèle de régression.

    Returns:
    - best_estimator_: le modèle entraîné avec les meilleurs paramètres
    - best_params_: dictionnaire des meilleurs paramètres
    - best_score_: meilleur score trouvé (négatif car RMSE)
    - cv_results_: résultats détaillés
    """
    grid_search = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        scoring=scoring,
        cv=cv,
        n_jobs=-1,
        verbose=verbose,
        return_train_score=True
    )
    grid_search.fit(X, y)

    return grid_search.best_estimator_, grid_search.best_params_, grid_search.best_score_, grid_search.cv_results_


In [53]:
class TQDMCallback:
    """
    Callback to update tqdm progress bar during hyperparameter tuning.
    """
    def __init__(self, total):
        self.pbar = tqdm(total=total, desc="Optimisation des hyperparamètres", unit="itération")

    def __call__(self, study, trial):
        self.pbar.update(1)
        self.pbar.set_postfix({"Best RMSE": study.best_value})

#Fonction d'optimisation des hyperparamètres et des features pour un modèle
def optimise_model(model, params, X, y,
                   n_trials: int = 3,
                   model_name: str = '',
                   cv:int=3,
                   selected_feature_fn = None
                   ) -> Tuple[Dict, List[str], optuna.study.Study]:
    """
    Optimise les hyperparamètres et selectionne les features optimales pour le modèle

    Args
    ---
    model : (fit_Object)
      instance du modèle choisit , ici ce sera soit Lasso , ElesticNet ,  XGboost , LightGBM
    params :
      disctionnaire permettant de definir l'espace des hyperparamètres à optimiser
    X : (pd.DataFrame)
      dataframes des predicteurs
    y : (pd.DataFrame)
      variables cible
    n_trials : (int)
      nombre d'essais pour l'optimisation des hyperparamètres

    Returns
    -------
    Tuple[Dict, List[str], optuna.study.Study]:
      tuple contenant
    -best_params : (dict)
      dictionnaire des meilleurs hyperparamètres
    -best_features : (list)
      liste des features sélectionnées
    -study : (optuna.study.Study)
      instance de l'étude d'optimisation
    """
    if cv is None:
      cv = RepeatedKFold(n_splits=5, n_repeats=3, random_state=RANDOM_STATE )

    def objective(trial):
        # Définir les espaces de recherche pour chaque hyperparamètre
        param_grid = {}
        for name, conf in params.items():
            if conf['type'] == 'int':
                if isinstance(conf['range'], (list, tuple)) and len(conf['range']) == 2:
                    param_grid[name] = trial.suggest_int(name, conf['range'][0], conf['range'][1])
                else:
                    print(f"Erreur: conf['range'] n'est pas une liste ou un tuple valide pour {name} (type int)")
                    continue  # Passe à l'hyperparamètre suivant
            elif conf['type'] == 'float':
                if isinstance(conf['range'], (list, tuple)) and len(conf['range']) == 2:
                    param_grid[name] = trial.suggest_float(name, conf['range'][0], conf['range'][1])
                else:
                    print(f"Erreur: conf['range'] n'est pas une liste ou un tuple valide pour {name} (type float)")
                    continue  # Passe à l'hyperparamètre suivant
            elif conf['type'] == 'categorical':
                if isinstance(conf['range'], (list, tuple)):
                    param_grid[name] = trial.suggest_categorical(name, conf['range'])
                else:
                    print(f"Erreur: conf['range'] n'est pas une liste ou un tuple valide pour {name} (type categorical)")
                    continue  # Passe à l'hyperparamètre suivant
            elif conf['type'] == 'log':
                if isinstance(conf['range'], (list, tuple)) and len(conf['range']) == 2:
                    param_grid[name] = trial.suggest_float(name, conf['range'][0], conf['range'][1], log=True)
                else:
                    print(f"Erreur: conf['range'] n'est pas une liste ou un tuple valide pour {name} (type log)")
                    continue  # Passe à l'hyperparamètre suivant

        # Appliquer les paramètres au modèle
        model_trial = clone(model).set_params(**param_grid)

        try:
            # Selection des features
            if selected_feature_fn is not None:
                selected_features = selected_feature_fn(X, y)
            else:
              selected_features = backward_stepwise(model_trial, X, y, cv=cv, logg=False)[0]

            # sélection sécurisée des colonnes
            X_selected = safe_feature_selection(X, selected_features)

            # si aucune colonne valide , utiliser toutes le colonnes
            if X_selected.shape[1] == 0:
                X_selected = X.copy()
                selected_features = X.columns.tolist()

            prepro = get_preprocessor(X_selected)
            pipeline = Pipeline([
                ('preprocessor', prepro),
                ('model', model_trial)
            ])

            trial.set_user_attr('selected_features', selected_features)

            # Effectuer la validation croisée
            scores = cross_val_score(pipeline,
                                      X_selected, y,
                                      cv=cv, scoring='neg_root_mean_squared_error',
                                      n_jobs=-1)

            score = -np.mean(scores)
            trial.report(score, step=0)
            if trial.should_prune():
                raise TrialPruned()
            print(f"[Trial {trial.number}] RMSE: {score:.4f} | Params: {param_grid}")
            return score

        except Exception as e:
            print(f"Trial {trial.number} Erreur : {e}")
            # on retourne une valeur élevée pour forcer la suppression de la feature
            trial.set_user_attr('error', str(e))
            trial.set_user_attr('selected_features', X.columns.tolist())
            return float('inf')

    study = optuna.create_study(direction='minimize',
                                  pruner=MedianPruner())
    study.optimize(objective,
                   n_trials=n_trials,
                   n_jobs=-1,
                   callbacks=[TQDMCallback(n_trials)])

    joblib.dump(study, f'{model_name}_study.pkl')

    best_params = study.best_params
    best_features = study.best_trial.user_attrs['selected_features']

    return best_params, best_features, study



def optimise_then_select(model, param_space, X, y,
                           n_trials=10, cv=None,
                           min_features=5,
                           rtol=0.01, logg=False,
                           model_name=None):
  best_params , _ , _ = optimise_model(model = model,
                                       params=  param_space ,
                                       X=X , y=y ,
                                       selected_feature_fn=None,
                                       n_trials=n_trials ,
                                       model_name=model_name ,
                                       cv=cv,
                                       )
  model = model.set_params(**best_params)

  best_features = backward_stepwise(model=model,
                                    X=X,
                                    y=y,
                                    min_features=min_features,
                                    rtol=rtol,
                                    logg=False)[0]
  return model , best_features , best_params

## Initialisation des modèles et recherche d'hyperparamètres


In [54]:
# 1. Définir les modèles et leurs espaces de recherche
models_grids = {
    "Lasso": (             # modèle imposé
        Lasso(random_state=RANDOM_STATE),
        {
            'alpha': {'type': 'log', 'range': (1e-4, 10)},
            'max_iter': {'type': 'int', 'range': (1000, 5000)},
            'fit_intercept': {'type': 'categorical', 'range': [True]}
        }
    ),
    "ElasticNet": (       #modèle au choix
        ElasticNet(random_state=RANDOM_STATE),
        {
            'alpha': {'type': 'log', 'range': (1e-4, 10)},
            'l1_ratio': {'type': 'float', 'range': (0.1, 1)},
            'max_iter': {'type': 'int', 'range': (1000, 5000)},
        }
    ),
    "XGBoost": (      # modèle au choix 2
        XGBRegressor(random_state=RANDOM_STATE),
        {
            'n_estimators': {'type': 'int', 'range': (100, 300)},
            'max_depth': {'type': 'int', 'range': (5, 10)},
            'learning_rate': {'type': 'float', 'range': (0.01, 0.3)},
            'subsample': {'type': 'float', 'range': (0.6, 1.0)},
            'colsample_bytree': {'type': 'float', 'range': (0.6, 1.0)},
            'gamma': {'type': 'float', 'range': (0, 5)},
            'reg_alpha': {'type': 'float', 'range': (0, 1)},
            'reg_lambda': {'type': 'float', 'range': (0, 1)}
        }
    )
}

# 2. Boucle d'optimisation + sélection des best features
models_info = {}

for name, (model, space) in models_grids.items():
    print(f"\n🔍 Traitement du modèle : {name}")

    final_model, selected_features, best_params = optimise_then_select(
        model=model,
        param_space=space,
        X=X_train,
        y=y_train,
        n_trials=200,
        min_features=5,
        rtol=0.01,
        logg=False,
        model_name = name
    )

    models_info[name] = {
        "model": final_model,
        "features": selected_features,
        "params": best_params
    }


[I 2025-05-11 08:14:25,538] A new study created in memory with name: no-name-e88aff76-5656-40fe-8f71-9fb231ea15e3



🔍 Traitement du modèle : Lasso


Optimisation des hyperparamètres:   0%|          | 0/200 [00:00<?, ?itération/s][I 2025-05-11 08:19:08,067] Trial 4 finished with value: 34.4120234282995 and parameters: {'alpha': 4.9209872773667405, 'max_iter': 2864, 'fit_intercept': False}. Best is trial 4 with value: 34.4120234282995.
Optimisation des hyperparamètres:   0%|          | 1/200 [04:42<15:37:02, 282.53s/itération, Best RMSE=34.4]

[Trial 4] RMSE: 34.4120 | Params: {'alpha': 4.9209872773667405, 'max_iter': 2864, 'fit_intercept': False}


[I 2025-05-11 08:19:17,308] Trial 3 finished with value: 34.976447886368156 and parameters: {'alpha': 6.300176700754486, 'max_iter': 1376, 'fit_intercept': False}. Best is trial 4 with value: 34.4120234282995.
Optimisation des hyperparamètres:   1%|          | 2/200 [04:51<6:41:50, 121.77s/itération, Best RMSE=34.4] 

[Trial 3] RMSE: 34.9764 | Params: {'alpha': 6.300176700754486, 'max_iter': 1376, 'fit_intercept': False}


[I 2025-05-11 08:19:18,616] Trial 1 finished with value: 1.0724575397324585 and parameters: {'alpha': 0.1632727739244035, 'max_iter': 4558, 'fit_intercept': True}. Best is trial 1 with value: 1.0724575397324585.
Optimisation des hyperparamètres:   2%|▏         | 3/200 [04:53<3:39:12, 66.76s/itération, Best RMSE=1.07] 

[Trial 1] RMSE: 1.0725 | Params: {'alpha': 0.1632727739244035, 'max_iter': 4558, 'fit_intercept': True}


[I 2025-05-11 08:20:59,904] Trial 5 finished with value: 0.9916502748945343 and parameters: {'alpha': 0.024485466290068546, 'max_iter': 2772, 'fit_intercept': True}. Best is trial 5 with value: 0.9916502748945343.
Optimisation des hyperparamètres:   2%|▏         | 4/200 [06:34<4:22:37, 80.39s/itération, Best RMSE=0.992]

[Trial 5] RMSE: 0.9917 | Params: {'alpha': 0.024485466290068546, 'max_iter': 2772, 'fit_intercept': True}


[I 2025-05-11 08:21:17,414] Trial 10 finished with value: 0.9890343617479039 and parameters: {'alpha': 0.01902394880379772, 'max_iter': 1790, 'fit_intercept': True}. Best is trial 10 with value: 0.9890343617479039.
Optimisation des hyperparamètres:   2%|▎         | 5/200 [06:51<3:07:34, 57.72s/itération, Best RMSE=0.989]

[Trial 10] RMSE: 0.9890 | Params: {'alpha': 0.01902394880379772, 'max_iter': 1790, 'fit_intercept': True}


[I 2025-05-11 08:21:22,712] Trial 11 finished with value: 0.9873278139435876 and parameters: {'alpha': 0.016168219213407588, 'max_iter': 4077, 'fit_intercept': True}. Best is trial 11 with value: 0.9873278139435876.
Optimisation des hyperparamètres:   3%|▎         | 6/200 [06:57<2:08:59, 39.90s/itération, Best RMSE=0.987]

[Trial 11] RMSE: 0.9873 | Params: {'alpha': 0.016168219213407588, 'max_iter': 4077, 'fit_intercept': True}


[I 2025-05-11 08:21:58,423] Trial 6 finished with value: 0.9844335696338898 and parameters: {'alpha': 0.003846665405296903, 'max_iter': 4433, 'fit_intercept': True}. Best is trial 6 with value: 0.9844335696338898.
Optimisation des hyperparamètres:   4%|▎         | 7/200 [07:32<2:03:55, 38.53s/itération, Best RMSE=0.984]

[Trial 6] RMSE: 0.9844 | Params: {'alpha': 0.003846665405296903, 'max_iter': 4433, 'fit_intercept': True}


[I 2025-05-11 08:22:44,239] Trial 2 finished with value: inf and parameters: {'alpha': 0.08279229292441463, 'max_iter': 4666, 'fit_intercept': False}. Best is trial 6 with value: 0.9844335696338898.
Optimisation des hyperparamètres:   4%|▍         | 8/200 [08:18<2:10:43, 40.85s/itération, Best RMSE=0.984]

Trial 2 Erreur : 


[I 2025-05-11 08:22:47,510] Trial 0 finished with value: inf and parameters: {'alpha': 0.3900787166572812, 'max_iter': 4325, 'fit_intercept': False}. Best is trial 6 with value: 0.9844335696338898.
Optimisation des hyperparamètres:   4%|▍         | 9/200 [08:21<1:32:37, 29.10s/itération, Best RMSE=0.984]

Trial 0 Erreur : 


[I 2025-05-11 10:26:12,182] Trial 12 finished with value: inf and parameters: {'alpha': 6.5047097698508916, 'max_iter': 1724, 'fit_intercept': False}. Best is trial 6 with value: 0.9844335696338898.
Optimisation des hyperparamètres:   5%|▌         | 10/200 [2:11:46<121:42:37, 2306.09s/itération, Best RMSE=0.984]

Trial 12 Erreur : 


[I 2025-05-11 10:28:51,627] Trial 17 finished with value: inf and parameters: {'alpha': 8.43672135393618, 'max_iter': 3607, 'fit_intercept': False}. Best is trial 6 with value: 0.9844335696338898.
Optimisation des hyperparamètres:   6%|▌         | 11/200 [2:14:26<86:34:42, 1649.11s/itération, Best RMSE=0.984] 

Trial 17 Erreur : 


[I 2025-05-11 10:32:52,540] Trial 20 finished with value: 0.9844443025068321 and parameters: {'alpha': 0.0036344461869257023, 'max_iter': 4122, 'fit_intercept': True}. Best is trial 6 with value: 0.9844335696338898.
Optimisation des hyperparamètres:   6%|▌         | 12/200 [2:18:27<63:44:54, 1220.72s/itération, Best RMSE=0.984]

[Trial 20] RMSE: 0.9844 | Params: {'alpha': 0.0036344461869257023, 'max_iter': 4122, 'fit_intercept': True}


[I 2025-05-11 10:33:32,588] Trial 9 finished with value: 0.9814859946947604 and parameters: {'alpha': 0.0001602070828787948, 'max_iter': 2763, 'fit_intercept': True}. Best is trial 9 with value: 0.9814859946947604.
Optimisation des hyperparamètres:   6%|▋         | 13/200 [2:19:07<44:49:50, 863.05s/itération, Best RMSE=0.981] 

[Trial 9] RMSE: 0.9815 | Params: {'alpha': 0.0001602070828787948, 'max_iter': 2763, 'fit_intercept': True}


[I 2025-05-11 10:37:51,537] Trial 21 finished with value: 0.9860831658019737 and parameters: {'alpha': 0.00033265760049661753, 'max_iter': 3526, 'fit_intercept': True}. Best is trial 9 with value: 0.9814859946947604.
Optimisation des hyperparamètres:   7%|▋         | 14/200 [2:23:26<35:09:48, 680.58s/itération, Best RMSE=0.981]

[Trial 21] RMSE: 0.9861 | Params: {'alpha': 0.00033265760049661753, 'max_iter': 3526, 'fit_intercept': True}


[I 2025-05-11 10:39:39,092] Trial 14 finished with value: inf and parameters: {'alpha': 0.04718599405711551, 'max_iter': 1989, 'fit_intercept': False}. Best is trial 9 with value: 0.9814859946947604.
Optimisation des hyperparamètres:   8%|▊         | 15/200 [2:25:13<26:05:52, 507.85s/itération, Best RMSE=0.981]

Trial 14 Erreur : 


[I 2025-05-11 10:40:50,370] Trial 22 finished with value: 0.9853090051303339 and parameters: {'alpha': 0.0007721219636233057, 'max_iter': 3784, 'fit_intercept': True}. Best is trial 9 with value: 0.9814859946947604.
Optimisation des hyperparamètres:   8%|▊         | 16/200 [2:26:24<19:14:25, 376.45s/itération, Best RMSE=0.981]

[Trial 22] RMSE: 0.9853 | Params: {'alpha': 0.0007721219636233057, 'max_iter': 3784, 'fit_intercept': True}


[I 2025-05-11 10:42:28,607] Trial 15 finished with value: inf and parameters: {'alpha': 0.03559289559489558, 'max_iter': 4118, 'fit_intercept': False}. Best is trial 9 with value: 0.9814859946947604.
Optimisation des hyperparamètres:   8%|▊         | 17/200 [2:28:03<14:53:00, 292.79s/itération, Best RMSE=0.981]

Trial 15 Erreur : 


[I 2025-05-11 10:42:52,594] Trial 8 finished with value: inf and parameters: {'alpha': 0.002295662458552428, 'max_iter': 3626, 'fit_intercept': False}. Best is trial 9 with value: 0.9814859946947604.
Optimisation des hyperparamètres:   9%|▉         | 18/200 [2:28:27<10:43:07, 212.02s/itération, Best RMSE=0.981]

Trial 8 Erreur : 


[I 2025-05-11 10:45:07,326] Trial 23 finished with value: 0.985602765942661 and parameters: {'alpha': 0.0005660727498900619, 'max_iter': 4999, 'fit_intercept': True}. Best is trial 9 with value: 0.9814859946947604.
Optimisation des hyperparamètres:  10%|▉         | 19/200 [2:30:41<9:29:33, 188.80s/itération, Best RMSE=0.981] 

[Trial 23] RMSE: 0.9856 | Params: {'alpha': 0.0005660727498900619, 'max_iter': 4999, 'fit_intercept': True}


[I 2025-05-11 10:45:31,553] Trial 7 finished with value: inf and parameters: {'alpha': 0.0009737755961261342, 'max_iter': 4724, 'fit_intercept': False}. Best is trial 9 with value: 0.9814859946947604.
Optimisation des hyperparamètres:  10%|█         | 20/200 [2:31:06<6:58:11, 139.40s/itération, Best RMSE=0.981]

Trial 7 Erreur : 


[I 2025-05-11 10:48:19,281] Trial 16 finished with value: inf and parameters: {'alpha': 0.7030722284399786, 'max_iter': 2993, 'fit_intercept': False}. Best is trial 9 with value: 0.9814859946947604.
Optimisation des hyperparamètres:  10%|█         | 21/200 [2:33:53<7:21:13, 147.90s/itération, Best RMSE=0.981]

Trial 16 Erreur : 


[I 2025-05-11 10:50:45,293] Trial 13 finished with value: inf and parameters: {'alpha': 0.0010097068396664787, 'max_iter': 3342, 'fit_intercept': False}. Best is trial 9 with value: 0.9814859946947604.
Optimisation des hyperparamètres:  11%|█         | 22/200 [2:36:19<7:17:04, 147.33s/itération, Best RMSE=0.981]

Trial 13 Erreur : 


[I 2025-05-11 10:52:04,154] Trial 26 finished with value: 0.986336517727256 and parameters: {'alpha': 0.00021958480418154583, 'max_iter': 2367, 'fit_intercept': True}. Best is trial 9 with value: 0.9814859946947604.
Optimisation des hyperparamètres:  12%|█▏        | 23/200 [2:37:38<6:14:00, 126.78s/itération, Best RMSE=0.981]

[Trial 26] RMSE: 0.9863 | Params: {'alpha': 0.00021958480418154583, 'max_iter': 2367, 'fit_intercept': True}


[I 2025-05-11 10:55:50,618] Trial 18 finished with value: inf and parameters: {'alpha': 0.0015612657009190223, 'max_iter': 3031, 'fit_intercept': False}. Best is trial 9 with value: 0.9814859946947604.
Optimisation des hyperparamètres:  12%|█▏        | 24/200 [2:41:25<7:39:38, 156.70s/itération, Best RMSE=0.981]

Trial 18 Erreur : 


[I 2025-05-11 10:56:14,133] Trial 19 finished with value: 21.630726571404427 and parameters: {'alpha': 0.00046221374398929113, 'max_iter': 4911, 'fit_intercept': False}. Best is trial 9 with value: 0.9814859946947604.
Optimisation des hyperparamètres:  12%|█▎        | 25/200 [2:41:48<5:40:28, 116.74s/itération, Best RMSE=0.981]

[Trial 19] RMSE: 21.6307 | Params: {'alpha': 0.00046221374398929113, 'max_iter': 4911, 'fit_intercept': False}


[I 2025-05-11 10:56:27,434] Trial 24 finished with value: 0.9814797812713346 and parameters: {'alpha': 0.00012894867462182128, 'max_iter': 2347, 'fit_intercept': True}. Best is trial 24 with value: 0.9814797812713346.
Optimisation des hyperparamètres:  13%|█▎        | 26/200 [2:42:01<4:08:31, 85.70s/itération, Best RMSE=0.981] 

[Trial 24] RMSE: 0.9815 | Params: {'alpha': 0.00012894867462182128, 'max_iter': 2347, 'fit_intercept': True}


[I 2025-05-11 11:00:48,741] Trial 25 finished with value: 0.9814746105223674 and parameters: {'alpha': 0.00010387176304800992, 'max_iter': 2355, 'fit_intercept': True}. Best is trial 25 with value: 0.9814746105223674.
Optimisation des hyperparamètres:  14%|█▎        | 27/200 [2:46:23<6:39:00, 138.39s/itération, Best RMSE=0.981]

[Trial 25] RMSE: 0.9815 | Params: {'alpha': 0.00010387176304800992, 'max_iter': 2355, 'fit_intercept': True}


[I 2025-05-11 11:01:54,995] Trial 33 finished with value: 0.9844330311532495 and parameters: {'alpha': 0.0038569492264078045, 'max_iter': 2422, 'fit_intercept': True}. Best is trial 25 with value: 0.9814746105223674.
Optimisation des hyperparamètres:  14%|█▍        | 28/200 [2:47:29<5:34:40, 116.75s/itération, Best RMSE=0.981]

[Trial 33] RMSE: 0.9844 | Params: {'alpha': 0.0038569492264078045, 'max_iter': 2422, 'fit_intercept': True}


[I 2025-05-11 11:02:51,839] Trial 34 finished with value: 0.9844117491684361 and parameters: {'alpha': 0.004400994265237361, 'max_iter': 2328, 'fit_intercept': True}. Best is trial 25 with value: 0.9814746105223674.
Optimisation des hyperparamètres:  14%|█▍        | 29/200 [2:48:26<4:41:30, 98.77s/itération, Best RMSE=0.981] 

[Trial 34] RMSE: 0.9844 | Params: {'alpha': 0.004400994265237361, 'max_iter': 2328, 'fit_intercept': True}


[I 2025-05-11 11:04:26,321] Trial 29 finished with value: 0.9814756804499887 and parameters: {'alpha': 0.00010956004684121728, 'max_iter': 2404, 'fit_intercept': True}. Best is trial 25 with value: 0.9814746105223674.
Optimisation des hyperparamètres:  15%|█▌        | 30/200 [2:50:00<4:36:12, 97.49s/itération, Best RMSE=0.981]

[Trial 29] RMSE: 0.9815 | Params: {'alpha': 0.00010956004684121728, 'max_iter': 2404, 'fit_intercept': True}


[I 2025-05-11 11:04:42,390] Trial 27 finished with value: 0.9814753228423247 and parameters: {'alpha': 0.00010765533163900751, 'max_iter': 2387, 'fit_intercept': True}. Best is trial 25 with value: 0.9814746105223674.
Optimisation des hyperparamètres:  16%|█▌        | 31/200 [2:50:16<3:25:47, 73.06s/itération, Best RMSE=0.981]

[Trial 27] RMSE: 0.9815 | Params: {'alpha': 0.00010765533163900751, 'max_iter': 2387, 'fit_intercept': True}


[I 2025-05-11 11:06:16,930] Trial 28 finished with value: 0.9814867376448322 and parameters: {'alpha': 0.00016309206151026096, 'max_iter': 2402, 'fit_intercept': True}. Best is trial 25 with value: 0.9814746105223674.
Optimisation des hyperparamètres:  16%|█▌        | 32/200 [2:51:51<3:42:36, 79.51s/itération, Best RMSE=0.981]

[Trial 28] RMSE: 0.9815 | Params: {'alpha': 0.00016309206151026096, 'max_iter': 2402, 'fit_intercept': True}


[I 2025-05-11 11:06:35,233] Trial 35 finished with value: 0.9843976273330095 and parameters: {'alpha': 0.00486773949113544, 'max_iter': 2337, 'fit_intercept': True}. Best is trial 25 with value: 0.9814746105223674.
Optimisation des hyperparamètres:  16%|█▋        | 33/200 [2:52:09<2:50:11, 61.15s/itération, Best RMSE=0.981]

[Trial 35] RMSE: 0.9844 | Params: {'alpha': 0.00486773949113544, 'max_iter': 2337, 'fit_intercept': True}


[I 2025-05-11 11:07:02,799] Trial 36 finished with value: 0.9843984037815533 and parameters: {'alpha': 0.005153793976418414, 'max_iter': 2296, 'fit_intercept': True}. Best is trial 25 with value: 0.9814746105223674.
Optimisation des hyperparamètres:  17%|█▋        | 34/200 [2:52:37<2:21:18, 51.08s/itération, Best RMSE=0.981]

[Trial 36] RMSE: 0.9844 | Params: {'alpha': 0.005153793976418414, 'max_iter': 2296, 'fit_intercept': True}


[I 2025-05-11 11:07:50,487] Trial 30 finished with value: 0.9814788862264382 and parameters: {'alpha': 0.00012605091818469158, 'max_iter': 2547, 'fit_intercept': True}. Best is trial 25 with value: 0.9814746105223674.
Optimisation des hyperparamètres:  18%|█▊        | 35/200 [2:53:24<2:17:38, 50.05s/itération, Best RMSE=0.981]

[Trial 30] RMSE: 0.9815 | Params: {'alpha': 0.00012605091818469158, 'max_iter': 2547, 'fit_intercept': True}


[I 2025-05-11 11:08:20,792] Trial 31 finished with value: 0.9814737120465915 and parameters: {'alpha': 0.00010043735072662933, 'max_iter': 2463, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  18%|█▊        | 36/200 [2:53:55<2:00:38, 44.13s/itération, Best RMSE=0.981]

[Trial 31] RMSE: 0.9815 | Params: {'alpha': 0.00010043735072662933, 'max_iter': 2463, 'fit_intercept': True}


[I 2025-05-11 11:11:01,585] Trial 32 finished with value: 0.9814804780747259 and parameters: {'alpha': 0.0001322860585544701, 'max_iter': 2343, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  18%|█▊        | 37/200 [2:56:36<3:34:57, 79.13s/itération, Best RMSE=0.981]

[Trial 32] RMSE: 0.9815 | Params: {'alpha': 0.0001322860585544701, 'max_iter': 2343, 'fit_intercept': True}


[I 2025-05-11 11:12:44,316] Trial 40 finished with value: inf and parameters: {'alpha': 0.00015347133154568554, 'max_iter': 1151, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  19%|█▉        | 38/200 [2:58:18<3:52:45, 86.21s/itération, Best RMSE=0.981]

Trial 40 Erreur : 


[I 2025-05-11 11:13:07,924] Trial 41 finished with value: inf and parameters: {'alpha': 0.00012448813675583817, 'max_iter': 1022, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  20%|█▉        | 39/200 [2:58:42<3:00:55, 67.43s/itération, Best RMSE=0.981]

Trial 41 Erreur : 


[I 2025-05-11 11:17:36,668] Trial 37 finished with value: 0.9814771838665433 and parameters: {'alpha': 0.0001171150902303406, 'max_iter': 2431, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  20%|██        | 40/200 [3:03:11<5:40:52, 127.83s/itération, Best RMSE=0.981]

[Trial 37] RMSE: 0.9815 | Params: {'alpha': 0.0001171150902303406, 'max_iter': 2431, 'fit_intercept': True}


[I 2025-05-11 11:17:43,586] Trial 47 finished with value: 0.9861171967898775 and parameters: {'alpha': 0.00030076785537388274, 'max_iter': 1133, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  20%|██        | 41/200 [3:03:18<4:02:36, 91.55s/itération, Best RMSE=0.981] 

[Trial 47] RMSE: 0.9861 | Params: {'alpha': 0.00030076785537388274, 'max_iter': 1133, 'fit_intercept': True}


[I 2025-05-11 11:20:30,024] Trial 48 finished with value: 0.9861291879773908 and parameters: {'alpha': 0.00029814159829574636, 'max_iter': 1220, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  21%|██        | 42/200 [3:06:04<5:00:15, 114.02s/itération, Best RMSE=0.981]

[Trial 48] RMSE: 0.9861 | Params: {'alpha': 0.00029814159829574636, 'max_iter': 1220, 'fit_intercept': True}


[I 2025-05-11 11:22:16,328] Trial 38 finished with value: 0.9814757568306923 and parameters: {'alpha': 0.00010986025580850499, 'max_iter': 2397, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  22%|██▏       | 43/200 [3:07:50<4:52:17, 111.71s/itération, Best RMSE=0.981]

[Trial 38] RMSE: 0.9815 | Params: {'alpha': 0.00010986025580850499, 'max_iter': 2397, 'fit_intercept': True}


[I 2025-05-11 11:22:46,826] Trial 39 finished with value: 0.9814745706345076 and parameters: {'alpha': 0.00010408829724951894, 'max_iter': 2397, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  22%|██▏       | 44/200 [3:08:21<3:47:05, 87.35s/itération, Best RMSE=0.981] 

[Trial 39] RMSE: 0.9815 | Params: {'alpha': 0.00010408829724951894, 'max_iter': 2397, 'fit_intercept': True}


[I 2025-05-11 11:23:17,661] Trial 49 finished with value: inf and parameters: {'alpha': 0.0002947165550827191, 'max_iter': 2041, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  22%|██▎       | 45/200 [3:08:52<3:01:49, 70.39s/itération, Best RMSE=0.981]

Trial 49 Erreur : 


[I 2025-05-11 11:23:44,401] Trial 50 finished with value: 0.9860271136009916 and parameters: {'alpha': 0.0003587779188225454, 'max_iter': 1943, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  23%|██▎       | 46/200 [3:09:18<2:27:02, 57.29s/itération, Best RMSE=0.981]

[Trial 50] RMSE: 0.9860 | Params: {'alpha': 0.0003587779188225454, 'max_iter': 1943, 'fit_intercept': True}


[I 2025-05-11 11:24:10,893] Trial 46 finished with value: 0.9815223868265905 and parameters: {'alpha': 0.00010053422181662462, 'max_iter': 1046, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  24%|██▎       | 47/200 [3:09:45<2:02:32, 48.05s/itération, Best RMSE=0.981]

[Trial 46] RMSE: 0.9815 | Params: {'alpha': 0.00010053422181662462, 'max_iter': 1046, 'fit_intercept': True}


[I 2025-05-11 11:24:25,914] Trial 45 finished with value: 0.9814899299234862 and parameters: {'alpha': 0.0001125681049347809, 'max_iter': 1348, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  24%|██▍       | 48/200 [3:10:00<1:36:37, 38.14s/itération, Best RMSE=0.981]

[Trial 45] RMSE: 0.9815 | Params: {'alpha': 0.0001125681049347809, 'max_iter': 1348, 'fit_intercept': True}


[I 2025-05-11 11:24:48,400] Trial 42 finished with value: 0.9814762726049039 and parameters: {'alpha': 0.00010825917837810566, 'max_iter': 2040, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  24%|██▍       | 49/200 [3:10:22<1:24:11, 33.45s/itération, Best RMSE=0.981]

[Trial 42] RMSE: 0.9815 | Params: {'alpha': 0.00010825917837810566, 'max_iter': 2040, 'fit_intercept': True}


[I 2025-05-11 11:25:37,220] Trial 43 finished with value: 0.9814779376892009 and parameters: {'alpha': 0.00011635630460984673, 'max_iter': 2045, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  25%|██▌       | 50/200 [3:11:11<1:35:09, 38.06s/itération, Best RMSE=0.981]

[Trial 43] RMSE: 0.9815 | Params: {'alpha': 0.00011635630460984673, 'max_iter': 2045, 'fit_intercept': True}


[I 2025-05-11 11:25:55,679] Trial 44 finished with value: 0.9814774276011173 and parameters: {'alpha': 0.00011438953705513961, 'max_iter': 2076, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  26%|██▌       | 51/200 [3:11:30<1:19:54, 32.18s/itération, Best RMSE=0.981]

[Trial 44] RMSE: 0.9815 | Params: {'alpha': 0.00011438953705513961, 'max_iter': 2076, 'fit_intercept': True}


[I 2025-05-11 11:26:29,135] Trial 51 finished with value: inf and parameters: {'alpha': 0.000324973320874466, 'max_iter': 1996, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  26%|██▌       | 52/200 [3:12:03<1:20:18, 32.56s/itération, Best RMSE=0.981]

Trial 51 Erreur : 


[I 2025-05-11 11:26:37,254] Trial 54 finished with value: inf and parameters: {'alpha': 0.7143528457139912, 'max_iter': 2031, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  26%|██▋       | 53/200 [3:12:11<1:01:48, 25.23s/itération, Best RMSE=0.981]

Trial 54 Erreur : 


[I 2025-05-11 11:26:42,675] Trial 55 finished with value: inf and parameters: {'alpha': 0.6531817492858248, 'max_iter': 1972, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  27%|██▋       | 54/200 [3:12:17<46:55, 19.28s/itération, Best RMSE=0.981]  

Trial 55 Erreur : 


[I 2025-05-11 11:27:01,671] Trial 52 finished with value: inf and parameters: {'alpha': 0.00031085148783007853, 'max_iter': 1993, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  28%|██▊       | 55/200 [3:12:36<46:23, 19.20s/itération, Best RMSE=0.981]

Trial 52 Erreur : 


[I 2025-05-11 11:27:39,790] Trial 58 finished with value: inf and parameters: {'alpha': 0.7467268042889096, 'max_iter': 1516, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  28%|██▊       | 56/200 [3:13:14<59:43, 24.88s/itération, Best RMSE=0.981]

Trial 58 Erreur : 


[I 2025-05-11 11:27:47,555] Trial 59 finished with value: inf and parameters: {'alpha': 1.431876379497274, 'max_iter': 2717, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  28%|██▊       | 57/200 [3:13:22<47:02, 19.74s/itération, Best RMSE=0.981]

Trial 59 Erreur : 


[I 2025-05-11 11:28:11,482] Trial 60 finished with value: inf and parameters: {'alpha': 1.480777523999428, 'max_iter': 2723, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  29%|██▉       | 58/200 [3:13:45<49:43, 21.01s/itération, Best RMSE=0.981]

Trial 60 Erreur : 


[I 2025-05-11 11:28:44,295] Trial 53 finished with value: 0.9858098583415285 and parameters: {'alpha': 0.0004614905226664248, 'max_iter': 1991, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  30%|██▉       | 59/200 [3:14:18<57:39, 24.54s/itération, Best RMSE=0.981]

[Trial 53] RMSE: 0.9858 | Params: {'alpha': 0.0004614905226664248, 'max_iter': 1991, 'fit_intercept': True}


[I 2025-05-11 11:29:51,951] Trial 61 finished with value: inf and parameters: {'alpha': 0.2192556520437208, 'max_iter': 2688, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  30%|███       | 60/200 [3:15:26<1:27:26, 37.48s/itération, Best RMSE=0.981]

Trial 61 Erreur : 


[I 2025-05-11 11:30:17,006] Trial 62 finished with value: inf and parameters: {'alpha': 0.2014127191673224, 'max_iter': 2721, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  30%|███       | 61/200 [3:15:51<1:18:10, 33.75s/itération, Best RMSE=0.981]

Trial 62 Erreur : 


[I 2025-05-11 11:31:41,379] Trial 56 finished with value: 0.9857159186774936 and parameters: {'alpha': 0.0005082527844703374, 'max_iter': 2715, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  31%|███       | 62/200 [3:17:15<1:52:33, 48.94s/itération, Best RMSE=0.981]

[Trial 56] RMSE: 0.9857 | Params: {'alpha': 0.0005082527844703374, 'max_iter': 2715, 'fit_intercept': True}


[I 2025-05-11 11:32:12,227] Trial 57 finished with value: inf and parameters: {'alpha': 0.32637206332538, 'max_iter': 2721, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  32%|███▏      | 63/200 [3:17:46<1:39:20, 43.51s/itération, Best RMSE=0.981]

Trial 57 Erreur : 


[I 2025-05-11 11:36:29,626] Trial 63 finished with value: 0.9855621269364635 and parameters: {'alpha': 0.0005871065946715873, 'max_iter': 2597, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  32%|███▏      | 64/200 [3:22:04<4:04:03, 107.68s/itération, Best RMSE=0.981]

[Trial 63] RMSE: 0.9856 | Params: {'alpha': 0.0005871065946715873, 'max_iter': 2597, 'fit_intercept': True}


[I 2025-05-11 11:36:58,289] Trial 65 finished with value: 0.9853973298486512 and parameters: {'alpha': 0.0006968207351341128, 'max_iter': 2605, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  32%|███▎      | 65/200 [3:22:32<3:08:56, 83.97s/itération, Best RMSE=0.981] 

[Trial 65] RMSE: 0.9854 | Params: {'alpha': 0.0006968207351341128, 'max_iter': 2605, 'fit_intercept': True}


[I 2025-05-11 11:38:15,702] Trial 64 finished with value: inf and parameters: {'alpha': 0.0002150329909445435, 'max_iter': 2668, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  33%|███▎      | 66/200 [3:23:50<3:03:08, 82.00s/itération, Best RMSE=0.981]

Trial 64 Erreur : 


[I 2025-05-11 11:38:24,563] Trial 66 finished with value: 0.9854729313073883 and parameters: {'alpha': 0.000633695653688239, 'max_iter': 2720, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  34%|███▎      | 67/200 [3:23:59<2:13:08, 60.06s/itération, Best RMSE=0.981]

[Trial 66] RMSE: 0.9855 | Params: {'alpha': 0.000633695653688239, 'max_iter': 2720, 'fit_intercept': True}


[I 2025-05-11 11:38:31,608] Trial 68 finished with value: 0.9853796218349505 and parameters: {'alpha': 0.0007117812703374037, 'max_iter': 2636, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  34%|███▍      | 68/200 [3:24:06<1:37:08, 44.16s/itération, Best RMSE=0.981]

[Trial 68] RMSE: 0.9854 | Params: {'alpha': 0.0007117812703374037, 'max_iter': 2636, 'fit_intercept': True}


[I 2025-05-11 11:39:14,120] Trial 67 finished with value: 0.9853731943115788 and parameters: {'alpha': 0.0007172364434009205, 'max_iter': 2676, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  34%|███▍      | 69/200 [3:24:48<1:35:19, 43.66s/itération, Best RMSE=0.981]

[Trial 67] RMSE: 0.9854 | Params: {'alpha': 0.0007172364434009205, 'max_iter': 2676, 'fit_intercept': True}


[I 2025-05-11 11:40:35,866] Trial 69 finished with value: 0.9854371514401518 and parameters: {'alpha': 0.0006633858630460247, 'max_iter': 3171, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  35%|███▌      | 70/200 [3:26:10<1:59:21, 55.09s/itération, Best RMSE=0.981]

[Trial 69] RMSE: 0.9854 | Params: {'alpha': 0.0006633858630460247, 'max_iter': 3171, 'fit_intercept': True}


[I 2025-05-11 11:43:55,634] Trial 72 finished with value: inf and parameters: {'alpha': 0.00020673073290133434, 'max_iter': 3162, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  36%|███▌      | 71/200 [3:29:30<3:31:45, 98.49s/itération, Best RMSE=0.981]

Trial 72 Erreur : 


[I 2025-05-11 11:45:40,284] Trial 73 finished with value: inf and parameters: {'alpha': 0.00019586736850215884, 'max_iter': 2186, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  36%|███▌      | 72/200 [3:31:14<3:34:03, 100.34s/itération, Best RMSE=0.981]

Trial 73 Erreur : 


[I 2025-05-11 12:04:56,143] Trial 70 finished with value: inf and parameters: {'alpha': 0.0007220150056901585, 'max_iter': 3138, 'fit_intercept': False}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  36%|███▋      | 73/200 [3:50:30<14:42:38, 417.00s/itération, Best RMSE=0.981]

Trial 70 Erreur : 


[I 2025-05-11 12:08:16,153] Trial 74 finished with value: inf and parameters: {'alpha': 0.00021624029486328627, 'max_iter': 3027, 'fit_intercept': False}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  37%|███▋      | 74/200 [3:53:50<12:18:59, 351.90s/itération, Best RMSE=0.981]

Trial 74 Erreur : 


[I 2025-05-11 12:08:27,545] Trial 71 finished with value: inf and parameters: {'alpha': 0.0002065356894735426, 'max_iter': 3069, 'fit_intercept': False}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  38%|███▊      | 75/200 [3:54:02<8:40:18, 249.75s/itération, Best RMSE=0.981] 

Trial 71 Erreur : 


[I 2025-05-11 12:09:47,096] Trial 76 finished with value: inf and parameters: {'alpha': 0.00021065039077178927, 'max_iter': 2193, 'fit_intercept': False}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  38%|███▊      | 76/200 [3:55:21<6:50:37, 198.69s/itération, Best RMSE=0.981]

Trial 76 Erreur : 


[I 2025-05-11 12:11:05,212] Trial 78 finished with value: inf and parameters: {'alpha': 0.0014087146759654981, 'max_iter': 2182, 'fit_intercept': False}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  38%|███▊      | 77/200 [3:56:39<5:33:09, 162.52s/itération, Best RMSE=0.981]

Trial 78 Erreur : 


[I 2025-05-11 12:11:13,338] Trial 75 finished with value: inf and parameters: {'alpha': 0.00023291109174168934, 'max_iter': 2951, 'fit_intercept': False}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  39%|███▉      | 78/200 [3:56:47<3:56:16, 116.20s/itération, Best RMSE=0.981]

Trial 75 Erreur : 


[I 2025-05-11 12:12:19,923] Trial 77 finished with value: inf and parameters: {'alpha': 0.00021334674977939878, 'max_iter': 2176, 'fit_intercept': False}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  40%|███▉      | 79/200 [3:57:54<3:24:20, 101.32s/itération, Best RMSE=0.981]

Trial 77 Erreur : 


[I 2025-05-11 12:13:22,359] Trial 79 finished with value: inf and parameters: {'alpha': 0.00019888457623664328, 'max_iter': 3121, 'fit_intercept': False}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  40%|████      | 80/200 [3:58:56<2:59:17, 89.65s/itération, Best RMSE=0.981] 

Trial 79 Erreur : 


[I 2025-05-11 12:13:55,017] Trial 81 finished with value: inf and parameters: {'alpha': 0.00019600663800024357, 'max_iter': 2168, 'fit_intercept': False}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  40%|████      | 81/200 [3:59:29<2:23:54, 72.56s/itération, Best RMSE=0.981]

Trial 81 Erreur : 


[I 2025-05-11 12:15:39,742] Trial 83 finished with value: inf and parameters: {'alpha': 0.0001888085517930295, 'max_iter': 1777, 'fit_intercept': False}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  41%|████      | 82/200 [4:01:14<2:41:40, 82.21s/itération, Best RMSE=0.981]

Trial 83 Erreur : 


[I 2025-05-11 12:15:44,620] Trial 84 finished with value: inf and parameters: {'alpha': 0.00018194870488466585, 'max_iter': 1816, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  42%|████▏     | 83/200 [4:01:19<1:55:03, 59.00s/itération, Best RMSE=0.981]

Trial 84 Erreur : 


[I 2025-05-11 12:15:52,921] Trial 80 finished with value: inf and parameters: {'alpha': 0.00020424577386756495, 'max_iter': 2992, 'fit_intercept': False}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  42%|████▏     | 84/200 [4:01:27<1:24:40, 43.80s/itération, Best RMSE=0.981]

Trial 80 Erreur : 


[I 2025-05-11 12:16:07,615] Trial 82 finished with value: inf and parameters: {'alpha': 0.00019219753035315065, 'max_iter': 2204, 'fit_intercept': False}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  42%|████▎     | 85/200 [4:01:42<1:07:11, 35.06s/itération, Best RMSE=0.981]

Trial 82 Erreur : 


[I 2025-05-11 12:17:51,283] Trial 86 finished with value: 0.9846289251469451 and parameters: {'alpha': 0.007916765083056177, 'max_iter': 1812, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  43%|████▎     | 86/200 [4:03:25<1:45:43, 55.64s/itération, Best RMSE=0.981]

[Trial 86] RMSE: 0.9846 | Params: {'alpha': 0.007916765083056177, 'max_iter': 1812, 'fit_intercept': True}


[I 2025-05-11 12:22:14,678] Trial 90 finished with value: 0.9856753233406277 and parameters: {'alpha': 0.01227770652537267, 'max_iter': 1784, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  44%|████▎     | 87/200 [4:07:49<3:42:10, 117.97s/itération, Best RMSE=0.981]

[Trial 90] RMSE: 0.9857 | Params: {'alpha': 0.01227770652537267, 'max_iter': 1784, 'fit_intercept': True}


[I 2025-05-11 12:27:31,025] Trial 85 finished with value: 0.9814894955140654 and parameters: {'alpha': 0.0001662068620540917, 'max_iter': 1810, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  44%|████▍     | 88/200 [4:13:05<5:31:17, 177.48s/itération, Best RMSE=0.981]

[Trial 85] RMSE: 0.9815 | Params: {'alpha': 0.0001662068620540917, 'max_iter': 1810, 'fit_intercept': True}


[I 2025-05-11 12:29:24,175] Trial 87 finished with value: 0.9814765164169522 and parameters: {'alpha': 0.0001003868094422999, 'max_iter': 1742, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  44%|████▍     | 89/200 [4:14:58<4:52:38, 158.18s/itération, Best RMSE=0.981]

[Trial 87] RMSE: 0.9815 | Params: {'alpha': 0.0001003868094422999, 'max_iter': 1742, 'fit_intercept': True}


[I 2025-05-11 12:30:52,635] Trial 89 finished with value: 0.981475940190819 and parameters: {'alpha': 0.00010077115827103124, 'max_iter': 1813, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  45%|████▌     | 90/200 [4:16:27<4:11:39, 137.27s/itération, Best RMSE=0.981]

[Trial 89] RMSE: 0.9815 | Params: {'alpha': 0.00010077115827103124, 'max_iter': 1813, 'fit_intercept': True}


[I 2025-05-11 12:31:19,895] Trial 88 finished with value: 0.981486972511044 and parameters: {'alpha': 0.00015386217585358107, 'max_iter': 1805, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  46%|████▌     | 91/200 [4:16:54<3:09:24, 104.26s/itération, Best RMSE=0.981]

[Trial 88] RMSE: 0.9815 | Params: {'alpha': 0.00015386217585358107, 'max_iter': 1805, 'fit_intercept': True}


[I 2025-05-11 12:34:19,477] Trial 91 finished with value: 0.9814841856153724 and parameters: {'alpha': 0.00015134030038824165, 'max_iter': 2484, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  46%|████▌     | 92/200 [4:19:53<3:48:21, 126.86s/itération, Best RMSE=0.981]

[Trial 91] RMSE: 0.9815 | Params: {'alpha': 0.00015134030038824165, 'max_iter': 2484, 'fit_intercept': True}


[I 2025-05-11 12:36:04,476] Trial 93 finished with value: 0.9814789692620339 and parameters: {'alpha': 0.00010050927057282894, 'max_iter': 1583, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  46%|████▋     | 93/200 [4:21:38<3:34:32, 120.30s/itération, Best RMSE=0.981]

[Trial 93] RMSE: 0.9815 | Params: {'alpha': 0.00010050927057282894, 'max_iter': 1583, 'fit_intercept': True}


[I 2025-05-11 12:36:34,446] Trial 92 finished with value: 0.9814749585740542 and parameters: {'alpha': 0.00010667307273035604, 'max_iter': 2487, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  47%|████▋     | 94/200 [4:22:08<2:44:40, 93.21s/itération, Best RMSE=0.981] 

[Trial 92] RMSE: 0.9815 | Params: {'alpha': 0.00010667307273035604, 'max_iter': 2487, 'fit_intercept': True}


[I 2025-05-11 12:38:16,958] Trial 94 finished with value: 0.981480855368207 and parameters: {'alpha': 0.00013523254394044054, 'max_iter': 2475, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  48%|████▊     | 95/200 [4:23:51<2:47:59, 96.00s/itération, Best RMSE=0.981]

[Trial 94] RMSE: 0.9815 | Params: {'alpha': 0.00013523254394044054, 'max_iter': 2475, 'fit_intercept': True}


[I 2025-05-11 12:38:27,065] Trial 95 finished with value: 0.9814743212247478 and parameters: {'alpha': 0.00010354419597133986, 'max_iter': 2482, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  48%|████▊     | 96/200 [4:24:01<2:01:43, 70.23s/itération, Best RMSE=0.981]

[Trial 95] RMSE: 0.9815 | Params: {'alpha': 0.00010354419597133986, 'max_iter': 2482, 'fit_intercept': True}


[I 2025-05-11 12:38:41,258] Trial 96 finished with value: 0.9814737878065984 and parameters: {'alpha': 0.00010083412555522695, 'max_iter': 2467, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  48%|████▊     | 97/200 [4:24:15<1:31:41, 53.42s/itération, Best RMSE=0.981]

[Trial 96] RMSE: 0.9815 | Params: {'alpha': 0.00010083412555522695, 'max_iter': 2467, 'fit_intercept': True}


[I 2025-05-11 12:39:47,285] Trial 102 finished with value: inf and parameters: {'alpha': 0.07512930188120587, 'max_iter': 2472, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  49%|████▉     | 98/200 [4:25:21<1:37:14, 57.20s/itération, Best RMSE=0.981]

Trial 102 Erreur : 


[I 2025-05-11 12:40:36,102] Trial 97 finished with value: 0.9814738186977695 and parameters: {'alpha': 0.00010107325509697764, 'max_iter': 2479, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  50%|████▉     | 99/200 [4:26:10<1:32:03, 54.68s/itération, Best RMSE=0.981]

[Trial 97] RMSE: 0.9815 | Params: {'alpha': 0.00010107325509697764, 'max_iter': 2479, 'fit_intercept': True}


[I 2025-05-11 12:44:33,461] Trial 98 finished with value: 0.9814824071533026 and parameters: {'alpha': 0.00014285622087774645, 'max_iter': 2499, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  50%|█████     | 100/200 [4:30:07<3:02:29, 109.49s/itération, Best RMSE=0.981]

[Trial 98] RMSE: 0.9815 | Params: {'alpha': 0.00014285622087774645, 'max_iter': 2499, 'fit_intercept': True}


[I 2025-05-11 12:46:09,725] Trial 106 finished with value: inf and parameters: {'alpha': 0.07624225801003551, 'max_iter': 2854, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  50%|█████     | 101/200 [4:31:44<2:54:06, 105.52s/itération, Best RMSE=0.981]

Trial 106 Erreur : 


[I 2025-05-11 12:46:58,395] Trial 105 finished with value: 0.9859168851505193 and parameters: {'alpha': 0.0004087596232518041, 'max_iter': 1632, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  51%|█████     | 102/200 [4:32:32<2:24:29, 88.46s/itération, Best RMSE=0.981] 

[Trial 105] RMSE: 0.9859 | Params: {'alpha': 0.0004087596232518041, 'max_iter': 1632, 'fit_intercept': True}


[I 2025-05-11 12:47:22,605] Trial 104 finished with value: 0.9859050447508222 and parameters: {'alpha': 0.00041472545024672853, 'max_iter': 2490, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  52%|█████▏    | 103/200 [4:32:57<1:51:51, 69.19s/itération, Best RMSE=0.981]

[Trial 104] RMSE: 0.9859 | Params: {'alpha': 0.00041472545024672853, 'max_iter': 2490, 'fit_intercept': True}


[I 2025-05-11 12:49:17,736] Trial 99 finished with value: 0.9814745447700989 and parameters: {'alpha': 0.00010482169993305698, 'max_iter': 2514, 'fit_intercept': True}. Best is trial 31 with value: 0.9814737120465915.
Optimisation des hyperparamètres:  52%|█████▏    | 104/200 [4:34:52<2:12:45, 82.97s/itération, Best RMSE=0.981]

[Trial 99] RMSE: 0.9815 | Params: {'alpha': 0.00010482169993305698, 'max_iter': 2514, 'fit_intercept': True}


[I 2025-05-11 12:49:46,781] Trial 100 finished with value: 0.9814736428180454 and parameters: {'alpha': 0.00010016037191802047, 'max_iter': 2472, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  52%|█████▎    | 105/200 [4:35:21<1:45:45, 66.79s/itération, Best RMSE=0.981]

[Trial 100] RMSE: 0.9815 | Params: {'alpha': 0.00010016037191802047, 'max_iter': 2472, 'fit_intercept': True}


[I 2025-05-11 12:49:49,263] Trial 107 finished with value: 0.9859757117156547 and parameters: {'alpha': 0.00038192014625693837, 'max_iter': 2527, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  53%|█████▎    | 106/200 [4:35:23<1:14:24, 47.50s/itération, Best RMSE=0.981]

[Trial 107] RMSE: 0.9860 | Params: {'alpha': 0.00038192014625693837, 'max_iter': 2527, 'fit_intercept': True}


[I 2025-05-11 12:49:53,466] Trial 108 finished with value: 0.9859459745806808 and parameters: {'alpha': 0.000395694284368949, 'max_iter': 2826, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  54%|█████▎    | 107/200 [4:35:27<53:29, 34.51s/itération, Best RMSE=0.981]  

[Trial 108] RMSE: 0.9859 | Params: {'alpha': 0.000395694284368949, 'max_iter': 2826, 'fit_intercept': True}


[I 2025-05-11 12:51:00,091] Trial 109 finished with value: 0.98593041204273 and parameters: {'alpha': 0.00040292837172534705, 'max_iter': 2897, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  54%|█████▍    | 108/200 [4:36:34<1:07:41, 44.14s/itération, Best RMSE=0.981]

[Trial 109] RMSE: 0.9859 | Params: {'alpha': 0.00040292837172534705, 'max_iter': 2897, 'fit_intercept': True}


[I 2025-05-11 12:52:20,668] Trial 101 finished with value: 0.981481964591891 and parameters: {'alpha': 0.00014069773170424763, 'max_iter': 2492, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  55%|█████▍    | 109/200 [4:37:55<1:23:31, 55.07s/itération, Best RMSE=0.981]

[Trial 101] RMSE: 0.9815 | Params: {'alpha': 0.00014069773170424763, 'max_iter': 2492, 'fit_intercept': True}


[I 2025-05-11 12:52:25,345] Trial 110 finished with value: 0.985950719799366 and parameters: {'alpha': 0.0003934966643276754, 'max_iter': 2894, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  55%|█████▌    | 110/200 [4:37:59<59:56, 39.96s/itération, Best RMSE=0.981]  

[Trial 110] RMSE: 0.9860 | Params: {'alpha': 0.0003934966643276754, 'max_iter': 2894, 'fit_intercept': True}


[I 2025-05-11 12:55:38,815] Trial 103 finished with value: 0.9814743180883261 and parameters: {'alpha': 0.00010367342235374155, 'max_iter': 2505, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  56%|█████▌    | 111/200 [4:41:13<2:07:34, 86.01s/itération, Best RMSE=0.981]

[Trial 103] RMSE: 0.9815 | Params: {'alpha': 0.00010367342235374155, 'max_iter': 2505, 'fit_intercept': True}


[I 2025-05-11 12:57:10,715] Trial 111 finished with value: 0.9859071677633916 and parameters: {'alpha': 0.00041374521099531, 'max_iter': 2841, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  56%|█████▌    | 112/200 [4:42:45<2:08:44, 87.78s/itération, Best RMSE=0.981]

[Trial 111] RMSE: 0.9859 | Params: {'alpha': 0.00041374521099531, 'max_iter': 2841, 'fit_intercept': True}


[I 2025-05-11 12:58:17,293] Trial 112 finished with value: 0.985937216548733 and parameters: {'alpha': 0.00039975749750251395, 'max_iter': 2296, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  56%|█████▋    | 113/200 [4:43:51<1:58:03, 81.42s/itération, Best RMSE=0.981]

[Trial 112] RMSE: 0.9859 | Params: {'alpha': 0.00039975749750251395, 'max_iter': 2296, 'fit_intercept': True}


[I 2025-05-11 12:59:24,235] Trial 114 finished with value: inf and parameters: {'alpha': 0.0002679953638117987, 'max_iter': 2833, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  57%|█████▋    | 114/200 [4:44:58<1:50:28, 77.07s/itération, Best RMSE=0.981]

Trial 114 Erreur : 


[I 2025-05-11 12:59:53,540] Trial 113 finished with value: inf and parameters: {'alpha': 0.00028109297532537264, 'max_iter': 2841, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  57%|█████▊    | 115/200 [4:45:28<1:28:53, 62.74s/itération, Best RMSE=0.981]

Trial 113 Erreur : 


[I 2025-05-11 13:01:13,160] Trial 115 finished with value: inf and parameters: {'alpha': 0.000291253017326245, 'max_iter': 2817, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  58%|█████▊    | 116/200 [4:46:47<1:34:55, 67.81s/itération, Best RMSE=0.981]

Trial 115 Erreur : 


[I 2025-05-11 13:01:50,591] Trial 116 finished with value: inf and parameters: {'alpha': 0.00027307016854948555, 'max_iter': 2835, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  58%|█████▊    | 117/200 [4:47:25<1:21:11, 58.69s/itération, Best RMSE=0.981]

Trial 116 Erreur : 


[I 2025-05-11 13:04:12,575] Trial 120 finished with value: inf and parameters: {'alpha': 0.0002646743564019664, 'max_iter': 2296, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  59%|█████▉    | 118/200 [4:49:47<1:54:21, 83.68s/itération, Best RMSE=0.981]

Trial 120 Erreur : 


[I 2025-05-11 13:04:37,858] Trial 121 finished with value: inf and parameters: {'alpha': 0.00024853649783616546, 'max_iter': 2286, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  60%|█████▉    | 119/200 [4:50:12<1:29:19, 66.16s/itération, Best RMSE=0.981]

Trial 121 Erreur : 


[I 2025-05-11 13:07:18,244] Trial 122 finished with value: inf and parameters: {'alpha': 0.000280232655020204, 'max_iter': 2274, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  60%|██████    | 120/200 [4:52:52<2:05:54, 94.43s/itération, Best RMSE=0.981]

Trial 122 Erreur : 


[I 2025-05-11 13:10:15,914] Trial 123 finished with value: inf and parameters: {'alpha': 0.00027704247485671365, 'max_iter': 2310, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  60%|██████    | 121/200 [4:55:50<2:37:12, 119.40s/itération, Best RMSE=0.981]

Trial 123 Erreur : 


[I 2025-05-11 13:11:00,005] Trial 124 finished with value: inf and parameters: {'alpha': 0.00025419108163869246, 'max_iter': 2268, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  61%|██████    | 122/200 [4:56:34<2:05:50, 96.81s/itération, Best RMSE=0.981] 

Trial 124 Erreur : 


[I 2025-05-11 13:11:41,647] Trial 125 finished with value: inf and parameters: {'alpha': 0.00025803864977154564, 'max_iter': 2585, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  62%|██████▏   | 123/200 [4:57:16<1:42:59, 80.26s/itération, Best RMSE=0.981]

Trial 125 Erreur : 


[I 2025-05-11 13:13:02,985] Trial 117 finished with value: 0.981481559703853 and parameters: {'alpha': 0.0001364206185822872, 'max_iter': 2251, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  62%|██████▏   | 124/200 [4:58:37<1:42:04, 80.59s/itération, Best RMSE=0.981]

[Trial 117] RMSE: 0.9815 | Params: {'alpha': 0.0001364206185822872, 'max_iter': 2251, 'fit_intercept': True}


[I 2025-05-11 13:13:07,793] Trial 118 finished with value: 0.9814819230109133 and parameters: {'alpha': 0.00013919522531952763, 'max_iter': 2334, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  62%|██████▎   | 125/200 [4:58:42<1:12:18, 57.85s/itération, Best RMSE=0.981]

[Trial 118] RMSE: 0.9815 | Params: {'alpha': 0.00013919522531952763, 'max_iter': 2334, 'fit_intercept': True}


[I 2025-05-11 13:14:06,107] Trial 119 finished with value: 0.9814832154551241 and parameters: {'alpha': 0.00014473879480290702, 'max_iter': 2276, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  63%|██████▎   | 126/200 [4:59:40<1:11:31, 57.99s/itération, Best RMSE=0.981]

[Trial 119] RMSE: 0.9815 | Params: {'alpha': 0.00014473879480290702, 'max_iter': 2276, 'fit_intercept': True}


[I 2025-05-11 13:23:37,633] Trial 127 finished with value: 0.981478882780531 and parameters: {'alpha': 0.0001239614792205822, 'max_iter': 2291, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  64%|██████▎   | 127/200 [5:09:12<4:17:59, 212.05s/itération, Best RMSE=0.981]

[Trial 127] RMSE: 0.9815 | Params: {'alpha': 0.0001239614792205822, 'max_iter': 2291, 'fit_intercept': True}


[I 2025-05-11 13:24:04,028] Trial 126 finished with value: 0.9814803636666589 and parameters: {'alpha': 0.00013090169580108618, 'max_iter': 2272, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  64%|██████▍   | 128/200 [5:09:38<3:07:37, 156.35s/itération, Best RMSE=0.981]

[Trial 126] RMSE: 0.9815 | Params: {'alpha': 0.00013090169580108618, 'max_iter': 2272, 'fit_intercept': True}


[I 2025-05-11 13:24:30,723] Trial 128 finished with value: 0.9814815890109514 and parameters: {'alpha': 0.00013678437372566075, 'max_iter': 2269, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  64%|██████▍   | 129/200 [5:10:05<2:18:59, 117.46s/itération, Best RMSE=0.981]

[Trial 128] RMSE: 0.9815 | Params: {'alpha': 0.00013678437372566075, 'max_iter': 2269, 'fit_intercept': True}


[I 2025-05-11 13:26:43,760] Trial 129 finished with value: 0.9814786613949403 and parameters: {'alpha': 0.0001251027878445694, 'max_iter': 2586, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  65%|██████▌   | 130/200 [5:12:18<2:22:29, 122.13s/itération, Best RMSE=0.981]

[Trial 129] RMSE: 0.9815 | Params: {'alpha': 0.0001251027878445694, 'max_iter': 2586, 'fit_intercept': True}


[I 2025-05-11 13:29:10,527] Trial 130 finished with value: 0.981479283016134 and parameters: {'alpha': 0.00012697004542449233, 'max_iter': 2391, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  66%|██████▌   | 131/200 [5:14:44<2:28:57, 129.52s/itération, Best RMSE=0.981]

[Trial 130] RMSE: 0.9815 | Params: {'alpha': 0.00012697004542449233, 'max_iter': 2391, 'fit_intercept': True}


[I 2025-05-11 13:31:02,983] Trial 131 finished with value: 0.9814794643269791 and parameters: {'alpha': 0.00012894817828628841, 'max_iter': 2577, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  66%|██████▌   | 132/200 [5:16:37<2:20:59, 124.41s/itération, Best RMSE=0.981]

[Trial 131] RMSE: 0.9815 | Params: {'alpha': 0.00012894817828628841, 'max_iter': 2577, 'fit_intercept': True}


[I 2025-05-11 13:34:24,518] Trial 134 finished with value: 0.9814787368182712 and parameters: {'alpha': 0.0001242898644245653, 'max_iter': 2389, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  66%|██████▋   | 133/200 [5:19:58<2:44:45, 147.54s/itération, Best RMSE=0.981]

[Trial 134] RMSE: 0.9815 | Params: {'alpha': 0.0001242898644245653, 'max_iter': 2389, 'fit_intercept': True}


[I 2025-05-11 13:34:29,519] Trial 132 finished with value: 0.9814799451450422 and parameters: {'alpha': 0.00013127432869712861, 'max_iter': 2585, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  67%|██████▋   | 134/200 [5:20:03<1:55:15, 104.78s/itération, Best RMSE=0.981]

[Trial 132] RMSE: 0.9815 | Params: {'alpha': 0.00013127432869712861, 'max_iter': 2585, 'fit_intercept': True}


[I 2025-05-11 13:35:06,341] Trial 133 finished with value: 0.9814787787930762 and parameters: {'alpha': 0.00012453302928557835, 'max_iter': 2392, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  68%|██████▊   | 135/200 [5:20:40<1:31:25, 84.39s/itération, Best RMSE=0.981] 

[Trial 133] RMSE: 0.9815 | Params: {'alpha': 0.00012453302928557835, 'max_iter': 2392, 'fit_intercept': True}


[I 2025-05-11 13:36:28,926] Trial 135 finished with value: 0.981478921926205 and parameters: {'alpha': 0.00012520330317678985, 'max_iter': 2389, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  68%|██████▊   | 136/200 [5:22:03<1:29:26, 83.85s/itération, Best RMSE=0.981]

[Trial 135] RMSE: 0.9815 | Params: {'alpha': 0.00012520330317678985, 'max_iter': 2389, 'fit_intercept': True}


[I 2025-05-11 13:36:39,459] Trial 136 finished with value: 0.9814772296302323 and parameters: {'alpha': 0.00011715337789788457, 'max_iter': 2409, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  68%|██████▊   | 137/200 [5:22:13<1:04:56, 61.85s/itération, Best RMSE=0.981]

[Trial 136] RMSE: 0.9815 | Params: {'alpha': 0.00011715337789788457, 'max_iter': 2409, 'fit_intercept': True}


[I 2025-05-11 13:38:57,389] Trial 137 finished with value: 0.9814767750934211 and parameters: {'alpha': 0.0001159836007559319, 'max_iter': 2585, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  69%|██████▉   | 138/200 [5:24:31<1:27:30, 84.68s/itération, Best RMSE=0.981]

[Trial 137] RMSE: 0.9815 | Params: {'alpha': 0.0001159836007559319, 'max_iter': 2585, 'fit_intercept': True}


[I 2025-05-11 13:47:40,566] Trial 138 finished with value: 0.9814748372269831 and parameters: {'alpha': 0.00010558354931608006, 'max_iter': 2420, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  70%|██████▉   | 139/200 [5:33:15<3:39:49, 216.22s/itération, Best RMSE=0.981]

[Trial 138] RMSE: 0.9815 | Params: {'alpha': 0.00010558354931608006, 'max_iter': 2420, 'fit_intercept': True}


[I 2025-05-11 13:47:53,344] Trial 140 finished with value: 0.9814746963849907 and parameters: {'alpha': 0.0001047677642049178, 'max_iter': 2404, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  70%|███████   | 140/200 [5:33:27<2:35:11, 155.19s/itération, Best RMSE=0.981]

[Trial 140] RMSE: 0.9815 | Params: {'alpha': 0.0001047677642049178, 'max_iter': 2404, 'fit_intercept': True}


[I 2025-05-11 13:48:48,297] Trial 139 finished with value: 0.9814738334138426 and parameters: {'alpha': 0.00010070733236739107, 'max_iter': 2423, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  70%|███████   | 141/200 [5:34:22<2:03:02, 125.12s/itération, Best RMSE=0.981]

[Trial 139] RMSE: 0.9815 | Params: {'alpha': 0.00010070733236739107, 'max_iter': 2423, 'fit_intercept': True}


[I 2025-05-11 13:49:21,295] Trial 141 finished with value: 0.9814740104443704 and parameters: {'alpha': 0.00010145568370926256, 'max_iter': 2409, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  71%|███████   | 142/200 [5:34:55<1:34:14, 97.49s/itération, Best RMSE=0.981] 

[Trial 141] RMSE: 0.9815 | Params: {'alpha': 0.00010145568370926256, 'max_iter': 2409, 'fit_intercept': True}


[I 2025-05-11 13:52:53,359] Trial 142 finished with value: 0.98147439409785 and parameters: {'alpha': 0.00010039123504056475, 'max_iter': 2119, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  72%|███████▏  | 143/200 [5:38:27<2:05:15, 131.86s/itération, Best RMSE=0.981]

[Trial 142] RMSE: 0.9815 | Params: {'alpha': 0.00010039123504056475, 'max_iter': 2119, 'fit_intercept': True}


[I 2025-05-11 13:53:56,751] Trial 143 finished with value: 0.9814751308872921 and parameters: {'alpha': 0.00010689799429143464, 'max_iter': 2405, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  72%|███████▏  | 144/200 [5:39:31<1:43:53, 111.32s/itération, Best RMSE=0.981]

[Trial 143] RMSE: 0.9815 | Params: {'alpha': 0.00010689799429143464, 'max_iter': 2405, 'fit_intercept': True}


[I 2025-05-11 13:56:34,196] Trial 144 finished with value: 0.9814747129057518 and parameters: {'alpha': 0.000101447412059344, 'max_iter': 2085, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  72%|███████▎  | 145/200 [5:42:08<1:54:43, 125.16s/itération, Best RMSE=0.981]

[Trial 144] RMSE: 0.9815 | Params: {'alpha': 0.000101447412059344, 'max_iter': 2085, 'fit_intercept': True}


[I 2025-05-11 13:57:40,851] Trial 145 finished with value: 0.9814749774371655 and parameters: {'alpha': 0.00010288219070941133, 'max_iter': 2096, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  73%|███████▎  | 146/200 [5:43:15<1:36:50, 107.60s/itération, Best RMSE=0.981]

[Trial 145] RMSE: 0.9815 | Params: {'alpha': 0.00010288219070941133, 'max_iter': 2096, 'fit_intercept': True}


[I 2025-05-11 13:58:16,039] Trial 146 finished with value: 0.9814748624727039 and parameters: {'alpha': 0.00010223342917378548, 'max_iter': 2089, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  74%|███████▎  | 147/200 [5:43:50<1:15:51, 85.88s/itération, Best RMSE=0.981] 

[Trial 146] RMSE: 0.9815 | Params: {'alpha': 0.00010223342917378548, 'max_iter': 2089, 'fit_intercept': True}


[I 2025-05-11 13:59:09,054] Trial 147 finished with value: 0.9814745956147881 and parameters: {'alpha': 0.00010172469303144563, 'max_iter': 2147, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  74%|███████▍  | 148/200 [5:44:43<1:05:53, 76.02s/itération, Best RMSE=0.981]

[Trial 147] RMSE: 0.9815 | Params: {'alpha': 0.00010172469303144563, 'max_iter': 2147, 'fit_intercept': True}


[I 2025-05-11 13:59:37,324] Trial 149 finished with value: 0.9814907243680728 and parameters: {'alpha': 0.00017437671556568143, 'max_iter': 1895, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  74%|███████▍  | 149/200 [5:45:11<52:26, 61.70s/itération, Best RMSE=0.981]  

[Trial 149] RMSE: 0.9815 | Params: {'alpha': 0.00017437671556568143, 'max_iter': 1895, 'fit_intercept': True}


[I 2025-05-11 13:59:58,764] Trial 148 finished with value: 0.9814741955188885 and parameters: {'alpha': 0.0001036928394956078, 'max_iter': 2666, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  75%|███████▌  | 150/200 [5:45:33<41:20, 49.62s/itération, Best RMSE=0.981]

[Trial 148] RMSE: 0.9815 | Params: {'alpha': 0.0001036928394956078, 'max_iter': 2666, 'fit_intercept': True}


[I 2025-05-11 14:08:55,104] Trial 150 finished with value: 0.9814886742767882 and parameters: {'alpha': 0.00017296567336744608, 'max_iter': 2652, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  76%|███████▌  | 151/200 [5:54:29<2:39:46, 195.63s/itération, Best RMSE=0.981]

[Trial 150] RMSE: 0.9815 | Params: {'alpha': 0.00017296567336744608, 'max_iter': 2652, 'fit_intercept': True}


[I 2025-05-11 14:09:39,426] Trial 151 finished with value: 0.9814746679957058 and parameters: {'alpha': 0.00010117458476232903, 'max_iter': 2081, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  76%|███████▌  | 152/200 [5:55:13<2:00:11, 150.24s/itération, Best RMSE=0.981]

[Trial 151] RMSE: 0.9815 | Params: {'alpha': 0.00010117458476232903, 'max_iter': 2081, 'fit_intercept': True}


[I 2025-05-11 14:10:42,651] Trial 159 finished with value: inf and parameters: {'alpha': 0.00018409487885286065, 'max_iter': 2122, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  76%|███████▋  | 153/200 [5:56:17<1:37:14, 124.14s/itération, Best RMSE=0.981]

Trial 159 Erreur : 


[I 2025-05-11 14:11:07,215] Trial 153 finished with value: 0.9814879009015194 and parameters: {'alpha': 0.00016536756070992421, 'max_iter': 2134, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  77%|███████▋  | 154/200 [5:56:41<1:12:16, 94.26s/itération, Best RMSE=0.981] 

[Trial 153] RMSE: 0.9815 | Params: {'alpha': 0.00016536756070992421, 'max_iter': 2134, 'fit_intercept': True}


[I 2025-05-11 14:12:31,393] Trial 152 finished with value: 0.9814738093566916 and parameters: {'alpha': 0.00010085878035306426, 'max_iter': 2456, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  78%|███████▊  | 155/200 [5:58:05<1:08:25, 91.24s/itération, Best RMSE=0.981]

[Trial 152] RMSE: 0.9815 | Params: {'alpha': 0.00010085878035306426, 'max_iter': 2456, 'fit_intercept': True}


[I 2025-05-11 14:14:35,498] Trial 155 finished with value: 0.9814901773991896 and parameters: {'alpha': 0.00017579709049650105, 'max_iter': 2104, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  78%|███████▊  | 156/200 [6:00:09<1:14:08, 101.10s/itération, Best RMSE=0.981]

[Trial 155] RMSE: 0.9815 | Params: {'alpha': 0.00017579709049650105, 'max_iter': 2104, 'fit_intercept': True}


[I 2025-05-11 14:15:57,560] Trial 154 finished with value: 0.9814893763130453 and parameters: {'alpha': 0.0001719631795170261, 'max_iter': 2101, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  78%|███████▊  | 157/200 [6:01:32<1:08:21, 95.39s/itération, Best RMSE=0.981] 

[Trial 154] RMSE: 0.9815 | Params: {'alpha': 0.0001719631795170261, 'max_iter': 2101, 'fit_intercept': True}


[I 2025-05-11 14:18:30,504] Trial 156 finished with value: 0.9814890158726565 and parameters: {'alpha': 0.0001705337906230194, 'max_iter': 2123, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  79%|███████▉  | 158/200 [6:04:04<1:18:51, 112.65s/itération, Best RMSE=0.981]

[Trial 156] RMSE: 0.9815 | Params: {'alpha': 0.0001705337906230194, 'max_iter': 2123, 'fit_intercept': True}


[I 2025-05-11 14:19:45,680] Trial 157 finished with value: 0.9814890979456747 and parameters: {'alpha': 0.00017098481950934586, 'max_iter': 2127, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  80%|███████▉  | 159/200 [6:05:20<1:09:17, 101.41s/itération, Best RMSE=0.981]

[Trial 157] RMSE: 0.9815 | Params: {'alpha': 0.00017098481950934586, 'max_iter': 2127, 'fit_intercept': True}


[I 2025-05-11 14:19:55,000] Trial 158 finished with value: 0.9814889070716888 and parameters: {'alpha': 0.00016635081848922112, 'max_iter': 1921, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  80%|████████  | 160/200 [6:05:29<49:11, 73.78s/itération, Best RMSE=0.981]   

[Trial 158] RMSE: 0.9815 | Params: {'alpha': 0.00016635081848922112, 'max_iter': 1921, 'fit_intercept': True}


[I 2025-05-11 14:21:01,110] Trial 160 finished with value: 0.981488286340548 and parameters: {'alpha': 0.0001671468506445384, 'max_iter': 2130, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  80%|████████  | 161/200 [6:06:35<46:27, 71.48s/itération, Best RMSE=0.981]

[Trial 160] RMSE: 0.9815 | Params: {'alpha': 0.0001671468506445384, 'max_iter': 2130, 'fit_intercept': True}


[I 2025-05-11 14:22:59,241] Trial 161 finished with value: 0.9814880221746057 and parameters: {'alpha': 0.0001698685144645117, 'max_iter': 2655, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  81%|████████  | 162/200 [6:08:33<54:08, 85.48s/itération, Best RMSE=0.981]

[Trial 161] RMSE: 0.9815 | Params: {'alpha': 0.0001698685144645117, 'max_iter': 2655, 'fit_intercept': True}


[I 2025-05-11 14:28:30,639] Trial 171 finished with value: inf and parameters: {'alpha': 0.03586179776776321, 'max_iter': 2520, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  82%|████████▏ | 163/200 [6:14:05<1:38:12, 159.25s/itération, Best RMSE=0.981]

Trial 171 Erreur : 


[I 2025-05-11 14:29:12,306] Trial 162 finished with value: 0.9814875254594749 and parameters: {'alpha': 0.00016316719296121814, 'max_iter': 2103, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  82%|████████▏ | 164/200 [6:14:46<1:14:23, 123.98s/itération, Best RMSE=0.981]

[Trial 162] RMSE: 0.9815 | Params: {'alpha': 0.00016316719296121814, 'max_iter': 2103, 'fit_intercept': True}


[I 2025-05-11 14:30:36,848] Trial 163 finished with value: 0.9814882308977868 and parameters: {'alpha': 0.00016751052235351152, 'max_iter': 2178, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  82%|████████▎ | 165/200 [6:16:11<1:05:25, 112.15s/itération, Best RMSE=0.981]

[Trial 163] RMSE: 0.9815 | Params: {'alpha': 0.00016751052235351152, 'max_iter': 2178, 'fit_intercept': True}


[I 2025-05-11 14:31:29,726] Trial 172 finished with value: 0.9844779600978731 and parameters: {'alpha': 0.0027104858181235345, 'max_iter': 2463, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  83%|████████▎ | 166/200 [6:17:04<53:28, 94.37s/itération, Best RMSE=0.981]   

[Trial 172] RMSE: 0.9845 | Params: {'alpha': 0.0027104858181235345, 'max_iter': 2463, 'fit_intercept': True}


[I 2025-05-11 14:32:07,357] Trial 165 finished with value: 0.9814866943222678 and parameters: {'alpha': 0.00016353774188140812, 'max_iter': 2563, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  84%|████████▎ | 167/200 [6:17:41<42:32, 77.35s/itération, Best RMSE=0.981]

[Trial 165] RMSE: 0.9815 | Params: {'alpha': 0.00016353774188140812, 'max_iter': 2563, 'fit_intercept': True}


[I 2025-05-11 14:32:37,140] Trial 164 finished with value: 0.9814860716849112 and parameters: {'alpha': 0.00016055223746345786, 'max_iter': 2538, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  84%|████████▍ | 168/200 [6:18:11<33:38, 63.08s/itération, Best RMSE=0.981]

[Trial 164] RMSE: 0.9815 | Params: {'alpha': 0.00016055223746345786, 'max_iter': 2538, 'fit_intercept': True}


[I 2025-05-11 15:00:02,079] Trial 166 finished with value: 0.981485253963326 and parameters: {'alpha': 0.00015663297244137656, 'max_iter': 2526, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  84%|████████▍ | 169/200 [6:45:36<4:37:46, 537.63s/itération, Best RMSE=0.981]

[Trial 166] RMSE: 0.9815 | Params: {'alpha': 0.00015663297244137656, 'max_iter': 2526, 'fit_intercept': True}


[I 2025-05-11 15:01:47,583] Trial 167 finished with value: 0.9814879199595042 and parameters: {'alpha': 0.00016937372373860936, 'max_iter': 2544, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  85%|████████▌ | 170/200 [6:47:22<3:23:59, 408.00s/itération, Best RMSE=0.981]

[Trial 167] RMSE: 0.9815 | Params: {'alpha': 0.00016937372373860936, 'max_iter': 2544, 'fit_intercept': True}


[I 2025-05-11 15:03:14,118] Trial 168 finished with value: 0.9814850787585527 and parameters: {'alpha': 0.0001557495685797489, 'max_iter': 2515, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  86%|████████▌ | 171/200 [6:48:48<2:30:35, 311.56s/itération, Best RMSE=0.981]

[Trial 168] RMSE: 0.9815 | Params: {'alpha': 0.0001557495685797489, 'max_iter': 2515, 'fit_intercept': True}


[I 2025-05-11 15:05:01,130] Trial 177 finished with value: inf and parameters: {'alpha': 3.1003132968284377, 'max_iter': 2758, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  86%|████████▌ | 172/200 [6:50:35<1:56:45, 250.19s/itération, Best RMSE=0.981]

Trial 177 Erreur : 


[I 2025-05-11 15:06:09,249] Trial 169 finished with value: 0.9814853616410268 and parameters: {'alpha': 0.0001570203231685198, 'max_iter': 2496, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  86%|████████▋ | 173/200 [6:51:43<1:28:00, 195.57s/itération, Best RMSE=0.981]

[Trial 169] RMSE: 0.9815 | Params: {'alpha': 0.0001570203231685198, 'max_iter': 2496, 'fit_intercept': True}


[I 2025-05-11 15:08:00,364] Trial 170 finished with value: 0.9814840930144764 and parameters: {'alpha': 0.00015112345158158492, 'max_iter': 2553, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  87%|████████▋ | 174/200 [6:53:34<1:13:46, 170.23s/itération, Best RMSE=0.981]

[Trial 170] RMSE: 0.9815 | Params: {'alpha': 0.00015112345158158492, 'max_iter': 2553, 'fit_intercept': True}


[I 2025-05-11 15:11:43,072] Trial 173 finished with value: 0.9814742133193874 and parameters: {'alpha': 0.00010320000432571973, 'max_iter': 2511, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  88%|████████▊ | 175/200 [6:57:17<1:17:29, 185.98s/itération, Best RMSE=0.981]

[Trial 173] RMSE: 0.9815 | Params: {'alpha': 0.00010320000432571973, 'max_iter': 2511, 'fit_intercept': True}


[I 2025-05-11 15:18:07,518] Trial 175 finished with value: 0.9814742472030086 and parameters: {'alpha': 0.00010312495006712677, 'max_iter': 2473, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  88%|████████▊ | 176/200 [7:03:41<1:38:12, 245.52s/itération, Best RMSE=0.981]

[Trial 175] RMSE: 0.9815 | Params: {'alpha': 0.00010312495006712677, 'max_iter': 2473, 'fit_intercept': True}


[I 2025-05-11 15:18:10,807] Trial 174 finished with value: 0.9814736779911064 and parameters: {'alpha': 0.00010063500441425112, 'max_iter': 2521, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  88%|████████▊ | 177/200 [7:03:45<1:06:15, 172.85s/itération, Best RMSE=0.981]

[Trial 174] RMSE: 0.9815 | Params: {'alpha': 0.00010063500441425112, 'max_iter': 2521, 'fit_intercept': True}


[I 2025-05-11 15:20:30,717] Trial 176 finished with value: 0.9814741720859635 and parameters: {'alpha': 0.00010268149581305609, 'max_iter': 2462, 'fit_intercept': True}. Best is trial 100 with value: 0.9814736428180454.
Optimisation des hyperparamètres:  89%|████████▉ | 178/200 [7:06:05<59:45, 162.97s/itération, Best RMSE=0.981]  

[Trial 176] RMSE: 0.9815 | Params: {'alpha': 0.00010268149581305609, 'max_iter': 2462, 'fit_intercept': True}


[I 2025-05-11 15:21:09,950] Trial 178 finished with value: 0.9814734234723826 and parameters: {'alpha': 0.00010000087508959247, 'max_iter': 2752, 'fit_intercept': True}. Best is trial 178 with value: 0.9814734234723826.
Optimisation des hyperparamètres:  90%|████████▉ | 179/200 [7:06:44<44:02, 125.85s/itération, Best RMSE=0.981]

[Trial 178] RMSE: 0.9815 | Params: {'alpha': 0.00010000087508959247, 'max_iter': 2752, 'fit_intercept': True}


[I 2025-05-11 15:21:35,348] Trial 179 finished with value: 0.9814742793524477 and parameters: {'alpha': 0.00010412134694590204, 'max_iter': 4490, 'fit_intercept': True}. Best is trial 178 with value: 0.9814734234723826.
Optimisation des hyperparamètres:  90%|█████████ | 180/200 [7:07:09<31:54, 95.71s/itération, Best RMSE=0.981] 

[Trial 179] RMSE: 0.9815 | Params: {'alpha': 0.00010412134694590204, 'max_iter': 4490, 'fit_intercept': True}


[I 2025-05-11 15:25:34,977] Trial 180 finished with value: 0.981473858755791 and parameters: {'alpha': 0.00010103101204370231, 'max_iter': 2447, 'fit_intercept': True}. Best is trial 178 with value: 0.9814734234723826.
Optimisation des hyperparamètres:  90%|█████████ | 181/200 [7:11:09<43:58, 138.89s/itération, Best RMSE=0.981]

[Trial 180] RMSE: 0.9815 | Params: {'alpha': 0.00010103101204370231, 'max_iter': 2447, 'fit_intercept': True}


[I 2025-05-11 15:26:40,907] Trial 181 finished with value: 0.9814755460527115 and parameters: {'alpha': 0.0001085389004853527, 'max_iter': 2365, 'fit_intercept': True}. Best is trial 178 with value: 0.9814734234723826.
Optimisation des hyperparamètres:  91%|█████████ | 182/200 [7:12:15<35:05, 117.00s/itération, Best RMSE=0.981]

[Trial 181] RMSE: 0.9815 | Params: {'alpha': 0.0001085389004853527, 'max_iter': 2365, 'fit_intercept': True}


[I 2025-05-11 15:29:04,132] Trial 182 finished with value: 0.9814745932053194 and parameters: {'alpha': 0.00010362048653499266, 'max_iter': 2337, 'fit_intercept': True}. Best is trial 178 with value: 0.9814734234723826.
Optimisation des hyperparamètres:  92%|█████████▏| 183/200 [7:14:38<35:22, 124.87s/itération, Best RMSE=0.981]

[Trial 182] RMSE: 0.9815 | Params: {'alpha': 0.00010362048653499266, 'max_iter': 2337, 'fit_intercept': True}


[I 2025-05-11 15:29:43,542] Trial 183 finished with value: 0.9814746882287475 and parameters: {'alpha': 0.00010415238150768384, 'max_iter': 2344, 'fit_intercept': True}. Best is trial 178 with value: 0.9814734234723826.
Optimisation des hyperparamètres:  92%|█████████▏| 184/200 [7:15:18<26:27, 99.23s/itération, Best RMSE=0.981] 

[Trial 183] RMSE: 0.9815 | Params: {'alpha': 0.00010415238150768384, 'max_iter': 2344, 'fit_intercept': True}


[I 2025-05-11 15:31:31,516] Trial 184 finished with value: 0.9814743626020438 and parameters: {'alpha': 0.00010248351214484058, 'max_iter': 2336, 'fit_intercept': True}. Best is trial 178 with value: 0.9814734234723826.
Optimisation des hyperparamètres:  92%|█████████▎| 185/200 [7:17:05<25:27, 101.85s/itération, Best RMSE=0.981]

[Trial 184] RMSE: 0.9815 | Params: {'alpha': 0.00010248351214484058, 'max_iter': 2336, 'fit_intercept': True}


[I 2025-05-11 15:33:39,843] Trial 185 finished with value: 0.9814742141917338 and parameters: {'alpha': 0.00010192195616515824, 'max_iter': 2353, 'fit_intercept': True}. Best is trial 178 with value: 0.9814734234723826.
Optimisation des hyperparamètres:  93%|█████████▎| 186/200 [7:19:14<25:37, 109.79s/itération, Best RMSE=0.981]

[Trial 185] RMSE: 0.9815 | Params: {'alpha': 0.00010192195616515824, 'max_iter': 2353, 'fit_intercept': True}


[I 2025-05-11 15:34:20,633] Trial 190 finished with value: inf and parameters: {'alpha': 0.000223935605839666, 'max_iter': 2679, 'fit_intercept': True}. Best is trial 178 with value: 0.9814734234723826.
Optimisation des hyperparamètres:  94%|█████████▎| 187/200 [7:19:55<19:18, 89.10s/itération, Best RMSE=0.981] 

Trial 190 Erreur : 


[I 2025-05-11 15:36:06,569] Trial 191 finished with value: inf and parameters: {'alpha': 0.0002309639676305156, 'max_iter': 2663, 'fit_intercept': True}. Best is trial 178 with value: 0.9814734234723826.
Optimisation des hyperparamètres:  94%|█████████▍| 188/200 [7:21:41<18:49, 94.15s/itération, Best RMSE=0.981]

Trial 191 Erreur : 


[I 2025-05-11 15:37:21,103] Trial 186 finished with value: 0.981474422043554 and parameters: {'alpha': 0.00010289613665879929, 'max_iter': 2349, 'fit_intercept': True}. Best is trial 178 with value: 0.9814734234723826.
Optimisation des hyperparamètres:  94%|█████████▍| 189/200 [7:22:55<16:10, 88.27s/itération, Best RMSE=0.981]

[Trial 186] RMSE: 0.9815 | Params: {'alpha': 0.00010289613665879929, 'max_iter': 2349, 'fit_intercept': True}


[I 2025-05-11 16:42:47,450] Trial 187 finished with value: 0.9814785872672059 and parameters: {'alpha': 0.00012481117684842063, 'max_iter': 4395, 'fit_intercept': True}. Best is trial 178 with value: 0.9814734234723826.
Optimisation des hyperparamètres:  95%|█████████▌| 190/200 [8:28:21<3:26:36, 1239.69s/itération, Best RMSE=0.981]

[Trial 187] RMSE: 0.9815 | Params: {'alpha': 0.00012481117684842063, 'max_iter': 4395, 'fit_intercept': True}


[I 2025-05-11 16:44:26,099] Trial 188 finished with value: 0.9814744963506146 and parameters: {'alpha': 0.00010516377557399565, 'max_iter': 4591, 'fit_intercept': True}. Best is trial 178 with value: 0.9814734234723826.
Optimisation des hyperparamètres:  96%|█████████▌| 191/200 [8:30:00<2:14:36, 897.37s/itération, Best RMSE=0.981] 

[Trial 188] RMSE: 0.9815 | Params: {'alpha': 0.00010516377557399565, 'max_iter': 4591, 'fit_intercept': True}


[I 2025-05-11 16:44:55,174] Trial 189 finished with value: 0.9814738499840278 and parameters: {'alpha': 0.00010195854018027032, 'max_iter': 2640, 'fit_intercept': True}. Best is trial 178 with value: 0.9814734234723826.
Optimisation des hyperparamètres:  96%|█████████▌| 192/200 [8:30:29<1:24:55, 636.89s/itération, Best RMSE=0.981]

[Trial 189] RMSE: 0.9815 | Params: {'alpha': 0.00010195854018027032, 'max_iter': 2640, 'fit_intercept': True}


[I 2025-05-11 16:48:47,030] Trial 193 finished with value: 0.9814799274411784 and parameters: {'alpha': 0.00013124493595232688, 'max_iter': 4506, 'fit_intercept': True}. Best is trial 178 with value: 0.9814734234723826.
Optimisation des hyperparamètres:  96%|█████████▋| 193/200 [8:34:21<1:00:07, 515.38s/itération, Best RMSE=0.981]

[Trial 193] RMSE: 0.9815 | Params: {'alpha': 0.00013124493595232688, 'max_iter': 4506, 'fit_intercept': True}


[I 2025-05-11 16:48:51,001] Trial 192 finished with value: 0.9814736573426383 and parameters: {'alpha': 0.00010112580682751744, 'max_iter': 4564, 'fit_intercept': True}. Best is trial 178 with value: 0.9814734234723826.
Optimisation des hyperparamètres:  97%|█████████▋| 194/200 [8:34:25<36:11, 361.95s/itération, Best RMSE=0.981]  

[Trial 192] RMSE: 0.9815 | Params: {'alpha': 0.00010112580682751744, 'max_iter': 4564, 'fit_intercept': True}


[I 2025-05-11 16:50:12,223] Trial 194 finished with value: 0.9814795324472531 and parameters: {'alpha': 0.00012934202274671002, 'max_iter': 2664, 'fit_intercept': True}. Best is trial 178 with value: 0.9814734234723826.
Optimisation des hyperparamètres:  98%|█████████▊| 195/200 [8:35:46<23:08, 277.73s/itération, Best RMSE=0.981]

[Trial 194] RMSE: 0.9815 | Params: {'alpha': 0.00012934202274671002, 'max_iter': 2664, 'fit_intercept': True}


[I 2025-05-11 16:50:32,232] Trial 195 finished with value: 0.9814801002872175 and parameters: {'alpha': 0.00013207352909171897, 'max_iter': 4736, 'fit_intercept': True}. Best is trial 178 with value: 0.9814734234723826.
Optimisation des hyperparamètres:  98%|█████████▊| 196/200 [8:36:06<13:21, 200.41s/itération, Best RMSE=0.981]

[Trial 195] RMSE: 0.9815 | Params: {'alpha': 0.00013207352909171897, 'max_iter': 4736, 'fit_intercept': True}


[I 2025-05-11 16:51:03,449] Trial 196 finished with value: 0.981479528488619 and parameters: {'alpha': 0.00012932217836274392, 'max_iter': 4679, 'fit_intercept': True}. Best is trial 178 with value: 0.9814734234723826.
Optimisation des hyperparamètres:  98%|█████████▊| 197/200 [8:36:37<07:28, 149.66s/itération, Best RMSE=0.981]

[Trial 196] RMSE: 0.9815 | Params: {'alpha': 0.00012932217836274392, 'max_iter': 4679, 'fit_intercept': True}


[I 2025-05-11 16:51:40,337] Trial 197 finished with value: 0.9814798587857808 and parameters: {'alpha': 0.00013091805480228082, 'max_iter': 4625, 'fit_intercept': True}. Best is trial 178 with value: 0.9814734234723826.
Optimisation des hyperparamètres:  99%|█████████▉| 198/200 [8:37:14<03:51, 115.83s/itération, Best RMSE=0.981]

[Trial 197] RMSE: 0.9815 | Params: {'alpha': 0.00013091805480228082, 'max_iter': 4625, 'fit_intercept': True}


[I 2025-05-11 16:51:42,549] Trial 198 finished with value: 0.9814796329721207 and parameters: {'alpha': 0.00012982822789370599, 'max_iter': 4742, 'fit_intercept': True}. Best is trial 178 with value: 0.9814734234723826.
Optimisation des hyperparamètres: 100%|█████████▉| 199/200 [8:37:17<01:21, 81.74s/itération, Best RMSE=0.981] 

[Trial 198] RMSE: 0.9815 | Params: {'alpha': 0.00012982822789370599, 'max_iter': 4742, 'fit_intercept': True}


[I 2025-05-11 16:52:11,659] Trial 199 finished with value: 0.9814790102400976 and parameters: {'alpha': 0.0001268481505437036, 'max_iter': 4198, 'fit_intercept': True}. Best is trial 178 with value: 0.9814734234723826.
Optimisation des hyperparamètres: 100%|██████████| 200/200 [8:37:46<00:00, 155.33s/itération, Best RMSE=0.981]

[Trial 199] RMSE: 0.9815 | Params: {'alpha': 0.0001268481505437036, 'max_iter': 4198, 'fit_intercept': True}



[I 2025-05-11 16:54:52,505] A new study created in memory with name: no-name-b81701de-f8a6-406a-bec7-2a4f3652e3e8



🔍 Traitement du modèle : ElasticNet


Optimisation des hyperparamètres:   0%|          | 0/200 [00:00<?, ?itération/s][I 2025-05-11 16:58:58,168] Trial 0 finished with value: 7.337009667195793 and parameters: {'alpha': 4.939342432146478, 'l1_ratio': 0.13869010333185974, 'max_iter': 1038, 'fit_intercept': True}. Best is trial 0 with value: 7.337009667195793.
Optimisation des hyperparamètres:   0%|          | 1/200 [04:05<13:34:44, 245.65s/itération, Best RMSE=7.34]

[Trial 0] RMSE: 7.3370 | Params: {'alpha': 4.939342432146478, 'l1_ratio': 0.13869010333185974, 'max_iter': 1038, 'fit_intercept': True}


[I 2025-05-11 16:59:38,706] Trial 2 finished with value: 33.528815246017835 and parameters: {'alpha': 1.600778362847697, 'l1_ratio': 0.30142469727029275, 'max_iter': 4525, 'fit_intercept': False}. Best is trial 0 with value: 7.337009667195793.
Optimisation des hyperparamètres:   1%|          | 2/200 [04:46<6:52:29, 125.00s/itération, Best RMSE=7.34] 

[Trial 2] RMSE: 33.5288 | Params: {'alpha': 1.600778362847697, 'l1_ratio': 0.30142469727029275, 'max_iter': 4525, 'fit_intercept': False}


[I 2025-05-11 16:59:46,799] Trial 6 finished with value: 33.70148108394401 and parameters: {'alpha': 2.2165257022517455, 'l1_ratio': 0.5361895958247156, 'max_iter': 1659, 'fit_intercept': False}. Best is trial 0 with value: 7.337009667195793.
Optimisation des hyperparamètres:   2%|▏         | 3/200 [04:54<3:55:08, 71.62s/itération, Best RMSE=7.34] 

[Trial 6] RMSE: 33.7015 | Params: {'alpha': 2.2165257022517455, 'l1_ratio': 0.5361895958247156, 'max_iter': 1659, 'fit_intercept': False}


[I 2025-05-11 17:00:04,206] Trial 8 finished with value: 33.25951693619793 and parameters: {'alpha': 1.1107210429094803, 'l1_ratio': 0.12972722041452003, 'max_iter': 4953, 'fit_intercept': False}. Best is trial 0 with value: 7.337009667195793.
Optimisation des hyperparamètres:   2%|▏         | 4/200 [05:11<2:44:02, 50.22s/itération, Best RMSE=7.34]

[Trial 8] RMSE: 33.2595 | Params: {'alpha': 1.1107210429094803, 'l1_ratio': 0.12972722041452003, 'max_iter': 4953, 'fit_intercept': False}


[I 2025-05-11 17:00:58,960] Trial 3 finished with value: 1.0300486940617295 and parameters: {'alpha': 0.02905076729603396, 'l1_ratio': 0.4310211373310844, 'max_iter': 4028, 'fit_intercept': True}. Best is trial 3 with value: 1.0300486940617295.
Optimisation des hyperparamètres:   2%|▎         | 5/200 [06:06<2:48:30, 51.85s/itération, Best RMSE=1.03]

[Trial 3] RMSE: 1.0300 | Params: {'alpha': 0.02905076729603396, 'l1_ratio': 0.4310211373310844, 'max_iter': 4028, 'fit_intercept': True}


[I 2025-05-11 17:01:15,702] Trial 9 finished with value: 31.956411137083244 and parameters: {'alpha': 0.25345702222385996, 'l1_ratio': 0.3928925952322897, 'max_iter': 1325, 'fit_intercept': False}. Best is trial 3 with value: 1.0300486940617295.
Optimisation des hyperparamètres:   3%|▎         | 6/200 [06:23<2:09:03, 39.91s/itération, Best RMSE=1.03]

[Trial 9] RMSE: 31.9564 | Params: {'alpha': 0.25345702222385996, 'l1_ratio': 0.3928925952322897, 'max_iter': 1325, 'fit_intercept': False}


[I 2025-05-11 17:01:18,685] Trial 5 finished with value: 0.9859084610506961 and parameters: {'alpha': 0.0030514282882234942, 'l1_ratio': 0.5497003194921292, 'max_iter': 1184, 'fit_intercept': True}. Best is trial 5 with value: 0.9859084610506961.
Optimisation des hyperparamètres:   4%|▎         | 7/200 [06:26<1:29:33, 27.84s/itération, Best RMSE=0.986]

[Trial 5] RMSE: 0.9859 | Params: {'alpha': 0.0030514282882234942, 'l1_ratio': 0.5497003194921292, 'max_iter': 1184, 'fit_intercept': True}


[I 2025-05-11 17:02:44,743] Trial 10 finished with value: 0.9875600593837995 and parameters: {'alpha': 0.0041665180550081405, 'l1_ratio': 0.1527576392211793, 'max_iter': 2326, 'fit_intercept': True}. Best is trial 5 with value: 0.9859084610506961.
Optimisation des hyperparamètres:   4%|▍         | 8/200 [07:52<2:28:23, 46.37s/itération, Best RMSE=0.986]

[Trial 10] RMSE: 0.9876 | Params: {'alpha': 0.0041665180550081405, 'l1_ratio': 0.1527576392211793, 'max_iter': 2326, 'fit_intercept': True}


[I 2025-05-11 17:04:50,475] Trial 11 finished with value: 0.9861370505012078 and parameters: {'alpha': 0.00026634581466284757, 'l1_ratio': 0.1222023940852587, 'max_iter': 3631, 'fit_intercept': True}. Best is trial 5 with value: 0.9859084610506961.
Optimisation des hyperparamètres:   4%|▍         | 9/200 [09:57<3:46:35, 71.18s/itération, Best RMSE=0.986]

[Trial 11] RMSE: 0.9861 | Params: {'alpha': 0.00026634581466284757, 'l1_ratio': 0.1222023940852587, 'max_iter': 3631, 'fit_intercept': True}


[I 2025-05-11 17:05:13,882] Trial 7 finished with value: 3.6927179236739947 and parameters: {'alpha': 0.5726606119371881, 'l1_ratio': 0.2945278805488113, 'max_iter': 2300, 'fit_intercept': True}. Best is trial 5 with value: 0.9859084610506961.
Optimisation des hyperparamètres:   5%|▌         | 10/200 [10:21<2:58:42, 56.43s/itération, Best RMSE=0.986]

[Trial 7] RMSE: 3.6927 | Params: {'alpha': 0.5726606119371881, 'l1_ratio': 0.2945278805488113, 'max_iter': 2300, 'fit_intercept': True}


[I 2025-05-11 17:05:47,011] Trial 14 finished with value: inf and parameters: {'alpha': 4.880071180696536, 'l1_ratio': 0.9821047675754574, 'max_iter': 2573, 'fit_intercept': True}. Best is trial 5 with value: 0.9859084610506961.
Optimisation des hyperparamètres:   6%|▌         | 11/200 [10:54<2:35:17, 49.30s/itération, Best RMSE=0.986]

Trial 14 Erreur : 


[I 2025-05-11 17:07:07,180] Trial 13 finished with value: 1.2908785464079027 and parameters: {'alpha': 0.11459257859025776, 'l1_ratio': 0.570912762879201, 'max_iter': 3449, 'fit_intercept': True}. Best is trial 5 with value: 0.9859084610506961.
Optimisation des hyperparamètres:   6%|▌         | 12/200 [12:14<3:03:54, 58.69s/itération, Best RMSE=0.986]

[Trial 13] RMSE: 1.2909 | Params: {'alpha': 0.11459257859025776, 'l1_ratio': 0.570912762879201, 'max_iter': 3449, 'fit_intercept': True}


[I 2025-05-11 17:09:09,416] Trial 18 finished with value: inf and parameters: {'alpha': 2.1096769817959444, 'l1_ratio': 0.24900908557140145, 'max_iter': 2297, 'fit_intercept': False}. Best is trial 5 with value: 0.9859084610506961.
Optimisation des hyperparamètres:   6%|▋         | 13/200 [14:16<4:02:54, 77.94s/itération, Best RMSE=0.986]

Trial 18 Erreur : 


[I 2025-05-11 17:10:41,660] Trial 4 finished with value: inf and parameters: {'alpha': 0.005849299670584855, 'l1_ratio': 0.3507100301156417, 'max_iter': 1257, 'fit_intercept': False}. Best is trial 5 with value: 0.9859084610506961.
Optimisation des hyperparamètres:   7%|▋         | 14/200 [15:49<4:15:00, 82.26s/itération, Best RMSE=0.986]

Trial 4 Erreur : 


[I 2025-05-11 17:11:59,803] Trial 16 finished with value: 0.9856119314618904 and parameters: {'alpha': 0.0013511561220548845, 'l1_ratio': 0.38950436342981776, 'max_iter': 1745, 'fit_intercept': True}. Best is trial 16 with value: 0.9856119314618904.
Optimisation des hyperparamètres:   8%|▊         | 15/200 [17:07<4:09:48, 81.02s/itération, Best RMSE=0.986]

[Trial 16] RMSE: 0.9856 | Params: {'alpha': 0.0013511561220548845, 'l1_ratio': 0.38950436342981776, 'max_iter': 1745, 'fit_intercept': True}


[I 2025-05-11 17:14:40,908] Trial 19 finished with value: 0.9856023105538 and parameters: {'alpha': 0.0007769444253747254, 'l1_ratio': 0.32344359593662775, 'max_iter': 2994, 'fit_intercept': True}. Best is trial 19 with value: 0.9856023105538.
Optimisation des hyperparamètres:   8%|▊         | 16/200 [19:48<5:22:23, 105.13s/itération, Best RMSE=0.986]

[Trial 19] RMSE: 0.9856 | Params: {'alpha': 0.0007769444253747254, 'l1_ratio': 0.32344359593662775, 'max_iter': 2994, 'fit_intercept': True}


[I 2025-05-11 17:18:07,485] Trial 20 finished with value: inf and parameters: {'alpha': 0.0466628930392303, 'l1_ratio': 0.923969845276576, 'max_iter': 1940, 'fit_intercept': False}. Best is trial 19 with value: 0.9856023105538.
Optimisation des hyperparamètres:   8%|▊         | 17/200 [23:14<6:53:41, 135.64s/itération, Best RMSE=0.986]

Trial 20 Erreur : 


[I 2025-05-11 17:18:18,470] Trial 21 finished with value: 0.9860276712582959 and parameters: {'alpha': 0.0003485525620377778, 'l1_ratio': 0.8598608831007757, 'max_iter': 2913, 'fit_intercept': True}. Best is trial 19 with value: 0.9856023105538.
Optimisation des hyperparamètres:   9%|▉         | 18/200 [23:25<4:57:47, 98.18s/itération, Best RMSE=0.986] 

[Trial 21] RMSE: 0.9860 | Params: {'alpha': 0.0003485525620377778, 'l1_ratio': 0.8598608831007757, 'max_iter': 2913, 'fit_intercept': True}


[I 2025-05-11 17:22:40,867] Trial 12 finished with value: inf and parameters: {'alpha': 0.003506667606049801, 'l1_ratio': 0.5548694607815983, 'max_iter': 3497, 'fit_intercept': False}. Best is trial 19 with value: 0.9856023105538.
Optimisation des hyperparamètres:  10%|▉         | 19/200 [27:48<7:24:57, 147.50s/itération, Best RMSE=0.986]

Trial 12 Erreur : 


[I 2025-05-11 17:25:00,767] Trial 26 finished with value: 0.9861598743347256 and parameters: {'alpha': 0.00027856950108444677, 'l1_ratio': 0.7418828461450849, 'max_iter': 1746, 'fit_intercept': True}. Best is trial 19 with value: 0.9856023105538.
Optimisation des hyperparamètres:  10%|█         | 20/200 [30:08<7:15:39, 145.22s/itération, Best RMSE=0.986]

[Trial 26] RMSE: 0.9862 | Params: {'alpha': 0.00027856950108444677, 'l1_ratio': 0.7418828461450849, 'max_iter': 1746, 'fit_intercept': True}


[I 2025-05-11 17:27:38,513] Trial 1 finished with value: inf and parameters: {'alpha': 0.0003312001807355349, 'l1_ratio': 0.8122615403090716, 'max_iter': 4557, 'fit_intercept': False}. Best is trial 19 with value: 0.9856023105538.
Optimisation des hyperparamètres:  10%|█         | 21/200 [32:46<7:24:28, 148.98s/itération, Best RMSE=0.986]

Trial 1 Erreur : 


[I 2025-05-11 17:30:30,181] Trial 22 finished with value: 0.9814751805998729 and parameters: {'alpha': 0.00010529552118239793, 'l1_ratio': 0.7508855639531777, 'max_iter': 3550, 'fit_intercept': True}. Best is trial 22 with value: 0.9814751805998729.
Optimisation des hyperparamètres:  11%|█         | 22/200 [35:37<7:42:09, 155.79s/itération, Best RMSE=0.981]

[Trial 22] RMSE: 0.9815 | Params: {'alpha': 0.00010529552118239793, 'l1_ratio': 0.7508855639531777, 'max_iter': 3550, 'fit_intercept': True}


[I 2025-05-11 17:30:56,483] Trial 29 finished with value: 0.9854435980568419 and parameters: {'alpha': 0.0007836121490579658, 'l1_ratio': 0.7197555357647301, 'max_iter': 3153, 'fit_intercept': True}. Best is trial 22 with value: 0.9814751805998729.
Optimisation des hyperparamètres:  12%|█▏        | 23/200 [36:03<5:44:56, 116.93s/itération, Best RMSE=0.981]

[Trial 29] RMSE: 0.9854 | Params: {'alpha': 0.0007836121490579658, 'l1_ratio': 0.7197555357647301, 'max_iter': 3153, 'fit_intercept': True}


[I 2025-05-11 17:31:04,551] Trial 28 finished with value: 0.9864028739287717 and parameters: {'alpha': 0.00017414271814952066, 'l1_ratio': 0.7309546723061631, 'max_iter': 3135, 'fit_intercept': True}. Best is trial 22 with value: 0.9814751805998729.
Optimisation des hyperparamètres:  12%|█▏        | 24/200 [36:12<4:07:10, 84.26s/itération, Best RMSE=0.981] 

[Trial 28] RMSE: 0.9864 | Params: {'alpha': 0.00017414271814952066, 'l1_ratio': 0.7309546723061631, 'max_iter': 3135, 'fit_intercept': True}


[I 2025-05-11 17:31:28,299] Trial 23 finished with value: 0.9814856444803584 and parameters: {'alpha': 0.00015423823979589788, 'l1_ratio': 0.7735457291354711, 'max_iter': 3357, 'fit_intercept': True}. Best is trial 22 with value: 0.9814751805998729.
Optimisation des hyperparamètres:  12%|█▎        | 25/200 [36:35<3:12:48, 66.11s/itération, Best RMSE=0.981]

[Trial 23] RMSE: 0.9815 | Params: {'alpha': 0.00015423823979589788, 'l1_ratio': 0.7735457291354711, 'max_iter': 3357, 'fit_intercept': True}


[I 2025-05-11 17:32:09,986] Trial 17 finished with value: inf and parameters: {'alpha': 0.003306797640809797, 'l1_ratio': 0.672016195500473, 'max_iter': 2217, 'fit_intercept': False}. Best is trial 22 with value: 0.9814751805998729.
Optimisation des hyperparamètres:  13%|█▎        | 26/200 [37:17<2:50:27, 58.78s/itération, Best RMSE=0.981]

Trial 17 Erreur : 


[I 2025-05-11 17:33:24,791] Trial 24 finished with value: 0.9814743972967545 and parameters: {'alpha': 0.00010186998760602834, 'l1_ratio': 0.7678941304906084, 'max_iter': 3292, 'fit_intercept': True}. Best is trial 24 with value: 0.9814743972967545.
Optimisation des hyperparamètres:  14%|█▎        | 27/200 [38:32<3:03:20, 63.59s/itération, Best RMSE=0.981]

[Trial 24] RMSE: 0.9815 | Params: {'alpha': 0.00010186998760602834, 'l1_ratio': 0.7678941304906084, 'max_iter': 3292, 'fit_intercept': True}


[I 2025-05-11 17:34:41,406] Trial 25 finished with value: 0.9814841568242836 and parameters: {'alpha': 0.00014743408813164684, 'l1_ratio': 0.7772595451412943, 'max_iter': 3338, 'fit_intercept': True}. Best is trial 24 with value: 0.9814743972967545.
Optimisation des hyperparamètres:  14%|█▍        | 28/200 [39:48<3:13:30, 67.50s/itération, Best RMSE=0.981]

[Trial 25] RMSE: 0.9815 | Params: {'alpha': 0.00014743408813164684, 'l1_ratio': 0.7772595451412943, 'max_iter': 3338, 'fit_intercept': True}


[I 2025-05-11 17:38:44,591] Trial 15 finished with value: inf and parameters: {'alpha': 0.0026435506128732297, 'l1_ratio': 0.8834868142246061, 'max_iter': 2341, 'fit_intercept': False}. Best is trial 24 with value: 0.9814743972967545.
Optimisation des hyperparamètres:  14%|█▍        | 29/200 [43:52<5:42:34, 120.20s/itération, Best RMSE=0.981]

Trial 15 Erreur : 


[I 2025-05-11 17:38:55,603] Trial 27 finished with value: 0.9814755458265615 and parameters: {'alpha': 0.00010639727928606018, 'l1_ratio': 0.7085779639988907, 'max_iter': 2942, 'fit_intercept': True}. Best is trial 24 with value: 0.9814743972967545.
Optimisation des hyperparamètres:  15%|█▌        | 30/200 [44:03<4:07:46, 87.45s/itération, Best RMSE=0.981] 

[Trial 27] RMSE: 0.9815 | Params: {'alpha': 0.00010639727928606018, 'l1_ratio': 0.7085779639988907, 'max_iter': 2942, 'fit_intercept': True}


[I 2025-05-11 17:39:39,975] Trial 32 finished with value: 0.9854786126356152 and parameters: {'alpha': 0.001216361237571659, 'l1_ratio': 0.4855038674899276, 'max_iter': 2920, 'fit_intercept': True}. Best is trial 24 with value: 0.9814743972967545.
Optimisation des hyperparamètres:  16%|█▌        | 31/200 [44:47<3:29:54, 74.52s/itération, Best RMSE=0.981]

[Trial 32] RMSE: 0.9855 | Params: {'alpha': 0.001216361237571659, 'l1_ratio': 0.4855038674899276, 'max_iter': 2920, 'fit_intercept': True}


[I 2025-05-11 17:43:46,971] Trial 35 finished with value: 0.985360160076623 and parameters: {'alpha': 0.0009137609199810894, 'l1_ratio': 0.6989580833295368, 'max_iter': 2966, 'fit_intercept': True}. Best is trial 24 with value: 0.9814743972967545.
Optimisation des hyperparamètres:  16%|█▌        | 32/200 [48:54<5:53:32, 126.26s/itération, Best RMSE=0.981]

[Trial 35] RMSE: 0.9854 | Params: {'alpha': 0.0009137609199810894, 'l1_ratio': 0.6989580833295368, 'max_iter': 2966, 'fit_intercept': True}


[I 2025-05-11 17:45:20,949] Trial 37 finished with value: 0.9853684277622915 and parameters: {'alpha': 0.0009698172637517965, 'l1_ratio': 0.6767426835340232, 'max_iter': 3976, 'fit_intercept': True}. Best is trial 24 with value: 0.9814743972967545.
Optimisation des hyperparamètres:  16%|█▋        | 33/200 [50:28<5:24:28, 116.58s/itération, Best RMSE=0.981]

[Trial 37] RMSE: 0.9854 | Params: {'alpha': 0.0009698172637517965, 'l1_ratio': 0.6767426835340232, 'max_iter': 3976, 'fit_intercept': True}


[I 2025-05-11 17:47:42,184] Trial 30 finished with value: 0.9814777698555346 and parameters: {'alpha': 0.00011736045005461632, 'l1_ratio': 0.7515031308233566, 'max_iter': 2945, 'fit_intercept': True}. Best is trial 24 with value: 0.9814743972967545.
Optimisation des hyperparamètres:  17%|█▋        | 34/200 [52:49<5:42:59, 123.98s/itération, Best RMSE=0.981]

[Trial 30] RMSE: 0.9815 | Params: {'alpha': 0.00011736045005461632, 'l1_ratio': 0.7515031308233566, 'max_iter': 2945, 'fit_intercept': True}


[I 2025-05-11 17:50:19,601] Trial 31 finished with value: 0.9814788303146983 and parameters: {'alpha': 0.00012177347139441196, 'l1_ratio': 0.72191744201518, 'max_iter': 2976, 'fit_intercept': True}. Best is trial 24 with value: 0.9814743972967545.
Optimisation des hyperparamètres:  18%|█▊        | 35/200 [55:27<6:08:31, 134.01s/itération, Best RMSE=0.981]

[Trial 31] RMSE: 0.9815 | Params: {'alpha': 0.00012177347139441196, 'l1_ratio': 0.72191744201518, 'max_iter': 2976, 'fit_intercept': True}


[I 2025-05-11 17:50:40,487] Trial 41 finished with value: inf and parameters: {'alpha': 0.014017916823607745, 'l1_ratio': 0.6329164364317976, 'max_iter': 3836, 'fit_intercept': True}. Best is trial 24 with value: 0.9814743972967545.
Optimisation des hyperparamètres:  18%|█▊        | 36/200 [55:47<4:33:31, 100.07s/itération, Best RMSE=0.981]

Trial 41 Erreur : 


[I 2025-05-11 17:56:15,284] Trial 36 finished with value: 0.9814786462665259 and parameters: {'alpha': 0.00011995550342540028, 'l1_ratio': 0.664971253711971, 'max_iter': 3927, 'fit_intercept': True}. Best is trial 24 with value: 0.9814743972967545.
Optimisation des hyperparamètres:  18%|█▊        | 37/200 [1:01:22<7:43:10, 170.50s/itération, Best RMSE=0.981]

[Trial 36] RMSE: 0.9815 | Params: {'alpha': 0.00011995550342540028, 'l1_ratio': 0.664971253711971, 'max_iter': 3927, 'fit_intercept': True}


[I 2025-05-11 17:57:05,196] Trial 33 finished with value: 0.9814765921710368 and parameters: {'alpha': 0.00011072140786657846, 'l1_ratio': 0.6757774407908042, 'max_iter': 3041, 'fit_intercept': True}. Best is trial 24 with value: 0.9814743972967545.
Optimisation des hyperparamètres:  19%|█▉        | 38/200 [1:02:12<6:02:38, 134.31s/itération, Best RMSE=0.981]

[Trial 33] RMSE: 0.9815 | Params: {'alpha': 0.00011072140786657846, 'l1_ratio': 0.6757774407908042, 'max_iter': 3041, 'fit_intercept': True}


[I 2025-05-11 17:57:31,629] Trial 34 finished with value: 0.9814749505947334 and parameters: {'alpha': 0.00010347133567452743, 'l1_ratio': 0.6969709505916184, 'max_iter': 2939, 'fit_intercept': True}. Best is trial 24 with value: 0.9814743972967545.
Optimisation des hyperparamètres:  20%|█▉        | 39/200 [1:02:39<4:33:33, 101.95s/itération, Best RMSE=0.981]

[Trial 34] RMSE: 0.9815 | Params: {'alpha': 0.00010347133567452743, 'l1_ratio': 0.6969709505916184, 'max_iter': 2939, 'fit_intercept': True}


[I 2025-05-11 17:58:44,152] Trial 38 finished with value: 0.9814812170434839 and parameters: {'alpha': 0.00013450802802408287, 'l1_ratio': 0.8170383830307577, 'max_iter': 3850, 'fit_intercept': True}. Best is trial 24 with value: 0.9814743972967545.
Optimisation des hyperparamètres:  20%|██        | 40/200 [1:03:51<4:08:19, 93.12s/itération, Best RMSE=0.981] 

[Trial 38] RMSE: 0.9815 | Params: {'alpha': 0.00013450802802408287, 'l1_ratio': 0.8170383830307577, 'max_iter': 3850, 'fit_intercept': True}


[I 2025-05-11 18:01:50,856] Trial 39 finished with value: 0.9814745021488771 and parameters: {'alpha': 0.00010071213260348049, 'l1_ratio': 0.6476275712920979, 'max_iter': 3840, 'fit_intercept': True}. Best is trial 24 with value: 0.9814743972967545.
Optimisation des hyperparamètres:  20%|██        | 41/200 [1:06:58<5:21:10, 121.20s/itération, Best RMSE=0.981]

[Trial 39] RMSE: 0.9815 | Params: {'alpha': 0.00010071213260348049, 'l1_ratio': 0.6476275712920979, 'max_iter': 3840, 'fit_intercept': True}


[I 2025-05-11 18:03:31,229] Trial 40 finished with value: 0.9814744292391918 and parameters: {'alpha': 0.00010035541393541077, 'l1_ratio': 0.6457899297001134, 'max_iter': 3967, 'fit_intercept': True}. Best is trial 24 with value: 0.9814743972967545.
Optimisation des hyperparamètres:  21%|██        | 42/200 [1:08:38<5:02:41, 114.95s/itération, Best RMSE=0.981]

[Trial 40] RMSE: 0.9815 | Params: {'alpha': 0.00010035541393541077, 'l1_ratio': 0.6457899297001134, 'max_iter': 3967, 'fit_intercept': True}


[I 2025-05-11 18:03:35,401] Trial 46 finished with value: 0.9858612169983386 and parameters: {'alpha': 0.0004299567755503088, 'l1_ratio': 0.6127983569781527, 'max_iter': 3753, 'fit_intercept': True}. Best is trial 24 with value: 0.9814743972967545.
Optimisation des hyperparamètres:  22%|██▏       | 43/200 [1:08:42<3:33:49, 81.72s/itération, Best RMSE=0.981] 

[Trial 46] RMSE: 0.9859 | Params: {'alpha': 0.0004299567755503088, 'l1_ratio': 0.6127983569781527, 'max_iter': 3753, 'fit_intercept': True}


[I 2025-05-11 18:05:51,269] Trial 42 finished with value: 0.9814751017360706 and parameters: {'alpha': 0.00010328979262601854, 'l1_ratio': 0.635613598962299, 'max_iter': 3935, 'fit_intercept': True}. Best is trial 24 with value: 0.9814743972967545.
Optimisation des hyperparamètres:  22%|██▏       | 44/200 [1:10:58<4:14:42, 97.96s/itération, Best RMSE=0.981]

[Trial 42] RMSE: 0.9815 | Params: {'alpha': 0.00010328979262601854, 'l1_ratio': 0.635613598962299, 'max_iter': 3935, 'fit_intercept': True}


[I 2025-05-11 18:09:31,171] Trial 48 finished with value: 0.9859229675171295 and parameters: {'alpha': 0.00039971681437266305, 'l1_ratio': 0.8403489041873914, 'max_iter': 2690, 'fit_intercept': True}. Best is trial 24 with value: 0.9814743972967545.
Optimisation des hyperparamètres:  22%|██▎       | 45/200 [1:14:38<5:47:34, 134.54s/itération, Best RMSE=0.981]

[Trial 48] RMSE: 0.9859 | Params: {'alpha': 0.00039971681437266305, 'l1_ratio': 0.8403489041873914, 'max_iter': 2690, 'fit_intercept': True}


[I 2025-05-11 18:10:12,353] Trial 43 finished with value: 0.9814756028489727 and parameters: {'alpha': 0.00010530247929888898, 'l1_ratio': 0.6174963576207678, 'max_iter': 3879, 'fit_intercept': True}. Best is trial 24 with value: 0.9814743972967545.
Optimisation des hyperparamètres:  23%|██▎       | 46/200 [1:15:19<4:33:26, 106.53s/itération, Best RMSE=0.981]

[Trial 43] RMSE: 0.9815 | Params: {'alpha': 0.00010530247929888898, 'l1_ratio': 0.6174963576207678, 'max_iter': 3879, 'fit_intercept': True}


[I 2025-05-11 18:10:19,626] Trial 49 finished with value: 0.9857980588605348 and parameters: {'alpha': 0.0004643343920162219, 'l1_ratio': 0.8292127681233098, 'max_iter': 4419, 'fit_intercept': True}. Best is trial 24 with value: 0.9814743972967545.
Optimisation des hyperparamètres:  24%|██▎       | 47/200 [1:15:27<3:15:44, 76.76s/itération, Best RMSE=0.981] 

[Trial 49] RMSE: 0.9858 | Params: {'alpha': 0.0004643343920162219, 'l1_ratio': 0.8292127681233098, 'max_iter': 4419, 'fit_intercept': True}


[I 2025-05-11 18:10:42,325] Trial 44 finished with value: 0.9814783911027152 and parameters: {'alpha': 0.00011819751652891307, 'l1_ratio': 0.6306203482627148, 'max_iter': 3805, 'fit_intercept': True}. Best is trial 24 with value: 0.9814743972967545.
Optimisation des hyperparamètres:  24%|██▍       | 48/200 [1:15:49<2:33:21, 60.54s/itération, Best RMSE=0.981]

[Trial 44] RMSE: 0.9815 | Params: {'alpha': 0.00011819751652891307, 'l1_ratio': 0.6306203482627148, 'max_iter': 3805, 'fit_intercept': True}


[I 2025-05-11 18:10:49,258] Trial 50 finished with value: inf and parameters: {'alpha': 0.0003763809731389234, 'l1_ratio': 0.8334864630084382, 'max_iter': 2660, 'fit_intercept': True}. Best is trial 24 with value: 0.9814743972967545.
Optimisation des hyperparamètres:  24%|██▍       | 49/200 [1:15:56<1:51:52, 44.46s/itération, Best RMSE=0.981]

Trial 50 Erreur : 


[I 2025-05-11 18:11:24,861] Trial 51 finished with value: 0.9858608774748262 and parameters: {'alpha': 0.00043083932311959724, 'l1_ratio': 0.8156259293968197, 'max_iter': 4409, 'fit_intercept': True}. Best is trial 24 with value: 0.9814743972967545.
Optimisation des hyperparamètres:  25%|██▌       | 50/200 [1:16:32<1:44:30, 41.80s/itération, Best RMSE=0.981]

[Trial 51] RMSE: 0.9859 | Params: {'alpha': 0.00043083932311959724, 'l1_ratio': 0.8156259293968197, 'max_iter': 4409, 'fit_intercept': True}


[I 2025-05-11 18:14:26,928] Trial 45 finished with value: 0.9814744484514957 and parameters: {'alpha': 0.00010015957757162148, 'l1_ratio': 0.6257947407590021, 'max_iter': 3881, 'fit_intercept': True}. Best is trial 24 with value: 0.9814743972967545.
Optimisation des hyperparamètres:  26%|██▌       | 51/200 [1:19:34<3:28:18, 83.88s/itération, Best RMSE=0.981]

[Trial 45] RMSE: 0.9815 | Params: {'alpha': 0.00010015957757162148, 'l1_ratio': 0.6257947407590021, 'max_iter': 3881, 'fit_intercept': True}


[I 2025-05-11 18:15:28,612] Trial 52 finished with value: inf and parameters: {'alpha': 0.0003649855559878304, 'l1_ratio': 0.6219381153482981, 'max_iter': 4334, 'fit_intercept': True}. Best is trial 24 with value: 0.9814743972967545.
Optimisation des hyperparamètres:  26%|██▌       | 52/200 [1:20:36<3:10:30, 77.24s/itération, Best RMSE=0.981]

Trial 52 Erreur : 


[I 2025-05-11 18:16:22,608] Trial 47 finished with value: 0.9814738244059107 and parameters: {'alpha': 0.00010004549286453333, 'l1_ratio': 0.8359981854950035, 'max_iter': 4243, 'fit_intercept': True}. Best is trial 47 with value: 0.9814738244059107.
Optimisation des hyperparamètres:  26%|██▋       | 53/200 [1:21:30<2:52:06, 70.25s/itération, Best RMSE=0.981]

[Trial 47] RMSE: 0.9815 | Params: {'alpha': 0.00010004549286453333, 'l1_ratio': 0.8359981854950035, 'max_iter': 4243, 'fit_intercept': True}


[I 2025-05-11 18:16:53,133] Trial 53 finished with value: 0.9858444976722929 and parameters: {'alpha': 0.0004393144947455462, 'l1_ratio': 0.6277611047314205, 'max_iter': 4283, 'fit_intercept': True}. Best is trial 47 with value: 0.9814738244059107.
Optimisation des hyperparamètres:  27%|██▋       | 54/200 [1:22:00<2:21:56, 58.33s/itération, Best RMSE=0.981]

[Trial 53] RMSE: 0.9858 | Params: {'alpha': 0.0004393144947455462, 'l1_ratio': 0.6277611047314205, 'max_iter': 4283, 'fit_intercept': True}


[I 2025-05-11 18:17:17,103] Trial 54 finished with value: 0.9857256720846173 and parameters: {'alpha': 0.0005155591905575797, 'l1_ratio': 0.6075305130362849, 'max_iter': 4269, 'fit_intercept': True}. Best is trial 47 with value: 0.9814738244059107.
Optimisation des hyperparamètres:  28%|██▊       | 55/200 [1:22:24<1:56:03, 48.02s/itération, Best RMSE=0.981]

[Trial 54] RMSE: 0.9857 | Params: {'alpha': 0.0005155591905575797, 'l1_ratio': 0.6075305130362849, 'max_iter': 4269, 'fit_intercept': True}


[I 2025-05-11 18:19:55,996] Trial 55 finished with value: 0.9857599260009346 and parameters: {'alpha': 0.0005081517989589418, 'l1_ratio': 0.4829009535680221, 'max_iter': 4267, 'fit_intercept': True}. Best is trial 47 with value: 0.9814738244059107.
Optimisation des hyperparamètres:  28%|██▊       | 56/200 [1:25:03<3:15:04, 81.28s/itération, Best RMSE=0.981]

[Trial 55] RMSE: 0.9858 | Params: {'alpha': 0.0005081517989589418, 'l1_ratio': 0.4829009535680221, 'max_iter': 4267, 'fit_intercept': True}


[I 2025-05-11 18:24:29,070] Trial 56 finished with value: 0.9857669344427692 and parameters: {'alpha': 0.0004918580962347245, 'l1_ratio': 0.5049485295897678, 'max_iter': 4277, 'fit_intercept': True}. Best is trial 47 with value: 0.9814738244059107.
Optimisation des hyperparamètres:  28%|██▊       | 57/200 [1:29:36<5:30:51, 138.82s/itération, Best RMSE=0.981]

[Trial 56] RMSE: 0.9858 | Params: {'alpha': 0.0004918580962347245, 'l1_ratio': 0.5049485295897678, 'max_iter': 4277, 'fit_intercept': True}


[I 2025-05-11 18:25:12,805] Trial 57 finished with value: 0.9857441301421141 and parameters: {'alpha': 0.00053047030653128, 'l1_ratio': 0.4830394410592995, 'max_iter': 4229, 'fit_intercept': True}. Best is trial 47 with value: 0.9814738244059107.
Optimisation des hyperparamètres:  29%|██▉       | 58/200 [1:30:20<4:21:01, 110.30s/itération, Best RMSE=0.981]

[Trial 57] RMSE: 0.9857 | Params: {'alpha': 0.00053047030653128, 'l1_ratio': 0.4830394410592995, 'max_iter': 4229, 'fit_intercept': True}


[I 2025-05-11 18:25:25,428] Trial 58 finished with value: inf and parameters: {'alpha': 0.0002323108005786335, 'l1_ratio': 0.508789058186819, 'max_iter': 4208, 'fit_intercept': True}. Best is trial 47 with value: 0.9814738244059107.
Optimisation des hyperparamètres:  30%|██▉       | 59/200 [1:30:32<3:10:20, 81.00s/itération, Best RMSE=0.981] 

Trial 58 Erreur : 


: 

: 

## Comparaison des modèles et selection du meilleur modèle

###   Comparaison

In [None]:
def compare_grid_models(models_grids: Dict[str, Tuple], X_train: pd.DataFrame, y_train: pd.Series, cv: int = 3) -> Tuple[pd.DataFrame, Dict[str, Pipeline]]:
    """
    Compare plusieurs modèles de régression à l'aide de GridSearchCV rapide.

    Args
    ----
    - models_grids : dict où chaque clé est le nom du modèle et la valeur est un tuple (modèle sklearn, grille d'hyperparamètres)
    - X_train : données d'entraînement
    - cv : nombre de folds pour la validation croisée

    Returns:
    - results_df: DataFrame des scores RMSE moyens
    - best_models: dictionnaire des pipelines optimaux entraînés
    """
    results = []
    best_models = {}

    for name, (model, grid) in models_grids.items():
        print(f"\n Recherche GridSearch pour {name}...")

        # Prétraitement et pipeline
        preprocessor = get_preprocessor(X_train)
        pipeline = Pipeline([
            ("preprocessor", preprocessor),
            ("model", model)
        ])

        # Adapter la grille avec le préfixe "model__"
        param_grid = {'model__' + k: v for k, v in grid.items()}

        gridsearch = GridSearchCV(
            pipeline ,
            param_grid=param_grid,
            X = X_train,
            y = y_train,
            cv = cv ,
            n_jobs=-1,

        )

        best_pipeline, best_params, best_score, _ = quick_gridsearch(
            pipeline,
            param_grid=param_grid,
            X=X_train,
            y=y_train,
            cv=cv,
            scoring='neg_root_mean_squared_error',
        )

        results.append({
            "Model": name,
            "Best RMSE (CV)": -best_score,
            "Best Params": best_params
        })
        best_models[name] = best_pipeline

    results_df = pd.DataFrame(results).sort_values(by="Best RMSE (CV)")
    return results_df, best_models



results_df, pipelines = compare_grid_models(models_grids, X_train, y_train)
print(results_df)



### Selection automatique du meilleur Modèle

In [None]:
# selection du meilleur modèle

best_model_name = results_df.iloc[0]['Model'] # nom du meilleur modèle

print(f"le meilleur modèle est {best_model_name}")
best_pipeline = best_models[best_model_name] #extraction de la meilleur  pipeline
best_features = models_info[best_model_name]['features'] #extraction des features optimal
best_model = models_info[best_model_name]['model'] #extraction du meilleur modèle
best_params = models_info[best_model_name]['params'] #extraction des meilleurs paramètres

# entrainement du meilleur modèle
final_best_model = best_pipeline.fit(X_train[best_features], y_train)
final_best_features = best_features

# mise à jour de models_info:
for name  in models_info:
  models_info[name]["is_best"] = (name == best_model_name)
for name , pipe in pipelines:
  models_info[name]["pipeline"] = pipe

##  Quelques Analyses graphiques des modèles

In [None]:
def analyze_model_predictions(y_true, y_pred, model_name=""):
    """
    Génère des graphiques d'analyse des performances d'un modèle de régression :
    - Résidus vs Prédictions
    - Histogramme des résidus
    - QQ-plot
    - Vraies vs Prédictions
    - Affichage des métriques

    Args:
    - y_true: valeurs réelles
    - y_pred: prédictions du modèle
    - model_name: nom du modèle (affiché sur les titres)
    """
    residuals = y_true - y_pred
    rmse = mean_squared_error(y_true, y_pred, squared=False)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    print(f"\n📊 Évaluation du {model_name}")
    print(f"RMSE: {rmse:.4f}")
    print(f"MAE : {mae:.4f}")
    print(f"R²  : {r2:.4f}")

    plt.figure(figsize=(15, 10))

    # Résidus vs Prédictions
    plt.subplot(2, 2, 1)
    sns.scatterplot(x=y_pred, y=residuals)
    plt.axhline(0, color='red', linestyle='--')
    plt.xlabel("Prédictions")
    plt.ylabel("Résidus")
    plt.title("Résidus vs Prédictions")

    # Histogramme des résidus
    plt.subplot(2, 2, 2)
    sns.histplot(residuals, kde=True)
    plt.title("Distribution des résidus")

    # QQ-plot
    plt.subplot(2, 2, 3)
    stats.probplot(residuals, dist="norm", plot=plt)
    plt.title("QQ-Plot des résidus")

    # Vraies vs Prédictions
    plt.subplot(2, 2, 4)
    sns.scatterplot(x=y_true, y=y_pred)
    plt.plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()], 'r--')
    plt.xlabel("Vraies valeurs")
    plt.ylabel("Prédictions")
    plt.title("Vraies vs Prédictions")

    plt.suptitle(f"Analyse du {model_name}", fontsize=16)
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.savefig(f"graphiques d'analyse du modèle {model_name}")
    plt.show()

#-------------------------------------------------------------------------------
def plot_feature_importance(model, feature_names, model_name=""):
    """
    Trace l'importance des features pour un modèle donné.
    """
    if hasattr(model, "coef_"):
        importances = model.coef_
    elif hasattr(model, "feature_importances_"):
        importances = model.feature_importances_
    else:
        print(f"Aucune importance de feature disponible pour {model_name}")
        return

    sorted_idx = np.argsort(importances)[::-1]
    plt.figure(figsize=(10, 6))
    sns.barplot(x=np.abs(importances)[sorted_idx], y=np.array(feature_names)[sorted_idx])
    plt.title(f"Importance des variables - {model_name}")
    plt.xlabel("Importance absolue")
    plt.ylabel("Variables")
    plt.tight_layout()
    plt.savefig(f"Figures/importances_{model_name}.png")
    plt.close()

#----------------------------------------------------------------
def analyze_all_models(models_info , X_test, y_test):
    """
    Applique l'analyse des prédictions à tous les modèles comparés.
    """
    for model_name, info in models_info.items():
        features = info.get("features" , X_test.columns.tolist())
        pipeline = info.get("pipeline")
        x_val = safe_feature_selection(X_test, features) if features else X_test

        y_pred = pipeline.predict(X_test[features])
        analyze_model_predictions(y_test, y_pred, model_name)

#----------------------------------------------------------------------

os.makedirs("Figures",exist_ok=True)

# tracer les graphiques pour les analyses des trois modèles
analyze_all_models(models_info , X_test, y_test)


# graphique de l'importance des features pour le meilleur modèle
plot_feature_importance(final_best_model , final_best_features, model_name=best_model_name)

# 4. Soumission sur Kaggle

In [None]:
# Soumission sur Kaggle

# copier le fichier de test déja merge
data_test = test_data.copy()

# selectionner les features optimaux pour le best_model
data_test_submit = data_test[final_best_features]

# Calcul des prédictions sur le dataset de test final
y_pred_test = final_best_model.predict(data_test_submit)

# Ajout des prédictions au dataframe
data_test['Prediction'] = y_pred_test

submission = data_test[['CodeINSEE', 'Prediction']]

# sauvegarder la liste des features
with open(f"final_best_features_with_{best_model_name}.json" , 'w') as file:
    json.dump(final_best_features , file)

# sauvegarder les prédictions
submission.to_csv('results_test_predicted.csv', index=False)
print(f"\nFichier de soumission 'results_test_predicted.csv' généré avec succès \nForme {submission.shape}  ")

submission.head(5)