In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor

# --- 1. Définition de la Grille d'Hyperparamètres à Tester ---
param_grid = {
    'learning_rate': [0.1, 0.05, 0.02], 
    'n_estimators': [200] # Ajout de valeurs pour une recherche plus complète
}

# --- 2. Fenêtres Glissantes Fixes ---
FIXED_WINDOW_TA = 14
FIXED_WINDOW_TP = 90
VAL_SIZE = 365 # Taille du jeu de validation

# --- 3. Fonction d'Optimisation par Région (SANS CV) ---
def tune_gbr_hyperparameters_for_region_no_cv(region, ta_data, tp_data, cf_data, window_ta, window_tp, param_grid):

    # 3.1. Préparation des Features et Target
    
    y_target = cf_data[region].copy()
    
    X_ta = ta_data.rolling(window=window_ta, min_periods=1).mean()
    X_ta.columns = [f'{col}_TA' for col in X_ta.columns] 
    
    X_tp = tp_data.rolling(window=window_tp, min_periods=1).sum()
    X_tp.columns = [f'{col}_TP' for col in X_tp.columns]

    X_rolling = pd.merge(X_ta, X_tp, left_index = True, right_index = True)
    
    X_rolling['cos'] = np.cos(X_rolling.index.dayofyear * 2 * np.pi/365)
    X_rolling['sin'] = np.sin(X_rolling.index.dayofyear * 2 * np.pi/365)

    X_combined = X_rolling.copy()
    X_combined.dropna(inplace=True)
    y_aligned = y_target.loc[X_combined.index]
    
    # 3.2. Séparation Train/Test initiale (pour enlever le jeu de test final)
    X_train_full, _, y_train, _ = train_test_split(X_combined, y_aligned, test_size=VAL_SIZE, shuffle=False)
    
    # 3.3. Séparation INTERNE en Entraînement et Validation (fixed split)
    if len(X_train_full) < 2 * VAL_SIZE + 1: 
        print(f"Skipping {region}: Data size too small for Training + Validation.")
        return None
        
    X_train_final = X_train_full.iloc[:-VAL_SIZE].copy()
    X_val = X_train_full.iloc[-VAL_SIZE:].copy()
    y_train_final = y_train.iloc[:-VAL_SIZE].copy()
    y_val = y_train.iloc[-VAL_SIZE:].copy()
    
    # 3.4. Mise à l'échelle (Scaling)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_final)
    X_val_scaled = scaler.transform(X_val)

    # 3.5. Boucle de Recherche (Grid Search Manuel)
    best_r2 = -np.inf
    best_params = {}
    
    for lr in param_grid['learning_rate']:
        for n_est in param_grid['n_estimators']:
            gbr = GradientBoostingRegressor(
                n_estimators=n_est, 
                learning_rate=lr, 
                max_depth=2, 
                random_state=75
            )
            
            # Entraînement et évaluation sur le set de validation
            gbr.fit(X_train_scaled, y_train_final)
            y_pred_val = gbr.predict(X_val_scaled)
            
            r2 = r2_score(y_val, y_pred_val)
            
            if r2 > best_r2:
                best_r2 = r2
                best_params = {'learning_rate': lr, 'n_estimators': n_est}
                
    # 3.6. Retour des Meilleurs Résultats
    return {
        'Best_learning_rate': best_params.get('learning_rate'),
        'Best_n_estimators': best_params.get('n_estimators'),
        'R2_Validation_Max': best_r2
    }

# --- 4. Boucle principale sur chaque colonne FR ---
all_regions_results_hp = {}
# La liste des régions doit provenir de la CIBLE (cf_FR)
regions = ta_FR.columns.tolist() 
cf_data = ta_FR 

print(f"Démarrage de l'optimisation des hyperparamètres GBR pour {len(regions)} région(s) (SANS CV).")

for region in regions:
    print(f"\n>>>> Optimisation du Learning Rate pour la région : {region} <<<<")
    
    results = tune_gbr_hyperparameters_for_region_no_cv(
        region, ta_FR, tp_FR, cf_data, 
        FIXED_WINDOW_TA, FIXED_WINDOW_TP, param_grid
    )

    if results:
        all_regions_results_hp[region] = results
        print(f"Résultat pour {region}:")
        print(f"   Meilleur LR: {results['Best_learning_rate']}")
        print(f"   Meilleur n_estimators: {results['Best_n_estimators']}")
        print(f"   R2 Max (Validation): {results['R2_Validation_Max']:.6f}")

# --- 5. Affichage du tableau récapitulatif ---
print("\n--- Synthèse des Hyperparamètres GBR Optimaux par Région ---")
results_df_hp = pd.DataFrame.from_dict(all_regions_results_hp, orient='index')
results_df_hp.index.name = "Région"
# display(results_df_hp.sort_values(by='R2_Validation_Max', ascending=False))

NameError: name 'ta_FR' is not defined