In [None]:
# Training Dataset plus one validation Dataset per entry of periodos_valid.
train_data = lgb.Dataset(
    X_train,
    label=y_train,
    categorical_feature=categorical_features,
)

val_data_list = []
for idx, _periodo in enumerate(periodos_valid):
    fold_features = X_val_list[idx]
    fold_dataset = lgb.Dataset(
        fold_features,
        label=y_val_list[idx],
        categorical_feature=categorical_features,
    )
    # Attach PRODUCT_ID as an extra, ad-hoc attribute on the Dataset object;
    # mape_sum_lgb reads it back with getattr to aggregate errors per product.
    fold_dataset.PRODUCT_ID = fold_features['PRODUCT_ID'].values
    val_data_list.append(fold_dataset)

def mape_sum_lgb(y_pred, dataset):
    """Custom LightGBM evaluation metric: aggregated absolute error over total demand.

    Predictions and labels are summed per product before the absolute error is
    taken, so over- and under-predictions of the same product offset each
    other. The summed absolute error is then divided by ``sum(|y_true|)``.

    Args:
        y_pred: Array of raw model predictions. Negative values are clipped
            to zero before scoring.
        dataset: Object exposing ``get_label()``. If it also carries a
            ``PRODUCT_ID`` attribute (one id per row), errors are aggregated
            per product; otherwise a single global total is compared.

    Returns:
        Tuple ``('mape_sum', value, False)``; ``False`` tells LightGBM that
        lower values are better. ``value`` is ``0.0`` when the labels sum to
        zero in absolute value, and NaN results are reported as ``0.0``.
    """
    y_true = dataset.get_label()
    product_id = getattr(dataset, 'PRODUCT_ID', None)
    y_pred = np.where(y_pred < 0, 0, y_pred)

    denom = np.sum(np.abs(y_true))
    if denom == 0:
        # Nothing to normalise by; report a neutral score instead of dividing by zero.
        return 'mape_sum', 0.0, False

    if product_id is not None:
        # Aggregate both columns in a single groupby pass instead of grouping
        # the frame once per column on every evaluation round.
        totals = (
            pd.DataFrame({'y_true': y_true, 'y_pred': y_pred, 'PRODUCT_ID': product_id})
            .groupby('PRODUCT_ID')[['y_true', 'y_pred']]
            .sum()
        )
        abs_error = np.abs(totals['y_true'] - totals['y_pred']).sum()
    else:
        # No product ids available: compare the global totals.
        abs_error = np.abs(y_true.sum() - y_pred.sum())

    mape = np.nan_to_num(abs_error / denom, nan=0.0)
    return 'mape_sum', mape, False  # False: lower is better

def objective(trial):
    """Optuna objective: train one LightGBM model and return its validation score.

    Hyperparameters are sampled from ``trial``; the model is trained on
    ``train_data`` with early stopping against ``val_data_list`` using the
    custom ``mape_sum_lgb`` metric.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The ``mape_sum`` value stored in ``best_score`` for the first entry of
        ``periodos_valid`` (lower is better).
    """
    params = {
        # Fixed settings.
        'objective': 'regression',
        'metric': 'None',  # only the custom metric is evaluated
        'boosting_type': 'gbdt',
        # Tree shape and learning rate.
        'num_leaves': trial.suggest_int('num_leaves', 31, 512),
        'learning_rate': trial.suggest_float('learning_rate', 0.005, 0.2, log=True),
        # Row / column subsampling.
        'feature_fraction': trial.suggest_float('feature_fraction', 0.6, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.6, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 10),
        # Regularisation.
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 10, 200),
        'max_depth': trial.suggest_int('max_depth', 3, 16),
        'lambda_l1': trial.suggest_float('lambda_l1', 0.0, 5.0),
        'lambda_l2': trial.suggest_float('lambda_l2', 0.0, 5.0),
        'min_gain_to_split': trial.suggest_float('min_gain_to_split', 0.0, 1.0),
        'verbose': -1,
        'feature_pre_filter': False,
    }

    eval_names = [f'validation_{p}' for p in periodos_valid]
    stop_callback = lgb.early_stopping(stopping_rounds=50, first_metric_only=True)
    log_callback = lgb.log_evaluation(period=50)

    booster = lgb.train(
        params,
        train_data,
        num_boost_round=2000,
        valid_sets=val_data_list,
        valid_names=eval_names,
        feval=mape_sum_lgb,
        callbacks=[stop_callback, log_callback],
    )

    # Only the first validation period's score is reported back to Optuna.
    first_period_scores = booster.best_score[f'validation_{periodos_valid[0]}']
    best_score = first_period_scores['mape_sum']
    print(f"Trial {trial.number}: mape_sum={best_score:.5f}")
    return best_score

# Persist the Optuna results in a SQLite database.
# The directory has to exist before SQLite opens the database file; creating
# it only at save time makes create_study fail when ./modelos is missing.
os.makedirs('./modelos', exist_ok=True)
storage_url = "sqlite:///./modelos/optuna.db"
study = optuna.create_study(direction='minimize', study_name="lgbm_regression", storage=storage_url, load_if_exists=True)
study.optimize(objective, n_trials=50, show_progress_bar=True)  # n_trials can be adjusted

print("Mejores hiperparámetros encontrados:")
print(study.best_params)

# Train the final model with the best hyperparameters found.
# study.best_params only contains the tuned values, so the fixed settings
# used during tuning are added back explicitly.
best_params = study.best_params
best_params['objective'] = 'regression'
best_params['metric'] = 'None'
best_params['feature_pre_filter'] = False  # same Dataset setting as in the tuning runs

model_reg = lgb.train(
    best_params,
    train_data,
    num_boost_round=50000,
    valid_sets=val_data_list,
    valid_names=[f'validation_{p}' for p in periodos_valid],
    feval=mape_sum_lgb,
    callbacks=[
        lgb.early_stopping(stopping_rounds=500, first_metric_only=True),
        lgb.log_evaluation(period=50)
    ]
)

model_reg.save_model('./modelos/lgbm_model_reg.txt')