# **Implementación del modelo original**

In [1]:
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.svm import SVR
from scipy.stats import jarque_bera
from statsmodels.stats.diagnostic import acorr_ljungbox
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.metrics import r2_score, mean_absolute_percentage_error, mean_squared_error

In [2]:
file_path = r"C:\Users\kamac\OneDrive\Desktop\MachineLearningUN\EDA\ws_modelos.xlsx"
data = pd.read_excel(file_path)

In [3]:
X1 = data.drop(columns=['Fecha', 'VelViento100m_1', 'VelViento100m_2', 'VelViento80m_1', 'VelViento80m_2', 'VelViento60m', 'DirViento80m', 'DirViento60m'])
y1 = data['VelViento100m_1']

X_train, X_test, y_train, y_test = train_test_split(X1, y1, test_size = 0.3, random_state = 11)

## **Bayesian Optimization**

In [4]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error, r2_score
from bayes_opt import BayesianOptimization
import numpy as np


In [5]:
def optimize_xgboost(max_depth, learning_rate, n_estimators, gamma, min_child_weight):
    params = {
        "max_depth": int(max_depth),
        "learning_rate": learning_rate,
        "n_estimators": int(n_estimators),
        "gamma": gamma,
        "min_child_weight": min_child_weight,
        "objective": "reg:squarederror",
        "eval_metric": "rmse",
        "random_state": 42,
    }
    model = xgb.XGBRegressor(**params)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, predictions))
    return -rmse  # Maximizar el inverso de RMSE


In [15]:
param_bounds = {
    "max_depth": (3, 20),
    "learning_rate": (0.01, 0.3),
    "n_estimators": (50, 300),
    "gamma": (0, 5),
    "min_child_weight": (1, 10),
}


In [16]:
optimizer = BayesianOptimization(
    f=optimize_xgboost,
    pbounds=param_bounds,
    random_state=42,
    verbose=2
)

optimizer.maximize(init_points=10, n_iter=30)


|   iter    |  target   |   gamma   | learni... | max_depth | min_ch... | n_esti... |
-------------------------------------------------------------------------------------
| [39m1        [39m | [39m-1.461   [39m | [39m1.873    [39m | [39m0.2857   [39m | [39m15.44    [39m | [39m6.388    [39m | [39m89.0     [39m |
| [35m2        [39m | [35m-1.408   [39m | [35m0.78     [39m | [35m0.02684  [39m | [35m17.72    [39m | [35m6.41     [39m | [35m227.0    [39m |
| [39m3        [39m | [39m-1.448   [39m | [39m0.1029   [39m | [39m0.2913   [39m | [39m17.15    [39m | [39m2.911    [39m | [39m95.46    [39m |
| [39m4        [39m | [39m-1.48    [39m | [39m0.917    [39m | [39m0.09823  [39m | [39m11.92    [39m | [39m4.888    [39m | [39m122.8    [39m |
| [39m5        [39m | [39m-1.722   [39m | [39m3.059    [39m | [39m0.05045  [39m | [39m7.966    [39m | [39m4.297    [39m | [39m164.0    [39m |
| [39m6        [39m | [39m-1.564   [39m | [

In [17]:
best_params = optimizer.max['params']
best_model = xgb.XGBRegressor(
    max_depth=int(best_params['max_depth']),
    learning_rate=best_params['learning_rate'],
    n_estimators=int(best_params['n_estimators']),
    gamma=best_params['gamma'],
    min_child_weight=best_params['min_child_weight'],
    objective="reg:squarederror",
    eval_metric="rmse",
    random_state=42,
)

best_model.fit(X_train, y_train)
predictions = best_model.predict(X_test)


In [21]:
rmse = np.sqrt(mean_squared_error(y_test, predictions))
r2 = r2_score(y_test, predictions)
residuals = y_test - predictions
ljung_box_p_value = acorr_ljungbox(residuals, lags=[30], return_df=True)['lb_pvalue'].iloc[0]
jarque_bera_p_value = jarque_bera(residuals)[1]
print(f"RMSE: {rmse}")
print(f"R2 Score: {r2}")
print(f"Ljung-Box p-value: {ljung_box_p_value}")
print(f"Jarque-Bera p-value: {jarque_bera_p_value}")



RMSE: 1.4076855242944042
R2 Score: 0.8960165627720081
Ljung-Box p-value: 0.45843638807833753
Jarque-Bera p-value: 0.0


In [10]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from scipy.stats import jarque_bera
from statsmodels.stats.diagnostic import acorr_ljungbox
import numpy as np
from xgboost import XGBRegressor

def evaluate_xgboost_with_gridsearch_r2(X_train, y_train, X_test, y_test, results, resultados_graficos):
    # Definimos el espacio de búsqueda para GridSearchCV
    param_grid_xgb = {
        'n_estimators': [10, 50, 100],
        'max_depth': [5, 10, 15, 20],
        'learning_rate': [0.01, 0.05, 0.1, 0.3]
    }

    # Inicializamos el modelo XGBoost
    modelo = XGBRegressor(objective='reg:squarederror', random_state=42)

    # Implementamos GridSearchCV con 10 pliegues
    grid_xgb = GridSearchCV(
        estimator=modelo,
        param_grid=param_grid_xgb,
        scoring='r2',  # Usamos R² como métrica de evaluación
        cv=10,         # Validación cruzada con 10 pliegues
        verbose=1,
        n_jobs=-1
    )

    # Ajustamos el modelo a los datos de entrenamiento
    grid_xgb.fit(X_train, y_train)

    # Predicciones para entrenamiento y prueba
    y_train_pred = grid_xgb.predict(X_train)
    y_pred_xgb = grid_xgb.predict(X_test)

    # Residuos
    residuals = y_test - y_pred_xgb

    # Métricas de evaluación
    r2 = r2_score(y_test, y_pred_xgb)
    mape = np.mean(np.abs((y_test - y_pred_xgb) / y_test)) * 100  # Mean Absolute Percentage Error
    rmse = np.sqrt(mean_squared_error(y_test, y_pred_xgb))
    mae = mean_absolute_error(y_test, y_pred_xgb)
    mse = mean_squared_error(y_test, y_pred_xgb)

    # Pruebas estadísticas para residuos
    ljung_box_p_value = acorr_ljungbox(residuals, lags=[30], return_df=True)['lb_pvalue'].iloc[0]
    jarque_bera_p_value = jarque_bera(residuals)[1]

    # Guardamos los resultados en las listas proporcionadas
    results.append({
        'Modelo': 'XGBoost Regressor (GridSearch)',
        'MAPE': mape,
        'MAE': mae,
        'MSE': mse,
        'RMSE': rmse,
        'R²': r2,
        'Ljung-Box p-value': ljung_box_p_value,
        'Jarque-Bera p-value': jarque_bera_p_value
    })

    resultados_graficos.append({
        'nombre_modelo': 'XGBoost Regressor (GridSearch)',
        'y_train': y_train,
        'y_train_pred': y_train_pred,
        'y_test': y_test,
        'y_test_pred': y_pred_xgb,
        'residuos': residuals
    })

    print("Mejores hiperparámetros encontrados:", grid_xgb.best_params_)
    print("Mejor puntuación R² en validación cruzada:", grid_xgb.best_score_)


In [11]:
results = []
resultados_graficos = []

evaluate_xgboost_with_gridsearch_r2(X_train, y_train, X_test, y_test, results, resultados_graficos)


Fitting 10 folds for each of 48 candidates, totalling 480 fits


1 fits failed out of a total of 480.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\kamac\miniconda3\envs\data_viz\lib\site-packages\sklearn\model_selection\_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\kamac\miniconda3\envs\data_viz\lib\site-packages\xgboost\core.py", line 726, in inner_f
    return func(**kwargs)
  File "c:\Users\kamac\miniconda3\envs\data_viz\lib\site-packages\xgboost\sklearn.py", line 1081, in fit
    train_dmatrix, evals = _wrap_evaluation_matrices(
  File "c:\Users\kamac\miniconda3\envs\data_viz\lib\site-packages\xgboost\sklearn.py", line 596, in _wrap_evalua

Mejores hiperparámetros encontrados: {'learning_rate': 0.1, 'max_depth': 15, 'n_estimators': 100}
Mejor puntuación R² en validación cruzada: 0.8941666710088407


In [13]:
resultados = pd.DataFrame(results)
resultados

Unnamed: 0,Modelo,MAPE,MAE,MSE,RMSE,R²,Ljung-Box p-value,Jarque-Bera p-value
0,XGBoost Regressor (GridSearch),11.475852,0.972034,2.008381,1.417173,0.89461,0.402368,0.0


## **GWO**

In [24]:
import numpy as np
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score
import random

# Función objetivo para optimización
def objective_function(params, X_train, y_train, X_test, y_test):
    max_depth, learning_rate, n_estimators = params
    model = XGBRegressor(
        max_depth=int(max_depth),
        learning_rate=learning_rate,
        n_estimators=int(n_estimators),
        objective="reg:squarederror",
        random_state=42
    )
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, predictions))
    return rmse

# Implementación de GWO
def gwo_xgboost(X_train, y_train, X_test, y_test, n_wolves=5, n_iterations=20):
    # Definir espacio de búsqueda
    bounds = {
        'max_depth': (3, 30),        # Profundidad del árbol
        'learning_rate': (0.01, 0.3), # Tasa de aprendizaje
        'n_estimators': (50, 300)    # Número de árboles
    }

    # Inicialización de lobos
    wolves = [np.array([random.uniform(bounds[k][0], bounds[k][1]) for k in bounds.keys()]) for _ in range(n_wolves)]
    alpha, beta, delta = None, None, None  # Tres mejores lobos
    alpha_score, beta_score, delta_score = float("inf"), float("inf"), float("inf")

    for iteration in range(n_iterations):
        for wolf in wolves:
            score = objective_function(wolf, X_train, y_train, X_test, y_test)

            # Actualizar lobos alfa, beta y delta
            if score < alpha_score:
                delta, beta, alpha = beta, alpha, wolf
                delta_score, beta_score, alpha_score = beta_score, alpha_score, score
            elif score < beta_score:
                delta, beta = beta, wolf
                delta_score, beta_score = beta_score, score
            elif score < delta_score:
                delta = wolf
                delta_score = score

        # Actualizar posiciones de los lobos
        for i, wolf in enumerate(wolves):
            for j in range(len(wolf)):
                A1 = 2 * random.random() - 1
                C1 = 2 * random.random()
                X1 = alpha[j] - A1 * abs(C1 * alpha[j] - wolf[j])

                A2 = 2 * random.random() - 1
                C2 = 2 * random.random()
                X2 = beta[j] - A2 * abs(C2 * beta[j] - wolf[j])

                A3 = 2 * random.random() - 1
                C3 = 2 * random.random()
                X3 = delta[j] - A3 * abs(C3 * delta[j] - wolf[j])

                wolf[j] = (X1 + X2 + X3) / 3  # Actualización promedio

                # Restringir al espacio de búsqueda
                wolf[j] = np.clip(wolf[j], bounds[list(bounds.keys())[j]][0], bounds[list(bounds.keys())[j]][1])

        print(f"Iteración {iteration+1}/{n_iterations}: Mejor RMSE = {alpha_score}")

    # Devuelve los mejores parámetros
    return {
        'max_depth': int(alpha[0]),
        'learning_rate': alpha[1],
        'n_estimators': int(alpha[2]),
        'best_rmse': alpha_score
}

# Llamar a la optimización GWO
best_params_gwo = gwo_xgboost(X_train, y_train, X_test, y_test)
print("Mejores hiperparámetros encontrados con GWO:", best_params_gwo)


Iteración 1/20: Mejor RMSE = 1.4266196351997413
Iteración 2/20: Mejor RMSE = 1.403670040199491
Iteración 3/20: Mejor RMSE = 1.402758042439684
Iteración 4/20: Mejor RMSE = 1.4000905332402367
Iteración 5/20: Mejor RMSE = 1.4000905332402367
Iteración 6/20: Mejor RMSE = 1.4000905332402367
Iteración 7/20: Mejor RMSE = 1.4000905332402367
Iteración 8/20: Mejor RMSE = 1.4000905332402367
Iteración 9/20: Mejor RMSE = 1.4000905332402367
Iteración 10/20: Mejor RMSE = 1.4000905332402367
Iteración 11/20: Mejor RMSE = 1.4000905332402367
Iteración 12/20: Mejor RMSE = 1.4000905332402367
Iteración 13/20: Mejor RMSE = 1.3947734584687395
Iteración 14/20: Mejor RMSE = 1.3947734584687395
Iteración 15/20: Mejor RMSE = 1.3947734584687395
Iteración 16/20: Mejor RMSE = 1.3947734584687395
Iteración 17/20: Mejor RMSE = 1.3947734584687395
Iteración 18/20: Mejor RMSE = 1.3947734584687395
Iteración 19/20: Mejor RMSE = 1.3947734584687395
Iteración 20/20: Mejor RMSE = 1.3947734584687395
Mejores hiperparámetros encontr

In [27]:
import numpy as np
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score
import random

# Función para evaluar las métricas
def evaluate_woa_metrics(params, X_train, y_train, X_test, y_test):
    max_depth, learning_rate, n_estimators = params
    model = XGBRegressor(
        max_depth=int(max_depth),
        learning_rate=learning_rate,
        n_estimators=int(n_estimators),
        objective="reg:squarederror",
        random_state=42
    )
    model.fit(X_train, y_train)
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # Métricas
    r2_train = r2_score(y_train, y_train_pred)
    r2_test = r2_score(y_test, y_test_pred)
    rmse_train = np.sqrt(mean_squared_error(y_train, y_train_pred))
    rmse_test = np.sqrt(mean_squared_error(y_test, y_test_pred))

    return rmse_test, r2_train, r2_test, rmse_train, rmse_test, model

# Implementación del Whale Optimization Algorithm (WOA)
def woa_xgboost(X_train, y_train, X_test, y_test, n_whales=10, n_iterations=20):
    # Definir espacio de búsqueda
    bounds = {
        'max_depth': (3, 20),         # Profundidad del árbol
        'learning_rate': (0.01, 0.3), # Tasa de aprendizaje
        'n_estimators': (50, 300)     # Número de árboles
    }

    # Inicialización de ballenas (aleatorio dentro del espacio de búsqueda)
    whales = [np.array([random.uniform(bounds[k][0], bounds[k][1]) for k in bounds.keys()]) for _ in range(n_whales)]
    best_whale, best_score = None, float("inf")
    best_r2_train, best_r2_test, best_rmse_train, best_rmse_test, best_model = None, None, None, None, None

    for iteration in range(n_iterations):
        for i, whale in enumerate(whales):
            # Evaluar métricas
            rmse_test, r2_train, r2_test, rmse_train, rmse_test_value, model = evaluate_woa_metrics(whale, X_train, y_train, X_test, y_test)

            # Actualizar la mejor ballena
            if rmse_test_value < best_score:
                best_whale = whale
                best_score = rmse_test_value
                best_r2_train, best_r2_test, best_rmse_train, best_rmse_test, best_model = r2_train, r2_test, rmse_train, rmse_test_value, model

        # Actualizar posiciones de las ballenas (comportamiento de caza)
        for i, whale in enumerate(whales):
            for j in range(len(whale)):
                r = random.random()
                if r < 0.5:
                    # Movimiento en espiral
                    D = abs(best_whale[j] - whale[j])
                    b = 1  # Constante de forma para el espiral
                    l = random.uniform(-1, 1)  # Factor aleatorio
                    whale[j] = D * np.exp(b * l) * np.cos(2 * np.pi * l) + best_whale[j]
                else:
                    # Movimiento lineal hacia la mejor ballena
                    A = 2 * random.random() - 1
                    C = 2 * random.random()
                    D = abs(C * best_whale[j] - whale[j])
                    whale[j] = best_whale[j] - A * D

                # Restringir al espacio de búsqueda
                whale[j] = np.clip(whale[j], bounds[list(bounds.keys())[j]][0], bounds[list(bounds.keys())[j]][1])

        print(f"Iteración {iteration+1}/{n_iterations}: Mejor RMSE Test = {best_score:.4f}")

    # Devuelve los mejores parámetros y métricas
    return {
        'best_params': {
            'max_depth': int(best_whale[0]),
            'learning_rate': best_whale[1],
            'n_estimators': int(best_whale[2])
        },
        'best_r2_train': best_r2_train,
        'best_r2_test': best_r2_test,
        'best_rmse_train': best_rmse_train,
        'best_rmse_test': best_rmse_test,
        'best_model': best_model
    }

# Llamar a la optimización WOA
results_woa = woa_xgboost(X_train, y_train, X_test, y_test)
print("\nResultados finales de WOA-XGBoost:")
print(f"R² Train: {results_woa['best_r2_train']:.4f}")
print(f"R² Test: {results_woa['best_r2_test']:.4f}")
print(f"RMSE Train: {results_woa['best_rmse_train']:.4f}")
print(f"RMSE Test: {results_woa['best_rmse_test']:.4f}")
print(f"Hiperparámetros óptimos: {results_woa['best_params']}")


Iteración 1/20: Mejor RMSE Test = 1.4072
Iteración 2/20: Mejor RMSE Test = 1.4041
Iteración 3/20: Mejor RMSE Test = 1.4041
Iteración 4/20: Mejor RMSE Test = 1.4041
Iteración 5/20: Mejor RMSE Test = 1.4041
Iteración 6/20: Mejor RMSE Test = 1.4041
Iteración 7/20: Mejor RMSE Test = 1.4041
Iteración 8/20: Mejor RMSE Test = 1.4041
Iteración 9/20: Mejor RMSE Test = 1.4041
Iteración 10/20: Mejor RMSE Test = 1.4041
Iteración 11/20: Mejor RMSE Test = 1.4041
Iteración 12/20: Mejor RMSE Test = 1.4041
Iteración 13/20: Mejor RMSE Test = 1.4041
Iteración 14/20: Mejor RMSE Test = 1.4041
Iteración 15/20: Mejor RMSE Test = 1.4041
Iteración 16/20: Mejor RMSE Test = 1.4041
Iteración 17/20: Mejor RMSE Test = 1.4041
Iteración 18/20: Mejor RMSE Test = 1.4041
Iteración 19/20: Mejor RMSE Test = 1.4041
Iteración 20/20: Mejor RMSE Test = 1.4041

Resultados finales de WOA-XGBoost:
R² Train: 0.9999
R² Test: 0.8965
RMSE Train: 0.0407
RMSE Test: 1.4041
Hiperparámetros óptimos: {'max_depth': 3, 'learning_rate': 0.01