In [18]:
# Machine Learning Pipeline Execution
# Este notebook ejecuta secuencialmente todos los pasos del pipeline ML

# Importaciones necesarias
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Make the parent directory importable so modules that live at the project
# root can be imported from this notebook (adjust if the layout differs).
# The membership check keeps the cell idempotent: re-running it does not
# pile up duplicate '..' entries in sys.path.
if '..' not in sys.path:
    sys.path.append('..')



In [19]:
import os

# Report the kernel's working directory; the relative script paths used by
# the `%run` cells further down are resolved against it.
working_dir = os.getcwd()
print(f"Directorio de trabajo actual: {working_dir}")
# To switch to another directory, uncomment and adapt the line below:
#os.chdir('/ruta/a/tu/proyecto')

Directorio de trabajo actual: c:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\notebooks


In [20]:
import os
import shutil
import sys

# 1. Detectar la estructura del proyecto de forma dinámica
# Obtener el directorio actual
current_dir = os.getcwd()
print(f"Directorio actual: {current_dir}")

# Determinar la ruta al directorio raíz del proyecto
# Asumiendo que estamos en un notebook dentro del proyecto
if 'SP500_INDEX_Analisis' in current_dir:
    # Buscar el directorio raíz del proyecto (subiendo hasta encontrar SP500_INDEX_Analisis)
    path_parts = current_dir.split('SP500_INDEX_Analisis')
    project_root = os.path.join(path_parts[0], 'SP500_INDEX_Analisis')
else:
    # Si no estamos en la estructura esperada, usamos el directorio actual
    project_root = current_dir

print(f"Raíz del proyecto detectada: {project_root}")

# 2. Definir rutas relativas al project_root
pipelines_dir = os.path.join(project_root, "pipelines")
ml_dir = os.path.join(pipelines_dir, "ml")
config_path = os.path.join(pipelines_dir, "Data Engineering.xlsx")

# Verificar si el archivo de configuración existe
if not os.path.exists(config_path):
    print(f"ADVERTENCIA: El archivo de configuración no se encuentra en: {config_path}")
else:
    print(f"Archivo de configuración encontrado en: {config_path}")

# 3. Asegurar que existan los directorios de datos
data_root = os.path.join(project_root, "Data", "0_raw")
if not os.path.exists(data_root):
    os.makedirs(data_root, exist_ok=True)
    print(f"Creado directorio de datos: {data_root}")

# 4. Configurar path para importaciones
sys.path.append(project_root)
sys.path.append(ml_dir)

# 5. Ejecutar los procesadores de datos
try:
    from step_0_preprocess import run_economic_data_processor, ejecutar_myinvestingreportnormal, run_fred_data_processor, ejecutar_otherdataprocessor
    
    print("\nEjecutando procesadores de datos con rutas relativas:")
    print("-------------------------------------------")
    
    # Ejecutar cada procesador con la ruta correcta del archivo de configuración
    run_economic_data_processor(config_file=config_path, data_root=data_root)
    ejecutar_myinvestingreportnormal(config_file=config_path, data_root=data_root)
    run_fred_data_processor(config_file=config_path, data_root=data_root)
    ejecutar_otherdataprocessor(config_file=config_path, data_root=data_root)
    
except ImportError as e:
    print(f"Error al importar funciones de preprocesamiento: {e}")
    print("\nCreando y ejecutando un script temporal con las rutas relativas...")
    
    # Crear un script temporal con las rutas relativas
    temp_script = os.path.join(current_dir, "temp_preprocess.py")
    script_content = f"""
import os
import sys

# Configurar rutas relativas
project_root = r"{project_root}"
sys.path.append(project_root)
sys.path.append(os.path.join(project_root, "pipelines", "ml"))

from pipelines.ml.step_0_preprocess import run_economic_data_processor, ejecutar_myinvestingreportnormal, run_fred_data_processor, ejecutar_otherdataprocessor

# Ejecutar con las rutas relativas
config_path = os.path.join(project_root, "pipelines", "Data Engineering.xlsx")
data_root = os.path.join(project_root, "Data", "0_raw")

# Verificar que el archivo de configuración existe
if not os.path.exists(config_path):
    print(f"ERROR: No se encuentra el archivo de configuración en: {{config_path}}")
    sys.exit(1)
else:
    print(f"Usando archivo de configuración: {{config_path}}")

# Ejecutar los procesadores
run_economic_data_processor(config_file=config_path, data_root=data_root)
ejecutar_myinvestingreportnormal(config_file=config_path, data_root=data_root)
run_fred_data_processor(config_file=config_path, data_root=data_root)
ejecutar_otherdataprocessor(config_file=config_path, data_root=data_root)
"""
    with open(temp_script, "w") as f:
        f.write(script_content)
    
    # Ejecutar el script temporal
    print(f"Ejecutando script temporal: {temp_script}")
    %run {temp_script}

2025-05-13 11:58:18,457 - INFO - INICIANDO PROCESO: EconomicDataProcessor
2025-05-13 11:58:18,458 - INFO - Archivo de configuración: c:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\pipelines\Data Engineering.xlsx
2025-05-13 11:58:18,460 - INFO - Directorio raíz de datos: c:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\Data\0_raw
2025-05-13 11:58:18,461 - INFO - Fecha y hora: 2025-05-13 11:58:18
2025-05-13 11:58:18,462 - INFO - Leyendo archivo de configuración...
2025-05-13 11:58:18,492 - INFO - Se encontraron 21 configuraciones para procesar
2025-05-13 11:58:18,494 - INFO - 
Procesando: US_ISM_Manufacturing (business_confidence)
2025-05-13 11:58:18,494 - INFO - - Archivo: US_ISM_Manufacturing.xlsx
2025-05-13 11:58:18,495 - INFO - - Columna TARGET: ACTUAL
2025-05-13 11:58:18,496 - INFO - - Ruta encontrada: c:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analis

Directorio actual: c:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\notebooks
Raíz del proyecto detectada: c:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis
Archivo de configuración encontrado en: c:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\pipelines\Data Engineering.xlsx

Ejecutando procesadores de datos con rutas relativas:
-------------------------------------------


2025-05-13 11:58:18,640 - INFO - - Periodo: 2014-01-06 a 2025-04-03
2025-05-13 11:58:18,642 - INFO - - Cobertura: 100.00%
2025-05-13 11:58:18,643 - INFO - 
Procesando: US_Philly_Fed_Index (business_confidence)
2025-05-13 11:58:18,644 - INFO - - Archivo: US_Philly_Fed_Index.xlsx
2025-05-13 11:58:18,644 - INFO - - Columna TARGET: ACTUAL
2025-05-13 11:58:18,645 - INFO - - Ruta encontrada: c:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\Data\0_raw\business_confidence\US_Philly_Fed_Index.xlsx
2025-05-13 11:58:18,672 - INFO - - Filas encontradas: 136
2025-05-13 11:58:18,679 - INFO - Preferencia de dayfirst para c:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\Data\0_raw\business_confidence\US_Philly_Fed_Index.xlsx: True
2025-05-13 11:58:18,732 - INFO - - Valores no nulos en TARGET: 136
2025-05-13 11:58:18,733 - INFO - - Periodo: 2014-01-16 a 2025-04-17
2025-05-13 11:58:18,734 - INFO - - Cobertura: 100.

In [50]:
pip install openpyxl

Note: you may need to restart the kernel to use updated packages.


In [21]:
# Paso 0: Preprocesamiento inicial
print("Ejecutando paso 0: Preprocesamiento inicial")
%run ../pipelines/ml/step_0_preprocess.py

2025-05-13 11:58:57,063 - INFO - INICIANDO PROCESO: EconomicDataProcessor
2025-05-13 11:58:57,064 - INFO - Archivo de configuración: c:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\pipelines/Data Engineering.xlsx
2025-05-13 11:58:57,065 - INFO - Directorio raíz de datos: c:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\data/0_raw
2025-05-13 11:58:57,066 - INFO - Fecha y hora: 2025-05-13 11:58:57
2025-05-13 11:58:57,067 - INFO - Leyendo archivo de configuración...
2025-05-13 11:58:57,095 - INFO - Se encontraron 21 configuraciones para procesar


Ejecutando paso 0: Preprocesamiento inicial


2025-05-13 11:58:57,096 - INFO - 
Procesando: US_ISM_Manufacturing (business_confidence)
2025-05-13 11:58:57,098 - INFO - - Archivo: US_ISM_Manufacturing.xlsx
2025-05-13 11:58:57,098 - INFO - - Columna TARGET: ACTUAL
2025-05-13 11:58:57,099 - INFO - - Ruta encontrada: c:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\data/0_raw\business_confidence\US_ISM_Manufacturing.xlsx
2025-05-13 11:58:57,115 - INFO - - Filas encontradas: 136
2025-05-13 11:58:57,120 - INFO - Preferencia de dayfirst para c:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\data/0_raw\business_confidence\US_ISM_Manufacturing.xlsx: True
2025-05-13 11:58:57,179 - INFO - - Valores no nulos en TARGET: 136
2025-05-13 11:58:57,179 - INFO - - Periodo: 2014-01-02 a 2025-04-01
2025-05-13 11:58:57,180 - INFO - - Cobertura: 100.00%
2025-05-13 11:58:57,181 - INFO - 
Procesando: US_ISM_Services (business_confidence)
2025-05-13 11:58:57,182 - INFO

Proceso completado exitosamente


2025-05-13 11:59:00,735 - INFO - Ejemplos de fechas convertidas para Australia_10Y_Bond: [Timestamp('2025-04-30 00:00:00'), Timestamp('2025-04-29 00:00:00'), Timestamp('2025-04-28 00:00:00'), Timestamp('2025-04-24 00:00:00'), Timestamp('2025-04-23 00:00:00')]
2025-05-13 11:59:00,738 - INFO - Formato numérico detectado para Australia_10Y_Bond: americano
2025-05-13 11:59:00,743 - INFO - Para Australia_10Y_Bond (columna Date), la fecha mínima es 2014-01-01 00:00:00 y la fecha máxima es 2025-04-30 00:00:00
2025-05-13 11:59:00,745 - INFO - - Australia_10Y_Bond: 3832 filas procesadas, periodo: 2014-01-01 a 2025-04-30
2025-05-13 11:59:00,746 - INFO - 
Procesando: Italy_10Y_Bond (bond)
2025-05-13 11:59:00,747 - INFO - - Archivo: Italy_10Y_Bond
2025-05-13 11:59:00,748 - INFO - - Columna TARGET: PRICE
2025-05-13 11:59:00,750 - INFO - - Ruta encontrada: c:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\data/0_raw\bond\Italy_10Y_Bond.csv
2025-05-13 11:59

Proceso completado exitosamente


2025-05-13 11:59:27,669 - INFO - Primeras fechas convertidas: [Timestamp('2014-01-02 00:00:00'), Timestamp('2014-01-03 00:00:00'), Timestamp('2014-01-06 00:00:00'), Timestamp('2014-01-07 00:00:00'), Timestamp('2014-01-08 00:00:00')]
2025-05-13 11:59:27,671 - INFO - - Valores no nulos en TARGET: 2831
2025-05-13 11:59:27,672 - INFO - - Periodo: 2014-01-02 a 2025-04-28
2025-05-13 11:59:27,673 - INFO - - Cobertura: 100.00%
2025-05-13 11:59:27,674 - INFO - 
Procesando: US_2Y_Treasury (bond)
2025-05-13 11:59:27,675 - INFO - - Archivo: US_2Y_Treasury.csv
2025-05-13 11:59:27,676 - INFO - - Columna TARGET: DGS2
2025-05-13 11:59:27,676 - INFO - - Ruta encontrada: c:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\data/0_raw\bond\US_2Y_Treasury.csv
2025-05-13 11:59:27,678 - INFO - - Filas encontradas: 2953
2025-05-13 11:59:27,680 - INFO - Detección formato: 20/20 registros ISO (ratio 1.00)
2025-05-13 11:59:27,681 - INFO - Formato detectado para c:\Users\

Proceso completado exitosamente


2025-05-13 11:59:31,749 - INFO - Guardando estadísticas de los indicadores...
2025-05-13 11:59:32,363 - INFO - Archivo guardado exitosamente: c:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\data/0_raw/datos_economicos_other_procesados.xlsx
2025-05-13 11:59:32,363 - INFO - 
2025-05-13 11:59:32,364 - INFO - RESUMEN DE EJECUCIÓN
2025-05-13 11:59:32,366 - INFO - Proceso: OtherDataProcessor
2025-05-13 11:59:32,366 - INFO - Tiempo de ejecución: 0.95 segundos
2025-05-13 11:59:32,367 - INFO - Archivos procesados: 4
2025-05-13 11:59:32,367 - INFO - Archivos con error: 2
2025-05-13 11:59:32,368 - INFO - Archivos procesados correctamente: 2
2025-05-13 11:59:32,369 - INFO - Periodo de datos: 2001-07-31 a 2025-04-30
2025-05-13 11:59:32,370 - INFO - Datos combinados: 8675 filas, 5 columnas
2025-05-13 11:59:32,370 - INFO - Archivo de salida: c:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\data/0_raw/datos_econ

Proceso completado exitosamente


2025-05-13 11:59:32,717 - INFO - Guardando estadísticas de los indicadores...
2025-05-13 11:59:33,150 - INFO - Archivo guardado exitosamente: c:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\data/0_raw/datos_economicos_other_procesados.xlsx
2025-05-13 11:59:33,151 - INFO - 
2025-05-13 11:59:33,151 - INFO - RESUMEN DE EJECUCIÓN
2025-05-13 11:59:33,153 - INFO - Proceso: OtherDataProcessor
2025-05-13 11:59:33,153 - INFO - Tiempo de ejecución: 0.77 segundos
2025-05-13 11:59:33,154 - INFO - Archivos procesados: 4
2025-05-13 11:59:33,154 - INFO - Archivos con error: 2
2025-05-13 11:59:33,155 - INFO - Archivos procesados correctamente: 2
2025-05-13 11:59:33,155 - INFO - Periodo de datos: 2001-07-31 a 2025-04-30
2025-05-13 11:59:33,156 - INFO - Datos combinados: 8675 filas, 5 columnas
2025-05-13 11:59:33,156 - INFO - Archivo de salida: c:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\data/0_raw/datos_econ

Proceso completado exitosamente


2025-05-13 11:59:35,081 - INFO - Archivo Excel cargado exitosamente desde C:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\Data\1_preprocess\MERGEDEXCELS.xlsx
2025-05-13 11:59:35,081 - INFO - Iniciando la categorización de columnas...
2025-05-13 11:59:35,083 - INFO - Proceso completado exitosamente.


columnas encontradas para Sin categoría:
fecha

columnas encontradas para bond:
PRICE_Australia_10Y_Bond_bond
PRICE_Italy_10Y_Bond_bond
PRICE_Japan_10Y_Bond_bond
PRICE_UK_10Y_Bond_bond
PRICE_Germany_10Y_Bond_bond
PRICE_Canada_10Y_Bond_bond
PRICE_China_10Y_Bond_bond
DGS10_US_10Y_Treasury_bond
DGS2_US_2Y_Treasury_bond
AAA_Corporate_Bond_AAA_Spread_bond
BAA10YM_Corporate_Bond_BBB_Spread_bond
BAMLH0A0HYM2_High_Yield_Bond_Spread_bond

columnas encontradas para commodities:
PRICE_CrudeOil_WTI_commodities
PRICE_Gold_Spot_commodities
PRICE_Silver_Spot_commodities
PRICE_Copper_Futures_commodities
PRICE_Platinum_Spot_commodities

columnas encontradas para exchange_rate:
PRICE_EUR_USD_Spot_exchange_rate
PRICE_GBP_USD_Spot_exchange_rate
PRICE_JPY_USD_Spot_exchange_rate
PRICE_CNY_USD_Spot_exchange_rate
PRICE_AUD_USD_Spot_exchange_rate
PRICE_CAD_USD_Spot_exchange_rate
PRICE_MXN_USD_Spot_exchange_rate
PRICE_EUR_GBP_Cross_exchange_rate

columnas encontradas para index_pricing:
PRICE_S&P500_Index_index

In [25]:
# Paso 1: Fusión de archivos Excel
print("Ejecutando paso 1: Fusión de archivos Excel")
%run ../pipelines/ml/step_1_merge_excels.py

2025-05-13 12:02:57,992 - INFO - === INICIANDO PROCESO DE COMBINACIÓN DE ARCHIVOS EXCEL ===
2025-05-13 12:02:57,997 - INFO - Se encontraron 4 archivos Excel en la carpeta.
2025-05-13 12:02:57,998 - INFO - Cargando archivo: datos_economicos_normales_procesados.xlsx


Ejecutando paso 1: Fusión de archivos Excel


2025-05-13 12:02:58,625 - INFO - Archivo datos_economicos_normales_procesados.xlsx cargado en 0.63s con 4151 filas.
2025-05-13 12:02:58,626 - INFO - Archivo datos_economicos_normales_procesados.xlsx cargado correctamente con 4151 filas
--- Logging error ---
Traceback (most recent call last):
  File "C:\Users\natus\anaconda3\Lib\logging\__init__.py", line 1163, in emit
    stream.write(msg + self.terminator)
  File "C:\Users\natus\anaconda3\Lib\encodings\cp1252.py", line 19, in encode
    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeEncodeError: 'charmap' codec can't encode character '\u2192' in position 78: character maps to <undefined>
Call stack:
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "c:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\venv_MValores\Lib\site-packages\ipykernel

In [7]:
# Paso 2: Generación de categorías
print("Ejecutando paso 2: Generación de categorías")
%run ../pipelines/ml/step_2_generate_categories.py



Ejecutando paso 2: Generación de categorías


2025-05-12 13:40:41,906 - INFO - Archivo cargado correctamente. Dimensiones: (4104, 85)
2025-05-12 13:40:41,909 - INFO - No se encontraron columnas para renombrar.
2025-05-12 13:40:41,956 - INFO - Resultados detallados de categorización guardados en: c:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\data\1_preprocess\DIAGNOSTICO_CATEGORIAS.xlsx
2025-05-12 13:40:41,958 - INFO - Resumen de categorización:
2025-05-12 13:40:41,958 - INFO -   - Sin categoría: 1 columnas
2025-05-12 13:40:41,959 - INFO -   - bond: 11 columnas
2025-05-12 13:40:41,960 - INFO -   - business_confidence: 7 columnas
2025-05-12 13:40:41,961 - INFO -   - car_registrations: 5 columnas
2025-05-12 13:40:41,962 - INFO -   - comm_loans: 2 columnas
2025-05-12 13:40:41,962 - INFO -   - commodities: 5 columnas
2025-05-12 13:40:41,963 - INFO -   - consumer_confidence: 12 columnas
2025-05-12 13:40:41,964 - INFO -   - economics: 22 columnas
2025-05-12 13:40:41,964 - INFO -   - exchang

In [8]:
# Paso 3: Limpieza de columnas
print("Ejecutando paso 3: Limpieza de columnas")
%run ../pipelines/ml/step_3_clean_columns.py



Ejecutando paso 3: Limpieza de columnas


2025-05-12 13:45:02,391 - INFO - Archivo cargado correctamente. Columnas: 85
2025-05-12 13:45:02,393 - INFO - Se modificaron 5 nombres de columnas.
2025-05-12 13:45:02,394 - INFO - Renombrando: 'DNKSLRTCR03GPSAM_Denmark_Car_Registrations_MoM_car_registrations' -> 'DNKSLRTCR03GPSAM_Denmark_Car_Registrations_MoM'
2025-05-12 13:45:02,394 - INFO - Renombrando: 'USASLRTCR03GPSAM_US_Car_Registrations_MoM_car_registrations' -> 'USASLRTCR03GPSAM_US_Car_Registrations_MoM'
2025-05-12 13:45:02,394 - INFO - Renombrando: 'ZAFSLRTCR03GPSAM_SouthAfrica_Car_Registrations_MoM_car_registrations' -> 'ZAFSLRTCR03GPSAM_SouthAfrica_Car_Registrations_MoM'
2025-05-12 13:45:02,395 - INFO - Renombrando: 'GBRSLRTCR03GPSAM_United_Kingdom_Car_Registrations_MoM_car_registrations' -> 'GBRSLRTCR03GPSAM_United_Kingdom_Car_Registrations_MoM'
2025-05-12 13:45:02,396 - INFO - Renombrando: 'ESPSLRTCR03GPSAM_Spain_Car_Registrations_MoM_car_registrations' -> 'ESPSLRTCR03GPSAM_Spain_Car_Registrations_MoM'
2025-05-12 13:45:07

In [9]:
# Paso 4: Transformación de características
print("Ejecutando paso 4: Transformación de características")
%run ../pipelines/ml/step_4_transform_features.py


Ejecutando paso 4: Transformación de características
✅ Archivo Excel cargado exitosamente.
🧪 Primeras filas del archivo:
                    0                              1   \
0                fecha  PRICE_Australia_10Y_Bond_bond   
1        Sin categoría                           bond   
2  2014-01-01 00:00:00                          4.289   
3  2014-01-02 00:00:00                          4.331   
4  2014-01-03 00:00:00                          4.344   

                          2                          3   \
0  PRICE_Italy_10Y_Bond_bond  PRICE_Japan_10Y_Bond_bond   
1                       bond                       bond   
2                        NaN                        NaN   
3                        NaN                        NaN   
4                        NaN                        NaN   

                       4                            5   \
0  PRICE_UK_10Y_Bond_bond  PRICE_Germany_10Y_Bond_bond   
1                    bond                         bond   
2      

In [None]:

# Paso 5: Eliminación de relaciones
print("Ejecutando paso 5: Eliminación de relaciones")
%run ../pipelines/ml/step_5_remove_relations.py

Ejecutando paso 5: Eliminación de relaciones


2025-05-12 13:47:37,866 - INFO - Iniciando proceso de selección y filtrado de features...
2025-05-12 13:47:46,907 - INFO - Datos cargados: 2932 filas, 474 columnas
2025-05-12 13:47:46,908 - INFO - Target identificado: ICSA_US_Initial_Jobless_Claims_unemployment_rate
2025-05-12 13:47:47,182 - INFO - Features constantes eliminadas: 2
2025-05-12 13:47:47,183 - INFO - Ejemplos: ['id', 'log_diff_AAA_Corporate_Bond_AAA_Spread_bond']...
2025-05-12 13:47:47,186 - INFO - Features numéricas para análisis: 470
2025-05-12 13:47:48,594 - INFO - Features correlacionadas eliminadas: 255
2025-05-12 13:47:48,595 - INFO - Ejemplos: ['EMA_5_PRICE_CrudeOil_WTI_commodities', 'PRICE_UK_10Y_Bond_bond', 'rolling_std_PRICE_Platinum_Spot_commodities', 'MA_20_PRICE_Shanghai_Composite_index_pricing', 'WALCL_Fed_Balance_Sheet_economics']...
2025-05-12 13:48:20,519 - INFO - Eliminando 'PAYEMS_US_Nonfarm_Payrolls_unemployment_rate' por VIF = 388778.60
2025-05-12 13:48:52,478 - INFO - Eliminando 'log_PRICE_JPY_USD_Sp

In [11]:
!pip install catboost
!pip install feature_engine
!pip install pandas_market_calendars



In [12]:
# Paso 6: Selección FPI
print("Ejecutando paso 6: Selección FPI")
%run ../pipelines/ml/step_6_fpi_selection.py



Ejecutando paso 6: Selección FPI


2025-05-12 15:28:36,821 - INFO - Usando el archivo más reciente: C:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\data\2_processed\ULTIMO_S&P500_final.xlsx
2025-05-12 15:28:36,822 - INFO - La salida se guardará en: C:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\data\3_trainingdata\ULTIMO_S&P500_final_FPI.xlsx
2025-05-12 15:28:39,461 - INFO - Archivo 'C:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\data\2_processed\ULTIMO_S&P500_final.xlsx' cargado con forma (2932, 132).
2025-05-12 15:28:39,462 - INFO - Columnas del DataFrame original: ['MoM_PRICE_Australia_10Y_Bond_bond', 'log_diff_PRICE_Australia_10Y_Bond_bond', 'rolling_var_PRICE_Australia_10Y_Bond_bond', '3M_change_PRICE_Australia_10Y_Bond_bond', 'zscore_PRICE_Australia_10Y_Bond_bond', 'MoM_PRICE_Japan_10Y_Bond_bond', 'YoY_PRICE_Japan_10Y_Bond_bond', 'log_diff_PRICE_Japan_10Y_Bond_bond', 'rolling_

In [13]:
!pip install lightgbm
!pip install xgboost




In [1]:
# Paso 7: Entrenamiento de modelos
print("Ejecutando paso 7: Entrenamiento de modelos")
%run ../pipelines/ml/step_7_0_train_models.py

Ejecutando paso 7: Entrenamiento de modelos


  from .autonotebook import tqdm as notebook_tqdm
2025-05-12 17:36:43,695 - INFO - Usando el archivo más reciente: C:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\data\3_trainingdata\ULTIMO_S&P500_final_FPI.xlsx
2025-05-12 17:36:43,698 - INFO - Usando horizonte de 20 días para forecast_period=1MONTH
2025-05-12 17:36:46,184 - INFO - Datos leídos y ordenados por fecha.
2025-05-12 17:36:46,193 - INFO - Se han imputado los valores NaN e inf (ffill y relleno con 0).
2025-05-12 17:36:46,226 - INFO - Tiempo de carga y preprocesamiento: 2.53s
2025-05-12 17:36:46,226 - INFO - Realizando división temporal por años (Year<=2022 para train+val)
2025-05-12 17:36:46,232 - INFO - División por años: Train=2246 (years <= 2022), Val=20 (last 20 days of 2022), Test=560 (years > 2022)
2025-05-12 17:36:46,233 - INFO - Tiempo para split de datos: 0.01s
2025-05-12 17:36:46,233 - INFO - === Optimizando y entrenando CatBoost... ===
2025-05-12 17:36:46,234 - INFO - [

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002116 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10385
[LightGBM] [Info] Number of data points in the train set: 356, number of used features: 106
[LightGBM] [Info] Start training from score 1974.842415
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[43]	valid_0's rmse: 134.288	valid_0's l2: 18033.2
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001110 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 23140
[LightGBM] [Info] Number of data points in the train set: 730, number of used features: 107
[LightGBM] [Info] Start training from score 2021.568219
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[118]	valid_0's rmse: 523.008	valid_0's l2: 273537
[LightGBM] [Info

2025-05-12 17:46:26,607 - INFO - [LightGBM] Trial 0: RMSE=671.8982, Time=2.05s
[I 2025-05-12 17:46:26,609] Trial 0 finished with value: 671.8982284880136 and parameters: {'learning_rate': 0.004151639030613823, 'max_depth': 6, 'n_estimators': 597}. Best is trial 0 with value: 671.8982284880136.


Did not meet early stopping. Best iteration is:
[597]	valid_0's rmse: 837.482	valid_0's l2: 701376
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000718 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10385
[LightGBM] [Info] Number of data points in the train set: 356, number of used features: 106
[LightGBM] [Info] Start training from score 1974.842415
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[125]	valid_0's rmse: 134.29	valid_0's l2: 18033.8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001403 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 23140
[LightGBM] [Info] Number of data points in the train set: 730, number of used features: 107
[LightGBM] [Info] Start training from score 2021.568219
Training until validation scores don't improve for 50 rounds
Did

2025-05-12 17:46:31,326 - INFO - [LightGBM] Trial 1: RMSE=717.2365, Time=4.72s
[I 2025-05-12 17:46:31,329] Trial 1 finished with value: 717.2365250519654 and parameters: {'learning_rate': 0.0014182357321258846, 'max_depth': 10, 'n_estimators': 675}. Best is trial 0 with value: 671.8982284880136.


Did not meet early stopping. Best iteration is:
[675]	valid_0's rmse: 1074.73	valid_0's l2: 1.15503e+06
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000855 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10385
[LightGBM] [Info] Number of data points in the train set: 356, number of used features: 106
[LightGBM] [Info] Start training from score 1974.842415
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[6]	valid_0's rmse: 134.256	valid_0's l2: 18024.7
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001675 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 23140
[LightGBM] [Info] Number of data points in the train set: 730, number of used features: 107
[LightGBM] [Info] Start training from score 2021.568219
Training until validation scores don't improve for 50 rounds

2025-05-12 17:46:31,994 - INFO - [LightGBM] Trial 2: RMSE=670.7922, Time=0.66s
[I 2025-05-12 17:46:31,996] Trial 2 finished with value: 670.7922251882405 and parameters: {'learning_rate': 0.03199882313423211, 'max_depth': 5, 'n_estimators': 1277}. Best is trial 2 with value: 670.7922251882405.


Early stopping, best iteration is:
[139]	valid_0's rmse: 825.577	valid_0's l2: 681578
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000845 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10385
[LightGBM] [Info] Number of data points in the train set: 356, number of used features: 106
[LightGBM] [Info] Start training from score 1974.842415
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[47]	valid_0's rmse: 134.283	valid_0's l2: 18032
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001495 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 23140
[LightGBM] [Info] Number of data points in the train set: 730, number of used features: 107
[LightGBM] [Info] Start training from score 2021.568219
Training until validation scores don't improve for 50 rounds
Early stopping, be

2025-05-12 17:46:36,599 - INFO - [LightGBM] Trial 3: RMSE=665.3867, Time=4.60s
[I 2025-05-12 17:46:36,602] Trial 3 finished with value: 665.3867148485646 and parameters: {'learning_rate': 0.0038451614457144794, 'max_depth': 10, 'n_estimators': 1271}. Best is trial 3 with value: 665.3867148485646.


Early stopping, best iteration is:
[1006]	valid_0's rmse: 817.388	valid_0's l2: 668123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000756 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10385
[LightGBM] [Info] Number of data points in the train set: 356, number of used features: 106
[LightGBM] [Info] Start training from score 1974.842415
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[20]	valid_0's rmse: 134.288	valid_0's l2: 18033.4
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001426 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 23140
[LightGBM] [Info] Number of data points in the train set: 730, number of used features: 107
[LightGBM] [Info] Start training from score 2021.568219
Training until validation scores don't improve for 50 rounds
Early stopping,

2025-05-12 17:46:40,016 - INFO - [LightGBM] Trial 4: RMSE=664.3749, Time=3.41s
[I 2025-05-12 17:46:40,018] Trial 4 finished with value: 664.3748822976852 and parameters: {'learning_rate': 0.00884102089814788, 'max_depth': 10, 'n_estimators': 907}. Best is trial 4 with value: 664.3748822976852.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000762 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10385
[LightGBM] [Info] Number of data points in the train set: 356, number of used features: 106
[LightGBM] [Info] Start training from score 1974.842415
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[162]	valid_0's rmse: 134.288	valid_0's l2: 18033.2
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001323 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 23140
[LightGBM] [Info] Number of data points in the train set: 730, number of used features: 107
[LightGBM] [Info] Start training from score 2021.568219
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1070]	valid_0's rmse: 512.28	valid_0's l2: 262430
[L

2025-05-12 17:46:47,153 - INFO - [LightGBM] Trial 5: RMSE=703.7996, Time=7.13s
[I 2025-05-12 17:46:47,158] Trial 5 finished with value: 703.799593224801 and parameters: {'learning_rate': 0.00109996822432747, 'max_depth': 9, 'n_estimators': 1071}. Best is trial 4 with value: 664.3748822976852.


Did not meet early stopping. Best iteration is:
[1071]	valid_0's rmse: 1007.75	valid_0's l2: 1.01557e+06
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000955 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10385
[LightGBM] [Info] Number of data points in the train set: 356, number of used features: 106
[LightGBM] [Info] Start training from score 1974.842415
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[21]	valid_0's rmse: 134.27	valid_0's l2: 18028.5
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002206 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 23140
[LightGBM] [Info] Number of data points in the train set: 730, number of used features: 107
[LightGBM] [Info] Start training from score 2021.568219
Training until validation scores don't improve for 50 round

2025-05-12 17:46:48,352 - INFO - [LightGBM] Trial 6: RMSE=679.0459, Time=1.19s
[I 2025-05-12 17:46:48,354] Trial 6 finished with value: 679.0459150230047 and parameters: {'learning_rate': 0.00695405991827337, 'max_depth': 4, 'n_estimators': 1245}. Best is trial 4 with value: 664.3748822976852.


Early stopping, best iteration is:
[521]	valid_0's rmse: 869.657	valid_0's l2: 756303
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000608 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10385
[LightGBM] [Info] Number of data points in the train set: 356, number of used features: 106
[LightGBM] [Info] Start training from score 1974.842415
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[7]	valid_0's rmse: 134.288	valid_0's l2: 18033.2
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001681 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 23140
[LightGBM] [Info] Number of data points in the train set: 730, number of used features: 107
[LightGBM] [Info] Start training from score 2021.568219
Training until validation scores don't improve for 50 rounds
Early stopping, b

2025-05-12 17:46:49,932 - INFO - [LightGBM] Trial 7: RMSE=663.5366, Time=1.58s
[I 2025-05-12 17:46:49,933] Trial 7 finished with value: 663.5366124734066 and parameters: {'learning_rate': 0.024853316599954007, 'max_depth': 10, 'n_estimators': 512}. Best is trial 7 with value: 663.5366124734066.


Early stopping, best iteration is:
[337]	valid_0's rmse: 812.489	valid_0's l2: 660139
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000798 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10385
[LightGBM] [Info] Number of data points in the train set: 356, number of used features: 106
[LightGBM] [Info] Start training from score 1974.842415
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[21]	valid_0's rmse: 134.307	valid_0's l2: 18038.2
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001582 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 23140
[LightGBM] [Info] Number of data points in the train set: 730, number of used features: 107
[LightGBM] [Info] Start training from score 2021.568219
Training until validation scores don't improve for 50 rounds
Early stopping, 

2025-05-12 17:46:51,216 - INFO - [LightGBM] Trial 8: RMSE=678.9189, Time=1.28s
[I 2025-05-12 17:46:51,219] Trial 8 finished with value: 678.9189487071183 and parameters: {'learning_rate': 0.004842714682304414, 'max_depth': 4, 'n_estimators': 1571}. Best is trial 7 with value: 663.5366124734066.


Early stopping, best iteration is:
[760]	valid_0's rmse: 869.236	valid_0's l2: 755571
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000870 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10385
[LightGBM] [Info] Number of data points in the train set: 356, number of used features: 106
[LightGBM] [Info] Start training from score 1974.842415
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[23]	valid_0's rmse: 134.291	valid_0's l2: 18034.1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001489 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 23140
[LightGBM] [Info] Number of data points in the train set: 730, number of used features: 107
[LightGBM] [Info] Start training from score 2021.568219
Training until validation scores don't improve for 50 rounds
Early stopping, 

2025-05-12 17:46:53,409 - INFO - [LightGBM] Trial 9: RMSE=667.2682, Time=2.19s
[I 2025-05-12 17:46:53,412] Trial 9 finished with value: 667.2681634236606 and parameters: {'learning_rate': 0.007775336162038853, 'max_depth': 6, 'n_estimators': 1857}. Best is trial 7 with value: 663.5366124734066.


Early stopping, best iteration is:
[705]	valid_0's rmse: 813.962	valid_0's l2: 662533
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000866 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10385
[LightGBM] [Info] Number of data points in the train set: 356, number of used features: 106
[LightGBM] [Info] Start training from score 1974.842415
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[2]	valid_0's rmse: 134.278	valid_0's l2: 18030.6
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001270 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 23140
[LightGBM] [Info] Number of data points in the train set: 730, number of used features: 107
[LightGBM] [Info] Start training from score 2021.568219
Training until validation scores don't improve for 50 rounds
Early stopping, b

2025-05-12 17:46:54,429 - INFO - [LightGBM] Trial 10: RMSE=665.3074, Time=1.02s
[I 2025-05-12 17:46:54,431] Trial 10 finished with value: 665.3074312257845 and parameters: {'learning_rate': 0.04388828848670923, 'max_depth': 8, 'n_estimators': 505}. Best is trial 7 with value: 663.5366124734066.


Early stopping, best iteration is:
[177]	valid_0's rmse: 812.849	valid_0's l2: 660723
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000672 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10385
[LightGBM] [Info] Number of data points in the train set: 356, number of used features: 106
[LightGBM] [Info] Start training from score 1974.842415
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[10]	valid_0's rmse: 134.285	valid_0's l2: 18032.5
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001835 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 23140
[LightGBM] [Info] Number of data points in the train set: 730, number of used features: 107
[LightGBM] [Info] Start training from score 2021.568219
Training until validation scores don't improve for 50 rounds
Early stopping, 

2025-05-12 17:46:56,209 - INFO - [LightGBM] Trial 11: RMSE=664.7824, Time=1.78s
[I 2025-05-12 17:46:56,211] Trial 11 finished with value: 664.7823997346778 and parameters: {'learning_rate': 0.01893490101317616, 'max_depth': 8, 'n_estimators': 895}. Best is trial 7 with value: 663.5366124734066.


Early stopping, best iteration is:
[390]	valid_0's rmse: 813.67	valid_0's l2: 662060
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000906 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10385
[LightGBM] [Info] Number of data points in the train set: 356, number of used features: 106
[LightGBM] [Info] Start training from score 1974.842415
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[12]	valid_0's rmse: 134.273	valid_0's l2: 18029.3
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001381 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 23140
[LightGBM] [Info] Number of data points in the train set: 730, number of used features: 107
[LightGBM] [Info] Start training from score 2021.568219
Training until validation scores don't improve for 50 rounds
Early stopping, b

2025-05-12 17:46:58,515 - INFO - [LightGBM] Trial 12: RMSE=664.0714, Time=2.30s
[I 2025-05-12 17:46:58,517] Trial 12 finished with value: 664.0714319455528 and parameters: {'learning_rate': 0.014629696690600248, 'max_depth': 9, 'n_estimators': 873}. Best is trial 7 with value: 663.5366124734066.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000831 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10385
[LightGBM] [Info] Number of data points in the train set: 356, number of used features: 106
[LightGBM] [Info] Start training from score 1974.842415
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[9]	valid_0's rmse: 134.284	valid_0's l2: 18032.1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001478 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 23140
[LightGBM] [Info] Number of data points in the train set: 730, number of used features: 107
[LightGBM] [Info] Start training from score 2021.568219
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[75]	valid_0's rmse: 515.758	valid_0's l2: 266006
[LightGBM] [Info] 

2025-05-12 17:47:00,402 - INFO - [LightGBM] Trial 13: RMSE=665.1918, Time=1.88s
[I 2025-05-12 17:47:00,404] Trial 13 finished with value: 665.1917896557277 and parameters: {'learning_rate': 0.02001883980874093, 'max_depth': 8, 'n_estimators': 831}. Best is trial 7 with value: 663.5366124734066.


Early stopping, best iteration is:
[596]	valid_0's rmse: 815.47	valid_0's l2: 664991
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000925 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10385
[LightGBM] [Info] Number of data points in the train set: 356, number of used features: 106
[LightGBM] [Info] Start training from score 1974.842415
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[11]	valid_0's rmse: 134.29	valid_0's l2: 18033.8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001546 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 23140
[LightGBM] [Info] Number of data points in the train set: 730, number of used features: 107
[LightGBM] [Info] Start training from score 2021.568219
Training until validation scores don't improve for 50 rounds
Early stopping, be

2025-05-12 17:47:02,634 - INFO - [LightGBM] Trial 14: RMSE=664.1541, Time=2.23s
[I 2025-05-12 17:47:02,636] Trial 14 finished with value: 664.1541215539626 and parameters: {'learning_rate': 0.015987246704124297, 'max_depth': 9, 'n_estimators': 761}. Best is trial 7 with value: 663.5366124734066.


Early stopping, best iteration is:
[513]	valid_0's rmse: 815.027	valid_0's l2: 664269
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000960 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10385
[LightGBM] [Info] Number of data points in the train set: 356, number of used features: 106
[LightGBM] [Info] Start training from score 1974.842415
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[15]	valid_0's rmse: 134.276	valid_0's l2: 18030.1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001425 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 23140
[LightGBM] [Info] Number of data points in the train set: 730, number of used features: 107
[LightGBM] [Info] Start training from score 2021.568219
Training until validation scores don't improve for 50 rounds
Early stopping, 

2025-05-12 17:47:04,568 - INFO - [LightGBM] Trial 15: RMSE=664.6807, Time=1.93s
[I 2025-05-12 17:47:04,570] Trial 15 finished with value: 664.6806978897869 and parameters: {'learning_rate': 0.012174745622710292, 'max_depth': 9, 'n_estimators': 1033}. Best is trial 7 with value: 663.5366124734066.


Early stopping, best iteration is:
[315]	valid_0's rmse: 818.004	valid_0's l2: 669131
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000907 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10385
[LightGBM] [Info] Number of data points in the train set: 356, number of used features: 106
[LightGBM] [Info] Start training from score 1974.842415
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[7]	valid_0's rmse: 134.276	valid_0's l2: 18030
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001518 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 23140
[LightGBM] [Info] Number of data points in the train set: 730, number of used features: 107
[LightGBM] [Info] Start training from score 2021.568219
Training until validation scores don't improve for 50 rounds
Early stopping, bes

2025-05-12 17:47:05,884 - INFO - [LightGBM] Trial 16: RMSE=665.7414, Time=1.31s
[I 2025-05-12 17:47:05,885] Trial 16 finished with value: 665.741440700962 and parameters: {'learning_rate': 0.02656773207373731, 'max_depth': 7, 'n_estimators': 518}. Best is trial 7 with value: 663.5366124734066.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000770 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10385
[LightGBM] [Info] Number of data points in the train set: 356, number of used features: 106
[LightGBM] [Info] Start training from score 1974.842415
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[2]	valid_0's rmse: 134.283	valid_0's l2: 18031.8
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001511 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 23140
[LightGBM] [Info] Number of data points in the train set: 730, number of used features: 107
[LightGBM] [Info] Start training from score 2021.568219
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[37]	valid_0's rmse: 512.078	valid_0's l2: 262224
[LightGBM] [Info] 

2025-05-12 17:47:07,024 - INFO - [LightGBM] Trial 17: RMSE=664.3638, Time=1.14s
[I 2025-05-12 17:47:07,027] Trial 17 finished with value: 664.3638134678574 and parameters: {'learning_rate': 0.04294151517477312, 'max_depth': 9, 'n_estimators': 722}. Best is trial 7 with value: 663.5366124734066.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001183 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10385
[LightGBM] [Info] Number of data points in the train set: 356, number of used features: 106
[LightGBM] [Info] Start training from score 1974.842415
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[15]	valid_0's rmse: 134.277	valid_0's l2: 18030.2
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001459 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 23140
[LightGBM] [Info] Number of data points in the train set: 730, number of used features: 107
[LightGBM] [Info] Start training from score 2021.568219
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[123]	valid_0's rmse: 511.12	valid_0's l2: 261244
[LightGBM] [Info]

2025-05-12 17:47:09,086 - INFO - [LightGBM] Trial 18: RMSE=663.8940, Time=2.06s
[I 2025-05-12 17:47:09,089] Trial 18 finished with value: 663.8939641838354 and parameters: {'learning_rate': 0.012142962694428688, 'max_depth': 10, 'n_estimators': 1069}. Best is trial 7 with value: 663.5366124734066.


Early stopping, best iteration is:
[317]	valid_0's rmse: 814.718	valid_0's l2: 663765
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000832 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10385
[LightGBM] [Info] Number of data points in the train set: 356, number of used features: 106
[LightGBM] [Info] Start training from score 1974.842415
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[77]	valid_0's rmse: 134.287	valid_0's l2: 18032.9
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001524 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 23140
[LightGBM] [Info] Number of data points in the train set: 730, number of used features: 107
[LightGBM] [Info] Start training from score 2021.568219
Training until validation scores don't improve for 50 rounds
Early stopping, 

2025-05-12 17:47:15,986 - INFO - [LightGBM] Trial 19: RMSE=666.0183, Time=6.90s
[I 2025-05-12 17:47:15,988] Trial 19 finished with value: 666.0183420828895 and parameters: {'learning_rate': 0.0023357270108859637, 'max_depth': 10, 'n_estimators': 1534}. Best is trial 7 with value: 663.5366124734066.
2025-05-12 17:47:15,989 - INFO - [LightGBM] Mejor RMSE en CV (Training): 663.5366
2025-05-12 17:47:15,990 - INFO - [LightGBM] Mejores hiperparámetros: {'learning_rate': 0.024853316599954007, 'max_depth': 10, 'n_estimators': 512}
2025-05-12 17:47:15,991 - INFO - [LightGBM] Tiempo de optimización: 51.43s


Did not meet early stopping. Best iteration is:
[1533]	valid_0's rmse: 820.615	valid_0's l2: 673410


2025-05-12 17:47:16,216 - INFO - [LightGBM] Gráfico de evolución de trials guardado: C:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\data\4_results\charts\lightgbm_trials_evolution.png
2025-05-12 17:47:16,217 - INFO - [LightGBM] Entrenando modelo final con parámetros: {'learning_rate': 0.024853316599954007, 'max_depth': 10, 'n_estimators': 512}


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002462 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 25630
[LightGBM] [Info] Number of data points in the train set: 2246, number of used features: 109
[LightGBM] [Info] Start training from score 2855.650448


2025-05-12 17:47:17,304 - INFO - [LightGBM] Modelo entrenado inicialmente en 1.09s
2025-05-12 17:47:17,413 - INFO - [LightGBM] Modelo refinado localmente en 0.11s
2025-05-12 17:47:17,426 - INFO - [LightGBM] Training predictions generadas en 0.01s (RMSE=1686.5228)
2025-05-12 17:47:17,431 - INFO - [LightGBM] Predicciones en Evaluacion generadas en 0.00s (RMSE=339.3806)
2025-05-12 17:47:17,440 - INFO - [LightGBM] Predicciones en Test generadas en 0.01s (RMSE=918.4834)
2025-05-12 17:47:17,471 - INFO - [LightGBM] Mejor modelo guardado como: C:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\models\lightgbm_best.pkl
2025-05-12 17:47:17,472 - INFO - [LightGBM] Modelo refinado guardado en 0.03s: C:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\models\lightgbmtrial7.pkl


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000420 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5893
[LightGBM] [Info] Number of data points in the train set: 180, number of used features: 103
[LightGBM] [Info] Start training from score 4169.403954


2025-05-12 17:47:17,522 - INFO - [LightGBM] Forecast de 20 días generado en 0.05s
2025-05-12 17:47:17,942 - INFO - [LightGBM] Gráfico completo guardado: C:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\data\4_results\charts\lightgbm_trial7_full.png
2025-05-12 17:47:17,964 - INFO - [LightGBM] CSV para Trial 7 guardado: C:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\data\4_results\s&p500_lightgbm_trial7.csv
2025-05-12 17:47:18,549 - INFO - [LightGBM] Gráfico de importancia de features guardado: C:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\data\4_results\charts\lightgbm_trial7_importance.png
2025-05-12 17:47:18,550 - INFO - [LightGBM] Trial 7 métricas:
2025-05-12 17:47:18,550 - INFO -   - Training: RMSE=1686.5228, MAE=1479.4276, R2=-3.1087
2025-05-12 17:47:18,551 - INFO -   - Validación: RMSE=339.3806, MAE=319.0875, R2=-16.9696
2025-05-12 17:47:18,55

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002488 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 25630
[LightGBM] [Info] Number of data points in the train set: 2246, number of used features: 109
[LightGBM] [Info] Start training from score 2855.650448


2025-05-12 17:47:20,762 - INFO - [LightGBM] Modelo entrenado inicialmente en 2.21s


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000316 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5893
[LightGBM] [Info] Number of data points in the train set: 180, number of used features: 103
[LightGBM] [Info] Start training from score 4169.403954


2025-05-12 17:47:21,005 - INFO - [LightGBM] Modelo refinado localmente en 0.24s
2025-05-12 17:47:21,031 - INFO - [LightGBM] Training predictions generadas en 0.03s (RMSE=1687.8015)
2025-05-12 17:47:21,038 - INFO - [LightGBM] Predicciones en Evaluacion generadas en 0.01s (RMSE=349.8295)
2025-05-12 17:47:21,049 - INFO - [LightGBM] Predicciones en Test generadas en 0.01s (RMSE=919.8923)
2025-05-12 17:47:21,073 - INFO - [LightGBM] Modelo refinado guardado en 0.02s: C:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\models\lightgbmtrial18.pkl
2025-05-12 17:47:21,099 - INFO - [LightGBM] Forecast de 20 días generado en 0.02s




2025-05-12 17:47:21,523 - INFO - [LightGBM] Gráfico completo guardado: C:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\data\4_results\charts\lightgbm_trial18_full.png
2025-05-12 17:47:21,546 - INFO - [LightGBM] CSV para Trial 18 guardado: C:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\data\4_results\s&p500_lightgbm_trial18.csv
2025-05-12 17:47:22,050 - INFO - [LightGBM] Gráfico de importancia de features guardado: C:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\data\4_results\charts\lightgbm_trial18_importance.png
2025-05-12 17:47:22,051 - INFO - [LightGBM] Trial 18 métricas:
2025-05-12 17:47:22,052 - INFO -   - Training: RMSE=1687.8015, MAE=1480.4869, R2=-3.1150
2025-05-12 17:47:22,054 - INFO -   - Validación: RMSE=349.8295, MAE=330.7028, R2=-18.0931
2025-05-12 17:47:22,054 - INFO -   - Test: RMSE=919.8923, MAE=769.4360, R2=-0.7531
2025-05-12 17:47

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002727 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 25630
[LightGBM] [Info] Number of data points in the train set: 2246, number of used features: 109
[LightGBM] [Info] Start training from score 2855.650448


2025-05-12 17:47:23,973 - INFO - [LightGBM] Modelo entrenado inicialmente en 1.92s


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000267 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 5893
[LightGBM] [Info] Number of data points in the train set: 180, number of used features: 103
[LightGBM] [Info] Start training from score 4169.403954


2025-05-12 17:47:24,190 - INFO - [LightGBM] Modelo refinado localmente en 0.22s




2025-05-12 17:47:24,214 - INFO - [LightGBM] Training predictions generadas en 0.02s (RMSE=1686.1045)
2025-05-12 17:47:24,223 - INFO - [LightGBM] Predicciones en Evaluacion generadas en 0.01s (RMSE=346.6524)
2025-05-12 17:47:24,240 - INFO - [LightGBM] Predicciones en Test generadas en 0.01s (RMSE=920.5975)
2025-05-12 17:47:24,263 - INFO - [LightGBM] Modelo refinado guardado en 0.02s: C:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\models\lightgbmtrial12.pkl
2025-05-12 17:47:24,293 - INFO - [LightGBM] Forecast de 20 días generado en 0.03s
2025-05-12 17:47:24,786 - INFO - [LightGBM] Gráfico completo guardado: C:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\data\4_results\charts\lightgbm_trial12_full.png
2025-05-12 17:47:24,809 - INFO - [LightGBM] CSV para Trial 12 guardado: C:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\data\4_results\s&p500_lightgbm_t

✅ Entrenamiento completado para 5 algoritmos
✅ Visualizaciones generadas en: C:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\data\4_results\charts
✅ Modelos guardados en: C:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\models
✅ Predicciones consolidadas en: C:\Users\natus\Documents\Trabajo\PEDRO_PEREZ\Proyecto_Mercado_de_Valores\SP500_INDEX_Analisis\data\4_results\all_models_predictions.csv
✅ Tiempo total de ejecución: 2177.43s


In [3]:
# Paso 8: Preparación de salida
print("Ejecutando paso 8: Preparación de salida")
%run ../pipelines/ml/step_7_5_ensemble.py


2025-05-13 09:23:24,662 - INFO - Iniciando creación de ensamble greedy...
2025-05-13 09:23:24,672 - INFO - Modelo cargado: catboosttrial10
2025-05-13 09:23:24,677 - INFO - Modelo cargado: catboosttrial11
2025-05-13 09:23:24,681 - INFO - Modelo cargado: catboosttrial13
2025-05-13 09:23:24,684 - INFO - Modelo cargado: catboosttrial16
2025-05-13 09:23:24,688 - INFO - Modelo cargado: catboosttrial4
2025-05-13 09:23:24,694 - INFO - Modelo cargado: catboosttrial5
2025-05-13 09:23:24,697 - INFO - Modelo cargado: catboost_best
2025-05-13 09:23:24,707 - INFO - Modelo cargado: lightgbmtrial12
2025-05-13 09:23:24,718 - INFO - Modelo cargado: lightgbmtrial15
2025-05-13 09:23:24,726 - INFO - Modelo cargado: lightgbmtrial18
2025-05-13 09:23:24,736 - INFO - Modelo cargado: lightgbmtrial19
2025-05-13 09:23:24,742 - INFO - Modelo cargado: lightgbmtrial7
2025-05-13 09:23:24,747 - INFO - Modelo cargado: lightgbm_best
2025-05-13 09:23:24,756 - INFO - Modelo cargado: mlptrial0
2025-05-13 09:23:24,766 - INF

Ejecutando paso 8: Preparación de salida


2025-05-13 09:23:24,865 - INFO - Modelo cargado: xgboosttrial12
2025-05-13 09:23:24,889 - INFO - Modelo cargado: xgboosttrial13
2025-05-13 09:23:24,926 - INFO - Modelo cargado: xgboosttrial15
2025-05-13 09:23:24,965 - INFO - Modelo cargado: xgboosttrial19
2025-05-13 09:23:25,001 - INFO - Modelo cargado: xgboosttrial4
2025-05-13 09:23:25,037 - INFO - Modelo cargado: xgboost_best
2025-05-13 09:23:25,038 - INFO - Se cargaron 32 modelos para el ensamble.
2025-05-13 09:23:25,039 - INFO - Usando datos: ULTIMO_S&P500_final_FPI.xlsx
2025-05-13 09:23:27,219 - INFO - Datos cargados: 2826 filas, 111 columnas
2025-05-13 09:23:27,220 - INFO - Columnas en el DataFrame: ['date', '6M_change_PRICE_Australia_10Y_Bond_bond', 'zscore_PRICE_Australia_10Y_Bond_bond', 'log_diff_PRICE_Italy_10Y_Bond_bond', 'rolling_std_PRICE_Italy_10Y_Bond_bond', '3M_change_PRICE_Italy_10Y_Bond_bond', 'zscore_PRICE_Italy_10Y_Bond_bond', 'MoM_PRICE_Japan_10Y_Bond_bond', 'YoY_PRICE_Japan_10Y_Bond_bond', '3M_change_PRICE_Japan_1

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006111 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 25653
[LightGBM] [Info] Number of data points in the train set: 2261, number of used features: 109
[LightGBM] [Info] Start training from score 2862.772342


2025-05-13 09:23:59,525 - INFO - Modelo lightgbmtrial12 entrenado en 2.53s - RMSE: 1018.2621
2025-05-13 09:23:59,526 - INFO - Entrenando modelo base lightgbmtrial15 (9/32)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001624 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 25653
[LightGBM] [Info] Number of data points in the train set: 2261, number of used features: 109
[LightGBM] [Info] Start training from score 2862.772342


2025-05-13 09:24:03,367 - INFO - Modelo lightgbmtrial15 entrenado en 3.84s - RMSE: 1010.1506
2025-05-13 09:24:03,368 - INFO - Entrenando modelo base lightgbmtrial18 (10/32)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001682 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 25653
[LightGBM] [Info] Number of data points in the train set: 2261, number of used features: 109
[LightGBM] [Info] Start training from score 2862.772342


2025-05-13 09:24:06,313 - INFO - Modelo lightgbmtrial18 entrenado en 2.94s - RMSE: 1019.6921
2025-05-13 09:24:06,313 - INFO - Entrenando modelo base lightgbmtrial19 (11/32)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001590 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 25653
[LightGBM] [Info] Number of data points in the train set: 2261, number of used features: 109
[LightGBM] [Info] Start training from score 2862.772342


2025-05-13 09:24:09,256 - INFO - Modelo lightgbmtrial19 entrenado en 2.94s - RMSE: 1019.1039
2025-05-13 09:24:09,257 - INFO - Entrenando modelo base lightgbmtrial7 (12/32)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001771 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 25653
[LightGBM] [Info] Number of data points in the train set: 2261, number of used features: 109
[LightGBM] [Info] Start training from score 2862.772342


2025-05-13 09:24:10,707 - INFO - Modelo lightgbmtrial7 entrenado en 1.45s - RMSE: 1014.8092




2025-05-13 09:24:10,708 - INFO - Entrenando modelo base lightgbm_best (13/32)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001479 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 25653
[LightGBM] [Info] Number of data points in the train set: 2261, number of used features: 109
[LightGBM] [Info] Start training from score 2862.772342


2025-05-13 09:24:12,146 - INFO - Modelo lightgbm_best entrenado en 1.44s - RMSE: 1014.8092
2025-05-13 09:24:12,147 - INFO - Entrenando modelo base mlptrial0 (14/32)




  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


ValueError: Input X contains NaN.
MLPRegressor does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values

In [None]:
# Step 9: prepare the pipeline output by running the output-preparation script.
# Note that the notebook step number (9) differs from the script prefix (step_8).
print("Ejecutando paso 9: Preparación de salida")
%run ../pipelines/ml/step_8_prepare_output.py


In [None]:
# Paso 10: Preparación de salida
print("Ejecutando paso 10: Preparación de salida")
%run ../pipelines/ml/step_9_backtest.py

print("¡Pipeline ML completado!")

In [None]:
# Paso 11: Preparación de salida
print("Ejecutando paso 11: Preparación de salida")
%run ../pipelines/ml/step_10_inference.py
