In [1]:
# Machine Learning Pipeline Execution
# Este notebook ejecuta secuencialmente todos los pasos del pipeline ML

# Importaciones necesarias
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Asegurarse de que podemos importar desde la raíz del proyecto
# (ajusta esto si es necesario según la estructura de tu proyecto)
sys.path.append('..')



In [2]:
import os
print(f"Directorio de trabajo actual: {os.getcwd()}")
# Si necesitas cambiar el directorio:
# os.chdir('/ruta/a/tu/proyecto')

Directorio de trabajo actual: c:\Users\pedro\OneDrive\Documents\ALGO TRADING\SP500_INDEX_Analisis\notebooks


In [3]:
import os
import shutil
import sys

# 1. First, print current working directory to understand where we are
print(f"Current working directory: {os.getcwd()}")

# 2. Find the correct path to the configuration file
# Look for it in several possible locations relative to the notebook
possible_config_paths = [
    "Data Engineering.xlsx",  # Current directory
    "../Data Engineering.xlsx",  # Parent directory
    "../../Data Engineering.xlsx",  # Grandparent directory
]

config_path = None
for path in possible_config_paths:
    if os.path.exists(path):
        config_path = os.path.abspath(path)
        print(f"Found configuration file at: {config_path}")
        break

if not config_path:
    print("Configuration file not found in any of the expected locations")
    # Create a basic template if you don't have one
    config_path = "Data Engineering.xlsx"
    print(f"Will create a template at: {config_path}")

# 3. Determine the project root directory
# Assumes notebook is in a 'notebooks' folder inside the project
if os.path.basename(os.getcwd()) == "notebooks":
    project_root = os.path.abspath("..")
else:
    # Try to find it based on directory structure
    if os.path.exists("../pipelines"):
        project_root = os.path.abspath("..")
    elif os.path.exists("../../pipelines"):
        project_root = os.path.abspath("../..")
    else:
        project_root = os.getcwd()
        
print(f"Using project root: {project_root}")

# 4. Make sure data directories exist
data_root = os.path.join(project_root, "Data", "raw")
if not os.path.exists(data_root):
    os.makedirs(data_root, exist_ok=True)
    print(f"Created data directory: {data_root}")

# 5. Run the script with modified paths
# Adjust system path to import from project root
sys.path.append(project_root)

# Import preprocessing functions directly
try:
    sys.path.append(os.path.join(project_root, "pipelines", "ml"))
    from step_0_preprocess import run_economic_data_processor, ejecutar_myinvestingreportnormal, run_fred_data_processor, ejecutar_otherdataprocessor
    
    # Run the processors with correct paths
    print("\nRunning data processors with correct paths:")
    print("-------------------------------------------")
    run_economic_data_processor(config_file=config_path, data_root=data_root)
    ejecutar_myinvestingreportnormal(config_file=config_path, data_root=data_root)
    run_fred_data_processor(config_file=config_path, data_root=data_root)
    ejecutar_otherdataprocessor(config_file=config_path, data_root=data_root)
    
except ImportError as e:
    print(f"Error importing preprocessing functions: {e}")
    print("\nFalling back to running the script directly with the correct paths...")
    
    # Create a temporary script with corrected paths
    temp_script = "temp_preprocess.py"
    script_content = f"""
import sys
sys.path.append("{project_root}")
from pipelines.ml.step_0_preprocess import run_economic_data_processor, ejecutar_myinvestingreportnormal, run_fred_data_processor, ejecutar_otherdataprocessor

# Run with correct paths
run_economic_data_processor(config_file="{config_path}", data_root="{data_root}")
ejecutar_myinvestingreportnormal(config_file="{config_path}", data_root="{data_root}")
run_fred_data_processor(config_file="{config_path}", data_root="{data_root}")
ejecutar_otherdataprocessor(config_file="{config_path}", data_root="{data_root}")
"""
    with open(temp_script, "w") as f:
        f.write(script_content)
    
    # Run the temporary script
    %run {temp_script}

2025-04-18 17:41:41,083 - INFO - INICIANDO PROCESO: EconomicDataProcessor
2025-04-18 17:41:41,083 - INFO - Archivo de configuración: Data Engineering.xlsx
2025-04-18 17:41:41,083 - INFO - Directorio raíz de datos: c:\Users\pedro\OneDrive\Documents\ALGO TRADING\SP500_INDEX_Analisis\Data\raw
2025-04-18 17:41:41,085 - INFO - Fecha y hora: 2025-04-18 17:41:41
2025-04-18 17:41:41,085 - INFO - Leyendo archivo de configuración...
2025-04-18 17:41:41,086 - ERROR - Error al leer configuración: [Errno 2] No such file or directory: 'Data Engineering.xlsx'
2025-04-18 17:41:41,088 - INFO - INICIANDO PROCESO: MyinvestingreportNormal
2025-04-18 17:41:41,088 - INFO - Archivo de configuración: Data Engineering.xlsx
2025-04-18 17:41:41,088 - INFO - Directorio raíz de datos: c:\Users\pedro\OneDrive\Documents\ALGO TRADING\SP500_INDEX_Analisis\Data\raw
2025-04-18 17:41:41,088 - INFO - Fecha y hora: 2025-04-18 17:41:41
2025-04-18 17:41:41,090 - INFO - Iniciando proceso completo MyinvestingreportNormal...
20

Current working directory: c:\Users\pedro\OneDrive\Documents\ALGO TRADING\SP500_INDEX_Analisis\notebooks
Configuration file not found in any of the expected locations
Will create a template at: Data Engineering.xlsx
Using project root: c:\Users\pedro\OneDrive\Documents\ALGO TRADING\SP500_INDEX_Analisis
Created data directory: c:\Users\pedro\OneDrive\Documents\ALGO TRADING\SP500_INDEX_Analisis\Data\raw

Running data processors with correct paths:
-------------------------------------------


In [2]:
pip install openpyxl

Defaulting to user installation because normal site-packages is not writeable
Collecting openpyxl
  Using cached openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Using cached et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)
Using cached openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
Using cached et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-2.0.0 openpyxl-3.1.5
Note: you may need to restart the kernel to use updated packages.


In [6]:
# Paso 0: Preprocesamiento inicial
print("Ejecutando paso 0: Preprocesamiento inicial")
%run ../pipelines/ml/step_0_preprocess.py



2025-04-17 19:45:49,658 [INFO] INICIANDO PROCESO: EconomicDataProcessor
2025-04-17 19:45:49,660 [INFO] Archivo de configuración: c:\Users\pedro\OneDrive\Documents\ALGO TRADING\SP500_INDEX_Analisis\pipelines/Data Engineering.xlsx
2025-04-17 19:45:49,661 [INFO] Directorio raíz de datos: c:\Users\pedro\OneDrive\Documents\ALGO TRADING\SP500_INDEX_Analisis\data/0_raw
2025-04-17 19:45:49,663 [INFO] Fecha y hora: 2025-04-17 19:45:49
2025-04-17 19:45:49,666 [INFO] Leyendo archivo de configuración...


Ejecutando paso 0: Preprocesamiento inicial


2025-04-17 19:45:49,777 [INFO] Se encontraron 21 configuraciones para procesar
2025-04-17 19:45:49,783 [INFO] 
Procesando: US_ISM_Manufacturing (business_confidence)
2025-04-17 19:45:49,784 [INFO] - Archivo: US_ISM_Manufacturing.xlsx
2025-04-17 19:45:49,784 [INFO] - Columna TARGET: ACTUAL
2025-04-17 19:45:49,786 [INFO] - Ruta encontrada: c:\Users\pedro\OneDrive\Documents\ALGO TRADING\SP500_INDEX_Analisis\data/0_raw\business_confidence\US_ISM_Manufacturing.xlsx
2025-04-17 19:45:49,817 [INFO] - Filas encontradas: 138
2025-04-17 19:45:49,835 [INFO] Preferencia de dayfirst para c:\Users\pedro\OneDrive\Documents\ALGO TRADING\SP500_INDEX_Analisis\data/0_raw\business_confidence\US_ISM_Manufacturing.xlsx: True
2025-04-17 19:45:49,915 [INFO] - Valores no nulos en TARGET: 137
2025-04-17 19:45:49,916 [INFO] - Periodo: 2013-11-01 a 2025-03-03
2025-04-17 19:45:49,917 [INFO] - Cobertura: 100.00%
2025-04-17 19:45:49,922 [INFO] 
Procesando: US_ISM_Services (business_confidence)
2025-04-17 19:45:49,922

Proceso completado exitosamente


2025-04-17 19:45:54,143 [INFO] Ejemplos de fechas convertidas para Australia_10Y_Bond: [Timestamp('2025-03-26 00:00:00'), Timestamp('2025-03-25 00:00:00'), Timestamp('2025-03-24 00:00:00'), Timestamp('2025-03-21 00:00:00'), Timestamp('2025-03-20 00:00:00')]
2025-04-17 19:45:54,147 [INFO] Formato numérico detectado para Australia_10Y_Bond: americano
2025-04-17 19:45:54,157 [INFO] Para Australia_10Y_Bond (columna Date), la fecha mínima es 2014-01-01 00:00:00 y la fecha máxima es 2025-03-26 00:00:00
2025-04-17 19:45:54,159 [INFO] - Australia_10Y_Bond: 3810 filas procesadas, periodo: 2014-01-01 a 2025-03-26
2025-04-17 19:45:54,163 [INFO] 
Procesando: Italy_10Y_Bond (bond)
2025-04-17 19:45:54,167 [INFO] - Archivo: Italy_10Y_Bond
2025-04-17 19:45:54,169 [INFO] - Columna TARGET: PRICE
2025-04-17 19:45:54,171 [INFO] - Ruta encontrada: c:\Users\pedro\OneDrive\Documents\ALGO TRADING\SP500_INDEX_Analisis\data/0_raw\bond\Italy_10Y_Bond.csv
2025-04-17 19:45:54,190 [INFO] Columna de fecha detectada:

Proceso completado exitosamente


2025-04-17 19:46:17,928 [INFO] Primeras fechas convertidas: [Timestamp('2014-01-02 00:00:00'), Timestamp('2014-01-03 00:00:00'), Timestamp('2014-01-06 00:00:00'), Timestamp('2014-01-07 00:00:00'), Timestamp('2014-01-08 00:00:00')]
2025-04-17 19:46:17,931 [INFO] - Valores no nulos en TARGET: 2808
2025-04-17 19:46:17,933 [INFO] - Periodo: 2014-01-02 a 2025-03-25
2025-04-17 19:46:17,933 [INFO] - Cobertura: 100.00%
2025-04-17 19:46:17,936 [INFO] 
Procesando: US_2Y_Treasury (bond)
2025-04-17 19:46:17,936 [INFO] - Archivo: US_2Y_Treasury.csv
2025-04-17 19:46:17,936 [INFO] - Columna TARGET: DGS2
2025-04-17 19:46:17,937 [INFO] - Ruta encontrada: c:\Users\pedro\OneDrive\Documents\ALGO TRADING\SP500_INDEX_Analisis\data/0_raw\bond\US_2Y_Treasury.csv
2025-04-17 19:46:17,941 [INFO] - Filas encontradas: 2929
2025-04-17 19:46:17,943 [INFO] Detección formato: 20/20 registros ISO (ratio 1.00)
2025-04-17 19:46:17,944 [INFO] Formato detectado para c:\Users\pedro\OneDrive\Documents\ALGO TRADING\SP500_INDE

Proceso completado exitosamente


2025-04-17 19:46:22,151 [INFO] - Archivo cargado: 1966 filas, 13 columnas
2025-04-17 19:46:22,151 [INFO] - Columnas disponibles: Reported Date, Bullish, Neutral, Bearish, Total, Bullish 8-week Mov Avg, Bull-Bear Spread, Bullish Average, Bullish Average +St. Dev.,  Bullish Average - St. Dev., S&P 500 Weekly High, S&P 500 Weekly Low, S&P 500 Weekly Close
2025-04-17 19:46:22,153 [INFO] - Usando columna 'Reported Date' como fecha
2025-04-17 19:46:22,153 [INFO] - Columnas encontradas: Bearish, Bull-Bear Spread, Bullish
2025-04-17 19:46:22,157 [INFO] - Rango de fechas total: 1987-06-26 a 2025-03-20
2025-04-17 19:46:22,158 [INFO] - ¿Tiene datos desde 2014 o antes?: Sí
2025-04-17 19:46:22,160 [INFO] - Columna AAII_Bearish_AAII_Investor_Sentiment_consumer_confidence: Rango de fechas 1987-07-24 a 2025-03-20
2025-04-17 19:46:22,162 [INFO] - Valores disponibles: 1963/1966 (99.85%)
2025-04-17 19:46:22,163 [INFO] - Columna AAII_Bull-Bear Spread_AAII_Investor_Sentiment_consumer_confidence: Rango de f

Proceso completado exitosamente


2025-04-17 19:46:56,091 [INFO] - Archivo cargado: 1966 filas, 13 columnas
2025-04-17 19:46:56,091 [INFO] - Columnas disponibles: Reported Date, Bullish, Neutral, Bearish, Total, Bullish 8-week Mov Avg, Bull-Bear Spread, Bullish Average, Bullish Average +St. Dev.,  Bullish Average - St. Dev., S&P 500 Weekly High, S&P 500 Weekly Low, S&P 500 Weekly Close
2025-04-17 19:46:56,093 [INFO] - Usando columna 'Reported Date' como fecha
2025-04-17 19:46:56,093 [INFO] - Columnas encontradas: Bearish, Bull-Bear Spread, Bullish
2025-04-17 19:46:56,099 [INFO] - Rango de fechas total: 1987-06-26 a 2025-03-20
2025-04-17 19:46:56,100 [INFO] - ¿Tiene datos desde 2014 o antes?: Sí
2025-04-17 19:46:56,103 [INFO] - Columna AAII_Bearish_AAII_Investor_Sentiment_consumer_confidence: Rango de fechas 1987-07-24 a 2025-03-20
2025-04-17 19:46:56,103 [INFO] - Valores disponibles: 1963/1966 (99.85%)
2025-04-17 19:46:56,108 [INFO] - Columna AAII_Bull-Bear Spread_AAII_Investor_Sentiment_consumer_confidence: Rango de f

Proceso completado exitosamente


In [10]:
# Paso 1: Fusión de archivos Excel
print("Ejecutando paso 1: Fusión de archivos Excel")
%run ../pipelines/ml/step_1_merge_excels.py



Ejecutando paso 1: Fusión de archivos Excel
Se encontraron 4 archivos Excel en la carpeta.
Iniciando proceso de combinación de archivos Excel
Cargando archivo: c:\Users\pedro\OneDrive\Documents\ALGO TRADING\SP500_INDEX_Analisis\data\0_raw\datos_economicos_normales_procesados.xlsx
Archivo datos_economicos_normales_procesados.xlsx cargado correctamente con 4104 filas
Después de filtrar por fecha: 4104 filas
Cargando archivo: c:\Users\pedro\OneDrive\Documents\ALGO TRADING\SP500_INDEX_Analisis\data\0_raw\datos_economicos_other_procesados.xlsx
Archivo datos_economicos_other_procesados.xlsx cargado correctamente con 13794 filas
Después de filtrar por fecha: 4104 filas
Cargando archivo: c:\Users\pedro\OneDrive\Documents\ALGO TRADING\SP500_INDEX_Analisis\data\0_raw\datos_economicos_procesados_cp.xlsx
Archivo datos_economicos_procesados_cp.xlsx cargado correctamente con 4166 filas
Se imputó valor del 2025-03-26 para la fecha faltante 2025-03-27
Después de filtrar por fecha: 4104 filas
Cargando 

In [16]:
# Paso 2: Generación de categorías
print("Ejecutando paso 2: Generación de categorías")
%run ../pipelines/ml/step_2_generate_categories.py



Ejecutando paso 2: Generación de categorías


2025-04-17 20:50:09,452 [INFO] Archivo cargado correctamente. Dimensiones: (4104, 89)
2025-04-17 20:50:09,455 [INFO] No se encontraron columnas para renombrar.
2025-04-17 20:50:09,483 [INFO] Resultados detallados de categorización guardados en: c:\Users\pedro\OneDrive\Documents\ALGO TRADING\SP500_INDEX_Analisis\data\1_preprocess\DIAGNOSTICO_CATEGORIAS.xlsx
2025-04-17 20:50:09,486 [INFO] Resumen de categorización:
2025-04-17 20:50:09,487 [INFO]   - Sin categoría: 1 columnas
2025-04-17 20:50:09,490 [INFO]   - bond: 11 columnas
2025-04-17 20:50:09,490 [INFO]   - business_confidence: 7 columnas
2025-04-17 20:50:09,492 [INFO]   - car_registrations: 5 columnas
2025-04-17 20:50:09,493 [INFO]   - comm_loans: 2 columnas
2025-04-17 20:50:09,495 [INFO]   - commodities: 5 columnas
2025-04-17 20:50:09,495 [INFO]   - consumer_confidence: 15 columnas
2025-04-17 20:50:09,496 [INFO]   - economics: 22 columnas
2025-04-17 20:50:09,498 [INFO]   - exchange_rate: 8 columnas
2025-04-17 20:50:09,498 [INFO]   

In [15]:
# Paso 3: Limpieza de columnas
print("Ejecutando paso 3: Limpieza de columnas")
%run ../pipelines/ml/step_3_clean_columns.py



Ejecutando paso 3: Limpieza de columnas


2025-04-17 20:48:50,307 [INFO] Archivo cargado correctamente. Columnas: 89
2025-04-17 20:48:50,318 [INFO] Se modificaron 5 nombres de columnas.
2025-04-17 20:48:50,320 [INFO] Renombrando: 'DNKSLRTCR03GPSAM_Denmark_Car_Registrations_MoM_car_registrations' -> 'DNKSLRTCR03GPSAM_Denmark_Car_Registrations_MoM'
2025-04-17 20:48:50,322 [INFO] Renombrando: 'USASLRTCR03GPSAM_US_Car_Registrations_MoM_car_registrations' -> 'USASLRTCR03GPSAM_US_Car_Registrations_MoM'
2025-04-17 20:48:50,325 [INFO] Renombrando: 'ZAFSLRTCR03GPSAM_SouthAfrica_Car_Registrations_MoM_car_registrations' -> 'ZAFSLRTCR03GPSAM_SouthAfrica_Car_Registrations_MoM'
2025-04-17 20:48:50,326 [INFO] Renombrando: 'GBRSLRTCR03GPSAM_United_Kingdom_Car_Registrations_MoM_car_registrations' -> 'GBRSLRTCR03GPSAM_United_Kingdom_Car_Registrations_MoM'
2025-04-17 20:48:50,328 [INFO] Renombrando: 'ESPSLRTCR03GPSAM_Spain_Car_Registrations_MoM_car_registrations' -> 'ESPSLRTCR03GPSAM_Spain_Car_Registrations_MoM'
2025-04-17 20:48:56,350 [INFO] Ar

In [18]:
# Paso 4: Transformación de características
print("Ejecutando paso 4: Transformación de características")
%run ../pipelines/ml/step_4_transform_features.py


Ejecutando paso 4: Transformación de características
✅ Archivo Excel cargado exitosamente.
🧪 Primeras filas del archivo:
                    0                              1   \
0                fecha  PRICE_Australia_10Y_Bond_bond   
1        Sin categoría                           bond   
2  2014-01-01 00:00:00                          4.289   
3  2014-01-02 00:00:00                          4.331   
4  2014-01-03 00:00:00                          4.344   

                          2                          3   \
0  PRICE_Italy_10Y_Bond_bond  PRICE_Japan_10Y_Bond_bond   
1                       bond                       bond   
2                        NaN                        NaN   
3                      3.972                        NaN   
4                       3.94                        NaN   

                       4                            5   \
0  PRICE_UK_10Y_Bond_bond  PRICE_Germany_10Y_Bond_bond   
1                    bond                         bond   
2      

In [3]:

# Paso 5: Eliminación de relaciones
print("Ejecutando paso 5: Eliminación de relaciones")
%run ../pipelines/ml/step_5_remove_relations.py



Ejecutando paso 5: Eliminación de relaciones


  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
  X -= avg[:, None]
2025-04-17 23:23:51,437 - INFO - Eliminando 'PRICE_US_Nonfarm_Payrolls_unemployment_rate' por VIF = 63283.96
2025-04-17 23:26:42,923 - INFO - Eliminando 'Price_Dollar_Index_DXY_index_pricing' por VIF = 13575.99
2025-04-17 23:29:34,429 - INFO - Eliminando 'log_PRICE_CrudeOil_WTI_commodities' por VIF = 11923.88
2025-04-17 23:32:23,231 - INFO - Eliminando 'Actual_EuroZone_Business_Climate_business_confidence' por VIF = 6807.20
2025-04-17 23:35:09,229 - INFO - Eliminando 'bollinger_lower_PRICE_MXN_USD_Spot_exchange_rate' por VIF = 6753.82
2025-04-17 23:37:56,312 - INFO - Eliminando 'Actual_China_PMI_Manufacturing_economics' por VIF = 5510.99
2025-04-17 23:40:41,540 - INFO - Eliminando 'YTD_PRICE_EUR_USD_Spot_exchange_rate' por VIF = 4224.68
2025-04-17 23:43:24,669 - INFO - Eliminando 'Actual_US_ISM_Manufacturing_business_confidence' por VIF = 3208.39
2025-04-17 23:46:05,835 - INFO - Eliminando 'log_ULTIMO_VIX

In [9]:
!pip install catboost
!pip install feature_engine
!pip install pandas_market_calendars

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Collecting pandas_market_calendars
  Downloading pandas_market_calendars-5.0.0-py3-none-any.whl.metadata (9.6 kB)
Collecting exchange-calendars>=3.3 (from pandas_market_calendars)
  Using cached exchange_calendars-4.10-py3-none-any.whl.metadata (37 kB)
Collecting pyluach (from exchange-calendars>=3.3->pandas_market_calendars)
  Using cached pyluach-2.2.0-py3-none-any.whl.metadata (4.3 kB)
Collecting toolz (from exchange-calendars>=3.3->pandas_market_calendars)
  Using cached toolz-1.0.0-py3-none-any.whl.metadata (5.1 kB)
Collecting korean_lunar_calendar (from exchange-calendars>=3.3->pandas_market_calendars)
  Using cached korean_lunar_calendar-0.3.1-py3-none-any.whl.metadata (2.8 kB)
Downloading pandas_market_calendars-5.0.0-py3-none-any.whl (122 kB)
Usin

In [11]:
# Paso 6: Selección FPI
print("Ejecutando paso 6: Selección FPI")
%run ../pipelines/ml/step_6_fpi_selection.py



2025-04-18 10:50:32,212 - INFO - Usando el archivo más reciente: c:\Users\pedro\OneDrive\Documents\ALGO TRADING\SP500_INDEX_Analisis\data\2_processed\ULTIMO_S&P500_final.xlsx
2025-04-18 10:50:32,214 - INFO - La salida se guardará en: c:\Users\pedro\OneDrive\Documents\ALGO TRADING\SP500_INDEX_Analisis\data\3_trainingdata\ULTIMO_S&P500_final_FPI.xlsx


Ejecutando paso 6: Selección FPI


2025-04-18 10:50:35,603 - INFO - Archivo 'c:\Users\pedro\OneDrive\Documents\ALGO TRADING\SP500_INDEX_Analisis\data\2_processed\ULTIMO_S&P500_final.xlsx' cargado con forma (2932, 148).
2025-04-18 10:50:35,605 - INFO - Columnas del DataFrame original: ['date', 'MoM_PRICE_Australia_10Y_Bond_bond', 'log_diff_PRICE_Australia_10Y_Bond_bond', '6M_change_PRICE_Australia_10Y_Bond_bond', 'zscore_PRICE_Australia_10Y_Bond_bond', 'MoM_PRICE_Italy_10Y_Bond_bond', 'log_diff_PRICE_Italy_10Y_Bond_bond', 'rolling_std_PRICE_Italy_10Y_Bond_bond', '3M_change_PRICE_Italy_10Y_Bond_bond', 'zscore_PRICE_Italy_10Y_Bond_bond', 'MoM_PRICE_Japan_10Y_Bond_bond', 'YoY_PRICE_Japan_10Y_Bond_bond', 'log_diff_PRICE_Japan_10Y_Bond_bond', 'rolling_var_PRICE_Japan_10Y_Bond_bond', '3M_change_PRICE_Japan_10Y_Bond_bond', '6M_change_PRICE_Japan_10Y_Bond_bond', 'YTD_PRICE_Japan_10Y_Bond_bond', 'zscore_PRICE_Japan_10Y_Bond_bond', 'MoM_PRICE_UK_10Y_Bond_bond', 'log_diff_PRICE_UK_10Y_Bond_bond', '3M_change_PRICE_UK_10Y_Bond_bond',

In [15]:
!pip install lightgbm
!pip install xgboost


Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Collecting xgboost
  Using cached xgboost-3.0.0-py3-none-win_amd64.whl.metadata (2.1 kB)
Using cached xgboost-3.0.0-py3-none-win_amd64.whl (150.0 MB)
Installing collected packages: xgboost
Successfully installed xgboost-3.0.0


In [17]:
# Paso 7: Entrenamiento de modelos
print("Ejecutando paso 7: Entrenamiento de modelos")
%run ../pipelines/ml/step_7_train_models.py



2025-04-18 11:04:06,355 - INFO - Usando el archivo más reciente: c:\Users\pedro\OneDrive\Documents\ALGO TRADING\SP500_INDEX_Analisis\data\3_trainingdata\ULTIMO_S&P500_final_FPI.xlsx


Ejecutando paso 7: Entrenamiento de modelos


2025-04-18 11:04:08,551 - INFO - Datos leídos y ordenados por fecha.
2025-04-18 11:04:08,559 - INFO - Se han imputado los valores NaN e inf (ffill y relleno con 0).
2025-04-18 11:04:08,606 - INFO - Split realizado: Training=2784, Evaluacion (21 días)=21, Test (21 días)=21
2025-04-18 11:04:08,608 - INFO - === Optimizando y entrenando CatBoost... ===
[I 2025-04-18 11:04:08,609] A new study created in memory with name: no-name-970bc493-f008-45e8-ae8f-de5f95e35f91
[I 2025-04-18 11:04:15,862] Trial 0 finished with value: 954.4416227128535 and parameters: {'learning_rate': 0.04845901204836764, 'depth': 4, 'iterations': 816}. Best is trial 0 with value: 954.4416227128535.
[I 2025-04-18 11:04:30,318] Trial 1 finished with value: 1059.0621635762423 and parameters: {'learning_rate': 0.0015090161154317763, 'depth': 5, 'iterations': 744}. Best is trial 0 with value: 954.4416227128535.
[I 2025-04-18 11:08:16,087] Trial 2 finished with value: 1080.2505387369513 and parameters: {'learning_rate': 0.00

CatBoostError: bad allocation

In [None]:
# Paso 8: Preparación de salida
print("Ejecutando paso 8: Preparación de salida")
%run ../pipelines/ml/step_8_prepare_output.py


In [None]:
# Paso 8: Preparación de salida
print("Ejecutando paso 8: Preparación de salida")
%run ../pipelines/ml/step_9_backtest.py

print("¡Pipeline ML completado!")