In [9]:
"""
Sistema de Arbitraje Estadístico Bayesiano Adaptativo a Múltiples Regímenes

Este programa implementa una estrategia de trading de pares que:
1. Selecciona y valida pares cointegrados del S&P 500
2. Detecta múltiples regímenes de mercado usando HMM
3. Genera señales de trading adaptativas según el régimen
4. Monitorea cambios estructurales en las relaciones de cointegración
5. Gestiona el riesgo mediante un enfoque bayesiano
"""

import os
import sys
import logging
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint, adfuller, kpss
from statsmodels.tsa.vector_ar.vecm import coint_johansen
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.stats.diagnostic import recursive_olsresiduals
from scipy.stats import norm, uniform
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from hmmlearn import hmm
import warnings
import datetime as dt
import pickle
import requests
from bs4 import BeautifulSoup
import time
import sqlite3
from tqdm import tqdm
import concurrent.futures
from scipy.linalg import block_diag

# Suprimir advertencias
warnings.filterwarnings('ignore')

# Crear directorios para resultados
os.makedirs('./artifacts/results', exist_ok=True)
os.makedirs('./artifacts/results/figures', exist_ok=True)
os.makedirs('./artifacts/results/data', exist_ok=True)

# Configurar logging
logging.basicConfig(
    filename='./artifacts/errors.txt',
    level=logging.ERROR,
    format='[%(asctime)s] %(levelname)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)

class BayesianPairsTrading:
    """
    Implementación de un sistema de trading de pares con enfoque bayesiano
    y adaptación a múltiples regímenes de mercado.
    """

    def __init__(self, start_date='2018-01-01', end_date=None, cache_dir='./artifacts/cache'):
        """
        Inicializa el sistema de trading con los parámetros básicos.
        
        Args:
            start_date (str): Fecha inicial para los datos históricos
            end_date (str): Fecha final para los datos históricos (default: hoy)
            cache_dir (str): Directorio para almacenar el caché de datos
        """
        self.start_date = start_date
        self.end_date = end_date if end_date else dt.datetime.now().strftime('%Y-%m-%d')
        self.cache_dir = cache_dir
        os.makedirs(cache_dir, exist_ok=True)
        
        # Conexión a la base de datos SQLite para caché
        self.conn = sqlite3.connect(f"{cache_dir}/pairs_data.db")
        
        # Parámetros del sistema
        self.window_train = 252  # 1 año de trading para estimación
        self.window_hmm = 504    # 2 años para HMM
        self.min_halflife = 1
        self.max_halflife = 30
        self.n_regimes = 3
        
        # Umbrales por régimen
        self.entry_thresholds = {
            1: 1.25,  # Régimen de baja volatilidad
            2: 1.75,  # Régimen de volatilidad normal
            3: 2.25   # Régimen de alta volatilidad
        }
        
        self.exit_thresholds = {
            1: 0.5,   # Régimen de baja volatilidad
            2: 0.75,  # Régimen de volatilidad normal
            3: 1.0    # Régimen de alta volatilidad
        }
        
        # Scaling de posiciones por régimen
        self.position_scaling = {
            1: 1.0,   # 100% en régimen de baja volatilidad
            2: 0.7,   # 70% en régimen de volatilidad normal
            3: 0.4    # 40% en régimen de alta volatilidad
        }
        
        # Inicializar atributos
        self.sp500_symbols = []
        self.prices_df = None
        self.regime_indicators = None
        self.current_regime = None
        self.cointegrated_pairs = []
        self.active_positions = {}
        self.pair_params = {}
        self.quarantine_pairs = set()
        self.regime_model = None
        
        # Métricas de performance
        self.performance = {
            'daily_returns': [],
            'sharpe_ratio': None,
            'max_drawdown': None,
            'win_rate': None,
            'profit_factor': None,
            'calmar_ratio': None,
            'beta_market': None,
            'regime_performance': {1: {}, 2: {}, 3: {}}
        }
        
        logging.info("Sistema inicializado con fecha de inicio: %s, fecha de fin: %s", 
                    self.start_date, self.end_date)

    def get_sp500_symbols(self):
        """
        Obtiene la lista de símbolos del S&P 500 desde Wikipedia.
        """
        try:
            url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
            response = requests.get(url)
            soup = BeautifulSoup(response.text, 'html.parser')
            table = soup.find('table', {'class': 'wikitable'})
            
            symbols = []
            for row in table.findAll('tr')[1:]:
                symbol = row.findAll('td')[0].text.strip()
                symbols.append(symbol)
            
            self.sp500_symbols = symbols
            logging.info(f"Obtenidos {len(symbols)} símbolos del S&P 500")
            
            # Guardar lista de símbolos
            pd.DataFrame(symbols, columns=['Symbol']).to_csv('./artifacts/results/data/sp500_symbols.csv', index=False)
            
            return symbols
        
        except Exception as e:
            logging.error(f"Error al obtener símbolos del S&P 500: {str(e)}")
            logging.error(f"Traceback: {sys.exc_info()[2]}")
            return []

    def download_price_data(self, batch_size=50, max_retries=3):
        """
        Descarga datos de precios para todos los símbolos del S&P 500 usando yfinance.
        Implementa un sistema de caché y descarga por lotes para eficiencia.
        
        Args:
            batch_size (int): Tamaño del lote para descargas
            max_retries (int): Número máximo de reintentos
        
        Returns:
            pandas.DataFrame: DataFrame con los precios de cierre ajustados
        """
        try:
            # Verificar si ya existe en caché
            cache_file = f"{self.cache_dir}/prices_{self.start_date}_{self.end_date}.pkl"
            
            if os.path.exists(cache_file):
                self.prices_df = pd.read_pickle(cache_file)
                logging.info(f"Datos cargados desde caché: {cache_file}")
                return self.prices_df
            
            if not self.sp500_symbols:
                self.get_sp500_symbols()
            
            all_data = []
            
            # Procesar por lotes para evitar límites de API
            for i in range(0, len(self.sp500_symbols), batch_size):
                batch = self.sp500_symbols[i:i+batch_size]
                
                for attempt in range(max_retries):
                    try:
                        # auto_adjust=True por defecto
                        batch_data = yf.download(batch, start=self.start_date, end=self.end_date, progress=False)
                        
                        if 'Close' in batch_data.columns:
                            # Si solo hay un símbolo, ajustar estructura
                            if isinstance(batch_data.columns, pd.Index) and not isinstance(batch_data.columns, pd.MultiIndex):
                                df = pd.DataFrame({batch[0]: batch_data['Close']})
                            else:
                                df = batch_data['Close']
                            
                            all_data.append(df)
                            break
                    except Exception as e:
                        logging.warning(f"Intento {attempt+1}/{max_retries} fallido para el lote {i//batch_size+1}: {str(e)}")
                        time.sleep(2 ** attempt)  # backoff exponencial
                
                time.sleep(1)  # Pausa para no sobrecargar la API
            
            if all_data:
                # Combinar todos los lotes
                combined_data = pd.concat(all_data, axis=1)
                combined_data = combined_data.loc[~combined_data.index.duplicated(keep='first')]
                
                # Filtrar columnas con demasiados valores faltantes (>30%)
                threshold = len(combined_data) * 0.7
                self.prices_df = combined_data.dropna(axis=1, thresh=threshold)
                
                # Interpolación lineal para huecos pequeños (<3 días)
                self.prices_df = self.prices_df.interpolate(method='linear', limit=3)
                
                # Guardar en caché
                self.prices_df.to_pickle(cache_file)
                
                # Guardar también como CSV
                self.prices_df.to_csv('./artifacts/results/data/price_data.csv')
                
                logging.info(f"Descargados y procesados datos para {self.prices_df.shape[1]} símbolos")
                return self.prices_df
            
            else:
                logging.error("No se pudieron obtener datos de precios")
                return pd.DataFrame()
                
        except Exception as e:
            logging.error(f"Error al descargar datos de precios: {str(e)}")
            logging.error(f"Traceback: {sys.exc_info()[2]}")
            return pd.DataFrame()

    def download_regime_indicators(self):
        """
        Descarga indicadores para la detección de regímenes:
        1. VIX (volatilidad)
        2. Term spread (10Y-2Y Treasury)
        3. Credit spread (BBB-Treasury)
        4. Indicador compuesto de liquidez
        
        Returns:
            pandas.DataFrame: DataFrame con los indicadores
        """
        try:
            # Verificar caché
            cache_file = f"{self.cache_dir}/regime_indicators_{self.start_date}_{self.end_date}.pkl"
            
            if os.path.exists(cache_file):
                self.regime_indicators = pd.read_pickle(cache_file)
                return self.regime_indicators
            
            # Descargar VIX
            vix = yf.download('^VIX', start=self.start_date, end=self.end_date, progress=False)['Close']
            vix = np.log(vix)  # Transformación logarítmica
            
            # Descargar tasas del Tesoro para term spread
            treasury_10y = yf.download('^TNX', start=self.start_date, end=self.end_date, progress=False)['Close'] / 100
            treasury_2y = yf.download('^TWO', start=self.start_date, end=self.end_date, progress=False)['Close'] / 100
            
            # Calcular term spread
            term_spread = pd.DataFrame({'10Y': treasury_10y, '2Y': treasury_2y})
            term_spread = term_spread.interpolate(method='linear', limit=5)
            term_spread['spread'] = term_spread['10Y'] - term_spread['2Y']
            
            # Usar un proxy para credit spread (podríamos usar ETFs como LQD-IEF)
            investment_grade = yf.download('LQD', start=self.start_date, end=self.end_date, progress=False)['Close']
            treasury_etf = yf.download('IEF', start=self.start_date, end=self.end_date, progress=False)['Close']
            
            # Normalizar y calcular credit spread proxy
            investment_grade_norm = investment_grade / investment_grade.iloc[0]
            treasury_etf_norm = treasury_etf / treasury_etf.iloc[0]
            credit_spread_proxy = -(investment_grade_norm - treasury_etf_norm)  # Invertir para que valores más altos indiquen mayor spread
            
            # Si ya tenemos datos de precio, usarlos para el indicador de liquidez
            if self.prices_df is None:
                self.download_price_data()
            
            # Indicador compuesto de liquidez (volatilidad/volumen para SPY)
            spy_data = yf.download('SPY', start=self.start_date, end=self.end_date, progress=False)
            spy_data['volatility'] = spy_data['Close'].pct_change().rolling(21).std() * np.sqrt(252)
            spy_data['volume_ma'] = spy_data['Volume'].rolling(21).mean()
            spy_data['volume_rel'] = spy_data['Volume'] / spy_data['volume_ma']
            spy_data['liquidity'] = spy_data['volatility'] / spy_data['volume_rel']
            
            # Normalizar cada indicador
            vix_norm = (vix - vix.mean()) / vix.std()
            term_spread_norm = (term_spread['spread'] - term_spread['spread'].mean()) / term_spread['spread'].std()
            credit_spread_norm = (credit_spread_proxy - credit_spread_proxy.mean()) / credit_spread_proxy.std()
            liquidity_norm = (spy_data['liquidity'] - spy_data['liquidity'].mean()) / spy_data['liquidity'].std()
            
            # Combinar todos los indicadores
            indicators = pd.DataFrame({
                'vix': vix_norm,
                'term_spread': term_spread_norm,
                'credit_spread': credit_spread_norm,
                'liquidity': liquidity_norm
            })
            
            indicators = indicators.dropna()
            
            # Guardar en caché
            self.regime_indicators = indicators
            indicators.to_pickle(cache_file)
            indicators.to_csv('./artifacts/results/data/regime_indicators.csv')
            
            return indicators
            
        except Exception as e:
            logging.error(f"Error al descargar indicadores de régimen: {str(e)}")
            logging.error(f"Traceback: {sys.exc_info()[2]}")
            return pd.DataFrame()

    def detect_regimes(self):
        """
        Implementa un modelo HMM para detectar regímenes de mercado 
        basado en los indicadores descargados.
        
        Returns:
            pandas.Series: Serie temporal con el régimen identificado para cada fecha
        """
        try:
            if self.regime_indicators is None:
                self.download_regime_indicators()
            
            if len(self.regime_indicators) < self.window_hmm:
                logging.warning(f"Datos insuficientes para HMM. Se requieren {self.window_hmm} días, disponibles: {len(self.regime_indicators)}")
                return pd.Series(index=self.regime_indicators.index, data=2)  # Default a régimen normal
            
            # Preparar datos para HMM
            X = self.regime_indicators.dropna().values
            scaler = StandardScaler()
            X_scaled = scaler.fit_transform(X)
            
            # Inicializar modelo HMM con K-means
            kmeans = KMeans(n_clusters=self.n_regimes, random_state=42)
            kmeans.fit(X_scaled)
            
            # Ordenar clusters por volatilidad (VIX)
            cluster_centers = scaler.inverse_transform(kmeans.cluster_centers_)
            volatility_ranking = np.argsort(cluster_centers[:, 0])  # VIX es la primera columna
            
            # El modelo HMM con inicialización apropiada
            model = hmm.GaussianHMM(n_components=self.n_regimes, covariance_type="full", 
                                    n_iter=100, random_state=42)
            
            # Estimar modelo
            model.fit(X_scaled)
            self.regime_model = model
            
            # Decodificar estados
            hidden_states = model.predict(X_scaled)
            
            # Mapear estados a regímenes (1=bajo, 2=medio, 3=alto) basado en volatilidad
            state_volatility = np.zeros(self.n_regimes)
            for i in range(self.n_regimes):
                state_volatility[i] = np.mean(X_scaled[hidden_states == i, 0])
            
            regime_map = np.argsort(state_volatility) + 1
            
            decoded_regimes = pd.Series(index=self.regime_indicators.index, 
                                        data=[regime_map[s] for s in hidden_states])
            
            # Guardar resultados
            decoded_regimes.to_csv('./artifacts/results/data/regime_detection.csv')
            
            # Visualizar regímenes
            plt.figure(figsize=(12, 8))
            
            # Gráfico de regímenes
            ax1 = plt.subplot(211)
            ax1.plot(self.prices_df.index, self.prices_df['SPY'] if 'SPY' in self.prices_df.columns else self.prices_df.iloc[:, 0], 'k', alpha=0.7)
            ax1.set_title('Detección de Regímenes de Mercado')
            ax1.set_ylabel('Precio de Mercado')
            
            # Colorear fondo según régimen
            regime_colors = {1: 'green', 2: 'blue', 3: 'red'}
            for regime in range(1, self.n_regimes + 1):
                mask = decoded_regimes == regime
                ax1.fill_between(decoded_regimes.index, 0, 1, where=mask, 
                                 transform=ax1.get_xaxis_transform(), 
                                 color=regime_colors[regime], alpha=0.3, 
                                 label=f'Régimen {regime}')
            
            ax1.legend()
            
            # Gráfico de indicadores
            ax2 = plt.subplot(212, sharex=ax1)
            ax2.plot(self.regime_indicators.index, self.regime_indicators['vix'], 'r-', label='VIX (norm)')
            ax2.plot(self.regime_indicators.index, self.regime_indicators['credit_spread'], 'b-', label='Credit Spread (norm)')
            ax2.set_xlabel('Fecha')
            ax2.set_ylabel('Valor normalizado')
            ax2.legend()
            
            plt.tight_layout()
            plt.savefig('./artifacts/results/figures/market_regimes.png')
            plt.close()
            
            # Actualizar régimen actual
            self.current_regime = decoded_regimes.iloc[-1]
            logging.info(f"Régimen actual detectado: {self.current_regime}")
            
            return decoded_regimes
            
        except Exception as e:
            logging.error(f"Error en la detección de regímenes: {str(e)}")
            logging.error(f"Traceback: {sys.exc_info()[2]}")
            # Fallback a régimen normal
            return pd.Series(index=self.regime_indicators.index if self.regime_indicators is not None else [], data=2)

    def find_cointegrated_pairs(self, max_pairs=50, p_value_threshold=0.01):
        """
        Identifica pares de acciones cointegrados mediante el test de Johansen.
        
        Args:
            max_pairs (int): Número máximo de pares a seleccionar
            p_value_threshold (float): Umbral de p-valor para el test de Johansen
        
        Returns:
            list: Lista de tuplas (stock1, stock2) con pares cointegrados
        """
        try:
            if self.prices_df is None:
                self.download_price_data()
            
            if len(self.prices_df) < self.window_train:
                logging.warning(f"Datos insuficientes para cointegración. Se requieren {self.window_train} días")
                return []
            
            # Usamos los últimos window_train días para análisis
            recent_prices = self.prices_df.iloc[-self.window_train:]
            
            # Filtrar columnas con NaN
            recent_prices = recent_prices.dropna(axis=1)
            
            symbols = recent_prices.columns.tolist()
            n = len(symbols)
            
            pairs_results = []
            
            logging.info(f"Analizando cointegración para {n} símbolos...")
            
            # Procesar en paralelo para eficiencia
            def process_pair(i, j):
                stock1, stock2 = symbols[i], symbols[j]
                
                # Extraer series de tiempo
                stock1_prices = recent_prices[stock1].values
                stock2_prices = recent_prices[stock2].values
                
                if np.any(np.isnan(stock1_prices)) or np.any(np.isnan(stock2_prices)):
                    return None
                
                # Test de Johansen
                try:
                    result = coint_johansen(np.column_stack((stock1_prices, stock2_prices)), det_order=0, k_ar_diff=1)
                    trace_stat = result.lr1[0]
                    critical_value = result.cvt[0, 0]  # Valor crítico al 90%
                    
                    # Test de cointegración de Engle-Granger como verificación adicional
                    _, p_value, _ = coint(stock1_prices, stock2_prices)
                    
                    # Estimar half-life si parece cointegrado
                    if trace_stat > critical_value and p_value < p_value_threshold:
                        # Regresión para estimar parámetros de cointegración
                        model = sm.OLS(stock1_prices, sm.add_constant(stock2_prices)).fit()
                        beta = model.params[1]
                        alpha = model.params[0]
                        
                        # Calcular residuos
                        spread = stock1_prices - beta * stock2_prices - alpha
                        
                        # Estimar modelo AR(1) en los residuos
                        model_ar = AutoReg(spread, lags=1).fit()
                        
                        # Calcular half-life
                        phi = model_ar.params[1]
                        half_life = -np.log(2) / np.log(abs(phi)) if abs(phi) < 1 else np.inf
                        
                        # Verificar KPSS (estacionariedad) en los residuos
                        kpss_stat, kpss_p_value, _, _ = kpss(spread)
                        
                        # Solo considerar si el half-life está en el rango deseado
                        # y los residuos son estacionarios (KPSS no rechaza H0)
                        if (self.min_halflife <= half_life <= self.max_halflife and 
                            kpss_p_value > 0.05):
                            
                            # Calcular intervalo de credibilidad para beta (simulación simplificada)
                            mean_beta = beta
                            std_beta = model.bse[1]
                            ci_lower = mean_beta - 1.96 * std_beta
                            ci_upper = mean_beta + 1.96 * std_beta
                            
                            # Verificar que 0 no está en el intervalo de credibilidad
                            if ci_lower * ci_upper > 0:  # Mismo signo = 0 no incluido
                                return {
                                    'stock1': stock1,
                                    'stock2': stock2,
                                    'trace_stat': trace_stat,
                                    'critical_value': critical_value,
                                    'p_value': p_value,
                                    'beta': beta,
                                    'alpha': alpha,
                                    'half_life': half_life,
                                    'ci_lower': ci_lower,
                                    'ci_upper': ci_upper,
                                    'kpss_p_value': kpss_p_value
                                }
                except Exception as e:
                    logging.debug(f"Error procesando par {stock1}-{stock2}: {str(e)}")
                    return None
                
                return None
            
            # Ejecutar análisis de pares en paralelo
            pairs = []
            with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
                futures = []
                for i in range(n):
                    for j in range(i+1, n):
                        futures.append(executor.submit(process_pair, i, j))
                
                for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures), desc="Analizando pares"):
                    result = future.result()
                    if result is not None:
                        pairs.append(result)
            
            # Ordenar por p-valor y seleccionar los mejores pares
            pairs.sort(key=lambda x: x['p_value'])
            selected_pairs = pairs[:max_pairs]
            
            # Guardar resultados
            pd.DataFrame(selected_pairs).to_csv('./artifacts/results/data/cointegrated_pairs.csv', index=False)
            
            # Actualizar lista de pares
            self.cointegrated_pairs = [(p['stock1'], p['stock2']) for p in selected_pairs]
            
            # Guardar parámetros de pares
            for p in selected_pairs:
                key = (p['stock1'], p['stock2'])
                self.pair_params[key] = {
                    'beta': p['beta'],
                    'alpha': p['alpha'],
                    'half_life': p['half_life'],
                    'ci_lower': p['ci_lower'],
                    'ci_upper': p['ci_upper']
                }
            
            logging.info(f"Identificados {len(selected_pairs)} pares cointegrados")
            
            # Visualizar algunos pares
            self.visualize_pairs(selected_pairs[:5])
            
            return self.cointegrated_pairs
            
        except Exception as e:
            logging.error(f"Error al encontrar pares cointegrados: {str(e)}")
            logging.error(f"Traceback: {sys.exc_info()[2]}")
            return []

    def visualize_pairs(self, pairs_to_plot):
        """
        Visualiza los pares cointegrados seleccionados.
        
        Args:
            pairs_to_plot (list): Lista de diccionarios con información de pares
        """
        try:
            if not pairs_to_plot:
                return
                
            recent_prices = self.prices_df.iloc[-self.window_train:]
            
            for i, pair in enumerate(pairs_to_plot):
                stock1, stock2 = pair['stock1'], pair['stock2']
                beta, alpha = pair['beta'], pair['alpha']
                half_life = pair['half_life']
                
                # Normalizar precios
                s1 = recent_prices[stock1] / recent_prices[stock1].iloc[0]
                s2 = recent_prices[stock2] / recent_prices[stock2].iloc[0]
                
                # Calcular spread
                spread = recent_prices[stock1] - beta * recent_prices[stock2] - alpha
                
                # Estadísticas del spread
                spread_mean = spread.mean()
                spread_std = spread.std()
                z_score = (spread - spread_mean) / spread_std
                
                fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))
                
                # Gráfico de precios normalizados
                ax1.plot(recent_prices.index, s1, label=stock1)
                ax1.plot(recent_prices.index, s2, label=stock2)
                ax1.set_title(f'Par Cointegrado: {stock1} - {stock2}')
                ax1.set_ylabel('Precio Normalizado')
                ax1.legend()
                
                # Gráfico de spread
                ax2.plot(recent_prices.index, z_score)
                ax2.axhline(y=0, color='r', linestyle='-')
                ax2.axhline(y=1.0, color='g', linestyle='--')
                ax2.axhline(y=-1.0, color='g', linestyle='--')
                ax2.axhline(y=2.0, color='y', linestyle='--')
                ax2.axhline(y=-2.0, color='y', linestyle='--')
                ax2.set_title(f'Z-Score del Spread (Half-Life: {half_life:.2f} días)')
                ax2.set_ylabel('Z-Score')
                
                plt.tight_layout()
                plt.savefig(f'./artifacts/results/figures/pair_{stock1}_{stock2}.png')
                plt.close()
                
        except Exception as e:
            logging.error(f"Error al visualizar pares: {str(e)}")
            logging.error(f"Traceback: {sys.exc_info()[2]}")

    def calculate_pair_zscore(self, stock1, stock2, lookback=None):
        """
        Calcula el z-score actual para un par de acciones.
        
        Args:
            stock1 (str): Primer símbolo del par
            stock2 (str): Segundo símbolo del par
            lookback (int): Periodo para calcular estadísticas, default es half-life*4
            
        Returns:
            float: Z-score actual del par
        """
        try:
            if (stock1, stock2) not in self.pair_params:
                return None
            
            params = self.pair_params[(stock1, stock2)]
            beta, alpha = params['beta'], params['alpha']
            
            if lookback is None:
                # Por defecto, usar 4 veces el half-life
                lookback = int(min(params['half_life'] * 4, 100))
            
            # Obtener datos recientes
            recent_data = self.prices_df.iloc[-lookback:]
            
            # Verificar que ambos stocks existen en los datos
            if stock1 not in recent_data.columns or stock2 not in recent_data.columns:
                logging.warning(f"Faltan datos para el par {stock1}-{stock2}")
                return None
            
            # Calcular spread
            spread = recent_data[stock1] - beta * recent_data[stock2] - alpha
            
            # Calcular estadísticas del spread
            spread_mean = spread.mean()
            spread_std = spread.std()
            
            if spread_std == 0:
                logging.warning(f"Desviación estándar cero para el par {stock1}-{stock2}")
                return None
            
            # Calcular z-score actual
            current_spread = spread.iloc[-1]
            z_score = (current_spread - spread_mean) / spread_std
            
            return z_score
            
        except Exception as e:
            logging.error(f"Error al calcular z-score para {stock1}-{stock2}: {str(e)}")
            logging.error(f"Traceback: {sys.exc_info()[2]}")
            return None

    def detect_structural_change(self, stock1, stock2, cusum_threshold=0.5, test_window=30):
        """
        Detecta cambios estructurales en la relación de cointegración
        utilizando CUSUM recursivo y pruebas secuenciales.
        
        Args:
            stock1 (str): Primer símbolo del par
            stock2 (str): Segundo símbolo del par
            cusum_threshold (float): Umbral para detección CUSUM
            test_window (int): Ventana para pruebas secuenciales
            
        Returns:
            bool: True si se detecta cambio estructural, False en caso contrario
        """
        try:
            if (stock1, stock2) not in self.pair_params:
                return False
                
            params = self.pair_params[(stock1, stock2)]
            beta, alpha = params['beta'], params['alpha']
            
            # Obtener datos recientes 
            recent_data = self.prices_df.iloc[-test_window:]
            
            if stock1 not in recent_data.columns or stock2 not in recent_data.columns:
                return False
                
            # Calcular spread
            spread = recent_data[stock1] - beta * recent_data[stock2] - alpha
            
            # CUSUM recursivo
            cusum = np.zeros(len(spread))
            spread_std = spread.std()
            
            for i in range(1, len(spread)):
                cusum[i] = max(0, cusum[i-1] + (abs(spread.iloc[i]) - 0.5 * spread_std))
            
            # Normalizar CUSUM
            if cusum.max() > 0:
                cusum = cusum / cusum.max()
                
            # Prueba recursiva de OLS para cambio estructural
            X = sm.add_constant(recent_data[stock2])
            y = recent_data[stock1]
            
            try:
                rec_resids = recursive_olsresiduals(y.values, X.values, 5)
                cusum_test = rec_resids[0][-1]  # Último valor del test CUSUM
                
                # Criterio de detección: CUSUM alto o test formal significativo
                critical_value = cusum_threshold
                
                if cusum[-1] > critical_value or abs(cusum_test) > 1.0:
                    logging.info(f"Cambio estructural detectado en par {stock1}-{stock2}: CUSUM={cusum[-1]:.2f}, Test={cusum_test:.2f}")
                    return True
            
            except Exception as e:
                logging.debug(f"Error en prueba recursiva para {stock1}-{stock2}: {str(e)}")
            
            return False
            
        except Exception as e:
            logging.error(f"Error al detectar cambio estructural para {stock1}-{stock2}: {str(e)}")
            logging.error(f"Traceback: {sys.exc_info()[2]}")
            return False

    def generate_trading_signals(self, regimes):
        """
        Genera señales de trading basadas en los z-scores y el régimen actual.
        
        Args:
            regimes (pandas.Series): Serie con los regímenes identificados
            
        Returns:
            pandas.DataFrame: DataFrame con señales de trading
        """
        try:
            if not self.cointegrated_pairs:
                logging.warning("No hay pares cointegrados para generar señales")
                return pd.DataFrame()
            
            signals = []
            current_date = self.prices_df.index[-1]
            
            # Obtener régimen actual
            current_regime = regimes.iloc[-1] if not regimes.empty else 2  # Default a régimen normal
            
            # Umbrales según régimen
            entry_threshold = self.entry_thresholds[current_regime]
            exit_threshold = self.exit_thresholds[current_regime]
            
            for stock1, stock2 in self.cointegrated_pairs:
                # Verificar si el par está en cuarentena
                if (stock1, stock2) in self.quarantine_pairs:
                    continue
                
                # Calcular z-score actual
                z_score = self.calculate_pair_zscore(stock1, stock2)
                
                if z_score is None:
                    continue
                
                # Detectar cambio estructural
                if self.detect_structural_change(stock1, stock2):
                    logging.info(f"Par {stock1}-{stock2} puesto en cuarentena por cambio estructural")
                    self.quarantine_pairs.add((stock1, stock2))
                    
                    # Si hay posición abierta, cerrarla
                    if (stock1, stock2) in self.active_positions:
                        signals.append({
                            'date': current_date,
                            'pair': f"{stock1}-{stock2}",
                            'stock1': stock1,
                            'stock2': stock2,
                            'z_score': z_score,
                            'signal': 'close',
                            'reason': 'structural_change',
                            'regime': current_regime
                        })
                        
                    continue
                
                # Verificar si ya hay posición activa para este par
                if (stock1, stock2) in self.active_positions:
                    position = self.active_positions[(stock1, stock2)]
                    days_in_position = (current_date - position['entry_date']).days
                    position_type = position['type']
                    
                    # Verificar condiciones de salida
                    exit_signal = False
                    exit_reason = None
                    
                    # 1. Reversión a la media
                    if (position_type == 'long' and z_score > -exit_threshold) or \
                       (position_type == 'short' and z_score < exit_threshold):
                        exit_signal = True
                        exit_reason = 'mean_reversion'
                    
                    # 2. Tiempo máximo en posición (2x half-life)
                    half_life = self.pair_params[(stock1, stock2)]['half_life']
                    max_days = min(int(2 * half_life), 30)
                    
                    if days_in_position > max_days:
                        exit_signal = True
                        exit_reason = 'time_limit'
                    
                    # 3. Stop-loss (2σ del par)
                    if position_type == 'long' and z_score < position['entry_zscore'] - 2.0:
                        exit_signal = True
                        exit_reason = 'stop_loss'
                    elif position_type == 'short' and z_score > position['entry_zscore'] + 2.0:
                        exit_signal = True
                        exit_reason = 'stop_loss'
                    
                    if exit_signal:
                        signals.append({
                            'date': current_date,
                            'pair': f"{stock1}-{stock2}",
                            'stock1': stock1,
                            'stock2': stock2,
                            'z_score': z_score,
                            'signal': 'close',
                            'reason': exit_reason,
                            'regime': current_regime
                        })
                    
                else:
                    # Generar señales de entrada según los umbrales del régimen
                    if z_score < -entry_threshold:
                        signals.append({
                            'date': current_date,
                            'pair': f"{stock1}-{stock2}",
                            'stock1': stock1,
                            'stock2': stock2,
                            'z_score': z_score,
                            'signal': 'open',
                            'position': 'long',  # Long stock1, short stock2
                            'regime': current_regime
                        })
                    
                    elif z_score > entry_threshold:
                        signals.append({
                            'date': current_date,
                            'pair': f"{stock1}-{stock2}",
                            'stock1': stock1,
                            'stock2': stock2,
                            'z_score': z_score,
                            'signal': 'open',
                            'position': 'short',  # Short stock1, long stock2
                            'regime': current_regime
                        })
            
            # Convertir a DataFrame
            signals_df = pd.DataFrame(signals)
            
            if not signals_df.empty:
                # Guardar señales generadas
                signals_df.to_csv('./artifacts/results/data/trading_signals.csv', index=False)
                logging.info(f"Generadas {len(signals_df)} señales de trading")
                
                # Visualizar algunas señales
                self.visualize_signals(signals_df)
            
            return signals_df
            
        except Exception as e:
            logging.error(f"Error al generar señales de trading: {str(e)}")
            logging.error(f"Traceback: {sys.exc_info()[2]}")
            return pd.DataFrame()
            
    def visualize_signals(self, signals_df):
        """
        Visualiza las señales de trading generadas.
        
        Args:
            signals_df (pandas.DataFrame): DataFrame con señales de trading
        """
        try:
            if signals_df.empty:
                return
                
            # Seleccionar hasta 5 señales para visualizar
            unique_pairs = signals_df['pair'].unique()[:5]
            
            for pair in unique_pairs:
                pair_signals = signals_df[signals_df['pair'] == pair]
                
                if pair_signals.empty:
                    continue
                
                # Obtener componentes del par
                stock1, stock2 = pair.split('-')
                
                # Obtener parámetros del par
                if (stock1, stock2) not in self.pair_params:
                    continue
                    
                params = self.pair_params[(stock1, stock2)]
                beta, alpha = params['beta'], params['alpha']
                
                # Obtener datos recientes
                recent_data = self.prices_df[-126:]  # Últimos ~6 meses
                
                if stock1 not in recent_data.columns or stock2 not in recent_data.columns:
                    continue
                
                # Calcular spread y z-score
                spread = recent_data[stock1] - beta * recent_data[stock2] - alpha
                
                # Calcular estadísticas móviles para z-score
                spread_mean = spread.rolling(window=21).mean()
                spread_std = spread.rolling(window=21).std()
                z_score = (spread - spread_mean) / spread_std
                
                # Crear gráfico
                fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))
                
                # Gráfico de precios
                ax1.plot(recent_data.index, recent_data[stock1] / recent_data[stock1].iloc[0], label=stock1)
                ax1.plot(recent_data.index, recent_data[stock2] / recent_data[stock2].iloc[0], label=stock2)
                ax1.set_title(f'Señales de Trading para el Par {pair}')
                ax1.set_ylabel('Precio Normalizado')
                ax1.legend()
                
                # Gráfico de z-score con señales
                ax2.plot(recent_data.index, z_score)
                ax2.axhline(y=0, color='k', linestyle='-', alpha=0.3)
                
                # Añadir líneas de umbral según régimen
                for regime in range(1, 4):
                    entry = self.entry_thresholds[regime]
                    exit = self.exit_thresholds[regime]
                    
                    color = {1: 'green', 2: 'blue', 3: 'red'}[regime]
                    alpha = 0.3 if regime != self.current_regime else 0.8
                    
                    ax2.axhline(y=entry, color=color, linestyle='--', alpha=alpha, 
                               label=f'Entrada Régimen {regime}' if regime == 1 else None)
                    ax2.axhline(y=-entry, color=color, linestyle='--', alpha=alpha)
                    ax2.axhline(y=exit, color=color, linestyle=':', alpha=alpha, 
                               label=f'Salida Régimen {regime}' if regime == 1 else None)
                    ax2.axhline(y=-exit, color=color, linestyle=':', alpha=alpha)
                
                # Marcar señales en el gráfico
                for _, signal in pair_signals.iterrows():
                    signal_date = signal['date']
                    
                    if signal_date not in recent_data.index:
                        continue
                        
                    if signal['signal'] == 'open':
                        marker = '^' if signal['position'] == 'long' else 'v'
                        color = 'g' if signal['position'] == 'long' else 'r'
                        ax2.plot(signal_date, signal['z_score'], marker=marker, 
                                color=color, markersize=10)
                    else:  # close
                        ax2.plot(signal_date, signal['z_score'], marker='x', 
                                color='k', markersize=10)
                
                ax2.set_title(f'Z-Score con Señales de Trading (HL: {params["half_life"]:.2f} días)')
                ax2.set_ylabel('Z-Score')
                ax2.legend()
                
                plt.tight_layout()
                plt.savefig(f'./artifacts/results/figures/signals_{stock1}_{stock2}.png')
                plt.close()
                
        except Exception as e:
            logging.error(f"Error al visualizar señales: {str(e)}")
            logging.error(f"Traceback: {sys.exc_info()[2]}")
    
    def optimize_portfolio(self, signals_df, current_regime):
        """
        Optimiza la asignación de capital a los diferentes pares.
        
        Args:
            signals_df (pandas.DataFrame): DataFrame con señales de trading
            current_regime (int): Régimen actual del mercado
            
        Returns:
            dict: Diccionario con asignaciones óptimas de capital
        """
        try:
            if signals_df.empty:
                return {}
                
            # Filtrar solo señales de apertura
            open_signals = signals_df[signals_df['signal'] == 'open']
            
            if open_signals.empty:
                return {}
                
            # Número de pares activos
            n_pairs = len(open_signals)
            
            # Base allocation: 1/sqrt(N)
            base_allocation = 1 / np.sqrt(n_pairs)
            
            # Ajustes por calidad de pares y régimen
            allocations = {}
            
            for _, signal in open_signals.iterrows():
                stock1, stock2 = signal['stock1'], signal['stock2']
                pair_key = (stock1, stock2)
                
                if pair_key not in self.pair_params:
                    continue
                
                # Ajuste por calidad (inverso del ancho del IC)
                ic_width = self.pair_params[pair_key]['ci_upper'] - self.pair_params[pair_key]['ci_lower']
                quality_factor = 1.0
                
                if ic_width > 0:
                    quality_factor = min(1.5, 1.0 / ic_width)  # Cap en 1.5x
                
                # Ajuste por régimen
                regime_factor = self.position_scaling[current_regime]
                
                # Asignación final
                allocation = base_allocation * quality_factor * regime_factor
                
                # Aplicar límites
                allocation = min(0.05, allocation)  # Máximo 5% por par
                
                allocations[pair_key] = allocation
            
            # Normalizar para que sumen a un máximo según el régimen
            max_portfolio_exposure = self.position_scaling[current_regime]
            total_allocation = sum(allocations.values())
            
            if total_allocation > max_portfolio_exposure:
                scale_factor = max_portfolio_exposure / total_allocation
                allocations = {k: v * scale_factor for k, v in allocations.items()}
            
            # Guardar asignaciones
            allocation_df = pd.DataFrame([
                {'pair': f"{k[0]}-{k[1]}", 'allocation': v}
                for k, v in allocations.items()
            ])
            
            if not allocation_df.empty:
                allocation_df.to_csv('./artifacts/results/data/portfolio_allocation.csv', index=False)
            
            return allocations
            
        except Exception as e:
            logging.error(f"Error en optimización de cartera: {str(e)}")
            logging.error(f"Traceback: {sys.exc_info()[2]}")
            return {}

    def run_backtest(self, start_date=None, end_date=None, initial_capital=1000000):
        """
        Ejecuta un backtest de la estrategia para el período especificado.
        
        Args:
            start_date (str): Fecha inicial para el backtest
            end_date (str): Fecha final para el backtest
            initial_capital (float): Capital inicial
            
        Returns:
            pandas.DataFrame: DataFrame con resultados diarios del backtest
        """
        try:
            # Configurar fechas del backtest
            if start_date is None:
                start_date = self.start_date
            
            if end_date is None:
                end_date = self.end_date
            
            logging.info(f"Iniciando backtest desde {start_date} hasta {end_date}")
            
            # Asegurarse de que tenemos los datos
            if self.prices_df is None:
                self.download_price_data()
            
            # Filtrar el período de backtest
            backtest_data = self.prices_df.loc[start_date:end_date]
            
            if len(backtest_data) < self.window_train:
                logging.error(f"Datos insuficientes para backtest. Se requieren al menos {self.window_train} días")
                return pd.DataFrame()
            
            # Inicializar variables de backtest
            portfolio_value = [initial_capital]
            cash = initial_capital
            current_positions = {}
            daily_returns = []
            daily_benchmark = []
            daily_exposures = []
            trade_history = []
            regime_history = []
            
            # Obtener SPY como benchmark
            spy_returns = None
            if 'SPY' in self.prices_df.columns:
                spy_prices = self.prices_df['SPY'].loc[backtest_data.index]
                spy_returns = spy_prices.pct_change().fillna(0)
            
            # Recalibrar cada semana (5 días de trading)
            recalibration_frequency = 5
            
            for i, current_date in enumerate(tqdm(backtest_data.index[self.window_train:], desc="Backtest")):
                day_index = i + self.window_train
                
                # Datos hasta la fecha actual (para evitar look-ahead bias)
                data_until_today = self.prices_df.iloc[:day_index]
                
                # Detectar régimen actual
                if i % recalibration_frequency == 0:
                    # Solo recalibrar periódicamente (semanal)
                    self.regime_indicators = self.download_regime_indicators()
                    regimes = self.detect_regimes()
                    current_regime = regimes.iloc[-1] if not regimes.empty else 2
                
                regime_history.append(current_regime)
                
                # Ejecutar lógica de trading solo en fechas de recalibración
                if i % recalibration_frequency == 0:
                    # Identificar pares cointegrados
                    self.cointegrated_pairs = self.find_cointegrated_pairs()
                    
                    # Generar señales
                    signals = self.generate_trading_signals(regimes)
                    
                    if not signals.empty:
                        # Optimizar portfolio
                        allocations = self.optimize_portfolio(signals, current_regime)
                        
                        # Procesar señales
                        for _, signal in signals.iterrows():
                            pair_key = (signal['stock1'], signal['stock2'])
                            
                            # Señales de cierre
                            if signal['signal'] == 'close' and pair_key in current_positions:
                                position = current_positions[pair_key]
                                position_type = position['type']
                                entry_date = position['entry_date']
                                entry_prices = position['entry_prices']
                                position_size = position['size']
                                
                                # Calcular retorno
                                exit_prices = {
                                    signal['stock1']: data_until_today[signal['stock1']].iloc[-1],
                                    signal['stock2']: data_until_today[signal['stock2']].iloc[-1]
                                }
                                
                                if position_type == 'long':
                                    # Long stock1, short stock2
                                    stock1_return = (exit_prices[signal['stock1']] / entry_prices[signal['stock1']]) - 1
                                    stock2_return = (entry_prices[signal['stock2']] / exit_prices[signal['stock2']]) - 1
                                else:
                                    # Short stock1, long stock2
                                    stock1_return = (entry_prices[signal['stock1']] / exit_prices[signal['stock1']]) - 1
                                    stock2_return = (exit_prices[signal['stock2']] / entry_prices[signal['stock2']]) - 1
                                
                                # Rendimiento neto (simplificado, sin costes de transacción)
                                net_return = (stock1_return + stock2_return) / 2
                                pnl = position_size * net_return
                                
                                # Actualizar cash
                                cash += position_size + pnl
                                
                                # Registrar operación
                                trade_history.append({
                                    'entry_date': entry_date,
                                    'exit_date': current_date,
                                    'pair': f"{signal['stock1']}-{signal['stock2']}",
                                    'position_type': position_type,
                                    'entry_z_score': position['z_score'],
                                    'exit_z_score': signal['z_score'],
                                    'exit_reason': signal.get('reason', 'unknown'),
                                    'regime': signal['regime'],
                                    'position_size': position_size,
                                    'pnl': pnl,
                                    'return_pct': net_return * 100
                                })
                                
                                # Eliminar posición
                                del current_positions[pair_key]
                            
                            # Señales de apertura
                            elif signal['signal'] == 'open' and pair_key not in current_positions:
                                # Solo abrir si tenemos capital disponible
                                if cash > 0 and pair_key in allocations:
                                    allocation = allocations[pair_key]
                                    position_size = cash * allocation
                                    
                                    # Registrar posición
                                    current_positions[pair_key] = {
                                        'type': signal['position'],
                                        'entry_date': current_date,
                                        'entry_prices': {
                                            signal['stock1']: data_until_today[signal['stock1']].iloc[-1],
                                            signal['stock2']: data_until_today[signal['stock2']].iloc[-1]
                                        },
                                        'z_score': signal['z_score'],
                                        'size': position_size
                                    }
                                    
                                    # Actualizar cash
                                    cash -= position_size
                
                # Calcular valor del portfolio para el día actual
                current_value = cash
                current_exposure = 0
                
                # Valorar posiciones abiertas
                for pair_key, position in list(current_positions.items()):
                    stock1, stock2 = pair_key
                    
                    # Verificar si tenemos precios para ambos stocks
                    if stock1 not in data_until_today.columns or stock2 not in data_until_today.columns:
                        continue
                    
                    current_prices = {
                        stock1: data_until_today[stock1].iloc[-1],
                        stock2: data_until_today[stock2].iloc[-1]
                    }
                    
                    entry_prices = position['entry_prices']
                    position_type = position['type']
                    position_size = position['size']
                    
                    # Calcular valor actual
                    if position_type == 'long':
                        # Long stock1, short stock2
                        stock1_return = (current_prices[stock1] / entry_prices[stock1]) - 1
                        stock2_return = (entry_prices[stock2] / current_prices[stock2]) - 1
                    else:
                        # Short stock1, long stock2
                        stock1_return = (entry_prices[stock1] / current_prices[stock1]) - 1
                        stock2_return = (current_prices[stock2] / entry_prices[stock2]) - 1
                    
                    net_return = (stock1_return + stock2_return) / 2
                    position_value = position_size * (1 + net_return)
                    
                    current_value += position_value
                    current_exposure += position_size
                
                # Registrar valor y exposición
                portfolio_value.append(current_value)
                
                # Calcular retorno diario
                if len(portfolio_value) > 1:
                    daily_return = (portfolio_value[-1] / portfolio_value[-2]) - 1
                    daily_returns.append(daily_return)
                    
                    # Registrar benchmark si está disponible
                    if spy_returns is not None and current_date in spy_returns.index:
                        daily_benchmark.append(spy_returns.loc[current_date])
                    else:
                        daily_benchmark.append(0)
                else:
                    daily_returns.append(0)
                    daily_benchmark.append(0)
                
                # Registrar exposición relativa
                daily_exposures.append(current_exposure / current_value)
            
            # Crear DataFrame con resultados
            results = pd.DataFrame({
                'portfolio_value': portfolio_value[1:],  # Excluir valor inicial
                'daily_return': daily_returns,
                'benchmark_return': daily_benchmark,
                'exposure': daily_exposures,
                'regime': regime_history
            }, index=backtest_data.index[self.window_train:])
            
            # Calcular métricas de performance
            self.calculate_performance_metrics(results)
            
            # Guardar resultados de operaciones
            if trade_history:
                trades_df = pd.DataFrame(trade_history)
                trades_df.to_csv('./artifacts/results/data/trade_history.csv', index=False)
            
            # Guardar resultados diarios
            results.to_csv('./artifacts/results/data/backtest_results.csv')
            
            # Visualizar resultados
            self.visualize_backtest_results(results)
            
            return results
            
        except Exception as e:
            logging.error(f"Error en backtest: {str(e)}")
            logging.error(f"Traceback: {sys.exc_info()[2]}")
            return pd.DataFrame()

    def calculate_performance_metrics(self, results):
        """
        Calcula métricas de rendimiento a partir de los resultados del backtest.
        
        Args:
            results (pandas.DataFrame): DataFrame con resultados diarios del backtest
        """
        try:
            if results.empty:
                logging.warning("No hay resultados para calcular métricas")
                return
            
            # Métricas generales
            daily_returns = results['daily_return']
            
            # Retorno total y anualizado
            total_return = (results['portfolio_value'].iloc[-1] / results['portfolio_value'].iloc[0]) - 1
            trading_days = len(results)
            annual_return = (1 + total_return) ** (252 / trading_days) - 1
            
            # Volatilidad
            volatility_daily = daily_returns.std()
            volatility_annual = volatility_daily * np.sqrt(252)
            
            # Sharpe Ratio (asumiendo tasa libre de riesgo = 0)
            sharpe_ratio = annual_return / volatility_annual if volatility_annual > 0 else 0
            
            # Drawdowns
            cumulative_returns = (1 + daily_returns).cumprod()
            running_max = cumulative_returns.cummax()
            drawdowns = (cumulative_returns / running_max) - 1
            max_drawdown = drawdowns.min()
            
            # Calmar Ratio
            calmar_ratio = annual_return / abs(max_drawdown) if max_drawdown < 0 else 0
            
            # Win rate (de las operaciones completadas)
            trades_file = './artifacts/results/data/trade_history.csv'
            if os.path.exists(trades_file):
                trades = pd.read_csv(trades_file)
                if not trades.empty:
                    win_rate = (trades['pnl'] > 0).mean()
                    profit_factor = abs(trades[trades['pnl'] > 0]['pnl'].sum() / trades[trades['pnl'] < 0]['pnl'].sum()) if trades[trades['pnl'] < 0]['pnl'].sum() != 0 else float('inf')
                else:
                    win_rate = 0
                    profit_factor = 0
            else:
                win_rate = 0
                profit_factor = 0
            
            # Beta al mercado
            if 'benchmark_return' in results.columns:
                # Calcular beta usando regresión simple
                benchmark_returns = results['benchmark_return']
                
                if benchmark_returns.std() > 0:
                    model = sm.OLS(daily_returns, sm.add_constant(benchmark_returns)).fit()
                    beta = model.params[1]
                else:
                    beta = 0
            else:
                beta = 0
            
            # Métricas por régimen
            regime_metrics = {}
            for regime in range(1, 4):
                regime_data = results[results['regime'] == regime]
                
                if not regime_data.empty:
                    regime_returns = regime_data['daily_return']
                    regime_annual_return = (1 + regime_returns.mean()) ** 252 - 1
                    regime_volatility = regime_returns.std() * np.sqrt(252)
                    regime_sharpe = regime_annual_return / regime_volatility if regime_volatility > 0 else 0
                    
                    regime_metrics[regime] = {
                        'return': regime_annual_return,
                        'volatility': regime_volatility,
                        'sharpe': regime_sharpe,
                        'days': len(regime_data),
                        'pct_time': len(regime_data) / len(results) if len(results) > 0 else 0
                    }
            
            # Guardar métricas
            metrics = {
                'total_return': total_return,
                'annual_return': annual_return,
                'volatility': volatility_annual,
                'sharpe_ratio': sharpe_ratio,
                'max_drawdown': max_drawdown,
                'calmar_ratio': calmar_ratio,
                'win_rate': win_rate,
                'profit_factor': profit_factor,
                'beta': beta,
                'regime_metrics': regime_metrics
            }
            
            # Guardar como CSV
            pd.DataFrame([metrics]).to_csv('./artifacts/results/data/performance_metrics.csv', index=False)
            
            # Actualizar performance guardada
            self.performance = {
                'daily_returns': daily_returns.tolist(),
                'sharpe_ratio': sharpe_ratio,
                'max_drawdown': max_drawdown,
                'win_rate': win_rate,
                'profit_factor': profit_factor,
                'calmar_ratio': calmar_ratio,
                'beta_market': beta,
                'regime_performance': regime_metrics
            }
            
            # Imprimir resumen
            logging.info(f"Retorno total: {total_return:.2%}")
            logging.info(f"Retorno anualizado: {annual_return:.2%}")
            logging.info(f"Volatilidad anualizada: {volatility_annual:.2%}")
            logging.info(f"Sharpe Ratio: {sharpe_ratio:.2f}")
            logging.info(f"Max Drawdown: {max_drawdown:.2%}")
            logging.info(f"Win Rate: {win_rate:.2%}")
            
        except Exception as e:
            logging.error(f"Error al calcular métricas de rendimiento: {str(e)}")
            logging.error(f"Traceback: {sys.exc_info()[2]}")

In [None]:
def main():
    """
    Función principal que ejecuta el sistema completo.
    """
    try:
        # Crear instancia del sistema
        system = BayesianPairsTrading(
            start_date='2018-01-01',  # Ajustar según necesidad
            end_date=None  # Hasta hoy
        )
        
        # Ejecutar estrategia
        success = system.run_backtest()
        
        if success:
            print("Estrategia ejecutada con éxito. Resultados guardados en './artifacts/results/'")
        else:
            print("Error al ejecutar estrategia. Consultar './artifacts/errors.txt'")
            
    except Exception as e:
        logging.error(f"Error en función principal: {str(e)}")
        logging.error(f"Traceback: {sys.exc_info()[2]}")
        print(f"Error: {str(e)}")

# Para ejecutar directamente en un notebook

if __name__ == "__main__":
    main()

Backtest:   0%|                                        | 0/1579 [00:00<?, ?it/s]
Analizando pares:   0%|                              | 0/118828 [00:00<?, ?it/s][A
Analizando pares:   0%|                    | 1/118828 [00:00<4:35:27,  7.19it/s][A
Analizando pares:   0%|                    | 79/118828 [00:00<05:41, 347.54it/s][A
Analizando pares:   0%|                   | 115/118828 [00:00<10:55, 180.99it/s][A
Analizando pares:   0%|                   | 140/118828 [00:00<13:36, 145.38it/s][A
Analizando pares:   0%|                   | 159/118828 [00:01<14:49, 133.35it/s][A
Analizando pares:   0%|                   | 175/118828 [00:01<15:50, 124.78it/s][A
Analizando pares:   0%|                   | 189/118828 [00:01<17:33, 112.61it/s][A
Analizando pares:   0%|                   | 201/118828 [00:01<18:14, 108.38it/s][A
Analizando pares:   0%|                   | 213/118828 [00:01<18:42, 105.68it/s][A
Analizando pares:   0%|                    | 224/118828 [00:01<20:27, 96.64it/s

In [2]:
import pandas as pd
import yfinance as yf
import time
from tqdm import tqdm

# Diccionario de índices con URL, columna de ticker y sufijo, tabla correspondiente
index_urls = {
    "S&P 500": {
        "url": "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies",
        "symbol_col": "Symbol",
        "suffix": "",
        "table_index": 0
    },
    "NASDAQ 100": {
        "url": "https://en.wikipedia.org/wiki/NASDAQ-100",
        "symbol_col": "Ticker",
        "suffix": "",
        "table_index": 3
    },
    "Euro Stoxx 50": {
        "url": "https://en.wikipedia.org/wiki/EURO_STOXX_50",
        "symbol_col": "Ticker",
        "suffix": "",
        "table_index": 1
    },
    "DAX": {
        "url": "https://en.wikipedia.org/wiki/DAX",
        "symbol_col": "Ticker symbol",
        "suffix": ".DE",
        "table_index": 3
    },
    "CAC 40": {
        "url": "https://en.wikipedia.org/wiki/CAC_40",
        "symbol_col": "Ticker",
        "suffix": ".PA",
        "table_index": 1
    },
    "FTSE 100": {
        "url": "https://en.wikipedia.org/wiki/FTSE_100_Index",
        "symbol_col": "EPIC",
        "suffix": ".L",
        "table_index": 3
    }
}

# 1. Obtener y limpiar todos los tickers de los índices
all_candidates = []

for name, info in index_urls.items():
    try:
        df = pd.read_html(info["url"])[info["table_index"]]
        symbols = df[info["symbol_col"]].astype(str)
        cleaned = symbols.str.replace(r"\.", "-", regex=True) + info["suffix"]
        all_candidates.extend(cleaned.tolist())
        print(f"✅ {name}: {len(cleaned)} tickers cargados.")
    except Exception as e:
        print(f"❌ Error cargando {name}: {e}")

# 2. Limpiar duplicados
unique_candidates = list(set(all_candidates))[:3000]

# 3. Validar en yfinance
valid_tickers = []
sector_map = {}
industry_map = {}

print(f"\n🔎 Validando hasta 2000 tickers de un total de {len(unique_candidates)}...")

for ticker in tqdm(unique_candidates):
    try:
        stock = yf.Ticker(ticker)
        info = stock.info
        sector = info.get("sector", None)
        industry = info.get("industry", None)
        if sector:
            valid_tickers.append(ticker)
            sector_map[ticker] = sector
            industry_map[ticker] = industry
        if len(valid_tickers) >= 2000:
            break
    except Exception:
        continue
    time.sleep(0.2)

# 4. Guardar resultados
df = pd.DataFrame({
    "Ticker": valid_tickers,
    "Sector": [sector_map[t] for t in valid_tickers],
    "Industry": [industry_map[t] for t in valid_tickers]
})
df.to_csv("tickers_globales_yf.csv", index=False)

print(f"\n✅ ¡Listo! Se guardaron {len(valid_tickers)} tickers en 'tickers_globales_yf.csv'.")


✅ S&P 500: 503 tickers cargados.
❌ Error cargando NASDAQ 100: 'Ticker'
❌ Error cargando Euro Stoxx 50: 'Ticker'
❌ Error cargando DAX: 'Ticker symbol'
❌ Error cargando CAC 40: 'Ticker'
❌ Error cargando FTSE 100: 'EPIC'

🔎 Validando hasta 2000 tickers de un total de 503...


  1%|▌                                          | 6/503 [00:03<04:24,  1.88it/s]


KeyboardInterrupt: 