# In [None]:
# Asegurar importaciones necesarias
import os
import logging
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
from datetime import datetime, timedelta
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import coint as statsmodels_coint
from statsmodels.tsa.vector_ar.vecm import coint_johansen
from hmmlearn import hmm
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import TimeSeriesSplit
import time
from tqdm import tqdm
import traceback
from scipy.stats import linregress
import json
# Visualization settings
plt.style.use('seaborn-v0_8-darkgrid')

sns.set(style="darkgrid")
warnings.filterwarnings("ignore")

# Create the log/result directories BEFORE configuring logging:
# logging.basicConfig(filename=...) opens the log file immediately, so the
# target directory must already exist or the call raises FileNotFoundError.
os.makedirs('./artifacts/logs', exist_ok=True)
os.makedirs('./artifacts/results', exist_ok=True)
os.makedirs('./artifacts/results/figures', exist_ok=True)
os.makedirs('./artifacts/results/data', exist_ok=True)

# File-based logging with timestamps for the whole strategy run
logging.basicConfig(
    filename='./artifacts/logs/strategy.log',
    level=logging.INFO,
    format='[%(asctime)s] %(levelname)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)


# Fetch the S&P 500 constituents
def get_sp500_tickers():
    """Scrape the S&P 500 constituents table from Wikipedia.

    Ticker symbols are normalised by replacing '.' with '-'
    (e.g. ``BRK.B`` -> ``BRK-B``).

    Returns:
        tuple: ``(tickers, sector_map, subsector_map)`` where ``tickers`` is a
        list of normalised symbols and the two maps go from normalised symbol
        to the 'GICS Sector' / 'GICS Sub-Industry' column values.
        On any error the failure is logged and ``([], {}, {})`` is returned.
    """
    try:
        sp500_table = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]
        # regex=False: '.' must be treated as a literal dot, never as the
        # "any character" regex metacharacter, regardless of pandas version.
        symbols = sp500_table['Symbol'].str.replace('.', '-', regex=False)
        tickers = symbols.tolist()
        sector_map = dict(zip(symbols, sp500_table['GICS Sector']))
        subsector_map = dict(zip(symbols, sp500_table['GICS Sub-Industry']))
        return tickers, sector_map, subsector_map
    except Exception as e:
        logging.error(f"Error obteniendo tickers del S&P 500: {str(e)}")
        return [], {}, {}

# Download historical market data
def get_historical_data(tickers, start_date, end_date, sleep_time=0.1):
    """Download historical data for each ticker, one request per ticker.

    Args:
        tickers: iterable of ticker symbols.
        start_date: start of the download window (passed to ``yf.download``).
        end_date: end of the download window (passed to ``yf.download``).
        sleep_time: seconds to pause after every request, successful or not.

    Returns:
        dict: ticker -> DataFrame, only for tickers with more than 60 rows.
        Tickers that errored or returned too little data are logged and
        omitted.
    """
    data = {}
    failed_tickers = []

    for ticker in tqdm(tickers, desc="Descargando datos"):
        try:
            ticker_data = yf.download(ticker, start=start_date, end=end_date, progress=False)
            # Recent yfinance versions return (field, ticker) MultiIndex columns
            # even for a single ticker; flatten to plain field names so that
            # downstream code can use ticker_data['Close'] as a Series.
            if isinstance(ticker_data.columns, pd.MultiIndex):
                ticker_data.columns = ticker_data.columns.get_level_values(0)
            if len(ticker_data) > 60:  # Require enough history
                data[ticker] = ticker_data
            else:
                failed_tickers.append(ticker)
        except Exception as e:
            logging.error(f"Error descargando datos para {ticker}: {str(e)}")
            failed_tickers.append(ticker)
        finally:
            # Throttle after every request (including failures) to avoid
            # hammering the API.
            time.sleep(sleep_time)

    print(f"Datos obtenidos para {len(data)} tickers. Fallaron {len(failed_tickers)} tickers.")
    if failed_tickers:
        logging.warning(f"Tickers sin datos suficientes o con error: {failed_tickers}")
    return data

# Data preprocessing
def preprocess_data(data):
    """Align per-ticker downloads on a common calendar and derive metrics.

    Args:
        data: dict mapping ticker -> DataFrame with at least 'Close' and
            'Volume' columns. Tickers missing either column, or whose data
            cannot be aligned, are kept as all-NaN columns.

    Returns:
        dict with DataFrames indexed by the union of all dates, one column
        per ticker:
            'prices'          close prices, gaps linearly interpolated
                              (at most 3 consecutive rows)
            'returns'         pct_change of the interpolated prices
            'volumes'         raw volumes
            'realized_vol'    22-row rolling std of raw-price returns,
                              scaled by sqrt(252)
            'relative_volume' volume / 20-row rolling mean volume
            'adv'             20-row rolling mean of volume * raw price
    """
    def column_as_series(frame, name):
        # With MultiIndex columns frame[name] is a one-column DataFrame;
        # reduce it to a Series so that reindexing yields scalars per date.
        column = frame[name]
        if isinstance(column, pd.DataFrame):
            column = column.iloc[:, 0]
        return column

    tickers = list(data.keys())

    # Union of every date seen in any ticker
    all_dates = set()
    for ticker_data in data.values():
        all_dates.update(ticker_data.index)
    all_dates = sorted(all_dates)

    closes = {}
    traded = {}
    for ticker in tickers:
        try:
            ticker_data = data[ticker]
            if 'Close' in ticker_data.columns and 'Volume' in ticker_data.columns:
                # reindex aligns in one vectorised step (NaN where the ticker
                # has no row) instead of a per-date Python loop.
                close_series = column_as_series(ticker_data, 'Close').reindex(all_dates).astype('float64')
                volume_series = column_as_series(ticker_data, 'Volume').reindex(all_dates).astype('float64')
                closes[ticker] = close_series
                traded[ticker] = volume_series
        except Exception as e:
            # Best effort: skip tickers that cannot be aligned, keep the rest
            print(f"Error procesando ticker {ticker}: {e}")
            continue

    # float64 frames; tickers that were skipped remain as all-NaN columns
    prices = pd.DataFrame(closes, index=all_dates, columns=tickers, dtype='float64')
    volumes = pd.DataFrame(traded, index=all_dates, columns=tickers, dtype='float64')

    # Derived metrics on the raw (non-interpolated) prices
    returns = prices.pct_change().replace([np.inf, -np.inf], np.nan)
    realized_vol = returns.rolling(window=22).std() * np.sqrt(252)

    # All-NaN volume columns simply stay NaN
    relative_volume = volumes / volumes.rolling(window=20).mean()

    # NOTE(review): an earlier revision computed MAD-clipped returns here but
    # never used or returned them; that dead computation was removed. If
    # outlier clipping is wanted in the output it must be wired in explicitly.

    # Impute short gaps (at most 3 consecutive rows)
    prices_filled = prices.interpolate(method='linear', limit=3, axis=0)

    # Average daily dollar volume
    adv = (volumes * prices).rolling(window=20).mean()

    # Returns recomputed on the imputed prices
    returns_filled = prices_filled.pct_change().replace([np.inf, -np.inf], np.nan)

    processed_data = {
        'prices': prices_filled,
        'returns': returns_filled,
        'volumes': volumes,
        'realized_vol': realized_vol,
        'relative_volume': relative_volume,
        'adv': adv
    }

    return processed_data
    
# Regime detection system
class RegimeDetector:
    """Classify the market into three regimes by combining three Gaussian HMMs.

    Models (keys of ``hmm_models``):
        hmm1 -- 2 states, on market volatility + mean inter-sector correlation
        hmm2 -- 3 states, on market returns + volatility + relative volume
        hmm3 -- 2 states, on sector dispersion + market breadth

    Final regimes: 1 (favorable), 2 (transition / neutral default), 3 (crisis).

    NOTE(review): HMM state indices are arbitrary labels chosen by the fit;
    the mapping below assumes e.g. hmm1 state 1 is the high-volatility state.
    Nothing in this class enforces that ordering -- confirm or sort states.
    """

    # Number of hidden states per model
    _STATE_COUNTS = {'hmm1': 2, 'hmm2': 3, 'hmm3': 2}

    def __init__(self, min_train_samples=1260, persistence_days=3, prob_threshold=0.85):
        """
        Args:
            min_train_samples: minimum number of price rows required by
                ``fit_predict`` (1260 is roughly 5 years of daily data).
            persistence_days: length of the trailing window used by the
                persistence filter.
            prob_threshold: minimum share of the window the most common
                regime must hold for the filter to adopt it.
        """
        self.min_train_samples = min_train_samples
        self.persistence_days = persistence_days
        self.prob_threshold = prob_threshold
        self.hmm_models = {
            'hmm1': None,  # 2 states, vol + correlations
            'hmm2': None,  # 3 states, returns + vol + volume
            'hmm3': None   # 2 states, dispersion + breadth
        }
        # Scaler fitted at training time for each model, reused at prediction
        self.scalers = {}
        self.model_weights = {'hmm1': 1/3, 'hmm2': 1/3, 'hmm3': 1/3}
        self.last_calibration = None
        self.regime_history = None
        self.current_regime = None
        self.sector_map = {}
        self.is_model_valid = {'hmm1': False, 'hmm2': False, 'hmm3': False}

    # ------------------------------------------------------------------
    # Small private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _empty_features():
        """Return the feature dict used when nothing can be computed."""
        return {'hmm1': pd.DataFrame(), 'hmm2': pd.DataFrame(), 'hmm3': pd.DataFrame()}

    @staticmethod
    def _default_regime_series():
        """Return a one-element series holding the neutral regime (2)."""
        return pd.Series(2, index=[datetime.now()], name='regime')

    @staticmethod
    def _tail(frame, lookback):
        """Return the last ``lookback`` rows, or the frame itself if shorter."""
        return frame.iloc[-lookback:] if len(frame) > lookback else frame

    @staticmethod
    def _assemble_features(label, columns):
        """Align the given series on their common index and drop NaN rows.

        ``columns`` maps feature name -> Series. Returns an empty DataFrame
        (and logs a warning) if any series is empty or no index is shared.
        """
        if any(series.empty for series in columns.values()):
            logging.warning(f"Datos insuficientes para features {label}")
            return pd.DataFrame()

        common_indices = None
        for series in columns.values():
            if common_indices is None:
                common_indices = series.index
            else:
                common_indices = common_indices.intersection(series.index)

        if len(common_indices) == 0:
            logging.warning(f"No hay índices comunes para features {label}")
            return pd.DataFrame()

        frame = pd.DataFrame(
            {name: series.loc[common_indices].values for name, series in columns.items()},
            index=common_indices
        ).dropna()
        logging.info(f"Features {label} creadas: {len(frame)} observaciones")
        return frame

    def _market_series(self, prices, returns, realized_vol, market_index):
        """Return ``(market_returns, market_vol)`` for the market index.

        Uses the index column if present in ``prices``; otherwise downloads it
        and, failing that, falls back to the cross-sectional mean return.
        """
        if market_index in prices.columns:
            return returns[market_index], realized_vol[market_index]

        def annualised_vol(rets):
            return rets.rolling(window=min(22, len(rets))).std() * np.sqrt(252)

        try:
            market_data = yf.download(market_index,
                                      start=prices.index[0].strftime('%Y-%m-%d'),
                                      end=prices.index[-1].strftime('%Y-%m-%d'),
                                      progress=False)
            if len(market_data) >= 30:
                close = market_data['Close']
                # Recent yfinance returns MultiIndex columns, making 'Close'
                # a one-column DataFrame; the features need a Series.
                if isinstance(close, pd.DataFrame):
                    close = close.iloc[:, 0]
                market_returns = close.pct_change().dropna()
                return market_returns, annualised_vol(market_returns)
            logging.warning(f"Datos insuficientes para {market_index}: {len(market_data)} < 30")
        except Exception as e:
            logging.error(f"Error obteniendo datos de mercado: {str(e)}")
            logging.info("Usando datos promedio como proxy para el mercado")

        # Proxy: equal-weighted average of all available tickers
        proxy_returns = returns.mean(axis=1)
        return proxy_returns, annualised_vol(proxy_returns)

    def _sector_returns(self, returns):
        """Return sector -> equal-weighted mean return series of its tickers."""
        sector_returns = {}
        # dict.fromkeys de-duplicates while keeping a deterministic order
        for sector in dict.fromkeys(self.sector_map.values()):
            members = [t for t in returns.columns if self.sector_map.get(t) == sector]
            if members:
                sector_returns[sector] = returns[members].mean(axis=1)
        return sector_returns

    @staticmethod
    def _mean_sector_correlation(sector_returns, index):
        """Return the mean rolling correlation across all sector pairs.

        Falls back to a constant 0.5 series when fewer than two sectors or no
        pair correlation is available.
        """
        if len(sector_returns) <= 1:
            logging.info("Menos de 2 sectores disponibles, usando valor por defecto para correlación")
            return pd.Series(index=index, data=0.5)

        pair_corrs = {}
        sectors = list(sector_returns)
        for i, s1 in enumerate(sectors):
            for s2 in sectors[i + 1:]:
                # Drop rows with NaN in either sector before correlating
                pair_df = pd.DataFrame({s1: sector_returns[s1], s2: sector_returns[s2]}).dropna()

                # Adaptive window
                roll_win = min(22, len(pair_df) // 2) if len(pair_df) > 10 else 5
                if len(pair_df) < roll_win or roll_win <= 1:
                    continue

                # Series-vs-Series rolling correlation is indexed by date, so
                # it aligns with the price index. (The pairwise DataFrame form
                # yields a (date, sector) MultiIndex that never matches it.)
                rolling_corr = pair_df[s1].rolling(window=roll_win).corr(pair_df[s2])
                if not rolling_corr.empty:
                    pair_corrs[f'{s1}_{s2}'] = rolling_corr

        if not pair_corrs:
            logging.info("No hay suficientes correlaciones sectoriales, usando valor por defecto")
            return pd.Series(index=index, data=0.5)

        return pd.DataFrame(pair_corrs, index=index).mean(axis=1)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def create_features(self, data, market_index='^GSPC', lookback=None):
        """Build the feature frames for the three HMMs.

        Args:
            data: dict with 'prices', 'returns', 'realized_vol' and
                'relative_volume' DataFrames (one column per ticker).
            market_index: column / symbol used as the market proxy.
            lookback: number of trailing rows to use (default 252).

        Returns:
            dict with keys 'hmm1', 'hmm2', 'hmm3' -> DataFrame (possibly
            empty). On an unexpected error all three are empty and an extra
            'error' key holds the message.
        """
        try:
            if lookback is None:
                lookback = 252  # Default window

            prices = self._tail(data['prices'], lookback)
            returns = self._tail(data['returns'], lookback)
            realized_vol = self._tail(data['realized_vol'], lookback)
            relative_volume = self._tail(data['relative_volume'], lookback)

            if len(prices) < 60:
                logging.warning(f"Datos insuficientes para crear features: {len(prices)} < 60")
                return self._empty_features()

            market_returns, market_vol = self._market_series(
                prices, returns, realized_vol, market_index)

            # HMM-1 inputs: volatility and inter-sector correlation
            vix_proxy = market_vol.dropna()
            sector_returns = self._sector_returns(returns)
            mean_sector_corr = self._mean_sector_correlation(sector_returns, prices.index)

            # HMM-2 inputs: market returns, volatility and relative volume
            market_rets = market_returns.dropna()
            avg_rel_vol = relative_volume.mean(axis=1)

            # HMM-3 inputs: cross-sector dispersion and market breadth
            sector_dispersion = None
            if len(sector_returns) > 1:
                sector_df = pd.DataFrame(sector_returns).dropna(how='all')
                # std across sectors needs at least two columns
                if not sector_df.empty and sector_df.shape[1] > 1:
                    sector_dispersion = sector_df.std(axis=1)
                else:
                    logging.info("Datos insuficientes para cálculo de dispersión sectorial")
            else:
                logging.info("Menos de 2 sectores disponibles, usando valor por defecto para dispersión")
            if sector_dispersion is None:
                sector_dispersion = pd.Series(index=prices.index, data=0.01)

            # Breadth: share of tickers trading above their moving average.
            # NOTE(review): rows inside the MA warm-up (and NaN prices) count
            # as "not above", which biases early breadth towards 0.
            ma_window = min(50, len(prices) // 3) if len(prices) > 60 else 20
            ma = prices.rolling(window=ma_window).mean()
            market_breadth = (prices > ma).mean(axis=1)

            result = {
                'hmm1': self._assemble_features('HMM1', {
                    'volatility': vix_proxy,
                    'sector_correlation': mean_sector_corr,
                }),
                'hmm2': self._assemble_features('HMM2', {
                    'returns': market_rets,
                    'volatility': market_vol,
                    'relative_volume': avg_rel_vol,
                }),
                'hmm3': self._assemble_features('HMM3', {
                    'sector_dispersion': sector_dispersion,
                    'market_breadth': market_breadth,
                }),
            }

            if not any(len(df) >= 30 for df in result.values()):
                logging.warning("No hay suficientes datos para ningún modelo HMM")

            return result
        except Exception as e:
            logging.error(f"Error en create_features: {str(e)}")
            logging.error(f"Stack trace: {traceback.format_exc()}")
            failed = self._empty_features()
            failed['error'] = str(e)
            return failed

    def train_models(self, features, end_date=None):
        """Fit the three HMMs (and their scalers) on the given features.

        Args:
            features: dict as returned by ``create_features``. Not modified.
            end_date: if given, only rows with index <= end_date are used.

        Side effects: replaces ``hmm_models``, ``scalers``, ``is_model_valid``
        and sets ``last_calibration`` to the current wall-clock time. Models
        with 30 or fewer observations, or whose fit fails, are set to None.
        """
        try:
            self.is_model_valid = {'hmm1': False, 'hmm2': False, 'hmm3': False}
            self.scalers = {}
            self.hmm_models = {
                key: hmm.GaussianHMM(n_components=n_states, covariance_type="full",
                                     n_iter=1000, random_state=42)
                for key, n_states in self._STATE_COUNTS.items()
            }

            # Work on a shallow copy so the caller's dict is never altered
            features = dict(features)
            if end_date:
                for key, frame in list(features.items()):
                    if isinstance(frame, pd.DataFrame) and not frame.empty:
                        features[key] = frame[frame.index <= end_date]

            for key in list(self.hmm_models):
                frame = features.get(key)
                if not (isinstance(frame, pd.DataFrame) and frame.shape[0] > 30):
                    logging.warning(f"Datos insuficientes para entrenar modelo {key}")
                    self.hmm_models[key] = None
                    self.is_model_valid[key] = False
                    continue

                logging.info(f"Entrenando modelo {key} con {frame.shape[0]} observaciones")
                scaler = StandardScaler()
                X_scaled = scaler.fit_transform(frame.values)

                try:
                    self.hmm_models[key].fit(X_scaled)
                except Exception as e:
                    logging.error(f"Error entrenando modelo {key}: {str(e)}")
                    self.hmm_models[key] = None
                    self.is_model_valid[key] = False
                else:
                    # Keep the scaler so predictions use the training scale
                    self.scalers[key] = scaler
                    self.is_model_valid[key] = True
                    logging.info(f"Modelo {key} entrenado exitosamente")

            self.last_calibration = datetime.now()

            if not any(self.is_model_valid.values()):
                logging.error("Ningún modelo pudo ser entrenado correctamente")
            else:
                valid_models = [k for k, v in self.is_model_valid.items() if v]
                logging.info(f"Modelos válidos después del entrenamiento: {valid_models}")

        except Exception as e:
            logging.error(f"Error general en train_models: {str(e)}")
            logging.error(f"Stack trace: {traceback.format_exc()}")

    def predict_regimes(self, features, apply_persistence=True):
        """Predict the regime series from the trained models.

        Args:
            features: dict as returned by ``create_features``.
            apply_persistence: smooth the raw regimes with the persistence
                filter (window ``persistence_days``, share ``prob_threshold``).

        Returns:
            pd.Series of regimes (1, 2 or 3) indexed like the features,
            shifted by 2 rows. If no model can predict, or on error, a
            one-element series with the neutral regime 2.

        Side effects: updates ``regime_history`` and ``current_regime``.
        """
        try:
            predictions = {}
            probs_log = {}
            scalers = getattr(self, 'scalers', {})

            for key, model in self.hmm_models.items():
                frame = features[key] if key in features else None
                usable = (self.is_model_valid[key] and model is not None and
                          isinstance(frame, pd.DataFrame) and
                          not frame.empty and frame.shape[0] > 1)
                if not usable:
                    continue

                X = frame.values
                scaler = scalers.get(key)
                if scaler is not None:
                    # Same standardisation the model was trained with
                    X_scaled = scaler.transform(X)
                else:
                    # No stored scaler (model assigned externally): fall back
                    # to standardising the prediction window itself.
                    X_scaled = StandardScaler().fit_transform(X)

                try:
                    hidden_states = model.predict(X_scaled)
                    probs = model.predict_proba(X_scaled)

                    pred_df = pd.DataFrame(index=frame.index)
                    pred_df['state'] = hidden_states
                    for i in range(model.n_components):
                        pred_df[f'prob_state_{i}'] = probs[:, i]
                    predictions[key] = pred_df

                    # Last-row probabilities, for the log only
                    if len(probs) > 0:
                        probs_log[key] = {f'state_{i}': probs[-1, i]
                                          for i in range(model.n_components)}

                    logging.info(f"Predicción exitosa con modelo {key}")
                except Exception as e:
                    logging.error(f"Error prediciendo con modelo {key}: {str(e)}")
                    logging.error(f"Stack trace: {traceback.format_exc()}")

            if not predictions:
                logging.warning("No se pudieron hacer predicciones con ningún modelo")
                # Diagnostics about models and features
                for key in self.hmm_models:
                    logging.info(f"Modelo {key} válido: {self.is_model_valid[key]}")
                    if key in features:
                        if isinstance(features[key], pd.DataFrame):
                            logging.info(f"Features {key} disponibles: {not features[key].empty}")
                        else:
                            logging.info(f"Features {key} no es DataFrame: {type(features[key])}")
                return self._default_regime_series()

            # Union of all prediction dates, sorted
            all_indices = set()
            for pred_df in predictions.values():
                all_indices.update(pred_df.index)
            all_indices = sorted(all_indices)

            combined_df = pd.DataFrame(index=all_indices)
            for key, pred_df in predictions.items():
                combined_df[f'{key}_regime'] = pred_df['state'].reindex(all_indices)

            def states(column, default):
                # Missing model or missing row -> that model's default state
                if column not in combined_df.columns:
                    return np.full(len(combined_df), default, dtype=int)
                return combined_df[column].fillna(default).astype(int).to_numpy()

            hmm1 = states('hmm1_regime', 0)
            hmm2 = states('hmm2_regime', 1)
            hmm3 = states('hmm3_regime', 0)

            # 3 = crisis (hmm1 state 1 and hmm2 state 2),
            # 1 = favorable (hmm1 state 0 and hmm3 state 1),
            # 2 = transition (everything else)
            combined_df['final_regime'] = np.select(
                [(hmm1 == 1) & (hmm2 == 2), (hmm1 == 0) & (hmm3 == 1)],
                [3, 1],
                default=2
            )

            # Persistence filter: adopt the dominant regime of the preceding
            # window only if it is dominant enough, else keep the prior value.
            if apply_persistence and len(combined_df) > self.persistence_days:
                raw_regimes = combined_df['final_regime']
                filtered_regimes = raw_regimes.copy()

                for i in range(self.persistence_days, len(filtered_regimes)):
                    window = raw_regimes.iloc[i - self.persistence_days:i]
                    counts = window.value_counts()
                    if len(counts) > 0:
                        most_common = counts.idxmax()
                        if (window == most_common).mean() >= self.prob_threshold:
                            filtered_regimes.iloc[i] = most_common
                        else:
                            filtered_regimes.iloc[i] = filtered_regimes.iloc[i - 1]

                combined_df['filtered_regime'] = filtered_regimes
                regime_series = combined_df['filtered_regime']
            else:
                regime_series = combined_df['final_regime']

            # Lag by 2 rows to avoid look-ahead; back-fill the first 2 rows
            if len(regime_series) > 2:
                regime_series = regime_series.shift(2).bfill()

            self.regime_history = regime_series
            if not regime_series.empty:
                self.current_regime = int(regime_series.iloc[-1])
                logging.info(f"Régimen actual: {self.current_regime}")
                logging.info(f"Probabilidades de los modelos: {probs_log}")

                if self.current_regime == 1:
                    logging.info("Régimen 1 (Favorable): Baja volatilidad, alta predictibilidad")
                elif self.current_regime == 2:
                    logging.info("Régimen 2 (Transición): Volatilidad moderada, señales mixtas")
                else:
                    logging.info("Régimen 3 (Crisis): Alta volatilidad, baja predictibilidad")
            else:
                self.current_regime = 2  # Neutral default
                logging.warning("No se pudo determinar régimen, usando valor por defecto (2)")

            return regime_series
        except Exception as e:
            logging.error(f"Error general en predict_regimes: {str(e)}")
            logging.error(f"Stack trace: {traceback.format_exc()}")
            return self._default_regime_series()

    def fit_predict(self, data, sector_map, current_date=None, force_calibration=False):
        """Train (when due) and return the current regime.

        Args:
            data: processed data dict (see ``create_features``).
            sector_map: ticker -> sector mapping; stored on the instance.
            current_date: date of the evaluation; defaults to the last price
                date. Used to decide whether recalibration is due.
            force_calibration: retrain regardless of the last calibration.

        Returns:
            int: 1, 2 or 3. The neutral regime 2 is returned when data is
            insufficient or any step fails.
        """
        try:
            self.sector_map = sector_map

            if current_date is None:
                current_date = data['prices'].index[-1]

            if len(data['prices']) < self.min_train_samples:
                logging.warning(f"Datos insuficientes: {len(data['prices'])} < {self.min_train_samples}")
                return 2

            lookback = min(252 * 5, len(data['prices']))
            features = self.create_features(data, lookback=lookback)

            if not features:
                logging.warning("No hay features para entrenar.")
                return 2

            has_valid_features = any(
                key != 'error' and isinstance(value, pd.DataFrame) and not value.empty
                for key, value in features.items()
            )
            if not has_valid_features:
                logging.warning("No hay suficientes datos para crear features.")
                return 2

            # Retrain when forced, when no usable model exists, or when more
            # than 7 days (in data time) have passed since the last fit.
            as_of = pd.Timestamp(current_date)
            calibration_is_stale = (
                self.last_calibration is not None and
                (as_of.date() - pd.Timestamp(self.last_calibration).date()).days > 7
            )
            need_training = (force_calibration or
                             all(model is None for model in self.hmm_models.values()) or
                             not any(self.is_model_valid.values()) or
                             calibration_is_stale)

            if need_training:
                logging.info("Iniciando entrenamiento de modelos HMM")
                self.train_models(features)
                # Record the calibration in data time, not wall-clock time;
                # otherwise a backtest over historical dates never sees the
                # calibration as stale and never retrains.
                self.last_calibration = as_of.to_pydatetime()

            regimes = self.predict_regimes(features)

            if regimes.empty:
                logging.warning("No se pudo determinar régimen, usando valor neutral (2)")
                return 2

            current_regime = regimes.iloc[-1]
            if pd.isna(current_regime):
                logging.warning("Régimen actual es NaN, usando valor neutral (2)")
                current_regime = 2

            logging.info(f"Régimen detectado: {int(current_regime)}")
            return int(current_regime)
        except Exception as e:
            logging.error(f"Error en fit_predict: {str(e)}")
            logging.error(f"Stack trace: {traceback.format_exc()}")
            return 2
            
# Componente de Selección de Pares
class PairSelector:
    """Sistema mejorado de identificación y selección de pares para arbitraje"""
    
    def __init__(self, min_liquidity=10e6, 
                 max_pairs_by_regime={1: 25, 2: 20, 3: 15},
                 recalibration_days=5,
                 similarity_threshold=0.7):
        self.min_liquidity = min_liquidity
        self.max_pairs_by_regime = max_pairs_by_regime
        self.recalibration_days = recalibration_days
        self.similarity_threshold = similarity_threshold
        self.candidate_pairs = None
        self.selected_pairs = None
        self.last_calibration_date = None
        self.coint_history = {}  # Historial de cointegración para seguimiento
        self.market_volatility = None  # Para tracking de volatilidad del mercado
    
    def prefilter_by_liquidity(self, data, min_adv=None):
        """Filtro de tickers por liquidez mínima con verificación robusta"""
        if min_adv is None:
            min_adv = self.min_liquidity
        
        try:    
            # Verificar que tenemos datos adecuados
            if 'adv' not in data or data['adv'].empty:
                logging.warning("Datos ADV no disponibles para filtro de liquidez")
                # Alternativa usando volumen y precio si están disponibles
                if 'volumes' in data and 'prices' in data and not data['volumes'].empty and not data['prices'].empty:
                    logging.info("Usando volumen y precio como proxy para ADV")
                    # Calcular ADV como volumen * precio
                    vol = data['volumes'].iloc[-20:].mean()
                    price = data['prices'].iloc[-1]
                    adv = vol * price
                else:
                    logging.warning("No se pueden calcular métricas de liquidez, retornando todos los tickers")
                    return list(data['prices'].columns)
            else:
                # Usar ADV promedio de las últimas 20 sesiones si hay suficientes datos
                window = min(20, max(5, len(data['adv'])))
                adv = data['adv'].iloc[-window:].mean()
            
            # Aplicar filtro con verificación
            liquid_tickers = [ticker for ticker in adv.index if adv.get(ticker, 0) > min_adv]
            
            logging.info(f"Filtro de liquidez: {len(liquid_tickers)}/{len(adv)} tickers pasan el umbral de ${min_adv/1e6:.1f}M")
            
            return liquid_tickers
        except Exception as e:
            logging.error(f"Error en filtro de liquidez: {str(e)}")
            logging.error(traceback.format_exc())
            # Retornar una lista segura en caso de error
            return list(data['prices'].columns if 'prices' in data and not data['prices'].empty else [])
    
    def filter_by_events(self, tickers, data, days_ahead=7):
        """Filtro de tickers con eventos próximos (mejorado para evitar aleatorios)"""
        # En implementación real, usaríamos datos de calendarios de eventos
        try:
            # En lugar de valores aleatorios, usar criterios basados en datos
            # Por ejemplo, filtrar tickers con volatilidad anormalmente alta
            if 'realized_vol' in data and not data['realized_vol'].empty:
                vol = data['realized_vol'].iloc[-5:].mean()  # Últimos 5 días
                vol_percentile = vol.rank(pct=True)  # Percentil de volatilidad
                
                # Filtrar tickers en el 10% superior de volatilidad como proxy de eventos
                high_vol_tickers = [t for t in vol_percentile.index if t in tickers and vol_percentile.get(t, 0) > 0.9]
                
                logging.info(f"Filtro de eventos: {len(high_vol_tickers)} tickers filtrados por volatilidad alta")
                
                return [t for t in tickers if t not in high_vol_tickers]
            else:
                logging.warning("Datos de volatilidad no disponibles para filtro de eventos")
                return tickers
        except Exception as e:
            logging.error(f"Error en filtro de eventos: {str(e)}")
            logging.error(traceback.format_exc())
            return tickers  # Retornar la lista original en caso de error
    
    def calculate_similarity_score(self, data, tickers, lookback=252):
        """Calcula scores de similitud entre tickers usando métricas reales en lugar de aleatorios"""
        try:
            # Adaptar lookback a los datos disponibles
            lookback = min(lookback, len(data['prices']) - 1)
            if lookback < 60:
                logging.warning(f"Datos insuficientes para similitud: {lookback} < 60 días")
                lookback = max(lookback, 20)  # Mínimo 20 días para análisis
            
            prices = data['prices'].iloc[-lookback:][tickers]
            returns = data['returns'].iloc[-lookback:][tickers]
            vols = data['realized_vol'].iloc[-lookback:][tickers]
            
            similarity = pd.DataFrame(0, index=tickers, columns=tickers)
            
            # Betas a mercado (en lugar de correlación entre pares)
            market_returns = returns.mean(axis=1)
            betas = {}
            
            for ticker in tickers:
                X = market_returns.values.reshape(-1, 1)
                y = returns[ticker].values
                mask = ~np.isnan(X.flatten()) & ~np.isnan(y)
                if mask.sum() > 30:
                    X, y = X[mask], y[mask]
                    beta = np.cov(y, X.flatten())[0, 1] / np.var(X.flatten())
                    betas[ticker] = beta
                else:
                    betas[ticker] = 1.0
            
            # Proxy de capitalización: precio * volumen
            mcap_proxy = pd.DataFrame(index=tickers)
            
            if 'volumes' in data and not data['volumes'].empty:
                vol_avg = data['volumes'].iloc[-20:][tickers].mean()
                price_last = prices.iloc[-1]
                mcap_proxy = price_last * vol_avg
                # Normalizar
                if not mcap_proxy.empty and mcap_proxy.max() > mcap_proxy.min():
                    mcap_proxy = (mcap_proxy - mcap_proxy.min()) / (mcap_proxy.max() - mcap_proxy.min())
                else:
                    mcap_proxy = pd.Series(0.5, index=tickers)
            else:
                # Capitalización (proxy: precio)
                mcap = prices.iloc[-1]
                if mcap.max() > mcap.min():
                    mcap_proxy = (mcap - mcap.min()) / (mcap.max() - mcap.min())
                else:
                    mcap_proxy = pd.Series(0.5, index=tickers)
            
            # Volatilidad realizada
            vol_window = min(60, lookback//2)
            vol = vols.iloc[-vol_window:].mean()
            if vol.max() > vol.min():
                vol_norm = (vol - vol.min()) / (vol.max() - vol.min())
            else:
                vol_norm = pd.Series(0.5, index=tickers)
            
            # Ratios financieros reales en lugar de aleatorios (usar datos disponibles)
            # En este caso, usamos el ratio precio/volumen como proxy de valoración
            valuation_proxy = pd.Series(index=tickers)
            try:
                if 'volumes' in data and not data['volumes'].empty:
                    price_avg = prices.iloc[-20:].mean()
                    vol_avg = data['volumes'].iloc[-20:][tickers].mean()
                    vol_avg = vol_avg.replace(0, np.nan)  # Evitar división por cero
                    
                    # Price/Volume ratio como proxy
                    pv_ratio = price_avg / vol_avg
                    pv_ratio = pv_ratio.fillna(pv_ratio.median())  # Manejar NaN
                    
                    # Normalizar
                    if pv_ratio.max() > pv_ratio.min():
                        valuation_proxy = (pv_ratio - pv_ratio.min()) / (pv_ratio.max() - pv_ratio.min())
                    else:
                        valuation_proxy = pd.Series(0.5, index=tickers)
                else:
                    valuation_proxy = pd.Series(0.5, index=tickers)
            except Exception as e:
                logging.error(f"Error calculando proxies de valoración: {str(e)}")
                valuation_proxy = pd.Series(0.5, index=tickers)
            
            # Registrar volatilidad de mercado para posible recalibración
            self.market_volatility = vols.mean().mean()
            
            # Calcular similitud
            for i, ticker1 in enumerate(tickers):
                for j, ticker2 in enumerate(tickers):
                    if i < j:
                        # Beta similarity (30%)
                        beta_sim = 1 - min(abs(betas.get(ticker1, 1) - betas.get(ticker2, 1)) / 2, 1)
                        
                        # Market cap similarity (25%)
                        mcap_sim = 1 - abs(mcap_proxy.get(ticker1, 0.5) - mcap_proxy.get(ticker2, 0.5))
                        
                        # Volatility similarity (25%)
                        vol_sim = 1 - abs(vol_norm.get(ticker1, 0.5) - vol_norm.get(ticker2, 0.5))
                        
                        # Valuation similarity (20%)
                        val_sim = 1 - abs(valuation_proxy.get(ticker1, 0.5) - valuation_proxy.get(ticker2, 0.5))
                        
                        # Weighted score
                        sim_score = 0.3*beta_sim + 0.25*mcap_sim + 0.25*vol_sim + 0.2*val_sim
                        similarity.loc[ticker1, ticker2] = sim_score
                        similarity.loc[ticker2, ticker1] = sim_score
            
            return similarity
        except Exception as e:
            logging.error(f"Error en calculate_similarity_score: {str(e)}")
            logging.error(traceback.format_exc())
            # Retornar matriz de similitud vacía en caso de error
            return pd.DataFrame(0.5, index=tickers, columns=tickers)
    
    def generate_candidate_pairs(self, data, sector_map, subsector_map, similarity_threshold=None):
        """Genera pares candidatos basados en sector/subsector y similitud con verificaciones robustas"""
        try:
            if similarity_threshold is None:
                similarity_threshold = self.similarity_threshold
                
            # Filtrar por liquidez y eventos
            liquid_tickers = self.prefilter_by_liquidity(data)
            filtered_tickers = self.filter_by_events(liquid_tickers, data)
            
            # Verificar tickers suficientes
            if len(filtered_tickers) < 2:
                logging.warning(f"Tickers insuficientes después de filtro: {len(filtered_tickers)}")
                return []
            
            # Calcular scores de similitud
            similarity = self.calculate_similarity_score(data, filtered_tickers)
            
            # Generar pares candidatos
            candidate_pairs = []
            
            # Agrupar por sector y subsector
            sector_groups = {}
            subsector_groups = {}
            
            for ticker in filtered_tickers:
                sector = sector_map.get(ticker)
                subsector = subsector_map.get(ticker)
                
                if sector:
                    if sector not in sector_groups:
                        sector_groups[sector] = []
                    sector_groups[sector].append(ticker)
                
                if subsector:
                    if subsector not in subsector_groups:
                        subsector_groups[subsector] = []
                    subsector_groups[subsector].append(ticker)
            
            # Buscar pares en mismo subsector
            subsector_pairs = []
            for subsector, tickers in subsector_groups.items():
                if len(tickers) < 2:
                    continue
                    
                for i, ticker1 in enumerate(tickers):
                    for ticker2 in tickers[i+1:]:
                        if ticker1 in similarity.index and ticker2 in similarity.columns:
                            if similarity.loc[ticker1, ticker2] >= similarity_threshold:
                                subsector_pairs.append((ticker1, ticker2))
            
            candidate_pairs.extend(subsector_pairs)
            logging.info(f"Pares candidatos del mismo subsector: {len(subsector_pairs)}")
            
            # Buscar en mismo sector si necesitamos más pares
            if len(candidate_pairs) < 50:
                sector_pairs = []
                for sector, tickers in sector_groups.items():
                    if len(tickers) < 2:
                        continue
                        
                    for i, ticker1 in enumerate(tickers):
                        for ticker2 in tickers[i+1:]:
                            if (ticker1, ticker2) not in candidate_pairs and (ticker2, ticker1) not in candidate_pairs:
                                if ticker1 in similarity.index and ticker2 in similarity.columns:
                                    if similarity.loc[ticker1, ticker2] >= similarity_threshold * 0.9:
                                        sector_pairs.append((ticker1, ticker2))
                
                candidate_pairs.extend(sector_pairs)
                logging.info(f"Pares candidatos adicionales del mismo sector: {len(sector_pairs)}")
            
            self.candidate_pairs = candidate_pairs
            logging.info(f"Total de pares candidatos: {len(candidate_pairs)}")
            
            # Actualizar fecha de calibración
            self.last_calibration_date = data['prices'].index[-1] if not data['prices'].empty else datetime.now()
            
            return candidate_pairs
        except Exception as e:
            logging.error(f"Error en generate_candidate_pairs: {str(e)}")
            logging.error(traceback.format_exc())
            self.candidate_pairs = []
            return []
    
    def test_cointegration(self, data, pair, window, significance=0.05):
        """Prueba de cointegración de Johansen para un par con manejo robusto de errores"""
        ticker1, ticker2 = pair
        
        try:
            # Importar la función coint correctamente
            from statsmodels.tsa.stattools import coint as statsmodels_coint
            
            # Verificar datos suficientes
            if window > len(data['prices']):
                logging.warning(f"Ventana solicitada {window} mayor que datos disponibles {len(data['prices'])}")
                window = min(window, len(data['prices']))
            
            prices = data['prices'].iloc[-window:][list(pair)]
            
            # Verificar datos suficientes y sin valores faltantes
            if len(prices) < 60 or prices[ticker1].isna().any() or prices[ticker2].isna().any():
                logging.info(f"Datos insuficientes para prueba de cointegración de {ticker1}-{ticker2}: {len(prices)} puntos")
                return {'coint': False, 'pvalue': 1.0, 'half_life': np.inf, 'hedge_ratio': 1.0}
            
            # Test de Johansen
            result = coint_johansen(prices, det_order=0, k_ar_diff=1)
            trace_stat = result.lr1[0]  # Estadístico de traza para r=0
            crit_value = result.cvt[0, 1]  # Valor crítico al 5%
            
            is_cointegrated = trace_stat > crit_value
            
            # También probar con test Engle-Granger para validación
            eg_result = statsmodels_coint(prices[ticker1], prices[ticker2], maxlag=10, autolag='AIC')
            eg_coint = eg_result[1] < significance
            
            # Considerar cointegrado solo si ambos tests lo confirman
            is_cointegrated = is_cointegrated and eg_coint
            
            if is_cointegrated:
                # Vector de cointegración normalizado
                coef = result.evec[:, 0]
                hedge_ratio = -coef[1] / coef[0]
                
                # Verificar hedge ratio razonable
                if abs(hedge_ratio) > 10:
                    logging.warning(f"Hedge ratio extremo: {hedge_ratio} para {ticker1}-{ticker2}")
                    hedge_ratio = np.sign(hedge_ratio) * min(abs(hedge_ratio), 10)
                
                # Calcular spread
                spread = prices[ticker1] + hedge_ratio * prices[ticker2]
                
                # Estimar half-life
                lagged_spread = spread.shift(1).dropna()
                delta_spread = spread.diff().dropna()
                
                if len(lagged_spread) > 30:
                    # Regresión para modelo AR(1)
                    X = lagged_spread.values.reshape(-1, 1)
                    y = delta_spread.values
                    X = np.hstack([np.ones_like(X), X])
                    
                    try:
                        beta = np.linalg.lstsq(X, y, rcond=None)[0]
                        
                        # Verificar coeficiente válido para half-life
                        if beta[1] < 0:
                            half_life = -np.log(2) / beta[1]
                            
                            # Limitar rango de half-life razonable
                            if half_life <= 0 or half_life > 252:
                                logging.info(f"Half-life fuera de rango: {half_life}")
                                is_cointegrated = False
                                half_life = np.inf
                        else:
                            logging.info(f"Coeficiente no negativo: {beta[1]}")
                            is_cointegrated = False
                            half_life = np.inf
                    except Exception as e:
                        logging.error(f"Error calculando half-life: {str(e)}")
                        half_life = np.inf
                        is_cointegrated = False
                else:
                    half_life = np.inf
                    is_cointegrated = False
            else:
                hedge_ratio = 1.0
                half_life = np.inf
            
            # P-valor aproximado
            p_value = 0.01 if trace_stat > result.cvt[0, 0] else 0.05 if trace_stat > result.cvt[0, 1] else 0.1 if trace_stat > result.cvt[0, 2] else 1.0
            
            # Guardar en historial para seguimiento
            pair_id = f"{ticker1}_{ticker2}"
            if pair_id not in self.coint_history:
                self.coint_history[pair_id] = []
            
            self.coint_history[pair_id].append({
                'date': data['prices'].index[-1],
                'coint': is_cointegrated,
                'pvalue': p_value,
                'half_life': half_life,
                'hedge_ratio': hedge_ratio
            })
            
            # Mantener solo los últimos 10 tests para cada par
            if len(self.coint_history[pair_id]) > 10:
                self.coint_history[pair_id] = self.coint_history[pair_id][-10:]
            
            return {
                'coint': is_cointegrated,
                'pvalue': p_value,
                'half_life': half_life,
                'hedge_ratio': hedge_ratio
            }
        
        except Exception as e:
            logging.error(f"Error en test de cointegración para {pair}: {str(e)}")
            logging.error(traceback.format_exc())
            return {'coint': False, 'pvalue': 1.0, 'half_life': np.inf, 'hedge_ratio': 1.0}
    
    def test_structural_stability(self, data, pair, hedge_ratio, lookback=252):
        """Evalúa la estabilidad estructural de la relación con diagnóstico mejorado"""
        ticker1, ticker2 = pair
        
        try:
            # Verificar datos suficientes
            lookback = min(lookback, len(data['prices']))
            prices = data['prices'].iloc[-lookback:][list(pair)]
            
            if len(prices) < 60 or prices[ticker1].isna().any() or prices[ticker2].isna().any():
                logging.info(f"Datos insuficientes para estabilidad de {ticker1}-{ticker2}: {len(prices)} puntos")
                return 0.5  # Estabilidad media por defecto
            
            # Calcular spread histórico
            spread = prices[ticker1] + hedge_ratio * prices[ticker2]
            
            # Evaluar estabilidad con ventanas móviles adaptativas
            stability_scores = []
            
            # Adaptar número de sub-periodos según datos disponibles
            n_periods = 4 if len(spread) >= 240 else 3 if len(spread) >= 120 else 2
            subperiod_length = len(spread) // n_periods
            
            if subperiod_length < 30:
                logging.warning(f"Subperiodos muy cortos: {subperiod_length} < 30 días")
                n_periods = max(1, len(spread) // 30)
                subperiod_length = len(spread) // max(n_periods, 1)
            
            logging.info(f"Evaluando estabilidad en {n_periods} subperiodos de {subperiod_length} días")
            
            for i in range(n_periods):
                start_idx = i * subperiod_length
                end_idx = (i+1) * subperiod_length if i < n_periods-1 else len(spread)
                subspread = spread.iloc[start_idx:end_idx]
                    
                if len(subspread) < 30:
                    logging.info(f"Subperiodo {i+1} insuficiente: {len(subspread)} < 30")
                    continue
                    
                # Test ADF con manejo de errores
                try:
                    adf_result = adfuller(subspread, maxlag=min(10, len(subspread)//5), autolag='AIC')
                    is_stationary = adf_result[1] < 0.05
                    stability_scores.append(1 if is_stationary else 0)
                    logging.info(f"Subperiodo {i+1}: ADF p-valor={adf_result[1]:.4f}, estacionario={is_stationary}")
                except Exception as e:
                    logging.error(f"Error en ADF para subperiodo {i+1}: {str(e)}")
                    stability_scores.append(0)
            
            # Verificar consistencia de estacionariedad
            stationarity_consistency = sum(stability_scores) / len(stability_scores) if stability_scores else 0
            logging.info(f"Consistencia de estacionariedad: {stationarity_consistency:.2f}")
            
            # Calcular volatilidad del spread en cada sub-periodo
            subperiod_vols = []
            
            for i in range(n_periods):
                start_idx = i * subperiod_length
                end_idx = (i+1) * subperiod_length if i < n_periods-1 else len(spread)
                subspread = spread.iloc[start_idx:end_idx]
                    
                if len(subspread) < 10:
                    continue
                    
                subperiod_vols.append(subspread.std())
            
            # Consistencia de volatilidad
            vol_consistency = 1.0
            if len(subperiod_vols) > 1:
                vol_ratio = max(subperiod_vols) / min(subperiod_vols)
                vol_consistency = 1.0 / min(vol_ratio, 5.0)
                logging.info(f"Consistencia de volatilidad: {vol_consistency:.2f} (ratio={vol_ratio:.2f})")
            
            # Puntuación final (0-10)
            stability_score = (0.7 * stationarity_consistency + 0.3 * vol_consistency) * 10
            
            return stability_score
        except Exception as e:
            logging.error(f"Error en test_structural_stability para {pair}: {str(e)}")
            logging.error(traceback.format_exc())
            return 5.0  # Valor medio en caso de error
    
    def check_recalibration_needed(self, current_date, regime, prev_regime, market_conditions=None):
        """Determina si se requiere recalibración basado en múltiples factores"""
        try:
            # Verificar si es la primera calibración
            if self.last_calibration_date is None:
                logging.info("Recalibración necesaria: primera calibración")
                return True
                
            # 1. Calibración por tiempo transcurrido
            days_since_calib = (current_date - self.last_calibration_date).days
            if days_since_calib >= self.recalibration_days:
                logging.info(f"Recalibración por tiempo: {days_since_calib} días desde última calibración")
                return True
                
            # 2. Calibración por cambio de régimen
            if prev_regime != regime:
                logging.info(f"Recalibración por cambio de régimen: {prev_regime} → {regime}")
                return True
                
            # 3. Calibración por aumento de volatilidad
            if market_conditions and 'volatility_increase' in market_conditions:
                vol_increase = market_conditions['volatility_increase']
                if vol_increase > 0.3:  # 30% de aumento en volatilidad
                    logging.info(f"Recalibración por aumento de volatilidad: +{vol_increase*100:.1f}%")
                    return True
                    
            # 4. Calibración por día de la semana (menos prioritario)
            is_monday = current_date.weekday() == 0
            if is_monday and days_since_calib >= 3:  # Solo recalibrar lunes si han pasado al menos 3 días
                logging.info("Recalibración programada (lunes)")
                return True
                
            logging.info("No se requiere recalibración")
            return False
        except Exception as e:
            logging.error(f"Error en check_recalibration_needed: {str(e)}")
            return True  # En caso de error, mejor recalibrar
    
    def select_pairs(self, data, regime, candidate_pairs=None, market_conditions=None):
        """Selecciona los mejores pares para operar según el régimen actual con filtros mejorados"""
        try:
            if candidate_pairs is None:
                candidate_pairs = self.candidate_pairs
                
            if not candidate_pairs:
                logging.warning("No hay pares candidatos para selección")
                return []
            
            # Verificar pares ya seleccionados para evaluar su continua validez
            existing_pair_ids = set()
            if self.selected_pairs:
                existing_pair_ids = {f"{p['ticker1']}_{p['ticker2']}" for p in self.selected_pairs}
            
            # Parámetros según régimen
            if regime == 1:  # Baja volatilidad / Alta predictibilidad
                window = 252
                half_life_range = (10, 25)
                max_pvalue = 0.01
                max_pairs = self.max_pairs_by_regime.get(1, 25)
                min_stability = 6.0
            elif regime == 2:  # Transición / Volatilidad moderada
                window = 180
                half_life_range = (7, 20)
                max_pvalue = 0.03
                max_pairs = self.max_pairs_by_regime.get(2, 20)
                min_stability = 5.5
            else:  # Crisis / Alta volatilidad
                window = 126
                half_life_range = (5, 15)
                max_pvalue = 0.05
                max_pairs = self.max_pairs_by_regime.get(3, 15)
                min_stability = 5.0
            
            # Ajustar ventana a datos disponibles
            window = min(window, len(data['prices']))
            if window < 60:
                logging.warning(f"Ventana ajustada a {window} días por datos insuficientes")
            
            # Evaluar pares candidatos
            pair_results = []
            
            for pair in candidate_pairs:
                ticker1, ticker2 = pair
                pair_id = f"{ticker1}_{ticker2}"
                
                # Test de cointegración
                coint_result = self.test_cointegration(data, pair, window, max_pvalue)
                
                if not coint_result['coint']:
                    # Verificar si era un par seleccionado anteriormente
                    if pair_id in existing_pair_ids:
                        logging.info(f"Par {pair_id} perdió cointegración")
                    continue
                    
                if not half_life_range[0] <= coint_result['half_life'] <= half_life_range[1]:
                    logging.info(f"Par {pair_id} con half-life fuera de rango: {coint_result['half_life']:.2f}")
                    continue
                    
                # Evaluar estabilidad estructural
                stability = self.test_structural_stability(data, pair, coint_result['hedge_ratio'])
                
                if stability < min_stability:
                    logging.info(f"Par {pair_id} con estabilidad insuficiente: {stability:.2f} < {min_stability}")
                    continue
                
                # Calcular liquidez combinada
                ticker1_adv = data['adv'].iloc[-20:][ticker1].mean() if ticker1 in data['adv'].columns else 0
                ticker2_adv = data['adv'].iloc[-20:][ticker2].mean() if ticker2 in data['adv'].columns else 0
                combined_liquidity = min(ticker1_adv, ticker2_adv)
                
                # Rendimiento histórico (si está disponible en el historial)
                historical_performance = 1.0
                if pair_id in self.coint_history and len(self.coint_history[pair_id]) > 1:
                    # Calcular variabilidad del hedge ratio como indicador de estabilidad
                    hedge_ratios = [item['hedge_ratio'] for item in self.coint_history[pair_id] 
                                   if not np.isinf(item['hedge_ratio'])]
                    if hedge_ratios:
                        hr_std = np.std(hedge_ratios)
                        hr_mean = np.mean(hedge_ratios)
                        hr_cv = hr_std / abs(hr_mean) if abs(hr_mean) > 0 else np.inf
                        
                        # Mayor estabilidad = mejor rendimiento histórico
                        if hr_cv < 0.1:
                            historical_performance = 1.5
                        elif hr_cv < 0.2:
                            historical_performance = 1.2
                        elif hr_cv > 0.5:
                            historical_performance = 0.7
                
                # Score compuesto según régimen
                if regime == 1:
                    composite_score = (0.35 * (1/coint_result['pvalue']) + 
                                     0.35 * stability/10 + 
                                     0.2 * (combined_liquidity/1e7) + 
                                     0.1 * historical_performance)
                elif regime == 2:
                    composite_score = (0.4 * (1/coint_result['pvalue']) + 
                                     0.3 * stability/10 + 
                                     0.2 * (combined_liquidity/1e7) + 
                                     0.1 * historical_performance)
                else:
                    composite_score = (0.4 * (1/coint_result['pvalue']) + 
                                     0.3 * stability/10 + 
                                     0.2 * (combined_liquidity/1e7) + 
                                     0.1 * historical_performance)
                
                # Guardar resultados
                pair_results.append({
                    'ticker1': ticker1,
                    'ticker2': ticker2,
                    'hedge_ratio': coint_result['hedge_ratio'],
                    'half_life': coint_result['half_life'],
                    'pvalue': coint_result['pvalue'],
                    'stability': stability,
                    'liquidity': combined_liquidity,
                    'composite_score': composite_score
                })
            
            # Ordenar por score compuesto
            sorted_pairs = sorted(pair_results, key=lambda x: x['composite_score'], reverse=True)
            
            # Seleccionar mejores pares
            selected_pairs = sorted_pairs[:max_pairs]
            
            # Log de resultados
            logging.info(f"Pares seleccionados: {len(selected_pairs)}/{len(pair_results)} candidatos evaluados")
            
            self.selected_pairs = selected_pairs
            return selected_pairs
        except Exception as e:
            logging.error(f"Error en select_pairs: {str(e)}")
            logging.error(traceback.format_exc())
            # Mantener pares anteriores en caso de error
            return self.selected_pairs if self.selected_pairs else []

# Modelo Predictivo de Convergencia
class ConvergencePredictor:
    """Modelo predictivo mejorado para la probabilidad de convergencia"""
    
    def __init__(self, learning_rate=0.01, max_depth=4, n_estimators=200):
        """Set up one classifier, one scaler and bookkeeping slots per regime.

        Args:
            learning_rate: Shrinkage passed to every GradientBoostingClassifier.
            max_depth: Maximum tree depth passed to every classifier.
            n_estimators: Number of boosting stages passed to every classifier.
        """
        regime_ids = (1, 2, 3)

        # Independent (unfitted) estimator and scaler for each regime id
        self.models = {
            regime_id: GradientBoostingClassifier(
                learning_rate=learning_rate,
                max_depth=max_depth,
                n_estimators=n_estimators,
                subsample=0.8,
                random_state=42,
            )
            for regime_id in regime_ids
        }
        self.scalers = {regime_id: StandardScaler() for regime_id in regime_ids}

        # Per-regime training state
        self.is_trained = dict.fromkeys(regime_ids, False)
        self.feature_importance = dict.fromkeys(regime_ids)  # filled in by train()
        self.last_training_date = None
        self.best_validation_scores = dict.fromkeys(regime_ids, 0)  # model quality tracking
    
    def create_features(self, data, pair, hedge_ratio, regime, lookback=None):
        """Crea features para el modelo de predicción con ventanas adaptativas"""
        ticker1, ticker2 = pair
        
        try:
            # Verificar que los tickers existen en los datos
            if ticker1 not in data['prices'].columns or ticker2 not in data['prices'].columns:
                logging.error(f"Tickers {ticker1} o {ticker2} no encontrados en los datos")
                return None
            
            # Definir lookback adaptativo basado en datos disponibles
            if lookback is None:
                min_required = 60  # Mínimo requerido para cálculos
                default_lookback = 252  # Valor por defecto
                lookback = min(default_lookback, max(min_required, len(data['prices'])))
            
            # Obtener precios de manera segura
            prices = data['prices'].iloc[-lookback:].copy()
            prices = prices[[ticker1, ticker2]]  # Seleccionar solo las columnas necesarias
            
            # Verificar datos suficientes
            if len(prices) < min_required or prices[ticker1].isna().all() or prices[ticker2].isna().all():
                logging.warning(f"Datos insuficientes para crear features: {len(prices)} puntos")
                return None
            
            # Imputar valores faltantes si hay algunos (máx 20% de la serie)
            missing_pct1 = prices[ticker1].isna().mean()
            missing_pct2 = prices[ticker2].isna().mean()
            
            if missing_pct1 > 0 and missing_pct1 < 0.2:
                prices[ticker1] = prices[ticker1].interpolate(method='time').ffill().bfill()
                logging.info(f"Imputados {missing_pct1*100:.1f}% valores faltantes en {ticker1}")
                
            if missing_pct2 > 0 and missing_pct2 < 0.2:
                prices[ticker2] = prices[ticker2].interpolate(method='time').ffill().bfill()
                logging.info(f"Imputados {missing_pct2*100:.1f}% valores faltantes en {ticker2}")
            
            # Calcular spread y z-score
            spread = prices[ticker1] + hedge_ratio * prices[ticker2]
            
            # Ventanas adaptativas para media y desviación
            ma_window = min(60, max(20, len(spread) // 4))
            spread_mean = spread.rolling(window=ma_window).mean()
            spread_std = spread.rolling(window=ma_window).std()
            
            # Evitar división por cero con verificación robusta
            valid_idx = (spread_std > 0) & (~pd.isna(spread_mean)) & (~pd.isna(spread_std))
            z_score = pd.Series(index=spread.index, data=np.nan)
            z_score[valid_idx] = (spread[valid_idx] - spread_mean[valid_idx]) / spread_std[valid_idx]
            
            # Crear DataFrame de features con manejo de NaN
            features = pd.DataFrame(index=z_score.index)
            
            # Z-score y cambios con períodos adaptativos
            features['z_score'] = z_score
            
            # Usar ventanas adaptativas para diferencias
            lag3 = min(3, max(1, len(z_score) // 84))
            lag5 = min(5, max(2, len(z_score) // 50))
            lag10 = min(10, max(3, len(z_score) // 25))
            
            features['z_score_change_short'] = z_score - z_score.shift(lag3)
            features['z_score_change_medium'] = z_score - z_score.shift(lag5)
            features['z_score_change_long'] = z_score - z_score.shift(lag10)
            
            # Velocidad de cambio (momentum)
            features['z_momentum'] = features['z_score_change_short'] / lag3
            
            # Volatilidad relativa con manejo seguro de división por cero
            spread_vol = pd.Series(index=spread_mean.index, data=np.nan)
            nonzero_idx = (spread_mean.abs() > 1e-10) & (~pd.isna(spread_mean))
            if nonzero_idx.any():
                spread_vol[nonzero_idx] = spread_std[nonzero_idx] / spread_mean.abs()[nonzero_idx]
            
            # Ventana adaptativa para promedio de volatilidad
            vol_window = min(60, max(20, len(spread) // 4))
            spread_vol_avg = spread_vol.rolling(window=vol_window).mean()
            
            # Evitar otra división por cero
            valid_vol_idx = (spread_vol_avg > 0) & (~pd.isna(spread_vol)) & (~pd.isna(spread_vol_avg))
            rel_vol = pd.Series(index=spread_vol.index, data=np.nan)
            rel_vol[valid_vol_idx] = spread_vol[valid_vol_idx] / spread_vol_avg[valid_vol_idx]
            
            features['rel_vol'] = rel_vol
            
            # Ratio de volumen anormal con verificación de datos disponibles
            valid_vol_features = True
            
            if 'relative_volume' in data and not data['relative_volume'].empty:
                rel_vol_data = data['relative_volume'].iloc[-lookback:]
                
                vol1 = pd.Series(1, index=prices.index)  # Valor por defecto
                vol2 = pd.Series(1, index=prices.index)  # Valor por defecto
                
                if ticker1 in rel_vol_data.columns:
                    common_idx1 = prices.index.intersection(rel_vol_data.index)
                    vol1.loc[common_idx1] = rel_vol_data.loc[common_idx1, ticker1]
                else:
                    valid_vol_features = False
                    
                if ticker2 in rel_vol_data.columns:
                    common_idx2 = prices.index.intersection(rel_vol_data.index)
                    vol2.loc[common_idx2] = rel_vol_data.loc[common_idx2, ticker2]
                else:
                    valid_vol_features = False
                
                if valid_vol_features:
                    features['abnormal_volume'] = (vol1 + vol2) / 2
                    
                    # Agregar features adicionales de volumen
                    features['vol_imbalance'] = vol1 / vol2  # Desequilibrio de volumen entre pares
                    features['vol_trend'] = vol1.rolling(window=5).mean() / vol1.rolling(window=20).mean()
            
            # Variables dummy de régimen
            features['regime_1'] = 1 if regime == 1 else 0
            features['regime_2'] = 1 if regime == 2 else 0
            features['regime_3'] = 1 if regime == 3 else 0
            
            # Métricas de tendencia de mercado si están disponibles
            if 'returns' in data and not data['returns'].empty:
                mkt_returns = data['returns'].mean(axis=1).iloc[-lookback:]
                if not mkt_returns.empty:
                    mkt_trend = mkt_returns.rolling(window=20).mean()
                    features['mkt_trend'] = 0
                    common_idx = features.index.intersection(mkt_trend.index)
                    if len(common_idx) > 0:
                        features.loc[common_idx, 'mkt_trend'] = mkt_trend.loc[common_idx]
            
            # Características de autocorrelación del spread
            if len(spread) > ma_window + 10:
                # Autocorrelación del spread
                spread_cleaned = spread.dropna()
                if len(spread_cleaned) > ma_window:
                    try:
                        lag1_autocorr = pd.Series(
                            [spread_cleaned.iloc[i-ma_window:i].autocorr(lag=1) 
                             for i in range(ma_window, len(spread_cleaned))],
                            index=spread_cleaned.index[ma_window:]
                        )
                        features['autocorrelation'] = np.nan
                        common_idx = features.index.intersection(lag1_autocorr.index)
                        if len(common_idx) > 0:
                            features.loc[common_idx, 'autocorrelation'] = lag1_autocorr.loc[common_idx]
                    except Exception as e:
                        logging.warning(f"Error calculando autocorrelación: {str(e)}")
            
            # Indicador de extremos (0-1)
            features['extreme_indicator'] = features['z_score'].abs().apply(
                lambda x: 1 if x > 2.5 else 0.75 if x > 2.0 else 0.5 if x > 1.5 else 0.25 if x > 1.0 else 0
            )
            
            # Eliminar NaNs
            features = features.dropna()
            
            # Verificar que tenemos suficientes datos
            if len(features) < 10:
                logging.warning(f"Features insuficientes después de eliminar NaN: {len(features)} < 10")
                return None
                
            return features
        except Exception as e:
            logging.error(f"Error en create_features para {pair}: {str(e)}")
            logging.error(f"Detalle: {traceback.format_exc()}")
            return None
    
    def create_target(self, data, pair, hedge_ratio, forward_period=None, threshold=0.5):
        """Crea variable objetivo para entrenamiento con período futuro adaptativo"""
        try:
            ticker1, ticker2 = pair
            
            # Verificar que los tickers existen en los datos
            if ticker1 not in data['prices'].columns or ticker2 not in data['prices'].columns:
                logging.error(f"Tickers {ticker1} o {ticker2} no encontrados en create_target")
                return pd.Series(dtype='int')
            
            # Adaptar forward_period según datos disponibles
            if forward_period is None:
                # Adaptar basado en half-life típico de pares cointegrados (5-25 días)
                # Usar un período más corto si tenemos pocos datos
                if len(data['prices']) < 126:
                    forward_period = min(5, max(2, len(data['prices']) // 20))
                elif len(data['prices']) < 252:
                    forward_period = min(10, max(3, len(data['prices']) // 20))
                else:
                    forward_period = 10
                
                logging.info(f"Forward period adaptativo: {forward_period} días")
            
            # Obtener precios de manera segura usando solo las columnas necesarias
            prices = data['prices'][[ticker1, ticker2]].copy()
            
            # Imputar valores faltantes si hay algunos (máx 20% de la serie)
            missing_pct1 = prices[ticker1].isna().mean()
            missing_pct2 = prices[ticker2].isna().mean()
            
            if missing_pct1 > 0 and missing_pct1 < 0.2:
                prices[ticker1] = prices[ticker1].interpolate(method='time').ffill().bfill()
                
            if missing_pct2 > 0 and missing_pct2 < 0.2:
                prices[ticker2] = prices[ticker2].interpolate(method='time').ffill().bfill()
            
            # Verificar datos suficientes después de imputación
            if len(prices) < 30 or prices[ticker1].isna().any() or prices[ticker2].isna().any():
                logging.warning(f"Datos insuficientes para crear target: {len(prices)} puntos")
                return pd.Series(dtype='int')
            
            # Calcular spread y z-score
            spread = prices[ticker1] + hedge_ratio * prices[ticker2]
            
            # Ventanas adaptativas para media y desviación
            ma_window = min(60, max(20, len(spread) // 4))
            spread_mean = spread.rolling(window=ma_window).mean()
            spread_std = spread.rolling(window=ma_window).std()
            
            # Manejar división por cero
            valid_idx = (spread_std > 0) & (~pd.isna(spread_mean)) & (~pd.isna(spread_std))
            z_score = pd.Series(index=spread.index, data=np.nan)
            z_score[valid_idx] = (spread[valid_idx] - spread_mean[valid_idx]) / spread_std[valid_idx]
            
            # Crear variable objetivo
            target = pd.Series(index=z_score.index, data=0)
            
            # Determinar convergencia futura
            logging.info(f"Calculando target con forward_period={forward_period}, threshold={threshold}")
            
            for i in range(len(z_score) - forward_period):
                current_z = z_score.iloc[i]
                
                if pd.isna(current_z):
                    continue
                    
                # Solo considerar desviaciones significativas
                if abs(current_z) > 1.0:
                    future_idx_end = min(i + forward_period + 1, len(z_score))
                    future_z = z_score.iloc[i+1:future_idx_end]
                        
                    if future_z.empty or future_z.isna().all():
                        continue
                        
                    future_z_abs = future_z.abs()
                    future_z_filtered = future_z_abs[~future_z_abs.isna()]
                    
                    if not future_z_filtered.empty:
                        min_distance = future_z_filtered.min()
                        
                        # Verificar convergencia con umbral relativo
                        if min_distance < threshold * abs(current_z):
                            target.iloc[i] = 1
                            
                            # Registrar si la convergencia es rápida (útil para aprendizaje)
                            min_idx = future_z_abs.idxmin()
                            if min_idx is not None:
                                days_to_converge = future_z.index.get_loc(min_idx) + 1
                                if days_to_converge <= forward_period // 3:
                                    # Convergencia muy rápida podría recibir más peso (2) en entrenamiento
                                    target.iloc[i] = 2
            
            # Convertir a categorías 0-1 si hay valores 2
            if (target == 2).any():
                target = (target > 0).astype(int)
                
            return target
        except Exception as e:
            logging.error(f"Error en create_target para {pair}: {str(e)}")
            logging.error(f"Detalle: {traceback.format_exc()}")
            return pd.Series(dtype='int')
        
    def train(self, data, selected_pairs, regime, max_history=None):
        """Entrena el modelo para un régimen específico con validación mejorada"""
        try:
            if not selected_pairs:
                logging.warning(f"No hay pares seleccionados para el régimen {regime}.")
                return False
                
            # Adaptar max_history a datos disponibles
            if max_history is None:
                max_history = min(1260, len(data['prices']))
            else:
                max_history = min(max_history, len(data['prices']))
                
            prices = data['prices'].iloc[-max_history:]
            
            all_features = []
            all_targets = []
            pair_identifiers = []  # Para tracking de resultados por par
            
            for pair_info in selected_pairs:
                ticker1 = pair_info['ticker1']
                ticker2 = pair_info['ticker2']
                hedge_ratio = pair_info['hedge_ratio']
                half_life = pair_info.get('half_life', 15)  # Valor por defecto si no está
                
                # Crear features con parámetros adaptativos
                pair = (ticker1, ticker2)
                features = self.create_features(data, pair, hedge_ratio, regime)
                
                if features is None or len(features) < 30:
                    logging.info(f"Features insuficientes para {ticker1}-{ticker2}: {0 if features is None else len(features)}")
                    continue
                    
                # Añadir half-life como feature
                features['half_life'] = half_life
                
                # Crear target con forward_period adaptativo basado en half-life
                forward_period = min(int(half_life * 1.5), 15)
                forward_period = max(5, forward_period)  # Mínimo 5 días
                
                target = self.create_target(data, pair, hedge_ratio, forward_period=forward_period)
                
                # Alinear features y target
                common_index = features.index.intersection(target.index)
                if len(common_index) < 30:
                    logging.info(f"Datos alineados insuficientes para {ticker1}-{ticker2}: {len(common_index)} < 30")
                    continue
                    
                features = features.loc[common_index]
                target = target.loc[common_index]
                
                all_features.append(features)
                all_targets.append(target)
                pair_identifiers.extend([(ticker1, ticker2)] * len(features))
            
            if not all_features:
                logging.warning(f"No se pudieron crear features para ningún par en régimen {regime}.")
                return False
                
            # Concatenar datos
            X = pd.concat(all_features)
            y = pd.concat(all_targets)
            
            # Verificar que tenemos suficientes muestras y clases
            if len(X) < 50:
                logging.warning(f"Datos insuficientes para entrenar modelo de régimen {regime}: {len(X)} muestras")
                return False
            
            # Verificar que hay suficientes muestras de cada clase
            class_counts = y.value_counts()
            if len(class_counts) < 2:
                logging.warning(f"Solo hay una clase en los datos de entrenamiento para régimen {regime}: {class_counts}")
                
                # Generar datos sintéticos balanceados si solo hay una clase
                if 0 in class_counts and class_counts[0] > 0:
                    # Si solo hay class 0, añadir algunas class 1 sintéticas
                    synthetic_size = min(int(class_counts[0] * 0.2), 20)
                    synthetic_indices = np.random.choice(X.index, size=synthetic_size, replace=False)
                    synthetic_X = X.loc[synthetic_indices].copy()
                    # Modificar features para que parezcan class 1 (convergentes)
                    synthetic_X['z_score'] = synthetic_X['z_score'] * 1.5  # Aumentar magnitud
                    synthetic_y = pd.Series(1, index=synthetic_X.index)
                    
                    X = pd.concat([X, synthetic_X])
                    y = pd.concat([y, synthetic_y])
                elif 1 in class_counts and class_counts[1] > 0:
                    # Si solo hay class 1, añadir algunas class 0 sintéticas
                    synthetic_size = min(int(class_counts[1] * 0.2), 20)
                    synthetic_indices = np.random.choice(X.index, size=synthetic_size, replace=False)
                    synthetic_X = X.loc[synthetic_indices].copy()
                    # Modificar features para que parezcan class 0 (no convergentes)
                    synthetic_X['z_score'] = synthetic_X['z_score'] * 0.5  # Reducir magnitud
                    synthetic_y = pd.Series(0, index=synthetic_X.index)
                    
                    X = pd.concat([X, synthetic_X])
                    y = pd.concat([y, synthetic_y])
                else:
                    # Si no hay datos en ninguna clase, no se puede entrenar
                    logging.error(f"No hay datos válidos para entrenar régimen {regime}")
                    return False
            
            # Re-verificar después de sintéticos
            class_counts = y.value_counts()
            if len(class_counts) < 2:
                logging.error(f"No se pudo crear un conjunto balanceado para régimen {regime}")
                return False
                
            # Asegurar balanceo mínimo para evitar sesgos
            min_class = class_counts.min()
            maj_class = class_counts.idxmax()
            min_class_indices = y[y != maj_class].index
            
            # Sobremuestrear la clase minoritaria si es necesario
            if len(min_class_indices) > 0 and min_class < 15 and class_counts[maj_class] > 30:
                oversample_size = min(30, class_counts[maj_class])
                oversample_indices = np.random.choice(min_class_indices, 
                                                   size=oversample_size, 
                                                   replace=True)
                X_oversample = X.loc[oversample_indices]
                y_oversample = y.loc[oversample_indices]
                
                X = pd.concat([X, X_oversample])
                y = pd.concat([y, y_oversample])
                
                logging.info(f"Clase minoritaria sobremuestreada: {min_class} -> {min_class + len(oversample_indices)}")
            
            # Validación cruzada temporal con manejo de fechas
            tscv = TimeSeriesSplit(n_splits=3)  # Validación temporal con 3 splits
            best_score = 0
            best_model = None
            best_scaler = None
            
            # Convertir índice a números para split temporal
            X_reset = X.reset_index(drop=True)
            y_reset = y.reset_index(drop=True)
            
            for train_index, test_index in tscv.split(X_reset):
                X_train, X_test = X_reset.iloc[train_index], X_reset.iloc[test_index]
                y_train, y_test = y_reset.iloc[train_index], y_reset.iloc[test_index]
                
                # Verificar que ambos conjuntos tienen suficientes muestras
                if len(X_train) < 30 or len(X_test) < 15:
                    logging.info(f"Split con datos insuficientes: train={len(X_train)}, test={len(X_test)}")
                    continue
                
                # Verificar que ambos conjuntos tienen ambas clases
                if len(pd.Series(y_train).unique()) < 2 or len(pd.Series(y_test).unique()) < 2:
                    logging.info("Split sin ambas clases en train o test")
                    continue
                
                # Escalar features
                scaler = StandardScaler()
                X_train_scaled = scaler.fit_transform(X_train)
                X_test_scaled = scaler.transform(X_test)
                
                # Ajustar complejidad del modelo según tamaño de datos
                if len(X_train) < 100:
                    model = GradientBoostingClassifier(
                        learning_rate=0.05, 
                        max_depth=3, 
                        n_estimators=50,
                        subsample=0.8,
                        random_state=42
                    )
                elif len(X_train) < 200:
                    model = GradientBoostingClassifier(
                        learning_rate=0.03, 
                        max_depth=3, 
                        n_estimators=100,
                        subsample=0.8,
                        random_state=42
                    )
                else:
                    model = self.models[regime]
                
                # Balancear pesos si hay desbalance significativo
                class_weight = None
                class_counts_train = pd.Series(y_train).value_counts()
                if len(class_counts_train) >= 2:
                    count_0 = class_counts_train.get(0, 0)
                    count_1 = class_counts_train.get(1, 0)
                    if count_0 > 0 and count_1 > 0:
                        ratio = count_0 / count_1 if count_0 > count_1 else count_1 / count_0
                        if ratio > 3:  # Desbalanceo significativo
                            class_weight = 'balanced'
                
                try:
                    # Entrenar modelo con balanceo si es necesario
                    if class_weight:
                        model.set_params(class_weight=class_weight)
                        
                    model.fit(X_train_scaled, y_train)
                    
                    # Evaluar con métrica relevante (AUC mejor que accuracy para desbalanceados)
                    try:
                        from sklearn.metrics import roc_auc_score
                        y_pred_proba = model.predict_proba(X_test_scaled)[:, 1]
                        score = roc_auc_score(y_test, y_pred_proba)
                    except:
                        # Si AUC falla, usar accuracy
                        score = model.score(X_test_scaled, y_test)
                    
                    logging.info(f"CV split score: {score:.4f}")
                    
                    if score > best_score:
                        best_score = score
                        best_model = model
                        best_scaler = scaler
                except Exception as e:
                    logging.error(f"Error entrenando en split CV para régimen {regime}: {str(e)}")
                    continue
            
            # Si encontramos un buen modelo en CV, usarlo
            if best_score > 0.6:  # Umbral de calidad mínima
                self.models[regime] = best_model
                self.scalers[regime] = best_scaler
                self.is_trained[regime] = True
                self.best_validation_scores[regime] = best_score
                
                # Guardar importancia de features
                if hasattr(best_model, 'feature_importances_'):
                    self.feature_importance[regime] = pd.Series(
                        best_model.feature_importances_,
                        index=X.columns
                    ).sort_values(ascending=False)
                    
                    logging.info(f"Top features para régimen {regime}: {self.feature_importance[regime].head(5).to_dict()}")
                
                logging.info(f"Modelo para régimen {regime} entrenado con score: {best_score:.4f}")
                self.last_training_date = datetime.now()
                return True
            else:
                # Modelo final con todos los datos si no obtuvimos buen modelo en CV
                try:
                    scaler = StandardScaler()
                    X_scaled = scaler.fit_transform(X)
                    
                    # Usar modelo más simple para datos difíciles
                    model = GradientBoostingClassifier(
                        learning_rate=0.05, 
                        max_depth=3, 
                        n_estimators=100,
                        subsample=0.8,
                        class_weight='balanced',
                        random_state=42
                    )
                    
                    model.fit(X_scaled, y)
                    self.models[regime] = model
                    self.scalers[regime] = scaler
                    self.is_trained[regime] = True
                    
                    # Calidad estimada mediante validación
                    preds = model.predict(X_scaled)
                    accuracy = (preds == y).mean()
                    self.best_validation_scores[regime] = accuracy
                    
                    logging.info(f"Modelo alternativo para régimen {regime} con accuracy: {accuracy:.4f}")
                    
                    # Guardar importancia de features
                    if hasattr(model, 'feature_importances_'):
                        self.feature_importance[regime] = pd.Series(
                            model.feature_importances_,
                            index=X.columns
                        ).sort_values(ascending=False)
                    
                    self.last_training_date = datetime.now()
                    return True
                except Exception as e:
                    logging.error(f"Error entrenando modelo final para régimen {regime}: {str(e)}")
                    logging.error(traceback.format_exc())
                    return False
        except Exception as e:
            logging.error(f"Error general en train para régimen {regime}: {str(e)}")
            logging.error(traceback.format_exc())
            return False
    
    def predict_convergence(self, data, pair_info, regime):
        """Predice probabilidad de convergencia para un par con manejo robusto de errores"""
        if not self.is_trained[regime]:
            logging.info(f"Modelo para régimen {regime} no entrenado, usando probabilidad neutral")
            return 0.5  # Valor neutro si no está entrenado
            
        try:
            ticker1 = pair_info['ticker1']
            ticker2 = pair_info['ticker2']
            hedge_ratio = pair_info['hedge_ratio']
            half_life = pair_info.get('half_life', 15)  # Valor por defecto si no está
            
            # Crear features con parámetros adaptados a disponibilidad de datos
            pair = (ticker1, ticker2)
            features = self.create_features(data, pair, hedge_ratio, regime, lookback=60)
            
            if features is None or len(features) < 5:
                logging.info(f"Features insuficientes para predicción de {ticker1}-{ticker2}")
                return 0.5
                
            # Añadir half-life
            features['half_life'] = half_life
            
            # Verificar columnas consistentes con entrenamiento
            if hasattr(self.models[regime], 'feature_names_in_'):
                model_features = set(self.models[regime].feature_names_in_)
                current_features = set(features.columns)
                
                # Si faltan features en los datos actuales
                missing_features = model_features - current_features
                if missing_features:
                    logging.warning(f"Faltan features: {missing_features}")
                    # Añadir las que faltan con valores neutrales
                    for feat in missing_features:
                        features[feat] = 0
                        
                # Seleccionar solo las features que el modelo conoce
                features = features[list(model_features)]
            
            # Usar solo la última fila
            latest_features = features.iloc[-1:].copy()
            
            # Escalar features
            try:
                latest_features_scaled = self.scalers[regime].transform(latest_features)
            except Exception as e:
                logging.error(f"Error al escalar features para {ticker1}-{ticker2}: {str(e)}")
                return 0.5
            
            # Predecir probabilidad
            try:
                probability = self.models[regime].predict_proba(latest_features_scaled)[0, 1]
                logging.info(f"Predicción para {ticker1}-{ticker2}: {probability:.4f}")
                return probability
            except Exception as e:
                logging.error(f"Error en predict_proba para {ticker1}-{ticker2}: {str(e)}")
                return 0.5  # En caso de error, devolver valor neutro
        except Exception as e:
            logging.error(f"Error en predict_convergence para {ticker1}_{ticker2}: {str(e)}")
            logging.error(traceback.format_exc())
            return 0.5

class SignalGenerator:
    """Generate pair-trading signals from spread z-scores, per market regime.

    The spread of a pair is ``prices[ticker1] + hedge_ratio * prices[ticker2]``.
    Entry/exit thresholds and no-trade band widths are keyed by regime id
    (1, 2 or 3). The last emitted signal of every pair is stored in
    ``signal_history`` and used to enforce a minimum spacing between signals.
    """

    def __init__(self):
        """Set up per-regime thresholds, band widths and the signal history."""
        # Entry thresholds (in z-score units) per regime
        self.entry_thresholds = {
            1: {'long': -2.0, 'short': 2.0},     # Favorable regime
            2: {'long': -2.2, 'short': 2.2},     # Transition regime
            3: {'long': -2.5, 'short': 2.5}      # Crisis regime
        }

        # Exit thresholds (in z-score units) per regime
        self.exit_thresholds = {
            1: {'long': -0.5, 'short': 0.5},
            2: {'long': -0.7, 'short': 0.7},
            3: {'long': -1.0, 'short': 1.0}
        }

        # Base width of the no-trade band per regime
        self.no_trade_base_width = {
            1: 0.2,
            2: 0.3,
            3: 0.4
        }

        # Last signal emitted per pair id ("TICKER1_TICKER2")
        self.signal_history = {}

        # Minimum number of bars required between two signals of the same pair
        self.min_bars_between_trades = 3

    @staticmethod
    def _usable_half_life(pair_info):
        """Return the pair half-life as a float, or None when it is unusable.

        Missing keys, None, NaN and non-numeric values all map to None.
        Infinite values are returned as-is so callers can treat them the way
        they always did.
        """
        raw = pair_info.get('half_life')
        if raw is None:
            return None
        try:
            value = float(raw)
        except (TypeError, ValueError):
            return None
        return None if np.isnan(value) else value

    @staticmethod
    def _no_signal(reason):
        """Build the neutral result returned when no signal can be produced."""
        return {'signal': 0, 'z_score': None, 'strength': 0, 'reason': reason}

    def calculate_z_score(self, data, pair_info, lookback=None, method='ewm'):
        """Return the current spread z-score of a pair, or None if unavailable.

        Args:
            data: Mapping whose ``'prices'`` entry is a DataFrame with one
                column per ticker.
            pair_info: Mapping with ``'ticker1'``, ``'ticker2'``,
                ``'hedge_ratio'`` and optionally ``'half_life'``.
            lookback: Number of most recent rows to use. When None it is
                derived from the half-life (5x, capped at 60) or from the
                amount of data (25%, capped at 60), with a floor of 20.
            method: ``'ewm'`` for exponentially weighted statistics; any other
                value uses simple rolling statistics.

        Returns:
            The z-score of the latest spread value, or None when tickers are
            missing, data is insufficient/invalid, or an error occurs.
        """
        ticker1 = pair_info['ticker1']
        ticker2 = pair_info['ticker2']
        hedge_ratio = pair_info['hedge_ratio']

        try:
            price_frame = data['prices']
            if ticker1 not in price_frame.columns or ticker2 not in price_frame.columns:
                logging.warning(f"Tickers {ticker1} o {ticker2} no encontrados para cálculo de z-score")
                return None

            # None/NaN half-lives are treated exactly like a missing half-life
            # instead of raising inside int()/max() and aborting the z-score.
            half_life = self._usable_half_life(pair_info)
            n_rows = len(price_frame)

            if lookback is None:
                if half_life is not None and not np.isinf(half_life):
                    lookback = min(int(half_life * 5), 60)
                else:
                    lookback = min(60, n_rows // 4)
                lookback = max(lookback, 20)

            # Shrink the window when there is less history than requested
            if n_rows < lookback:
                lookback = max(20, n_rows - 1)

            prices = price_frame.iloc[-lookback:][[ticker1, ticker2]]

            if len(prices) < lookback/2 or prices[ticker1].isna().any() or prices[ticker2].isna().any():
                logging.warning(f"Datos insuficientes o inválidos para z-score de {ticker1}-{ticker2}")
                return None

            spread = prices[ticker1] + hedge_ratio * prices[ticker2]

            if method == 'ewm':
                # Clamp the smoothing half-life to [5, lookback/2]
                ewm_half_life = half_life if half_life is not None else lookback/3
                ewm_half_life = min(max(5, ewm_half_life), lookback/2)

                spread_mean = spread.ewm(halflife=ewm_half_life).mean().iloc[-1]
                # Shorter half-life for the deviation: reacts faster to recent volatility
                spread_std = spread.ewm(halflife=ewm_half_life/3).std().iloc[-1]
            else:
                window = min(lookback, max(20, len(spread) // 3))
                spread_mean = spread.rolling(window=window).mean().iloc[-1]
                spread_std = spread.rolling(window=window).std().iloc[-1]

            if pd.isna(spread_mean) or pd.isna(spread_std) or spread_std <= 0:
                logging.warning(f"Estadísticas inválidas para z-score de {ticker1}-{ticker2}")
                return None

            z_score = (spread.iloc[-1] - spread_mean) / spread_std

            # Extreme values are only reported, not rejected
            if abs(z_score) > 5:
                logging.warning(f"Z-score extremo: {z_score:.2f} para {ticker1}-{ticker2}")

            logging.info(f"Z-score calculado para {ticker1}-{ticker2}: {z_score:.2f}")
            return z_score

        except Exception as e:
            logging.error(f"Error calculando z-score para {ticker1}-{ticker2}: {str(e)}")
            logging.error(traceback.format_exc())
            return None

    def adjust_no_trade_band(self, pair_info, regime, vol_increase=0.0, cost_bps=1.0):
        """Return the no-trade band width adjusted for volatility, costs and half-life.

        Args:
            pair_info: Mapping that may carry a ``'half_life'`` entry.
            regime: Regime id used to look up base width and thresholds.
            vol_increase: Volatility increase; every 25 units add 0.1 to the width.
            cost_bps: Transaction cost in basis points.

        Returns:
            The adjusted width, or 0.3 if anything fails (e.g. unknown regime).
        """
        try:
            base_width = self.no_trade_base_width.get(regime, 0.3)

            # Wider band when volatility has increased
            width = base_width + 0.1 * (vol_increase / 25.0)

            # Cost adjustment scaled by the gap between entry and exit thresholds
            entry_exit_gap = self.entry_thresholds[regime]['short'] - self.exit_thresholds[regime]['short']
            width += (cost_bps / 10) * entry_exit_gap * 0.1

            half_life = self._usable_half_life(pair_info)
            if half_life is None or half_life == np.inf:
                return width

            # Narrower band for fast-reverting pairs, wider for slow ones
            if half_life < 10:
                return width * 0.9
            if half_life > 20:
                return width * 1.1
            return width

        except Exception as e:
            logging.error(f"Error en adjust_no_trade_band: {str(e)}")
            return 0.3

    def check_recent_signals(self, pair_id, current_date):
        """Tell whether enough trading bars have passed since the pair's last signal.

        Elapsed bars are approximated with business days (Monday-Friday);
        exchange holidays are not taken into account.

        Args:
            pair_id: Key of the pair in ``signal_history``.
            current_date: Date of the bar being evaluated (``datetime`` or
                ``pandas.Timestamp``).

        Returns:
            True when there is no previous signal, when at least
            ``min_bars_between_trades`` business days have elapsed, or when the
            check itself fails; False otherwise.
        """
        try:
            if pair_id not in self.signal_history:
                return True

            last_signal_date = self.signal_history[pair_id]['date']
            last_day = np.datetime64(last_signal_date.date(), 'D')
            current_day = np.datetime64(current_date.date(), 'D')

            if current_day <= last_day:
                return False  # Same date (or earlier): too soon

            # Count weekdays in [last_day, current_day) so that weekends do
            # not count as elapsed bars (Friday -> Monday is one bar, not three).
            elapsed_bars = int(np.busday_count(last_day, current_day))
            return elapsed_bars >= self.min_bars_between_trades

        except Exception as e:
            logging.error(f"Error en check_recent_signals: {str(e)}")
            return True  # When in doubt, allow the signal

    def generate_signal(self, data, pair_info, regime, current_position=0, conv_probability=0.5,
                        current_date=None, market_conditions=None):
        """Produce a trading signal for one pair.

        Args:
            data: Mapping whose ``'prices'`` entry is a DataFrame of prices.
            pair_info: Mapping with ``'ticker1'``, ``'ticker2'``,
                ``'hedge_ratio'`` and optionally ``'half_life'``.
            regime: Regime id used to select thresholds.
            current_position: 0 (flat), 1 (long) or -1 (short).
            conv_probability: Convergence probability; values above 0.5 loosen
                the entry thresholds, values below tighten them.
            current_date: Date of the evaluation; defaults to the last index of
                the price data, or ``datetime.now()`` if the data is empty.
            market_conditions: Optional mapping read for ``'high_volatility'``,
                ``'volatility_increase'`` and ``'cost_bps'``.

        Returns:
            dict with ``'signal'`` (1 buy, -1 sell, 0 none), ``'z_score'``,
            ``'strength'`` (0..1) and ``'reason'``; when a z-score was computed
            it also contains ``'thresholds'``.
        """
        try:
            ticker1 = pair_info['ticker1']
            ticker2 = pair_info['ticker2']
            hedge_ratio = pair_info['hedge_ratio']
            pair_id = f"{ticker1}_{ticker2}"

            if current_date is None:
                current_date = data['prices'].index[-1] if not data['prices'].empty else datetime.now()

            # Enforce the minimum spacing between signals of the same pair
            if not self.check_recent_signals(pair_id, current_date):
                logging.info(f"Tiempo insuficiente desde última señal para {pair_id}")
                return self._no_signal("Tiempo insuficiente desde última señal")

            z_score = self.calculate_z_score(data, pair_info)
            if z_score is None:
                return self._no_signal("Z-score no disponible")

            # Shift thresholds by up to +/-0.25 according to convergence probability
            prob_adj = (conv_probability - 0.5) * 0.5
            regime_entry = self.entry_thresholds[regime]
            regime_exit = self.exit_thresholds[regime]

            entry_long = regime_entry['long'] + prob_adj
            entry_short = regime_entry['short'] - prob_adj
            exit_long = regime_exit['long'] - prob_adj
            exit_short = regime_exit['short'] + prob_adj

            conditions = market_conditions or {}
            if conditions.get('high_volatility'):
                # Entries require a larger deviation (e.g. -2.0 -> -2.2).
                entry_long *= 1.1
                entry_short *= 1.1
                # NOTE(review): scaling exits by 0.8 moves them towards zero
                # (-0.5 -> -0.4), which makes exits trigger later, not
                # earlier - confirm this matches the intended behaviour.
                exit_long *= 0.8
                exit_short *= 0.8

                logging.info(f"Umbrales ajustados por alta volatilidad para {pair_id}")

            vol_increase = conditions.get('volatility_increase', 0.0)
            cost_bps = conditions.get('cost_bps', 1.0)
            no_trade_band = self.adjust_no_trade_band(pair_info, regime, vol_increase, cost_bps)
            exit_no_trade_band = no_trade_band * 1.2

            signal = 0
            strength = 0
            reason = "Sin señal"

            if current_position == 0:  # Flat: look for entries
                if z_score < entry_long:
                    signal = 1
                    strength = min(1.0, (entry_long - z_score) / abs(entry_long * 0.5))
                    reason = "Entrada larga"
                elif z_score > entry_short:
                    signal = -1
                    strength = min(1.0, (z_score - entry_short) / abs(entry_short * 0.5))
                    reason = "Entrada corta"
            elif current_position == 1:  # Long: look for the exit
                if z_score > exit_long:
                    signal = -1
                    strength = min(1.0, (z_score - exit_long) / abs(exit_long * 0.5))
                    reason = "Cierre de larga"
            elif current_position == -1:  # Short: look for the exit
                if z_score < exit_short:
                    signal = 1
                    strength = min(1.0, (exit_short - z_score) / abs(exit_short * 0.5))
                    reason = "Cierre de corta"

            # Suppress signals whose z-score lies inside the no-trade band.
            # NOTE(review): for open positions this cancels exits exactly when
            # the spread is closest to its mean (|z| < band) - confirm intent.
            if signal != 0:
                if current_position == 0 and abs(z_score) < no_trade_band:
                    signal = 0
                    strength = 0
                    reason = f"Dentro de banda de no-transacción ({no_trade_band:.2f})"
                elif current_position != 0 and abs(z_score) < exit_no_trade_band:
                    signal = 0
                    strength = 0
                    reason = f"Dentro de banda de no-transacción para salida ({exit_no_trade_band:.2f})"

            # Compare the signal with the recent trend of the spread
            if signal != 0:
                try:
                    trend_window = min(10, len(data['prices']) // 10)
                    # A window below 5 has too few points; this guard also
                    # avoids iloc[-0:], which would select the whole frame.
                    if trend_window >= 5:
                        recent = data['prices'].iloc[-trend_window:][[ticker1, ticker2]]
                        recent_spread = recent[ticker1] + hedge_ratio * recent[ticker2]

                        slope = linregress(np.arange(len(recent_spread)), recent_spread.values)[0]
                        trend = 1 if slope > 0 else -1 if slope < 0 else 0

                        if abs(slope) > 0.02:
                            if signal * trend < 0:
                                # Signals against a significant trend are boosted
                                strength = min(1.0, strength * 1.2)
                                reason += " - Contra tendencia (señal más fuerte)"
                            elif signal * trend > 0:
                                reason += " - Con tendencia"
                except Exception as e:
                    logging.warning(f"No se pudo analizar tendencia: {str(e)}")

            # Remember the signal so the spacing check can use it later
            if signal != 0:
                self.signal_history[pair_id] = {
                    'date': current_date,
                    'z_score': z_score,
                    'signal': signal,
                    'strength': strength
                }

                logging.info(f"Señal generada para {pair_id}: {signal} (fuerza={strength:.2f}, z-score={z_score:.2f}, razón={reason})")

            return {
                'signal': signal,
                'z_score': z_score,
                'strength': strength,
                'reason': reason,
                'thresholds': {
                    'entry_long': entry_long,
                    'entry_short': entry_short,
                    'exit_long': exit_long,
                    'exit_short': exit_short,
                    'no_trade_band': no_trade_band
                }
            }
        except Exception as e:
            logging.error(f"Error en generate_signal para {pair_info.get('ticker1', '')}-{pair_info.get('ticker2', '')}: {str(e)}")
            logging.error(traceback.format_exc())
            return self._no_signal("Error en generate_signal")
        
# Gestión de Posiciones y Riesgo
class PositionManager:
    """Gestiona posiciones y riesgo para la estrategia con parámetros realistas"""
    
    def __init__(self):
        # Límites de posición por régimen (reducidos para mayor prudencia)
        self.position_limits = {
            1: 0.025,  # 2.5% máximo por par en Régimen 1 (favorable)
            2: 0.02,   # 2% en Régimen 2 (transición)
            3: 0.015   # 1.5% en Régimen 3 (crisis)
        }
        
        # Factores ATR para stop-loss (aumentados para mayor margen)
        self.atr_factors = {
            1: 3.5,  # Régimen favorable - stops más amplios
            2: 3.0,  # Régimen de transición
            3: 2.5   # Régimen de crisis - stops más ajustados
        }
        
        # Límites de concentración por sector (reducidos)
        self.sector_limits = {
            1: 0.18,  # 18% por sector en Régimen 1
            2: 0.15,  # 15% en Régimen 2
            3: 0.12   # 12% en Régimen 3
        }
        
        # Volatilidad objetivo por régimen (reducida para mejor control de riesgo)
        self.vol_targets = {
            1: 0.08,  # 8% anualizada en régimen favorable
            2: 0.07,  # 7% anualizada en régimen de transición
            3: 0.05   # 5% anualizada en régimen de crisis
        }
        
        # Circuit breakers reajustados
        self.circuit_breakers = {
            'level1': {'reduction': 0.30, 'vix_percentile': 75, 'vix_increase': 0.08},
            'level2': {'reduction': 0.55, 'vix_percentile': 85, 'vix_increase': 0.15, 'correlation': 0.65},
            'level3': {'reduction': 0.80, 'vix_percentile': 90, 'vix_increase': 0.20, 'correlation': 0.75}
        }
        
        # Parámetros de costos de transacción
        self.transaction_costs = {
            'commission_pct': 0.0005,  # 0.05% comisión por operación (5bps)
            'slippage_factor': 0.0003, # 0.03% slippage base (3bps)
            'market_impact_factor': 0.1 # Factor de impacto de mercado (% de ADV)
        }
        
        # Parámetros de liquidez
        self.liquidity_params = {
            'min_adv_usd': 5000000,   # $5M ADV mínimo
            'max_position_pct_adv': 0.03,  # Máximo 3% del ADV diario
            'illiquid_slippage_multiplier': 2.0  # Doble slippage para baja liquidez
        }
        
        self.current_positions = {}
        self.position_history = []
        self.last_risk_check = None
    
    def calculate_transaction_costs(self, pair_info, position_size, data):
        """Calcula costos de transacción realistas incluyendo comisiones, slippage e impacto de mercado"""
        try:
            ticker1 = pair_info['ticker1']
            ticker2 = pair_info['ticker2']
            
            # 1. Comisiones fijas
            commission_cost = abs(position_size) * self.transaction_costs['commission_pct']
            
            # 2. Slippage basado en liquidez
            base_slippage = self.transaction_costs['slippage_factor']
            
            # Obtener liquidez reciente
            ticker1_adv = data['adv'].iloc[-5:][ticker1].mean() if ticker1 in data['adv'].columns else 0
            ticker2_adv = data['adv'].iloc[-5:][ticker2].mean() if ticker2 in data['adv'].columns else 0
            
            # Si datos no disponibles, estimar con volumen * precio
            if ticker1_adv == 0 and 'volumes' in data and 'prices' in data:
                if ticker1 in data['volumes'].columns and ticker1 in data['prices'].columns:
                    vol = data['volumes'].iloc[-5:][ticker1].mean()
                    price = data['prices'].iloc[-1][ticker1]
                    ticker1_adv = vol * price
            
            if ticker2_adv == 0 and 'volumes' in data and 'prices' in data:
                if ticker2 in data['volumes'].columns and ticker2 in data['prices'].columns:
                    vol = data['volumes'].iloc[-5:][ticker2].mean()
                    price = data['prices'].iloc[-1][ticker2]
                    ticker2_adv = vol * price
            
            # Determinar el activo menos líquido
            min_adv = min(ticker1_adv, ticker2_adv)
            min_adv = max(min_adv, 1000000)  # Asegurar valor mínimo para evitar división por cero
            
            # Calcular slippage ajustado por liquidez
            if min_adv < self.liquidity_params['min_adv_usd']:
                slippage_multiplier = self.liquidity_params['illiquid_slippage_multiplier']
                logging.info(f"Aumentando slippage para {ticker1}-{ticker2} por baja liquidez: ${min_adv/1e6:.1f}M < ${self.liquidity_params['min_adv_usd']/1e6:.1f}M")
            else:
                slippage_multiplier = 1.0
                
            adjusted_slippage = base_slippage * slippage_multiplier
            
            # 3. Impacto de mercado (aumenta con el tamaño relativo a ADV)
            # Estimar valor nominal de la posición
            notional_value = abs(position_size) * 1e6  # Asumiendo $1M como capital base para simplificar
            
            # Calcular % de ADV
            pct_of_adv = notional_value / min_adv if min_adv > 0 else 0.05
            
            # Ajustar por tamaño (modelo simplificado: impacto ~ raíz cuadrada del tamaño relativo)
            market_impact = self.transaction_costs['market_impact_factor'] * np.sqrt(pct_of_adv)
            
            # Total de costos como % del valor de la posición
            total_cost_pct = commission_cost + adjusted_slippage + market_impact
            
            logging.info(f"Costos para {ticker1}-{ticker2}: comisión={commission_cost*100:.3f}%, slippage={adjusted_slippage*100:.3f}%, impacto={market_impact*100:.3f}%, total={total_cost_pct*100:.3f}%")
            
            return {
                'total_pct': total_cost_pct,
                'commission': commission_cost,
                'slippage': adjusted_slippage,
                'market_impact': market_impact,
                'min_adv': min_adv,
                'pct_of_adv': pct_of_adv
            }
            
        except Exception as e:
            logging.error(f"Error en calculate_transaction_costs: {str(e)}")
            logging.error(traceback.format_exc())
            # Retornar valor por defecto en caso de error
            return {
                'total_pct': 0.001,  # 0.1% por defecto
                'commission': 0.0005,
                'slippage': 0.0005,
                'market_impact': 0,
                'min_adv': 0,
                'pct_of_adv': 0
            }
    
    def calculate_position_size(self, pair_info, signal_strength, regime, 
                               conv_probability, current_volatility, data):
        """Calcula tamaño óptimo de posición considerando todos los factores relevantes"""
        try:
            ticker1 = pair_info['ticker1']
            ticker2 = pair_info['ticker2']
            
            # Verificar volatilidad válida
            if current_volatility <= 0:
                logging.warning(f"Volatilidad inválida para {ticker1}-{ticker2}")
                return 0
                
            # 1. Volatilidad inversa normalizada con límites realistas
            vol_sizing = min(0.2 / current_volatility, 3.0) / 3.0
            
            # 2. Ajuste por convicción (probabilidad de convergencia)
            # Más convicción = posición más grande
            conv_multiplier = 0.7 + 0.6 * conv_probability
            
            # 3. Liquidez y capacidad
            liquidity = self.check_liquidity_constraint(pair_info, data)
            liquidity_factor = min(1.0, liquidity / 50e6)
            
            # 4. Señal y fuerza de la señal
            signal_factor = signal_strength
            
            # Calcular tamaño base
            base_size = vol_sizing * conv_multiplier * liquidity_factor * signal_factor
            
            # Aplicar límite por régimen
            max_size = self.position_limits[regime]
            position_size = min(base_size * max_size, max_size)
            
            # Verificar costos de transacción
            costs = self.calculate_transaction_costs(pair_info, position_size, data)
            
            # Si los costos son muy altos relativos al retorno esperado, reducir posición
            expected_return = 0.01  # Retorno esperado por default (1%)
            
            # Estimar retorno esperado basado en z-score y half-life
            if 'z_score' in pair_info and 'half_life' in pair_info:
                z_score = pair_info.get('z_score', 2.0)
                half_life = pair_info.get('half_life', 15)
                
                # Modelo simple: mayor z-score y menor half-life = mayor retorno esperado
                expected_return = min(abs(z_score) * 0.01, 0.03) * (10 / max(half_life, 5))
            
            # Ajustar por probabilidad de convergencia
            expected_return *= conv_probability
            
            # Si los costos son más del 25% del retorno esperado, reducir proporcionalmente
            if costs['total_pct'] > 0.25 * expected_return:
                cost_adjustment = (expected_return / (4 * costs['total_pct']))
                position_size *= cost_adjustment
                logging.info(f"Posición reducida por costos altos: {cost_adjustment:.2f} x tamaño")
            
            # Verificar restricción de ADV
            if 'min_adv' in costs and costs['min_adv'] > 0:
                max_by_adv = self.liquidity_params['max_position_pct_adv'] * costs['min_adv'] / 1e6
                if position_size > max_by_adv:
                    position_size = max_by_adv
                    logging.info(f"Posición limitada por ADV a {position_size:.4f}")
            
            logging.info(f"Tamaño calculado para {ticker1}-{ticker2}: {position_size:.4f}")
            return position_size
            
        except Exception as e:
            logging.error(f"Error en calculate_position_size: {str(e)}")
            logging.error(traceback.format_exc())
            return 0
    
    def check_liquidity_constraint(self, pair_info, data):
        """Verifica restricciones de liquidez y capacidad para un par"""
        try:
            ticker1 = pair_info['ticker1']
            ticker2 = pair_info['ticker2']
            
            # Obtener ADV en dólares
            ticker1_adv = data['adv'].iloc[-5:][ticker1].mean() if ticker1 in data['adv'].columns else 0
            ticker2_adv = data['adv'].iloc[-5:][ticker2].mean() if ticker2 in data['adv'].columns else 0
            
            # Verificar datos
            if ticker1_adv == 0 or ticker2_adv == 0:
                # Intentar calcular con volumen * precio
                if 'volumes' in data and 'prices' in data:
                    if ticker1_adv == 0 and ticker1 in data['volumes'].columns and ticker1 in data['prices'].columns:
                        vol = data['volumes'].iloc[-5:][ticker1].mean()
                        price = data['prices'].iloc[-1][ticker1]
                        ticker1_adv = vol * price
                    
                    if ticker2_adv == 0 and ticker2 in data['volumes'].columns and ticker2 in data['prices'].columns:
                        vol = data['volumes'].iloc[-5:][ticker2].mean()
                        price = data['prices'].iloc[-1][ticker2]
                        ticker2_adv = vol * price
            
            # Si aún no hay datos, usar valor por defecto bajo
            if ticker1_adv == 0:
                ticker1_adv = self.liquidity_params['min_adv_usd'] / 2
            if ticker2_adv == 0:
                ticker2_adv = self.liquidity_params['min_adv_usd'] / 2
            
            # Retornar el mínimo (el activo menos líquido determina la capacidad)
            return min(ticker1_adv, ticker2_adv)
            
        except Exception as e:
            logging.error(f"Error en check_liquidity_constraint: {str(e)}")
            return self.liquidity_params['min_adv_usd'] / 2  # Valor por defecto conservador
    
    def calculate_stop_loss(self, data, pair_info, regime, entry_price, position_type):
        """Calcula nivel de stop-loss con ATR adaptativo"""
        try:
            ticker1 = pair_info['ticker1']
            ticker2 = pair_info['ticker2']
            hedge_ratio = pair_info['hedge_ratio']
            
            # Verificar datos
            if ticker1 not in data['prices'].columns or ticker2 not in data['prices'].columns:
                logging.warning(f"Tickers {ticker1} o {ticker2} no disponibles para stop-loss")
                # Usar valor por defecto
                return entry_price * (0.95 if position_type == 1 else 1.05)
            
            # Calcular spread con ventana adaptativa
            lookback = min(60, len(data['prices']) - 1)
            lookback = max(lookback, 14)  # Mínimo 14 días para ATR decente
            
            prices = data['prices'].iloc[-lookback:][[ticker1, ticker2]]
            
            # Verificar datos suficientes
            if len(prices) < 10 or prices[ticker1].isna().any() or prices[ticker2].isna().any():
                logging.warning(f"Datos insuficientes para ATR de {ticker1}-{ticker2}")
                # Usar valor por defecto basado en régimen
                default_stop_pct = 0.05 if regime == 1 else 0.04 if regime == 2 else 0.03
                return entry_price * (1 - default_stop_pct) if position_type == 1 else entry_price * (1 + default_stop_pct)
            
            # Calcular spread
            spread = prices[ticker1] + hedge_ratio * prices[ticker2]
            
            # Calcular ATR del spread
            spread_high = spread.rolling(window=2).max()
            spread_low = spread.rolling(window=2).min()
            tr = spread_high - spread_low
            
            # Usar ventana adaptativa para ATR
            atr_window = min(14, len(tr) // 3)
            atr = tr.rolling(window=atr_window).mean().iloc[-1]
            
            # Verificar valor válido
            if pd.isna(atr) or atr == 0:
                atr = spread.std()
                
            # Si aún es inválido, usar % del precio
            if pd.isna(atr) or atr == 0:
                atr = abs(entry_price) * 0.02  # 2% por defecto
                
            # Aplicar factor según régimen
            atr_factor = self.atr_factors[regime]
            stop_distance = atr * atr_factor
            
            # Calcular stop-loss
            if position_type == 1:  # Long
                stop_loss = entry_price - stop_distance
            else:  # Short
                stop_loss = entry_price + stop_distance
                
            # Verificar que el stop no está demasiado cerca
            min_distance_pct = 0.01  # Mínimo 1% de distancia
            min_distance = abs(entry_price) * min_distance_pct
            
            if abs(stop_loss - entry_price) < min_distance:
                # Ajustar stop para tener distancia mínima
                stop_loss = entry_price - min_distance if position_type == 1 else entry_price + min_distance
                logging.info(f"Stop-loss ajustado a distancia mínima para {ticker1}-{ticker2}")
            
            logging.info(f"Stop-loss para {ticker1}-{ticker2}: {stop_loss:.4f}, distancia: {abs(stop_loss-entry_price)/abs(entry_price)*100:.2f}%")
            return stop_loss
            
        except Exception as e:
            logging.error(f"Error en calculate_stop_loss: {str(e)}")
            logging.error(traceback.format_exc())
            # Valor por defecto en caso de error (5% de distancia)
            return entry_price * 0.95 if position_type == 1 else entry_price * 1.05
    
    def calculate_time_stop(self, pair_info):
        """Return the time-based stop, in days, derived from the pair's half-life.

        The stop is 2.5 times the half-life, truncated to an integer and
        clamped to the range [5, 30]. A fallback of 15 days is returned when
        the half-life is missing, None, NaN, infinite, non-positive or not
        numeric.

        Args:
            pair_info: Mapping describing the pair; only the optional
                'half_life' entry is read.

        Returns:
            int: Number of days after which the position should be closed.
        """
        default_days = 15

        try:
            if 'half_life' not in pair_info:
                return default_days

            half_life = pair_info['half_life']

            # np.isfinite rejects every NaN and +/-inf value. A tuple
            # membership test would only match the np.nan singleton (by
            # identity), letting other NaN floats reach int() and raise.
            if half_life is None or not np.isfinite(half_life):
                return default_days

            if half_life <= 0:
                logging.warning(f"Half-life inválido: {half_life}, usando valor por defecto")
                return default_days

            # Target holding time: 2.5x the half-life, kept within 5-30 days.
            time_stop = int(2.5 * half_life)
            return max(5, min(time_stop, 30))

        except Exception as e:
            # Non-numeric half-life values (e.g. strings) end up here.
            logging.error(f"Error en calculate_time_stop: {str(e)}")
            return default_days
    
    def check_circuit_breakers(self, data, sector_map, current_date=None):
        """Return the exposure multiplier dictated by the market-stress circuit breakers.

        The volatility gauge is the '^VIX' price column when present;
        otherwise it is the annualized rolling standard deviation (in percent)
        of the cross-sectional mean return. The latest gauge value is compared
        with its 75th/85th/90th percentiles over up to two years of history,
        together with its one-day change and the average pairwise correlation
        of recent returns, against the thresholds in ``self.circuit_breakers``.

        Args:
            data: Mapping with a 'prices' DataFrame and, optionally, a
                'returns' DataFrame.
            sector_map: Not used by this method; kept for interface
                compatibility.
            current_date: Evaluation date. Defaults to the last entry of the
                price index, or to the current time if prices are empty.

        Returns:
            float: 1.0 when no breaker is active; ``1 - reduction`` of the
            triggered level otherwise; 0.8 when fewer than 10 gauge
            observations exist; 1.0 when no gauge can be built; 0.7 if an
            unexpected error occurs. A repeated call on the same date returns
            the level computed by the first call of that date.
        """
        try:
            if current_date is None:
                current_date = data['prices'].index[-1] if not data['prices'].empty else datetime.now()

            # Same-day re-check: reuse the level computed earlier today.
            # Returning 1.0 here would silently lift an active breaker when
            # the portfolio is optimized more than once on the same date.
            if self.last_risk_check and self.last_risk_check.date() == current_date.date():
                logging.info("Circuit breakers ya verificados hoy")
                return getattr(self, '_last_circuit_level', 1.0)

            vix_proxy = None

            # Prefer direct VIX data when the price frame carries it.
            if '^VIX' in data['prices'].columns:
                vix_data = data['prices']['^VIX']
                if not vix_data.empty:
                    vix_proxy = vix_data
                    logging.info("Usando datos de VIX directos")

            # Otherwise fall back to realized volatility of the mean return.
            if vix_proxy is None or vix_proxy.empty:
                if 'returns' in data and not data['returns'].empty:
                    returns = data['returns'].mean(axis=1)
                    # Adaptive window: at most 22 days, shorter for short samples.
                    vol_window = min(22, len(returns) // 5)
                    vix_proxy = returns.rolling(window=vol_window).std() * np.sqrt(252) * 100
                    logging.info(f"Usando proxy de VIX con volatilidad a {vol_window} días")
                else:
                    logging.warning("Sin datos para cálculo de VIX")
                    return 1.0

            if len(vix_proxy) < 60:
                logging.warning(f"Datos insuficientes para VIX: {len(vix_proxy)} < 60")
                # With almost no history, cut exposure by 20% as a precaution.
                if len(vix_proxy) < 10:
                    return 0.8

            # Historical percentiles over at most two trading years.
            lookback = min(252 * 2, len(vix_proxy))
            vix_history = vix_proxy.iloc[-lookback:]

            vix_75 = vix_history.quantile(0.75)
            vix_85 = vix_history.quantile(0.85)
            vix_90 = vix_history.quantile(0.90)

            current_vix = vix_proxy.iloc[-1]
            vix_1d_change = 0

            if len(vix_proxy) > 1:
                previous_vix = vix_proxy.iloc[-2]
                # A zero or missing previous reading would produce inf/NaN and
                # could trip Level 2 on a pure data artefact.
                if pd.notna(previous_vix) and previous_vix != 0:
                    vix_1d_change = current_vix / previous_vix - 1

            # Average pairwise correlation as a measure of systemic stress.
            avg_correlation = None

            if 'returns' in data and not data['returns'].empty:
                returns_window = min(20, len(data['returns']) // 5)
                returns = data['returns'].iloc[-returns_window:]

                if len(returns) > 5 and returns.shape[1] > 5:
                    corr_matrix = returns.corr()

                    # Upper triangle without the diagonal: each pair once.
                    mask = np.triu(np.ones(corr_matrix.shape), k=1).astype(bool)
                    pairwise = corr_matrix.values[mask]

                    # Drop undefined correlations (e.g. constant columns) so a
                    # single NaN does not disable the correlation trigger.
                    valid = pairwise[~np.isnan(pairwise)]
                    if valid.size:
                        avg_correlation = valid.mean()
                        logging.info(f"Correlación media actual: {avg_correlation:.3f}")

            message = ""

            # Level 3 (most severe): extreme gauge AND high correlation.
            if (current_vix > vix_90 and
                    avg_correlation is not None and
                    avg_correlation > self.circuit_breakers['level3']['correlation']):
                circuit_level = 1.0 - self.circuit_breakers['level3']['reduction']
                message = f"CIRCUIT BREAKER NIVEL 3: VIX {current_vix:.1f} > {vix_90:.1f} (p90) y correlación {avg_correlation:.3f} > {self.circuit_breakers['level3']['correlation']:.3f}"

            # Level 2: any one of high gauge, high correlation or gauge spike.
            elif (current_vix > vix_85 or
                  (avg_correlation is not None and avg_correlation > self.circuit_breakers['level2']['correlation']) or
                  (vix_1d_change > self.circuit_breakers['level2']['vix_increase'])):
                circuit_level = 1.0 - self.circuit_breakers['level2']['reduction']

                # Report only the conditions that actually fired.
                reasons = []
                if current_vix > vix_85:
                    reasons.append(f"VIX {current_vix:.1f} > {vix_85:.1f} (p85)")
                if avg_correlation is not None and avg_correlation > self.circuit_breakers['level2']['correlation']:
                    reasons.append(f"correlación {avg_correlation:.3f} > {self.circuit_breakers['level2']['correlation']:.3f}")
                if vix_1d_change > self.circuit_breakers['level2']['vix_increase']:
                    reasons.append(f"aumento de VIX {vix_1d_change*100:.1f}% > {self.circuit_breakers['level2']['vix_increase']*100:.1f}%")
                message = "CIRCUIT BREAKER NIVEL 2: " + " o ".join(reasons)

            # Level 1: elevated gauge AND a one-day jump.
            elif (current_vix > vix_75 and
                  vix_1d_change > self.circuit_breakers['level1']['vix_increase']):
                circuit_level = 1.0 - self.circuit_breakers['level1']['reduction']
                message = f"CIRCUIT BREAKER NIVEL 1: VIX {current_vix:.1f} > {vix_75:.1f} (p75) y aumento {vix_1d_change*100:.1f}% > {self.circuit_breakers['level1']['vix_increase']*100:.1f}%"

            else:
                circuit_level = 1.0
                message = "Sin circuit breakers activos"

            logging.info(message)

            # Remember when and what was decided so same-day calls agree.
            self.last_risk_check = current_date
            self._last_circuit_level = circuit_level

            return circuit_level

        except Exception as e:
            logging.error(f"Error en check_circuit_breakers: {str(e)}")
            logging.error(traceback.format_exc())
            return 0.7  # Conservative 30% reduction when the check itself fails
    
    def check_sector_concentration(self, new_positions, sector_map, regime):
        """Scale down positions whose sectors exceed the regime's exposure limit.

        Each pair contributes half of its absolute size to the sector of each
        leg. Tickers whose sector is missing or falsy in ``sector_map`` are
        grouped under 'Unknown'. Every pair touching an over-limit sector is
        scaled by ``limit / exposure`` of its most constrained sector.

        Args:
            new_positions: Mapping ``"TICKER1_TICKER2" -> position dict``; each
                position must carry a numeric 'size'.
            sector_map: Mapping ticker -> sector name.
            regime: Key into ``self.sector_limits``.

        Returns:
            dict: ``{}`` for empty input; ``new_positions`` itself when every
            sector is within its limit (or when an error occurs); otherwise a
            new mapping in which scaled pairs are shallow copies carrying
            'original_size', 'scale_factor' and 'scale_reason', and untouched
            pairs are the original objects.
        """
        try:
            if not new_positions:
                return {}

            sector_exposure = {}
            # Sectors resolved once per pair, so the exposure pass and the
            # scaling pass always agree on which sector a ticker belongs to
            # (including tickers mapped to None or '').
            pair_sectors = {}

            for pair_id, position in new_positions.items():
                ticker1, ticker2 = pair_id.split('_')

                resolved = []
                for ticker in (ticker1, ticker2):
                    sector = sector_map.get(ticker)
                    if not sector:
                        logging.warning(f"Sector desconocido para {ticker}, usando 'Unknown'")
                        sector = 'Unknown'
                    resolved.append(sector)
                pair_sectors[pair_id] = (resolved[0], resolved[1])

                # Half of the pair's exposure is attributed to each leg's sector.
                half_size = abs(position['size']) / 2
                for sector in resolved:
                    sector_exposure[sector] = sector_exposure.get(sector, 0) + half_size

            sector_limit = self.sector_limits[regime]

            # Scale factor for every sector above its limit.
            sector_scales = {}
            for sector, exposure in sector_exposure.items():
                if exposure > sector_limit:
                    logging.warning(f"Sector {sector} excede límite: {exposure:.3f} > {sector_limit:.3f}")
                    sector_scales[sector] = sector_limit / exposure

            if not sector_scales:
                logging.info("Concentración sectorial dentro de límites")
                return new_positions

            adjusted_positions = {}

            for pair_id, position in new_positions.items():
                sector1, sector2 = pair_sectors[pair_id]

                # The more restrictive of the two legs' sectors wins.
                scale1 = sector_scales.get(sector1, 1.0)
                scale2 = sector_scales.get(sector2, 1.0)
                scale = min(scale1, scale2)

                if scale < 1.0:
                    adjusted_position = position.copy()
                    adjusted_position['size'] *= scale
                    adjusted_position['original_size'] = position['size']  # kept for reference
                    adjusted_position['scale_factor'] = scale
                    adjusted_position['scale_reason'] = f"Límite sectorial: {sector1 if scale1 < scale2 else sector2}"
                    adjusted_positions[pair_id] = adjusted_position

                    logging.info(f"Posición {pair_id} ajustada por concentración sectorial: {position['size']:.4f} -> {adjusted_position['size']:.4f}")
                else:
                    adjusted_positions[pair_id] = position

            return adjusted_positions

        except Exception as e:
            logging.error(f"Error en check_sector_concentration: {str(e)}")
            logging.error(traceback.format_exc())
            # On error, keep the original positions rather than dropping them.
            return new_positions
    
    def optimize_portfolio(self, data, pairs_with_signals, regime, sector_map, current_date=None):
        """Build the target portfolio from pair signals under the risk constraints.

        Steps: estimate an annualized volatility per pair from the last (up
        to) 60 price rows; rank pairs by signal, strength, convergence
        probability and inverse volatility; size each position through
        ``self.calculate_position_size``; attach stop-loss, time stop and
        costs; apply sector limits, volatility targeting (scale clamped to
        [0.5, 2.0]) and circuit breakers. The result is stored in
        ``self.current_positions`` and a snapshot is appended to
        ``self.position_history``.

        Args:
            data: Mapping with a 'prices' DataFrame and, optionally, a
                'realized_vol' DataFrame.
            pairs_with_signals: Iterable of pair dicts carrying 'ticker1',
                'ticker2', 'hedge_ratio', 'conv_probability' and a 'signal'
                dict with 'signal', 'strength' and 'z_score'.
            regime: Key into ``self.vol_targets`` (also forwarded to helpers).
            sector_map: Mapping ticker -> sector, forwarded to the sector and
                circuit-breaker checks.
            current_date: Date stamped on the positions; defaults to the last
                price index entry.

        Returns:
            dict: Mapping ``"TICKER1_TICKER2" -> position dict``. ``{}`` when
            prices are empty or no position qualifies; the previous
            ``self.current_positions`` if an unexpected error occurs.
        """
        try:
            if current_date is None:
                current_date = data['prices'].index[-1] if not data['prices'].empty else datetime.now()

            if data['prices'].empty:
                logging.error("Datos insuficientes para optimización")
                return {}

            logging.info(f"Optimizando portfolio para régimen {regime} con {len(pairs_with_signals)} pares")

            # Fallback of 3% daily volatility, expressed in annualized units
            # (~48%) so it is comparable with the annualized estimates below
            # and really is a conservative (high) value.
            default_volatility = 0.03 * np.sqrt(252)

            new_positions = {}
            pair_volatilities = {}
            market_conditions = {}

            # Market volatility: recent 20 rows versus the 40 rows before them.
            if 'realized_vol' in data and not data['realized_vol'].empty:
                realized_vol = data['realized_vol']

                # Use the same source (index column or cross-sectional mean)
                # for both windows so the comparison is like-for-like.
                use_index = '^GSPC' in realized_vol.columns

                def _window_level(window):
                    if use_index:
                        return window['^GSPC'].mean()
                    return window.mean().mean()

                mkt_vol = _window_level(realized_vol.iloc[-20:])

                if len(realized_vol) > 60:
                    prev_mkt_vol = _window_level(realized_vol.iloc[-60:-20])
                    vol_increase = mkt_vol / prev_mkt_vol - 1 if prev_mkt_vol > 0 else 0
                    market_conditions['volatility_increase'] = vol_increase

                    if vol_increase > 0.25:  # 25% increase
                        market_conditions['high_volatility'] = True
                        logging.warning(f"Alta volatilidad detectada: +{vol_increase*100:.1f}%")
                    else:
                        market_conditions['high_volatility'] = False

            # Per-pair spread volatility over an adaptive window.
            lookback = min(60, len(data['prices']) - 1)
            recent_prices = data['prices'].iloc[-lookback:]

            for pair_info in pairs_with_signals:
                ticker1 = pair_info['ticker1']
                ticker2 = pair_info['ticker2']
                hedge_ratio = pair_info['hedge_ratio']
                pair_id = f"{ticker1}_{ticker2}"

                # A pair without price columns is skipped (it gets no
                # volatility entry and is ignored below) instead of raising
                # KeyError and aborting the whole optimization.
                if ticker1 not in recent_prices.columns or ticker2 not in recent_prices.columns:
                    logging.warning(f"Precios no disponibles para {ticker1}-{ticker2}, par omitido")
                    continue

                prices = recent_prices[[ticker1, ticker2]]

                if len(prices) < 20 or prices[ticker1].isna().any() or prices[ticker2].isna().any():
                    logging.warning(f"Datos insuficientes para volatilidad de {ticker1}-{ticker2}")
                    pair_volatilities[pair_id] = default_volatility
                    continue

                # NOTE(review): the spread adds hedge_ratio * price2, which
                # presumably means hedge_ratio carries its own sign; confirm
                # against the pair selector.
                spread = prices[ticker1] + hedge_ratio * prices[ticker2]

                # Log returns of the spread; a zero or negative spread yields
                # inf/NaN, which must not reach the standard deviation.
                log_returns = np.log(spread / spread.shift(1))
                log_returns = log_returns.replace([np.inf, -np.inf], np.nan).dropna()

                if len(log_returns) < 5:
                    volatility = default_volatility
                else:
                    # Annualize with sqrt(252) trading days.
                    volatility = log_returns.std() * np.sqrt(252)

                    if not np.isfinite(volatility):
                        volatility = default_volatility
                    elif volatility < 0.05:  # below 5% a year is suspiciously low
                        volatility = 0.05
                    elif volatility > 0.80:  # above 80% a year is extreme
                        volatility = 0.80

                pair_volatilities[pair_id] = volatility

            # Rank by signal strength * probability * (1 / volatility).
            sorted_pairs = sorted(
                pairs_with_signals,
                key=lambda x: abs(x['signal']['signal']) * x['signal']['strength'] * x['conv_probability'] / (pair_volatilities.get(f"{x['ticker1']}_{x['ticker2']}", 0.3) + 0.05),
                reverse=True
            )

            total_risk = 0

            for pair_info in sorted_pairs:
                ticker1 = pair_info['ticker1']
                ticker2 = pair_info['ticker2']
                hedge_ratio = pair_info['hedge_ratio']
                signal = pair_info['signal']['signal']
                strength = pair_info['signal']['strength']
                z_score = pair_info['signal']['z_score']
                conv_probability = pair_info['conv_probability']

                pair_id = f"{ticker1}_{ticker2}"

                if signal == 0 or pair_id not in pair_volatilities:
                    logging.info(f"Sin señal para {pair_id} o volatilidad no disponible")
                    continue

                volatility = pair_volatilities[pair_id]
                position_size = self.calculate_position_size(
                    pair_info, strength, regime, conv_probability, volatility, data
                )

                # Ignore negligible (below 0.3%) or non-finite sizes; a NaN
                # would otherwise pass the "<" comparison and poison the
                # portfolio totals.
                min_size = 0.003
                if not np.isfinite(position_size) or position_size < min_size:
                    logging.info(f"Posición demasiado pequeña para {pair_id}: {position_size:.4f} < {min_size:.4f}")
                    continue

                signed_position = position_size * np.sign(signal)

                # Entry price is normalized to 1.0.
                entry_price = 1.0

                stop_loss = self.calculate_stop_loss(
                    data, pair_info, regime, entry_price, np.sign(signal)
                )

                time_stop = self.calculate_time_stop(pair_info)

                new_positions[pair_id] = {
                    'ticker1': ticker1,
                    'ticker2': ticker2,
                    'hedge_ratio': hedge_ratio,
                    'size': signed_position,
                    'signal': signal,
                    'strength': strength,
                    'z_score': z_score,
                    'entry_price': entry_price,
                    'stop_loss': stop_loss,
                    'time_stop': time_stop,
                    'entry_date': current_date,
                    'days_held': 0,
                    'half_life': pair_info.get('half_life', 15),
                    'volatility': volatility,
                    'costs': self.calculate_transaction_costs(pair_info, position_size, data)
                }

                # Accumulate risk as volatility * size.
                total_risk += abs(signed_position) * volatility

            if not new_positions:
                logging.warning("No se generaron posiciones nuevas")
                return {}

            adjusted_positions = self.check_sector_concentration(new_positions, sector_map, regime)

            # Recompute total risk after the sector adjustment.
            if adjusted_positions:
                total_risk = sum(abs(pos['size']) * pair_volatilities.get(pair_id, 0.3)
                                 for pair_id, pos in adjusted_positions.items())

            target_vol = self.vol_targets[regime]

            if total_risk > 0 and total_risk != target_vol:
                vol_scale = target_vol / total_risk

                # Clamp the scale factor to avoid extreme resizing.
                vol_scale = max(0.5, min(vol_scale, 2.0))

                logging.info(f"Escalando portfolio a volatilidad objetivo {target_vol:.1%}: factor {vol_scale:.2f}")

                for pair_id in adjusted_positions:
                    adjusted_positions[pair_id]['size'] *= vol_scale
                    adjusted_positions[pair_id]['vol_scale_factor'] = vol_scale

            circuit_breaker_scale = self.check_circuit_breakers(data, sector_map, current_date)

            if circuit_breaker_scale < 1.0:
                logging.warning(f"Circuit breaker activado: {circuit_breaker_scale:.2f}")

                for pair_id in adjusted_positions:
                    adjusted_positions[pair_id]['size'] *= circuit_breaker_scale
                    adjusted_positions[pair_id]['circuit_breaker_factor'] = circuit_breaker_scale

            self.current_positions = adjusted_positions

            # Snapshot for history; 'total_risk' is the value measured before
            # the volatility-target and circuit-breaker scaling.
            position_snapshot = {
                'date': current_date,
                'regime': regime,
                'positions': {k: v.copy() for k, v in adjusted_positions.items()},
                'total_risk': total_risk,
                'target_vol': target_vol,
                'circuit_breaker': circuit_breaker_scale,
                'market_conditions': market_conditions
            }
            self.position_history.append(position_snapshot)

            total_positions = len(adjusted_positions)
            total_exposure = sum(abs(pos['size']) for pos in adjusted_positions.values())

            logging.info(f"Portfolio optimizado: {total_positions} posiciones, exposición total {total_exposure:.2f}")

            return adjusted_positions

        except Exception as e:
            logging.error(f"Error en optimize_portfolio: {str(e)}")
            logging.error(traceback.format_exc())
            # On a serious error, keep the previous positions.
            return self.current_positions

# Estrategia completa
class StatArbStrategy:
    """Implementación completa y mejorada de la estrategia de arbitraje estadístico"""
    
    def __init__(self, config=None):
        """Create the strategy, its components and its empty state containers.

        Args:
            config: Optional mapping of overrides for ``default_config``.
                Keys that are not supplied keep their default value, so a
                partial configuration is accepted.
        """
        # Default configuration
        self.default_config = {
            'min_data_years': 5,
            'max_pairs_per_regime': {1: 25, 2: 20, 3: 15},
            'min_liquidity': 10e6,
            'recalibration_days': 5,
            'transaction_costs': True,
            'include_market_impact': True,
            'circuit_breakers_enabled': True
        }

        # Merge caller overrides on top of the defaults. Building a new dict
        # keeps partial configs complete and prevents self.config from
        # aliasing self.default_config (or the caller's own dict).
        self.config = {**self.default_config, **(config or {})}

        # Components
        self.regime_detector = RegimeDetector(
            # Training sample size is capped at 1260 rows (5 x 252).
            min_train_samples=min(252 * self.config.get('min_data_years', 5), 1260)
        )

        self.pair_selector = PairSelector(
            min_liquidity=self.config.get('min_liquidity', 10e6),
            max_pairs_by_regime=self.config.get('max_pairs_per_regime', {1: 25, 2: 20, 3: 15}),
            recalibration_days=self.config.get('recalibration_days', 5)
        )

        self.convergence_predictor = ConvergencePredictor()
        self.signal_generator = SignalGenerator()
        self.position_manager = PositionManager()

        # Strategy state
        self.current_regime = None
        self.previous_regime = None
        self.selected_pairs = []
        self.current_positions = {}
        self.positions_history = []
        self.regime_history = []
        self.recalibration_history = []

        # Backtesting results
        self.equity_curve = None
        self.trades_log = None

        # Performance metrics, filled in later
        self.metrics = {
            'overall': {
                'sharpe_ratio': None,
                'sortino_ratio': None,
                'max_drawdown': None,
                'annual_return': None,
                'volatility': None,
                'win_rate': None,
                'avg_trade_duration': None,
                'avg_profit_per_trade': None,
                'profit_factor': None
            },
            'by_regime': {
                1: {}, 2: {}, 3: {}
            },
            'by_sector': {},
            'by_period': {}
        }

        # Cost parameters, as fractions of traded notional
        self.trading_costs = {
            'commission': 0.0005,  # 5 bps (0.05%)
            'slippage': 0.0003,    # 3 bps (0.03%)
            'market_impact': 0.0002  # 2 bps (0.02%) base
        }

        # Metric history for monitoring
        self.metrics_history = []

        # Log the effective (merged) configuration
        logging.info("Estrategia de arbitraje estadístico inicializada con configuración:")
        for k, v in self.config.items():
            logging.info(f"  {k}: {v}")
    
    def initialize(self, data, sector_map, subsector_map, calibration_date=None):
        """Calibrate the strategy on the history available up to a given date.

        Detects the initial regime, generates and selects pairs, trains the
        convergence predictor and records the run in ``regime_history`` and
        ``recalibration_history``.

        Args:
            data: Mapping that must contain a non-empty 'prices' DataFrame.
            sector_map: Mapping ticker -> sector, forwarded to the components.
            subsector_map: Mapping ticker -> sub-industry, forwarded to the
                pair selector.
            calibration_date: Last date whose data may be used. Defaults to
                the last price index entry; a date later than today is
                replaced by the current time.

        Returns:
            bool: True on success, False if any step raised (the error is
            logged).
        """
        try:
            prices_available = 'prices' in data and not data['prices'].empty
            if not prices_available:
                raise ValueError("Datos de precios insuficientes o no proporcionados")

            price_history = data['prices']

            # Default to the most recent date present in the data.
            if calibration_date is None:
                calibration_date = price_history.index[-1]
                logging.info(f"Usando fecha de calibración por defecto: {calibration_date}")

            # Never calibrate on a date that lies in the future.
            now = datetime.now()
            if calibration_date.date() > now.date():
                logging.warning(f"Fecha futura detectada: {calibration_date}, ajustando a {now}")
                calibration_date = now

            logging.info(f"Inicializando estrategia en fecha: {calibration_date}")

            # A short history is only warned about; calibration proceeds.
            required_rows = 252 * self.config.get('min_data_years', 5)
            available_rows = len(price_history)
            if available_rows < required_rows:
                logging.warning(f"Datos históricos insuficientes: {available_rows} < {required_rows}")

            # Work only with data up to the calibration date.
            history = self._filter_data_until_date(data, calibration_date)

            logging.info("Detectando régimen inicial...")
            regime = self.regime_detector.fit_predict(history, sector_map, calibration_date)
            self.current_regime = regime
            self.previous_regime = regime
            logging.info(f"Régimen inicial: {self.current_regime}")

            self.regime_history.append({'date': calibration_date, 'regime': regime})

            logging.info("Generando pares candidatos...")
            candidates = self.pair_selector.generate_candidate_pairs(
                history, sector_map, subsector_map
            )
            logging.info(f"Pares candidatos generados: {len(candidates)}")

            logging.info(f"Seleccionando pares óptimos para régimen {self.current_regime}...")
            self.selected_pairs = self.pair_selector.select_pairs(history, regime, candidates)
            logging.info(f"Pares seleccionados: {len(self.selected_pairs)}")

            logging.info("Entrenando modelos predictivos...")
            trained = self.convergence_predictor.train(history, self.selected_pairs, regime)
            logging.info(f"Entrenamiento completado, éxito: {trained}")

            # Record this run alongside later recalibrations.
            init_record = {
                'date': calibration_date,
                'regime': regime,
                'pairs_count': len(self.selected_pairs),
                'training_success': trained,
                'type': 'initialization',
            }
            self.recalibration_history.append(init_record)

            logging.info("Estrategia inicializada exitosamente")
            return True

        except Exception as e:
            logging.error(f"Error en inicialización: {str(e)}")
            logging.error(traceback.format_exc())
            return False
    
    def _filter_data_until_date(self, data, end_date):
        """Filtra los datos hasta una fecha específica para evitar look-ahead bias"""
        filtered_data = {}
        
        for key, df in data.items():
            if isinstance(df, pd.DataFrame) or isinstance(df, pd.Series):
                # Verificar si el índice es de tipo datetime
                if isinstance(df.index, pd.DatetimeIndex):
                    filtered_data[key] = df[df.index <= end_date].copy()
                else:
                    # Si no es índice datetime, copiar igual
                    filtered_data[key] = df.copy()
            else:
                # Si no es DataFrame ni Series, copiar igual
                filtered_data[key] = df
        
        return filtered_data
    
    def update(self, data, sector_map, subsector_map, current_date=None):
        """Actualiza la estrategia con nuevos datos sin look-ahead bias"""
        try:
            # Verificar fecha actual
            if current_date is None:
                # Usar último día disponible en datos
                if 'prices' in data and not data['prices'].empty:
                    current_date = data['prices'].index[-1]
                else:
                    current_date = datetime.now()
            
            # Verificar datos hasta fecha actual
            filtered_data = self._filter_data_until_date(data, current_date)
            
            # Verificar si tenemos estado inicial
            if self.current_regime is None:
                logging.warning("Estrategia no inicializada, ejecutando inicialización")
                self.initialize(filtered_data, sector_map, subsector_map, current_date)
            
            logging.info(f"Actualizando estrategia para fecha: {current_date}")
            
            # Guardar régimen anterior
            self.previous_regime = self.current_regime
            
            # Actualizar régimen
            self.current_regime = self.regime_detector.fit_predict(
                filtered_data, sector_map, current_date)
            
            # Registrar historia
            self.regime_history.append({
                'date': current_date,
                'regime': self.current_regime
            })
            
            # Verificar recalibración
            market_conditions = self._analyze_market_conditions(filtered_data)
            needs_recalibration = self.pair_selector.check_recalibration_needed(
                current_date, 
                self.current_regime, 
                self.previous_regime, 
                market_conditions
            )
            
            # Recalibrar si es necesario
            if needs_recalibration:
                logging.info(f"Recalibración necesaria en fecha {current_date}")
                
                candidate_pairs = self.pair_selector.generate_candidate_pairs(
                    filtered_data, sector_map, subsector_map)
                
                self.selected_pairs = self.pair_selector.select_pairs(
                    filtered_data, self.current_regime, candidate_pairs, market_conditions)
                
                training_success = self.convergence_predictor.train(
                    filtered_data, self.selected_pairs, self.current_regime)
                
                # Registrar recalibración
                self.recalibration_history.append({
                    'date': current_date,
                    'regime': self.current_regime,
                    'pairs_count': len(self.selected_pairs),
                    'training_success': training_success,
                    'type': 'regular',
                    'market_conditions': market_conditions
                })
                
                logging.info(f"Recalibración completada: {len(self.selected_pairs)} pares seleccionados")
            
            # Generar señales para los pares seleccionados
            pairs_with_signals = []
            
            for pair_info in self.selected_pairs:
                ticker1 = pair_info['ticker1']
                ticker2 = pair_info['ticker2']
                pair_id = f"{ticker1}_{ticker2}"
                
                # Determinar posición actual
                current_position = 0
                if pair_id in self.current_positions:
                    current_position = np.sign(self.current_positions[pair_id]['size'])
                
                # Predecir convergencia
                conv_probability = self.convergence_predictor.predict_convergence(
                    filtered_data, pair_info, self.current_regime)
                
                # Generar señal
                signal = self.signal_generator.generate_signal(
                    filtered_data, pair_info, self.current_regime, 
                    current_position, conv_probability, current_date, market_conditions)
                
                if signal['z_score'] is not None:
                    # Añadir información a pair_info para uso posterior
                    pair_info_with_signal = pair_info.copy()
                    pair_info_with_signal['signal'] = signal
                    pair_info_with_signal['conv_probability'] = conv_probability
                    pairs_with_signals.append(pair_info_with_signal)
            
            logging.info(f"Señales generadas para {len(pairs_with_signals)} pares")
            
            # Optimizar portfolio
            updated_positions = self.position_manager.optimize_portfolio(
                filtered_data, pairs_with_signals, self.current_regime, sector_map, current_date)
            
            # Actualizar posiciones
            self.current_positions = updated_positions
            
            # Resumir cambios
            changed_positions = self._summarize_position_changes(current_date)
            
            # Registrar estado
            position_snapshot = {
                'date': current_date,
                'regime': self.current_regime,
                'positions': {k: v.copy() for k, v in updated_positions.items()},
                'market_conditions': market_conditions,
                'changes': changed_positions
            }
            self.positions_history.append(position_snapshot)
            
            return updated_positions
            
        except Exception as e:
            logging.error(f"Error en update: {str(e)}")
            logging.error(traceback.format_exc())
            # En caso de error, mantener posiciones actuales
            return self.current_positions
    
    def _analyze_market_conditions(self, data):
        """Summarise current market conditions for downstream decisions.

        Args:
            data: Mapping of feature name to DataFrame. The keys read here are
                ``'realized_vol'``, ``'returns'`` and ``'relative_volume'``;
                each one is optional and skipped when missing or empty.

        Returns:
            dict containing whichever of these entries could be computed:
            ``volatility_increase`` / ``high_volatility``,
            ``avg_correlation`` / ``high_correlation``,
            ``market_trend`` / ``bull_market`` / ``bear_market``,
            ``relative_volume`` / ``high_volume`` / ``low_volume``,
            plus ``cost_bps``, which is always present on success.
            If anything raises, ``{'error': <message>}`` is returned instead.
        """
        conditions = {}

        try:
            # --- Market volatility: last 5 rows vs. the 55 rows before them ---
            if 'realized_vol' in data and not data['realized_vol'].empty:
                realized_vol = data['realized_vol']

                # Prefer the index column when available, otherwise average
                # across every column.
                if '^GSPC' in realized_vol.columns:
                    vol_frame = realized_vol[['^GSPC']]
                else:
                    vol_frame = realized_vol

                # The comparison needs more than 60 rows of history.
                if len(realized_vol) > 60:
                    current_vol = vol_frame.iloc[-5:].mean().mean()
                    hist_vol = vol_frame.iloc[-60:-5].mean().mean()
                    vol_change = current_vol / hist_vol - 1 if hist_vol > 0 else 0
                    conditions['volatility_increase'] = vol_change
                    conditions['high_volatility'] = vol_change > 0.2

            if 'returns' in data and not data['returns'].empty:
                returns = data['returns']

                # --- Cross-asset correlation over the last 20 rows (or fewer) ---
                window = min(20, len(returns))
                recent_returns = returns.iloc[-window:]

                if recent_returns.shape[1] > 5:  # Requires more than 5 assets
                    corr_matrix = recent_returns.corr()
                    # Upper triangle without the diagonal: each pair counted once.
                    mask = np.triu(np.ones(corr_matrix.shape), k=1).astype(bool)
                    pair_corrs = corr_matrix.values[mask]
                    # Columns with missing or constant returns produce NaN
                    # correlations; drop them so one bad column cannot turn the
                    # whole average into NaN.
                    pair_corrs = pair_corrs[~np.isnan(pair_corrs)]

                    if pair_corrs.size > 0:
                        avg_corr = pair_corrs.mean()
                        conditions['avg_correlation'] = avg_corr
                        conditions['high_correlation'] = avg_corr > 0.6

                # --- Market trend: compounded return over the last 20 rows ---
                if '^GSPC' in returns.columns:
                    mkt_returns = returns['^GSPC']
                else:
                    # No index column: use the cross-sectional mean return.
                    mkt_returns = returns.mean(axis=1)

                trend_window = min(20, len(mkt_returns))
                cumulative_return = (1 + mkt_returns.iloc[-trend_window:]).prod() - 1

                conditions['market_trend'] = cumulative_return
                conditions['bull_market'] = cumulative_return > 0.03   # above +3%
                conditions['bear_market'] = cumulative_return < -0.03  # below -3%

            # --- Liquidity: mean relative volume over the last 5 rows ---
            if 'relative_volume' in data and not data['relative_volume'].empty:
                avg_rel_vol = data['relative_volume'].iloc[-5:].mean().mean()
                conditions['relative_volume'] = avg_rel_vol
                conditions['high_volume'] = avg_rel_vol > 1.2
                conditions['low_volume'] = avg_rel_vol < 0.8

            # --- Transaction cost estimate (bps) ---
            # High volatility takes precedence over high correlation; the two
            # surcharges are not cumulative.
            base_cost_bps = 5
            if conditions.get('high_volatility', False):
                cost_bps = base_cost_bps * 1.3  # +30%
            elif conditions.get('high_correlation', False):
                cost_bps = base_cost_bps * 1.2  # +20%
            else:
                cost_bps = base_cost_bps

            conditions['cost_bps'] = cost_bps

            return conditions

        except Exception as e:
            logging.error(f"Error en _analyze_market_conditions: {str(e)}")
            return {'error': str(e)}
    
    def _summarize_position_changes(self, current_date):
        """Summarise how positions changed relative to the previous snapshot.

        ``self.current_positions`` is compared with the most recent entry of
        ``self.positions_history`` whose ``'date'`` differs from
        ``current_date``. ``update`` calls this method *before* appending the
        snapshot for ``current_date``, so the baseline is normally the last
        history entry; skipping same-date entries keeps the result correct if
        the method is called after the append as well.

        Args:
            current_date: Date of the update being summarised. It must support
                subtraction with each position's ``'entry_date'``.

        Returns:
            dict with the counts ``total_positions``, ``new_positions``,
            ``closed_positions``, ``modified_positions``,
            ``unchanged_positions`` and, under ``details``, the lists
            ``opened`` / ``closed`` / ``modified`` / ``unchanged``.
        """
        changes = {
            'opened': [],
            'closed': [],
            'modified': [],
            'unchanged': []
        }

        # Baseline: latest snapshot that does not belong to current_date.
        previous_positions = {}
        for snapshot in reversed(self.positions_history):
            if snapshot.get('date') != current_date:
                previous_positions = snapshot['positions']
                break

        # Opened, modified and unchanged positions
        for pair_id, position in self.current_positions.items():
            if pair_id not in previous_positions:
                changes['opened'].append({
                    'pair_id': pair_id,
                    'ticker1': position['ticker1'],
                    'ticker2': position['ticker2'],
                    'size': position['size'],
                    'signal': position['signal'],
                    'date': current_date
                })
                continue

            prev_size = previous_positions[pair_id]['size']
            curr_size = position['size']

            # Size differences up to 0.001 are treated as no change.
            if abs(prev_size - curr_size) > 0.001:
                changes['modified'].append({
                    'pair_id': pair_id,
                    'ticker1': position['ticker1'],
                    'ticker2': position['ticker2'],
                    'prev_size': prev_size,
                    'new_size': curr_size,
                    'change_pct': (curr_size - prev_size) / prev_size if prev_size != 0 else float('inf'),
                    'date': current_date
                })
            else:
                changes['unchanged'].append(pair_id)

        # Closed positions: present in the baseline but no longer held
        for pair_id, position in previous_positions.items():
            if pair_id not in self.current_positions:
                changes['closed'].append({
                    'pair_id': pair_id,
                    'ticker1': position['ticker1'],
                    'ticker2': position['ticker2'],
                    'prev_size': position['size'],
                    'entry_date': position['entry_date'],
                    'days_held': (current_date - position['entry_date']).days,
                    'date': current_date
                })

        return {
            'total_positions': len(self.current_positions),
            'new_positions': len(changes['opened']),
            'closed_positions': len(changes['closed']),
            'modified_positions': len(changes['modified']),
            'unchanged_positions': len(changes['unchanged']),
            'details': changes
        }
    
    def backtest(self, data, sector_map, subsector_map, start_date=None, end_date=None):
        """Run a day-by-day backtest using only data available up to each date.

        Args:
            data: Mapping of feature name to DataFrame. ``data['prices']`` is
                required, must be non-empty, date-indexed, with one column per
                ticker.
            sector_map: Passed through to ``initialize`` and ``update``.
            subsector_map: Passed through to ``initialize`` and ``update``.
            start_date: First backtest date; anything ``pd.Timestamp`` accepts
                (string, ``datetime``, ``date``, ...). ``None`` picks the row
                at position ``252 * config['min_data_years']`` when enough
                history exists, otherwise the midpoint of the price index.
            end_date: Last backtest date, same accepted types. ``None`` uses
                the last price date. A date after today is clamped to the
                latest price date that is not in the future.

        Returns:
            DataFrame indexed by backtest date with columns ``equity``,
            ``returns``, ``costs``, ``net_returns``, ``drawdown`` and
            ``regime``; or ``None`` when any error occurs (the message and
            traceback are logged).

        Side effects:
            Resets the strategy via ``_reset_for_backtest``, re-initialises it,
            sets ``self.trades_log`` and ``self.equity_curve`` and calls
            ``calculate_performance_metrics``.

        Notes:
            P&L is booked only when a position is closed or resized; open
            positions are not marked to market in the equity curve.
        """
        try:
            logging.info("Iniciando backtest de la estrategia...")

            if 'prices' not in data or data['prices'].empty:
                raise ValueError("Datos de precios insuficientes o no proporcionados")

            prices = data['prices']

            # --- Resolve the start date ---
            if start_date is None:
                # Leave enough history before the first test date for training.
                # int() guards against a fractional 'min_data_years'.
                min_train_days = int(252 * self.config.get('min_data_years', 5))
                if len(prices) > min_train_days + 20:  # keep at least 20 test days
                    start_date = prices.index[min_train_days]
                else:
                    # Not enough history: split the available sample in half.
                    start_date = prices.index[len(prices) // 2]

                logging.info(f"Usando fecha de inicio por defecto: {start_date}")
            elif not isinstance(start_date, pd.Timestamp):
                # Accept strings, datetime/date objects, numpy datetimes, ...
                start_date = pd.Timestamp(start_date)
                logging.info(f"Fecha de inicio convertida a Timestamp: {start_date}")

            # --- Resolve the end date ---
            if end_date is None:
                end_date = prices.index[-1]
                logging.info(f"Usando fecha de fin por defecto: {end_date}")
            elif not isinstance(end_date, pd.Timestamp):
                end_date = pd.Timestamp(end_date)
                logging.info(f"Fecha de fin convertida a Timestamp: {end_date}")

            if start_date > end_date:
                raise ValueError(f"Fecha de inicio {start_date} posterior a fecha de fin {end_date}")

            # Clamp an end date that lies in the future.
            today = datetime.now().date()
            if end_date.date() > today:
                logging.warning(f"Fecha de fin {end_date} es posterior a fecha actual {today}")
                valid_indices = prices.index[prices.index.date <= today]
                if len(valid_indices) > 0:
                    end_date = valid_indices[-1]
                    logging.info(f"Ajustando fecha de fin a: {end_date}")
                else:
                    raise ValueError("No hay fechas válidas antes de la fecha actual")

            # --- Backtest window ---
            backtest_mask = (prices.index >= start_date) & (prices.index <= end_date)
            backtest_dates = prices.index[backtest_mask]

            if len(backtest_dates) < 5:
                raise ValueError(f"Período de backtest demasiado corto: {len(backtest_dates)} días")

            logging.info(f"Período de backtest: {start_date} a {end_date} ({len(backtest_dates)} días)")

            equity_curve = pd.DataFrame(index=backtest_dates, columns=[
                'equity', 'returns', 'costs', 'net_returns', 'drawdown', 'regime'
            ])
            equity_curve['equity'] = 1.0
            equity_curve['costs'] = 0.0

            # Train on everything up to start_date, from a clean state.
            training_data = self._filter_data_until_date(data, start_date)
            self._reset_for_backtest()
            self.initialize(training_data, sector_map, subsector_map, start_date)

            def spread_return(position, ticker1, ticker2, hedge_ratio, exit_date):
                """Direction-adjusted spread return from entry to exit, or None if unusable."""
                entry_date = position['entry_date']
                if (entry_date not in prices.index
                        or ticker1 not in prices.columns
                        or ticker2 not in prices.columns):
                    logging.warning(f"No se encontraron datos para {ticker1}-{ticker2} en fecha de entrada {entry_date}")
                    return None

                old_spread = prices.loc[entry_date, ticker1] + hedge_ratio * prices.loc[entry_date, ticker2]
                new_spread = prices.loc[exit_date, ticker1] + hedge_ratio * prices.loc[exit_date, ticker2]

                # A missing price (NaN) or a zero entry spread would produce a
                # NaN/inf return that permanently corrupts the compounded
                # equity curve, so such trades are skipped.
                if not (np.isfinite(old_spread) and np.isfinite(new_spread)) or old_spread == 0:
                    logging.warning(f"Spread no válido para {ticker1}-{ticker2} entre {entry_date} y {exit_date}; operación omitida")
                    return None

                raw_return = (new_spread - old_spread) / abs(old_spread)
                # signal > 0 is treated as long the spread, anything else as short.
                return raw_return if position['signal'] > 0 else -raw_return

            def trade_record(pair_id, position, exit_date, size, gross, costs, trade_return):
                """Build one trades-log row for a closed or resized position."""
                return {
                    'pair_id': pair_id,
                    'ticker1': position['ticker1'],
                    'ticker2': position['ticker2'],
                    'entry_date': position['entry_date'],
                    'exit_date': exit_date,
                    'days_held': (exit_date - position['entry_date']).days,
                    'entry_signal': position['signal'],
                    'entry_z_score': position['z_score'],
                    'position_size': size,
                    'gross_pnl': gross,
                    'costs': costs,
                    'net_pnl': gross - costs,
                    'return': trade_return,
                    'regime': position.get('regime', self.current_regime)
                }

            logging.info("Ejecutando backtesting día a día...")

            previous_positions = {}
            trades_log = []

            for i, current_date in enumerate(tqdm(backtest_dates)):
                # Only data up to the current date, to avoid look-ahead bias.
                current_data = self._filter_data_until_date(data, current_date)
                current_positions = self.update(current_data, sector_map, subsector_map, current_date)

                equity_curve.loc[current_date, 'regime'] = self.current_regime

                costs_enabled = self.config.get('transaction_costs', True)
                impact_enabled = self.config.get('include_market_impact', True)

                daily_pnl = 0
                daily_costs = 0

                # --- Fully closed positions ---
                # Iterating the dict (not a set) keeps trades_log order reproducible.
                closed_ids = [pid for pid in previous_positions if pid not in current_positions]

                for pair_id in closed_ids:
                    old_position = previous_positions[pair_id]
                    ticker1 = old_position['ticker1']
                    ticker2 = old_position['ticker2']
                    hedge_ratio = old_position['hedge_ratio']

                    try:
                        trade_return = spread_return(
                            old_position, ticker1, ticker2, hedge_ratio, current_date)
                        if trade_return is None:
                            continue

                        # NOTE(review): update() takes np.sign of 'size', which
                        # suggests size may be signed; combined with the
                        # direction-adjusted return this could flip short P&L.
                        # Confirm the sizing convention.
                        position_pnl = old_position['size'] * trade_return

                        trade_costs = 0
                        if costs_enabled:
                            # Exit costs: commission + slippage (+ market impact).
                            exit_costs = self.trading_costs['commission'] + self.trading_costs['slippage']

                            entry_cost_info = old_position.get('costs', {})
                            if impact_enabled and 'min_adv' in entry_cost_info:
                                min_adv = entry_cost_info['min_adv']
                                # A non-positive ADV cannot be used as a divisor.
                                if min_adv > 0:
                                    position_value = abs(old_position['size']) * 1e6  # $1M notional base
                                    pct_of_adv = position_value / min_adv
                                    exit_costs += self.trading_costs['market_impact'] * np.sqrt(pct_of_adv)

                            trade_costs = abs(old_position['size']) * exit_costs

                        daily_pnl += position_pnl
                        daily_costs += trade_costs

                        trades_log.append(trade_record(
                            pair_id, old_position, current_date,
                            old_position['size'], position_pnl, trade_costs, trade_return))
                    except Exception as e:
                        logging.error(f"Error calculando P&L para {pair_id}: {str(e)}")

                # --- Positions kept but resized ---
                kept_ids = [pid for pid in previous_positions if pid in current_positions]

                for pair_id in kept_ids:
                    old_position = previous_positions[pair_id]
                    new_position = current_positions[pair_id]

                    # Size differences up to 0.001 are treated as no change.
                    if not abs(old_position['size'] - new_position['size']) > 0.001:
                        continue

                    ticker1 = old_position['ticker1']
                    ticker2 = old_position['ticker2']
                    hedge_ratio = old_position['hedge_ratio']

                    try:
                        # NOTE(review): size_diff is negative when the position
                        # grows, so P&L is booked on a part that was not closed.
                        # Confirm this is intended.
                        size_diff = old_position['size'] - new_position['size']

                        trade_return = spread_return(
                            old_position, ticker1, ticker2, hedge_ratio, current_date)
                        if trade_return is None:
                            continue

                        position_pnl = size_diff * trade_return

                        trade_costs = 0
                        if costs_enabled:
                            # Costs apply only to the adjusted part of the position.
                            costs_pct = self.trading_costs['commission'] + self.trading_costs['slippage']
                            if impact_enabled and 'costs' in new_position:
                                costs_pct += new_position['costs'].get('market_impact', 0)
                            trade_costs = abs(size_diff) * costs_pct

                        daily_pnl += position_pnl
                        daily_costs += trade_costs

                        record = trade_record(
                            pair_id, old_position, current_date,
                            size_diff, position_pnl, trade_costs, trade_return)
                        record['partial'] = True
                        trades_log.append(record)
                    except Exception as e:
                        logging.error(f"Error calculando P&L para ajuste de {pair_id}: {str(e)}")

                # --- Entry costs of newly opened positions ---
                if costs_enabled:
                    for pair_id, position in current_positions.items():
                        if pair_id in previous_positions:
                            continue
                        entry_costs = position.get('costs', {}).get('total_pct', 0.001)
                        daily_costs += abs(position['size']) * entry_costs

                # --- Equity curve ---
                if i > 0:
                    equity_curve.loc[current_date, 'returns'] = daily_pnl
                    equity_curve.loc[current_date, 'costs'] = daily_costs
                    equity_curve.loc[current_date, 'net_returns'] = daily_pnl - daily_costs

                    # Compound yesterday's equity with today's net return.
                    previous_equity = equity_curve.iloc[i-1]['equity']
                    equity_curve.loc[current_date, 'equity'] = previous_equity * (1 + daily_pnl - daily_costs)
                else:
                    # The first day is the baseline: equity stays at 1.0.
                    equity_curve.loc[current_date, 'returns'] = 0
                    equity_curve.loc[current_date, 'costs'] = 0
                    equity_curve.loc[current_date, 'net_returns'] = 0

                # Copy so later in-place changes do not alter the baseline.
                previous_positions = {k: v.copy() for k, v in current_positions.items()}

            # Drawdown relative to the running high-water mark
            equity = equity_curve['equity']
            high_water_mark = equity.cummax()
            equity_curve['drawdown'] = 1 - equity / high_water_mark

            self.trades_log = pd.DataFrame(trades_log)
            self.calculate_performance_metrics(equity_curve, self.trades_log)

            self.equity_curve = equity_curve

            logging.info(f"Backtest completado. Retorno final: {(equity.iloc[-1]/equity.iloc[0]-1)*100:.2f}%")

            return equity_curve

        except Exception as e:
            logging.error(f"Error en backtest: {str(e)}")
            logging.error(traceback.format_exc())
            return None
    
    def _reset_for_backtest(self):
        """Rebuild every strategy component and clear run state before a backtest.

        Components are re-created from ``self.config`` (falling back to the
        defaults shown below), then the regime, pair, position and history
        attributes are reset to empty values.
        """
        cfg = self.config

        # Fresh components, constructed in a fixed order.
        train_samples = min(252 * cfg.get('min_data_years', 5), 1260)
        self.regime_detector = RegimeDetector(min_train_samples=train_samples)

        selector_kwargs = {
            'min_liquidity': cfg.get('min_liquidity', 10e6),
            'max_pairs_by_regime': cfg.get('max_pairs_per_regime', {1: 25, 2: 20, 3: 15}),
            'recalibration_days': cfg.get('recalibration_days', 5),
        }
        self.pair_selector = PairSelector(**selector_kwargs)

        self.convergence_predictor = ConvergencePredictor()
        self.signal_generator = SignalGenerator()
        self.position_manager = PositionManager()

        # Regime and portfolio state
        self.current_regime = self.previous_regime = None
        self.selected_pairs = []
        self.current_positions = {}

        # Each history gets its own new empty list.
        for history_attr in ('positions_history', 'regime_history',
                             'recalibration_history', 'metrics_history'):
            setattr(self, history_attr, [])

        logging.info("Estado de la estrategia reiniciado para backtest")
    
    def calculate_performance_metrics(self, equity_curve, trades_log):
        """Calcula métricas de rendimiento completas incluyendo análisis por régimen"""
        try:
            logging.info("Calculando métricas de rendimiento...")
            
            # Retornos diarios (netos después de costos)
            daily_returns = equity_curve['net_returns'].dropna()
            
            if len(daily_returns) == 0:
                logging.warning("No hay retornos para calcular métricas")
                return self.metrics
            
            # Métricas generales
            
            # Sharpe Ratio (anualizado)
            sharpe = np.sqrt(252) * daily_returns.mean() / daily_returns.std() if daily_returns.std() > 0 else 0
            
            # Sortino Ratio (anualizado)
            negative_returns = daily_returns[daily_returns < 0]
            sortino = np.sqrt(252) * daily_returns.mean() / negative_returns.std() if len(negative_returns) > 0 and negative_returns.std() > 0 else 0
            
            # Maximum Drawdown
            max_drawdown = equity_curve['drawdown'].max()
            
            # Annualized Return
            days = (equity_curve.index[-1] - equity_curve.index[0]).days
            annual_return = (equity_curve['equity'].iloc[-1] / equity_curve['equity'].iloc[0]) ** (365 / max(days, 1)) - 1
            
            # Annualized Volatility
            annual_vol = daily_returns.std() * np.sqrt(252)
            
            # Trades específicos
            if len(trades_log) > 0:
                win_rate = (trades_log['net_pnl'] > 0).mean()
                avg_trade_duration = trades_log['days_held'].mean()
                avg_profit_per_trade = trades_log['net_pnl'].mean()
                
                # Profit factor (gross profits / gross losses)
                gross_profits = trades_log[trades_log['gross_pnl'] > 0]['gross_pnl'].sum()
                gross_losses = abs(trades_log[trades_log['gross_pnl'] < 0]['gross_pnl'].sum())
                profit_factor = gross_profits / gross_losses if gross_losses != 0 else float('inf')
                
                # Impacto de costos
                total_gross_pnl = trades_log['gross_pnl'].sum()
                total_costs = trades_log['costs'].sum()
                cost_impact = total_costs / total_gross_pnl if total_gross_pnl != 0 else float('inf')
            else:
                win_rate = 0
                avg_trade_duration = 0
                avg_profit_per_trade = 0
                profit_factor = 0
                cost_impact = 0
            
            # Guardar métricas generales
            self.metrics['overall'] = {
                'sharpe_ratio': sharpe,
                'sortino_ratio': sortino,
                'max_drawdown': max_drawdown,
                'annual_return': annual_return,
                'volatility': annual_vol,
                'win_rate': win_rate,
                'avg_trade_duration': avg_trade_duration,
                'avg_profit_per_trade': avg_profit_per_trade,
                'profit_factor': profit_factor,
                'cost_impact': cost_impact
            }
            
            # Análisis por régimen
            if 'regime' in equity_curve.columns:
                for regime in [1, 2, 3]:
                    regime_mask = equity_curve['regime'] == regime
                    regime_days = regime_mask.sum()
                    
                    # Si hay suficientes días en este régimen
                    if regime_days > 0:
                        regime_returns = equity_curve.loc[regime_mask, 'net_returns']
                        regime_equity = equity_curve.loc[regime_mask, 'equity']
                        
                        # Calcular métricas si hay datos suficientes
                        if len(regime_returns) > 1:
                            regime_sharpe = np.sqrt(252) * regime_returns.mean() / regime_returns.std() if regime_returns.std() > 0 else 0
                            
                            # Rendimiento en período
                            if len(regime_equity) > 1:
                                regime_return = regime_equity.iloc[-1] / regime_equity.iloc[0] - 1
                            else:
                                regime_return = 0
                            
                            # Drawdown en régimen
                            regime_drawdown = equity_curve.loc[regime_mask, 'drawdown'].max()
                            
                            # Volatilidad anualizada
                            regime_vol = regime_returns.std() * np.sqrt(252) if len(regime_returns) > 5 else 0
                            
                            # Guardar
                            self.metrics['by_regime'][regime] = {
                                'days': regime_days,
                                'pct_time': regime_days / len(equity_curve),
                                'return': regime_return,
                                'sharpe': regime_sharpe,
                                'volatility': regime_vol,
                                'max_drawdown': regime_drawdown
                            }
                    
                    # Análisis de trades por régimen
                    if len(trades_log) > 0 and 'regime' in trades_log.columns:
                        regime_trades = trades_log[trades_log['regime'] == regime]
                        
                        if len(regime_trades) > 0:
                            regime_win_rate = (regime_trades['net_pnl'] > 0).mean()
                            regime_avg_profit = regime_trades['net_pnl'].mean()
                            
                            # Añadir a métricas
                            if regime in self.metrics['by_regime']:
                                self.metrics['by_regime'][regime].update({
                                    'trade_count': len(regime_trades),
                                    'win_rate': regime_win_rate,
                                    'avg_profit': regime_avg_profit
                                })
            
            # Análisis sectorial si hay información
            if len(trades_log) > 0 and 'ticker1' in trades_log.columns:
                # Obtener sectores de los tickers (requiere sector_map)
                if hasattr(self, 'sector_map'):
                    trades_log['sector1'] = trades_log['ticker1'].map(self.sector_map)
                    trades_log['sector2'] = trades_log['ticker2'].map(self.sector_map)
                    
                    # Agrupar por sector y calcular métricas
                    for sector in set(trades_log['sector1'].tolist() + trades_log['sector2'].tolist()):
                        if pd.isna(sector):
                            continue
                            
                        # Filtrar trades de este sector (en cualquier lado del par)
                        sector_trades = trades_log[(trades_log['sector1'] == sector) | (trades_log['sector2'] == sector)]
                        
                        if len(sector_trades) > 0:
                            sector_win_rate = (sector_trades['net_pnl'] > 0).mean()
                            sector_avg_profit = sector_trades['net_pnl'].mean()
                            sector_profit_factor = sector_trades[sector_trades['gross_pnl'] > 0]['gross_pnl'].sum() / abs(sector_trades[sector_trades['gross_pnl'] < 0]['gross_pnl'].sum()) if abs(sector_trades[sector_trades['gross_pnl'] < 0]['gross_pnl'].sum()) > 0 else float('inf')
                            
                            self.metrics['by_sector'][sector] = {
                                'trade_count': len(sector_trades),
                                'win_rate': sector_win_rate,
                                'avg_profit': sector_avg_profit,
                                'profit_factor': sector_profit_factor
                            }
            
            # Análisis por período (trimestral)
            if len(equity_curve) > 20:
                # Añadir información de trimestre
                equity_curve['quarter'] = equity_curve.index.to_period('Q')
                
                # Agrupar por trimestre
                for quarter in equity_curve['quarter'].unique():
                    quarter_mask = equity_curve['quarter'] == quarter
                    quarter_returns = equity_curve.loc[quarter_mask, 'net_returns']
                    quarter_equity = equity_curve.loc[quarter_mask, 'equity']
                    
                    if len(quarter_returns) > 5:
                        quarter_return = quarter_equity.iloc[-1] / quarter_equity.iloc[0] - 1
                        quarter_sharpe = np.sqrt(252) * quarter_returns.mean() / quarter_returns.std() if quarter_returns.std() > 0 else 0
                        
                        self.metrics['by_period'][str(quarter)] = {
                            'days': len(quarter_returns),
                            'return': quarter_return,
                            'sharpe': quarter_sharpe
                        }
            
            # Guardar en historial
            snapshot = {
                'date': datetime.now(),
                'metrics': self.metrics.copy()
            }
            self.metrics_history.append(snapshot)
            
            logging.info("Métricas calculadas correctamente")
            
            return self.metrics
            
        except Exception as e:
            logging.error(f"Error calculando métricas: {str(e)}")
            logging.error(traceback.format_exc())
            return self.metrics
    
    def walk_forward_test(self, data, sector_map, subsector_map, 
                         training_years=5, validation_months=6, test_months=6, 
                         num_windows=4, strict_separation=True):
        """Run a walk-forward evaluation over rolling train/validation/test windows.

        Consecutive windows are shifted by one test span.  For every window a
        fresh ``StatArbStrategy`` is initialised with the data filtered up to the
        end of the training span, updated once per date of the validation span,
        optionally reset and re-initialised with data up to the end of
        validation, and then updated once per date of the test span while an
        equity curve (starting at 1.0) and a trade log are recorded.

        Args:
            data: Mapping holding at least a ``'prices'`` DataFrame indexed by
                date.  The whole mapping is handed to
                ``self._filter_data_until_date``.
            sector_map: Passed through unchanged to the strategy.
            subsector_map: Passed through unchanged to the strategy.
            training_years: Training span, converted as 252 rows per year.
            validation_months: Validation span, converted as 21 rows per month.
            test_months: Test span, converted as 21 rows per month.  Must be
                positive.
            num_windows: Upper bound on the number of windows.  It is reduced
                so that every window ends its validation span strictly before
                the last available date (i.e. has at least one test date).
            strict_separation: When true and a validation span exists, the
                strategy is reset via ``_reset_for_backtest`` and re-initialised
                on data up to the end of validation before the test span runs.

        Returns:
            A dict with the lists ``'windows'``, ``'equity_curves'``,
            ``'metrics'`` and ``'trades'`` (one entry per completed window) and,
            when at least one window completed, ``'avg_metrics'`` with the mean
            and standard deviation of selected metrics.  ``None`` when the data
            is too short, ``test_months`` is not positive, or any exception is
            raised (the exception is logged, not propagated).
        """
        try:
            prices = data['prices']
            dates = prices.index
            
            # Convert the calendar-style arguments into numbers of price rows
            training_days = training_years * 252
            validation_days = validation_months * 21
            test_days = test_months * 21
            
            # A non-positive test span would otherwise surface as a division by zero below
            if test_days <= 0:
                logging.error("test_months debe ser mayor que cero para walk-forward testing")
                return None
            
            if len(dates) < training_days + validation_days + test_days:
                logging.error("Datos insuficientes para walk-forward testing")
                return None
            
            # Result containers, one list entry per completed window
            results = {
                'windows': [],
                'equity_curves': [],
                'metrics': [],
                'trades': []
            }
            
            # Cap the number of windows.  Besides the original bound, a window is only
            # usable when its validation span ends strictly before the last date;
            # otherwise indexing `dates` fails or the test span is empty, and the
            # resulting exception would discard every window already completed.
            max_windows = (len(dates) - training_days) // test_days
            last_idx = len(dates) - 1
            usable_span = last_idx - training_days - validation_days
            feasible_windows = -(-usable_span // test_days) if usable_span > 0 else 0  # ceil division
            num_windows = min(num_windows, max_windows, feasible_windows)
            
            logging.info(f"Realizando walk-forward test con {num_windows} ventanas")
            
            for window_idx in range(num_windows):
                # Row positions delimiting the three spans of this window
                train_start_idx = window_idx * test_days
                train_end_idx = train_start_idx + training_days
                validation_end_idx = train_end_idx + validation_days
                test_end_idx = validation_end_idx + test_days
                
                # The last window may have a shortened test span
                test_end_idx = min(test_end_idx, last_idx)
                
                # Boundary dates; string labels are promoted to Timestamp so they compare with dates
                train_start, train_end, validation_end, test_end = (
                    pd.Timestamp(boundary) if isinstance(boundary, str) else boundary
                    for boundary in (
                        dates[train_start_idx],
                        dates[train_end_idx],
                        dates[validation_end_idx],
                        dates[test_end_idx],
                    )
                )
                
                logging.info(f"\nWalk-forward ventana {window_idx+1}:")
                logging.info(f"Entrenamiento: {train_start} a {train_end}")
                logging.info(f"Validación: {train_end} a {validation_end}")
                logging.info(f"Prueba: {validation_end} a {test_end}")
                
                # Fresh strategy instance per window so no state carries over
                strategy = StatArbStrategy(config=self.config)
                
                # Training data (filtered with train_end as the cut-off)
                train_data = self._filter_data_until_date(data, train_end)
                
                strategy.initialize(train_data, sector_map, subsector_map, train_end)
                
                # Step the strategy through the validation span, if there is one
                if validation_days > 0:
                    validation_data = self._filter_data_until_date(data, validation_end)
                    
                    validation_dates = [d for d in dates if train_end < d <= validation_end]
                    
                    for val_date in tqdm(validation_dates, desc="Validación"):
                        # Re-filter per date so each update only receives data up to that date
                        val_date_data = self._filter_data_until_date(validation_data, val_date)
                        strategy.update(val_date_data, sector_map, subsector_map, val_date)
                    
                    logging.info("Validación completada")
                
                test_data = self._filter_data_until_date(data, test_end)
                
                # With strict separation, drop the state accumulated during validation
                # and re-initialise on everything up to validation_end
                if strict_separation and validation_days > 0:
                    logging.info("Reiniciando modelos post-validación para separación estricta")
                    strategy._reset_for_backtest()
                    strategy.initialize(validation_data, sector_map, subsector_map, validation_end)
                
                # Only dates after validation_end belong to the test span
                test_dates = [d for d in dates if validation_end < d <= test_end]
                
                # Defensive: an empty test span cannot produce an equity curve
                if not test_dates:
                    logging.warning(f"Ventana {window_idx+1} sin fechas de prueba; se omite")
                    continue
                
                # Equity curve for this window, starting at 1.0
                test_equity = pd.DataFrame(index=test_dates, columns=['equity', 'returns', 'costs', 'net_returns', 'drawdown', 'regime'])
                test_equity['equity'] = 1.0
                test_trades = []
                
                previous_positions = {}
                
                # Day-by-day simulation
                for i, test_date in enumerate(tqdm(test_dates, desc="Test")):
                    test_date_data = self._filter_data_until_date(test_data, test_date)
                    
                    current_positions = strategy.update(test_date_data, sector_map, subsector_map, test_date)
                    
                    test_equity.loc[test_date, 'regime'] = strategy.current_regime
                    
                    daily_pnl, daily_costs, new_trades = self._calculate_daily_pnl(
                        previous_positions, current_positions, prices, test_date
                    )
                    
                    # Tag every trade with the (1-based) window it belongs to
                    for trade in new_trades:
                        trade['window'] = window_idx + 1
                        test_trades.append(trade)
                    
                    if i > 0:
                        test_equity.loc[test_date, 'returns'] = daily_pnl
                        test_equity.loc[test_date, 'costs'] = daily_costs
                        test_equity.loc[test_date, 'net_returns'] = daily_pnl - daily_costs
                        
                        previous_equity = test_equity.iloc[i-1]['equity']
                        test_equity.loc[test_date, 'equity'] = previous_equity * (1 + daily_pnl - daily_costs)
                    else:
                        # The first test date is recorded flat; P&L and costs computed
                        # for it are not applied to the equity curve
                        test_equity.loc[test_date, 'returns'] = 0
                        test_equity.loc[test_date, 'costs'] = 0
                        test_equity.loc[test_date, 'net_returns'] = 0
                    
                    # Copy each position dict so later mutation does not alter the snapshot
                    previous_positions = {k: v.copy() for k, v in current_positions.items()}
                
                # Drawdown relative to the running equity peak
                equity = test_equity['equity']
                drawdown = 1 - equity / equity.cummax()
                test_equity['drawdown'] = drawdown
                
                test_trades_df = pd.DataFrame(test_trades) if test_trades else pd.DataFrame()
                window_metrics = self._calculate_window_metrics(test_equity, test_trades_df, window_idx + 1)
                
                results['windows'].append({
                    'window': window_idx + 1,
                    'train_start': train_start,
                    'train_end': train_end,
                    'validation_end': validation_end,
                    'test_end': test_end
                })
                results['equity_curves'].append(test_equity)
                results['metrics'].append(window_metrics)
                results['trades'].append(test_trades_df)
                
                logging.info(f"Ventana {window_idx+1} completada. Retorno: {(test_equity['equity'].iloc[-1] - 1) * 100:.2f}%")
            
            # Mean and dispersion of the headline metrics across windows
            if results['metrics']:
                avg_metrics = {}
                for key in ['sharpe_ratio', 'sortino_ratio', 'max_drawdown', 'annual_return', 'win_rate']:
                    # A metric missing from a window counts as 0
                    values = [m.get(key, 0) for m in results['metrics']]
                    avg_metrics[key] = np.mean(values)
                    avg_metrics[f'{key}_std'] = np.std(values)
                
                results['avg_metrics'] = avg_metrics
                
                logging.info("Resultados agregados de walk-forward test:")
                logging.info(f"Retorno anual promedio: {avg_metrics['annual_return']*100:.2f}% ± {avg_metrics['annual_return_std']*100:.2f}%")
                logging.info(f"Sharpe promedio: {avg_metrics['sharpe_ratio']:.2f} ± {avg_metrics['sharpe_ratio_std']:.2f}")
            
            return results
            
        except Exception as e:
            logging.error(f"Error en walk_forward_test: {str(e)}")
            logging.error(traceback.format_exc())
            return None
    
    def _calculate_daily_pnl(self, previous_positions, current_positions, prices, current_date):
        """Compute the P&L, costs and trade records produced by one day's position changes.

        Three groups of pairs are handled, based on the keys of the two mappings:

        * pairs only in ``previous_positions`` (closed): the full size is
          realised at the spread return measured from the position's
          ``entry_date`` to ``current_date``, and exit costs are charged;
        * pairs in both mappings whose size differs by more than 0.001: the
          size difference (old minus new) is realised at the same spread return
          and costs are charged on its absolute value.  When the size grew the
          difference is negative, so the recorded P&L has the opposite sign of
          the spread return;
        * pairs only in ``current_positions`` (new): only entry costs are charged.

        The spread is ``price(ticker1) + hedge_ratio * price(ticker2)`` and the
        return is normalised by the absolute entry spread, with its sign flipped
        when the position's ``signal`` is not positive.

        Args:
            previous_positions: Mapping pair id -> position dict as of the
                previous step.  Keys read: ``ticker1``, ``ticker2``,
                ``hedge_ratio``, ``entry_date``, ``signal``, ``size``,
                ``z_score`` and optionally ``costs`` and ``regime``.
            current_positions: Mapping pair id -> position dict after today's update.
            prices: DataFrame of prices indexed by date with one column per ticker.
            current_date: Index label of the day being evaluated.

        Returns:
            Tuple ``(daily_pnl, daily_costs, new_trades)`` where ``new_trades``
            is a list of dicts describing each realised (full or partial) trade.

        Per-position failures are logged and that position is skipped; they do
        not propagate.  A position whose tickers or entry date are missing from
        ``prices`` is skipped silently.
        """
        daily_pnl = 0
        daily_costs = 0
        new_trades = []

        costs_enabled = self.config.get('transaction_costs', True)
        impact_enabled = self.config.get('include_market_impact', True)

        def spread_return(position):
            """Return the direction-adjusted spread return since entry, or None if prices are missing."""
            ticker1 = position['ticker1']
            ticker2 = position['ticker2']
            entry_date = position['entry_date']
            if ticker1 not in prices.columns or ticker2 not in prices.columns:
                return None
            if entry_date not in prices.index:
                return None

            hedge_ratio = position['hedge_ratio']
            entry_spread = prices.loc[entry_date, ticker1] + hedge_ratio * prices.loc[entry_date, ticker2]
            exit_spread = prices.loc[current_date, ticker1] + hedge_ratio * prices.loc[current_date, ticker2]

            # A zero or non-finite spread would yield inf/NaN P&L that corrupts the
            # equity curve; raise so the caller's handler logs and skips the pair
            if not np.isfinite(entry_spread) or entry_spread == 0 or not np.isfinite(exit_spread):
                raise ValueError(
                    f"spread no válido (entrada={entry_spread}, salida={exit_spread})"
                )

            if position['signal'] > 0:  # long the spread
                return (exit_spread - entry_spread) / abs(entry_spread)
            return (entry_spread - exit_spread) / abs(entry_spread)  # short the spread

        def trade_record(pair_id, position, size, gross, costs, trade_return, partial=False):
            """Build the trade-log entry; the 'partial' key is present only for size adjustments."""
            record = {
                'pair_id': pair_id,
                'ticker1': position['ticker1'],
                'ticker2': position['ticker2'],
                'entry_date': position['entry_date'],
                'exit_date': current_date,
                'days_held': (current_date - position['entry_date']).days,
                'entry_signal': position['signal'],
                'entry_z_score': position['z_score'],
                'position_size': size,
                'gross_pnl': gross,
                'costs': costs,
                'net_pnl': gross - costs,
                'return': trade_return,
                'regime': position.get('regime', self.current_regime)
            }
            if partial:
                record['partial'] = True
            return record

        # --- Closed positions (iterated in dict order so the trade log is deterministic)
        for pair_id, old_position in previous_positions.items():
            if pair_id in current_positions:
                continue
            try:
                trade_return = spread_return(old_position)
                if trade_return is None:
                    continue

                position_pnl = old_position['size'] * trade_return

                trade_costs = 0
                if costs_enabled:
                    # Exit costs: commission + slippage, plus market impact when liquidity is known
                    exit_costs = self.trading_costs['commission'] + self.trading_costs['slippage']

                    min_adv = old_position.get('costs', {}).get('min_adv')
                    # Skip the impact term when ADV is missing or non-positive (would divide by zero)
                    if impact_enabled and min_adv is not None and min_adv > 0:
                        position_value = abs(old_position['size']) * 1e6  # size scaled by a $1M base
                        pct_of_adv = position_value / min_adv
                        exit_costs += self.trading_costs['market_impact'] * np.sqrt(pct_of_adv)

                    trade_costs = abs(old_position['size']) * exit_costs

                daily_pnl += position_pnl
                daily_costs += trade_costs
                new_trades.append(
                    trade_record(pair_id, old_position, old_position['size'],
                                 position_pnl, trade_costs, trade_return)
                )
            except Exception as e:
                logging.error(f"Error calculando P&L para {pair_id}: {str(e)}")

        # --- Positions held on both days whose size changed
        for pair_id, old_position in previous_positions.items():
            if pair_id not in current_positions:
                continue
            new_position = current_positions[pair_id]

            size_diff = old_position['size'] - new_position['size']
            if abs(size_diff) <= 0.001:
                continue

            try:
                trade_return = spread_return(old_position)
                if trade_return is None:
                    continue

                position_pnl = size_diff * trade_return

                trade_costs = 0
                if costs_enabled:
                    # Costs apply to the adjusted part only
                    costs_pct = self.trading_costs['commission'] + self.trading_costs['slippage']
                    if impact_enabled and 'costs' in new_position:
                        costs_pct += new_position['costs'].get('market_impact', 0)
                    trade_costs = abs(size_diff) * costs_pct

                daily_pnl += position_pnl
                daily_costs += trade_costs
                new_trades.append(
                    trade_record(pair_id, old_position, size_diff,
                                 position_pnl, trade_costs, trade_return, partial=True)
                )
            except Exception as e:
                logging.error(f"Error calculando P&L para ajuste de {pair_id}: {str(e)}")

        # --- Newly opened positions: entry costs only
        if costs_enabled:
            for pair_id, position in current_positions.items():
                if pair_id in previous_positions:
                    continue
                entry_costs = position.get('costs', {}).get('total_pct', 0.001)
                daily_costs += abs(position['size']) * entry_costs

        return daily_pnl, daily_costs, new_trades
    
    def _calculate_window_metrics(self, equity_curve, trades, window_index):
        """Summarise one walk-forward window.

        Args:
            equity_curve: DataFrame indexed by date with the columns
                ``'net_returns'``, ``'equity'`` and ``'drawdown'``.
            trades: DataFrame of trades with a ``'net_pnl'`` column and
                optionally a ``'regime'`` column; may be empty.
            window_index: Identifier stored under ``'window'`` in the result.

        Returns:
            Dict of metrics.  Return/risk figures are present only when there is
            at least one non-null net return; trade figures only when ``trades``
            is non-empty.  Any exception is logged and whatever was computed up
            to that point is returned.
        """
        metrics = {}
        
        try:
            # Net (after-cost) daily returns, ignoring missing rows
            net = equity_curve['net_returns'].dropna()
            
            if len(net) > 0:
                annualiser = np.sqrt(252)
                
                # Annualised Sharpe ratio; 0 when the dispersion is not positive
                if net.std() > 0:
                    sharpe = annualiser * net.mean() / net.std()
                else:
                    sharpe = 0
                
                # Annualised Sortino ratio, using the std of the negative returns only
                losses = net[net < 0]
                if len(losses) > 0 and losses.std() > 0:
                    sortino = annualiser * net.mean() / losses.std()
                else:
                    sortino = 0
                
                worst_drawdown = equity_curve['drawdown'].max()
                
                # Annualised return over the calendar-day length of the window (at least 1 day)
                first_equity = equity_curve['equity'].iloc[0]
                last_equity = equity_curve['equity'].iloc[-1]
                span_days = (equity_curve.index[-1] - equity_curve.index[0]).days
                annual_return = (last_equity / first_equity) ** (365 / max(span_days, 1)) - 1
                
                annual_vol = net.std() * annualiser
                
                metrics.update({
                    'window': window_index,
                    'sharpe_ratio': sharpe,
                    'sortino_ratio': sortino,
                    'max_drawdown': worst_drawdown,
                    'annual_return': annual_return,
                    'volatility': annual_vol,
                    'total_return': last_equity / first_equity - 1
                })
            
            # Trade-level statistics
            if len(trades) > 0:
                net_pnl = trades['net_pnl']
                metrics.update({
                    'win_rate': (net_pnl > 0).mean(),
                    'avg_profit': net_pnl.mean(),
                    'total_trades': len(trades)
                })
                
                # Breakdown by the regime recorded on each trade
                if 'regime' in trades.columns:
                    by_regime = {}
                    for regime_value in trades['regime'].unique():
                        subset = trades[trades['regime'] == regime_value]
                        if len(subset) == 0:
                            continue
                        by_regime[int(regime_value)] = {
                            'count': len(subset),
                            'win_rate': (subset['net_pnl'] > 0).mean(),
                            'avg_profit': subset['net_pnl'].mean()
                        }
                    
                    metrics['regimes'] = by_regime
        except Exception as e:
            logging.error(f"Error calculando métricas para ventana {window_index}: {str(e)}")
        
        return metrics
    
    def plot_equity_curve(self, filename='equity_curve.png', include_regimes=True):
        """Save a three-panel figure of the equity curve, drawdown and daily returns.

        Panel 1 shows net equity (plus a gross, cost-free equity line when a
        ``'costs'`` column exists, vertical markers at regime changes, and
        ``self.benchmark_equity`` when that attribute exists).  Panel 2 shows
        the drawdown with 5% and 10% reference lines.  Panel 3 shows daily
        returns as bars.

        Args:
            filename: File name written under ``./artifacts/results/figures/``.
            include_regimes: When true and a ``'regime'`` column exists, mark
                regime changes and also call ``self.plot_regime_distribution``
                with ``"regimes_" + filename``.

        Returns:
            ``True`` on success, ``False`` if any exception was raised (it is
            logged), and ``None`` when ``self.equity_curve`` is ``None``.
        """
        if self.equity_curve is None:
            logging.warning("No hay curva de equity disponible.")
            return
        
        try:
            curve = self.equity_curve
            fig = plt.figure(figsize=(14, 10))
            
            # The inner try/finally guarantees the figure is released even when
            # drawing or saving fails, so failed calls do not accumulate open figures
            try:
                # Panel 1: equity
                ax1 = fig.add_subplot(3, 1, 1)
                
                if 'costs' in curve.columns:
                    # Gross equity: start from the first equity value and compound the
                    # gross returns of every later row (same recurrence as a row-by-row loop)
                    factors = 1.0 + curve['returns'].to_numpy(dtype=float)
                    if len(factors) > 0:
                        factors[0] = curve['equity'].iloc[0]
                    gross_equity = pd.Series(np.cumprod(factors), index=curve.index)
                    
                    ax1.plot(curve.index, gross_equity, 
                            alpha=0.5, linestyle='--', color='skyblue', 
                            label='Equity Bruta (sin costos)')
                
                ax1.plot(curve.index, curve['equity'], 
                       color='blue', linewidth=1.5, label='Equity Neta')
                
                # Vertical markers where the regime value changes
                if include_regimes and 'regime' in curve.columns:
                    regime_changes = curve['regime'].diff().fillna(0) != 0
                    change_dates = curve.index[regime_changes]
                    change_regimes = curve.loc[regime_changes, 'regime']
                    
                    regime_styles = {1: ('green', 'Régimen 1'), 2: ('orange', 'Régimen 2')}
                    labelled = set()
                    
                    for date, regime in zip(change_dates, change_regimes):
                        # Any value other than 1 or 2 is drawn as regime 3
                        color, label = regime_styles.get(regime, ('red', 'Régimen 3'))
                        
                        # Label each regime once so every regime gets exactly one legend entry
                        if label in labelled:
                            ax1.axvline(x=date, color=color, linestyle='-', alpha=0.3)
                        else:
                            ax1.axvline(x=date, color=color, linestyle='-', alpha=0.3, label=label)
                            labelled.add(label)
                
                if hasattr(self, 'benchmark_equity'):
                    ax1.plot(curve.index, self.benchmark_equity, 
                           color='gray', linestyle='-.', label='Benchmark')
                
                ax1.set_title('Curva de Equity', fontsize=12, fontweight='bold')
                ax1.set_ylabel('Equity', fontsize=10)
                ax1.legend(loc='upper left')
                ax1.grid(True, alpha=0.3)
                ax1.set_axisbelow(True)
                
                # Panel 2: drawdown
                ax2 = fig.add_subplot(3, 1, 2, sharex=ax1)
                ax2.fill_between(curve.index, curve['drawdown'], 
                                alpha=0.3, color='red')
                ax2.plot(curve.index, curve['drawdown'], 
                        color='red', label='Drawdown')
                
                ax2.axhline(y=0.05, color='orange', linestyle='--', alpha=0.5, label='5%')
                ax2.axhline(y=0.1, color='red', linestyle='--', alpha=0.5, label='10%')
                
                ax2.set_title('Drawdown', fontsize=12, fontweight='bold')
                ax2.set_ylabel('Drawdown', fontsize=10)
                # Leave autoscaling in place when there is no drawdown, instead of
                # requesting a zero-height axis
                drawdown_top = min(1.0, curve['drawdown'].max() * 1.5)
                if drawdown_top > 0:
                    ax2.set_ylim(0, drawdown_top)
                ax2.legend(loc='upper right')
                ax2.grid(True, alpha=0.3)
                ax2.set_axisbelow(True)
                
                # Panel 3: daily returns
                ax3 = fig.add_subplot(3, 1, 3, sharex=ax1)
                
                if 'returns' in curve.columns and 'costs' in curve.columns:
                    # Gross returns, costs (drawn negative) and net returns overlaid
                    ax3.bar(curve.index, curve['returns'], 
                          width=1.0, color='green', alpha=0.5, label='Retornos Brutos')
                    ax3.bar(curve.index, -curve['costs'], 
                          width=1.0, color='red', alpha=0.5, label='Costos')
                    ax3.bar(curve.index, curve['net_returns'], 
                          width=0.5, color='blue', alpha=0.7, label='Retornos Netos')
                else:
                    # Net returns only, coloured by sign
                    positive_returns = curve['net_returns'] > 0
                    negative_returns = curve['net_returns'] < 0
                    
                    ax3.bar(curve.index[positive_returns], 
                          curve.loc[positive_returns, 'net_returns'], 
                          width=1.0, color='green', alpha=0.7, label='Retornos Positivos')
                    ax3.bar(curve.index[negative_returns], 
                          curve.loc[negative_returns, 'net_returns'], 
                          width=1.0, color='red', alpha=0.7, label='Retornos Negativos')
                
                ax3.set_title('Retornos Diarios', fontsize=12, fontweight='bold')
                ax3.set_ylabel('Retorno', fontsize=10)
                ax3.legend(loc='upper right')
                ax3.grid(True, alpha=0.3)
                ax3.set_axisbelow(True)
                
                fig.tight_layout()
                
                fig.savefig(f'./artifacts/results/figures/{filename}', dpi=300, bbox_inches='tight')
            finally:
                plt.close(fig)
            
            logging.info(f"Gráfico de equity guardado como {filename}")
            
            # Companion regime chart
            if include_regimes and 'regime' in self.equity_curve.columns:
                self.plot_regime_distribution(f"regimes_{filename}")
            
            return True
            
        except Exception as e:
            logging.error(f"Error generando gráfico: {str(e)}")
            logging.error(traceback.format_exc())
            return False
    
    def plot_regime_distribution(self, filename='regime_distribution.png'):
        """Plot the regime distribution and the equity path seen in each regime.

        Builds a two-panel figure from ``self.equity_curve``: a pie chart with
        the number of rows per value of the ``regime`` column, and one
        normalised equity line per regime plotted against the number of rows
        elapsed inside that regime. A text box with the per-regime return, its
        252-day annualisation and (when a ``net_returns`` column exists) a
        simple Sharpe ratio is placed below the panels.

        Both panels use the same regime -> colour mapping, so a regime has the
        same colour in the pie chart and in the line chart.

        Args:
            filename: File name written under ``./artifacts/results/figures/``.

        Returns:
            True when the figure was saved. False when there is no ``regime``
            column or when any error occurred (the error is logged).
        """
        fig = None
        try:
            if 'regime' not in self.equity_curve.columns:
                logging.warning("No hay información de regímenes disponible.")
                return False

            # Palette indexed by regime number (1-based): 1 -> green, 2 -> orange, 3 -> red.
            colors = ['green', 'orange', 'red']

            def regime_color(regime):
                # Anything outside 1..len(colors) is drawn in gray; in particular
                # regime 0 must not wrap around to the last palette entry.
                position = int(regime) - 1
                return colors[position] if 0 <= position < len(colors) else 'gray'

            fig = plt.figure(figsize=(14, 8))

            # Panel 1: regime distribution.
            ax1 = plt.subplot(1, 2, 1)
            # Sort by regime (value_counts sorts by frequency) so that slice
            # order and colours are stable and match the second panel.
            regime_counts = self.equity_curve['regime'].value_counts().sort_index()

            labels = [f'Régimen {i} ({n} días)' for i, n in regime_counts.items()]
            pie_colors = [regime_color(regime) for regime in regime_counts.index]

            ax1.pie(regime_counts, labels=labels, autopct='%1.1f%%',
                  startangle=90, shadow=False, colors=pie_colors)
            ax1.set_title('Distribución de Regímenes', fontsize=12, fontweight='bold')

            # Panel 2: performance per regime.
            ax2 = plt.subplot(1, 2, 2)

            regime_returns = {}

            # Rows without a regime label are ignored (value_counts above
            # ignores them as well).
            for regime in sorted(self.equity_curve['regime'].dropna().unique()):
                regime_mask = self.equity_curve['regime'] == regime
                regime_curve = self.equity_curve.loc[regime_mask]

                if len(regime_curve) > 0:
                    # NOTE(review): the rows of one regime are not necessarily
                    # contiguous in time, so this ratio also contains equity
                    # changes that happened while other regimes were active.
                    # Confirm whether compounding per-regime returns is wanted.
                    normalized_equity = regime_curve['equity'] / regime_curve['equity'].iloc[0]

                    # Relative row count on the x axis makes regimes comparable.
                    days = np.arange(len(normalized_equity))

                    total_return = normalized_equity.iloc[-1] - 1
                    regime_returns[regime] = total_return

                    ax2.plot(days, normalized_equity,
                           color=regime_color(regime),
                           label=f'Régimen {int(regime)}: {total_return*100:.1f}%')

            ax2.set_title('Rendimiento por Régimen (Normalizado)', fontsize=12, fontweight='bold')
            ax2.set_xlabel('Días Transcurridos', fontsize=10)
            ax2.set_ylabel('Equity Normalizada', fontsize=10)
            ax2.legend(loc='upper left')
            ax2.grid(True, alpha=0.3)
            ax2.set_axisbelow(True)

            # Text box with detailed per-regime metrics.
            metrics_text = "Métricas por Régimen:\n"

            for regime in sorted(regime_returns.keys()):
                regime_mask = self.equity_curve['regime'] == regime
                regime_days = regime_mask.sum()

                if regime_days > 0:
                    regime_return = regime_returns[regime]
                    # Annualise assuming 252 rows per year.
                    annualized = (1 + regime_return) ** (252 / regime_days) - 1

                    if 'net_returns' in self.equity_curve.columns:
                        regime_vol = self.equity_curve.loc[regime_mask, 'net_returns'].std() * np.sqrt(252)

                        # Simple Sharpe: annualised return over annualised volatility;
                        # a zero or NaN volatility yields 0.
                        sharpe = annualized / regime_vol if regime_vol > 0 else 0

                        metrics_text += f"Régimen {int(regime)}: {regime_days} días, {regime_return*100:.1f}% ({annualized*100:.1f}% anual), Sharpe: {sharpe:.2f}\n"
                    else:
                        metrics_text += f"Régimen {int(regime)}: {regime_days} días, {regime_return*100:.1f}% ({annualized*100:.1f}% anual)\n"

            plt.figtext(0.5, 0.01, metrics_text, ha='center', fontsize=10,
                      bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

            # Leave room at the bottom for the metrics box.
            plt.tight_layout(rect=[0, 0.05, 1, 0.95])

            plt.savefig(f'./artifacts/results/figures/{filename}', dpi=300, bbox_inches='tight')

            logging.info(f"Gráfico de regímenes guardado como {filename}")
            return True

        except Exception as e:
            logging.error(f"Error generando gráfico de regímenes: {str(e)}")
            logging.error(traceback.format_exc())
            return False
        finally:
            # Close the figure on every path so a failure does not leak it.
            if fig is not None:
                plt.close(fig)
    
    def save_performance_summary(self, filename='performance_summary.json'):
        """Write ``self.metrics`` plus run metadata to a JSON file.

        The output object has two keys: ``metrics`` (a JSON-safe copy of
        ``self.metrics``) and ``metadata`` (generation time, a JSON-safe copy
        of ``self.config``, and the first/last index value and row count of
        ``self.equity_curve``).

        NumPy scalars and arrays, tuples, datetime-like values and NumPy-typed
        dictionary keys are converted to plain Python values first, because
        ``json`` cannot serialise them.

        Args:
            filename: File name written under ``./artifacts/results/data/``.

        Returns:
            True when the file was written, False on any error (logged).
        """
        try:
            def to_json_key(key):
                # json accepts only str/int/float/bool/None keys.
                if isinstance(key, np.generic):
                    return key.item()
                if isinstance(key, datetime):
                    return key.isoformat()
                return key

            def to_jsonable(obj):
                if isinstance(obj, dict):
                    return {to_json_key(k): to_jsonable(v) for k, v in obj.items()}
                if isinstance(obj, (list, tuple)):
                    return [to_jsonable(item) for item in obj]
                if isinstance(obj, np.ndarray):
                    return to_jsonable(obj.tolist())
                if isinstance(obj, np.generic):
                    # Covers every NumPy scalar type (floats, ints, bools).
                    return obj.item()
                if isinstance(obj, datetime):
                    # pandas.Timestamp is a datetime subclass.
                    return obj.isoformat()
                return obj

            curve = self.equity_curve
            # An empty curve has no first/last index value to report.
            has_rows = curve is not None and len(curve) > 0

            summary = {
                'metrics': to_jsonable(self.metrics),
                'metadata': {
                    'generated_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                    'strategy_config': to_jsonable(self.config),
                    'data_period': {
                        'start': curve.index[0].strftime('%Y-%m-%d') if has_rows else None,
                        'end': curve.index[-1].strftime('%Y-%m-%d') if has_rows else None,
                        'days': len(curve) if curve is not None else 0
                    }
                }
            }

            # Serialise before opening the file so that a serialisation error
            # cannot leave a truncated JSON file behind.
            payload = json.dumps(summary, indent=2)
            with open(f'./artifacts/results/data/{filename}', 'w') as f:
                f.write(payload)

            logging.info(f"Resumen de rendimiento guardado como {filename}")
            return True

        except Exception as e:
            logging.error(f"Error guardando resumen: {str(e)}")
            logging.error(traceback.format_exc())
            return False
    
    def save_trade_log(self, filename='trade_log.csv'):
        """Write ``self.trades_log`` to CSV and trigger the trade analysis.

        After the CSV is written, ``self.analyze_trades`` is called with the
        file name ``trade_analysis_<stem>.json``, where ``<stem>`` is
        ``filename`` without its extension. The analysis file therefore
        always carries a ``.json`` extension, whatever extension ``filename``
        has.

        Args:
            filename: File name written under ``./artifacts/results/data/``.

        Returns:
            True when the CSV was written. False when there is no trade log
            or when any error occurred (logged with its traceback).
        """
        try:
            trades = getattr(self, 'trades_log', None)
            if trades is None or len(trades) == 0:
                logging.warning("No hay registro de operaciones disponible.")
                return False

            trades.to_csv(f'./artifacts/results/data/{filename}', index=False)

            # Strip only the final extension; a plain str.replace would also
            # rewrite '.csv' occurring elsewhere in the name and would leave
            # other extensions untouched.
            stem = os.path.splitext(filename)[0]
            self.analyze_trades(f"trade_analysis_{stem}.json")

            logging.info(f"Registro de operaciones guardado como {filename}")
            return True

        except Exception as e:
            logging.error(f"Error guardando registro de operaciones: {str(e)}")
            logging.error(traceback.format_exc())
            return False
    
    def analyze_trades(self, filename='trade_analysis.json'):
        """Summarise ``self.trades_log`` and write the result as JSON.

        The summary contains overall statistics (counts, win rate, profit
        factor, average/extreme P&L, holding period, costs) and breakdowns by
        ``regime`` (when that column exists), by quartile of absolute
        ``position_size`` and by tercile of ``days_held``.

        The trade log itself is not modified. Quantile buckets fall back to a
        rank-based split when the raw values contain so many ties that
        ``pandas.qcut`` cannot build distinct bin edges; a breakdown is left
        empty when no split is possible at all.

        Args:
            filename: File name written under ``./artifacts/results/data/``.

        Returns:
            True when the file was written. False when there is no trade log
            or when any error occurred (logged with its traceback).
        """
        try:
            if not hasattr(self, 'trades_log') or self.trades_log is None or len(self.trades_log) == 0:
                logging.warning("No hay registro de operaciones para análisis.")
                return False

            # reset_index returns a new frame with a clean positional index,
            # so nothing below can alter self.trades_log.
            trades = self.trades_log.reset_index(drop=True)

            net_pnl = trades['net_pnl']
            gross_pnl = trades['gross_pnl']
            wins = net_pnl > 0
            losses = net_pnl <= 0

            gross_profit = gross_pnl[gross_pnl > 0].sum()
            gross_loss = abs(gross_pnl[gross_pnl < 0].sum())
            total_gross = gross_pnl.sum()
            total_costs = trades['costs'].sum()

            # Basic analysis.
            # NOTE: with no losing trade profit_factor is float('inf'), which
            # json writes as the non-standard token ``Infinity``.
            analysis = {
                'total_trades': len(trades),
                'winning_trades': int(wins.sum()),
                'losing_trades': int(losses.sum()),
                'win_rate': float(wins.mean()),
                'profit_factor': float(gross_profit / gross_loss) if gross_loss > 0 else float('inf'),
                'avg_profit': float(net_pnl.mean()),
                'avg_win': float(net_pnl[wins].mean()) if wins.any() else 0,
                'avg_loss': float(net_pnl[losses].mean()) if losses.any() else 0,
                'max_win': float(net_pnl.max()),
                'max_loss': float(net_pnl.min()),
                'avg_holding_period': float(trades['days_held'].mean()),
                'total_costs': float(total_costs),
                'cost_impact': float(total_costs / total_gross) if total_gross != 0 else 0
            }

            # Breakdown by regime; rows without a regime label are skipped
            # because int(NaN) is undefined.
            if 'regime' in trades.columns:
                regime_analysis = {}
                for regime in trades['regime'].dropna().unique():
                    regime_trades = trades[trades['regime'] == regime]
                    regime_analysis[int(regime)] = {
                        'count': len(regime_trades),
                        'pct_total': float(len(regime_trades) / len(trades)),
                        'win_rate': float((regime_trades['net_pnl'] > 0).mean()),
                        'avg_profit': float(regime_trades['net_pnl'].mean()),
                        'avg_holding_period': float(regime_trades['days_held'].mean())
                    }
                analysis['by_regime'] = regime_analysis

            def quantile_groups(values, labels):
                """Return one label per row, or None when no split is possible."""
                try:
                    return pd.qcut(values, len(labels), labels=labels)
                except ValueError:
                    # Duplicate bin edges (many tied values): split by rank,
                    # breaking ties by order of appearance.
                    pass
                try:
                    return pd.qcut(values.rank(method='first'), len(labels), labels=labels)
                except (ValueError, IndexError):
                    return None

            # Breakdown by position size.
            size_labels = ['Very Small', 'Small', 'Medium', 'Large']
            size_groups = quantile_groups(trades['position_size'].abs(), size_labels)
            size_analysis = {}
            if size_groups is not None:
                for size_group in size_labels:
                    group_trades = trades[size_groups == size_group]
                    if group_trades.empty:
                        continue
                    group_gross_abs = group_trades['gross_pnl'].abs().sum()
                    size_analysis[str(size_group)] = {
                        'count': len(group_trades),
                        'win_rate': float((group_trades['net_pnl'] > 0).mean()),
                        'avg_profit': float(group_trades['net_pnl'].mean()),
                        'cost_impact': float(group_trades['costs'].sum() / group_gross_abs)
                                      if group_gross_abs > 0 else 0
                    }
            analysis['by_position_size'] = size_analysis

            # Breakdown by holding period.
            duration_labels = ['Short', 'Medium', 'Long']
            duration_groups = quantile_groups(trades['days_held'], duration_labels)
            duration_analysis = {}
            if duration_groups is not None:
                for duration_group in duration_labels:
                    group_trades = trades[duration_groups == duration_group]
                    if group_trades.empty:
                        continue
                    duration_analysis[str(duration_group)] = {
                        'count': len(group_trades),
                        'win_rate': float((group_trades['net_pnl'] > 0).mean()),
                        'avg_profit': float(group_trades['net_pnl'].mean()),
                        'avg_days': float(group_trades['days_held'].mean())
                    }
            analysis['by_duration'] = duration_analysis

            # Serialise first so a failure cannot leave a truncated file.
            payload = json.dumps(analysis, indent=2)
            with open(f'./artifacts/results/data/{filename}', 'w') as f:
                f.write(payload)

            logging.info(f"Análisis de operaciones guardado como {filename}")
            return True

        except Exception as e:
            logging.error(f"Error en análisis de operaciones: {str(e)}")
            logging.error(traceback.format_exc())
            return False

# Función principal
def main():
    """Run the statistical-arbitrage pipeline end to end.

    Steps: fetch the S&P 500 tickers, download and preprocess price history,
    initialise a ``StatArbStrategy``, run a backtest and, when the backtest
    produced an equity curve, run the walk-forward validation, generate the
    figures and save the metrics, trade log and walk-forward results.

    Returns:
        The strategy object on success (also when the backtest returned no
        equity curve), or None when any step raised; the error is logged and
        printed.
    """
    print("Iniciando implementación de estrategia de arbitraje estadístico...")

    try:
        # Strategy configuration.
        config = {
            'min_data_years': 5,
            'max_pairs_per_regime': {1: 25, 2: 20, 3: 15},
            'min_liquidity': 10e6,
            'recalibration_days': 5,
            'transaction_costs': True,
            'include_market_impact': True,
            'circuit_breakers_enabled': True
        }

        # Data period: end yesterday so no incomplete or future bar is requested.
        # NOTE(review): timedelta counts calendar days, so 252*12 days is about
        # 8.3 calendar years, not a whole number of 252-session trading years.
        # The value is kept as is; confirm the intended history length.
        end_date = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')
        start_date = (datetime.now() - timedelta(days=252*12)).strftime('%Y-%m-%d')

        print(f"Obteniendo tickers del S&P 500 para período {start_date} - {end_date}")
        tickers, sector_map, subsector_map = get_sp500_tickers()

        # For quick test runs, slice `tickers` here (for example tickers[:100]).

        # Download data (no future dates).
        print("Descargando datos históricos...")
        data_dict = get_historical_data(tickers, start_date, end_date)

        # Require a minimum universe. Count defensively: a None result must
        # produce this error rather than a TypeError from len(None).
        available_tickers = len(data_dict) if data_dict else 0
        if available_tickers < 10:
            raise ValueError(f"Datos insuficientes: solo {available_tickers} tickers disponibles")

        # Preprocess data.
        print("Procesando datos...")
        processed_data = preprocess_data(data_dict)

        # Build and initialise the strategy.
        strategy = StatArbStrategy(config=config)

        print("Inicializando estrategia...")
        strategy.initialize(processed_data, sector_map, subsector_map)

        # Run the backtest.
        print("Ejecutando backtest...")
        # The start is passed as a Timestamp so it can be compared with the
        # price index. NOTE(review): 252*2 calendar days is about 1.4 years.
        backtest_start = pd.Timestamp((datetime.now() - timedelta(days=252*2)).strftime('%Y-%m-%d'))
        equity_curve = strategy.backtest(processed_data, sector_map, subsector_map, backtest_start)

        if equity_curve is None:
            # Not fatal: report it and still return the strategy object.
            print("Advertencia: El backtest no generó resultados, verifique los logs para más detalles")
        else:
            # Walk-forward validation with a reduced number of windows.
            print("Ejecutando validación walk-forward...")
            wf_results = strategy.walk_forward_test(
                processed_data, sector_map, subsector_map,
                training_years=3, validation_months=3, test_months=3,
                num_windows=2, strict_separation=True
            )

            # Figures.
            print("Generando visualizaciones...")
            strategy.plot_equity_curve('equity_curve_total.png', include_regimes=True)

            # Z-score charts for at most the first three open pairs.
            if strategy.current_positions:
                print("Graficando z-scores de pares seleccionados...")
                for pair_id in list(strategy.current_positions.keys())[:3]:
                    plot_pair_zscore(strategy, pair_id, processed_data,
                                   f"pair_{pair_id.replace('_', '-')}.png")

            # Metrics and trade log.
            print("Guardando métricas de rendimiento...")
            strategy.save_performance_summary('performance_summary.json')
            strategy.save_trade_log('trade_log.csv')

            # Walk-forward results and comparison chart.
            if wf_results:
                print("Guardando resultados de walk-forward...")
                save_walkforward_results(wf_results, 'walkforward_results.json')

                plot_walkforward_results(wf_results, 'walkforward_comparison.png')

            # Console summary.
            print_performance_summary(strategy.metrics)

        print("Implementación completada con éxito.")
        return strategy

    except Exception as e:
        logging.error(f"Error en ejecución principal: {str(e)}")
        logging.error(traceback.format_exc())
        print(f"Error: {str(e)}")
        return None

# Funciones auxiliares para análisis y visualización
def plot_pair_zscore(strategy, pair_id, data, filename=None):
    """Plot the recent spread and z-score of one open pair.

    The spread is ``price1 + hedge_ratio * price2`` over the last (up to) 252
    rows of ``data['prices']``. Its mean and standard deviation are
    exponentially weighted, with the half-life taken from the position
    (default: a quarter of the window) and clamped to [5, window/2]; the
    standard deviation uses a third of that half-life.

    Relies on the module-level ``plt``, ``logging`` and ``traceback`` imports.

    Args:
        strategy: Object exposing ``current_positions``, a mapping from pair
            id to a dict with ``ticker1``, ``ticker2``, ``hedge_ratio``,
            ``signal``, ``strength`` and optionally ``half_life``.
        pair_id: Key of the pair in ``strategy.current_positions``.
        data: Mapping whose ``'prices'`` entry is a DataFrame with one column
            per ticker.
        filename: File name under ``./artifacts/results/figures/``; when
            omitted, ``pair_<ticker1>_<ticker2>_zscore.png`` is used.

    Returns:
        True when the figure was saved. False when the pair is unknown, when
        there are fewer than 30 rows or any missing price, or on any error
        (logged with its traceback).
    """
    if pair_id not in strategy.current_positions:
        logging.warning(f"Par {pair_id} no encontrado en posiciones actuales")
        return False

    fig = None
    try:
        position = strategy.current_positions[pair_id]
        ticker1 = position['ticker1']
        ticker2 = position['ticker2']
        hedge_ratio = position['hedge_ratio']

        # Spread and z-score over an adaptive window.
        lookback = min(252, len(data['prices']))
        prices = data['prices'].iloc[-lookback:][[ticker1, ticker2]]

        if len(prices) < 30 or prices[ticker1].isna().any() or prices[ticker2].isna().any():
            logging.warning(f"Datos insuficientes para visualizar {pair_id}")
            return False

        # NOTE(review): the hedge ratio is added, not subtracted; presumably
        # it is stored with its sign already applied. Confirm against the
        # code that fills current_positions.
        spread = prices[ticker1] + hedge_ratio * prices[ticker2]

        # Exponentially weighted mean and deviation. A half-life stored as
        # None is treated like a missing one instead of crashing in max().
        half_life = position.get('half_life')
        if half_life is None:
            half_life = lookback/4
        half_life = min(max(5, half_life), lookback/2)

        spread_mean = spread.ewm(halflife=half_life).mean()
        spread_std = spread.ewm(halflife=half_life/3).std()

        z_score = (spread - spread_mean) / spread_std

        fig = plt.figure(figsize=(14, 10))

        # Panel 1: spread with volatility bands.
        ax1 = plt.subplot(2, 1, 1)
        ax1.plot(spread.index, spread, label='Spread', color='blue')
        ax1.plot(spread_mean.index, spread_mean, label='Media Móvil',
               linestyle='--', color='red')

        ax1.fill_between(
            spread.index,
            spread_mean + 1*spread_std,
            spread_mean - 1*spread_std,
            alpha=0.2, color='gray', label='±1σ'
        )
        ax1.fill_between(
            spread.index,
            spread_mean + 2*spread_std,
            spread_mean - 2*spread_std,
            alpha=0.1, color='red', label='±2σ'
        )

        # The title shows the half-life as stored on the position, not the
        # clamped value used for the smoothing above.
        ax1.set_title(f'Spread: {ticker1} - {ticker2}  (HR: {hedge_ratio:.4f}, Half-life: {position.get("half_life", "N/A")} días)',
                    fontsize=12, fontweight='bold')
        ax1.set_ylabel('Spread', fontsize=10)
        ax1.legend(loc='upper left')
        ax1.grid(True, alpha=0.3)

        # Panel 2: z-score.
        ax2 = plt.subplot(2, 1, 2, sharex=ax1)
        ax2.plot(z_score.index, z_score, label='Z-Score', color='blue')

        # Reference lines at 0, ±1 and ±2.
        ax2.axhline(y=0, color='black', linestyle='-', alpha=0.3)
        ax2.axhline(y=1, color='orange', linestyle='--', alpha=0.5, label='±1')
        ax2.axhline(y=-1, color='orange', linestyle='--', alpha=0.5)
        ax2.axhline(y=2, color='red', linestyle='--', alpha=0.5, label='±2')
        ax2.axhline(y=-2, color='red', linestyle='--', alpha=0.5)

        # Marker for the latest value: green below -1, red above +1, gray otherwise.
        current_z = z_score.iloc[-1]
        current_color = 'green' if current_z < -1 else 'red' if current_z > 1 else 'gray'
        ax2.plot(z_score.index[-1], current_z, 'o', color=current_color,
               markersize=8, label=f'Actual: {current_z:.2f}')

        # Current signal stored on the position.
        signal_label = "Compra" if position['signal'] > 0 else "Venta" if position['signal'] < 0 else "Neutral"
        ax2.text(0.02, 0.95, f"Señal: {signal_label} (Fuerza: {position['strength']:.2f})",
               transform=ax2.transAxes, fontsize=10, verticalalignment='top',
               bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.3))

        ax2.set_title('Z-Score del Spread', fontsize=12, fontweight='bold')
        ax2.set_ylabel('Z-Score', fontsize=10)
        ax2.set_ylim(-4, 4)
        ax2.legend(loc='upper right')
        ax2.grid(True, alpha=0.3)

        plt.tight_layout()

        if filename:
            plt.savefig(f'./artifacts/results/figures/{filename}', dpi=300, bbox_inches='tight')
        else:
            plt.savefig(f'./artifacts/results/figures/pair_{ticker1}_{ticker2}_zscore.png',
                       dpi=300, bbox_inches='tight')

        return True

    except Exception as e:
        logging.error(f"Error visualizando par {pair_id}: {str(e)}")
        logging.error(traceback.format_exc())
        return False
    finally:
        # Close the figure on every path so a failure does not leak it.
        if fig is not None:
            plt.close(fig)

def save_walkforward_results(wf_results, filename):
    """Save walk-forward results as JSON plus one equity CSV per window.

    The JSON file holds the ``windows``, ``metrics`` and (optional)
    ``avg_metrics`` entries of ``wf_results``. Each DataFrame in
    ``wf_results['equity_curves']`` is written to
    ``wf_window_<n>_equity.csv`` (n starts at 1).

    Datetime-like values (the window boundaries), NumPy scalars and arrays,
    tuples and NumPy-typed dictionary keys are converted to plain Python
    values first, because ``json`` cannot serialise them.

    Args:
        wf_results: Mapping with ``windows`` and ``metrics`` and optionally
            ``avg_metrics`` and ``equity_curves``.
        filename: JSON file name under ``./artifacts/results/data/``.

    Returns:
        True when everything was written, False on any error (logged with
        its traceback).
    """
    try:
        def to_json_key(key):
            # json accepts only str/int/float/bool/None keys.
            if isinstance(key, np.generic):
                return key.item()
            if isinstance(key, datetime):
                return key.isoformat()
            return key

        def to_jsonable(obj):
            if isinstance(obj, dict):
                return {to_json_key(k): to_jsonable(v) for k, v in obj.items()}
            if isinstance(obj, (list, tuple)):
                return [to_jsonable(item) for item in obj]
            if isinstance(obj, np.ndarray):
                return to_jsonable(obj.tolist())
            if isinstance(obj, np.generic):
                # Covers every NumPy scalar type (floats, ints, bools).
                return obj.item()
            if isinstance(obj, datetime):
                # pandas.Timestamp is a datetime subclass.
                return obj.isoformat()
            return obj

        serializable_results = to_jsonable({
            'windows': wf_results['windows'],
            'metrics': wf_results['metrics'],
            'avg_metrics': wf_results.get('avg_metrics', {})
        })

        # Serialise before opening the file so that a serialisation error
        # cannot leave a truncated JSON file behind.
        payload = json.dumps(serializable_results, indent=2)
        with open(f'./artifacts/results/data/{filename}', 'w') as f:
            f.write(payload)

        # Equity curves go to CSV, one file per window.
        for window_number, equity_df in enumerate(wf_results.get('equity_curves') or [], start=1):
            equity_df.to_csv(f'./artifacts/results/data/wf_window_{window_number}_equity.csv')

        return True
    except Exception as e:
        logging.error(f"Error guardando resultados walk-forward: {str(e)}")
        logging.error(traceback.format_exc())
        return False

def plot_walkforward_results(wf_results, filename):
    """Plot a per-window comparison of the walk-forward test.

    Top panel: each window's equity curve normalised to start at 1.0 and
    annotated with its final return. Bottom panel: grouped bars of Sharpe,
    Sortino, annual return, max drawdown and win rate per window (drawdown
    and win rate are multiplied by 100).

    Args:
        wf_results: Mapping with ``equity_curves`` (list of DataFrames with an
            ``equity`` column) and optionally ``windows`` (dicts with a
            ``test_end`` date) and ``metrics`` (one dict per window).
        filename: File name written under ``./artifacts/results/figures/``.

    Returns:
        True when the figure was saved. False when there are no equity
        curves or on any error (logged with its traceback).
    """
    fig = None
    try:
        if not wf_results or 'equity_curves' not in wf_results or not wf_results['equity_curves']:
            logging.warning("Sin datos de equity curve para visualización walk-forward")
            return False

        windows = wf_results.get('windows') or []
        window_metrics = wf_results.get('metrics') or []

        fig = plt.figure(figsize=(14, 10))

        # Panel 1: equity curve comparison.
        ax1 = plt.subplot(2, 1, 1)

        colors = ['blue', 'green', 'red', 'purple', 'orange', 'brown']

        for i, equity_df in enumerate(wf_results['equity_curves']):
            if len(equity_df) == 0:
                # Nothing to normalise; skip instead of failing the whole plot.
                continue

            color = colors[i % len(colors)]
            # Add the test end date when the window description is available.
            if i < len(windows):
                label = f"Ventana {i+1}: {windows[i]['test_end'].strftime('%Y-%m-%d')}"
            else:
                label = f"Ventana {i+1}"

            # Normalise to 1.0 so windows are comparable.
            normalized = equity_df['equity'] / equity_df['equity'].iloc[0]
            ax1.plot(range(len(normalized)), normalized, color=color, label=label)

            # Annotate the final return at the end of the line.
            final_return = normalized.iloc[-1] - 1
            ax1.annotate(f"{final_return*100:.1f}%",
                       xy=(len(normalized)-1, normalized.iloc[-1]),
                       xytext=(len(normalized)-1, normalized.iloc[-1]),
                       color=color, fontweight='bold')

        ax1.set_title('Comparación de Rendimiento por Ventana', fontsize=12, fontweight='bold')
        ax1.set_ylabel('Equity Normalizada', fontsize=10)
        ax1.set_xlabel('Días', fontsize=10)
        ax1.legend(loc='upper left')
        ax1.grid(True, alpha=0.3)

        # Panel 2: comparative metrics.
        ax2 = plt.subplot(2, 1, 2)

        metrics = ['sharpe_ratio', 'sortino_ratio', 'annual_return', 'max_drawdown', 'win_rate']
        metric_labels = ['Sharpe', 'Sortino', 'Retorno Anual', 'Drawdown Máx', 'Win Rate']
        percent_metrics = ('max_drawdown', 'win_rate')

        # One array per metric with one value per window; missing or None
        # values count as 0 so the arithmetic below cannot fail.
        metric_arrays = {}
        for metric in metrics:
            values = []
            for window in window_metrics:
                value = window.get(metric, 0)
                values.append(0 if value is None else value)
            scale = 100 if metric in percent_metrics else 1
            metric_arrays[metric] = np.array(values) * scale

        # Grouped bars. Metric groups sit one unit apart, so the bar width is
        # capped to keep a group within 0.8 units however many windows exist.
        num_windows = len(window_metrics)
        width = min(0.15, 0.8 / max(num_windows, 1))
        indices = np.arange(len(metrics))

        for i in range(num_windows):
            window_values = [metric_arrays[metric][i] for metric in metrics]
            ax2.bar(indices + i*width, window_values, width, label=f'Ventana {i+1}',
                  color=colors[i % len(colors)])

        ax2.set_ylabel('Valor', fontsize=10)
        ax2.set_xticks(indices + width * (num_windows-1)/2)
        ax2.set_xticklabels(metric_labels)
        ax2.legend(loc='upper right')
        ax2.set_title('Métricas por Ventana', fontsize=12, fontweight='bold')
        ax2.grid(True, alpha=0.3, axis='y')

        plt.tight_layout()
        plt.savefig(f'./artifacts/results/figures/{filename}', dpi=300, bbox_inches='tight')

        return True
    except Exception as e:
        logging.error(f"Error visualizando resultados walk-forward: {str(e)}")
        logging.error(traceback.format_exc())
        return False
    finally:
        # Close the figure on every path so a failure does not leak it.
        if fig is not None:
            plt.close(fig)

def print_performance_summary(metrics):
    """Print a human-readable performance summary to the console.

    Args:
        metrics: Mapping that may contain the keys ``'overall'``,
            ``'by_regime'`` and ``'by_sector'``. Each value is a mapping of
            metric names to numbers; missing metrics are shown as 0.

    Returns:
        None. The summary is written to stdout.

    Notes:
        This is a best-effort reporter: any unexpected error is caught and
        reported on the console instead of being propagated to the caller.
    """
    try:
        overall = metrics.get('overall', {})
        separator = "=" * 60

        print("\n" + separator)
        print("RESUMEN DE RENDIMIENTO DE LA ESTRATEGIA")
        print(separator)

        print("\nRENDIMIENTO GENERAL:")
        print(f"  Retorno Anual:      {overall.get('annual_return', 0)*100:6.2f}%")
        print(f"  Sharpe Ratio:       {overall.get('sharpe_ratio', 0):6.2f}")
        print(f"  Sortino Ratio:      {overall.get('sortino_ratio', 0):6.2f}")
        print(f"  Volatilidad Anual:  {overall.get('volatility', 0)*100:6.2f}%")
        print(f"  Drawdown Máximo:    {overall.get('max_drawdown', 0)*100:6.2f}%")
        print(f"  Win Rate:           {overall.get('win_rate', 0)*100:6.2f}%")
        print(f"  Profit Factor:      {overall.get('profit_factor', 0):6.2f}")
        print(f"  Impacto de Costos:  {overall.get('cost_impact', 0)*100:6.2f}%")

        print("\nRENDIMIENTO POR RÉGIMEN:")
        regime_metrics = metrics.get('by_regime', {})
        for regime, regime_data in regime_metrics.items():
            # Regimes without data (None / empty dict) are skipped.
            if regime_data:
                print(f"  Régimen {regime}:")
                print(f"    Tiempo en régimen: {regime_data.get('pct_time', 0)*100:6.2f}%")
                print(f"    Retorno:           {regime_data.get('return', 0)*100:6.2f}%")
                print(f"    Sharpe:            {regime_data.get('sharpe', 0):6.2f}")
                print(f"    Win Rate:          {regime_data.get('win_rate', 0)*100:6.2f}%")

        print("\nMEJORES SECTORES:")
        sector_metrics = metrics.get('by_sector', {})

        # Skip sectors without data, mirroring the regime loop above; a None
        # entry would otherwise make the sort key raise and abort the section.
        populated_sectors = [
            (sector, sector_data)
            for sector, sector_data in sector_metrics.items()
            if sector_data
        ]

        # Rank sectors by profit factor and keep the top 3.
        top_sectors = sorted(
            populated_sectors,
            key=lambda item: item[1].get('profit_factor', 0),
            reverse=True,
        )[:3]

        for sector, sector_data in top_sectors:
            # The 'd' format code rejects floats, and counts produced by
            # numeric aggregation are often floats, so coerce to int first.
            trade_count = int(sector_data.get('trade_count', 0))
            print(f"  {sector}:")
            print(f"    Trades:       {trade_count:6d}")
            print(f"    Win Rate:     {sector_data.get('win_rate', 0)*100:6.2f}%")
            print(f"    Profit Factor:{sector_data.get('profit_factor', 0):6.2f}")

        print("\n" + separator)

    except Exception as e:
        print(f"Error imprimiendo resumen: {str(e)}")

if __name__ == "__main__":
    # Script entry point: run the strategy, report wall-clock time, and
    # record any failure in the log, the error file and on the console.
    try:
        start_time = time.time()

        # main() runs the full strategy; its result is kept at module level.
        strategy = main()

        elapsed_time = time.time() - start_time
        print(f"\nEjecución completada en {elapsed_time/60:.2f} minutos")

    except Exception as exc:
        logging.error(f"Error en ejecución principal: {str(exc)}")

        # Append a timestamped header followed by the full traceback.
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        with open('./artifacts/errors.txt', 'a') as error_file:
            error_file.write(f"\n[{timestamp}] ERROR EN EJECUCIÓN PRINCIPAL:\n")
            error_file.write(traceback.format_exc())

        print(f"Error: {str(exc)}")

Iniciando implementación de estrategia de arbitraje estadístico...
Obteniendo tickers del S&P 500 para período 2017-01-04 - 2025-04-15
Descargando datos históricos...


Descargando datos:   0%|                                | 0/503 [00:00<?, ?it/s]

YF.download() has changed argument auto_adjust default to True


Descargando datos: 100%|██████████████████████| 503/503 [02:34<00:00,  3.26it/s]


Datos obtenidos para 503 tickers. Fallaron 0 tickers.
Procesando datos...
Inicializando estrategia...
Ejecutando backtest...
