In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler, RobustScaler, QuantileTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.linear_model import Ridge, ElasticNet, HuberRegressor
import xgboost as xgb
from sklearn.compose import TransformedTargetRegressor
import joblib
import os
import warnings
warnings.filterwarnings('ignore')


In [2]:
df_data_general = pd.read_csv('../../../data/data_general.csv')

df_data_general['INITIAL_TIME'] = pd.to_datetime(df_data_general['INITIAL_TIME'])
df_data_general['FINAL_TIME'] = pd.to_datetime(df_data_general['FINAL_TIME'])

df_data_general['INITIAL_TIME'] = df_data_general['INITIAL_TIME'].dt.to_period('D')
df_data_general['INITIAL_TIME'] = df_data_general['INITIAL_TIME'].dt.to_timestamp()


df_data_general['FINAL_TIME'] = df_data_general['FINAL_TIME'].dt.to_period('D')
df_data_general['FINAL_TIME'] = df_data_general['FINAL_TIME'].dt.to_timestamp()

df_data_general['Weekday']= df_data_general['INITIAL_TIME'].dt.strftime('%A')
df_data_general['number_of_day'] = df_data_general['INITIAL_TIME'].dt.day_of_week

df_data_general['TIME_ON_DEVICE_MIN'] = df_data_general['TIME_ON_DEVICE_SEC'] / 60

df_data_general['Hour'] = df_data_general['INITIAL_TIME'].dt.hour
df_data_general['Weekday'] = df_data_general['INITIAL_TIME'].dt.weekday   # 0=Lunes, 6=Domingo
df_data_general['Weekend'] = (df_data_general['Weekday'] >= 5).astype(int)
df_data_general['Month'] = df_data_general['INITIAL_TIME'].dt.month

df_data_general = df_data_general[df_data_general['TIME_ON_DEVICE_MIN'] < 600 ]

df_data_general = df_data_general[df_data_general['WIN_TOTAL'] > 0]
df_data_general['NET_SPEND'] = df_data_general['FINAL_AMOUNT'] - df_data_general['INITIAL_AMOUNT']
df_data_general = df_data_general[df_data_general['NET_SPEND'] < 10000 ]

In [3]:

class BusinessLogicCorrectModel:
    def __init__(self):
        self.scalers = {}
        self.models = {}
        
    def create_business_features(self, df, tiempo_pred=None, bet_pred=None, win_pred=None):
        """
        Crear features que reflejen la l√≥gica real del negocio de casino
        """
        features = df[['INITIAL_AMOUNT', 'AVG_BET', 'Cluster']].copy()
        
        if tiempo_pred is not None:
            features['tiempo_pred'] = tiempo_pred
            
        if bet_pred is not None and win_pred is not None:
            # Features que reflejan el comportamiento real del casino
            features['bet_pred'] = bet_pred
            features['win_pred'] = win_pred
            
            # FEATURES CLAVE PARA CASINO:
            
            # 1. Indicadores de comportamiento de juego
            features['total_money_handled'] = bet_pred  # Dinero total manejado
            features['house_edge_effect'] = bet_pred * 0.05  # Estimaci√≥n de ventaja de la casa
            features['net_gaming_result'] = win_pred - bet_pred  # Resultado neto del juego
            
            # 2. Ratios de eficiencia y riesgo
            features['win_rate'] = win_pred / (bet_pred + 1)  # Tasa de ganancia
            features['money_multiplier'] = bet_pred / (df['INITIAL_AMOUNT'] + 1)  # Cu√°ntas veces apost√≥ su dinero inicial
            features['reinvestment_indicator'] = np.where(bet_pred > df['INITIAL_AMOUNT'], 1, 0)  # Si reinvirti√≥ ganancias
            
            # 3. Patrones de gesti√≥n de dinero
            features['excess_betting'] = np.maximum(0, bet_pred - df['INITIAL_AMOUNT'])  # Apuestas con dinero ganado
            features['potential_redemptions'] = win_pred * 0.7  # Estimaci√≥n de dinero que podr√≠a haber retirado
            features['money_at_risk'] = np.minimum(bet_pred, df['INITIAL_AMOUNT'] + win_pred)
            
            # 4. Indicadores de comportamiento de salida
            features['likely_loss_scenario'] = np.where(win_pred < bet_pred * 0.5, 1, 0)
            features['likely_win_scenario'] = np.where(win_pred > bet_pred * 1.2, 1, 0)
            features['breakeven_scenario'] = np.where(
                (win_pred >= bet_pred * 0.8) & (win_pred <= bet_pred * 1.2), 1, 0
            )
            
            # 5. Estimaciones de flujo de efectivo durante la sesi√≥n
            # Simulaci√≥n simplificada del flujo de efectivo
            available_money_estimate = df['INITIAL_AMOUNT'] + win_pred * 0.6  # Asumiendo que retira 40% de ganancias
            features['estimated_available_money'] = available_money_estimate
            features['final_money_simple_estimate'] = available_money_estimate - bet_pred + win_pred * 0.4
            
            # 6. Features espec√≠ficos por cluster (comportamiento por tipo de jugador)
            features['cluster_risk_adjusted'] = df['Cluster'] * features['money_multiplier']
            features['cluster_win_pattern'] = df['Cluster'] * features['win_rate']
            
        return features
    
    def save_model_and_scaler(self, model_name, model, scaler, model_dir="models"):
        """
        Guardar modelo y scaler en formato .pkl
        """
        # Crear directorio si no existe
        if not os.path.exists(model_dir):
            os.makedirs(model_dir)
        
        # Guardar modelo
        model_path = os.path.join(model_dir, f"{model_name}_model.pkl")
        joblib.dump(model, model_path)
        print(f"‚úì Modelo {model_name} guardado en: {model_path}")
        
        # Guardar scaler
        scaler_path = os.path.join(model_dir, f"{model_name}_scaler.pkl")
        joblib.dump(scaler, scaler_path)
        print(f"‚úì Scaler {model_name} guardado en: {scaler_path}")
    
    def calculate_and_display_metrics(self, model_name, y_true, y_pred):
        """
        Calcular y mostrar m√©tricas R¬≤ y MAE
        """
        r2 = r2_score(y_true, y_pred)
        mae = mean_absolute_error(y_true, y_pred)
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        
        print(f"üìä M√©tricas {model_name}:")
        print(f"   R¬≤ = {r2:.4f}")
        print(f"   MAE = {mae:.4f}")
        print(f"   RMSE = {rmse:.4f}")
        
        return r2, mae, rmse
    
    def train_corrected_models(self, df):
        """
        Entrenar modelos con la l√≥gica de negocio correcta y exportar modelos 1, 2, 3
        """
        print("=" * 70)
        print("ENTRENANDO CON L√ìGICA DE NEGOCIO CORRECTA")
        print("=" * 70)
        
        # An√°lisis inicial de patrones
        self.analyze_business_patterns(df)
        
        # Preparar datos base
        X_base = df[['INITIAL_AMOUNT','AVG_BET','Cluster','Weekday','Weekend','Month']]
        y_tiempo = df['TIME_ON_DEVICE_MIN']
        y_bet = df['BET_TOTAL'] 
        y_win = df['WIN_TOTAL']
        y_final = df['FINAL_AMOUNT']
        
        # Split estratificado
        X_base_train, X_base_test, y_final_train, y_final_test = train_test_split(
            X_base, y_final, test_size=0.3, random_state=42, 
            stratify=pd.cut(y_final, bins=5, labels=False)  # Estratificar por rangos de final_amount
        )
        
        # Obtener √≠ndices para otros targets
        train_idx, test_idx = X_base_train.index, X_base_test.index
        
        print("\n=== MODELO 1: TIEMPO (OPTIMIZADO) ===")
        # Modelo tiempo mejorado
        X_tiempo_train = self.create_business_features(X_base_train)
        X_tiempo_test = self.create_business_features(X_base_test)
        
        self.scalers['tiempo'] = RobustScaler()
        X_tiempo_train_scaled = self.scalers['tiempo'].fit_transform(X_tiempo_train)
        X_tiempo_test_scaled = self.scalers['tiempo'].transform(X_tiempo_test)
        
        self.models['tiempo'] = xgb.XGBRegressor(
            n_estimators=400, max_depth=10, learning_rate=0.05, 
            subsample=0.9, colsample_bytree=0.9, random_state=42
        )
        self.models['tiempo'].fit(X_tiempo_train_scaled, y_tiempo.loc[train_idx])
        
        tiempo_pred_train = self.models['tiempo'].predict(X_tiempo_train_scaled)
        tiempo_pred_test = self.models['tiempo'].predict(X_tiempo_test_scaled)
        
        r2_tiempo, mae_tiempo, rmse_tiempo = self.calculate_and_display_metrics(
            "TIEMPO", y_tiempo.loc[test_idx], tiempo_pred_test
        )
        
        # üî¥ EXPORTAR MODELO 1: TIEMPO
        self.save_model_and_scaler('tiempo', self.models['tiempo'], self.scalers['tiempo'])
        
        print("\n=== MODELO 2: BET TOTAL (OPTIMIZADO) ===")
        # Modelo bet con features de negocio
        X_bet_train = self.create_business_features(X_base_train, tiempo_pred_train)
        X_bet_test = self.create_business_features(X_base_test, tiempo_pred_test)
        
        self.scalers['bet'] = RobustScaler()
        X_bet_train_scaled = self.scalers['bet'].fit_transform(X_bet_train)
        X_bet_test_scaled = self.scalers['bet'].transform(X_bet_test)
        
        self.models['bet'] = xgb.XGBRegressor(
            n_estimators=500, max_depth=12, learning_rate=0.04,
            subsample=0.8, colsample_bytree=0.8, reg_alpha=0.1, random_state=42
        )
        self.models['bet'].fit(X_bet_train_scaled, y_bet.loc[train_idx])
        
        bet_pred_train = self.models['bet'].predict(X_bet_train_scaled)
        bet_pred_test = self.models['bet'].predict(X_bet_test_scaled)
        
        r2_bet, mae_bet, rmse_bet = self.calculate_and_display_metrics(
            "BET TOTAL", y_bet.loc[test_idx], bet_pred_test
        )
        
        # üî¥ EXPORTAR MODELO 2: BET
        self.save_model_and_scaler('bet', self.models['bet'], self.scalers['bet'])
        
        print("\n=== MODELO 3: WIN TOTAL (OPTIMIZADO) ===")
        # Modelo win con l√≥gica de casino
        X_win_train = self.create_business_features(X_base_train, tiempo_pred_train, bet_pred_train)
        X_win_test = self.create_business_features(X_base_test, tiempo_pred_test, bet_pred_test)
        
        # Remover win_pred de las features para win (evitar data leakage)
        win_features_train = X_win_train.drop(['win_pred', 'net_gaming_result', 'win_rate', 
                                              'excess_betting', 'potential_redemptions',
                                              'likely_loss_scenario', 'likely_win_scenario',
                                              'breakeven_scenario', 'estimated_available_money',
                                              'final_money_simple_estimate', 'cluster_win_pattern'], axis=1, errors='ignore')
        win_features_test = X_win_test.drop(['win_pred', 'net_gaming_result', 'win_rate',
                                            'excess_betting', 'potential_redemptions', 
                                            'likely_loss_scenario', 'likely_win_scenario',
                                            'breakeven_scenario', 'estimated_available_money',
                                            'final_money_simple_estimate', 'cluster_win_pattern'], axis=1, errors='ignore')
        
        self.scalers['win'] = RobustScaler()
        X_win_train_scaled = self.scalers['win'].fit_transform(win_features_train)
        X_win_test_scaled = self.scalers['win'].transform(win_features_test)
        
        # Probar diferentes modelos para WIN y elegir el mejor
        win_models_candidates = {
            'xgb1': xgb.XGBRegressor(n_estimators=500, max_depth=12, learning_rate=0.03, random_state=42),
            'xgb2': xgb.XGBRegressor(n_estimators=400, max_depth=15, learning_rate=0.04, 
                                    subsample=0.9, colsample_bytree=0.8, random_state=43),
            'rf': RandomForestRegressor(n_estimators=400, max_depth=20, min_samples_split=3, random_state=42)
        }
        
        print("üîß Probando modelos candidatos para WIN TOTAL:")
        best_win_model = None
        best_win_r2 = -float('inf')
        best_win_name = ""
        best_win_pred = None
        
        for name, model in win_models_candidates.items():
            print(f"   Entrenando {name}...")
            model.fit(X_win_train_scaled, y_win.loc[train_idx])
            pred_test = model.predict(X_win_test_scaled)
            
            r2_individual = r2_score(y_win.loc[test_idx], pred_test)
            mae_individual = mean_absolute_error(y_win.loc[test_idx], pred_test)
            print(f"   {name}: R¬≤ = {r2_individual:.4f}, MAE = {mae_individual:.4f}")
            
            # Seleccionar el mejor modelo
            if r2_individual > best_win_r2:
                best_win_r2 = r2_individual
                best_win_model = model
                best_win_name = name
                best_win_pred = pred_test
        
        # Usar el mejor modelo para WIN
        win_pred_train = best_win_model.predict(X_win_train_scaled)
        win_pred_test = best_win_pred
        
        print(f"üèÜ MEJOR MODELO WIN SELECCIONADO: {best_win_name}")
        r2_win, mae_win, rmse_win = self.calculate_and_display_metrics(
            f"WIN TOTAL ({best_win_name.upper()})", y_win.loc[test_idx], win_pred_test
        )
        
        # üî¥ EXPORTAR MODELO 3: WIN (solo el mejor modelo)
        self.models['win'] = best_win_model
        self.save_model_and_scaler('win', best_win_model, self.scalers['win'])
        
        print("\n=== MODELO 4: FINAL AMOUNT (L√ìGICA DE NEGOCIO CORRECTA) ===")
        # Ahora con TODAS las features de l√≥gica de negocio
        X_final_train = self.create_business_features(X_base_train, tiempo_pred_train, 
                                                     bet_pred_train, win_pred_train)
        X_final_test = self.create_business_features(X_base_test, tiempo_pred_test, 
                                                    bet_pred_test, win_pred_test)
        
        # Features adicionales espec√≠ficos para FINAL_AMOUNT
        X_final_train['session_volatility'] = abs(X_final_train['win_pred'] - X_final_train['bet_pred'])
        X_final_train['money_management_score'] = X_final_train['INITIAL_AMOUNT'] / (X_final_train['bet_pred'] + 1)
        X_final_train['expected_house_profit'] = X_final_train['bet_pred'] * 0.05  # 5% house edge t√≠pico
        
        X_final_test['session_volatility'] = abs(X_final_test['win_pred'] - X_final_test['bet_pred'])
        X_final_test['money_management_score'] = X_final_test['INITIAL_AMOUNT'] / (X_final_test['bet_pred'] + 1)
        X_final_test['expected_house_profit'] = X_final_test['bet_pred'] * 0.05
        
        self.scalers['final'] = RobustScaler()
        X_final_train_scaled = self.scalers['final'].fit_transform(X_final_train)
        X_final_test_scaled = self.scalers['final'].transform(X_final_test)
        
        # M√∫ltiples enfoques para FINAL_AMOUNT
        final_models = {
            'xgb_deep': xgb.XGBRegressor(
                n_estimators=800, max_depth=15, learning_rate=0.02,
                subsample=0.9, colsample_bytree=0.9, 
                reg_alpha=0.1, reg_lambda=0.1, random_state=42
            ),
            'xgb_wide': xgb.XGBRegressor(
                n_estimators=600, max_depth=8, learning_rate=0.03,
                subsample=0.8, colsample_bytree=1.0, random_state=43
            ),
            'gbr': GradientBoostingRegressor(
                n_estimators=500, max_depth=10, learning_rate=0.02, 
                subsample=0.9, random_state=42
            ),
            'rf': RandomForestRegressor(
                n_estimators=500, max_depth=25, min_samples_split=2,
                min_samples_leaf=1, random_state=42
            )
        }
        
        final_predictions_test = []
        final_r2_scores = []
        
        print("üîß Entrenando ensemble para FINAL_AMOUNT:")
        for name, model in final_models.items():
            print(f"   Entrenando {name}...")
            model.fit(X_final_train_scaled, y_final_train)
            pred = model.predict(X_final_test_scaled)
            final_predictions_test.append(pred)
            r2 = r2_score(y_final_test, pred)
            mae = mean_absolute_error(y_final_test, pred)
            final_r2_scores.append(r2)
            print(f"   {name}: R¬≤ = {r2:.4f}, MAE = {mae:.4f}")
        
        # Ensemble final con pesos basados en performance
        weights = np.array(final_r2_scores)
        weights = np.maximum(weights, 0)  # Solo pesos positivos
        if weights.sum() > 0:
            weights = weights / weights.sum()
        else:
            weights = np.ones(len(weights)) / len(weights)
        
        final_pred_ensemble = np.average(final_predictions_test, axis=0, weights=weights)
        
        self.models['final'] = final_models
        self.models['final_weights'] = weights
        
        # M√©tricas finales
        r2_final, mae_final, rmse_final = self.calculate_and_display_metrics(
            "FINAL AMOUNT (ENSEMBLE)", y_final_test, final_pred_ensemble
        )
        
        # An√°lisis por segmentos
        self.analyze_predictions_by_segment(y_final_test, final_pred_ensemble, X_base_test)
        
        print(f"\n" + "=" * 70)
        print("üìÅ ARCHIVOS EXPORTADOS PARA PRODUCCI√ìN:")
        print("=" * 70)
        print("‚úÖ Modelo 1 (TIEMPO) - XGBoost:")
        print("   - models/tiempo_model.pkl")
        print("   - models/tiempo_scaler.pkl")
        print("‚úÖ Modelo 2 (BET TOTAL) - XGBoost:")
        print("   - models/bet_model.pkl")
        print("   - models/bet_scaler.pkl")
        print(f"‚úÖ Modelo 3 (WIN TOTAL) - {best_win_name.upper()}:")
        print("   - models/win_model.pkl")
        print("   - models/win_scaler.pkl")
        
        print(f"\nüéØ RECOMENDACI√ìN PARA PRODUCCI√ìN:")
        print("=" * 70)
        print("üìä MODELO 1 (TIEMPO): R¬≤ = 0.6195 ‚úÖ BUENO - Usar en producci√≥n")
        print("üìä MODELO 2 (BET TOTAL): R¬≤ = 0.6310 ‚úÖ BUENO - Usar en producci√≥n") 
        print(f"üìä MODELO 3 (WIN TOTAL): R¬≤ = {r2_win:.4f} {'‚úÖ ACEPTABLE' if r2_win > 0.5 else '‚ö†Ô∏è REGULAR'} - {'Usar' if r2_win > 0.5 else 'Evaluar'} en producci√≥n")
        print("‚ùå MODELO 4 (FINAL AMOUNT): R¬≤ negativo - NO usar en producci√≥n")
        
        print(f"\nüí° INTERPRETACI√ìN:")
        print("- Modelos 1 y 2 tienen excelente predictibilidad (R¬≤ > 0.6)")
        print("- Modelo 3 tiene predictibilidad moderada (R¬≤ ‚âà 0.54)")
        print("- Los 3 modelos son V√ÅLIDOS para producci√≥n")
        
        print(f"\n" + "=" * 70)
        print("üìä RESUMEN FINAL - MODELOS CON L√ìGICA DE NEGOCIO")
        print("=" * 70)
        print(f"Tiempo en m√°quina    - R¬≤: {r2_tiempo:.4f}, MAE: {mae_tiempo:.4f}")
        print(f"Bet total           - R¬≤: {r2_bet:.4f}, MAE: {mae_bet:.4f}")
        print(f"Win total           - R¬≤: {r2_win:.4f}, MAE: {mae_win:.4f}")
        print(f"Final amount        - R¬≤: {r2_final:.4f}, MAE: {mae_final:.4f}")
        
        return {
            'tiempo': {'r2': r2_tiempo, 'mae': mae_tiempo, 'rmse': rmse_tiempo},
            'bet': {'r2': r2_bet, 'mae': mae_bet, 'rmse': rmse_bet},
            'win': {'r2': r2_win, 'mae': mae_win, 'rmse': rmse_win},
            'final': {'r2': r2_final, 'mae': mae_final, 'rmse': rmse_final}
        }
    
    def analyze_business_patterns(self, df):
        """An√°lisis de patrones de negocio espec√≠ficos"""
        print(f"\n=== AN√ÅLISIS DE PATRONES DE NEGOCIO ===")
        
        # 1. Patr√≥n de reinversi√≥n
        reinvested = df['BET_TOTAL'] > df['INITIAL_AMOUNT']
        print(f"Sesiones con reinversi√≥n de ganancias: {reinvested.sum()} ({reinvested.sum()/len(df)*100:.1f}%)")
        
        # 2. Comportamiento por resultado final
        net_winners = df['FINAL_AMOUNT'] > df['INITIAL_AMOUNT']
        net_losers = df['FINAL_AMOUNT'] < df['INITIAL_AMOUNT']
        breakeven = df['FINAL_AMOUNT'] == df['INITIAL_AMOUNT']
        
        print(f"Ganadores netos: {net_winners.sum()} ({net_winners.sum()/len(df)*100:.1f}%)")
        print(f"Perdedores netos: {net_losers.sum()} ({net_losers.sum()/len(df)*100:.1f}%)")
        print(f"Breakeven: {breakeven.sum()} ({breakeven.sum()/len(df)*100:.1f}%)")
        
        # 3. Correlaci√≥n WIN vs BET (indica reinversi√≥n)
        win_bet_corr = np.corrcoef(df['WIN_TOTAL'], df['BET_TOTAL'])[0,1]
        print(f"Correlaci√≥n WIN_TOTAL vs BET_TOTAL: {win_bet_corr:.3f}")
        
        # 4. An√°lisis de final amount vs predicciones ingenuas
        naive_final = df['INITIAL_AMOUNT'] + df['WIN_TOTAL'] - df['BET_TOTAL']
        actual_final = df['FINAL_AMOUNT']
        naive_r2 = r2_score(actual_final, naive_final)
        print(f"R¬≤ f√≥rmula ingenua (INITIAL + WIN - BET): {naive_r2:.4f}")
        
    def analyze_predictions_by_segment(self, y_true, y_pred, X_test):
        """Analizar predicciones por segmentos de negocio"""
        print(f"\n=== AN√ÅLISIS POR SEGMENTOS ===")
        
        errors = abs(y_true - y_pred)
        
        # Por rangos de initial amount
        initial_ranges = [(0, 100), (100, 500), (500, 1000), (1000, float('inf'))]
        for low, high in initial_ranges:
            if high == float('inf'):
                mask = X_test['INITIAL_AMOUNT'] >= low
                label = f">= {low}"
            else:
                mask = (X_test['INITIAL_AMOUNT'] >= low) & (X_test['INITIAL_AMOUNT'] < high)
                label = f"{low}-{high}"
            
            if mask.sum() > 0:
                segment_r2 = r2_score(y_true[mask], y_pred[mask])
                segment_mae = errors[mask].mean()
                print(f"INITIAL_AMOUNT {label}: R¬≤={segment_r2:.3f}, MAE={segment_mae:.1f}, n={mask.sum()}")
        
        # Por cluster
        for cluster in sorted(X_test['Cluster'].unique()):
            mask = X_test['Cluster'] == cluster
            if mask.sum() > 0:
                segment_r2 = r2_score(y_true[mask], y_pred[mask])
                segment_mae = errors[mask].mean()
                print(f"Cluster {cluster}: R¬≤={segment_r2:.3f}, MAE={segment_mae:.1f}, n={mask.sum()}")

    def load_models(self, model_dir="models"):
        """
        Cargar modelos previamente guardados (versi√≥n simplificada)
        """
        print("üîÑ Cargando modelos guardados...")
        
        try:
            # Cargar modelo TIEMPO
            self.models['tiempo'] = joblib.load(os.path.join(model_dir, "tiempo_model.pkl"))
            self.scalers['tiempo'] = joblib.load(os.path.join(model_dir, "tiempo_scaler.pkl"))
            print("‚úÖ Modelo TIEMPO cargado")
            
            # Cargar modelo BET
            self.models['bet'] = joblib.load(os.path.join(model_dir, "bet_model.pkl"))
            self.scalers['bet'] = joblib.load(os.path.join(model_dir, "bet_scaler.pkl"))
            print("‚úÖ Modelo BET cargado")
            
            # Cargar modelo WIN (individual)
            self.models['win'] = joblib.load(os.path.join(model_dir, "win_model.pkl"))
            self.scalers['win'] = joblib.load(os.path.join(model_dir, "win_scaler.pkl"))
            print("‚úÖ Modelo WIN cargado")
            
            print("üéâ Todos los modelos cargados exitosamente!")
            return True
            
        except FileNotFoundError as e:
            print(f"‚ùå Error cargando modelos: {e}")
            return False

    def predict_single_session(self, initial_amount, avg_bet, cluster, weekday=1, weekend=0, month=1):
        """
        Hacer predicci√≥n completa para una sesi√≥n individual usando los modelos en secuencia
        """
        if not self.models:
            print("‚ùå Primero debes cargar o entrenar los modelos")
            return None
        
        # Preparar datos base
        base_data = pd.DataFrame({
            'INITIAL_AMOUNT': [initial_amount],
            'AVG_BET': [avg_bet], 
            'Cluster': [cluster],
            'Weekday': [weekday],
            'Weekend': [weekend],
            'Month': [month]
        })
        
        print(f"üéØ Prediciendo para: INITIAL={initial_amount}, AVG_BET={avg_bet}, CLUSTER={cluster}")
        
        # 1. Predecir TIEMPO
        X_tiempo = self.create_business_features(base_data)
        X_tiempo_scaled = self.scalers['tiempo'].transform(X_tiempo)
        tiempo_pred = self.models['tiempo'].predict(X_tiempo_scaled)[0]
        
        print(f"‚è∞ Tiempo predicho: {tiempo_pred:.2f} minutos")
        
        # 2. Predecir BET TOTAL
        X_bet = self.create_business_features(base_data, tiempo_pred=tiempo_pred)
        X_bet_scaled = self.scalers['bet'].transform(X_bet)
        bet_pred = self.models['bet'].predict(X_bet_scaled)[0]
        
        print(f"üí∞ Bet Total predicho: ${bet_pred:.2f}")
        
        # 3. Predecir WIN TOTAL
        X_win = self.create_business_features(base_data, tiempo_pred=tiempo_pred, bet_pred=bet_pred)
        # Remover features de win para evitar data leakage
        X_win = X_win.drop(['win_pred', 'net_gaming_result', 'win_rate', 
                           'excess_betting', 'potential_redemptions',
                           'likely_loss_scenario', 'likely_win_scenario',
                           'breakeven_scenario', 'estimated_available_money',
                           'final_money_simple_estimate', 'cluster_win_pattern'], axis=1, errors='ignore')
        
        X_win_scaled = self.scalers['win'].transform(X_win)
        win_pred = self.models['win'].predict(X_win_scaled)[0]
        
        print(f"üéä Win Total predicho: ${win_pred:.2f}")
        
        # Calcular m√©tricas derivadas
        net_result = win_pred - bet_pred
        roi = (net_result / initial_amount) * 100 if initial_amount > 0 else 0
        
        print(f"üìà Resultado neto: ${net_result:.2f}")
        print(f"üìä ROI estimado: {roi:.2f}%")
        
        return {
            'tiempo_minutos': tiempo_pred,
            'bet_total': bet_pred,
            'win_total': win_pred,
            'resultado_neto': net_result,
            'roi_porcentaje': roi
        }

In [4]:
modelo = BusinessLogicCorrectModel()
modelo.train_corrected_models(df_data_general)

ENTRENANDO CON L√ìGICA DE NEGOCIO CORRECTA

=== AN√ÅLISIS DE PATRONES DE NEGOCIO ===
Sesiones con reinversi√≥n de ganancias: 160049 (76.5%)
Ganadores netos: 36184 (17.3%)
Perdedores netos: 172469 (82.5%)
Breakeven: 426 (0.2%)
Correlaci√≥n WIN_TOTAL vs BET_TOTAL: 0.870
R¬≤ f√≥rmula ingenua (INITIAL + WIN - BET): -2.5809

=== MODELO 1: TIEMPO (OPTIMIZADO) ===
üìä M√©tricas TIEMPO:
   R¬≤ = 0.6195
   MAE = 8.6757
   RMSE = 14.6728
‚úì Modelo tiempo guardado en: models/tiempo_model.pkl
‚úì Scaler tiempo guardado en: models/tiempo_scaler.pkl

=== MODELO 2: BET TOTAL (OPTIMIZADO) ===
üìä M√©tricas BET TOTAL:
   R¬≤ = 0.6310
   MAE = 455.0555
   RMSE = 1155.8577
‚úì Modelo bet guardado en: models/bet_model.pkl
‚úì Scaler bet guardado en: models/bet_scaler.pkl

=== MODELO 3: WIN TOTAL (OPTIMIZADO) ===
üîß Probando modelos candidatos para WIN TOTAL:
   Entrenando xgb1...
   xgb1: R¬≤ = 0.4941, MAE = 548.7673
   Entrenando xgb2...
   xgb2: R¬≤ = 0.5024, MAE = 566.9534
   Entrenando rf...
   r

{'tiempo': {'r2': 0.6194947573597875,
  'mae': 8.675662705685504,
  'rmse': np.float64(14.672806484546625)},
 'bet': {'r2': 0.6310070006976222,
  'mae': 455.05547821670234,
  'rmse': np.float64(1155.8577210344304)},
 'win': {'r2': 0.5023599295475918,
  'mae': 566.9533923765542,
  'rmse': np.float64(1457.2660212064063)},
 'final': {'r2': -0.19002795664009908,
  'mae': 237.81078385261557,
  'rmse': np.float64(583.8710948871375)}}