In [14]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler, RobustScaler, QuantileTransformer, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.linear_model import Ridge, ElasticNet, HuberRegressor
import xgboost as xgb
from sklearn.compose import TransformedTargetRegressor
import warnings
warnings.filterwarnings('ignore')

In [28]:
df = pd.read_csv('../../../data/data_general.csv')

In [29]:
df.columns

Index(['Unnamed: 0.1', 'Unnamed: 0', 'PLAYER_ID', 'DOB', 'GENDER', 'AVG_BET',
       'BET_TOTAL', 'INITIAL_AMOUNT', 'INITIAL_TIME', 'FINAL_TIME',
       'INITIAL_PROMO_AMOUNT', 'FINAL_AMOUNT', 'FINAL_PROMO_AMOUNT',
       'MACHINE_ID', 'WIN_TOTAL', 'GAMES_PLAYED_TOTAL', 'GAMES_WON_TOTAL',
       'TIME_ON_DEVICE_SEC', 'PLAYER_LEVEL_ID', 'Casino', 'AVG_BET_std',
       'BET_TOTAL_std', 'INITIAL_AMOUNT_std', 'INITIAL_PROMO_AMOUNT_std',
       'FINAL_AMOUNT_std', 'FINAL_PROMO_AMOUNT_std', 'WIN_TOTAL_std',
       'GAMES_PLAYED_TOTAL_std', 'GAMES_WON_TOTAL_std',
       'TIME_ON_DEVICE_SEC_std', 'Edad', 'Rango_Edad', 'Rango_Edad_le',
       'Cluster'],
      dtype='object')

In [30]:
le = LabelEncoder()
df['Machine ID'] = le.fit_transform(df['MACHINE_ID'])

In [31]:
df['INITIAL_TIME'] = pd.to_datetime(df['INITIAL_TIME'])
df['FINAL_TIME'] = pd.to_datetime(df['FINAL_TIME'])

df['INITIAL_TIME'] = df['INITIAL_TIME'].dt.to_period('D')
df['INITIAL_TIME'] = df['INITIAL_TIME'].dt.to_timestamp()


df['FINAL_TIME'] = df['FINAL_TIME'].dt.to_period('D')
df['FINAL_TIME'] = df['FINAL_TIME'].dt.to_timestamp()

df['Weekday']= df['INITIAL_TIME'].dt.strftime('%A')
df['number_of_day'] = df['INITIAL_TIME'].dt.day_of_week

df['TIME_ON_DEVICE_MIN'] = df['TIME_ON_DEVICE_SEC'] / 60

df['Hour'] = df['INITIAL_TIME'].dt.hour
df['Weekday'] = df['INITIAL_TIME'].dt.weekday   # 0=Lunes, 6=Domingo
df['Weekend'] = (df['Weekday'] >= 5).astype(int)
df['Month'] = df['INITIAL_TIME'].dt.month

df = df[df['TIME_ON_DEVICE_MIN'] < 600 ]

df = df[df['WIN_TOTAL'] > 0]
df['NET_SPEND'] = df['FINAL_AMOUNT'] - df['INITIAL_AMOUNT']
df = df[df['NET_SPEND'] < 10000 ]

In [24]:
class BusinessLogicCorrectModel:
    def __init__(self):
        self.scalers = {}
        self.models = {}
        
    def create_business_features(self, df, tiempo_pred=None, bet_pred=None, win_pred=None):
        """
        Crear features que reflejen la lógica real del negocio de casino
        """
        features = df[['INITIAL_AMOUNT', 'AVG_BET', 'Cluster']].copy()
        
        if tiempo_pred is not None:
            features['tiempo_pred'] = tiempo_pred
            
        if bet_pred is not None and win_pred is not None:
            # Features que reflejan el comportamiento real del casino
            features['bet_pred'] = bet_pred
            features['win_pred'] = win_pred
            
            # FEATURES CLAVE PARA CASINO:
            
            # 1. Indicadores de comportamiento de juego
            features['total_money_handled'] = bet_pred  # Dinero total manejado
            features['house_edge_effect'] = bet_pred * 0.05  # Estimación de ventaja de la casa
            features['net_gaming_result'] = win_pred - bet_pred  # Resultado neto del juego
            
            # 2. Ratios de eficiencia y riesgo
            features['win_rate'] = win_pred / (bet_pred + 1)  # Tasa de ganancia
            features['money_multiplier'] = bet_pred / (df['INITIAL_AMOUNT'] + 1)  # Cuántas veces apostó su dinero inicial
            features['reinvestment_indicator'] = np.where(bet_pred > df['INITIAL_AMOUNT'], 1, 0)  # Si reinvirtió ganancias
            
            # 3. Patrones de gestión de dinero
            features['excess_betting'] = np.maximum(0, bet_pred - df['INITIAL_AMOUNT'])  # Apuestas con dinero ganado
            features['potential_redemptions'] = win_pred * 0.7  # Estimación de dinero que podría haber retirado
            features['money_at_risk'] = np.minimum(bet_pred, df['INITIAL_AMOUNT'] + win_pred)
            
            # 4. Indicadores de comportamiento de salida
            features['likely_loss_scenario'] = np.where(win_pred < bet_pred * 0.5, 1, 0)
            features['likely_win_scenario'] = np.where(win_pred > bet_pred * 1.2, 1, 0)
            features['breakeven_scenario'] = np.where(
                (win_pred >= bet_pred * 0.8) & (win_pred <= bet_pred * 1.2), 1, 0
            )
            
            # 5. Estimaciones de flujo de efectivo durante la sesión
            # Simulación simplificada del flujo de efectivo
            available_money_estimate = df['INITIAL_AMOUNT'] + win_pred * 0.6  # Asumiendo que retira 40% de ganancias
            features['estimated_available_money'] = available_money_estimate
            features['final_money_simple_estimate'] = available_money_estimate - bet_pred + win_pred * 0.4
            
            # 6. Features específicos por cluster (comportamiento por tipo de jugador)
            features['cluster_risk_adjusted'] = df['Cluster'] * features['money_multiplier']
            features['cluster_win_pattern'] = df['Cluster'] * features['win_rate']
            
        return features
    
    def train_corrected_models(self, df):
        """
        Entrenar modelos con la lógica de negocio correcta
        """
        print("=" * 70)
        print("ENTRENANDO CON LÓGICA DE NEGOCIO CORRECTA")
        print("=" * 70)
        
        # Análisis inicial de patrones
        self.analyze_business_patterns(df)
        
        # Preparar datos base
        X_base = df[['INITIAL_AMOUNT','AVG_BET','Cluster','Weekday','Weekend','Month', 'Machine ID']]
        y_tiempo = df['TIME_ON_DEVICE_MIN']
        y_bet = df['BET_TOTAL'] 
        y_win = df['WIN_TOTAL']
        y_final = df['FINAL_AMOUNT']
        
        # Split estratificado
        X_base_train, X_base_test, y_final_train, y_final_test = train_test_split(
            X_base, y_final, test_size=0.3, random_state=42, 
            stratify=pd.cut(y_final, bins=5, labels=False)  # Estratificar por rangos de final_amount
        )
        
        # Obtener índices para otros targets
        train_idx, test_idx = X_base_train.index, X_base_test.index
        
        print("\n=== MODELO 1: TIEMPO (OPTIMIZADO) ===")
        # Modelo tiempo mejorado
        X_tiempo_train = self.create_business_features(X_base_train)
        X_tiempo_test = self.create_business_features(X_base_test)
        
        self.scalers['tiempo'] = RobustScaler()
        X_tiempo_train_scaled = self.scalers['tiempo'].fit_transform(X_tiempo_train)
        X_tiempo_test_scaled = self.scalers['tiempo'].transform(X_tiempo_test)
        
        self.models['tiempo'] = xgb.XGBRegressor(
            n_estimators=400, max_depth=10, learning_rate=0.05, 
            subsample=0.9, colsample_bytree=0.9, random_state=42
        )
        self.models['tiempo'].fit(X_tiempo_train_scaled, y_tiempo.loc[train_idx])
        
        tiempo_pred_train = self.models['tiempo'].predict(X_tiempo_train_scaled)
        tiempo_pred_test = self.models['tiempo'].predict(X_tiempo_test_scaled)
        
        r2_tiempo = r2_score(y_tiempo.loc[test_idx], tiempo_pred_test)
        print(f"R² Tiempo: {r2_tiempo:.4f}")
        
        print("\n=== MODELO 2: BET TOTAL (OPTIMIZADO) ===")
        # Modelo bet con features de negocio
        X_bet_train = self.create_business_features(X_base_train, tiempo_pred_train)
        X_bet_test = self.create_business_features(X_base_test, tiempo_pred_test)
        
        self.scalers['bet'] = RobustScaler()
        X_bet_train_scaled = self.scalers['bet'].fit_transform(X_bet_train)
        X_bet_test_scaled = self.scalers['bet'].transform(X_bet_test)
        
        self.models['bet'] = xgb.XGBRegressor(
            n_estimators=500, max_depth=12, learning_rate=0.04,
            subsample=0.8, colsample_bytree=0.8, reg_alpha=0.1, random_state=42
        )
        self.models['bet'].fit(X_bet_train_scaled, y_bet.loc[train_idx])
        
        bet_pred_train = self.models['bet'].predict(X_bet_train_scaled)
        bet_pred_test = self.models['bet'].predict(X_bet_test_scaled)
        
        r2_bet = r2_score(y_bet.loc[test_idx], bet_pred_test)
        print(f"R² Bet Total: {r2_bet:.4f}")
        
        print("\n=== MODELO 3: WIN TOTAL (OPTIMIZADO) ===")
        # Modelo win con lógica de casino
        X_win_train = self.create_business_features(X_base_train, tiempo_pred_train, bet_pred_train)
        X_win_test = self.create_business_features(X_base_test, tiempo_pred_test, bet_pred_test)
        
        # Remover win_pred de las features para win (evitar data leakage)
        win_features_train = X_win_train.drop(['win_pred', 'net_gaming_result', 'win_rate', 
                                              'excess_betting', 'potential_redemptions',
                                              'likely_loss_scenario', 'likely_win_scenario',
                                              'breakeven_scenario', 'estimated_available_money',
                                              'final_money_simple_estimate', 'cluster_win_pattern'], axis=1, errors='ignore')
        win_features_test = X_win_test.drop(['win_pred', 'net_gaming_result', 'win_rate',
                                            'excess_betting', 'potential_redemptions', 
                                            'likely_loss_scenario', 'likely_win_scenario',
                                            'breakeven_scenario', 'estimated_available_money',
                                            'final_money_simple_estimate', 'cluster_win_pattern'], axis=1, errors='ignore')
        
        self.scalers['win'] = RobustScaler()
        X_win_train_scaled = self.scalers['win'].fit_transform(win_features_train)
        X_win_test_scaled = self.scalers['win'].transform(win_features_test)
        
        # Ensemble para WIN
        win_models = {
            'xgb1': xgb.XGBRegressor(n_estimators=500, max_depth=12, learning_rate=0.03, random_state=42),
            'xgb2': xgb.XGBRegressor(n_estimators=400, max_depth=15, learning_rate=0.04, 
                                    subsample=0.9, colsample_bytree=0.8, random_state=43),
            'rf': RandomForestRegressor(n_estimators=400, max_depth=20, min_samples_split=3, random_state=42)
        }
        
        win_predictions_train = []
        win_predictions_test = []
        
        for name, model in win_models.items():
            model.fit(X_win_train_scaled, y_win.loc[train_idx])
            win_predictions_train.append(model.predict(X_win_train_scaled))
            win_predictions_test.append(model.predict(X_win_test_scaled))
        
        # Ensemble con pesos optimizados para WIN
        win_pred_train = np.average(win_predictions_train, axis=0, weights=[0.4, 0.4, 0.2])
        win_pred_test = np.average(win_predictions_test, axis=0, weights=[0.4, 0.4, 0.2])
        
        self.models['win'] = win_models
        r2_win = r2_score(y_win.loc[test_idx], win_pred_test)
        print(f"R² Win Total: {r2_win:.4f}")
        
        print("\n=== MODELO 4: FINAL AMOUNT (LÓGICA DE NEGOCIO CORRECTA) ===")
        # Ahora con TODAS las features de lógica de negocio
        X_final_train = self.create_business_features(X_base_train, tiempo_pred_train, 
                                                     bet_pred_train, win_pred_train)
        X_final_test = self.create_business_features(X_base_test, tiempo_pred_test, 
                                                    bet_pred_test, win_pred_test)
        
        # Features adicionales específicos para FINAL_AMOUNT
        X_final_train['session_volatility'] = abs(X_final_train['win_pred'] - X_final_train['bet_pred'])
        X_final_train['money_management_score'] = X_final_train['INITIAL_AMOUNT'] / (X_final_train['bet_pred'] + 1)
        X_final_train['expected_house_profit'] = X_final_train['bet_pred'] * 0.05  # 5% house edge típico
        
        X_final_test['session_volatility'] = abs(X_final_test['win_pred'] - X_final_test['bet_pred'])
        X_final_test['money_management_score'] = X_final_test['INITIAL_AMOUNT'] / (X_final_test['bet_pred'] + 1)
        X_final_test['expected_house_profit'] = X_final_test['bet_pred'] * 0.05
        
        self.scalers['final'] = RobustScaler()
        X_final_train_scaled = self.scalers['final'].fit_transform(X_final_train)
        X_final_test_scaled = self.scalers['final'].transform(X_final_test)
        
        # Múltiples enfoques para FINAL_AMOUNT
        final_models = {
            'xgb_deep': xgb.XGBRegressor(
                n_estimators=800, max_depth=15, learning_rate=0.02,
                subsample=0.9, colsample_bytree=0.9, 
                reg_alpha=0.1, reg_lambda=0.1, random_state=42
            ),
            'xgb_wide': xgb.XGBRegressor(
                n_estimators=600, max_depth=8, learning_rate=0.03,
                subsample=0.8, colsample_bytree=1.0, random_state=43
            ),
            'gbr': GradientBoostingRegressor(
                n_estimators=500, max_depth=10, learning_rate=0.02, 
                subsample=0.9, random_state=42
            ),
            'rf': RandomForestRegressor(
                n_estimators=500, max_depth=25, min_samples_split=2,
                min_samples_leaf=1, random_state=42
            )
        }
        
        final_predictions_test = []
        final_r2_scores = []
        
        print("Entrenando ensemble para FINAL_AMOUNT:")
        for name, model in final_models.items():
            model.fit(X_final_train_scaled, y_final_train)
            pred = model.predict(X_final_test_scaled)
            final_predictions_test.append(pred)
            r2 = r2_score(y_final_test, pred)
            final_r2_scores.append(r2)
            print(f"  {name}: R² = {r2:.4f}")
        
        # Ensemble final con pesos basados en performance
        weights = np.array(final_r2_scores)
        weights = np.maximum(weights, 0)  # Solo pesos positivos
        if weights.sum() > 0:
            weights = weights / weights.sum()
        else:
            weights = np.ones(len(weights)) / len(weights)
        
        final_pred_ensemble = np.average(final_predictions_test, axis=0, weights=weights)
        
        self.models['final'] = final_models
        self.models['final_weights'] = weights
        
        # Métricas finales
        r2_final = r2_score(y_final_test, final_pred_ensemble)
        mae_final = mean_absolute_error(y_final_test, final_pred_ensemble)
        rmse_final = np.sqrt(mean_squared_error(y_final_test, final_pred_ensemble))
        
        print(f"\nRESULTADOS ENSEMBLE FINAL_AMOUNT:")
        print(f"R² = {r2_final:.4f}")
        print(f"MAE = {mae_final:.2f}")
        print(f"RMSE = {rmse_final:.2f}")
        
        # Análisis por segmentos
        self.analyze_predictions_by_segment(y_final_test, final_pred_ensemble, X_base_test)
        
        print(f"\n" + "=" * 70)
        print("RESUMEN FINAL - MODELOS CON LÓGICA DE NEGOCIO")
        print("=" * 70)
        print(f"Tiempo en máquina: {r2_tiempo:.4f}")
        print(f"Bet total: {r2_bet:.4f}")
        print(f"Win total: {r2_win:.4f}")
        print(f"Final amount: {r2_final:.4f}")
        
        return {
            'tiempo': r2_tiempo,
            'bet': r2_bet,
            'win': r2_win,
            'final': r2_final,
            'mae_final': mae_final,
            'rmse_final': rmse_final
        }
    
    def analyze_business_patterns(self, df):
        """Análisis de patrones de negocio específicos"""
        print(f"\n=== ANÁLISIS DE PATRONES DE NEGOCIO ===")
        
        # 1. Patrón de reinversión
        reinvested = df['BET_TOTAL'] > df['INITIAL_AMOUNT']
        print(f"Sesiones con reinversión de ganancias: {reinvested.sum()} ({reinvested.sum()/len(df)*100:.1f}%)")
        
        # 2. Comportamiento por resultado final
        net_winners = df['FINAL_AMOUNT'] > df['INITIAL_AMOUNT']
        net_losers = df['FINAL_AMOUNT'] < df['INITIAL_AMOUNT']
        breakeven = df['FINAL_AMOUNT'] == df['INITIAL_AMOUNT']
        
        print(f"Ganadores netos: {net_winners.sum()} ({net_winners.sum()/len(df)*100:.1f}%)")
        print(f"Perdedores netos: {net_losers.sum()} ({net_losers.sum()/len(df)*100:.1f}%)")
        print(f"Breakeven: {breakeven.sum()} ({breakeven.sum()/len(df)*100:.1f}%)")
        
        # 3. Correlación WIN vs BET (indica reinversión)
        win_bet_corr = np.corrcoef(df['WIN_TOTAL'], df['BET_TOTAL'])[0,1]
        print(f"Correlación WIN_TOTAL vs BET_TOTAL: {win_bet_corr:.3f}")
        
        # 4. Análisis de final amount vs predicciones ingenuas
        naive_final = df['INITIAL_AMOUNT'] + df['WIN_TOTAL'] - df['BET_TOTAL']
        actual_final = df['FINAL_AMOUNT']
        naive_r2 = r2_score(actual_final, naive_final)
        print(f"R² fórmula ingenua (INITIAL + WIN - BET): {naive_r2:.4f}")
        
    def analyze_predictions_by_segment(self, y_true, y_pred, X_test):
        """Analizar predicciones por segmentos de negocio"""
        print(f"\n=== ANÁLISIS POR SEGMENTOS ===")
        
        errors = abs(y_true - y_pred)
        
        # Por rangos de initial amount
        initial_ranges = [(0, 100), (100, 500), (500, 1000), (1000, float('inf'))]
        for low, high in initial_ranges:
            if high == float('inf'):
                mask = X_test['INITIAL_AMOUNT'] >= low
                label = f">= {low}"
            else:
                mask = (X_test['INITIAL_AMOUNT'] >= low) & (X_test['INITIAL_AMOUNT'] < high)
                label = f"{low}-{high}"
            
            if mask.sum() > 0:
                segment_r2 = r2_score(y_true[mask], y_pred[mask])
                segment_mae = errors[mask].mean()
                print(f"INITIAL_AMOUNT {label}: R²={segment_r2:.3f}, MAE={segment_mae:.1f}, n={mask.sum()}")
        
        # Por cluster
        for cluster in sorted(X_test['Cluster'].unique()):
            mask = X_test['Cluster'] == cluster
            if mask.sum() > 0:
                segment_r2 = r2_score(y_true[mask], y_pred[mask])
                segment_mae = errors[mask].mean()
                print(f"Cluster {cluster}: R²={segment_r2:.3f}, MAE={segment_mae:.1f}, n={mask.sum()}")



In [32]:
# Obtener estadísticas generales
df[['TIME_ON_DEVICE_MIN', 'BET_TOTAL', 'WIN_TOTAL']].describe()

# Información específica
print("Valores máximos:")
print(df[['TIME_ON_DEVICE_MIN', 'BET_TOTAL', 'WIN_TOTAL']].max())

print("\nValores mínimos:")
print(df[['TIME_ON_DEVICE_MIN', 'BET_TOTAL', 'WIN_TOTAL']].min())

print("\nMedianas:")
print(df[['TIME_ON_DEVICE_MIN', 'BET_TOTAL', 'WIN_TOTAL']].median())

Valores máximos:
TIME_ON_DEVICE_MIN       537.866667
BET_TOTAL             134666.600000
WIN_TOTAL             461502.000000
dtype: float64

Valores mínimos:
TIME_ON_DEVICE_MIN    0.083333
BET_TOTAL             0.000000
WIN_TOTAL             0.020000
dtype: float64

Medianas:
TIME_ON_DEVICE_MIN      7.716667
BET_TOTAL             363.000000
WIN_TOTAL             215.000000
dtype: float64


In [33]:
# Top 10 valores más altos por columna
top_time = df.nlargest(10, 'TIME_ON_DEVICE_MIN')
top_bet = df.nlargest(10, 'BET_TOTAL')
top_win = df.nlargest(10, 'WIN_TOTAL')

# Usando percentiles para identificar valores extremos
q95 = df[['TIME_ON_DEVICE_MIN', 'BET_TOTAL', 'WIN_TOTAL']].quantile(0.95)
significant_data = df[(df['TIME_ON_DEVICE_MIN'] >= q95['TIME_ON_DEVICE_MIN']) |
                     (df['BET_TOTAL'] >= q95['BET_TOTAL']) |
                     (df['WIN_TOTAL'] >= q95['WIN_TOTAL'])]

In [34]:
import numpy as np
from scipy import stats

# Calcular Z-scores
df['time_zscore'] = np.abs(stats.zscore(df['TIME_ON_DEVICE_MIN']))
df['bet_zscore'] = np.abs(stats.zscore(df['BET_TOTAL']))
df['win_zscore'] = np.abs(stats.zscore(df['WIN_TOTAL']))

# Seleccionar datos con Z-score > 2 (significativo estadísticamente)
significant_data = df[(df['time_zscore'] > 2) | 
                     (df['bet_zscore'] > 2) | 
                     (df['win_zscore'] > 2)]

In [35]:
significant_players = df[
    (df['TIME_ON_DEVICE_MIN'] > df['TIME_ON_DEVICE_MIN'].quantile(0.8)) &
    (df['BET_TOTAL'] > df['BET_TOTAL'].quantile(0.8))
]

# O jugadores con alta ratio ganancia/apuesta
df['win_ratio'] = df['WIN_TOTAL'] / df['BET_TOTAL']
high_performers = df[df['win_ratio'] > df['win_ratio'].quantile(0.9)]

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,PLAYER_ID,DOB,GENDER,AVG_BET,BET_TOTAL,INITIAL_AMOUNT,INITIAL_TIME,FINAL_TIME,...,Weekday,number_of_day,TIME_ON_DEVICE_MIN,Hour,Weekend,Month,NET_SPEND,time_zscore,bet_zscore,win_zscore
20,20,20,61F9BB6A-3FFD-435B-A703-F305C3FE6C8C,1970-04-02,0,6.56,5666.25,453.07,2025-03-26,2025-03-26,...,2,2,86.583333,0,0,3,2446.05,2.913422,2.558451,3.207498
32,32,32,DE4A57F7-73B5-4F8C-805B-8E70E12D7FE6,1977-07-13,0,2.52,1617.50,330.03,2025-03-23,2025-03-23,...,6,6,48.150000,0,1,3,1159.55,1.329771,0.404844,0.868514
37,37,37,8DB334A7-1E36-4D0F-9B23-0983BDD087D1,1963-09-04,0,5.72,1698.40,549.34,2025-06-08,2025-06-08,...,6,6,42.150000,0,1,6,-546.15,1.082540,0.447876,0.156199
46,46,46,A02E64F0-7A58-4E70-B9B9-73D8E1D39E5B,1941-07-19,0,2.74,1387.50,225.05,2025-04-26,2025-04-26,...,5,5,30.550000,0,1,4,-224.80,0.604561,0.282503,0.160780
63,63,63,6117CAF4-2BFD-4E39-85F0-9D9CA5196E41,1953-09-14,0,4.52,4015.00,843.94,2025-06-18,2025-06-18,...,2,2,66.050000,0,0,6,-842.75,2.067343,1.680120,1.041771
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
226680,226680,96527,51016D6E-8A35-45AB-A0FF-5E4BA4536156,1959-11-10,1,5.00,1325.00,180.62,2024-02-09,2024-02-09,...,4,4,24.483333,0,0,2,322.30,0.354583,0.249258,0.373230
226681,226681,96528,F6438AFA-407D-48C6-BE5D-C34E3CABE8DC,1956-09-09,1,3.29,8792.50,451.38,2025-07-28,2025-07-28,...,0,0,165.933333,0,0,7,-451.00,6.183049,4.221363,3.307980
226693,226693,96541,0A4E1F5B-FF7D-41C7-9C5C-266C6E0FF206,1953-07-05,0,2.93,3882.50,451.39,2025-01-31,2025-01-31,...,4,4,93.483333,0,0,1,-415.00,3.197737,1.609641,1.171209
226702,226702,96551,E2358F49-F02B-4094-A7EF-ECBEA2BA7728,1941-04-23,0,3.18,1427.25,194.81,2024-04-23,2024-04-23,...,1,1,55.666667,0,0,4,-194.35,1.639496,0.303646,0.191556


In [37]:
model = BusinessLogicCorrectModel()
results = model.train_corrected_models(significant_players)

ENTRENANDO CON LÓGICA DE NEGOCIO CORRECTA

=== ANÁLISIS DE PATRONES DE NEGOCIO ===
Sesiones con reinversión de ganancias: 27162 (100.0%)
Ganadores netos: 9704 (35.7%)
Perdedores netos: 17447 (64.2%)
Breakeven: 15 (0.1%)
Correlación WIN_TOTAL vs BET_TOTAL: 0.826
R² fórmula ingenua (INITIAL + WIN - BET): -6.2755

=== MODELO 1: TIEMPO (OPTIMIZADO) ===
R² Tiempo: 0.4423

=== MODELO 2: BET TOTAL (OPTIMIZADO) ===
R² Bet Total: 0.5510

=== MODELO 3: WIN TOTAL (OPTIMIZADO) ===
R² Win Total: -0.0038

=== MODELO 4: FINAL AMOUNT (LÓGICA DE NEGOCIO CORRECTA) ===
Entrenando ensemble para FINAL_AMOUNT:
  xgb_deep: R² = -0.3238
  xgb_wide: R² = -0.3039


KeyboardInterrupt: 