In [None]:


import optuna
import xgboost as xgb
import polars as pl
import numpy as np
import os
import gc
import json

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score


# File names used by the notebook. What each file holds is inferred from
# its name only -- TODO confirm against the steps that produce them.
MASTER_FILE = "draft_oracle_master_data.parquet"
FEATURE_FILE = "draft_oracle_feature_store.parquet"
# Same path that the data-assembly cell later reads as TRAIN_FILE.
TRAIN_SET_FILE = "draft_oracle_training_set.parquet"
MODEL_FILE = "draft_oracle_brain_optuna.json"

# Status message emitted once the imports and constants are in place.
print("Entorno listo con Optuna (AutoML).")

Ensamblaje de la data

In [None]:


def enrich_combat_logic(df):
    """Append per-team combat totals and cross-team matchup ratios.

    For each side ("blue", "red") every column whose name contains the
    side tag and one of the listed stat fragments is summed row-wise into
    six totals (magic / physical / true damage, tankiness, sustain, cc).
    A magic-damage share is derived from the three damage totals, and
    finally four cross-team ratios are added.

    Args:
        df: Polars DataFrame with the per-role stat columns.

    Returns:
        The DataFrame with the new columns appended.
    """
    print("Calculando Lógica de Combate...")

    # (output suffix, name fragments that select the source columns)
    aggregate_specs = (
        ("total_magic_dmg", ("stat_magic_dmg", "avg_magic_dmg")),
        ("total_phys_dmg", ("stat_phys_dmg", "avg_phys_dmg")),
        ("total_true_dmg", ("stat_true_dmg", "avg_true_dmg")),
        ("total_tankiness", ("stat_mitigated",)),
        ("total_sustain", ("stat_heal",)),
        ("total_cc", ("stat_hard_cc",)),
    )

    for team in ("blue", "red"):
        tag = f"{team}_"
        # Columns are matched by substring (not by prefix), re-read from the
        # current frame on every pass.
        team_columns = [name for name in df.columns if tag in name]

        totals = []
        for suffix, fragments in aggregate_specs:
            picked = [
                name for name in team_columns
                if any(fragment in name for fragment in fragments)
            ]
            # With no matching column the total degrades to a constant 0.
            row_sum = pl.sum_horizontal(picked) if picked else pl.lit(0)
            totals.append(row_sum.fill_null(0).alias(f"{tag}{suffix}"))
        df = df.with_columns(totals)

        magic = pl.col(f"{tag}total_magic_dmg")
        physical = pl.col(f"{tag}total_phys_dmg")
        true_damage = pl.col(f"{tag}total_true_dmg")
        # The +1 offset presumably guards against a zero denominator.
        all_damage = magic + physical + true_damage + 1
        df = df.with_columns((magic / all_damage).alias(f"{tag}magic_dmg_ratio"))

    # Each team's offence is measured against the *opposing* team's defence.
    pairings = (("blue", "red"), ("red", "blue"))
    shred_exprs = [
        (
            (pl.col(f"{attacker}_total_magic_dmg") + pl.col(f"{attacker}_total_true_dmg"))
            / (pl.col(f"{defender}_total_tankiness") + 1)
        ).alias(f"{attacker}_shred_efficiency")
        for attacker, defender in pairings
    ]
    burst_exprs = [
        (
            (pl.col(f"{attacker}_total_phys_dmg") + pl.col(f"{attacker}_total_magic_dmg"))
            / (pl.col(f"{defender}_total_sustain") + 1000)
        ).alias(f"{attacker}_anti_sustain_burst")
        for attacker, defender in pairings
    ]
    return df.with_columns(shred_exprs + burst_exprs)

def enrich_behavioral_strategy(df):
    """Append play-style interaction features for both teams.

    Per side ("blue", "red"), with nulls treated as 0:

    * ``{side}_strat_gank_compatibility``: jungle ``gank_heaviness`` times
      the mean ``lane_dominance`` of TOP, MIDDLE and BOTTOM.
    * ``{side}_strat_resource_friction``: sum of ``gold_hunger`` over the
      five roles.
    * ``{side}_strat_invade_safety``: jungle ``invade_pressure`` times the
      summed ``roaming_tendency`` of TOP and MIDDLE.

    ``diff_team_volatility`` (blue minus red summed ``var_gold_volatility``)
    is optional: it is added only when all ten volatility columns exist.
    Previously a bare ``except: pass`` hid the skip (and any other error,
    including KeyboardInterrupt); the skip is now explicit and reported,
    and unrelated errors propagate.

    Args:
        df: Polars DataFrame with the ``z_style_*`` columns. No presence
            check is made for those columns.

    Returns:
        The DataFrame with the new columns appended.
    """
    print("Calculando Psicología y Riesgo...")
    sides = ["blue", "red"]
    all_roles = ["TOP", "JUNGLE", "MIDDLE", "BOTTOM", "UTILITY"]

    for side in sides:
        jg_aggro = pl.col(f"{side}_JUNGLE_z_style_gank_heaviness").fill_null(0)
        lanes_aggro = (
            pl.col(f"{side}_TOP_z_style_lane_dominance").fill_null(0)
            + pl.col(f"{side}_MIDDLE_z_style_lane_dominance").fill_null(0)
            + pl.col(f"{side}_BOTTOM_z_style_lane_dominance").fill_null(0)
        ) / 3

        gold_hunger = sum(
            pl.col(f"{side}_{role}_z_style_gold_hunger").fill_null(0)
            for role in all_roles
        )

        invade_potential = pl.col(f"{side}_JUNGLE_z_style_invade_pressure").fill_null(0)
        backup_potential = (
            pl.col(f"{side}_TOP_z_style_roaming_tendency").fill_null(0)
            + pl.col(f"{side}_MIDDLE_z_style_roaming_tendency").fill_null(0)
        )

        # The three features only read pre-existing columns, so they can be
        # added together; the resulting column order is unchanged.
        df = df.with_columns([
            (jg_aggro * lanes_aggro).alias(f"{side}_strat_gank_compatibility"),
            gold_hunger.alias(f"{side}_strat_resource_friction"),
            (invade_potential * backup_potential).alias(f"{side}_strat_invade_safety"),
        ])

    # Optional feature: check for the inputs explicitly instead of swallowing
    # whatever exception a missing column would raise.
    volatility_cols = {
        side: [f"{side}_{role}_var_gold_volatility" for role in all_roles]
        for side in sides
    }
    present = set(df.columns)
    missing = [
        name
        for names in volatility_cols.values()
        for name in names
        if name not in present
    ]
    if missing:
        print(f"Aviso: faltan {len(missing)} columnas de volatilidad; se omite diff_team_volatility.")
        return df

    blue_vol = sum(pl.col(name).fill_null(0) for name in volatility_cols["blue"])
    red_vol = sum(pl.col(name).fill_null(0) for name in volatility_cols["red"])
    return df.with_columns((blue_vol - red_vol).alias("diff_team_volatility"))

def enrich_critical_synergies(df, synergy_path="draft_oracle_synergy_matrix.parquet"):
    """Attach pairwise champion synergy win rates for key role pairs.

    For each side and each of the pairs (MIDDLE, JUNGLE), (BOTTOM, UTILITY)
    and (TOP, JUNGLE), the synergy matrix is left-joined on the two
    champion-id columns and the matched ``syn_winrate`` is stored as
    ``{side}_syn_mid_jg`` / ``{side}_syn_bot_duo`` / ``{side}_syn_top_jg``.
    Pairs absent from the matrix get 0.5. When both mid-jungle columns
    exist, ``gap_syn_mid_jg`` (blue minus red) is added as well.

    The synergy matrix is optional. If the file does not exist the frame
    is returned untouched and a notice is printed. Previously a bare
    ``except`` did this silently and also hid genuine read errors; those
    now propagate.

    Args:
        df: Polars DataFrame with ``{side}_{ROLE}_champ_id`` columns.
        synergy_path: Parquet file with ``champ_id``, ``champ_id_right``
            and ``syn_winrate`` columns.

    Returns:
        The DataFrame, with synergy columns appended when available.
    """
    print("Calculando Sinergias...")
    if not os.path.exists(synergy_path):
        print(f"Aviso: no se encontró '{synergy_path}'; se omiten las sinergias.")
        return df
    synergy_matrix = pl.read_parquet(synergy_path)

    critical_pairs = [
        ("MIDDLE", "JUNGLE", "syn_mid_jg"),
        ("BOTTOM", "UTILITY", "syn_bot_duo"),
        ("TOP", "JUNGLE", "syn_top_jg"),
    ]
    for side in ("blue", "red"):
        for role1, role2, feat_name in critical_pairs:
            col_id1 = f"{side}_{role1}_champ_id"
            col_id2 = f"{side}_{role2}_champ_id"
            # Skip pairs whose champion-id columns are not in the frame.
            if col_id1 not in df.columns or col_id2 not in df.columns:
                continue
            out_name = f"{side}_{feat_name}"
            # Rename the matrix columns so the join keys line up with df.
            temp_syn = synergy_matrix.select([
                pl.col("champ_id").alias(col_id1),
                pl.col("champ_id_right").alias(col_id2),
                pl.col("syn_winrate").alias(out_name),
            ])
            # NOTE(review): a left join multiplies rows if the matrix holds
            # duplicate (champ_id, champ_id_right) pairs -- confirm they are unique.
            df = df.join(temp_syn, on=[col_id1, col_id2], how="left")
            # Unknown pairs fall back to 0.5.
            df = df.with_columns(pl.col(out_name).fill_null(0.5))

    if "blue_syn_mid_jg" in df.columns and "red_syn_mid_jg" in df.columns:
        df = df.with_columns(
            (pl.col("blue_syn_mid_jg") - pl.col("red_syn_mid_jg")).alias("gap_syn_mid_jg")
        )
    return df


# Load the training set; abort early when it has not been generated yet.
TRAIN_FILE = "draft_oracle_training_set.parquet"
if not os.path.exists(TRAIN_FILE):
    raise FileNotFoundError("Ejecuta ML2 primero para generar el dataset.")
df = pl.read_parquet(TRAIN_FILE)

# Run the three enrichment passes in sequence.
for _enrich_step in (enrich_combat_logic, enrich_behavioral_strategy, enrich_critical_synergies):
    df = _enrich_step(df)

# Blue-minus-red "duel" differences per role and stat, built only where
# both sides' columns are present.
roles = ["TOP", "JUNGLE", "MIDDLE", "BOTTOM", "UTILITY"]
duel_stats = ["stat_winrate", "stat_gpm", "stat_dpm", "z_style_lane_dominance"]
_available = set(df.columns)
new_cols = []
for role in roles:
    for stat in duel_stats:
        blue_name = f"blue_{role}_{stat}"
        red_name = f"red_{role}_{stat}"
        if blue_name not in _available or red_name not in _available:
            continue
        new_cols.append((pl.col(blue_name) - pl.col(red_name)).alias(f"duel_{role}_{stat}"))
if new_cols:
    df = df.with_columns(new_cols)

# A column is a model feature when its name contains any of these markers,
# is not a bookkeeping column, and does not contain "win".
# NOTE(review): the "win" exclusion also removes every *_stat_winrate column,
# including the duel_*_stat_winrate differences built just above -- confirm
# that this is intended.
_FEATURE_MARKERS = (
    "stat_", "rune_", "archetype_", "gap_", "duel_",
    "_total_", "_ratio", "_efficiency", "_burst",
    "z_style_", "_strat_", "_syn_", "diff_team_volatility",
)
_NON_FEATURES = ("target", "split_key", "game_id")
feature_cols = [
    name
    for name in df.columns
    if any(marker in name for marker in _FEATURE_MARKERS)
    and name not in _NON_FEATURES
    and "win" not in name
]

print("Preparando Matrices para Optuna...")
# Random ~90/10 row split. The generator is seeded so that the same rows
# land in train/test on every run; with an unseeded draw the partition (and
# therefore every AUC reported by the study) changed between runs.
SPLIT_SEED = 42
_split_rng = np.random.default_rng(SPLIT_SEED)
df = df.with_columns(pl.Series(name="split_key", values=_split_rng.random(df.height)))
train_data = df.filter(pl.col("split_key") < 0.9)
test_data = df.filter(pl.col("split_key") >= 0.9)

# The test matrix is built with ref=dtrain so it reuses the training
# matrix as its quantisation reference.
dtrain = xgb.QuantileDMatrix(train_data.select(feature_cols), label=train_data.select("target"))
dtest  = xgb.QuantileDMatrix(test_data.select(feature_cols), label=test_data.select("target"), ref=dtrain)

# The frames are no longer needed once the DMatrix objects exist.
del df, train_data, test_data
gc.collect()


def objective(trial):
    """Optuna objective: train one XGBoost model and return its best AUC.

    Samples a hyperparameter set from ``trial``, trains on the module-level
    ``dtrain`` with early stopping evaluated on the module-level ``dtest``,
    and returns the best AUC reached on ``dtest``.

    The early-stopped iteration is stored on the trial as the user
    attribute ``best_iteration``. It is not part of ``study.best_params``,
    and without it the number of boosting rounds behind the best score is
    lost.

    NOTE(review): ``dtest`` is used both for early stopping and as the
    optimised metric, so the returned AUC is tuned on that set -- confirm a
    separate hold-out exists for the final evaluation.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters.

    Returns:
        The booster's ``best_score`` (AUC on ``dtest``).
    """
    params = {
        # Fixed settings.
        'device': 'cuda',
        'tree_method': 'hist',
        'objective': 'binary:logistic',
        'eval_metric': 'auc',
        'nthread': -1,

        # Search space.
        'learning_rate': trial.suggest_float('learning_rate', 0.005, 0.2, log=True),
        'max_depth': trial.suggest_int('max_depth', 6, 14),
        'subsample': trial.suggest_float('subsample', 0.5, 0.95),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 0.95),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 15),
        'gamma': trial.suggest_float('gamma', 0.0, 5.0),
        'lambda': trial.suggest_float('lambda', 1e-3, 10.0, log=True),
        'alpha': trial.suggest_float('alpha', 1e-3, 10.0, log=True),
    }

    model = xgb.train(
        params,
        dtrain,
        num_boost_round=10000,
        evals=[(dtest, "Test")],
        early_stopping_rounds=1000,
        verbose_eval=False
    )

    # Keep the tuned tree count next to the sampled parameters so the best
    # trial can be retrained with the same number of rounds.
    trial.set_user_attr("best_iteration", model.best_iteration)

    return model.best_score

# Single source of truth for the trial budget: the start-up message used to
# announce 100 rounds while the study actually ran 200.
N_TRIALS = 200

print(f"Iniciando Optimización ({N_TRIALS} Rondas)...")
# AUC is maximised (see the objective's return value).
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=N_TRIALS)

print(f"MEJORES PARÁMETROS: {study.best_params}")
# Persist the winning hyperparameters for the later training step.
with open("best_hyperparameters.json", "w", encoding="utf-8") as f:
    json.dump(study.best_params, f)

Entrenamiento GPU

Entrenando los modelos