In [None]:
import polars as pl
import os

# --- CONFIGURATION ---
# Parquet file that generate_ultimate_dna_v3 reads (the "master data").
INPUT_FILE = "draft_oracle_master_data.parquet"
# Parquet file that generate_ultimate_dna_v3 writes the aggregated features to.
OUTPUT_FILE = "draft_oracle_feature_store.parquet"

def generate_ultimate_dna_v3():
    """Build the aggregated feature store from the master data file.

    Lazily scans ``INPUT_FILE``, groups rows by
    ``(champ_id, position, region)`` and computes per-group means, derived
    ``style_*`` metrics and ``var_*`` standard deviations. Every ``style_*``
    and ``var_*`` column then gets a ``z_``-prefixed companion that is
    standardised within its ``position``. Finally a
    ``stat_reliability_index`` column is added and the frame is written to
    ``OUTPUT_FILE``.

    Raises:
        FileNotFoundError: If ``INPUT_FILE`` does not exist.
    """
    print("Generando ADN V12 Fix (Incluyendo Games Played)...")

    if not os.path.exists(INPUT_FILE):
        raise FileNotFoundError("Falta el master data.")

    def avg(column):
        # Group mean of a single source column.
        return pl.col(column).mean()

    def spread(column):
        # Group standard deviation of a single source column.
        return pl.col(column).std()

    # (source column, output column) pairs that are plain group means.
    plain_means = [
        ("target", "stat_winrate"),
        ("stat_dpm", "stat_dpm"),
        ("stat_gpm", "stat_gpm"),
        ("stat_dmg_taken", "stat_dmg_taken"),
        ("stat_mitigated", "stat_mitigated"),
        ("stat_heal", "stat_heal"),
        ("stat_hard_cc", "stat_hard_cc"),
        ("stat_vision", "stat_vision_score"),
        ("stat_magic_dmg", "avg_magic_dmg"),
        ("stat_phys_dmg", "avg_phys_dmg"),
        ("stat_true_dmg", "avg_true_dmg"),
    ]

    # 1. MASTER AGGREGATION
    # The list order below fixes the column order of the output frame.
    aggregations = [pl.len().alias("games_played")]
    aggregations += [avg(source).alias(target) for source, target in plain_means]
    aggregations += [
        # Derived play-style metrics.
        avg("stat_roam_kills").alias("style_roaming_tendency"),
        (avg("stat_lane_diff") + (avg("stat_solo_kills") * 100)).alias("style_lane_dominance"),
        (avg("stat_roam_kills") / (avg("stat_obj_control") + 1)).alias("style_gank_heaviness"),
        avg("stat_obj_control").alias("style_objective_control"),
        ((avg("stat_vision") + avg("stat_obj_control")) / 2).alias("style_invade_pressure"),
        avg("stat_gpm").alias("style_gold_hunger"),
        # Per-group variability metrics.
        spread("stat_gpm").alias("var_gold_volatility"),
        spread("stat_dpm").alias("var_damage_volatility"),
        spread("stat_lane_diff").alias("var_lane_stability"),
    ]

    lazy_dna = (
        pl.scan_parquet(INPUT_FILE)
        .group_by(["champ_id", "position", "region"])
        .agg(aggregations)
    )

    print("   Normalizando métricas...")
    dna = lazy_dna.collect()

    # Standardise each style_/var_ column within its position. The 0.001 added
    # to the denominator keeps the division defined when the std is zero.
    normalised = [
        (
            (pl.col(name) - pl.col(name).mean().over("position"))
            / (pl.col(name).std().over("position") + 0.001)
        ).alias(f"z_{name}")
        for name in dna.columns
        if name.startswith(("style_", "var_"))
    ]
    dna = dna.with_columns(normalised)

    # Larger gold + damage volatility yields a smaller index; the +1 keeps the
    # denominator non-zero when both volatilities are zero.
    volatility_total = pl.col("var_gold_volatility") + pl.col("var_damage_volatility") + 1
    dna = dna.with_columns((100 / volatility_total).alias("stat_reliability_index"))

    print(f"   Feature Store Actualizado: {dna.shape}")
    dna.write_parquet(OUTPUT_FILE)


# Module-level call: builds and writes the feature store as soon as this code runs.
generate_ultimate_dna_v3()

In [None]:
# Parquet file that generate_synergy_matrix reads (the "master data").
MASTER_FILE = "draft_oracle_master_data.parquet"
# Parquet file that generate_synergy_matrix writes the synergy matrix to.
OUTPUT_SYN_FILE = "draft_oracle_synergy_matrix.parquet"

def generate_synergy_matrix():
    """Compute win rates for champion pairs in selected role duos.

    Loads ``game_id``, ``champ_id``, ``position``, ``side`` and the outcome
    column from ``MASTER_FILE``. For each duo in ``pairs_to_check`` the rows of
    the first role are joined with the rows of the second role on
    ``(game_id, side)``. Each ``(champ_id, champ_id_right)`` pair then gets
    ``games_together`` (count of non-null ``target`` values) and
    ``syn_winrate`` (mean ``target``). Pairs with 5 or fewer games are
    dropped, and surviving rows are tagged with ``duo_type``
    (``"<role1>_<role2>"``).

    The combined result is written to ``OUTPUT_SYN_FILE``. If no duo has any
    pair above the threshold, nothing is written and a message is printed
    instead.

    Raises:
        FileNotFoundError: If ``MASTER_FILE`` does not exist.
    """
    print("Tejiendo la Matriz de Sinergias (Cálculo de Pares)...")

    if not os.path.exists(MASTER_FILE):
        raise FileNotFoundError("Falta el master data.")

    # Best-effort load: the outcome column is expected to be "target". If that
    # load fails for any reason, retry with "win" and rename it to "target".
    # A genuine I/O problem will surface again from the second attempt.
    try:
        df = pl.scan_parquet(MASTER_FILE).select([
            "game_id", "champ_id", "position", "target", "side"
        ]).collect()
    except Exception as e:
        print(f"Error cargando 'target', probando con 'win'... ({e})")
        df = pl.scan_parquet(MASTER_FILE).select([
            "game_id", "champ_id", "position", "win", "side"
        ]).collect().rename({"win": "target"})

    # Role duos to evaluate; the first role's champion ends up in "champ_id",
    # the second role's champion in "champ_id_right".
    pairs_to_check = [
        ("MIDDLE", "JUNGLE"),
        ("BOTTOM", "UTILITY"),
        ("TOP", "JUNGLE")
    ]

    synergy_list = []

    for role1, role2 in pairs_to_check:
        print(f"   Analizando dúo: {role1} + {role2}...")

        df_r1 = df.filter(pl.col("position") == role1)
        df_r2 = df.filter(pl.col("position") == role2)

        # Same game and same side means the two champions were teammates.
        pair_df = df_r1.join(
            df_r2,
            on=["game_id", "side"],
            how="inner",
            suffix="_right"
        )

        stats = pair_df.group_by(["champ_id", "champ_id_right"]).agg([
            pl.count("target").alias("games_together"),
            pl.col("target").mean().alias("syn_winrate")
        ])

        # Keep only pairs seen in more than 5 games.
        stats = stats.filter(pl.col("games_together") > 5)

        # Skip duos with no surviving pairs. Appending empty frames made the
        # `if synergy_list` check below always true, so the "not enough
        # synergies" branch could never run and an empty file was written.
        if stats.height == 0:
            continue

        stats = stats.with_columns(pl.lit(f"{role1}_{role2}").alias("duo_type"))
        synergy_list.append(stats)

    if synergy_list:
        full_matrix = pl.concat(synergy_list, how="diagonal")
        print(f"   Guardando Matriz de Sinergia: {full_matrix.shape}")
        full_matrix.write_parquet(OUTPUT_SYN_FILE)
    else:
        print("No se encontraron sinergias suficientes.")


# Module-level call: builds and writes the synergy matrix as soon as this code runs.
generate_synergy_matrix()