In [9]:
import polars as pl
import os

# --- CONFIGURATION ---
# Parquet file with the master data; read by generate_ultimate_dna_v3().
INPUT_FILE = "draft_oracle_master_data.parquet"
# Parquet file that generate_ultimate_dna_v3() writes its aggregated result to.
OUTPUT_FILE = "draft_oracle_feature_store.parquet"

def generate_ultimate_dna_v3(input_file=None, output_file=None, zscore_epsilon: float = 0.001) -> None:
    """Build the champion "DNA" feature store and write it to Parquet.

    Steps:
      1. Aggregate the master data per (champ_id, position, region).
      2. Append a ``z_<col>`` z-score, computed within each position, for
         every ``style_*`` and ``var_*`` column.
      3. Append ``stat_reliability_index`` derived from the gold and damage
         volatility columns.

    Args:
        input_file: Path of the master-data Parquet file. Defaults to the
            module-level ``INPUT_FILE``.
        output_file: Path the feature store is written to. Defaults to the
            module-level ``OUTPUT_FILE``.
        zscore_epsilon: Small constant added to the per-position standard
            deviation so the z-score division never divides by zero.

    Raises:
        FileNotFoundError: If ``input_file`` does not exist.
    """
    # Resolve defaults at call time so the module constants can be edited
    # without re-defining the function.
    if input_file is None:
        input_file = INPUT_FILE
    if output_file is None:
        output_file = OUTPUT_FILE

    print("üß¨ Generando ADN V12 Fix (Incluyendo Games Played)...")

    if not os.path.exists(input_file):
        raise FileNotFoundError("Falta el master data.")

    # 1. Master aggregation (still lazy at this point).
    dna_lazy = _aggregate_champion_dna(pl.scan_parquet(input_file))

    # 2. Z-score normalization.
    print("   ‚öñÔ∏è  Normalizando m√©tricas...")
    dna = dna_lazy.collect()
    dna = _add_position_zscores(dna, zscore_epsilon)

    # 3. Reliability index.
    dna = _add_reliability_index(dna)

    print(f"   üíæ Feature Store Actualizado: {dna.shape}")
    dna.write_parquet(output_file)


def _aggregate_champion_dna(games: "pl.LazyFrame") -> "pl.LazyFrame":
    """Aggregate the master data into one row per (champ_id, position, region)."""
    return games.group_by(["champ_id", "position", "region"]).agg([
        # Number of rows (games) in the group.
        pl.len().alias("games_played"),

        # --- A. Means ---
        pl.col("target").mean().alias("stat_winrate"),
        pl.col("stat_dpm").mean().alias("stat_dpm"),
        pl.col("stat_gpm").mean().alias("stat_gpm"),
        pl.col("stat_dmg_taken").mean().alias("stat_dmg_taken"),
        pl.col("stat_mitigated").mean().alias("stat_mitigated"),
        pl.col("stat_heal").mean().alias("stat_heal"),
        pl.col("stat_hard_cc").mean().alias("stat_hard_cc"),
        pl.col("stat_vision").mean().alias("stat_vision_score"),

        # Damage breakdown.
        pl.col("stat_magic_dmg").mean().alias("avg_magic_dmg"),
        pl.col("stat_phys_dmg").mean().alias("avg_phys_dmg"),
        pl.col("stat_true_dmg").mean().alias("avg_true_dmg"),

        # --- B. Play styles (combinations of group means) ---
        (pl.col("stat_roam_kills").mean()).alias("style_roaming_tendency"),
        (pl.col("stat_lane_diff").mean() + (pl.col("stat_solo_kills").mean() * 100)).alias("style_lane_dominance"),
        # The +1 in the denominator avoids division by zero.
        ((pl.col("stat_roam_kills").mean()) / (pl.col("stat_obj_control").mean() + 1)).alias("style_gank_heaviness"),
        (pl.col("stat_obj_control").mean()).alias("style_objective_control"),
        ((pl.col("stat_vision").mean() + pl.col("stat_obj_control").mean()) / 2).alias("style_invade_pressure"),
        (pl.col("stat_gpm").mean()).alias("style_gold_hunger"),

        # --- C. Consistency (per-group standard deviations) ---
        # NOTE(review): with Polars' default ddof=1 a group holding a single
        # game presumably yields a null std, which then propagates to the
        # z_var_* columns and to stat_reliability_index - confirm downstream
        # consumers tolerate nulls (games_played can be used to filter).
        pl.col("stat_gpm").std().alias("var_gold_volatility"),
        pl.col("stat_dpm").std().alias("var_damage_volatility"),
        pl.col("stat_lane_diff").std().alias("var_lane_stability")
    ])


def _add_position_zscores(dna: "pl.DataFrame", epsilon: float) -> "pl.DataFrame":
    """Append a per-position z-score column ``z_<col>`` for every style_/var_ column."""
    cols_to_norm = [c for c in dna.columns if c.startswith(("style_", "var_"))]
    # One batched with_columns call instead of one call per column: every
    # expression only reads original columns, so the result is the same and
    # the new columns keep the same order.
    return dna.with_columns([
        (
            (pl.col(c) - pl.col(c).mean().over("position"))
            / (pl.col(c).std().over("position") + epsilon)
        ).alias(f"z_{c}")
        for c in cols_to_norm
    ])


def _add_reliability_index(dna: "pl.DataFrame") -> "pl.DataFrame":
    """Append stat_reliability_index = 100 / (gold volatility + damage volatility + 1)."""
    return dna.with_columns(
        (100 / (pl.col("var_gold_volatility") + pl.col("var_damage_volatility") + 1)).alias("stat_reliability_index")
    )

# Run now: builds the feature store and writes it to OUTPUT_FILE.
generate_ultimate_dna_v3()

üß¨ Generando ADN V12 Fix (Incluyendo Games Played)...
   ‚öñÔ∏è  Normalizando m√©tricas...
   üíæ Feature Store Actualizado: (2419, 34)


In [10]:
# Master-data Parquet file read by generate_synergy_matrix()
# (same path as INPUT_FILE in the feature-store cell).
MASTER_FILE = "draft_oracle_master_data.parquet"
# Parquet file that generate_synergy_matrix() writes the duo synergy matrix to.
OUTPUT_SYN_FILE = "draft_oracle_synergy_matrix.parquet"

def generate_synergy_matrix(games_threshold: int = 5) -> None:
    """Compute duo win-rate synergies and write them to ``OUTPUT_SYN_FILE``.

    For each role pair (MIDDLE+JUNGLE, BOTTOM+UTILITY, TOP+JUNGLE) the rows of
    the two roles are joined on (game_id, side), grouped by champion pair, and
    reduced to ``games_together`` and ``syn_winrate``. Pairs are kept only when
    they have strictly more than ``games_threshold`` games. The result carries
    a ``duo_type`` column of the form ``"<role1>_<role2>"``.

    If no pair passes the threshold for any duo type, a warning is printed and
    no file is written.

    Args:
        games_threshold: A champion pair must have strictly more than this
            many games together to be kept.

    Raises:
        FileNotFoundError: If ``MASTER_FILE`` does not exist.
    """
    print("üï∏Ô∏è  Tejiendo la Matriz de Sinergias (C√°lculo de Pares)...")

    if not os.path.exists(MASTER_FILE):
        raise FileNotFoundError("Falta el master data.")

    # 1. Load only the needed columns. The outcome column is 'target'; older
    #    files may call it 'win'. Inspect the schema instead of catching every
    #    exception, so unrelated failures (corrupt file, another missing
    #    column) surface directly rather than triggering a misleading retry.
    available_cols = pl.read_parquet_schema(MASTER_FILE)
    if "target" not in available_cols and "win" in available_cols:
        print("‚ö†Ô∏è Error cargando 'target', probando con 'win'... (columna 'target' no encontrada)")
        df = pl.scan_parquet(MASTER_FILE).select([
            "game_id", "champ_id", "position", "win", "side"
        ]).collect().rename({"win": "target"})
    else:
        # If neither column exists, Polars raises its own missing-column
        # error for 'target' here.
        df = pl.scan_parquet(MASTER_FILE).select([
            "game_id", "champ_id", "position", "target", "side"
        ]).collect()

    # 2. Critical duos to analyse, as (role1, role2).
    pairs_to_check = [
        ("MIDDLE", "JUNGLE"),
        ("BOTTOM", "UTILITY"),
        ("TOP", "JUNGLE")
    ]

    synergy_list = []

    for role1, role2 in pairs_to_check:
        print(f"   üîó Analizando d√∫o: {role1} + {role2}...")
        stats = _duo_synergy_stats(df, role1, role2, games_threshold)
        # Keep only non-empty results so the "nothing found" branch below is
        # actually reachable (previously every duo was appended, even when
        # empty, so the list was never falsy).
        if stats.height > 0:
            synergy_list.append(stats)

    # 3. Concatenate everything.
    if synergy_list:
        full_matrix = pl.concat(synergy_list, how="diagonal")
        print(f"   üíæ Guardando Matriz de Sinergia: {full_matrix.shape}")
        full_matrix.write_parquet(OUTPUT_SYN_FILE)
    else:
        print("‚ö†Ô∏è No se encontraron sinergias suficientes.")


def _duo_synergy_stats(df: "pl.DataFrame", role1: str, role2: str, games_threshold: int) -> "pl.DataFrame":
    """Win-rate stats per champion pair playing role1 + role2 on the same side of a game."""
    # Pair each role1 row with the role2 row of the same game and side;
    # role2 columns receive the "_right" suffix.
    pair_df = df.filter(pl.col("position") == role1).join(
        df.filter(pl.col("position") == role2),
        on=["game_id", "side"],
        how="inner",
        suffix="_right"
    )

    return (
        pair_df.group_by(["champ_id", "champ_id_right"])
        .agg([
            pl.count("target").alias("games_together"),
            pl.col("target").mean().alias("syn_winrate")
        ])
        .filter(pl.col("games_together") > games_threshold)
        .with_columns(pl.lit(f"{role1}_{role2}").alias("duo_type"))
    )

# Run now: computes the duo synergy matrix and writes it to OUTPUT_SYN_FILE.
generate_synergy_matrix()

üï∏Ô∏è  Tejiendo la Matriz de Sinergias (C√°lculo de Pares)...
   üîó Analizando d√∫o: MIDDLE + JUNGLE...
   üîó Analizando d√∫o: BOTTOM + UTILITY...
   üîó Analizando d√∫o: TOP + JUNGLE...
   üíæ Guardando Matriz de Sinergia: (15412, 5)
