# Pokémon battles — XGBoost with 10-fold outer CV + Grid Search
Notebook breve che esegue: feature engineering, split train/val/test, 10-fold outer CV con GridSearchCV interno, valutazione per fold, valutazione su holdout e generazione submission.csv.

In [1]:
import json
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
import warnings
warnings.filterwarnings('ignore')

# --- Percorsi (modificare se necessario) ---
COMPETITION_NAME = 'fds-pokemon-battles-prediction-2025'
train_file_path = 'train.jsonl'
test_file_path = 'test.jsonl'

def load_jsonl(path):
    data = []
    with open(path, 'r') as f:
        for line in f:
            data.append(json.loads(line))
    return data

print('Caricamento dati...')
train_raw = load_jsonl(train_file_path)
test_raw = load_jsonl(test_file_path)
print(f'Train records: {len(train_raw)}, Test records: {len(test_raw)}')

Caricamento dati...
Train records: 10000, Test records: 5000


In [2]:
import math
from collections import Counter

# === TYPE CHART (Gen 1) ===
TYPE_CHART = {
    'normal': {'rock': 0.5, 'ghost': 0},
    'fire': {'fire': 0.5, 'water': 0.5, 'grass': 2, 'ice': 2, 'bug': 2, 'rock': 0.5, 'dragon': 0.5},
    'water': {'fire': 2, 'water': 0.5, 'grass': 0.5, 'ground': 2, 'rock': 2, 'dragon': 0.5},
    'grass': {'fire': 0.5, 'water': 2, 'grass': 0.5, 'poison': 0.5, 'ground': 2, 'flying': 0.5, 'bug': 0.5, 'rock': 2, 'dragon': 0.5},
    'electric': {'water': 2, 'grass': 0.5, 'electric': 0.5, 'ground': 0, 'flying': 2, 'dragon': 0.5},
    'ice': {'fire': 0.5, 'water': 0.5, 'grass': 2, 'ground': 2, 'flying': 2, 'dragon': 2},
    'fighting': {'normal': 2, 'ice': 2, 'poison': 0.5, 'flying': 0.5, 'psychic': 0.5, 'bug': 0.5, 'rock': 2, 'ghost': 0},
    'poison': {'grass': 2, 'poison': 0.5, 'ground': 0.5, 'bug': 2, 'rock': 0.5, 'ghost': 0.5},
    'ground': {'fire': 2, 'grass': 0.5, 'electric': 2, 'poison': 2, 'flying': 0, 'bug': 0.5, 'rock': 2},
    'flying': {'grass': 2, 'electric': 0.5, 'fighting': 2, 'bug': 2, 'rock': 0.5},
    'psychic': {'fighting': 2, 'poison': 2, 'psychic': 0.5, 'ghost': 0},
    'bug': {'fire': 0.5, 'grass': 2, 'fighting': 0.5, 'poison': 2, 'flying': 0.5, 'psychic': 2, 'ghost': 0.5},
    'rock': {'fire': 2, 'ice': 2, 'fighting': 0.5, 'ground': 0.5, 'flying': 2, 'bug': 2},
    'ghost': {'normal': 0, 'psychic': 0, 'ghost': 2},
    'dragon': {'dragon': 2}
}

def get_effectiveness(attack_type: str, defense_types: list) -> float:
    if not attack_type or not defense_types:
        return 1.0
    eff = 1.0
    for d in defense_types:
        eff *= TYPE_CHART.get(attack_type, {}).get(d, 1.0)
    return eff

def calculate_type_advantage(team1: list, team2_lead: dict) -> dict:
    out = {'p1_vs_lead_avg_effectiveness': 0.0, 'p1_vs_lead_max_effectiveness': 0.0, 'p1_super_effective_options': 0}
    if not team1 or not team2_lead:
        return out
    lead_types = [t.lower() for t in team2_lead.get('types', [])]
    if not lead_types:
        return out
    effs = []
    for p in team1:
        p_types = [t.lower() for t in p.get('types', [])]
        max_eff = 0.0
        for pt in p_types:
            max_eff = max(max_eff, get_effectiveness(pt, lead_types))
        effs.append(max_eff)
    if not effs:
        return out
    out['p1_vs_lead_avg_effectiveness'] = float(np.mean(effs))
    out['p1_vs_lead_max_effectiveness'] = float(np.max(effs))
    out['p1_super_effective_options'] = int(sum(1 for e in effs if e >= 2))
    return out

def _entropy(counter: Counter) -> float:
    total = sum(counter.values())
    if total == 0:
        return 0.0
    ent = 0.0
    for v in counter.values():
        p = v / total
        if p > 0:
            ent -= p * math.log(p, 2)
    return ent

def team_aggregate_features(team: list, prefix: str = 'p1_') -> dict:
    stats = ['base_hp','base_atk','base_def','base_spa','base_spd','base_spe']
    out = {}
    vals = {s: [] for s in stats}
    levels = []
    types_counter = Counter()
    names = []
    for p in team:
        names.append(p.get('name',''))
        for s in stats:
            vals[s].append(p.get(s, 0))
        levels.append(p.get('level', 0))
        for t in p.get('types', []):
            types_counter[t.lower()] += 1
    for s in stats:
        arr = np.array(vals[s], dtype=float)
        out[f'{prefix}{s}_sum'] = float(arr.sum())
        out[f'{prefix}{s}_mean'] = float(arr.mean())
        out[f'{prefix}{s}_max'] = float(arr.max())
        out[f'{prefix}{s}_min'] = float(arr.min())
        out[f'{prefix}{s}_std'] = float(arr.std())
    level_arr = np.array(levels, dtype=float)
    out[f'{prefix}level_mean'] = float(level_arr.mean()) if level_arr.size else 0.0
    out[f'{prefix}level_sum'] = float(level_arr.sum()) if level_arr.size else 0.0
    out[f'{prefix}n_unique_types'] = int(len(types_counter))
    common_types = ['normal','fire','water','electric','grass','psychic','ice','dragon','rock','ground','flying']
    for t in common_types:
        out[f'{prefix}type_{t}_count'] = int(types_counter.get(t, 0))
    out[f'{prefix}lead_name'] = names[0] if names else ''
    out[f'{prefix}n_unique_names'] = int(len(set(names)))
    out[f'{prefix}type_entropy'] = float(_entropy(types_counter))
    spe_arr = np.array(vals['base_spe'], dtype=float)
    out[f'{prefix}spe_p25'] = float(np.percentile(spe_arr, 25)) if spe_arr.size else 0.0
    out[f'{prefix}spe_p50'] = float(np.percentile(spe_arr, 50)) if spe_arr.size else 0.0
    out[f'{prefix}spe_p75'] = float(np.percentile(spe_arr, 75)) if spe_arr.size else 0.0
    return out

def lead_vs_lead_features(p1_lead: dict, p2_lead: dict) -> dict:
    out = {}
    stats = ['base_hp','base_atk','base_def','base_spa','base_spd','base_spe']
    for s in stats:
        out[f'lead_diff_{s}'] = float(p1_lead.get(s,0) - p2_lead.get(s,0))
    out['lead_speed_advantage'] = float(p1_lead.get('base_spe',0) - p2_lead.get('base_spe',0))
    p1_types = [t.lower() for t in p1_lead.get('types', [])]
    p2_types = [t.lower() for t in p2_lead.get('types', [])]
    max_eff = 0.0
    for pt in p1_types:
        max_eff = max(max_eff, get_effectiveness(pt, p2_types))
    out['lead_p1_vs_p2_effectiveness'] = float(max_eff)
    return out

def lead_aggregate_features(pokemon: dict, prefix: str = 'p2_lead_') -> dict:
    out = {}
    stats = ['base_hp','base_atk','base_def','base_spa','base_spd','base_spe']
    for s in stats:
        out[f'{prefix}{s}'] = float(pokemon.get(s,0))
    out[f'{prefix}level'] = int(pokemon.get('level',0))
    types = [x.lower() for x in pokemon.get('types', [])]
    common_types = ['normal','fire','water','electric','grass','psychic','ice','dragon','rock','ground','flying']
    for t in common_types:
        out[f'{prefix}type_{t}'] = int(t in types)
    out[f'{prefix}name'] = pokemon.get('name','')
    out[f'{prefix}n_unique_types'] = int(len(set(types)))
    return out

def summary_from_timeline(timeline: list, p1_team: list) -> dict:
    out = {}
    if not timeline:
        return {'tl_p1_moves':0,'tl_p2_moves':0,'tl_p1_est_damage':0.0,'tl_p2_est_damage':0.0,'damage_diff':0.0}
    p1_moves = p2_moves = 0
    p1_damage = p2_damage = 0.0
    p1_last_active = p2_last_active = ''
    p1_last_hp = p2_last_hp = np.nan
    p1_fainted = p2_fainted = 0
    p1_fainted_names = set()
    p2_fainted_names = set()
    last_p1_hp = {}
    last_p2_hp = {}
    p1_comeback_kos = 0
    p2_comeback_kos = 0
    p1_inflicted_statuses = Counter()
    p2_inflicted_statuses = Counter()
    p1_pokemon_statuses = {}
    p2_pokemon_statuses = {}
    p1_move_type_counts = Counter()
    p2_move_type_counts = Counter()
    p1_damage_first2 = 0.0
    p2_damage_first2 = 0.0

    # NEW: per-turn damage accumulation, KO timing and early/late KO counters
    p1_dmg_by_turn = {}  # damage inflitto da p1 (contro p2) per turno
    p2_dmg_by_turn = {}  # damage inflitto da p2 (contro p1) per turno
    seen_turns = set()
    first_ko_turn_p1_taken = None   # primo KO subìto da p1 (p1_fainted++)
    first_ko_turn_p1_inflicted = None  # primo KO inflitto da p1 (p2_fainted++)
    early_threshold = 10
    p1_kos_early = p1_kos_late = 0
    p2_kos_early = p2_kos_late = 0

    for turn in timeline[:30]:
        prev_p1_fainted, prev_p2_fainted = p1_fainted, p2_fainted
        p1_state = turn.get('p1_pokemon_state',{}) or {}
        p2_state = turn.get('p2_pokemon_state',{}) or {}
        tnum = turn.get('turn', None)
        if tnum is None:
            # fallback: usa lunghezza dei turni visti + 1
            tnum = (len(seen_turns) + 1)
        seen_turns.add(tnum)

        if p1_state.get('name'):
            p1_last_active = p1_state.get('name')
        if p2_state.get('name'):
            p2_last_active = p2_state.get('name')

        if p1_state.get('fainted') and p1_state.get('name') not in p1_fainted_names:
            p1_fainted += 1
            p1_fainted_names.add(p1_state.get('name'))
            if first_ko_turn_p1_taken is None:
                first_ko_turn_p1_taken = tnum
            if tnum <= early_threshold: p2_kos_early += 1
            else: p2_kos_late += 1
        if p2_state.get('fainted') and p2_state.get('name') not in p2_fainted_names:
            p2_fainted += 1
            p2_fainted_names.add(p2_state.get('name'))
            if first_ko_turn_p1_inflicted is None:
                first_ko_turn_p1_inflicted = tnum
            if tnum <= early_threshold: p1_kos_early += 1
            else: p1_kos_late += 1

        p2_name, p2_hp = p2_state.get('name'), p2_state.get('hp_pct')
        if p2_name and p2_hp is not None:
            prev_hp = last_p2_hp.get(p2_name)
            if prev_hp is not None:
                delta = max(0.0, prev_hp - p2_hp)
                p1_damage += delta
                p1_dmg_by_turn[tnum] = p1_dmg_by_turn.get(tnum, 0.0) + delta
                if turn.get('turn',999) <= 2:
                    p1_damage_first2 += delta
            last_p2_hp[p2_name] = p2_hp

        p1_name, p1_hp = p1_state.get('name'), p1_state.get('hp_pct')
        if p1_name and p1_hp is not None:
            prev_hp = last_p1_hp.get(p1_name)
            if prev_hp is not None:
                delta = max(0.0, prev_hp - p1_hp)
                p2_damage += delta
                p2_dmg_by_turn[tnum] = p2_dmg_by_turn.get(tnum, 0.0) + delta
                if turn.get('turn',999) <= 2:
                    p2_damage_first2 += delta
            last_p1_hp[p1_name] = p1_hp

        damage_diff_so_far = p1_damage - p2_damage
        if p2_fainted > prev_p2_fainted and damage_diff_so_far < -1.0:
            p1_comeback_kos += 1
        if p1_fainted > prev_p1_fainted and damage_diff_so_far > 1.0:
            p2_comeback_kos += 1

        p2_status = p2_state.get('status')
        if p2_name and p2_status and p2_pokemon_statuses.get(p2_name) != p2_status:
            p1_inflicted_statuses[p2_status] += 1
            p2_pokemon_statuses[p2_name] = p2_status
        p1_status = p1_state.get('status')
        if p1_name and p1_status and p1_pokemon_statuses.get(p1_name) != p1_status:
            p2_inflicted_statuses[p1_status] += 1
            p1_pokemon_statuses[p1_name] = p1_status

        p1_move = turn.get('p1_move_details') or {}
        p2_move = turn.get('p2_move_details') or {}
        if p1_move and p1_move.get('type'):
            p1_move_type_counts[(p1_move.get('type') or '').lower()] += 1
        if p2_move and p2_move.get('type'):
            p2_move_type_counts[(p2_move.get('type') or '').lower()] += 1
        if turn.get('p1_move_details'):
            p1_moves += 1
        if turn.get('p2_move_details'):
            p2_moves += 1
        p1_last_hp = p1_state.get('hp_pct', np.nan)
        p2_last_hp = p2_state.get('hp_pct', np.nan)

    # ...existing code computing out[...] baseline metrics...
    out['tl_p1_moves'] = int(p1_moves)
    out['tl_p2_moves'] = int(p2_moves)
    out['tl_p1_est_damage'] = float(p1_damage)
    out['tl_p2_est_damage'] = float(p2_damage)
    out['damage_diff'] = float(p1_damage - p2_damage)
    out['fainted_diff'] = int(p1_fainted - p2_fainted)
    out['tl_p1_last_hp'] = float(p1_last_hp) if not np.isnan(p1_last_hp) else 0.0
    out['tl_p2_last_hp'] = float(p2_last_hp) if not np.isnan(p2_last_hp) else 0.0
    out['tl_p1_last_active'] = p1_last_active
    out['tl_p2_last_active'] = p2_last_active
    if p1_team:
        p1_total_hp_sum = sum(p.get('base_hp',0) for p in p1_team)
        p1_avg_def = np.mean([p.get('base_def',0) for p in p1_team] or [0])
        p1_avg_spd = np.mean([p.get('base_spd',0) for p in p1_team] or [0])
        out['tl_p2_damage_vs_p1_hp_pool'] = float(p2_damage / (p1_total_hp_sum + 1e-6))
        out['tl_p1_defensive_endurance'] = float((p1_avg_def + p1_avg_spd) / (p2_damage + 1e-6))
    out['tl_p1_comeback_kos'] = int(p1_comeback_kos)
    out['tl_p2_comeback_kos'] = int(p2_comeback_kos)
    out['tl_comeback_kos_diff'] = int(p1_comeback_kos - p2_comeback_kos)

    common_statuses = ['brn','par','slp','frz','psn','tox']
    for status in common_statuses:
        out[f'tl_p1_inflicted_{status}_count'] = int(p1_inflicted_statuses.get(status,0))
        out[f'tl_p2_inflicted_{status}_count'] = int(p2_inflicted_statuses.get(status,0))
        out[f'tl_inflicted_{status}_diff'] = int(p1_inflicted_statuses.get(status,0) - p2_inflicted_statuses.get(status,0))

    common_move_types = ['normal','fire','water','electric','grass','psychic','ice','dragon','rock','ground','flying','ghost','bug','poison','fighting']
    for mt in common_move_types:
        out[f'tl_p1_move_type_{mt}_count'] = int(p1_move_type_counts.get(mt,0))
        out[f'tl_p2_move_type_{mt}_count'] = int(p2_move_type_counts.get(mt,0))
        out[f'tl_move_type_{mt}_count_diff'] = int(p1_move_type_counts.get(mt,0) - p2_move_type_counts.get(mt,0))

    out['tl_p1_damage_first2'] = float(p1_damage_first2)
    out['tl_p2_damage_first2'] = float(p2_damage_first2)
    out['tl_first2_damage_diff'] = float(p1_damage_first2 - p2_damage_first2)

    # NEW: derived, normalized and late-game features
    turns_count = max(1, len(seen_turns))
    out['tl_turns_count'] = int(turns_count)
    out['tl_p1_moves_rate'] = float(p1_moves / turns_count)
    out['tl_p2_moves_rate'] = float(p2_moves / turns_count)
    out['tl_p1_damage_per_turn'] = float(p1_damage / turns_count)
    out['tl_p2_damage_per_turn'] = float(p2_damage / turns_count)
    out['tl_damage_rate_diff'] = float(out['tl_p1_damage_per_turn'] - out['tl_p2_damage_per_turn'])

    # last-5-turns damage window
    if seen_turns:
        recent_turns = sorted(seen_turns)[-5:]
        p1_last5 = sum(p1_dmg_by_turn.get(t,0.0) for t in recent_turns)
        p2_last5 = sum(p2_dmg_by_turn.get(t,0.0) for t in recent_turns)
    else:
        p1_last5 = p2_last5 = 0.0
    out['tl_p1_damage_last5'] = float(p1_last5)
    out['tl_p2_damage_last5'] = float(p2_last5)
    out['tl_last5_damage_diff'] = float(p1_last5 - p2_last5)
    # NEW: ratio danno ultimi 5 turni vs totale
    out['tl_p1_last5_damage_ratio'] = float(p1_last5 / (p1_damage + 1e-6))
    out['tl_p2_last5_damage_ratio'] = float(p2_last5 / (p2_damage + 1e-6))
    out['tl_last5_damage_ratio_diff'] = float(out['tl_p1_last5_damage_ratio'] - out['tl_p2_last5_damage_ratio'])

    # time-weighted damage advantage (peso crescente con il turno)
    if seen_turns:
        ts = sorted(seen_turns)
        w = np.linspace(1.0, 2.0, num=len(ts))  # pesi crescenti
        w = w / (w.sum() + 1e-9)
        adv = [(p1_dmg_by_turn.get(t,0.0) - p2_dmg_by_turn.get(t,0.0)) for t in ts]
        out['tl_weighted_damage_diff'] = float(np.dot(w, adv))
    else:
        out['tl_weighted_damage_diff'] = 0.0

    # NEW: comeback indicator (cambio di segno dell'adv cumulativo)
    if seen_turns:
        ts = sorted(seen_turns)
        cum = 0.0
        signs = []
        for t in ts:
            cum += (p1_dmg_by_turn.get(t,0.0) - p2_dmg_by_turn.get(t,0.0))
            s = 1 if cum > 1e-9 else (-1 if cum < -1e-9 else 0)
            if s != 0:
                if not signs or signs[-1] != s:
                    signs.append(s)
        sign_flips = max(0, len(signs) - 1)
        comeback_flag = 1 if (len(signs) >= 2 and signs[0] != signs[-1]) else 0
    else:
        sign_flips = 0
        comeback_flag = 0
    out['tl_damage_adv_sign_flips'] = int(sign_flips)
    out['tl_comeback_flag'] = int(comeback_flag)

    # KO timing and early/late counts
    out['tl_first_ko_turn_p1_inflicted'] = int(first_ko_turn_p1_inflicted or 0)
    out['tl_first_ko_turn_p1_taken'] = int(first_ko_turn_p1_taken or 0)
    out['tl_first_ko_turn_diff'] = int((first_ko_turn_p1_inflicted or 0) - (first_ko_turn_p1_taken or 0))
    out['tl_kos_early_p1'] = int(p1_kos_early)
    out['tl_kos_late_p1'] = int(p1_kos_late)
    out['tl_kos_early_p2'] = int(p2_kos_early)
    out['tl_kos_late_p2'] = int(p2_kos_late)

    # normalized status rates per turn
    for status in common_statuses:
        c1 = p1_inflicted_statuses.get(status,0)
        c2 = p2_inflicted_statuses.get(status,0)
        out[f'tl_p1_inflicted_{status}_rate'] = float(c1 / turns_count)
        out[f'tl_p2_inflicted_{status}_rate'] = float(c2 / turns_count)
        out[f'tl_inflicted_{status}_rate_diff'] = float((c1 - c2) / turns_count)

    return out

def ability_features(team: list, prefix: str) -> dict:
    immunity_abilities = {'levitate':0,'volt_absorb':0,'water_absorb':0,'flash_fire':0}
    stat_drop_abilities = {'intimidate':0}
    weather_abilities = {'drought':0,'drizzle':0,'sand_stream':0}
    out = {}
    for pokemon in team:
        ability = (pokemon.get('ability','') or '').lower().replace(' ','_')
        if ability in immunity_abilities:
            immunity_abilities[ability] += 1
        if ability in stat_drop_abilities:
            stat_drop_abilities[ability] += 1
        if ability in weather_abilities:
            weather_abilities[ability] += 1
    for ability,count in immunity_abilities.items():
        out[f'{prefix}ability_{ability}_count'] = int(count)
    for ability,count in stat_drop_abilities.items():
        out[f'{prefix}ability_{ability}_count'] = int(count)
    for ability,count in weather_abilities.items():
        out[f'{prefix}ability_{ability}_count'] = int(count)
    out[f'{prefix}total_immunity_abilities'] = int(sum(immunity_abilities.values()))
    out[f'{prefix}total_stat_drop_abilities'] = int(sum(stat_drop_abilities.values()))
    return out

def prepare_record_features(record: dict, max_turns: int = 30) -> dict:
    out = {}
    out['battle_id'] = record.get('battle_id')
    if 'player_won' in record:
        out['player_won'] = int(bool(record.get('player_won')))
    p1_team = record.get('p1_team_details', [])
    out.update(team_aggregate_features(p1_team, prefix='p1_'))
    p2_lead = record.get('p2_lead_details', {})
    out.update(lead_aggregate_features(p2_lead, prefix='p2_lead_'))
    out.update(ability_features(p1_team, prefix='p1_'))
    p1_lead = p1_team[0] if p1_team else {}
    out.update(lead_vs_lead_features(p1_lead, p2_lead))
    out.update(ability_features([p2_lead], prefix='p2_lead_'))
    out['p1_intimidate_vs_lead'] = 1 if out.get('p1_ability_intimidate_count',0) > 0 else 0
    tl = record.get('battle_timeline', [])
    out.update(summary_from_timeline(tl[:max_turns], p1_team))
    out['team_hp_sum_minus_p2lead_hp'] = out.get('p1_base_hp_sum', 0) - out.get('p2_lead_base_hp', 0)
    out['team_spa_mean_minus_p2spa'] = out.get('p1_base_spa_mean', 0) - out.get('p2_lead_base_spa', 0)
    out['speed_advantage'] = out.get('p1_base_spe_sum', 0) - out.get('p2_lead_base_spe', 0)
    out['n_unique_types_diff'] = out.get('p1_n_unique_types', 0) - out.get('p2_lead_n_unique_types', 1)
    p1_moves = max(out.get('tl_p1_moves',1),1)
    p2_moves = max(out.get('tl_p2_moves',1),1)
    out['damage_per_turn_diff'] = (out.get('tl_p1_est_damage',0.0)/p1_moves) - (out.get('tl_p2_est_damage',0.0)/p2_moves)
    out['last_pair'] = f"{out.get('tl_p1_last_active','')}_VS_{out.get('tl_p2_last_active','')}"
    out.update(calculate_type_advantage(p1_team, p2_lead))
    p2_lead_bulk = out.get('p2_lead_base_def',1) + out.get('p2_lead_base_spd',1)
    out['p1_se_options_vs_lead_bulk'] = out.get('p1_super_effective_options',0) / (p2_lead_bulk + 1e-6)
    p2_team = record.get('p2_team_details', [])
    if p2_team:
        out.update(team_aggregate_features(p2_team, prefix='p2_'))
        out['team_hp_sum_diff'] = out.get('p1_base_hp_sum',0) - out.get('p2_base_hp_sum',0)
        out['team_spa_mean_diff'] = out.get('p1_base_spa_mean',0) - out.get('p2_base_spa_mean',0)
        out['team_spe_mean_diff'] = out.get('p1_base_spe_mean',0) - out.get('p2_base_spe_mean',0)
        out['n_unique_types_team_diff'] = out.get('p1_n_unique_types',0) - out.get('p2_n_unique_types',0)
    return out

def create_features_from_raw(data: list) -> pd.DataFrame:
    rows = []
    for b in tqdm(data, desc='FE'):
        try:
            feat = prepare_record_features(b, max_turns=30)
            if 'battle_id' not in feat:
                feat['battle_id'] = b.get('battle_id')
            rows.append(feat)
        except Exception as e:
            rows.append({'battle_id': b.get('battle_id'), 'error': 1})
    df = pd.DataFrame(rows)
    if 'player_won' in df.columns:
        df['player_won'] = df['player_won'].astype(int)
    return df.fillna(0)

train_df = create_features_from_raw(train_raw)
test_df = create_features_from_raw(test_raw)
print('Feature shape train/test:', train_df.shape, test_df.shape)
display(train_df.head())

FE: 100%|██████████| 10000/10000 [00:05<00:00, 1770.65it/s]
FE: 100%|██████████| 5000/5000 [00:02<00:00, 1825.48it/s]


Feature shape train/test: (10000, 232) (5000, 231)


Unnamed: 0,battle_id,player_won,p1_base_hp_sum,p1_base_hp_mean,p1_base_hp_max,p1_base_hp_min,p1_base_hp_std,p1_base_atk_sum,p1_base_atk_mean,p1_base_atk_max,...,team_hp_sum_minus_p2lead_hp,team_spa_mean_minus_p2spa,speed_advantage,n_unique_types_diff,damage_per_turn_diff,last_pair,p1_vs_lead_avg_effectiveness,p1_vs_lead_max_effectiveness,p1_super_effective_options,p1_se_options_vs_lead_bulk
0,0,1,695.0,115.833333,250.0,55.0,69.367179,435.0,72.5,110.0,...,635.0,0.0,365.0,3,-0.070393,starmie_VS_snorlax,1.083333,2.0,1,0.005405
1,1,1,740.0,123.333333,250.0,65.0,64.204534,435.0,72.5,110.0,...,685.0,-45.0,250.0,4,-0.012174,tauros_VS_alakazam,1.0,1.0,0,0.0
2,2,1,745.0,124.166667,250.0,60.0,64.382753,505.0,84.166667,130.0,...,495.0,-15.0,345.0,6,-0.00069,snorlax_VS_gengar,1.0,1.0,0,0.0
3,3,1,730.0,121.666667,250.0,60.0,65.362239,465.0,77.5,110.0,...,655.0,33.333333,345.0,6,-0.014574,snorlax_VS_zapdos,1.0,1.0,0,0.0
4,4,1,685.0,114.166667,250.0,50.0,70.794107,455.0,75.833333,110.0,...,625.0,-2.5,320.0,4,0.006923,tauros_VS_chansey,1.083333,2.0,1,0.005405


In [3]:
# Preparazione X, y - FILTRO COLONNE STRINGA
# Escludo colonne non numeriche (stringhe) che XGBoost non può usare
exclude_cols = ['battle_id', 'player_won']
# Identifica colonne stringa nel DataFrame
string_cols = train_df.select_dtypes(include=['object']).columns.tolist()
exclude_cols.extend(string_cols)

FEATURES = [c for c in train_df.columns if c not in exclude_cols]

# Opzionale: escludi le NUOVE feature late-game se peggiorano accuracy
DROP_NEW_TIMELINE_FEATURES = True  # metti False per tenerle
late_new_static = [
    'tl_turns_count','tl_p1_moves_rate','tl_p2_moves_rate',
    'tl_p1_damage_per_turn','tl_p2_damage_per_turn','tl_damage_rate_diff',
    'tl_p1_damage_last5','tl_p2_damage_last5','tl_last5_damage_diff',
    'tl_weighted_damage_diff','tl_first_ko_turn_p1_inflicted','tl_first_ko_turn_p1_taken',
    'tl_first_ko_turn_diff','tl_kos_early_p1','tl_kos_late_p1','tl_kos_early_p2','tl_kos_late_p2',
    # nuove feature aggiunte sotto (verranno whitelestate)
    'tl_p1_last5_damage_ratio','tl_p2_last5_damage_ratio','tl_last5_damage_ratio_diff',
    'tl_damage_adv_sign_flips','tl_comeback_flag'
]
rate_cols = [c for c in train_df.columns if c.startswith('tl_') and c.endswith('_rate')]
LATE_GAME_NEW_FEATURES = sorted(set(late_new_static + rate_cols))

# WHITELIST: tieni sempre queste feature anche se DROP_NEW_TIMELINE_FEATURES=True
WHITELIST_KEEP = {
    'tl_weighted_damage_diff', 'tl_last5_damage_diff',
    'tl_damage_adv_sign_flips', 'tl_comeback_flag',
    'tl_p1_last5_damage_ratio', 'tl_p2_last5_damage_ratio', 'tl_last5_damage_ratio_diff',
    'tl_p1_defensive_endurance', 'tl_p1_defensive_endurance_log',
    'tl_p2_damage_vs_p1_hp_pool', 'tl_p2_damage_vs_p1_hp_pool_log'
}
if DROP_NEW_TIMELINE_FEATURES:
    FEATURES = [c for c in FEATURES if (c not in LATE_GAME_NEW_FEATURES) or (c in WHITELIST_KEEP)]

# === Drop aggiuntivo anti-overfitting: rimuovi totali timeline e differenze di status pesanti ===
RAW_TL_TOTALS = ['tl_p1_est_damage','tl_p2_est_damage','tl_p1_moves','tl_p2_moves']
STATUS_DIFF_COLS = [c for c in train_df.columns if c.startswith('tl_inflicted_') and c.endswith('_diff')]
HIGH_VAR_TL = ['damage_diff', 'tl_first2_damage_diff']  # preserva endurance e weighted diff (cruciali)
drop_now = [c for c in RAW_TL_TOTALS if c in FEATURES] + [c for c in STATUS_DIFF_COLS if c in FEATURES] + [c for c in HIGH_VAR_TL if c in FEATURES]
FEATURES = [c for c in FEATURES if c not in set(drop_now)]

print(f'Feature numeriche (pre-stability): {len(FEATURES)}')

# === Stability filter (5-fold SMD) ===
STABILITY_THRESHOLD = 0.055
STABILITY_PROTECT = WHITELIST_KEEP | {'damage_per_turn_diff'}
y_all = train_df['player_won'].values.astype(int)
skf_stab = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
smd_cache = {f: [] for f in FEATURES}
for tr_idx, va_idx in skf_stab.split(train_df[FEATURES], y_all):
    tr_df = train_df.iloc[tr_idx][FEATURES].astype(float)
    va_df = train_df.iloc[va_idx][FEATURES].astype(float)
    std_tr = tr_df.std(ddof=1) + 1e-9
    smd = (va_df.mean() - tr_df.mean()) / std_tr
    for f, v in smd.items():
        smd_cache[f].append(abs(float(v)))
mean_smd = {f: (np.mean(vals) if len(vals) else 0.0) for f, vals in smd_cache.items()}
DRIFT_HARD_DROP = sorted([f for f, v in mean_smd.items() if v >= 0.09])
if DRIFT_HARD_DROP:
    FEATURES = [f for f in FEATURES if f not in set(DRIFT_HARD_DROP)]
    print(f'Drift hard drop ({len(DRIFT_HARD_DROP)}): {DRIFT_HARD_DROP[:6]}')

# === Stability filter: rimuovi feature instabili (alta varianza tra fold) ===
unstable_feats = sorted([f for f, v in mean_smd.items() if (v > STABILITY_THRESHOLD and f not in STABILITY_PROTECT)])
if unstable_feats:
    FEATURES = [f for f in FEATURES if f not in set(unstable_feats)]
    print(f'Stability filter: rimosse {len(unstable_feats)} colonne sopra {STABILITY_THRESHOLD:.3f}. Esempio: {unstable_feats[:6]}')
else:
    print('Stability filter: nessuna feature rimossa.')

print(f'Feature numeriche finali: {len(FEATURES)}')
print(f'Colonne escluse (stringhe): {len(string_cols)}')
if DROP_NEW_TIMELINE_FEATURES:
    kept = set(late_new_static) | set(rate_cols)
    dropped = [c for c in LATE_GAME_NEW_FEATURES if (c in train_df.columns and c not in WHITELIST_KEEP)]
    print(f"Late-game features rimosse (al netto whitelist): {len(dropped)}")
    print(f"Whitelist preservate: {[c for c in WHITELIST_KEEP if c in train_df.columns]}")
if drop_now:
    print(f"Drop extra (totali timeline e status diff): {len(drop_now)} -> esempi: {drop_now[:6]}")

X = train_df[FEATURES].values
y = y_all

# Split holdout test dal train (20%)
X_train_val, X_holdout, y_train_val, y_holdout, idx_train_val, idx_holdout = train_test_split(
    X, y, train_df.index.values, test_size=0.2, random_state=42, stratify=y)

print('train_val size:', X_train_val.shape[0], 'holdout size:', X_holdout.shape[0])

Feature numeriche (pre-stability): 180
Stability filter: nessuna feature rimossa.
Feature numeriche finali: 180
Colonne escluse (stringhe): 5
Late-game features rimosse (al netto whitelist): 27
Whitelist preservate: ['tl_p2_damage_vs_p1_hp_pool', 'tl_damage_adv_sign_flips', 'tl_p2_last5_damage_ratio', 'tl_last5_damage_ratio_diff', 'tl_comeback_flag', 'tl_p1_defensive_endurance', 'tl_last5_damage_diff', 'tl_weighted_damage_diff', 'tl_p1_last5_damage_ratio']
Drop extra (totali timeline e status diff): 18 -> esempi: ['tl_p1_est_damage', 'tl_p2_est_damage', 'tl_p1_moves', 'tl_p2_moves', 'tl_inflicted_brn_diff', 'tl_inflicted_par_diff']
train_val size: 8000 holdout size: 2000


In [9]:
# === Grid Search time-boxed (<= ~2 ore) ===
print("=== Time-boxed GridSearchCV (<= ~2 ore) ===")
import time, os
import numpy as np, pandas as pd
from sklearn.model_selection import StratifiedKFold, GridSearchCV, ParameterGrid
try:
    import joblib
    CPU_COUNT = joblib.cpu_count()
except Exception:
    CPU_COUNT = os.cpu_count() or 4

cv_inner = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Base estimator
base_clf = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42, n_jobs=1, tree_method='hist')

# Calcola uno scale_pos_weight automatico e cerca attorno ad esso
pos_rate = float(y_train_val.mean())
spw_auto = float((1.0 - pos_rate) / max(pos_rate, 1e-9))
spw_grid = sorted({1.0, max(1.0, spw_auto*0.75), max(1.0, spw_auto), max(1.0, spw_auto*1.25)})
print(f'scale_pos_weight auto≈{spw_auto:.2f} -> grid={spw_grid}')

# Griglia COARSE (regolarizzata) — include scale_pos_weight
grid_coarse = {
    'n_estimators':      [300, 500, 700],
    'max_depth':         [3, 4],
    'min_child_weight':  [3, 5, 7],
    'learning_rate':     [0.03, 0.05, 0.07],
    'subsample':         [0.7, 0.8, 0.9],
    'colsample_bytree':  [0.7, 0.8, 0.9],
    'gamma':             [0.1, 0.2, 0.3],
    'reg_alpha':         [0.05, 0.1, 0.2],
    'reg_lambda':        [2.0, 3.0, 4.0],
    'scale_pos_weight':  spw_grid
}

# Stima tempo per-fit (warmup) — CORRETTO param_grid
warm_params = [
    {'n_estimators': 500, 'max_depth': 3, 'min_child_weight': 5, 'learning_rate': 0.05, 'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0.2, 'reg_alpha': 0.1, 'reg_lambda': 3.0, 'scale_pos_weight': max(1.0, spw_auto)},
    {'n_estimators': 700, 'max_depth': 4, 'min_child_weight': 5, 'learning_rate': 0.03, 'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0.2, 'reg_alpha': 0.1, 'reg_lambda': 3.0, 'scale_pos_weight': max(1.0, spw_auto*1.25)}
]
print("Warmup per stimare t_fit...")
t0 = time.time()
gs_warm = GridSearchCV(
    base_clf,
    param_grid=[{k:[v] for k,v in d.items()} for d in warm_params],
    scoring='balanced_accuracy',
    cv=cv_inner,
    n_jobs=-1,
    verbose=0,
    refit=False
)
gs_warm.fit(X_train_val, y_train_val)
elapsed_warm = time.time() - t0
fits_warm = len(warm_params) * cv_inner.get_n_splits()
t_fit_per_fold = max(0.01, elapsed_warm / fits_warm)
print(f"Warmup: {elapsed_warm:.2f}s per {fits_warm} fit -> ~{t_fit_per_fold:.3f}s/fit")

# Budget totale ~2 ore con margine sicurezza
TARGET_SECONDS = int(2*3600*0.9)
speedup = max(1, min(CPU_COUNT, cv_inner.get_n_splits()))
max_combos = int((TARGET_SECONDS * speedup) / (t_fit_per_fold * cv_inner.get_n_splits()))
max_combos = int(max(48, min(max_combos, 2000)))
print(f"CPU={CPU_COUNT}, speedup~{speedup}, max_combos≈{max_combos}")

# Costruisci tutte le combinazioni e campiona fino a max_combos
all_points = list(ParameterGrid(grid_coarse))
total = len(all_points)
print(f"Candidate totali nella griglia: {total}")
rng = np.random.default_rng(42)
if total > max_combos:
    idx = rng.choice(total, size=max_combos, replace=False)
    sampled = [all_points[i] for i in idx]
else:
    sampled = all_points
print(f"Config selezionate: {len(sampled)}")

# Converte in lista di 'micro-grid' (1 punto ciascuno) — CORRETTO
param_grid_list = [{k:[v] for k,v in pt.items()} for pt in sampled]

print("Esecuzione GridSearch time-boxed...")
t1 = time.time()
gs = GridSearchCV(
    estimator=base_clf,
    param_grid=param_grid_list,
    scoring='balanced_accuracy',
    cv=cv_inner,
    n_jobs=-1,
    verbose=2,
    refit=True
)
gs.fit(X_train_val, y_train_val)
elapsed = time.time() - t1

results_df = pd.DataFrame(gs.cv_results_).sort_values('rank_test_score')
csv_path = 'hp_search_results_timeboxed_grid.csv'
results_df.to_csv(csv_path, index=False)
best_params = gs.best_params_

print(f"\n✅ Salvato {csv_path} ({len(results_df)} righe)")
print("Migliori iperparametri:")
for k, v in best_params.items():
    print(f"  {k}: {v}")
print(f"Best CV (balanced_accuracy): {gs.best_score_:.4f}")
print(f"Tempo GridSearch: {elapsed/60:.1f} min (budget ~{TARGET_SECONDS/60:.0f} min)")
print("Ora puoi usare 'best_params' nelle celle successive.")

=== Time-boxed GridSearchCV (<= ~2 ore) ===
scale_pos_weight auto≈1.00 -> grid=[1.0, 1.25]
Warmup per stimare t_fit...


KeyboardInterrupt: 

In [4]:
# === Optuna Study per selezione iperparametri XGBoost (balanced_accuracy) ===
# Se Optuna non è installato: togli il commento alla riga sotto in Jupyter
# %pip install -q optuna

import numpy as np
import pandas as pd
import optuna
from optuna.samplers import TPESampler
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import balanced_accuracy_score
from xgboost import XGBClassifier
import warnings
warnings.filterwarnings("ignore")

# Usa lo stesso split interno del GridSearch
cv_inner = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Calibra il range di scale_pos_weight attorno al valore automatico
pos_rate = float(y_train_val.mean())
spw_auto = float((1.0 - pos_rate) / max(pos_rate, 1e-9))
spw_low = max(1.0, spw_auto * 0.6)
spw_high = min(10.0, spw_auto * 1.6)

EARLY_STOPPING_ROUNDS = 50

def _predict_proba_best(clf, X):
    # Compat con diverse versioni XGBoost/SKLearn wrapper
    best_iter = getattr(clf, "best_iteration", None)
    try:
        if best_iter is not None:
            return clf.predict_proba(X, iteration_range=(0, int(best_iter)+1))[:, 1]
    except TypeError:
        pass
    try:
        booster = clf.get_booster()
        best_ntree_limit = getattr(booster, "best_ntree_limit", None)
        if best_ntree_limit is not None:
            return clf.predict_proba(X, ntree_limit=int(best_ntree_limit))[:, 1]
    except Exception:
        pass
    return clf.predict_proba(X)[:, 1]

def objective(trial: optuna.Trial) -> float:
    params = {
        "booster": "gbtree",
        "tree_method": "hist",
        "n_estimators": trial.suggest_int("n_estimators", 300, 1200, step=100),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.10, log=True),
        "max_depth": trial.suggest_int("max_depth", 3, 6),
        "min_child_weight": trial.suggest_int("min_child_weight", 2, 10),
        "gamma": trial.suggest_float("gamma", 0.0, 0.8),
        "subsample": trial.suggest_float("subsample", 0.6, 0.9),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 0.9),
        "colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.6, 1.0),
        "colsample_bynode": trial.suggest_float("colsample_bynode", 0.6, 1.0),
        "reg_alpha": trial.suggest_float("reg_alpha", 0.0, 1.0),
        "reg_lambda": trial.suggest_float("reg_lambda", 0.5, 10.0, log=True),
        "max_delta_step": trial.suggest_int("max_delta_step", 0, 2),
        "scale_pos_weight": trial.suggest_float("scale_pos_weight", spw_low, spw_high),
    }

    scores = []
    for fold_idx, (tr_idx, va_idx) in enumerate(cv_inner.split(X_train_val, y_train_val), start=1):
        X_tr, X_va = X_train_val[tr_idx], X_train_val[va_idx]
        y_tr, y_va = y_train_val[tr_idx], y_train_val[va_idx]

        clf = XGBClassifier(
            **params,
            use_label_encoder=False,
            eval_metric="logloss",
            random_state=42,
            n_jobs=1
        )

        # Early stopping su fold
        try:
            clf.fit(X_tr, y_tr, eval_set=[(X_va, y_va)], early_stopping_rounds=EARLY_STOPPING_ROUNDS, verbose=False)
        except TypeError:
            clf.fit(X_tr, y_tr, eval_set=[(X_va, y_va)], verbose=False)

        proba = _predict_proba_best(clf, X_va)
        preds = (proba >= 0.5).astype(int)
        score = balanced_accuracy_score(y_va, preds)
        scores.append(score)

        # Pruning
        trial.report(float(np.mean(scores)), step=fold_idx)
        if trial.should_prune():
            raise optuna.TrialPruned()

    return float(np.mean(scores))

N_TRIALS = 120    # aumenta/riduci in base al tempo a disposizione
TIMEOUT = None    # in secondi; ad es. 7200 per ~2h

study = optuna.create_study(direction="maximize", sampler=TPESampler(seed=42))
study.optimize(objective, n_trials=N_TRIALS, timeout=TIMEOUT, gc_after_trial=True)

print(f"Best balanced_accuracy: {study.best_value:.4f}")
print("Best params:")
for k, v in study.best_params.items():
    print(f"  {k}: {v}")

# Esporta risultati
trials_df = study.trials_dataframe()
trials_df.to_csv("optuna_trials_xgb.csv", index=False)
print("✅ Salvato optuna_trials_xgb.csv")

# Prepara best_params per le celle successive (CV/holdout/submission)
best_params = dict(study.best_params)
best_params.update({
    "booster": "gbtree",
    # Valori sicuri che non cerchiamo con Optuna, ma che il tuo codice usa
    # (lasciare invariati se non li vuoi forzare)
})
print("\nbest_params pronti per la 10-Fold CV:")
print(best_params)

[I 2025-11-03 14:33:24,414] A new study created in memory with name: no-name-1436204c-bbb6-468e-bbbe-5536bc1a4d17
[I 2025-11-03 14:33:35,941] Trial 0 finished with value: 0.80975 and parameters: {'n_estimators': 600, 'learning_rate': 0.08927180304353628, 'max_depth': 5, 'min_child_weight': 7, 'gamma': 0.12481491235394922, 'subsample': 0.6467983561008608, 'colsample_bytree': 0.6174250836504598, 'colsample_bylevel': 0.9464704583099741, 'colsample_bynode': 0.8404460046972835, 'reg_alpha': 0.7080725777960455, 'reg_lambda': 0.5318033256270142, 'max_delta_step': 2, 'scale_pos_weight': 1.4994655844802531}. Best is trial 0 with value: 0.80975.
[I 2025-11-03 14:33:44,280] Trial 1 finished with value: 0.8067499999999999 and parameters: {'n_estimators': 500, 'learning_rate': 0.015199348301309814, 'max_depth': 3, 'min_child_weight': 4, 'gamma': 0.4198051453057903, 'subsample': 0.7295835055926347, 'colsample_bytree': 0.6873687420594126, 'colsample_bylevel': 0.8447411578889518, 'colsample_bynode': 0

Best balanced_accuracy: 0.8234
Best params:
  n_estimators: 800
  learning_rate: 0.026903035419902337
  max_depth: 4
  min_child_weight: 8
  gamma: 0.039871447930199935
  subsample: 0.6092684563246151
  colsample_bytree: 0.8384119779272207
  colsample_bylevel: 0.808324436329465
  colsample_bynode: 0.6346001385909943
  reg_alpha: 0.5173888577259714
  reg_lambda: 0.7395028456377437
  max_delta_step: 1
  scale_pos_weight: 1.011137218440395
✅ Salvato optuna_trials_xgb.csv

best_params pronti per la 10-Fold CV:
{'n_estimators': 800, 'learning_rate': 0.026903035419902337, 'max_depth': 4, 'min_child_weight': 8, 'gamma': 0.039871447930199935, 'subsample': 0.6092684563246151, 'colsample_bytree': 0.8384119779272207, 'colsample_bylevel': 0.808324436329465, 'colsample_bynode': 0.6346001385909943, 'reg_alpha': 0.5173888577259714, 'reg_lambda': 0.7395028456377437, 'max_delta_step': 1, 'scale_pos_weight': 1.011137218440395, 'booster': 'gbtree'}


In [9]:
# === 10-Fold Cross-Validation con iperparametri FISSI ===
# IMPORTANTE: Assegna qui i migliori iperparametri trovati dalla cella precedente
# Oppure lascia questi di default (conservativi per ridurre overfitting)

best_params = {
    'booster': 'gbtree',
    'tree_method': 'hist',
    'max_bin': 256,          # istogrammi più grossolani = meno varianza e più veloce
    'learning_rate': 0.035,  # leggermente più alto con meno alberi
    'n_estimators': 900,     # meno alberi per ridurre overfitting
    'max_depth': 3,
    'min_child_weight': 9,   # nodi con più peso => meno overfit
    'gamma': 0.5,            # penalizza split deboli
    'subsample': 0.7,
    'colsample_bytree': 0.7,
    'colsample_bynode': 0.7,
    'colsample_bylevel': 0.8,
    'reg_alpha': 0.4,        # L1
    'reg_lambda': 10.0,      # L2 più alta
    'max_delta_step': 1
}

print("=== 10-Fold Cross-Validation (9 train + 1 validation) ===")
print(f"Parametri utilizzati: {best_params}\n")

from sklearn.model_selection import StratifiedKFold
import xgboost as xgb  # per callback EarlyStopping se disponibile
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
outer_accuracies = []
folds_info = []
train_accuracies = []
train_val_gaps = []
outer_accuracies_opt = []

EARLY_STOPPING_ROUNDS = 50
AUTO_BALANCE_POS_WEIGHT = True
if AUTO_BALANCE_POS_WEIGHT:
    pos_rate = float(y_train_val.mean())
    spw = (1.0 - pos_rate) / (pos_rate + 1e-9)
    if spw > 1.0:
        best_params = dict(best_params)
        best_params['scale_pos_weight'] = float(min(max(spw, 1.0), 10.0))
        print(f"Auto scale_pos_weight: {best_params['scale_pos_weight']:.3f}")

def best_threshold_for_accuracy(y_true, proba, n_grid=201):
    y_true = np.asarray(y_true).astype(int)
    proba = np.asarray(proba).astype(float)
    grid = np.unique(np.quantile(proba, np.linspace(0, 1, n_grid)))
    best_thr, best_acc = 0.5, 0.0
    for t in grid:
        acc = ( ((proba >= t).astype(int) == y_true).mean() )
        if (acc > best_acc) or (abs(acc - best_acc) < 1e-12 and abs(t - 0.5) < abs(best_thr - 0.5)):
            best_acc, best_thr = float(acc), float(t)
    return best_thr, best_acc

def _fit_with_es(clf, X_tr, y_tr, X_val, y_val):
    """Fit con EarlyStopping via callback se supportato; fallback senza ES."""
    try:
        cb = getattr(xgb.callback, 'EarlyStopping', None)
        if cb is not None:
            clf.fit(X_tr, y_tr, eval_set=[(X_val, y_val)], callbacks=[cb(rounds=EARLY_STOPPING_ROUNDS, save_best=True, maximize=False)], verbose=False)
            return True
    except TypeError:
        pass
    clf.fit(X_tr, y_tr, eval_set=[(X_val, y_val)], verbose=False)
    return False

def _predict_proba_best(clf, X, best_iter=None, best_ntree_limit=None):
    """Version-safe predict_proba using either iteration_range (new) or ntree_limit (old)."""
    try:
        if best_iter is not None:
            return clf.predict_proba(X, iteration_range=(0, int(best_iter)+1))[:, 1]
    except TypeError:
        pass
    try:
        if best_ntree_limit is not None:
            return clf.predict_proba(X, ntree_limit=int(best_ntree_limit))[:, 1]
    except TypeError:
        pass
    return clf.predict_proba(X)[:, 1]

fold_idx = 0
for train_idx, val_idx in skf.split(X_train_val, y_train_val):
    fold_idx += 1
    X_tr, X_val = X_train_val[train_idx], X_train_val[val_idx]
    y_tr, y_val = y_train_val[train_idx], y_train_val[val_idx]

    clf = XGBClassifier(**best_params, use_label_encoder=False, eval_metric='logloss', random_state=42)
    used_es = _fit_with_es(clf, X_tr, y_tr, X_val, y_val)

    best_iter = getattr(clf, 'best_iteration', None)
    try:
        booster = clf.get_booster()
    except Exception:
        booster = None
    best_ntree_limit = getattr(booster, 'best_ntree_limit', None) if booster is not None else None

    y_val_proba = _predict_proba_best(clf, X_val, best_iter, best_ntree_limit)
    y_pred = (y_val_proba >= 0.5).astype(int)
    acc = accuracy_score(y_val, y_pred)
    outer_accuracies.append(acc)

    y_tr_proba = _predict_proba_best(clf, X_tr, best_iter, best_ntree_limit)
    y_tr_pred = (y_tr_proba >= 0.5).astype(int)
    tr_acc = accuracy_score(y_tr, y_tr_pred)
    gap = float(tr_acc - acc)
    train_accuracies.append(tr_acc)
    train_val_gaps.append(gap)

    thr_acc, acc_opt = best_threshold_for_accuracy(y_val, y_val_proba, n_grid=301)
    outer_accuracies_opt.append(acc_opt)

    val_index_global = idx_train_val[val_idx]
    train_index_global = idx_train_val[train_idx]

    folds_info.append({
        'fold': fold_idx,
        'acc': float(acc),
        'train_acc': float(tr_acc),
        'gap_train_minus_val': float(gap),
        'acc_opt': float(acc_opt),
        'thr_acc': float(thr_acc),
        'best_iteration': int(best_iter) if best_iter is not None else None,
        'train_idx': train_idx,
        'val_idx': val_idx,
        'train_index_global': train_index_global,
        'val_index_global': val_index_global,
        'y_true': y_val.astype(int),
        'y_pred': y_pred.astype(int),
        'y_proba': y_val_proba.astype(float)
    })

    es_tag = 'with ES' if used_es else 'no ES'
    print(f'Fold {fold_idx}: {es_tag}, train={len(y_tr)}, val={len(y_val)}, acc_val={acc*100:.2f}%, acc_val_opt={acc_opt*100:.2f}% @thr={thr_acc:.3f}, acc_train={tr_acc*100:.2f}%, gap={(gap)*100:.2f}%')

print('\n' + '='*60)
print('Risultati Cross-Validation')
print('='*60)
for i, a in enumerate(outer_accuracies, 1):
    print(f'  Fold {i}: val_acc={a*100:.2f}%, val_acc_opt={outer_accuracies_opt[i-1]*100:.2f}% @thr={folds_info[i-1]["thr_acc"]:.3f}, train_acc={train_accuracies[i-1]*100:.2f}%, gap={train_val_gaps[i-1]*100:.2f}%')
print(f'\nMean CV accuracy (0.5): {np.mean(outer_accuracies)*100:.2f}%')
print(f'Mean CV accuracy (opt thr): {np.mean(outer_accuracies_opt)*100:.2f}%')
print(f'Mean train accuracy: {np.mean(train_accuracies)*100:.2f}%')
print(f'Mean gap (train - val): {np.mean(train_val_gaps)*100:.2f}%')
print(f'Std CV accuracy:  {np.std(outer_accuracies)*100:.2f}%')
print(f'Min/Max val acc:  {np.min(outer_accuracies)*100:.2f}% / {np.max(outer_accuracies)*100:.2f}%')

WORST_FOLD_IDX = int(np.argmin(outer_accuracies))
WORST_FOLD_NUM = int(folds_info[WORST_FOLD_IDX]['fold'])
print(f"\nPeggiore fold: #{WORST_FOLD_NUM} con acc_val={outer_accuracies[WORST_FOLD_IDX]*100:.2f}% | acc_val_opt={outer_accuracies_opt[WORST_FOLD_IDX]*100:.2f}% | acc_train={train_accuracies[WORST_FOLD_IDX]*100:.2f}% | gap={train_val_gaps[WORST_FOLD_IDX]*100:.2f}%")

=== 10-Fold Cross-Validation (9 train + 1 validation) ===
Parametri utilizzati: {'booster': 'gbtree', 'tree_method': 'hist', 'max_bin': 256, 'learning_rate': 0.035, 'n_estimators': 900, 'max_depth': 3, 'min_child_weight': 9, 'gamma': 0.5, 'subsample': 0.7, 'colsample_bytree': 0.7, 'colsample_bynode': 0.7, 'colsample_bylevel': 0.8, 'reg_alpha': 0.4, 'reg_lambda': 10.0, 'max_delta_step': 1}

Fold 1: no ES, train=7200, val=800, acc_val=81.62%, acc_val_opt=82.12% @thr=0.623, acc_train=86.56%, gap=4.93%
Fold 2: no ES, train=7200, val=800, acc_val=81.88%, acc_val_opt=82.12% @thr=0.504, acc_train=86.44%, gap=4.57%
Fold 3: no ES, train=7200, val=800, acc_val=82.12%, acc_val_opt=82.75% @thr=0.396, acc_train=86.64%, gap=4.51%
Fold 4: no ES, train=7200, val=800, acc_val=84.50%, acc_val_opt=84.88% @thr=0.459, acc_train=86.04%, gap=1.54%
Fold 5: no ES, train=7200, val=800, acc_val=82.88%, acc_val_opt=83.25% @thr=0.600, acc_train=86.58%, gap=3.71%
Fold 6: no ES, train=7200, val=800, acc_val=80.25%, 

In [10]:
print("=== Submission rapida post-CV ===")
cv_submission_model = XGBClassifier(**best_params, use_label_encoder=False, eval_metric='logloss', random_state=42)
cv_submission_model.fit(X_train_val, y_train_val)

test_aligned = test_df.reindex(columns=FEATURES, fill_value=0)
X_test_matrix = test_aligned.astype(float).to_numpy()
test_predictions = cv_submission_model.predict(X_test_matrix).astype(int)

submission_df = pd.DataFrame({
    'battle_id': test_df['battle_id'].astype(np.int64),
    'player_won': test_predictions.astype(np.int64)
})

submission_path = 'submission.csv'
submission_df.to_csv(submission_path, index=False)
print(f"✅ File di submission salvato in {submission_path}")
print(submission_df.head())

=== Submission rapida post-CV ===
✅ File di submission salvato in submission.csv
   battle_id  player_won
0          0           0
1          1           1
2          2           1
3          3           1
4          4           1
