# Pokémon battles — XGBoost with 10-fold outer CV
Notebook breve che esegue: feature engineering, split train/val/test, 10-fold outer CV con GridSearchCV interno, valutazione per fold, valutazione su holdout e generazione submission.csv.

# Load data

In [2]:
import json
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
import warnings
warnings.filterwarnings('ignore')

# --- Percorsi (modificare se necessario) ---
COMPETITION_NAME = 'fds-pokemon-battles-prediction-2025'
train_file_path = 'train.jsonl'
test_file_path = 'test.jsonl'

def load_jsonl(path):
    data = []
    with open(path, 'r') as f:
        for line in f:
            data.append(json.loads(line))
    return data

print('Caricamento dati...')
train_raw = load_jsonl(train_file_path)
test_raw = load_jsonl(test_file_path)
print(f'Train records: {len(train_raw)}, Test records: {len(test_raw)}')

Caricamento dati...
Train records: 10000, Test records: 5000


# Features engeneering

In [3]:
import math
from collections import Counter

# === TYPE CHART (Gen 1) ===
TYPE_CHART = {
    'normal': {'rock': 0.5, 'ghost': 0},
    'fire': {'fire': 0.5, 'water': 0.5, 'grass': 2, 'ice': 2, 'bug': 2, 'rock': 0.5, 'dragon': 0.5},
    'water': {'fire': 2, 'water': 0.5, 'grass': 0.5, 'ground': 2, 'rock': 2, 'dragon': 0.5},
    'grass': {'fire': 0.5, 'water': 2, 'grass': 0.5, 'poison': 0.5, 'ground': 2, 'flying': 0.5, 'bug': 0.5, 'rock': 2, 'dragon': 0.5},
    'electric': {'water': 2, 'grass': 0.5, 'electric': 0.5, 'ground': 0, 'flying': 2, 'dragon': 0.5},
    'ice': {'fire': 0.5, 'water': 0.5, 'grass': 2, 'ground': 2, 'flying': 2, 'dragon': 2},
    'fighting': {'normal': 2, 'ice': 2, 'poison': 0.5, 'flying': 0.5, 'psychic': 0.5, 'bug': 0.5, 'rock': 2, 'ghost': 0},
    'poison': {'grass': 2, 'poison': 0.5, 'ground': 0.5, 'bug': 2, 'rock': 0.5, 'ghost': 0.5},
    'ground': {'fire': 2, 'grass': 0.5, 'electric': 2, 'poison': 2, 'flying': 0, 'bug': 0.5, 'rock': 2},
    'flying': {'grass': 2, 'electric': 0.5, 'fighting': 2, 'bug': 2, 'rock': 0.5},
    'psychic': {'fighting': 2, 'poison': 2, 'psychic': 0.5, 'ghost': 0},
    'bug': {'fire': 0.5, 'grass': 2, 'fighting': 0.5, 'poison': 2, 'flying': 0.5, 'psychic': 2, 'ghost': 0.5},
    'rock': {'fire': 2, 'ice': 2, 'fighting': 0.5, 'ground': 0.5, 'flying': 2, 'bug': 2},
    'ghost': {'normal': 0, 'psychic': 0, 'ghost': 2},
    'dragon': {'dragon': 2}
}

def get_effectiveness(attack_type: str, defense_types: list) -> float:
    if not attack_type or not defense_types:
        return 1.0
    eff = 1.0
    for d in defense_types:
        eff *= TYPE_CHART.get(attack_type, {}).get(d, 1.0)
    return eff

def calculate_type_advantage(team1: list, team2_lead: dict) -> dict:
    out = {'p1_vs_lead_avg_effectiveness': 0.0, 'p1_vs_lead_max_effectiveness': 0.0, 'p1_super_effective_options': 0}
    if not team1 or not team2_lead:
        return out
    lead_types = [t.lower() for t in team2_lead.get('types', [])]
    if not lead_types:
        return out
    effs = []
    for p in team1:
        p_types = [t.lower() for t in p.get('types', [])]
        max_eff = 0.0
        for pt in p_types:
            max_eff = max(max_eff, get_effectiveness(pt, lead_types))
        effs.append(max_eff)
    if not effs:
        return out
    out['p1_vs_lead_avg_effectiveness'] = float(np.mean(effs))
    out['p1_vs_lead_max_effectiveness'] = float(np.max(effs))
    out['p1_super_effective_options'] = int(sum(1 for e in effs if e >= 2))
    return out

def _entropy(counter: Counter) -> float:
    total = sum(counter.values())
    if total == 0:
        return 0.0
    ent = 0.0
    for v in counter.values():
        p = v / total
        if p > 0:
            ent -= p * math.log(p, 2)
    return ent

def team_aggregate_features(team: list, prefix: str = 'p1_') -> dict:
    stats = ['base_hp','base_atk','base_def','base_spa','base_spd','base_spe']
    out = {}
    vals = {s: [] for s in stats}
    levels = []
    types_counter = Counter()
    names = []
    for p in team:
        names.append(p.get('name',''))
        for s in stats:
            vals[s].append(p.get(s, 0))
        levels.append(p.get('level', 0))
        for t in p.get('types', []):
            types_counter[t.lower()] += 1
    for s in stats:
        arr = np.array(vals[s], dtype=float)
        out[f'{prefix}{s}_sum'] = float(arr.sum())
        out[f'{prefix}{s}_mean'] = float(arr.mean())
        out[f'{prefix}{s}_max'] = float(arr.max())
        out[f'{prefix}{s}_min'] = float(arr.min())
        out[f'{prefix}{s}_std'] = float(arr.std())
    level_arr = np.array(levels, dtype=float)
    out[f'{prefix}level_mean'] = float(level_arr.mean()) if level_arr.size else 0.0
    out[f'{prefix}level_sum'] = float(level_arr.sum()) if level_arr.size else 0.0
    out[f'{prefix}n_unique_types'] = int(len(types_counter))
    common_types = ['normal','fire','water','electric','grass','psychic','ice','dragon','rock','ground','flying']
    for t in common_types:
        out[f'{prefix}type_{t}_count'] = int(types_counter.get(t, 0))
    out[f'{prefix}lead_name'] = names[0] if names else ''
    out[f'{prefix}n_unique_names'] = int(len(set(names)))
    out[f'{prefix}type_entropy'] = float(_entropy(types_counter))
    spe_arr = np.array(vals['base_spe'], dtype=float)
    out[f'{prefix}spe_p25'] = float(np.percentile(spe_arr, 25)) if spe_arr.size else 0.0
    out[f'{prefix}spe_p50'] = float(np.percentile(spe_arr, 50)) if spe_arr.size else 0.0
    out[f'{prefix}spe_p75'] = float(np.percentile(spe_arr, 75)) if spe_arr.size else 0.0
    return out

def lead_vs_lead_features(p1_lead: dict, p2_lead: dict) -> dict:
    out = {}
    stats = ['base_hp','base_atk','base_def','base_spa','base_spd','base_spe']
    for s in stats:
        out[f'lead_diff_{s}'] = float(p1_lead.get(s,0) - p2_lead.get(s,0))
    out['lead_speed_advantage'] = float(p1_lead.get('base_spe',0) - p2_lead.get('base_spe',0))
    p1_types = [t.lower() for t in p1_lead.get('types', [])]
    p2_types = [t.lower() for t in p2_lead.get('types', [])]
    max_eff = 0.0
    for pt in p1_types:
        max_eff = max(max_eff, get_effectiveness(pt, p2_types))
    out['lead_p1_vs_p2_effectiveness'] = float(max_eff)
    return out

def lead_aggregate_features(pokemon: dict, prefix: str = 'p2_lead_') -> dict:
    out = {}
    stats = ['base_hp','base_atk','base_def','base_spa','base_spd','base_spe']
    for s in stats:
        out[f'{prefix}{s}'] = float(pokemon.get(s,0))
    out[f'{prefix}level'] = int(pokemon.get('level',0))
    types = [x.lower() for x in pokemon.get('types', [])]
    common_types = ['normal','fire','water','electric','grass','psychic','ice','dragon','rock','ground','flying']
    for t in common_types:
        out[f'{prefix}type_{t}'] = int(t in types)
    out[f'{prefix}name'] = pokemon.get('name','')
    out[f'{prefix}n_unique_types'] = int(len(set(types)))
    return out

def summary_from_timeline(timeline: list, p1_team: list) -> dict:
    out = {}
    if not timeline:
        return {'tl_p1_moves':0,'tl_p2_moves':0,'tl_p1_est_damage':0.0,'tl_p2_est_damage':0.0,'damage_diff':0.0}
    p1_moves = p2_moves = 0
    p1_damage = p2_damage = 0.0
    p1_last_active = p2_last_active = ''
    p1_last_hp = p2_last_hp = np.nan
    p1_fainted = p2_fainted = 0
    p1_fainted_names = set()
    p2_fainted_names = set()
    last_p1_hp = {}
    last_p2_hp = {}
    p1_comeback_kos = 0
    p2_comeback_kos = 0
    p1_inflicted_statuses = Counter()
    p2_inflicted_statuses = Counter()
    p1_pokemon_statuses = {}
    p2_pokemon_statuses = {}
    p1_move_type_counts = Counter()
    p2_move_type_counts = Counter()
    p1_damage_first2 = 0.0
    p2_damage_first2 = 0.0

    # NEW: per-turn damage accumulation, KO timing and early/late KO counters
    p1_dmg_by_turn = {}  # damage inflitto da p1 (contro p2) per turno
    p2_dmg_by_turn = {}  # damage inflitto da p2 (contro p1) per turno
    seen_turns = set()
    first_ko_turn_p1_taken = None   # primo KO subìto da p1 (p1_fainted++)
    first_ko_turn_p1_inflicted = None  # primo KO inflitto da p1 (p2_fainted++)
    early_threshold = 10
    p1_kos_early = p1_kos_late = 0
    p2_kos_early = p2_kos_late = 0

    for turn in timeline[:30]:
        prev_p1_fainted, prev_p2_fainted = p1_fainted, p2_fainted
        p1_state = turn.get('p1_pokemon_state',{}) or {}
        p2_state = turn.get('p2_pokemon_state',{}) or {}
        tnum = turn.get('turn', None)
        if tnum is None:
            # fallback: usa lunghezza dei turni visti + 1
            tnum = (len(seen_turns) + 1)
        seen_turns.add(tnum)

        if p1_state.get('name'):
            p1_last_active = p1_state.get('name')
        if p2_state.get('name'):
            p2_last_active = p2_state.get('name')

        if p1_state.get('fainted') and p1_state.get('name') not in p1_fainted_names:
            p1_fainted += 1
            p1_fainted_names.add(p1_state.get('name'))
            if first_ko_turn_p1_taken is None:
                first_ko_turn_p1_taken = tnum
            if tnum <= early_threshold: p2_kos_early += 1
            else: p2_kos_late += 1
        if p2_state.get('fainted') and p2_state.get('name') not in p2_fainted_names:
            p2_fainted += 1
            p2_fainted_names.add(p2_state.get('name'))
            if first_ko_turn_p1_inflicted is None:
                first_ko_turn_p1_inflicted = tnum
            if tnum <= early_threshold: p1_kos_early += 1
            else: p1_kos_late += 1

        p2_name, p2_hp = p2_state.get('name'), p2_state.get('hp_pct')
        if p2_name and p2_hp is not None:
            prev_hp = last_p2_hp.get(p2_name)
            if prev_hp is not None:
                delta = max(0.0, prev_hp - p2_hp)
                p1_damage += delta
                p1_dmg_by_turn[tnum] = p1_dmg_by_turn.get(tnum, 0.0) + delta
                if turn.get('turn',999) <= 2:
                    p1_damage_first2 += delta
            last_p2_hp[p2_name] = p2_hp

        p1_name, p1_hp = p1_state.get('name'), p1_state.get('hp_pct')
        if p1_name and p1_hp is not None:
            prev_hp = last_p1_hp.get(p1_name)
            if prev_hp is not None:
                delta = max(0.0, prev_hp - p1_hp)
                p2_damage += delta
                p2_dmg_by_turn[tnum] = p2_dmg_by_turn.get(tnum, 0.0) + delta
                if turn.get('turn',999) <= 2:
                    p2_damage_first2 += delta
            last_p1_hp[p1_name] = p1_hp

        damage_diff_so_far = p1_damage - p2_damage
        if p2_fainted > prev_p2_fainted and damage_diff_so_far < -1.0:
            p1_comeback_kos += 1
        if p1_fainted > prev_p1_fainted and damage_diff_so_far > 1.0:
            p2_comeback_kos += 1

        p2_status = p2_state.get('status')
        if p2_name and p2_status and p2_pokemon_statuses.get(p2_name) != p2_status:
            p1_inflicted_statuses[p2_status] += 1
            p2_pokemon_statuses[p2_name] = p2_status
        p1_status = p1_state.get('status')
        if p1_name and p1_status and p1_pokemon_statuses.get(p1_name) != p1_status:
            p2_inflicted_statuses[p1_status] += 1
            p1_pokemon_statuses[p1_name] = p1_status

        p1_move = turn.get('p1_move_details') or {}
        p2_move = turn.get('p2_move_details') or {}
        if p1_move and p1_move.get('type'):
            p1_move_type_counts[(p1_move.get('type') or '').lower()] += 1
        if p2_move and p2_move.get('type'):
            p2_move_type_counts[(p2_move.get('type') or '').lower()] += 1
        if turn.get('p1_move_details'):
            p1_moves += 1
        if turn.get('p2_move_details'):
            p2_moves += 1
        p1_last_hp = p1_state.get('hp_pct', np.nan)
        p2_last_hp = p2_state.get('hp_pct', np.nan)

    # ...existing code computing out[...] baseline metrics...
    out['tl_p1_moves'] = int(p1_moves)
    out['tl_p2_moves'] = int(p2_moves)
    out['tl_p1_est_damage'] = float(p1_damage)
    out['tl_p2_est_damage'] = float(p2_damage)
    # NUOVE FEATURE: conteggio KO per squadra e rate normalizzati per turno
    out['tl_p1_fainted'] = int(p1_fainted)
    out['tl_p2_fainted'] = int(p2_fainted)
    turns_count = max(1, len(seen_turns))
    out['tl_p1_fainted_rate'] = float(out['tl_p1_fainted'] / turns_count)
    out['tl_p2_fainted_rate'] = float(out['tl_p2_fainted'] / turns_count)
    # fine nuovi features
    out['damage_diff'] = float(p1_damage - p2_damage)
    out['fainted_diff'] = int(p1_fainted - p2_fainted)
    out['tl_p1_last_hp'] = float(p1_last_hp) if not np.isnan(p1_last_hp) else 0.0
    out['tl_p2_last_hp'] = float(p2_last_hp) if not np.isnan(p2_last_hp) else 0.0
    out['tl_p1_last_active'] = p1_last_active
    out['tl_p2_last_active'] = p2_last_active
    if p1_team:
        p1_total_hp_sum = sum(p.get('base_hp',0) for p in p1_team)
        p1_avg_def = np.mean([p.get('base_def',0) for p in p1_team] or [0])
        p1_avg_spd = np.mean([p.get('base_spd',0) for p in p1_team] or [0])
        out['tl_p2_damage_vs_p1_hp_pool'] = float(p2_damage / (p1_total_hp_sum + 1e-6))
        out['tl_p1_defensive_endurance'] = float((p1_avg_def + p1_avg_spd) / (p2_damage + 1e-6))
    out['tl_p1_comeback_kos'] = int(p1_comeback_kos)
    out['tl_p2_comeback_kos'] = int(p2_comeback_kos)
    out['tl_comeback_kos_diff'] = int(p1_comeback_kos - p2_comeback_kos)

    common_statuses = ['brn','par','slp','frz','psn','tox']
    for status in common_statuses:
        out[f'tl_p1_inflicted_{status}_count'] = int(p1_inflicted_statuses.get(status,0))
        out[f'tl_p2_inflicted_{status}_count'] = int(p2_inflicted_statuses.get(status,0))
        out[f'tl_inflicted_{status}_diff'] = int(p1_inflicted_statuses.get(status,0) - p2_inflicted_statuses.get(status,0))

    common_move_types = ['normal','fire','water','electric','grass','psychic','ice','dragon','rock','ground','flying','ghost','bug','poison','fighting']
    for mt in common_move_types:
        out[f'tl_p1_move_type_{mt}_count'] = int(p1_move_type_counts.get(mt,0))
        out[f'tl_p2_move_type_{mt}_count'] = int(p2_move_type_counts.get(mt,0))
        out[f'tl_move_type_{mt}_count_diff'] = int(p1_move_type_counts.get(mt,0) - p2_move_type_counts.get(mt,0))

    out['tl_p1_damage_first2'] = float(p1_damage_first2)
    out['tl_p2_damage_first2'] = float(p2_damage_first2)
    out['tl_first2_damage_diff'] = float(p1_damage_first2 - p2_damage_first2)

    # NEW: derived, normalized and late-game features
    turns_count = max(1, len(seen_turns))
    out['tl_turns_count'] = int(turns_count)
    out['tl_p1_moves_rate'] = float(p1_moves / turns_count)
    out['tl_p2_moves_rate'] = float(p2_moves / turns_count)
    out['tl_p1_damage_per_turn'] = float(p1_damage / turns_count)
    out['tl_p2_damage_per_turn'] = float(p2_damage / turns_count)
    out['tl_damage_rate_diff'] = float(out['tl_p1_damage_per_turn'] - out['tl_p2_damage_per_turn'])

    # last-5-turns damage window
    if seen_turns:
        recent_turns = sorted(seen_turns)[-5:]
        p1_last5 = sum(p1_dmg_by_turn.get(t,0.0) for t in recent_turns)
        p2_last5 = sum(p2_dmg_by_turn.get(t,0.0) for t in recent_turns)
    else:
        p1_last5 = p2_last5 = 0.0
    out['tl_p1_damage_last5'] = float(p1_last5)
    out['tl_p2_damage_last5'] = float(p2_last5)
    out['tl_last5_damage_diff'] = float(p1_last5 - p2_last5)
    # NEW: ratio danno ultimi 5 turni vs totale
    out['tl_p1_last5_damage_ratio'] = float(p1_last5 / (p1_damage + 1e-6))
    out['tl_p2_last5_damage_ratio'] = float(p2_last5 / (p2_damage + 1e-6))
    out['tl_last5_damage_ratio_diff'] = float(out['tl_p1_last5_damage_ratio'] - out['tl_p2_last5_damage_ratio'])

    # time-weighted damage advantage (peso crescente con il turno)
    if seen_turns:
        ts = sorted(seen_turns)
        w = np.linspace(1.0, 2.0, num=len(ts))  # pesi crescenti
        w = w / (w.sum() + 1e-9)
        adv = [(p1_dmg_by_turn.get(t,0.0) - p2_dmg_by_turn.get(t,0.0)) for t in ts]
        out['tl_weighted_damage_diff'] = float(np.dot(w, adv))
    else:
        out['tl_weighted_damage_diff'] = 0.0

    # NEW: comeback indicator (cambio di segno dell'adv cumulativo)
    if seen_turns:
        ts = sorted(seen_turns)
        cum = 0.0
        signs = []
        for t in ts:
            cum += (p1_dmg_by_turn.get(t,0.0) - p2_dmg_by_turn.get(t,0.0))
            s = 1 if cum > 1e-9 else (-1 if cum < -1e-9 else 0)
            if s != 0:
                if not signs or signs[-1] != s:
                    signs.append(s)
        sign_flips = max(0, len(signs) - 1)
        comeback_flag = 1 if (len(signs) >= 2 and signs[0] != signs[-1]) else 0
    else:
        sign_flips = 0
        comeback_flag = 0
    out['tl_damage_adv_sign_flips'] = int(sign_flips)
    out['tl_comeback_flag'] = int(comeback_flag)

    # KO timing and early/late counts
    out['tl_first_ko_turn_p1_inflicted'] = int(first_ko_turn_p1_inflicted or 0)
    out['tl_first_ko_turn_p1_taken'] = int(first_ko_turn_p1_taken or 0)
    out['tl_first_ko_turn_diff'] = int((first_ko_turn_p1_inflicted or 0) - (first_ko_turn_p1_taken or 0))
    out['tl_kos_early_p1'] = int(p1_kos_early)
    out['tl_kos_late_p1'] = int(p1_kos_late)
    out['tl_kos_early_p2'] = int(p2_kos_early)
    out['tl_kos_late_p2'] = int(p2_kos_late)

    # normalized status rates per turn
    for status in common_statuses:
        c1 = p1_inflicted_statuses.get(status,0)
        c2 = p2_inflicted_statuses.get(status,0)
        out[f'tl_p1_inflicted_{status}_rate'] = float(c1 / turns_count)
        out[f'tl_p2_inflicted_{status}_rate'] = float(c2 / turns_count)
        out[f'tl_inflicted_{status}_rate_diff'] = float((c1 - c2) / turns_count)

    return out

def ability_features(team: list, prefix: str) -> dict:
    immunity_abilities = {'levitate':0,'volt_absorb':0,'water_absorb':0,'flash_fire':0}
    stat_drop_abilities = {'intimidate':0}
    weather_abilities = {'drought':0,'drizzle':0,'sand_stream':0}
    out = {}
    for pokemon in team:
        ability = (pokemon.get('ability','') or '').lower().replace(' ','_')
        if ability in immunity_abilities:
            immunity_abilities[ability] += 1
        if ability in stat_drop_abilities:
            stat_drop_abilities[ability] += 1
        if ability in weather_abilities:
            weather_abilities[ability] += 1
    for ability,count in immunity_abilities.items():
        out[f'{prefix}ability_{ability}_count'] = int(count)
    for ability,count in stat_drop_abilities.items():
        out[f'{prefix}ability_{ability}_count'] = int(count)
    for ability,count in weather_abilities.items():
        out[f'{prefix}ability_{ability}_count'] = int(count)
    out[f'{prefix}total_immunity_abilities'] = int(sum(immunity_abilities.values()))
    out[f'{prefix}total_stat_drop_abilities'] = int(sum(stat_drop_abilities.values()))
    return out

def prepare_record_features(record: dict, max_turns: int = 30) -> dict:
    out = {}
    out['battle_id'] = record.get('battle_id')
    if 'player_won' in record:
        out['player_won'] = int(bool(record.get('player_won')))
    p1_team = record.get('p1_team_details', [])
    out.update(team_aggregate_features(p1_team, prefix='p1_'))
    p2_lead = record.get('p2_lead_details', {})
    out.update(lead_aggregate_features(p2_lead, prefix='p2_lead_'))
    out.update(ability_features(p1_team, prefix='p1_'))
    p1_lead = p1_team[0] if p1_team else {}
    out.update(lead_vs_lead_features(p1_lead, p2_lead))
    out.update(ability_features([p2_lead], prefix='p2_lead_'))
    out['p1_intimidate_vs_lead'] = 1 if out.get('p1_ability_intimidate_count',0) > 0 else 0
    tl = record.get('battle_timeline', [])
    out.update(summary_from_timeline(tl[:max_turns], p1_team))
    out['team_hp_sum_minus_p2lead_hp'] = out.get('p1_base_hp_sum', 0) - out.get('p2_lead_base_hp', 0)
    out['team_spa_mean_minus_p2spa'] = out.get('p1_base_spa_mean', 0) - out.get('p2_lead_base_spa', 0)
    out['speed_advantage'] = out.get('p1_base_spe_sum', 0) - out.get('p2_lead_base_spe', 0)
    out['n_unique_types_diff'] = out.get('p1_n_unique_types', 0) - out.get('p2_lead_n_unique_types', 1)
    p1_moves = max(out.get('tl_p1_moves',1),1)
    p2_moves = max(out.get('tl_p2_moves',1),1)
    out['damage_per_turn_diff'] = (out.get('tl_p1_est_damage',0.0)/p1_moves) - (out.get('tl_p2_est_damage',0.0)/p2_moves)
    out['last_pair'] = f"{out.get('tl_p1_last_active','')}_VS_{out.get('tl_p2_last_active','')}"
    out.update(calculate_type_advantage(p1_team, p2_lead))
    p2_lead_bulk = out.get('p2_lead_base_def',1) + out.get('p2_lead_base_spd',1)
    out['p1_se_options_vs_lead_bulk'] = out.get('p1_super_effective_options',0) / (p2_lead_bulk + 1e-6)
    p2_team = record.get('p2_team_details', [])
    if p2_team:
        out.update(team_aggregate_features(p2_team, prefix='p2_'))
        out['team_hp_sum_diff'] = out.get('p1_base_hp_sum',0) - out.get('p2_base_hp_sum',0)
        out['team_spa_mean_diff'] = out.get('p1_base_spa_mean',0) - out.get('p2_base_spa_mean',0)
        out['team_spe_mean_diff'] = out.get('p1_base_spe_mean',0) - out.get('p2_base_spe_mean',0)
        out['n_unique_types_team_diff'] = out.get('p1_n_unique_types',0) - out.get('p2_n_unique_types',0)
    return out

def create_features_from_raw(data: list) -> pd.DataFrame:
    rows = []
    for b in tqdm(data, desc='FE'):
        try:
            feat = prepare_record_features(b, max_turns=30)
            if 'battle_id' not in feat:
                feat['battle_id'] = b.get('battle_id')
            rows.append(feat)
        except Exception as e:
            rows.append({'battle_id': b.get('battle_id'), 'error': 1})
    df = pd.DataFrame(rows)
    if 'player_won' in df.columns:
        df['player_won'] = df['player_won'].astype(int)
    return df.fillna(0)

train_df = create_features_from_raw(train_raw)
test_df = create_features_from_raw(test_raw)
print('Feature shape train/test:', train_df.shape, test_df.shape)
display(train_df.head())

FE:   0%|          | 0/10000 [00:00<?, ?it/s]

FE: 100%|██████████| 10000/10000 [00:19<00:00, 502.39it/s]
FE: 100%|██████████| 5000/5000 [00:07<00:00, 673.93it/s]


Feature shape train/test: (10000, 236) (5000, 235)


Unnamed: 0,battle_id,player_won,p1_base_hp_sum,p1_base_hp_mean,p1_base_hp_max,p1_base_hp_min,p1_base_hp_std,p1_base_atk_sum,p1_base_atk_mean,p1_base_atk_max,...,team_hp_sum_minus_p2lead_hp,team_spa_mean_minus_p2spa,speed_advantage,n_unique_types_diff,damage_per_turn_diff,last_pair,p1_vs_lead_avg_effectiveness,p1_vs_lead_max_effectiveness,p1_super_effective_options,p1_se_options_vs_lead_bulk
0,0,1,695.0,115.833333,250.0,55.0,69.367179,435.0,72.5,110.0,...,635.0,0.0,365.0,3,-0.070393,starmie_VS_snorlax,1.083333,2.0,1,0.005405
1,1,1,740.0,123.333333,250.0,65.0,64.204534,435.0,72.5,110.0,...,685.0,-45.0,250.0,4,-0.012174,tauros_VS_alakazam,1.0,1.0,0,0.0
2,2,1,745.0,124.166667,250.0,60.0,64.382753,505.0,84.166667,130.0,...,495.0,-15.0,345.0,6,-0.00069,snorlax_VS_gengar,1.0,1.0,0,0.0
3,3,1,730.0,121.666667,250.0,60.0,65.362239,465.0,77.5,110.0,...,655.0,33.333333,345.0,6,-0.014574,snorlax_VS_zapdos,1.0,1.0,0,0.0
4,4,1,685.0,114.166667,250.0,50.0,70.794107,455.0,75.833333,110.0,...,625.0,-2.5,320.0,4,0.006923,tauros_VS_chansey,1.083333,2.0,1,0.005405


# Search best features

In [7]:
# Cell: estrai top-100 features con SHAP (robusta)
import shap

from xgboost import XGBClassifier
import numpy as np
import pandas as pd

# Candidate features: tutte le colonne numeriche tranne id/target
candidate_features = [c for c in train_df.columns if c not in ('battle_id','player_won') and train_df[c].dtype != 'object']
X_all = train_df[candidate_features].astype(float).fillna(0.0)
y_all = train_df['player_won'].astype(int).values

# Per velocità, campiona fino a N per training + spiegazioni
RND = np.random.RandomState(42)
N_SAMPLE = min(2000, len(X_all))
sample_idx = RND.choice(len(X_all), size=N_SAMPLE, replace=False)
X_sample = X_all.iloc[sample_idx]
y_sample = y_all[sample_idx]

# Allena un modello leggero (veloce) usato solo per SHAP
shap_clf = XGBClassifier(n_estimators=300, max_depth=3, learning_rate=0.05,
                         use_label_encoder=False, eval_metric='logloss',
                         random_state=42, n_jobs=4, tree_method='hist')
shap_clf.fit(X_sample, y_sample)

# Calcola SHAP con fallback robusto
shap_arr = None
explainer = None
ev = None

def _to_shap_array(shap_vals, n_samples, n_features):
    arr = np.array(shap_vals)
    if arr.ndim == 3:
        # Possibili layout: (n_samples, n_classes, n_features) o (n_classes, n_samples, n_features)
        if arr.shape[0] == n_samples and arr.shape[2] == n_features:
            class_idx = 1 if arr.shape[1] > 1 else 0
            return arr[:, class_idx, :]
        if arr.shape[1] == n_samples and arr.shape[2] == n_features:
            class_idx = 1 if arr.shape[0] > 1 else 0
            return arr[class_idx, :, :]
        if arr.shape[0] == n_samples and arr.shape[1] == n_features:
            class_idx = 1 if arr.shape[2] > 1 else 0
            return arr[:, :, class_idx]
        raise RuntimeError(f"Formato 3D SHAP non riconosciuto: {arr.shape}")
    elif arr.ndim == 2:
        # Atteso (n_samples, n_features) oppure trasposto
        if arr.shape[0] == n_samples and arr.shape[1] == n_features:
            return arr
        if arr.shape[1] == n_samples and arr.shape[0] == n_features:
            return arr.T
        raise RuntimeError(f"Formato 2D SHAP non riconosciuto: {arr.shape}")
    else:
        raise RuntimeError(f"Formato SHAP inatteso: {arr.shape}")

n_samples = X_sample.shape[0]
n_features = X_sample.shape[1]

try:
    explainer = shap.TreeExplainer(shap_clf)
    shap_vals = explainer.shap_values(X_sample)
    shap_arr = _to_shap_array(shap_vals, n_samples, n_features)
except Exception as e:
    print("TreeExplainer non funzionante o formato non previsto, fallback a shap.Explainer:", e)
    explainer = shap.Explainer(shap_clf.predict_proba, X_sample)
    ev = explainer(X_sample)
    vals = ev.values
    shap_arr = _to_shap_array(vals, n_samples, n_features)

# Debug shapes se serve
print("shap_arr.shape:", getattr(shap_arr, "shape", None), "X_sample.shape:", getattr(X_sample, "shape", None), "X_all.shape:", getattr(X_all, "shape", None))

# Allinea nomi feature e calcola importanza
shap_imp = np.abs(shap_arr).mean(axis=0)

# Recupera nomi feature nell'ordine usato per SHAP
feat_names = None
try:
    if explainer is not None and getattr(explainer, 'feature_names', None) is not None:
        feat_names = list(explainer.feature_names)
except Exception:
    feat_names = None

if feat_names is None and ev is not None:
    try:
        if getattr(ev, 'feature_names', None) is not None:
            feat_names = list(ev.feature_names)
    except Exception:
        feat_names = None

if feat_names is None:
    try:
        feat_names = list(X_sample.columns)
    except Exception:
        feat_names = None

if feat_names is None:
    feat_names = list(X_all.columns)

if len(shap_imp) != len(feat_names):
    raise ValueError(f"Incoerenza lunghezze: len(shap_imp)={len(shap_imp)}, len(feat_names)={len(feat_names)}. Controlla X_sample/colonne.")

imp_df = pd.DataFrame({'feature': feat_names, 'shap_mean_abs': shap_imp})
imp_df = imp_df.sort_values('shap_mean_abs', ascending=False).reset_index(drop=True)

TOP_K = min(100, len(imp_df))
top100 = imp_df['feature'].iloc[:TOP_K].tolist()

imp_df.to_csv('shap_feature_importances_all.csv', index=False)
pd.DataFrame({'feature': top100}).to_csv('top100_shap_features.csv', index=False)

print(f"Top-{TOP_K} features SHAP salvate in top100_shap_features.csv")
print(imp_df.head(10))

SHAP_TOP100 = top100

TreeExplainer non funzionante o formato non previsto, fallback a shap.Explainer: could not convert string to float: '[5.06E-1]'


PermutationExplainer explainer: 2001it [04:40,  7.11it/s]                          


shap_arr.shape: (2000, 229) X_sample.shape: (2000, 229) X_all.shape: (10000, 229)
Top-100 features SHAP salvate in top100_shap_features.csv
                           feature  shap_mean_abs
0          tl_weighted_damage_diff       0.152290
1                      tl_p2_moves       0.054426
2            tl_inflicted_par_diff       0.050478
3        tl_p2_inflicted_slp_count       0.037319
4                      tl_p1_moves       0.027659
5        tl_p1_inflicted_slp_count       0.024133
6                    tl_p1_last_hp       0.023037
7                      damage_diff       0.018435
8  tl_move_type_psychic_count_diff       0.016813
9             tl_last5_damage_diff       0.016260


# Preprocessing

In [4]:
# ====== Preprocessing (senza transformer sklearn) =========
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# base exclusions
exclude_cols = ['battle_id', 'player_won']
string_cols = train_df.select_dtypes(include=['object']).columns.tolist()
exclude_cols.extend(string_cols)

# tutte le colonne numeriche candidate
ALL_NUMERIC_FEATURES = [c for c in train_df.columns if c not in exclude_cols]

# flag per usare top features se necessario
use_top_features = False

# carica TOP100 se presente (comportamento invariato)
top100_path = r'top100_shap_features.csv'
try:
    top100_df = pd.read_csv(top100_path)
    TOP100 = [str(x).strip() for x in top100_df['feature'].tolist()]
except Exception:
    TOP100 = []

# --- INIZIO: filtro dalle keep_features_list se richiesto ---
features_filter = False  # imposta True per applicare il filtro, False per comportamento attuale
keep_list_path = 'keep_features_list.txt'

if features_filter:
    try:
        import os
        if os.path.exists(keep_list_path):
            keep_df = pd.read_csv(keep_list_path, header=None)
            keep_list = [str(x).strip() for x in keep_df.iloc[:, 0].tolist()]
            # mantieni solo feature numeriche valide presenti in ALL_NUMERIC_FEATURES
            filtered = [f for f in ALL_NUMERIC_FEATURES if f in keep_list]
            if filtered:
                # sovrascrive FEATURES più avanti: qui memorizziamo in temp
                FEATURES_FROM_KEEP = filtered
                print(f"features_filter=ON: trovato {len(filtered)} feature valide in {keep_list_path}")
            else:
                FEATURES_FROM_KEEP = None
                print(f"features_filter=ON: nessuna feature di {keep_list_path} presente in ALL_NUMERIC_FEATURES")
        else:
            FEATURES_FROM_KEEP = None
            print(f"features_filter=ON ma file {keep_list_path} non trovato. Nessun filtro applicato.")
    except Exception as e:
        FEATURES_FROM_KEEP = None
        print("Errore caricando keep_features_list.txt, nessun filtro applicato:", e)
else:
    FEATURES_FROM_KEEP = None
# --- FINE: filtro dalle keep_features_list ---

if use_top_features and TOP100:
    FEATURES = [f for f in TOP100 if f in ALL_NUMERIC_FEATURES]
elif features_filter:
    FEATURES = FEATURES_FROM_KEEP
else:
    FEATURES = ALL_NUMERIC_FEATURES

print(f'Num FEATURES numeriche rilevate (ALL): {len(ALL_NUMERIC_FEATURES)}')
print(f'Num FEATURES effettive usate (FEATURES): {len(FEATURES)}')
print(f'Num TOP100 caricate: {len(TOP100)}')

# costruisco DataFrame numerico raw
num_df = train_df[FEATURES].astype(float).replace([np.inf, -np.inf], np.nan)

# Imputazione semplice: usiamo la mediana per ogni feature calcolata sul train
medians = num_df.median()
train_imputed = num_df.fillna(medians)

# NON eseguo alcuno scaling: lascio i valori nella loro scala naturale
train_preproc_df = train_imputed.copy()

# target
y = train_df['player_won'].astype(int).values

# split holdout (20%) - mantengo comportamento originale
X = train_preproc_df.values
X_train_val, X_holdout, y_train_val, y_holdout, idx_train_val, idx_holdout = train_test_split(
    X, y, train_df.index.values, test_size=0.2, random_state=42, stratify=y)

print('Preprocessing (no transformers) completato.')
print('train_val size:', X_train_val.shape[0], 'holdout size:', X_holdout.shape[0])
print('Preprocessed feature count:', len(FEATURES))

# Allinea e imputa test_df usando le mediane del train (coerente con l'imputazione sopra)
test_aligned = test_df.reindex(columns=FEATURES, fill_value=np.nan).astype(float).replace([np.inf, -np.inf], np.nan)
test_imputed = test_aligned.fillna(medians)
test_preproc_df = pd.DataFrame(test_imputed.values, columns=FEATURES, index=test_df.index)

# Variabili pronte per le celle successive:
# FEATURES, X, y, X_train_val, X_holdout, y_train_val, y_holdout, test_preproc_df

Num FEATURES numeriche rilevate (ALL): 229
Num FEATURES effettive usate (FEATURES): 229
Num TOP100 caricate: 100
Preprocessing (no transformers) completato.
train_val size: 8000 holdout size: 2000
Preprocessed feature count: 229


# Feature filter

In [39]:
# --- NEW CELL: feature selection candidates (statistics, MI, corr, PSI) ---
import numpy as np
import pandas as pd
from sklearn.feature_selection import mutual_info_classif

def feature_basic_stats(df, features):
    rows = []
    for f in features:
        col = df[f]
        rows.append({
            'feature': f,
            'missing_pct': float(col.isna().mean()),
            'unique': int(col.nunique(dropna=True)),
            'std': float(col.std()),
            'min': float(col.min()) if np.isfinite(col.min()) else np.nan,
            'max': float(col.max()) if np.isfinite(col.max()) else np.nan
        })
    return pd.DataFrame(rows).set_index('feature')

def psi(expected, actual, buckets=10):
    # simple PSI by quantile buckets
    def _hist(arr):
        try:
            labels = pd.qcut(arr.rank(method='first'), buckets, labels=False)
        except Exception:
            labels = pd.cut(arr, buckets, labels=False, duplicates='drop')
            labels = np.nan_to_num(labels, nan=0).astype(int)
        cnt = np.bincount(labels, minlength=buckets).astype(float)
        return cnt / max(1.0, cnt.sum())
    e = _hist(expected.fillna(0).values)
    a = _hist(actual.fillna(0).values)
    eps = 1e-6
    return float(np.sum((e - a) * np.log((e + eps) / (a + eps))))

# run only if FEATURES is defined
try:
    feats = list(FEATURES)
except NameError:
    feats = [c for c in train_df.columns if c not in ('battle_id','player_won') and train_df[c].dtype != 'object']

print(f"Analisi feature: {len(feats)} features")

# basic stats
stats_df = feature_basic_stats(train_df, feats)

# mutual information
X = train_df[feats].fillna(0).astype(float)
y = train_df['player_won'].astype(int).values
mi = mutual_info_classif(X, y, discrete_features=False, random_state=42)
mi_ser = pd.Series(mi, index=feats, name='mi')

# max pairwise correlation (train)
corr = X.corr().abs()
max_corr = corr.where(~np.eye(len(corr),dtype=bool)).max(axis=0).fillna(0)
max_corr.name = 'max_abs_corr'

# PSI vs test (if test_df present)
psilist = []
for f in feats:
    if f in test_df.columns:
        psilist.append(psi(train_df[f].fillna(0), test_df[f].fillna(0), buckets=10))
    else:
        psilist.append(np.nan)
psi_ser = pd.Series(psilist, index=feats, name='psi')

# assemble summary
summary = stats_df.join(mi_ser).join(max_corr).join(psi_ser)
summary['drop_missing'] = summary['missing_pct'] > 0.5
summary['drop_constant'] = summary['unique'] <= 1
summary['drop_low_std'] = summary['std'].abs() < 1e-8
mi_thr = summary['mi'].quantile(0.05) if summary['mi'].notna().any() else 0.0
summary['drop_low_mi'] = summary['mi'] <= mi_thr
summary['unstable_psi'] = summary['psi'] > 0.5
summary['high_corr_flag'] = summary['max_abs_corr'] > 0.95

# decide drops: combine rules but keep decision explainable
drop_reasons = []
drop_set = set()
for f, row in summary.iterrows():
    reasons = []
    if row['drop_missing']:
        reasons.append('missing>0.5')
    if row['drop_constant']:
        reasons.append('constant')
    if row['drop_low_std']:
        reasons.append('std~0')
    if row['drop_low_mi']:
        reasons.append('low_mi')
    # high corr: mark but decide to drop the one with lower MI within correlated pairs later
    if reasons:
        drop_reasons.append((f, ';'.join(reasons)))
        drop_set.add(f)

# resolve high-corr groups (keep feature with higher MI)
if summary['high_corr_flag'].any():
    cor_mat = corr
    visited = set()
    for i, fi in enumerate(feats):
        if fi in visited:
            continue
        # find strongly correlated partners
        partners = [fj for fj in feats if (fj != fi and cor_mat.at[fi,fj] > 0.95)]
        if partners:
            group = [fi] + partners
            visited.update(group)
            # pick best by MI
            group_mi = summary.loc[group, 'mi'].fillna(-1.0)
            keep = group_mi.idxmax()
            for g in group:
                if g != keep:
                    drop_set.add(g)
                    summary.at[g, 'drop_high_corr'] = True
            summary.at[keep, 'drop_high_corr'] = False

summary['to_drop'] = summary.index.isin(drop_set)
summary = summary.sort_values(['to_drop','mi'], ascending=[False, False])

# save results
out_path = 'feature_selection_candidates.csv'
summary.reset_index().to_csv(out_path, index=False)
print(f"Feature selection summary salvata in {out_path}")
print("Esempio TOP 30 (to_drop prima):")
display(summary.head(30))

# convenience lists
KEEP = [c for c in summary.index.tolist() if not summary.at[c,'to_drop']]
DROP = [c for c in summary.index.tolist() if summary.at[c,'to_drop']]
print(f"KEEP: {len(KEEP)}  DROP: {len(DROP)}")
# ... you can later modify thresholds and re-run this cell to update candidates ...

# --- INIZIO: scrivi keep_features_list.txt dalle feature KEEP generate qui ---
keep_path = 'keep_features_list.txt'
seen = set()
cleaned_keep = []
for f in KEEP:
    ff = str(f).strip()
    if not ff or ff in seen:
        continue
    seen.add(ff)
    cleaned_keep.append(ff)

if cleaned_keep:
    with open(keep_path, 'w', encoding='utf-8') as fh:
        for feat in cleaned_keep:
            fh.write(feat + '\n')
    print(f"Auto-salvato '{keep_path}' con {len(cleaned_keep)} feature (una per riga).")
else:
    # se KEEP è vuoto, rimuovere file esistente per evitare confusione
    try:
        import os
        if os.path.exists(keep_path):
            os.remove(keep_path)
            print(f"KEEP vuoto: rimosso eventuale '{keep_path}' esistente.")
    except Exception:
        pass
# --- FINE: scrivi keep_features_list.txt ---

Analisi feature: 229 features
Feature selection summary salvata in feature_selection_candidates.csv
Esempio TOP 30 (to_drop prima):


Unnamed: 0_level_0,missing_pct,unique,std,min,max,mi,max_abs_corr,psi,drop_missing,drop_constant,drop_low_std,drop_low_mi,unstable_psi,high_corr_flag,drop_high_corr,to_drop
feature,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
damage_diff,0.0,5886,1.469337,-5.345115,4.157353,0.188876,1.0,0.006958,False,False,False,False,False,True,True,True
tl_damage_rate_diff,0.0,6209,0.048978,-0.17817,0.138578,0.179022,1.0,0.006958,False,False,False,False,False,True,True,True
tl_p2_est_damage,0.0,4536,1.159621,0.0,6.8,0.107689,1.0,0.060739,False,False,False,False,False,True,True,True
tl_p1_moves,0.0,26,3.073948,0.0,30.0,0.080224,1.0,0.003664,False,False,False,False,False,True,True,True
tl_p2_moves_rate,0.0,27,0.102139,0.0,1.0,0.066995,1.0,0.002273,False,False,False,False,False,True,True,True
tl_p1_est_damage,0.0,4343,0.879283,0.0,5.817803,0.058031,1.0,0.03825,False,False,False,False,False,True,True,True
tl_inflicted_slp_diff,0.0,14,1.328172,-6.0,7.0,0.054041,1.0,0.493303,False,False,False,False,False,True,True,True
tl_p1_inflicted_slp_rate,0.0,8,0.029157,0.0,0.233333,0.047021,1.0,3.593849,False,False,False,False,True,True,True,True
tl_inflicted_frz_diff,0.0,6,0.549196,-3.0,2.0,0.042962,1.0,22.683193,False,False,False,False,True,True,True,True
tl_p2_inflicted_slp_rate,0.0,7,0.02906,0.0,0.2,0.031291,1.0,0.001029,False,False,False,False,False,True,True,True


KEEP: 103  DROP: 126
Auto-salvato 'keep_features_list.txt' con 103 feature (una per riga).


# Hyperparameter serch

In [9]:
# === Grid Search time-boxed (<= ~2 ore) ===
print("=== Time-boxed GridSearchCV (<= ~2 ore) ===")
import time, os
import numpy as np, pandas as pd
from sklearn.model_selection import StratifiedKFold, GridSearchCV, ParameterGrid
try:
    import joblib
    CPU_COUNT = joblib.cpu_count()
except Exception:
    CPU_COUNT = os.cpu_count() or 4

cv_inner = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Base estimator
base_clf = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42, n_jobs=1, tree_method='hist')

# Calcola uno scale_pos_weight automatico e cerca attorno ad esso
pos_rate = float(y_train_val.mean())
spw_auto = float((1.0 - pos_rate) / max(pos_rate, 1e-9))
spw_grid = sorted({1.0, max(1.0, spw_auto*0.75), max(1.0, spw_auto), max(1.0, spw_auto*1.25)})
print(f'scale_pos_weight auto≈{spw_auto:.2f} -> grid={spw_grid}')

# Griglia COARSE (regolarizzata) — include scale_pos_weight
grid_coarse = {
    'n_estimators':      [300, 500, 700],
    'max_depth':         [3, 4],
    'min_child_weight':  [3, 5, 7],
    'learning_rate':     [0.03, 0.05, 0.07],
    'subsample':         [0.7, 0.8, 0.9],
    'colsample_bytree':  [0.7, 0.8, 0.9],
    'gamma':             [0.1, 0.2, 0.3],
    'reg_alpha':         [0.05, 0.1, 0.2],
    'reg_lambda':        [2.0, 3.0, 4.0],
    'scale_pos_weight':  spw_grid
}

# Stima tempo per-fit (warmup) — CORRETTO param_grid
warm_params = [
    {'n_estimators': 500, 'max_depth': 3, 'min_child_weight': 5, 'learning_rate': 0.05, 'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0.2, 'reg_alpha': 0.1, 'reg_lambda': 3.0, 'scale_pos_weight': max(1.0, spw_auto)},
    {'n_estimators': 700, 'max_depth': 4, 'min_child_weight': 5, 'learning_rate': 0.03, 'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0.2, 'reg_alpha': 0.1, 'reg_lambda': 3.0, 'scale_pos_weight': max(1.0, spw_auto*1.25)}
]
print("Warmup per stimare t_fit...")
t0 = time.time()
gs_warm = GridSearchCV(
    base_clf,
    param_grid=[{k:[v] for k,v in d.items()} for d in warm_params],
    scoring='balanced_accuracy',
    cv=cv_inner,
    n_jobs=-1,
    verbose=0,
    refit=False
)
gs_warm.fit(X_train_val, y_train_val)
elapsed_warm = time.time() - t0
fits_warm = len(warm_params) * cv_inner.get_n_splits()
t_fit_per_fold = max(0.01, elapsed_warm / fits_warm)
print(f"Warmup: {elapsed_warm:.2f}s per {fits_warm} fit -> ~{t_fit_per_fold:.3f}s/fit")

# Budget totale ~2 ore con margine sicurezza
TARGET_SECONDS = int(2*3600*0.9)
speedup = max(1, min(CPU_COUNT, cv_inner.get_n_splits()))
max_combos = int((TARGET_SECONDS * speedup) / (t_fit_per_fold * cv_inner.get_n_splits()))
max_combos = int(max(48, min(max_combos, 2000)))
print(f"CPU={CPU_COUNT}, speedup~{speedup}, max_combos≈{max_combos}")

# Costruisci tutte le combinazioni e campiona fino a max_combos
all_points = list(ParameterGrid(grid_coarse))
total = len(all_points)
print(f"Candidate totali nella griglia: {total}")
rng = np.random.default_rng(42)
if total > max_combos:
    idx = rng.choice(total, size=max_combos, replace=False)
    sampled = [all_points[i] for i in idx]
else:
    sampled = all_points
print(f"Config selezionate: {len(sampled)}")

# Converte in lista di 'micro-grid' (1 punto ciascuno) — CORRETTO
param_grid_list = [{k:[v] for k,v in pt.items()} for pt in sampled]

print("Esecuzione GridSearch time-boxed...")
t1 = time.time()
gs = GridSearchCV(
    estimator=base_clf,
    param_grid=param_grid_list,
    scoring='balanced_accuracy',
    cv=cv_inner,
    n_jobs=-1,
    verbose=2,
    refit=True
)
gs.fit(X_train_val, y_train_val)
elapsed = time.time() - t1

results_df = pd.DataFrame(gs.cv_results_).sort_values('rank_test_score')
csv_path = 'hp_search_results_timeboxed_grid.csv'
results_df.to_csv(csv_path, index=False)
best_params = gs.best_params_

print(f"\n✅ Salvato {csv_path} ({len(results_df)} righe)")
print("Migliori iperparametri:")
for k, v in best_params.items():
    print(f"  {k}: {v}")
print(f"Best CV (balanced_accuracy): {gs.best_score_:.4f}")
print(f"Tempo GridSearch: {elapsed/60:.1f} min (budget ~{TARGET_SECONDS/60:.0f} min)")
print("Ora puoi usare 'best_params' nelle celle successive.")

=== Time-boxed GridSearchCV (<= ~2 ore) ===
scale_pos_weight auto≈1.00 -> grid=[1.0, 1.25]
Warmup per stimare t_fit...


KeyboardInterrupt: 

In [62]:
# === SOSTITUISCI COMPLETAMENTE cella 14 (Optuna Fine-Tuning) ===

import optuna
import xgboost as xgb
from sklearn.metrics import accuracy_score
import numpy as np

print("="*70)
print("OPTUNA FINE-TUNING - RICERCA FOCALIZZATA ATTORNO A BASELINE")
print("="*70)

# 1. Parametri baseline (bussola)
known_params = {
    'booster': 'gbtree',
    'tree_method': 'hist',
    'max_bin': 256,
    'learning_rate': 0.035,
    'n_estimators': 900,
    'max_depth': 3,
    'min_child_weight': 9,
    'gamma': 0.5,
    'subsample': 0.7,
    'colsample_bytree': 0.7,
    'colsample_bynode': 0.7,
    'colsample_bylevel': 0.8,
    'reg_alpha': 0.4,
    'reg_lambda': 10.0,
    'max_delta_step': 1
}

# 2. Funzione obiettivo con spazio RISTRETTO
def objective_fine_tuning(trial):
    param = {
        'booster': 'gbtree',
        'tree_method': 'hist',
        'use_label_encoder': False,
        'eval_metric': 'logloss',
        'random_state': 42,
        
        # ✅ Range ristretti attorno a baseline
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.06),
        'max_depth': trial.suggest_int('max_depth', 1, 5),
        'n_estimators': trial.suggest_int('n_estimators', 700, 1100),
        'max_bin': trial.suggest_int('max_bin', 128, 512),
        'min_child_weight': trial.suggest_int('min_child_weight', 5, 15),
        'gamma': trial.suggest_float('gamma', 0.25, 0.75),
        'subsample': trial.suggest_float('subsample', 0.5, 0.9),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 0.9),
        'colsample_bynode': trial.suggest_float('colsample_bynode', 0.5, 0.9),
        'colsample_bylevel': trial.suggest_float('colsample_bylevel', 0.6, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.2, 0.6),
        'reg_lambda': trial.suggest_float('reg_lambda', 5.0, 15.0),
        'max_delta_step': trial.suggest_int('max_delta_step', 0, 5)
    }

    EARLY_STOPPING_ROUNDS = 50
    
    # ✅ FIX: callback con maximize=False per logloss
    model = xgb.XGBClassifier(**param)
    
    try:
        cb = getattr(xgb.callback, "EarlyStopping", None)
        if cb is not None:
            model.fit(
                X_train_val, y_train_val, 
                eval_set=[(X_holdout, y_holdout)], 
                callbacks=[cb(rounds=EARLY_STOPPING_ROUNDS, save_best=True, maximize=False)],  # ✅ maximize=False!
                verbose=False
            )
        else:
            # Fallback per versioni vecchie
            model.fit(
                X_train_val, y_train_val, 
                eval_set=[(X_holdout, y_holdout)],
                early_stopping_rounds=EARLY_STOPPING_ROUNDS,
                verbose=False
            )
    except Exception as e:
        print(f"⚠️ Warning fit: {e}")
        model.fit(X_train_val, y_train_val, verbose=False)
    
    # Valuta accuracy su holdout
    preds = model.predict(X_holdout)
    accuracy = accuracy_score(y_holdout, preds)
    
    return accuracy

# 3. Sampler aggressivo (TPE dopo 1 trial)
sampler = optuna.samplers.TPESampler(n_startup_trials=1, seed=42)

study = optuna.create_study(
    direction='maximize', 
    sampler=sampler,
    pruner=optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=3)
)

# 4. ✅ Inserisci baseline come primo trial
study.enqueue_trial(known_params)

# 5. Ottimizzazione
N_TRIALS = 50
print(f"\nAvvio {N_TRIALS} trial (1° trial = baseline, poi TPE)...\n")

study.optimize(objective_fine_tuning, n_trials=N_TRIALS, timeout=3600, gc_after_trial=True)

# 6. Report
print("\n" + "="*70)
print("OPTUNA FINE-TUNING RESULTS")
print("="*70)
print(f"Best accuracy (holdout): {study.best_value*100:.2f}%")
print(f"Best trial: #{study.best_trial.number}")

print(f"\n⚙️ Best hyperparameters:")
for k, v in study.best_params.items():
    if isinstance(v, float):
        print(f"  {k:20s}: {v:.4f}")
    else:
        print(f"  {k:20s}: {v}")

# Salva risultati
import pandas as pd
trials_df = study.trials_dataframe()
trials_df = trials_df.sort_values('value', ascending=False)
trials_df.to_csv('optuna_finetuning_trials.csv', index=False)
print(f"\n✅ Salvati {len(trials_df)} trial in 'optuna_finetuning_trials.csv'")

# Confronto con baseline
baseline_trial = trials_df[trials_df['number'] == 0].iloc[0] if len(trials_df) > 0 else None
if baseline_trial is not None:
    baseline_acc = baseline_trial['value']
    improvement = (study.best_value - baseline_acc) * 100
    print(f"\n📊 CONFRONTO:")
    print(f"  Baseline (trial 0):  {baseline_acc*100:.2f}%")
    print(f"  Best (trial {study.best_trial.number}):     {study.best_value*100:.2f}%")
    print(f"  Improvement:         {improvement:+.2f}%")
    
    if improvement > 0.3:
        print(f"\n✅ MIGLIORAMENTO SIGNIFICATIVO → Usa best_params Optuna")
        USE_OPTUNA = True
    else:
        print(f"\n⚠️ Miglioramento marginale (<0.3%) → Mantieni baseline")
        USE_OPTUNA = False
else:
    USE_OPTUNA = False

# Assegna best_params per CV
if USE_OPTUNA:
    best_params = study.best_params.copy()
    best_params.update({
        'booster': 'gbtree',
        'tree_method': 'hist',
        'use_label_encoder': False,
        'eval_metric': 'logloss',
        'random_state': 42
    })
    print(f"\n✅ Variabile 'best_params' aggiornata con HP Optuna")
else:
    best_params = known_params.copy()
    print(f"\n⚠️ Mantieni best_params baseline")

print("\n🎯 NEXT STEP: Esegui cella 'Cross validation' per validare su 10-fold")

[I 2025-11-06 23:49:19,098] A new study created in memory with name: no-name-25fbef7b-92e9-4e7a-b9f1-c6ceaf31b544


OPTUNA FINE-TUNING - RICERCA FOCALIZZATA ATTORNO A BASELINE

Avvio 50 trial (1° trial = baseline, poi TPE)...



[I 2025-11-06 23:49:20,785] Trial 0 finished with value: 0.827 and parameters: {'learning_rate': 0.035, 'max_depth': 3, 'n_estimators': 900, 'max_bin': 256, 'min_child_weight': 9, 'gamma': 0.5, 'subsample': 0.7, 'colsample_bytree': 0.7, 'colsample_bynode': 0.7, 'colsample_bylevel': 0.8, 'reg_alpha': 0.4, 'reg_lambda': 10.0, 'max_delta_step': 1}. Best is trial 0 with value: 0.827.




[I 2025-11-06 23:49:30,632] Trial 1 finished with value: 0.8265 and parameters: {'learning_rate': 0.03476852692609299, 'max_depth': 3, 'n_estimators': 899, 'max_bin': 248, 'min_child_weight': 9, 'gamma': 0.5005488255307521, 'subsample': 0.7189071324479795, 'colsample_bytree': 0.6878950618483477, 'colsample_bynode': 0.6978577205688217, 'colsample_bylevel': 0.8066910394876151, 'reg_alpha': 0.3879782367216648, 'reg_lambda': 10.101285805370015, 'max_delta_step': 1}. Best is trial 0 with value: 0.827.




[I 2025-11-06 23:49:32,972] Trial 2 finished with value: 0.824 and parameters: {'learning_rate': 0.058588342660541505, 'max_depth': 2, 'n_estimators': 1075, 'max_bin': 433, 'min_child_weight': 5, 'gamma': 0.26432776350546017, 'subsample': 0.5201849961848604, 'colsample_bytree': 0.864766689287175, 'colsample_bynode': 0.8981761930224601, 'colsample_bylevel': 0.6412641535530137, 'reg_alpha': 0.5487283159585304, 'reg_lambda': 5.491624100942104, 'max_delta_step': 2}. Best is trial 0 with value: 0.827.




[I 2025-11-06 23:49:35,428] Trial 3 finished with value: 0.823 and parameters: {'learning_rate': 0.010529967743918466, 'max_depth': 5, 'n_estimators': 731, 'max_bin': 130, 'min_child_weight': 15, 'gamma': 0.7110475118678332, 'subsample': 0.8422337139158931, 'colsample_bytree': 0.5426086911496627, 'colsample_bynode': 0.5540950302579799, 'colsample_bylevel': 0.9985642153294361, 'reg_alpha': 0.20525612628712334, 'reg_lambda': 14.65337946065455, 'max_delta_step': 5}. Best is trial 0 with value: 0.827.




[I 2025-11-06 23:49:37,051] Trial 4 finished with value: 0.8225 and parameters: {'learning_rate': 0.03313607799416753, 'max_depth': 1, 'n_estimators': 896, 'max_bin': 305, 'min_child_weight': 10, 'gamma': 0.48616098083360076, 'subsample': 0.6336701687776085, 'colsample_bytree': 0.7208673405398668, 'colsample_bynode': 0.727684455145088, 'colsample_bylevel': 0.787308666528242, 'reg_alpha': 0.3799406054337648, 'reg_lambda': 10.302316000991786, 'max_delta_step': 0}. Best is trial 0 with value: 0.827.




[I 2025-11-06 23:49:41,257] Trial 5 finished with value: 0.8215 and parameters: {'learning_rate': 0.05132173996798651, 'max_depth': 5, 'n_estimators': 1033, 'max_bin': 446, 'min_child_weight': 7, 'gamma': 0.5002896690347844, 'subsample': 0.7480549387959301, 'colsample_bytree': 0.6931447349379222, 'colsample_bynode': 0.7065206662627888, 'colsample_bylevel': 0.8238463572387134, 'reg_alpha': 0.5103299544198501, 'reg_lambda': 7.132833356311223, 'max_delta_step': 4}. Best is trial 0 with value: 0.827.




[I 2025-11-06 23:49:43,299] Trial 6 finished with value: 0.8245 and parameters: {'learning_rate': 0.015584758086674678, 'max_depth': 3, 'n_estimators': 772, 'max_bin': 198, 'min_child_weight': 14, 'gamma': 0.6580562009798354, 'subsample': 0.8961858197838287, 'colsample_bytree': 0.5560013993607453, 'colsample_bynode': 0.5577440224186132, 'colsample_bylevel': 0.6758907709476152, 'reg_alpha': 0.28708691675932585, 'reg_lambda': 13.190673762345009, 'max_delta_step': 3}. Best is trial 0 with value: 0.827.




[I 2025-11-06 23:49:46,702] Trial 7 finished with value: 0.819 and parameters: {'learning_rate': 0.042644233465294355, 'max_depth': 4, 'n_estimators': 990, 'max_bin': 358, 'min_child_weight': 12, 'gamma': 0.3506818104614414, 'subsample': 0.6042708667303991, 'colsample_bytree': 0.8136965453522745, 'colsample_bynode': 0.8239398842218817, 'colsample_bylevel': 0.9352317373142313, 'reg_alpha': 0.4615535851701884, 'reg_lambda': 7.843601868522235, 'max_delta_step': 0}. Best is trial 0 with value: 0.827.




[I 2025-11-06 23:49:48,550] Trial 8 finished with value: 0.8195 and parameters: {'learning_rate': 0.025276362353724567, 'max_depth': 1, 'n_estimators': 827, 'max_bin': 511, 'min_child_weight': 8, 'gamma': 0.5990483200027006, 'subsample': 0.8070415171190455, 'colsample_bytree': 0.6176614901833262, 'colsample_bynode': 0.6344984789779685, 'colsample_bylevel': 0.7267626635152488, 'reg_alpha': 0.3193212198876563, 'reg_lambda': 12.013996257570197, 'max_delta_step': 2}. Best is trial 0 with value: 0.827.




[I 2025-11-06 23:49:51,689] Trial 9 finished with value: 0.828 and parameters: {'learning_rate': 0.023323950745284924, 'max_depth': 4, 'n_estimators': 960, 'max_bin': 308, 'min_child_weight': 12, 'gamma': 0.3989528930661683, 'subsample': 0.6367000926452869, 'colsample_bytree': 0.7582574702948834, 'colsample_bynode': 0.7915855181058915, 'colsample_bylevel': 0.8860341794131611, 'reg_alpha': 0.44237266779130013, 'reg_lambda': 8.334320598165451, 'max_delta_step': 1}. Best is trial 9 with value: 0.828.




[I 2025-11-06 23:49:54,829] Trial 10 finished with value: 0.828 and parameters: {'learning_rate': 0.022599069527023553, 'max_depth': 4, 'n_estimators': 975, 'max_bin': 358, 'min_child_weight': 12, 'gamma': 0.38705185623784305, 'subsample': 0.5030794686269633, 'colsample_bytree': 0.7900685645280803, 'colsample_bynode': 0.8113526169287668, 'colsample_bylevel': 0.9111376006923405, 'reg_alpha': 0.599765397635074, 'reg_lambda': 7.6577901498944865, 'max_delta_step': 3}. Best is trial 9 with value: 0.828.




[I 2025-11-06 23:49:57,978] Trial 11 finished with value: 0.824 and parameters: {'learning_rate': 0.021910031928754587, 'max_depth': 4, 'n_estimators': 981, 'max_bin': 363, 'min_child_weight': 12, 'gamma': 0.34740273713640746, 'subsample': 0.500997484782991, 'colsample_bytree': 0.7820963773721878, 'colsample_bynode': 0.8199113620908142, 'colsample_bylevel': 0.9022868695194126, 'reg_alpha': 0.5995977729369859, 'reg_lambda': 7.927135841069047, 'max_delta_step': 3}. Best is trial 9 with value: 0.828.




[I 2025-11-06 23:50:01,314] Trial 12 finished with value: 0.824 and parameters: {'learning_rate': 0.02350172271317825, 'max_depth': 4, 'n_estimators': 980, 'max_bin': 330, 'min_child_weight': 12, 'gamma': 0.3927624331817231, 'subsample': 0.5943926275573392, 'colsample_bytree': 0.7866620526137165, 'colsample_bynode': 0.8048946358360277, 'colsample_bylevel': 0.8922468402573281, 'reg_alpha': 0.47574104233157494, 'reg_lambda': 5.992997180875783, 'max_delta_step': 4}. Best is trial 9 with value: 0.828.




[I 2025-11-06 23:50:04,737] Trial 13 finished with value: 0.825 and parameters: {'learning_rate': 0.01764099062802981, 'max_depth': 4, 'n_estimators': 948, 'max_bin': 412, 'min_child_weight': 13, 'gamma': 0.41799289753679125, 'subsample': 0.5548709527101245, 'colsample_bytree': 0.8899371640050817, 'colsample_bynode': 0.8976421601380132, 'colsample_bylevel': 0.9708876675717859, 'reg_alpha': 0.5820797955818061, 'reg_lambda': 8.90679628710091, 'max_delta_step': 1}. Best is trial 9 with value: 0.828.




[I 2025-11-06 23:50:08,711] Trial 14 finished with value: 0.8195 and parameters: {'learning_rate': 0.029687202461133985, 'max_depth': 5, 'n_estimators': 1088, 'max_bin': 271, 'min_child_weight': 11, 'gamma': 0.2976646329847751, 'subsample': 0.6351819929436423, 'colsample_bytree': 0.7592104192075209, 'colsample_bynode': 0.7715407250186762, 'colsample_bylevel': 0.8631851197538537, 'reg_alpha': 0.45612668913288545, 'reg_lambda': 6.757523620398064, 'max_delta_step': 2}. Best is trial 9 with value: 0.828.




[I 2025-11-06 23:50:10,711] Trial 15 finished with value: 0.826 and parameters: {'learning_rate': 0.04374060249859062, 'max_depth': 2, 'n_estimators': 842, 'max_bin': 381, 'min_child_weight': 14, 'gamma': 0.4300971708133748, 'subsample': 0.6716386623546919, 'colsample_bytree': 0.841970302157797, 'colsample_bynode': 0.7665140725820534, 'colsample_bylevel': 0.9405490189343315, 'reg_alpha': 0.5373458075806263, 'reg_lambda': 8.889419728348368, 'max_delta_step': 4}. Best is trial 9 with value: 0.828.




[I 2025-11-06 23:50:13,575] Trial 16 finished with value: 0.8245 and parameters: {'learning_rate': 0.010377327039659549, 'max_depth': 4, 'n_estimators': 1033, 'max_bin': 189, 'min_child_weight': 11, 'gamma': 0.5794282054190651, 'subsample': 0.5573922803488951, 'colsample_bytree': 0.6408208534970825, 'colsample_bynode': 0.848548556384197, 'colsample_bylevel': 0.8577806267749256, 'reg_alpha': 0.317273888656067, 'reg_lambda': 5.042600704123307, 'max_delta_step': 3}. Best is trial 9 with value: 0.828.




[I 2025-11-06 23:50:16,020] Trial 17 finished with value: 0.83 and parameters: {'learning_rate': 0.026933918169389524, 'max_depth': 3, 'n_estimators': 943, 'max_bin': 304, 'min_child_weight': 15, 'gamma': 0.3640002264427433, 'subsample': 0.7646801838048869, 'colsample_bytree': 0.7460088554636374, 'colsample_bynode': 0.639084261488893, 'colsample_bylevel': 0.9210124038311636, 'reg_alpha': 0.2236732830127453, 'reg_lambda': 11.128683316949994, 'max_delta_step': 1}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:50:17,917] Trial 18 finished with value: 0.825 and parameters: {'learning_rate': 0.02866295836032223, 'max_depth': 2, 'n_estimators': 859, 'max_bin': 285, 'min_child_weight': 15, 'gamma': 0.30058415293973256, 'subsample': 0.7717083120694559, 'colsample_bytree': 0.7354418337575523, 'colsample_bynode': 0.6397271633627091, 'colsample_bylevel': 0.7362829644487112, 'reg_alpha': 0.20726183978972879, 'reg_lambda': 11.695661864646713, 'max_delta_step': 0}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:50:20,180] Trial 19 finished with value: 0.8235 and parameters: {'learning_rate': 0.04002394553889389, 'max_depth': 3, 'n_estimators': 939, 'max_bin': 206, 'min_child_weight': 14, 'gamma': 0.45276630170791, 'subsample': 0.6738830450107247, 'colsample_bytree': 0.643113776096649, 'colsample_bynode': 0.6457730453255128, 'colsample_bylevel': 0.8623368191555644, 'reg_alpha': 0.2582170779000309, 'reg_lambda': 11.60538939878262, 'max_delta_step': 1}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:50:22,504] Trial 20 finished with value: 0.828 and parameters: {'learning_rate': 0.01642755922284916, 'max_depth': 2, 'n_estimators': 1036, 'max_bin': 320, 'min_child_weight': 15, 'gamma': 0.5545268119254297, 'subsample': 0.791902950043108, 'colsample_bytree': 0.8178766726929959, 'colsample_bynode': 0.5169612157954635, 'colsample_bylevel': 0.9625753638453611, 'reg_alpha': 0.35596887759470264, 'reg_lambda': 13.083433959133869, 'max_delta_step': 1}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:50:25,074] Trial 21 finished with value: 0.823 and parameters: {'learning_rate': 0.019852796823646685, 'max_depth': 3, 'n_estimators': 940, 'max_bin': 336, 'min_child_weight': 13, 'gamma': 0.357254886872706, 'subsample': 0.7371661819244653, 'colsample_bytree': 0.7576980671687085, 'colsample_bynode': 0.7747419303815859, 'colsample_bylevel': 0.9026082659512672, 'reg_alpha': 0.41211497819014226, 'reg_lambda': 8.540673719086467, 'max_delta_step': 2}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:50:28,585] Trial 22 finished with value: 0.8245 and parameters: {'learning_rate': 0.0279194366858868, 'max_depth': 4, 'n_estimators': 1008, 'max_bin': 393, 'min_child_weight': 11, 'gamma': 0.3853323134890955, 'subsample': 0.8392189563427166, 'colsample_bytree': 0.793078678587833, 'colsample_bynode': 0.6037702251823018, 'colsample_bylevel': 0.9278106022909172, 'reg_alpha': 0.24652558116317103, 'reg_lambda': 10.835591370237301, 'max_delta_step': 3}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:50:32,141] Trial 23 finished with value: 0.8275 and parameters: {'learning_rate': 0.024243612565688504, 'max_depth': 5, 'n_estimators': 948, 'max_bin': 227, 'min_child_weight': 13, 'gamma': 0.314226123604932, 'subsample': 0.5583518320581365, 'colsample_bytree': 0.7400515378693457, 'colsample_bynode': 0.8582221603280678, 'colsample_bylevel': 0.8833325678175743, 'reg_alpha': 0.49749090522648987, 'reg_lambda': 9.278213679250497, 'max_delta_step': 2}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:50:35,010] Trial 24 finished with value: 0.8235 and parameters: {'learning_rate': 0.014651865777200495, 'max_depth': 3, 'n_estimators': 866, 'max_bin': 295, 'min_child_weight': 10, 'gamma': 0.4507742289100171, 'subsample': 0.675054082748384, 'colsample_bytree': 0.8362607255479276, 'colsample_bynode': 0.7414330710245162, 'colsample_bylevel': 0.987363166221956, 'reg_alpha': 0.4341993204978584, 'reg_lambda': 6.754691313476613, 'max_delta_step': 0}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:50:38,178] Trial 25 finished with value: 0.8225 and parameters: {'learning_rate': 0.030895420756839494, 'max_depth': 4, 'n_estimators': 966, 'max_bin': 347, 'min_child_weight': 13, 'gamma': 0.3841895800179522, 'subsample': 0.6195867720326407, 'colsample_bytree': 0.7674633649829914, 'colsample_bynode': 0.6803533550982502, 'colsample_bylevel': 0.8423657066243853, 'reg_alpha': 0.34777756330417275, 'reg_lambda': 7.915015807076238, 'max_delta_step': 5}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:50:41,472] Trial 26 finished with value: 0.826 and parameters: {'learning_rate': 0.025798546428979635, 'max_depth': 4, 'n_estimators': 916, 'max_bin': 491, 'min_child_weight': 12, 'gamma': 0.25524309953602575, 'subsample': 0.5848489415377721, 'colsample_bytree': 0.6823481322395012, 'colsample_bynode': 0.79072724639617, 'colsample_bylevel': 0.7575900607872407, 'reg_alpha': 0.5656361171915351, 'reg_lambda': 9.420332084373573, 'max_delta_step': 1}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:50:44,381] Trial 27 finished with value: 0.828 and parameters: {'learning_rate': 0.020226993752791442, 'max_depth': 3, 'n_estimators': 1008, 'max_bin': 303, 'min_child_weight': 6, 'gamma': 0.32930332990502253, 'subsample': 0.5273038376180819, 'colsample_bytree': 0.8994119631340372, 'colsample_bynode': 0.8562093883934132, 'colsample_bylevel': 0.926332446748375, 'reg_alpha': 0.5213335841336528, 'reg_lambda': 10.849950722594185, 'max_delta_step': 2}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:50:48,513] Trial 28 finished with value: 0.8215 and parameters: {'learning_rate': 0.036770283337387924, 'max_depth': 5, 'n_estimators': 1059, 'max_bin': 383, 'min_child_weight': 14, 'gamma': 0.4065997023954557, 'subsample': 0.7694873507738675, 'colsample_bytree': 0.6572680132666904, 'colsample_bynode': 0.5918097880547311, 'colsample_bylevel': 0.9571279839707805, 'reg_alpha': 0.4347342882491866, 'reg_lambda': 7.430069541491518, 'max_delta_step': 3}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:50:51,114] Trial 29 finished with value: 0.8225 and parameters: {'learning_rate': 0.03810739245486754, 'max_depth': 4, 'n_estimators': 807, 'max_bin': 251, 'min_child_weight': 9, 'gamma': 0.46521424203748957, 'subsample': 0.701936154441081, 'colsample_bytree': 0.7227936578023534, 'colsample_bynode': 0.6739806446125416, 'colsample_bylevel': 0.8289901619244561, 'reg_alpha': 0.25659614043461176, 'reg_lambda': 9.678670468777737, 'max_delta_step': 1}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:50:53,372] Trial 30 finished with value: 0.825 and parameters: {'learning_rate': 0.03323917744873356, 'max_depth': 3, 'n_estimators': 915, 'max_bin': 163, 'min_child_weight': 10, 'gamma': 0.5427522257861856, 'subsample': 0.8885335307911475, 'colsample_bytree': 0.808894620153653, 'colsample_bynode': 0.7417118562133773, 'colsample_bylevel': 0.9134523517443287, 'reg_alpha': 0.4918359040452287, 'reg_lambda': 6.295872379998453, 'max_delta_step': 4}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:50:55,739] Trial 31 finished with value: 0.824 and parameters: {'learning_rate': 0.014201169923058114, 'max_depth': 2, 'n_estimators': 1042, 'max_bin': 321, 'min_child_weight': 15, 'gamma': 0.5297184883817323, 'subsample': 0.8193843874721138, 'colsample_bytree': 0.8284368123430704, 'colsample_bynode': 0.5964482315789056, 'colsample_bylevel': 0.9604480445658062, 'reg_alpha': 0.3564341936401978, 'reg_lambda': 13.671099650482617, 'max_delta_step': 1}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:50:57,930] Trial 32 finished with value: 0.8245 and parameters: {'learning_rate': 0.017838391848022155, 'max_depth': 2, 'n_estimators': 1011, 'max_bin': 262, 'min_child_weight': 15, 'gamma': 0.6078185730837715, 'subsample': 0.7853111122027194, 'colsample_bytree': 0.8059940570314775, 'colsample_bynode': 0.5089542943072007, 'colsample_bylevel': 0.8819860528825318, 'reg_alpha': 0.31226987034920634, 'reg_lambda': 12.54897821842771, 'max_delta_step': 1}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:50:59,640] Trial 33 finished with value: 0.8175 and parameters: {'learning_rate': 0.020833445183300877, 'max_depth': 1, 'n_estimators': 885, 'max_bin': 316, 'min_child_weight': 14, 'gamma': 0.5411056124983253, 'subsample': 0.7276788828451599, 'colsample_bytree': 0.8689395574898171, 'colsample_bynode': 0.5079211126769085, 'colsample_bylevel': 0.9515645852903627, 'reg_alpha': 0.3642436401336262, 'reg_lambda': 14.620007090596054, 'max_delta_step': 1}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:51:02,160] Trial 34 finished with value: 0.8225 and parameters: {'learning_rate': 0.027124984690207694, 'max_depth': 2, 'n_estimators': 1097, 'max_bin': 284, 'min_child_weight': 15, 'gamma': 0.27732809483526055, 'subsample': 0.8666970030710195, 'colsample_bytree': 0.8566884246035268, 'colsample_bynode': 0.550347950780187, 'colsample_bylevel': 0.9813631256703983, 'reg_alpha': 0.40830837036593187, 'reg_lambda': 13.823072622394873, 'max_delta_step': 0}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:51:04,577] Trial 35 finished with value: 0.826 and parameters: {'learning_rate': 0.012464407249146398, 'max_depth': 3, 'n_estimators': 923, 'max_bin': 236, 'min_child_weight': 9, 'gamma': 0.6550471591472655, 'subsample': 0.7971206784972484, 'colsample_bytree': 0.7706858219820507, 'colsample_bynode': 0.528590590276854, 'colsample_bylevel': 0.8018142443468852, 'reg_alpha': 0.2231889044136359, 'reg_lambda': 10.37350998944361, 'max_delta_step': 2}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:51:07,043] Trial 36 finished with value: 0.8245 and parameters: {'learning_rate': 0.017696241824102746, 'max_depth': 2, 'n_estimators': 966, 'max_bin': 432, 'min_child_weight': 11, 'gamma': 0.47837579530618335, 'subsample': 0.7080111782039765, 'colsample_bytree': 0.7073526050654216, 'colsample_bynode': 0.7173834193664125, 'colsample_bylevel': 0.9933372307039863, 'reg_alpha': 0.284881771579538, 'reg_lambda': 8.35933539639973, 'max_delta_step': 0}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:51:09,111] Trial 37 finished with value: 0.8195 and parameters: {'learning_rate': 0.02270293714187802, 'max_depth': 1, 'n_estimators': 1068, 'max_bin': 362, 'min_child_weight': 8, 'gamma': 0.5167590177622825, 'subsample': 0.753027864531497, 'colsample_bytree': 0.7551977169812151, 'colsample_bynode': 0.6935628608666784, 'colsample_bylevel': 0.6284198784332296, 'reg_alpha': 0.38137154947784513, 'reg_lambda': 12.98747091091306, 'max_delta_step': 1}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:51:13,065] Trial 38 finished with value: 0.8225 and parameters: {'learning_rate': 0.031237332934545715, 'max_depth': 5, 'n_estimators': 1022, 'max_bin': 343, 'min_child_weight': 13, 'gamma': 0.7482672034634053, 'subsample': 0.6451744935197238, 'colsample_bytree': 0.583437219401962, 'colsample_bynode': 0.5760528314974878, 'colsample_bylevel': 0.7746256325471543, 'reg_alpha': 0.54871740827158, 'reg_lambda': 11.139516542827163, 'max_delta_step': 2}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:51:15,858] Trial 39 finished with value: 0.8215 and parameters: {'learning_rate': 0.0540970609645624, 'max_depth': 3, 'n_estimators': 1051, 'max_bin': 306, 'min_child_weight': 14, 'gamma': 0.3634288114704128, 'subsample': 0.8419596580422526, 'colsample_bytree': 0.6830832356619286, 'colsample_bynode': 0.6182488140605673, 'colsample_bylevel': 0.9199957084011516, 'reg_alpha': 0.33324253790233205, 'reg_lambda': 10.014506818493428, 'max_delta_step': 0}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:51:18,800] Trial 40 finished with value: 0.8255 and parameters: {'learning_rate': 0.018275465008466347, 'max_depth': 3, 'n_estimators': 993, 'max_bin': 411, 'min_child_weight': 5, 'gamma': 0.5712979713069237, 'subsample': 0.7733404159772269, 'colsample_bytree': 0.8228001533738049, 'colsample_bynode': 0.8367699255353823, 'colsample_bylevel': 0.9434177775137621, 'reg_alpha': 0.27343021310091187, 'reg_lambda': 12.170592712205409, 'max_delta_step': 2}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:51:21,669] Trial 41 finished with value: 0.8265 and parameters: {'learning_rate': 0.020540379858768623, 'max_depth': 3, 'n_estimators': 1003, 'max_bin': 305, 'min_child_weight': 7, 'gamma': 0.3269253303216845, 'subsample': 0.527626440372335, 'colsample_bytree': 0.893747868855859, 'colsample_bynode': 0.8740596178077351, 'colsample_bylevel': 0.9202337807351552, 'reg_alpha': 0.5204516803645075, 'reg_lambda': 10.59884654065265, 'max_delta_step': 1}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:51:25,073] Trial 42 finished with value: 0.8255 and parameters: {'learning_rate': 0.025214497384164078, 'max_depth': 4, 'n_estimators': 971, 'max_bin': 322, 'min_child_weight': 6, 'gamma': 0.3319351040351094, 'subsample': 0.5065459551260044, 'colsample_bytree': 0.871584383876979, 'colsample_bynode': 0.799895388370917, 'colsample_bylevel': 0.880301655405256, 'reg_alpha': 0.5230122518222181, 'reg_lambda': 11.206559916609, 'max_delta_step': 2}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:51:28,363] Trial 43 finished with value: 0.826 and parameters: {'learning_rate': 0.015960514554318052, 'max_depth': 4, 'n_estimators': 1015, 'max_bin': 278, 'min_child_weight': 5, 'gamma': 0.42653984984446064, 'subsample': 0.5299507229296874, 'colsample_bytree': 0.5134404973330368, 'colsample_bynode': 0.8824719100756997, 'colsample_bylevel': 0.9041548894555669, 'reg_alpha': 0.5708444507033904, 'reg_lambda': 12.508301093973824, 'max_delta_step': 3}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:51:30,259] Trial 44 finished with value: 0.822 and parameters: {'learning_rate': 0.02234882611279579, 'max_depth': 1, 'n_estimators': 960, 'max_bin': 357, 'min_child_weight': 8, 'gamma': 0.36287977209858585, 'subsample': 0.5659850063130013, 'colsample_bytree': 0.7883033840754209, 'colsample_bynode': 0.8207354462900643, 'colsample_bylevel': 0.9700159928145884, 'reg_alpha': 0.46895197293534097, 'reg_lambda': 7.379886345673161, 'max_delta_step': 1}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:51:33,335] Trial 45 finished with value: 0.8255 and parameters: {'learning_rate': 0.012153536706639413, 'max_depth': 3, 'n_estimators': 994, 'max_bin': 303, 'min_child_weight': 12, 'gamma': 0.27921000593863443, 'subsample': 0.5397170421743301, 'colsample_bytree': 0.7359287177271789, 'colsample_bynode': 0.8555244063377648, 'colsample_bylevel': 0.932739238634548, 'reg_alpha': 0.5952714353277588, 'reg_lambda': 14.964431063868602, 'max_delta_step': 2}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:51:36,625] Trial 46 finished with value: 0.8255 and parameters: {'learning_rate': 0.0199786136859381, 'max_depth': 4, 'n_estimators': 1024, 'max_bin': 264, 'min_child_weight': 9, 'gamma': 0.3337629389480449, 'subsample': 0.5763639001623867, 'colsample_bytree': 0.8490844029089726, 'colsample_bynode': 0.7543684990704321, 'colsample_bylevel': 0.8222985959808353, 'reg_alpha': 0.43840193969338104, 'reg_lambda': 8.428390859949324, 'max_delta_step': 3}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:51:38,544] Trial 47 finished with value: 0.8255 and parameters: {'learning_rate': 0.025742423923135804, 'max_depth': 2, 'n_estimators': 743, 'max_bin': 372, 'min_child_weight': 10, 'gamma': 0.4003968371578644, 'subsample': 0.6064708403609336, 'colsample_bytree': 0.8011947068821332, 'colsample_bynode': 0.8341359938496176, 'colsample_bylevel': 0.6876410721150595, 'reg_alpha': 0.48997166446572704, 'reg_lambda': 9.708333664556385, 'max_delta_step': 1}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:51:41,271] Trial 48 finished with value: 0.829 and parameters: {'learning_rate': 0.02356500935866276, 'max_depth': 3, 'n_estimators': 890, 'max_bin': 338, 'min_child_weight': 6, 'gamma': 0.49536799135630916, 'subsample': 0.5138982263026048, 'colsample_bytree': 0.8782788092523248, 'colsample_bynode': 0.8067378506179144, 'colsample_bylevel': 0.871828483173245, 'reg_alpha': 0.5464642418229524, 'reg_lambda': 14.060529232431005, 'max_delta_step': 4}. Best is trial 17 with value: 0.83.




[I 2025-11-06 23:51:43,502] Trial 49 finished with value: 0.8245 and parameters: {'learning_rate': 0.030388051898137054, 'max_depth': 2, 'n_estimators': 894, 'max_bin': 399, 'min_child_weight': 12, 'gamma': 0.4929037014539197, 'subsample': 0.8263812711635129, 'colsample_bytree': 0.7076660390172161, 'colsample_bynode': 0.6554322052439002, 'colsample_bylevel': 0.8443833840761443, 'reg_alpha': 0.5598765826950408, 'reg_lambda': 14.33945850158953, 'max_delta_step': 4}. Best is trial 17 with value: 0.83.



OPTUNA FINE-TUNING RESULTS
Best accuracy (holdout): 83.00%
Best trial: #17

⚙️ Best hyperparameters:
  learning_rate       : 0.0269
  max_depth           : 3
  n_estimators        : 943
  max_bin             : 304
  min_child_weight    : 15
  gamma               : 0.3640
  subsample           : 0.7647
  colsample_bytree    : 0.7460
  colsample_bynode    : 0.6391
  colsample_bylevel   : 0.9210
  reg_alpha           : 0.2237
  reg_lambda          : 11.1287
  max_delta_step      : 1

✅ Salvati 50 trial in 'optuna_finetuning_trials.csv'

📊 CONFRONTO:
  Baseline (trial 0):  82.70%
  Best (trial 17):     83.00%
  Improvement:         +0.30%

✅ MIGLIORAMENTO SIGNIFICATIVO → Usa best_params Optuna

✅ Variabile 'best_params' aggiornata con HP Optuna

🎯 NEXT STEP: Esegui cella 'Cross validation' per validare su 10-fold


In [55]:
# === SOSTITUISCI COMPLETAMENTE cella Optuna (dopo "# Hyperparameter search") ===

import optuna
import numpy as np
from optuna.samplers import TPESampler
from optuna.pruners import MedianPruner
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
import xgboost as xgb

print("="*70)
print("OPTUNA - ACCURACY MAXIMIZATION CON GAP TARGET ~4%")
print("="*70)

N_TRIALS = 120  # ridotto da 150 per convergenza veloce
EARLY_STOPPING_ROUNDS = 50
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# TARGET: mantieni gap medio tra 3-5% (ideale 4%)
GAP_TARGET_MIN = 0.03
GAP_TARGET_MAX = 0.05
GAP_TARGET_IDEAL = 0.04

def _predict_proba_best(clf, X):
    """Predict proba usando best_iteration se disponibile."""
    it = getattr(clf, "best_iteration", None)
    try:
        if it is not None:
            return clf.predict_proba(X, iteration_range=(0, int(it)+1))[:,1]
    except Exception:
        pass
    try:
        b = clf.get_booster()
        nt = getattr(b, "best_ntree_limit", None)
        if nt is not None:
            return clf.predict_proba(X, ntree_limit=int(nt))[:,1]
    except Exception:
        pass
    return clf.predict_proba(X)[:,1]

def objective(trial):
    # ✅ SEARCH SPACE OTTIMIZZATO per accuracy con gap 4%
    params = {
        "booster": "gbtree",
        "tree_method": "hist",
        "max_bin": 256,  # fisso per velocità
        
        # Alberi: range ampio con early stopping
        "n_estimators": trial.suggest_int("n_estimators", 500, 1500, step=100),
        "learning_rate": trial.suggest_float("learning_rate", 0.02, 0.12, log=True),
        
        # Depth: range completo (200+ features)
        "max_depth": trial.suggest_int("max_depth", 3, 6),
        "min_child_weight": trial.suggest_int("min_child_weight", 3, 12),
        
        # Regularization: range ampio per controllare gap
        "gamma": trial.suggest_float("gamma", 0.0, 1.2),
        "reg_alpha": trial.suggest_float("reg_alpha", 0.0, 2.5),
        "reg_lambda": trial.suggest_float("reg_lambda", 1.0, 12.0),
        
        # Sampling: completo
        "subsample": trial.suggest_float("subsample", 0.65, 0.95),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.65, 0.95),
        "colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.7, 0.95),
    }

    fold_accs = []
    fold_gaps = []
    
    for tr_idx, va_idx in cv.split(X_train_val, y_train_val):
        X_tr, X_va = X_train_val[tr_idx], X_train_val[va_idx]
        y_tr, y_va = y_train_val[tr_idx], y_train_val[va_idx]

        clf = XGBClassifier(**params, use_label_encoder=False, eval_metric="logloss", 
                           random_state=42, n_jobs=1)
        
        # Fit con early stopping
        try:
            cb = getattr(xgb.callback, "EarlyStopping", None)
            if cb is not None:
                clf.fit(X_tr, y_tr, eval_set=[(X_va, y_va)], 
                       callbacks=[cb(rounds=EARLY_STOPPING_ROUNDS, save_best=True, maximize=False)], 
                       verbose=False)
            else:
                clf.fit(X_tr, y_tr, eval_set=[(X_va, y_va)], 
                       early_stopping_rounds=EARLY_STOPPING_ROUNDS, verbose=False)
        except Exception:
            clf.fit(X_tr, y_tr, eval_set=[(X_va, y_va)], verbose=False)

        # ✅ ACCURACY (coerente con CV finale)
        proba_val = _predict_proba_best(clf, X_va)
        pred_val = (proba_val >= 0.5).astype(int)
        val_acc = accuracy_score(y_va, pred_val)
        
        proba_tr = _predict_proba_best(clf, X_tr)
        pred_tr = (proba_tr >= 0.5).astype(int)
        tr_acc = accuracy_score(y_tr, pred_tr)

        gap = tr_acc - val_acc
        fold_accs.append(val_acc)
        fold_gaps.append(gap)

        # Pruning: interrompi se accuracy troppo bassa
        trial.report(val_acc, len(fold_accs))
        if trial.should_prune():
            raise optuna.TrialPruned()

    mean_val_acc = float(np.mean(fold_accs))
    mean_gap = float(np.mean(fold_gaps))
    std_val = float(np.std(fold_accs))

    # ✅ PENALTY PER GAP FUORI TARGET 3-5%
    if mean_gap < GAP_TARGET_MIN:
        # Gap troppo basso → underfit, penalizza moderatamente
        gap_penalty = (GAP_TARGET_MIN - mean_gap) * 1.0
    elif mean_gap > GAP_TARGET_MAX:
        # Gap troppo alto → overfit, penalizza fortemente
        gap_penalty = (mean_gap - GAP_TARGET_MAX) * 2.0
    else:
        # Gap nel range 3-5% → bonus se vicino a 4%
        distance_from_ideal = abs(mean_gap - GAP_TARGET_IDEAL)
        gap_penalty = -0.002 if distance_from_ideal < 0.005 else 0.0  # bonus se gap ~4%
    
    # ✅ PENALTY per variance alta (instabilità tra fold)
    stability_penalty = std_val * 0.4
    
    # Objective: massimizza accuracy - penalties
    objective_score = mean_val_acc - gap_penalty - stability_penalty

    # Logga metriche per debug
    trial.set_user_attr("mean_val_acc", mean_val_acc)
    trial.set_user_attr("mean_gap", mean_gap)
    trial.set_user_attr("std_val", std_val)
    trial.set_user_attr("gap_penalty", gap_penalty)

    return float(objective_score)

# ✅ Esegui ottimizzazione
print(f"Avvio ottimizzazione Optuna...")
print(f"Trials: {N_TRIALS} | CV folds: 5 | Gap target: {GAP_TARGET_IDEAL*100:.1f}% ± 1%\n")

study = optuna.create_study(
    direction="maximize", 
    sampler=TPESampler(seed=42, n_startup_trials=15),  # 15 trial random warmup
    pruner=MedianPruner(n_startup_trials=10, n_warmup_steps=2)  # pruning aggressivo
)

study.optimize(objective, n_trials=N_TRIALS, timeout=5400, gc_after_trial=True)

# ✅ Report dettagliato
print("\n" + "="*70)
print("OPTUNA OPTIMIZATION RESULTS")
print("="*70)
print(f"Best objective score: {study.best_value:.4f}")
print(f"Best trial: #{study.best_trial.number}")

best_attrs = study.best_trial.user_attrs
best_val_acc = best_attrs.get('mean_val_acc', 0.0)
best_gap = best_attrs.get('mean_gap', 0.0)
best_std = best_attrs.get('std_val', 0.0)

print(f"\n📊 Best trial metrics:")
print(f"  Mean CV accuracy:    {best_val_acc*100:.2f}%")
print(f"  Mean train-val gap:  {best_gap*100:.2f}%  (target: {GAP_TARGET_IDEAL*100:.1f}%)")
print(f"  Std CV accuracy:     {best_std*100:.2f}%")

print(f"\n⚙️ Best hyperparameters:")
for k, v in study.best_params.items():
    if isinstance(v, float):
        print(f"  {k:20s}: {v:.4f}")
    else:
        print(f"  {k:20s}: {v}")

# Salva risultati
import pandas as pd
trials_df = study.trials_dataframe()

# Aggiungi colonne user_attrs per analisi
for attr in ['mean_val_acc', 'mean_gap', 'std_val']:
    trials_df[attr] = trials_df['user_attrs'].apply(lambda x: x.get(attr, np.nan) if isinstance(x, dict) else np.nan)

trials_df = trials_df.sort_values('value', ascending=False)
trials_df.to_csv('optuna_trials_gap4_target.csv', index=False)
print(f"\n✅ Salvati {len(trials_df)} trial in 'optuna_trials_gap4_target.csv'")

# ✅ Analisi distribuzione gap nei trial
valid_trials = trials_df[trials_df['state'] == 'COMPLETE'].copy()
if len(valid_trials) > 0:
    gaps = valid_trials['mean_gap'].dropna()
    print(f"\n📈 DISTRIBUZIONE GAP (su {len(gaps)} trial completi):")
    print(f"  Mean: {gaps.mean()*100:.2f}%")
    print(f"  Std:  {gaps.std()*100:.2f}%")
    print(f"  Min:  {gaps.min()*100:.2f}%")
    print(f"  Max:  {gaps.max()*100:.2f}%")
    print(f"  Trials con gap 3-5%: {((gaps >= 0.03) & (gaps <= 0.05)).sum()}/{len(gaps)} ({((gaps >= 0.03) & (gaps <= 0.05)).mean()*100:.1f}%)")

# ✅ Confronto con baseline
print(f"\n{'='*70}")
print("COMPARISON WITH BASELINE")
print(f"{'='*70}")
baseline_cv = 81.38
baseline_gap = 4.19
improvement_acc = (best_val_acc * 100) - baseline_cv
improvement_gap = abs(best_gap*100 - baseline_gap)

print(f"Baseline:")
print(f"  CV accuracy:  {baseline_cv:.2f}%")
print(f"  Gap:          {baseline_gap:.2f}%")
print(f"\nOptuna best:")
print(f"  CV accuracy:  {best_val_acc*100:.2f}%  ({improvement_acc:+.2f}%)")
print(f"  Gap:          {best_gap*100:.2f}%  (Δ={improvement_gap:+.2f}%)")

# Decision logic
if improvement_acc > 0.3 and abs(best_gap*100 - GAP_TARGET_IDEAL*100) < 1.5:
    print(f"\n🎉 OTTIMO! Accuracy migliorata E gap ottimale (~4%)")
    print(f"   ✅ USA questi HP per CV/holdout")
    USE_OPTUNA_HP = True
elif improvement_acc > 0.5:
    print(f"\n✅ Accuracy significativamente migliorata (>{improvement_acc:.2f}%)")
    print(f"   Gap: {best_gap*100:.2f}% (target 4%, attuale baseline 4.19%)")
    if best_gap*100 <= 6.0:
        print(f"   ✅ Gap accettabile, USA questi HP")
        USE_OPTUNA_HP = True
    else:
        print(f"   ⚠️ Gap alto, valuta holdout prima di usare")
        USE_OPTUNA_HP = False
elif best_gap*100 >= GAP_TARGET_MIN*100 and best_gap*100 <= GAP_TARGET_MAX*100:
    print(f"\n✅ Gap ottimale ({best_gap*100:.2f}% nel range 3-5%)")
    if improvement_acc > 0:
        print(f"   Accuracy: {improvement_acc:+.2f}% → USA questi HP")
        USE_OPTUNA_HP = True
    else:
        print(f"   Accuracy non migliorata, mantieni baseline")
        USE_OPTUNA_HP = False
else:
    print(f"\n⚠️ Nessun miglioramento significativo")
    print(f"   Baseline HP già ottimi, SKIP Optuna")
    USE_OPTUNA_HP = False

# ✅ Assegna best_params per le celle successive
if USE_OPTUNA_HP:
    best_params = study.best_params.copy()
    best_params.update({
        'booster': 'gbtree',
        'tree_method': 'hist',
        'max_bin': 256,
        'use_label_encoder': False,
        'eval_metric': 'logloss',
        'random_state': 42
    })
    print(f"\n✅ Variabile 'best_params' aggiornata con HP Optuna")
    print("   Riesegui cella 'Cross validation' per validare su 10-fold")
else:
    print(f"\n⚠️ Mantieni best_params baseline (non sovrascritto)")
    print("   Puoi comunque testare manualmente copiando HP da best_trial")

[I 2025-11-06 21:16:01,279] A new study created in memory with name: no-name-899fcf9f-026c-4652-a526-1daefbb64b83


OPTUNA - ACCURACY MAXIMIZATION CON GAP TARGET ~4%
Avvio ottimizzazione Optuna...
Trials: 120 | CV folds: 5 | Gap target: 4.0% ± 1%



[I 2025-11-06 21:16:32,747] Trial 0 finished with value: 0.5311407497219631 and parameters: {'n_estimators': 900, 'learning_rate': 0.10985745201142037, 'max_depth': 5, 'min_child_weight': 8, 'gamma': 0.1872223685309238, 'reg_alpha': 0.3899863008405066, 'reg_lambda': 1.638919733850194, 'subsample': 0.9098528437324805, 'colsample_bytree': 0.8303345035229626, 'colsample_bylevel': 0.8770181444490113}. Best is trial 0 with value: 0.5311407497219631.
[I 2025-11-06 21:16:55,684] Trial 1 finished with value: 0.540425 and parameters: {'n_estimators': 500, 'learning_rate': 0.11370159575730848, 'max_depth': 6, 'min_child_weight': 5, 'gamma': 0.21818996064852073, 'reg_alpha': 0.45851127463358454, 'reg_lambda': 4.346664672554915, 'subsample': 0.8074269294896713, 'colsample_bytree': 0.7795835055926348, 'colsample_bylevel': 0.7728072850495105}. Best is trial 1 with value: 0.540425.
[I 2025-11-06 21:17:29,057] Trial 2 finished with value: 0.7368320923951681 and parameters: {'n_estimators': 1100, 'lear


OPTUNA OPTIMIZATION RESULTS
Best objective score: 0.8219
Best trial: #112

📊 Best trial metrics:
  Mean CV accuracy:    82.54%
  Mean train-val gap:  4.73%  (target: 4.0%)
  Std CV accuracy:     0.86%

⚙️ Best hyperparameters:
  n_estimators        : 1100
  learning_rate       : 0.0283
  max_depth           : 3
  min_child_weight    : 10
  gamma               : 0.6837
  reg_alpha           : 1.9505
  reg_lambda          : 11.2794
  subsample           : 0.6873
  colsample_bytree    : 0.8829
  colsample_bylevel   : 0.7089


KeyError: 'user_attrs'

# Ottimizzazione Threshold

In [27]:
# === NEW CELL: Threshold Optimization su Calibration Set ===
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd

print("="*70)
print("THRESHOLD OPTIMIZATION - CALIBRATION SET APPROACH")
print("="*70)

# Split train_val in 70% train + 30% calib (NO LEAK!)
X_train, X_calib, y_train, y_calib = train_test_split(
    X_train_val, y_train_val, test_size=0.3, random_state=42, stratify=y_train_val
)

print(f"Train size: {len(X_train)} | Calib size: {len(X_calib)} | Holdout size: {len(X_holdout)}")

# 1. Fit model su train (70%)
clf_thr = XGBClassifier(**best_params, use_label_encoder=False, eval_metric='logloss', random_state=42)
clf_thr.fit(X_train, y_train)

# 2. Predict proba su calib set (30% unseen)
proba_calib = clf_thr.predict_proba(X_calib)[:, 1]

# 3. Grid search threshold su calib (NO LEAK: non usiamo y_calib per nient'altro)
def find_best_threshold(y_true, proba, metric='accuracy', n_grid=301):
    """Grid search threshold ottimizzando metric specificata."""
    grid = np.unique(np.quantile(proba, np.linspace(0, 1, n_grid)))
    best_thr, best_score = 0.5, 0.0
    
    for t in grid:
        pred = (proba >= t).astype(int)
        
        if metric == 'accuracy':
            score = accuracy_score(y_true, pred)
        elif metric == 'balanced_accuracy':
            from sklearn.metrics import balanced_accuracy_score
            score = balanced_accuracy_score(y_true, pred)
        else:
            raise ValueError(f"metric={metric} non supportato")
        
        # Tie-break: preferisci threshold più vicino a 0.5 (più stabile)
        if (score > best_score) or (abs(score - best_score) < 1e-12 and abs(t - 0.5) < abs(best_thr - 0.5)):
            best_score, best_thr = float(score), float(t)
    
    return best_thr, best_score

thr_opt, acc_calib_opt = find_best_threshold(y_calib, proba_calib, metric='accuracy', n_grid=501)

# Confronta con threshold fisso 0.5
pred_calib_05 = (proba_calib >= 0.5).astype(int)
acc_calib_05 = accuracy_score(y_calib, pred_calib_05)

print(f"\nCALIBRATION RESULTS:")
print(f"  Threshold 0.5:  accuracy = {acc_calib_05*100:.2f}%")
print(f"  Threshold opt:  accuracy = {acc_calib_opt*100:.2f}% @thr={thr_opt:.3f}")
print(f"  Gain:           +{(acc_calib_opt - acc_calib_05)*100:.2f}%")

# 4. Valida su HOLDOUT (unbiased final test)
proba_holdout = clf_thr.predict_proba(X_holdout)[:, 1]

pred_holdout_05 = (proba_holdout >= 0.5).astype(int)
acc_holdout_05 = accuracy_score(y_holdout, pred_holdout_05)

pred_holdout_opt = (proba_holdout >= thr_opt).astype(int)
acc_holdout_opt = accuracy_score(y_holdout, pred_holdout_opt)

print(f"\nHOLDOUT VALIDATION (final unbiased estimate):")
print(f"  Threshold 0.5:  accuracy = {acc_holdout_05*100:.2f}%")
print(f"  Threshold opt:  accuracy = {acc_holdout_opt*100:.2f}% @thr={thr_opt:.3f}")
print(f"  Gain:           +{(acc_holdout_opt - acc_holdout_05)*100:.2f}%")

# 5. Check se threshold è stabile (confronta con CV folds)
cv_thresholds = [folds_info[i]['thr_acc'] for i in range(10)]
thr_mean_cv = np.mean(cv_thresholds)
thr_std_cv = np.std(cv_thresholds)

print(f"\nSTABILITY CHECK:")
print(f"  CV thresholds:     mean={thr_mean_cv:.3f}, std={thr_std_cv:.3f}, range=[{min(cv_thresholds):.3f}, {max(cv_thresholds):.3f}]")
print(f"  Calib threshold:   {thr_opt:.3f}")
print(f"  Distance from CV:  {abs(thr_opt - thr_mean_cv):.3f} (< 0.1 → stable)")

if thr_std_cv > 0.15:
    print("\n⚠️ WARNING: CV thresholds highly variable → probabilità mal calibrate")
    print("   Considera Platt scaling o isotonic regression per calibration")
elif acc_holdout_opt > acc_holdout_05 + 0.005:
    print(f"\n✅ RACCOMANDAZIONE: Usa threshold={thr_opt:.3f} in submission")
    print(f"   Guadagno stimato su test: +{(acc_holdout_opt - acc_holdout_05)*100:.2f}%")
    BEST_THRESHOLD = thr_opt
else:
    print(f"\n⚠️ Guadagno marginale (<0.5%), meglio threshold=0.5 per robustezza")
    BEST_THRESHOLD = 0.5

# 6. Re-train su FULL train_val con threshold ottimale
print(f"\n{'='*70}")
print("RE-TRAINING SU FULL TRAIN_VAL CON THRESHOLD OTTIMALE")
print(f"{'='*70}")

clf_final = XGBClassifier(**best_params, use_label_encoder=False, eval_metric='logloss', random_state=42)
clf_final.fit(X_train_val, y_train_val)

# Final holdout eval
proba_holdout_final = clf_final.predict_proba(X_holdout)[:, 1]
pred_holdout_final = (proba_holdout_final >= BEST_THRESHOLD).astype(int)
acc_holdout_final = accuracy_score(y_holdout, pred_holdout_final)

print(f"Final holdout accuracy @thr={BEST_THRESHOLD:.3f}: {acc_holdout_final*100:.2f}%")
print(f"CV accuracy (riferimento):                      {81.38:.2f}%")
print(f"Gain vs baseline CV:                            +{(acc_holdout_final - 81.38/100)*100:.2f}%")

# Save per submission
OPTIMIZED_THRESHOLD = BEST_THRESHOLD
print(f"\n✅ Variabile OPTIMIZED_THRESHOLD = {OPTIMIZED_THRESHOLD:.3f} salvata per submission")

THRESHOLD OPTIMIZATION - CALIBRATION SET APPROACH
Train size: 5600 | Calib size: 2400 | Holdout size: 2000

CALIBRATION RESULTS:
  Threshold 0.5:  accuracy = 81.96%
  Threshold opt:  accuracy = 82.33% @thr=0.445
  Gain:           +0.38%

HOLDOUT VALIDATION (final unbiased estimate):
  Threshold 0.5:  accuracy = 82.15%
  Threshold opt:  accuracy = 81.70% @thr=0.445
  Gain:           +-0.45%

STABILITY CHECK:
  CV thresholds:     mean=0.498, std=0.090, range=[0.326, 0.623]
  Calib threshold:   0.445
  Distance from CV:  0.053 (< 0.1 → stable)

⚠️ Guadagno marginale (<0.5%), meglio threshold=0.5 per robustezza

RE-TRAINING SU FULL TRAIN_VAL CON THRESHOLD OTTIMALE
Final holdout accuracy @thr=0.500: 82.70%
CV accuracy (riferimento):                      81.38%
Gain vs baseline CV:                            +1.32%

✅ Variabile OPTIMIZED_THRESHOLD = 0.500 salvata per submission


# Cross validation

In [5]:
# === 10-Fold Cross-Validation con iperparametri FISSI ===
# IMPORTANTE: Assegna qui i migliori iperparametri trovati dalla cella precedente
# Oppure lascia questi di default (conservativi per ridurre overfitting)

best_params = {
    'booster': 'gbtree',
    'tree_method': 'hist',
    'max_bin': 256,          # istogrammi più grossolani = meno varianza e più veloce
    'learning_rate': 0.035,  # leggermente più alto con meno alberi
    'n_estimators': 900,     # meno alberi per ridurre overfitting
    'max_depth': 3,
    'min_child_weight': 9,   # nodi con più peso => meno overfit
    'gamma': 0.5,            # penalizza split deboli
    'subsample': 0.7,
    'colsample_bytree': 0.7,
    'colsample_bynode': 0.7,
    'colsample_bylevel': 0.8,
    'reg_alpha': 0.4,        # L1
    'reg_lambda': 10.0,      # L2 più alta
    'max_delta_step': 1
}

print("=== 10-Fold Cross-Validation (9 train + 1 validation) ===")
print(f"Parametri utilizzati: {best_params}\n")

from sklearn.model_selection import StratifiedKFold
import xgboost as xgb  # per callback EarlyStopping se disponibile
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
outer_accuracies = []
folds_info = []
train_accuracies = []
train_val_gaps = []
outer_accuracies_opt = []

EARLY_STOPPING_ROUNDS = 50

def best_threshold_for_accuracy(y_true, proba, n_grid=201):
    y_true = np.asarray(y_true).astype(int)
    proba = np.asarray(proba).astype(float)
    grid = np.unique(np.quantile(proba, np.linspace(0, 1, n_grid)))
    best_thr, best_acc = 0.5, 0.0
    for t in grid:
        acc = ( ((proba >= t).astype(int) == y_true).mean() )
        if (acc > best_acc) or (abs(acc - best_acc) < 1e-12 and abs(t - 0.5) < abs(best_thr - 0.5)):
            best_acc, best_thr = float(acc), float(t)
    return best_thr, best_acc

def _fit_with_es(clf, X_tr, y_tr, X_val, y_val):
    """Fit con EarlyStopping via callback se supportato; fallback senza ES."""
    try:
        cb = getattr(xgb.callback, 'EarlyStopping', None)
        if cb is not None:
            clf.fit(X_tr, y_tr, eval_set=[(X_val, y_val)], callbacks=[cb(rounds=EARLY_STOPPING_ROUNDS, save_best=True, maximize=False)], verbose=False)
            return True
    except TypeError:
        pass
    clf.fit(X_tr, y_tr, eval_set=[(X_val, y_val)], verbose=False)
    return False

def _predict_proba_best(clf, X, best_iter=None, best_ntree_limit=None):
    """Version-safe predict_proba using either iteration_range (new) or ntree_limit (old)."""
    try:
        if best_iter is not None:
            return clf.predict_proba(X, iteration_range=(0, int(best_iter)+1))[:, 1]
    except TypeError:
        pass
    try:
        if best_ntree_limit is not None:
            return clf.predict_proba(X, ntree_limit=int(best_ntree_limit))[:, 1]
    except TypeError:
        pass
    return clf.predict_proba(X)[:, 1]

fold_idx = 0
for train_idx, val_idx in skf.split(X_train_val, y_train_val):
    fold_idx += 1
    X_tr, X_val = X_train_val[train_idx], X_train_val[val_idx]
    y_tr, y_val = y_train_val[train_idx], y_train_val[val_idx]

    clf = XGBClassifier(**best_params, use_label_encoder=False, eval_metric='logloss', random_state=42)
    used_es = _fit_with_es(clf, X_tr, y_tr, X_val, y_val)

    best_iter = getattr(clf, 'best_iteration', None)
    try:
        booster = clf.get_booster()
    except Exception:
        booster = None
    best_ntree_limit = getattr(booster, 'best_ntree_limit', None) if booster is not None else None

    y_val_proba = _predict_proba_best(clf, X_val, best_iter, best_ntree_limit)
    y_pred = (y_val_proba >= 0.5).astype(int)
    acc = accuracy_score(y_val, y_pred)
    outer_accuracies.append(acc)

    y_tr_proba = _predict_proba_best(clf, X_tr, best_iter, best_ntree_limit)
    y_tr_pred = (y_tr_proba >= 0.5).astype(int)
    tr_acc = accuracy_score(y_tr, y_tr_pred)
    gap = float(tr_acc - acc)
    train_accuracies.append(tr_acc)
    train_val_gaps.append(gap)

    thr_acc, acc_opt = best_threshold_for_accuracy(y_val, y_val_proba, n_grid=301)
    outer_accuracies_opt.append(acc_opt)

    val_index_global = idx_train_val[val_idx]
    train_index_global = idx_train_val[train_idx]

    folds_info.append({
        'fold': fold_idx,
        'acc': float(acc),
        'train_acc': float(tr_acc),
        'gap_train_minus_val': float(gap),
        'acc_opt': float(acc_opt),
        'thr_acc': float(thr_acc),
        'best_iteration': int(best_iter) if best_iter is not None else None,
        'train_idx': train_idx,
        'val_idx': val_idx,
        'train_index_global': train_index_global,
        'val_index_global': val_index_global,
        'y_true': y_val.astype(int),
        'y_pred': y_pred.astype(int),
        'y_proba': y_val_proba.astype(float)
    })

    es_tag = 'with ES' if used_es else 'no ES'
    print(f'Fold {fold_idx}: {es_tag}, train={len(y_tr)}, val={len(y_val)}, acc_val={acc*100:.2f}%, acc_val_opt={acc_opt*100:.2f}% @thr={thr_acc:.3f}, acc_train={tr_acc*100:.2f}%, gap={(gap)*100:.2f}%')

print('\n' + '='*60)
print('Risultati Cross-Validation')
print('='*60)
for i, a in enumerate(outer_accuracies, 1):
    print(f'  Fold {i}: val_acc={a*100:.2f}%, val_acc_opt={outer_accuracies_opt[i-1]*100:.2f}% @thr={folds_info[i-1]["thr_acc"]:.3f}, train_acc={train_accuracies[i-1]*100:.2f}%, gap={train_val_gaps[i-1]*100:.2f}%')
print(f'\nMean CV accuracy (0.5): {np.mean(outer_accuracies)*100:.2f}%')
print(f'Mean CV accuracy (opt thr): {np.mean(outer_accuracies_opt)*100:.2f}%')
print(f'Mean train accuracy: {np.mean(train_accuracies)*100:.2f}%')
print(f'Mean gap (train - val): {np.mean(train_val_gaps)*100:.2f}%')
print(f'Std CV accuracy:  {np.std(outer_accuracies)*100:.2f}%')
print(f'Min/Max val acc:  {np.min(outer_accuracies)*100:.2f}% / {np.max(outer_accuracies)*100:.2f}%')

WORST_FOLD_IDX = int(np.argmin(outer_accuracies))
WORST_FOLD_NUM = int(folds_info[WORST_FOLD_IDX]['fold'])
print(f"\nPeggiore fold: #{WORST_FOLD_NUM} con acc_val={outer_accuracies[WORST_FOLD_IDX]*100:.2f}% | acc_val_opt={outer_accuracies_opt[WORST_FOLD_IDX]*100:.2f}% | acc_train={train_accuracies[WORST_FOLD_IDX]*100:.2f}% | gap={train_val_gaps[WORST_FOLD_IDX]*100:.2f}%")

=== 10-Fold Cross-Validation (9 train + 1 validation) ===
Parametri utilizzati: {'booster': 'gbtree', 'tree_method': 'hist', 'max_bin': 256, 'learning_rate': 0.035, 'n_estimators': 900, 'max_depth': 3, 'min_child_weight': 9, 'gamma': 0.5, 'subsample': 0.7, 'colsample_bytree': 0.7, 'colsample_bynode': 0.7, 'colsample_bylevel': 0.8, 'reg_alpha': 0.4, 'reg_lambda': 10.0, 'max_delta_step': 1}

Fold 1: no ES, train=7200, val=800, acc_val=80.88%, acc_val_opt=81.12% @thr=0.490, acc_train=86.99%, gap=6.11%
Fold 2: no ES, train=7200, val=800, acc_val=81.25%, acc_val_opt=81.75% @thr=0.439, acc_train=86.93%, gap=5.68%
Fold 3: no ES, train=7200, val=800, acc_val=82.25%, acc_val_opt=82.38% @thr=0.400, acc_train=86.82%, gap=4.57%
Fold 4: no ES, train=7200, val=800, acc_val=83.88%, acc_val_opt=84.88% @thr=0.429, acc_train=87.00%, gap=3.12%
Fold 5: no ES, train=7200, val=800, acc_val=85.25%, acc_val_opt=85.38% @thr=0.504, acc_train=86.71%, gap=1.46%
Fold 6: no ES, train=7200, val=800, acc_val=81.38%, 

# Holdout validation

In [6]:
# === Valutazione su holdout ===
from sklearn.metrics import balanced_accuracy_score, roc_auc_score, confusion_matrix, classification_report
import pandas as pd
import numpy as np
from xgboost import XGBClassifier

print('Start holdout evaluation')

# Usa best_params se esiste, fallback a parametri di base
try:
    params = dict(best_params)
    print('Using best_params from notebook')
except Exception:
    params = {}
    print('best_params non trovato: uso parametri di default')

clf = XGBClassifier(**params, use_label_encoder=False, eval_metric='logloss', random_state=42)
clf.fit(X_train_val, y_train_val)

# predizioni e probabilità su holdout
try:
    proba = clf.predict_proba(X_holdout)[:, 1]
except Exception:
    # fallback: predict then map to 0/1 probs
    preds_tmp = clf.predict(X_holdout)
    proba = preds_tmp.astype(float)

pred = (proba >= 0.5).astype(int)

# metriche
acc = (pred == y_holdout).mean()
bacc = balanced_accuracy_score(y_holdout, pred)
try:
    roc = roc_auc_score(y_holdout, proba)
except Exception:
    roc = float('nan')

cm = confusion_matrix(y_holdout, pred)
cr = classification_report(y_holdout, pred, digits=4)

print(f'Holdout size: {len(y_holdout)}')
print(f'Accuracy: {acc:.4f}, Balanced Accuracy: {bacc:.4f}, ROC AUC: {roc:.4f}')
print('Confusion matrix:\n', cm)
print('\nClassification report:\n', cr)

# Salva predizioni holdout per ispezione (include battle_id se disponibili)
try:
    holdout_idx = idx_holdout
    holdout_ids = train_df.loc[holdout_idx, 'battle_id'] if 'battle_id' in train_df.columns else pd.Series(holdout_idx, index=holdout_idx)
    out_df = pd.DataFrame({'battle_id': holdout_ids.values, 'y_true': y_holdout, 'y_pred': pred, 'y_proba': proba})
except Exception:
    out_df = pd.DataFrame({'y_true': y_holdout, 'y_pred': pred, 'y_proba': proba})

out_path = 'holdout_predictions.csv'
out_df.to_csv(out_path, index=False)
print(f'Holdout predictions saved to {out_path} (rows={len(out_df)})')

Start holdout evaluation
Using best_params from notebook
Holdout size: 2000
Accuracy: 0.8270, Balanced Accuracy: 0.8270, ROC AUC: 0.8936
Confusion matrix:
 [[833 167]
 [179 821]]

Classification report:
               precision    recall  f1-score   support

           0     0.8231    0.8330    0.8280      1000
           1     0.8310    0.8210    0.8260      1000

    accuracy                         0.8270      2000
   macro avg     0.8270    0.8270    0.8270      2000
weighted avg     0.8270    0.8270    0.8270      2000

Holdout predictions saved to holdout_predictions.csv (rows=2000)


# Make submission

In [7]:
print("=== Submission rapida post-CV ===")
cv_submission_model = XGBClassifier(**best_params, use_label_encoder=False, eval_metric='logloss', random_state=42)
cv_submission_model.fit(X_train_val, y_train_val)

test_aligned = test_df.reindex(columns=FEATURES, fill_value=0)
X_test_matrix = test_aligned.astype(float).to_numpy()
test_predictions = cv_submission_model.predict(X_test_matrix).astype(int)

submission_df = pd.DataFrame({
    'battle_id': test_df['battle_id'].astype(np.int64),
    'player_won': test_predictions.astype(np.int64)
})

submission_path = 'submission.csv'
submission_df.to_csv(submission_path, index=False)
print(f"✅ File di submission salvato in {submission_path}")
print(submission_df.head())

=== Submission rapida post-CV ===
✅ File di submission salvato in submission.csv
   battle_id  player_won
0          0           0
1          1           1
2          2           1
3          3           1
4          4           1
