The core strategy in this notebook is Stacked Generalization (Stacking), a highly effective form of Meta-Ensemble Learning.

Base Layer (Level 0) Modeling A diverse set of Base Models (including Ridge Regression, QDA, SVC, Logistic Regression, and various tree-based ensembles like Hist Gradient Boosting, Random Forest, and Extra Trees) are trained. Each base model is trained on either the Timeline or Combat feature view.

The key output of this layer is the set of Out-of-Fold (OOF) probability predictions from all base models on the training data.

Meta Layer (Level 1) Modeling The OOF probabilities generated by the Base Layer are concatenated to form a new, small feature matrix. This matrix is used to train a Meta-Model (Logistic Regression).

The Meta-Model learns how to optimally combine (weigh) the strengths and weaknesses of the base models' predictions to produce a final, improved probability score, which is then converted to the final prediction using an optimized probability threshold.

In [80]:
import os, json, math, warnings
from typing import Dict, Any, List, Tuple
from collections import Counter, defaultdict

warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd

from joblib import Parallel, delayed
import multiprocessing

from tqdm.auto import tqdm
from packaging.version import Version
from sklearn import __version__ as skl_version

from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.feature_selection import SelectKBest, mutual_info_classif, VarianceThreshold
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV, RidgeClassifierCV
from sklearn.ensemble import HistGradientBoostingClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis, LinearDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn.calibration import CalibratedClassifierCV
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import accuracy_score, roc_auc_score



In [None]:
import os
import json

# --- percorso assoluto su Kaggle ---
DATA_PATH = "/kaggle/input/fds-pokemon-battles-prediction-2025"
SUBMISSION_PATH = "submission.csv"


train_file_path = os.path.join(DATA_PATH, "train.jsonl")
test_file_path = os.path.join(DATA_PATH, "test.jsonl")

train_data = []
with open(train_file_path, "r") as f:
    for line in f:
        train_data.append(json.loads(line))

test_data = []
with open(test_file_path, "r") as f:
    for line in f:
        test_data.append(json.loads(line))

print(f"✅ Train data loaded: {len(train_data)} records")


✅ Train data loaded: 10000 records


In [84]:
# Set a fixed random seed for reproducibility.
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)




In [85]:
# Define a list of turn cutoffs for feature extraction.
TURN_CUTOFFS = [None]


In [86]:
# Load data from a JSONL file, parsing each line as a JSON object.
def load_jsonl(path: str) -> List[Dict[str, Any]]:
    rows = []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line:
                rows.append(json.loads(line))
    return rows


In [87]:
# Define type effectiveness multipliers for Generation 1 Pokémon battles.
GEN1_TYPES = {
    "normal": {"rock": 0.5, "ghost": 0.0, "steel": 0.5},
    "fire": {"fire": 0.5, "water": 0.5, "grass": 2.0, "ice": 2.0, "bug": 2.0, "rock": 0.5, "dragon": 0.5, "steel": 2.0},
    "water": {"fire": 2.0, "water": 0.5, "grass": 0.5, "ground": 2.0, "rock": 2.0, "dragon": 0.5},
    "electric": {"water": 2.0, "electric": 0.5, "grass": 0.5, "ground": 0.0, "flying": 2.0, "dragon": 0.5},
    "grass": {"fire": 0.5, "water": 2.0, "grass": 0.5, "poison": 0.5, "ground": 2.0, "flying": 0.5, "bug": 0.5, "rock": 2.0, "dragon": 0.5, "steel": 0.5},
    "ice": {"fire": 0.5, "water": 0.5, "grass": 2.0, "ground": 2.0, "flying": 2.0, "dragon": 2.0, "steel": 0.5},
    "fighting": {"normal": 2.0, "ice": 2.0, "poison": 0.5, "flying": 0.5, "psychic": 0.5, "bug": 0.5, "rock": 2.0, "ghost": 0.0, "dark": 2.0, "steel": 2.0, "fairy": 0.5},
    "poison": {"grass": 2.0, "poison": 0.5, "ground": 0.5, "rock": 0.5, "ghost": 0.5, "steel": 0.0, "fairy": 2.0},
    "ground": {"fire": 2.0, "electric": 2.0, "grass": 0.5, "poison": 2.0, "flying": 0.0, "bug": 0.5, "rock": 2.0, "steel": 2.0},
    "flying": {"electric": 0.5, "grass": 2.0, "fighting": 2.0, "bug": 2.0, "rock": 0.5, "steel": 0.5},
    "psychic": {"fighting": 2.0, "poison": 2.0, "psychic": 0.5, "dark": 0.0, "steel": 0.5},
    "bug": {"fire": 0.5, "grass": 2.0, "fighting": 0.5, "poison": 0.5, "flying": 0.5, "psychic": 2.0, "ghost": 0.5, "dark": 2.0, "steel": 0.5, "fairy": 0.5},
    "rock": {"fire": 2.0, "ice": 2.0, "fighting": 0.5, "ground": 0.5, "flying": 2.0, "bug": 2.0, "steel": 0.5},
    "ghost": {"normal": 0.0, "psychic": 2.0, "ghost": 2.0, "dark": 0.5},
    "dragon": {"dragon": 2.0, "steel": 0.5, "fairy": 0.0},
    "dark": {"fighting": 0.5, "psychic": 2.0, "ghost": 2.0, "dark": 0.5, "fairy": 0.5},
    "steel": {"fire": 0.5, "water": 0.5, "electric": 0.5, "ice": 2.0, "rock": 2.0, "steel": 0.5, "fairy": 2.0},
    "fairy": {"fire": 0.5, "fighting": 2.0, "poison": 0.5, "dragon": 2.0, "dark": 2.0, "steel": 0.5}}



In [88]:
# Remove a specific battle entry from the training data due to data anomaly.
train_data.remove(train_data[4877])


In [89]:
# Filter out training battles where any player 1 Pokémon's level is not 100.
# This ensures consistency in competitive battle assumptions.
for i, battle in enumerate(train_data):
  squadra1 = battle['p1_team_details']
  for pok in squadra1:
    livello = pok['level']
    if livello < 100:
      print(livello)
      train_data.remove(train_data[i])

print(len(train_data))


55
55
55
55
55
55
55
55
55
55
55
55
85
9986


In [90]:
# Calculate the type effectiveness multiplier for an attack against defending Pokémon types.
def type_multiplier(attack_type: str, defend_types: List[str]) -> float:
    at = str(attack_type or "").lower()
    mult = 1.0
    chart = GEN1_TYPES.get(at, {})
    for dt in defend_types or []:
        mult *= chart.get(str(dt or "").lower(), 1.0) # Multiply for each defending type
    return mult


In [91]:
# Safely convert a value to a float, returning a default if conversion fails.
def safe_float(x, default=1.0):
    try:
        return float(x)
    except:
        return default


In [92]:
# Identify all Pokémon species seen on the opponent's side in a battle timeline.
def species_seen_in_timeline(timeline: List[Dict[str, Any]]) -> set:
    names = set()
    for turn in timeline or []:
        name = (turn.get("p2_pokemon_state") or {}).get("name")
        if name:
            names.add(str(name).lower())
    return names


In [93]:
# Discover all unique Pokémon types present in the provided battle data.
def discover_all_types(data: List[Dict[str, Any]]) -> List[str]:
    s = set()
    for b in data:
        for p in b.get("p1_team_details") or []:
            for t in p.get("types") or []:
                tl = str(t or "").lower()
                if tl and tl != "notype" and tl in GEN1_TYPES:
                    s.add(tl)
    return sorted(s)


In [94]:
# Identify and store all unique Pokémon types from the training data.
ALL_TYPES = discover_all_types(train_data)


In [95]:
# Print the discovered Pokémon types and their count.
print(ALL_TYPES)
print(len(ALL_TYPES))


['dragon', 'electric', 'fire', 'flying', 'ghost', 'grass', 'ground', 'ice', 'normal', 'poison', 'psychic', 'rock', 'water']
13


Identify top species and moves to track as features

In [96]:
# Identify the most common Pokémon species appearing in battles,
# considering both player 1's team and Pokémon seen in the timeline.
def top_species(data: List[Dict[str, Any]], top_k=120) -> List[str]:
    c = Counter()
    for b in data:
        for p in b.get("p1_team_details") or []:
            nm = str(p.get("name","")).lower()
            if nm: c[nm]+=1
        for sp in species_seen_in_timeline(b.get("battle_timeline")):
            c[sp]+=0.5 # Give less weight to seen species than on-team species
    return [s for s,_ in c.most_common(top_k)]



In [97]:
# Determine the top K most frequently appearing Pokémon species.
TOP_SPECIES = top_species(train_data, 120)


In [98]:
# Print the first few top species identified.
print(TOP_SPECIES[:5])


['chansey', 'tauros', 'snorlax', 'exeggutor', 'alakazam']


In [99]:
# Build a mapping from Pokémon names to their types based on training data.
NAME_TO_TYPES: Dict[str, List[str]] = {}
for b in train_data:
    for p in b.get("p1_team_details") or []:
        nm = str(p.get("name","")).lower()
        if nm and nm not in NAME_TO_TYPES:
            NAME_TO_TYPES[nm] = [t.lower() for t in (p.get("types") or []) if t and t!="notype"]


In [100]:
# Calculate the STAB (Same-Type Attack Bonus) adjusted base power of a move.
def stab_adjusted_bp(move, attacker_name: str) -> float:
    if not move:
      return 0.0
    bp = float(move.get("base_power") or 0)
    if bp <= 0:
      return 0.0
    mtype = str(move.get("type") or "").lower()
    if not mtype or not attacker_name:
      return bp
    attacker_types = NAME_TO_TYPES.get(attacker_name, [])
    # Apply STAB if move type matches attacker's type
    return bp * 1.5 if mtype in attacker_types else bp


In [101]:
# Build prior probabilities of opponent Pokémon appearing in the timeline given their lead Pokémon.
def build_lead_back_priors(train_data, top_species):
    co = defaultdict(lambda: Counter()) # Count co-occurrences of lead and seen species
    lead_cnt = Counter() # Count occurrences of each lead species
    for b in train_data:
        lead = (b.get("p2_lead_details") or {}).get("name","")
        lead = str(lead).lower()
        if not lead: continue
        seen = species_seen_in_timeline(b.get("battle_timeline"))
        lead_cnt[lead]+=1
        for s in seen:
            co[lead][s]+=1
    priors = {}
    for lead, cnt in lead_cnt.items():
        priors[lead] = {s: co[lead][s]/cnt for s in top_species} # Calculate probability
    return priors


In [102]:
# Calculate prior probabilities of opponent Pokémon appearing after a specific lead.
P_LEAD_BACK = build_lead_back_priors(train_data, TOP_SPECIES)


In [103]:
# Generate features representing prior probabilities of opponent Pokémon appearing given their lead.
def prior_features_for_lead(lead_name: str) -> Dict[str,float]:
    lead = str(lead_name or "").lower()
    row = P_LEAD_BACK.get(lead, {}) # Get prior probabilities for the given lead
    feats = {f"p2_prob_{s}_given_lead": float(row.get(s, 0.0)) for s in TOP_SPECIES}
    return feats


In [104]:
# Normalize a move name by converting to lowercase and removing spaces/hyphens.
def normalize_move_name(name: str) -> str:
    if not name:
        return ""
    n = str(name).strip().lower()
    n = n.replace(" ", "").replace("-", "")
    return n



In [105]:
# Count the occurrences of each move in the battle timelines for both players.
def move_counts(battles):
    c = Counter()
    for b in battles:
        for tr in b.get("battle_timeline") or []:
            for side in ("p1","p2"):
                md = tr.get(f"{side}_move_details")
                if md and md.get("name"):
                    c[normalize_move_name(md["name"])] += 1
    return c


In [106]:
# Build a vocabulary of most frequently used moves to cover a target percentage of all moves.
def build_move_vocab_by_coverage(battles, target_coverage=0.985, min_freq=3):
    c = move_counts(battles)
    total = sum(c.values()) if c else 1
    running = 0
    vocab = []
    for m,f in c.most_common():
        if f < min_freq:
            break
        vocab.append(m)
        running += f
        if running / total >= target_coverage:
            break
    return vocab, c

TRACK_MOVES, MOVE_FREQ = build_move_vocab_by_coverage(train_data, target_coverage=0.985, min_freq=3)

print("TRACK_MOVES size:", len(TRACK_MOVES))


TRACK_MOVES size: 33


In [107]:
# Print the list of tracked moves, which represent common and impactful moves.
print(TRACK_MOVES)


['bodyslam', 'psychic', 'thunderwave', 'blizzard', 'thunderbolt', 'icebeam', 'seismictoss', 'earthquake', 'sleeppowder', 'softboiled', 'hyperbeam', 'rest', 'recover', 'reflect', 'lovelykiss', 'clamp', 'amnesia', 'drillpeck', 'surf', 'hypnosis', 'sing', 'stunspore', 'explosion', 'wrap', 'counter', 'doubleedge', 'nightshade', 'rockslide', 'megadrain', 'selfdestruct', 'confuseray', 'substitute', 'doublekick']


In [108]:
# Define sets of specific move categories.
STATUS_MOVES = {
    "thunderwave","stunspore","sleeppowder","sing","lovelykiss","toxic",
    "hypnosis","substitute","reflect","recover","softboiled","rest",
    "agility","swordsdance","amnesia"
}
PARTIAL_TRAP = {"wrap","clamp","firespin"}
EXPLODE     = {"explosion","selfdestruct"}


In [109]:
# Define sets for healing and priority moves.
HEALING_MOVES = {
    "recover","softboiled","rest",
    "megadrain"
}
PRIORITY_MOVES = {}


In [110]:

# Collect all unique move names observed across all battle timelines.
def collect_all_moves(battles):
    moves = set()
    for b in battles:
        for tr in b.get("battle_timeline") or []:
            for side in ("p1", "p2"):
                md = tr.get(f"{side}_move_details")
                if md and md.get("name"):
                    mv = normalize_move_name(md["name"])
                    if mv:
                        moves.add(mv)
    return moves

ALL_MOVES = collect_all_moves(train_data)



In [111]:
# Dynamically identify status and explosive moves from battle timelines.
# This helps capture moves not explicitly listed in static sets.
def dynamic_status_moves(battles):
    s = set()
    for b in battles:
        for tr in b.get("battle_timeline") or []:
            for side in ("p1","p2"):
                md = tr.get(f"{side}_move_details")
                if not md: continue
                mv = normalize_move_name(md["name"])
                cat = md.get("category","").lower()
                if cat == "status": s.add(mv)
                if mv in {"toxic","thunderwave"}: s.add(mv)
    return s

def dynamic_explode(battles):
    boom = set()
    for b in battles:
        for tr in b.get("battle_timeline") or []:
            for side in ("p1","p2"):
                md = tr.get(f"{side}_move_details")
                if md:
                    mv = normalize_move_name(md["name"])
                    if "explode" in mv or "selfdestruct" in mv:
                        boom.add(mv)
    return boom

DYN_STATUS  = dynamic_status_moves(train_data)
DYN_EXPLODE = dynamic_explode(train_data)



In [112]:
# Update move category sets with dynamically discovered moves,
# filtering for moves present in ALL_MOVES to ensure relevance.
STATUS_MOVES   = (STATUS_MOVES | DYN_STATUS) & ALL_MOVES
PARTIAL_TRAP   = PARTIAL_TRAP & ALL_MOVES
EXPLODE        = (EXPLODE | DYN_EXPLODE) & ALL_MOVES
HEALING_MOVES  = HEALING_MOVES & ALL_MOVES


print("STATUS_MOVES:", STATUS_MOVES)
print("HEALING_MOVES:", HEALING_MOVES)
print("PARTIAL_TRAP:", PARTIAL_TRAP)
print("EXPLODE:", EXPLODE)
print("PRIORITY_MOVES:", PRIORITY_MOVES)


STATUS_MOVES: {'sing', 'softboiled', 'amnesia', 'confuseray', 'swordsdance', 'thunderwave', 'agility', 'rest', 'hypnosis', 'sleeppowder', 'reflect', 'substitute', 'recover', 'lovelykiss', 'toxic', 'stunspore'}
HEALING_MOVES: {'softboiled', 'recover', 'megadrain', 'rest'}
PARTIAL_TRAP: {'firespin', 'wrap', 'clamp'}
EXPLODE: {'selfdestruct', 'explosion'}
PRIORITY_MOVES: {}


In [113]:
# Print the number of moves in each defined category for summary.
print(len(STATUS_MOVES))
print(len(PARTIAL_TRAP))
print(len(EXPLODE))
print(len(HEALING_MOVES))
print(len(PRIORITY_MOVES))


16
3
2
4
0


In [114]:
# Calculate the sum of stat boosts and count move occurrences within a battle timeline.
# Also, assess early status effects.
def sum_boosts(d):
    if not d: return 0.0
    s=0.0
    for v in d.values():
        try: s+=float(v)
        except: pass
    return s

def timeline_move_counts(timeline) -> Dict[str,float]:
    feats = {f"p1_mv_{m}":0.0 for m in TRACK_MOVES}
    feats.update({f"p2_mv_{m}":0.0 for m in TRACK_MOVES})
    feats["p1_mv_other"] = 0.0 # Track moves not in TRACK_MOVES
    feats["p2_mv_other"] = 0.0 # Track moves not in TRACK_MOVES

    early_status_score = 0.0
    for idx, turn in enumerate(timeline or [], start=1):
        for side in ("p1","p2"):
            md = turn.get(f"{side}_move_details")
            if md and md.get("name"):
                mv = normalize_move_name(md["name"])
                key = f"{side}_mv_{mv}" if mv in TRACK_MOVES else f"{side}_mv_other"
                feats[key] += 1.0

        # Assess early game status advantage
        if idx <= 3:
            s1 = (turn.get("p1_pokemon_state") or {}).get("status")
            s2 = (turn.get("p2_pokemon_state") or {}).get("status")
            if s2 in ("slp","frz","par"): early_status_score += 1.0
            if s1 in ("slp","frz","par"): early_status_score -= 1.0

    feats["early_status_score"] = early_status_score
    return feats


In [115]:
# Extract various timeline-based features such as move accuracy, healing, priority,
# HP changes, and status accumulation, providing a detailed view of battle flow.
def timeline_additions(timeline: List[Dict[str, Any]]) -> Dict[str, float]:
    feats = {
        "p1_mult_mean": 1.0, "p2_mult_mean": 1.0,
        "p1_move_accuracy": 0.0, "p2_move_accuracy": 0.0,
        "p1_healing_moves": 0.0, "p2_healing_moves": 0.0,
        "p1_priority_moves": 0.0, "p2_priority_moves": 0.0,
        "p1_status_score": 0.0, "p2_status_score": 0.0,
        "p1_life_turn30": 1.0, "p2_life_turn30": 1.0,

        "mult_diff": 0.0,
        "diff_move_accuracy": 0.0,
        "diff_healing_moves": 0.0,
        "priority_diff": 0.0,
        "diff_status": 0.0,
        "life_diff_turn30": 0.0,
    }
    if not timeline:
        return feats

    # Initialize HP tracking for accuracy calculation
    prev_p1_hp = safe_float((timeline[0].get("p1_pokemon_state") or {}).get("hp_pct", 1.0))
    prev_p2_hp = safe_float((timeline[0].get("p2_pokemon_state") or {}).get("hp_pct", 1.0))

    p1_acc_hits = p1_acc_attempts = 0
    p2_acc_hits = p2_acc_attempts = 0
    p1_heal = p2_heal = 0
    p1_pri  = p2_pri  = 0

    mult_p1, mult_p2 = [], [] # Store type multipliers

    def status_weight(s):
        return 1.0 if s=="par" else 2.0 if s=="slp" else 3.0 if s=="frz" else 0.0

    last_turn_seen = 0

    for tr in timeline:
        last_turn_seen = int(tr.get("turn", last_turn_seen)) # Track last turn
        p1s = tr.get("p1_pokemon_state") or {}
        p2s = tr.get("p2_pokemon_state") or {}
        m1  = tr.get("p1_move_details") or {}
        m2  = tr.get("p2_move_details") or {}

        p1_hp = safe_float(p1s.get("hp_pct", prev_p1_hp))
        p2_hp = safe_float(p2s.get("hp_pct", prev_p2_hp))

        n1 = normalize_move_name(m1.get("name",""))
        n2 = normalize_move_name(m2.get("name",""))
        if n1 in HEALING_MOVES:  p1_heal += 1
        if n2 in HEALING_MOVES:  p2_heal += 1
        if n1 in PRIORITY_MOVES: p1_pri  += 1
        if n2 in PRIORITY_MOVES: p2_pri  += 1

        # Check if a move is damaging (base power > 0)
        def damaging(m): return float(m.get("base_power") or 0) > 0
        if m1 and damaging(m1):
            p1_acc_attempts += 1
            if p2_hp < prev_p2_hp - 1e-9: # If opponent HP decreased, it was a hit
                p1_acc_hits += 1
        if m2 and damaging(m2):
            p2_acc_attempts += 1
            if p1_hp < prev_p1_hp - 1e-9:
                p2_acc_hits += 1

        # Calculate type effectiveness multiplier for damaging moves
        def push_mult(m, defender_state, bag):
            if not damaging(m): return
            mt = str(m.get("type","")).lower()
            def_types = [t for t in defender_state.get("types", []) if t and t!="notype"]
            bag.append(type_multiplier(mt, def_types))
        push_mult(m1, p2s, mult_p1)
        push_mult(m2, p1s, mult_p2)

        # status score accumulation
        feats["p1_status_score"] += status_weight(p1s.get("status"))
        feats["p2_status_score"] += status_weight(p2s.get("status"))

        prev_p1_hp, prev_p2_hp = p1_hp, p2_hp

    # Capture HP at a specific turn (e.g., turn 30) for early game assessment
    target_turn = min(30, last_turn_seen if last_turn_seen>0 else 30)
    p1_t = p2_t = 1.0
    for tr in timeline:
        if int(tr.get("turn", 0)) <= target_turn:
            p1_t = safe_float((tr.get("p1_pokemon_state") or {}).get("hp_pct", p1_t))
            p2_t = safe_float((tr.get("p2_pokemon_state") or {}).get("hp_pct", p2_t))
    feats["p1_life_turn30"] = float(p1_t)
    feats["p2_life_turn30"] = float(p2_t)
    feats["life_diff_turn30"] = float(p1_t - p2_t)

    # Calculate mean type multiplier and move accuracy
    feats["p1_mult_mean"] = float(np.mean(mult_p1)) if mult_p1 else 1.0
    feats["p2_mult_mean"] = float(np.mean(mult_p2)) if mult_p2 else 1.0
    p1_acc = (p1_acc_hits / p1_acc_attempts) if p1_acc_attempts > 0 else 0.0
    p2_acc = (p2_acc_hits / p2_acc_attempts) if p2_acc_attempts > 0 else 0.0
    feats["p1_move_accuracy"] = float(p1_acc)
    feats["p2_move_accuracy"] = float(p2_acc)
    feats["p1_healing_moves"] = float(p1_heal)
    feats["p2_healing_moves"] = float(p2_heal)
    feats["p1_priority_moves"] = float(p1_pri)
    feats["p2_priority_moves"] = float(p2_pri)


    # Calculate differences between players for various metrics
    feats["mult_diff"] = feats["p1_mult_mean"] - feats["p2_mult_mean"]
    feats["diff_move_accuracy"] = feats["p1_move_accuracy"] - feats["p2_move_accuracy"]
    feats["diff_healing_moves"] = feats["p1_healing_moves"] - feats["p2_healing_moves"]
    feats["priority_diff"] = feats["p1_priority_moves"] - feats["p2_priority_moves"]
    feats["diff_status"] = feats["p2_status_score"] - feats["p1_status_score"]
    return feats


In [116]:
# Extract features related to move categories like status, partial trap,
# explosion, and STAB (Same-Type Attack Bonus) hits for both players.
def move_buckets_features(timeline):
    b = {
        "p1_status_moves":0.0,"p2_status_moves":0.0,
        "p1_partial_trap":0.0,"p2_partial_trap":0.0,
        "p1_explodes":0.0,"p2_explodes":0.0,
        "p1_stab_hits":0.0,"p2_stab_hits":0.0
    }
    for tr in (timeline or []):
        p1s = (tr.get("p1_pokemon_state") or {})
        p2s = (tr.get("p2_pokemon_state") or {})
        for side, ms, opps in [("p1","p1_move_details","p2_pokemon_state"), ("p2","p2_move_details","p1_pokemon_state")]:
            md = tr.get(ms)
            if not md: continue
            mv = normalize_move_name(md.get("name",""))
            if not mv: continue
            # Increment counters for specific move categories
            if mv in STATUS_MOVES: b[f"{side}_status_moves"] += 1.0
            if mv in PARTIAL_TRAP: b[f"{side}_partial_trap"] += 1.0
            if mv in EXPLODE:      b[f"{side}_explodes"]     += 1.0
            attacker = (tr.get(f"{side}_pokemon_state") or {}).get("name","")
            if (md.get("base_power",0) or 0) > 0:
                atypes = NAME_TO_TYPES.get(str(attacker).lower(), [])
                if str(md.get("type","")).lower() in atypes: # Check for STAB
                    b[f"{side}_stab_hits"] += 1.0
    return b


In [117]:
# Count the number of turns each player is affected by paralysis, sleep, or freeze,
# and calculate the differences between players.
def status_turn_counts(timeline) -> Dict[str,float]:
    t = timeline or []
    c = {"p1_par":0.0,"p1_slp":0.0,"p1_frz":0.0,"p2_par":0.0,"p2_slp":0.0,"p2_frz":0.0}
    for turn in t:
        s1 = (turn.get("p1_pokemon_state") or {}).get("status")
        s2 = (turn.get("p2_pokemon_state") or {}).get("status")
        if s1 == "par": c["p1_par"] += 1
        if s1 == "slp": c["p1_slp"] += 1
        if s1 == "frz": c["p1_frz"] += 1
        if s2 == "par": c["p2_par"] += 1
        if s2 == "slp": c["p2_slp"] += 1
        if s2 == "frz": c["p2_frz"] += 1
    c["par_diff"] = c["p2_par"] - c["p1_par"]
    c["slp_diff"] = c["p2_slp"] - c["p1_slp"]
    c["frz_diff"] = c["p2_frz"] - c["p1_frz"]
    return c


Status Effects & Critical Hits

In [118]:
# Extract features related to early-game crowd control (sleep, freeze) infliction.
# This identifies who gains status advantage early in the battle.
def early_cc_features(timeline, window_size=5):
    feats = {"first_cc_winner":0.0, "early_sleep_hits":0.0, "early_freeze_hits":0.0}
    first_cc = None # Track who inflicted the first major status condition
    for _, turn in enumerate(timeline[:window_size] or [], start=1):
        s1 = (turn.get("p1_pokemon_state") or {}).get("status")
        s2 = (turn.get("p2_pokemon_state") or {}).get("status")
        if first_cc is None:
            if s1 in ("slp","frz") and s2 not in ("slp","frz"): first_cc = -1
            if s2 in ("slp","frz") and s1 not in ("slp","frz"): first_cc = +1
        if s2 == "slp": feats["early_sleep_hits"] += 1 # Opponent inflicts sleep
        if s1 == "slp": feats["early_sleep_hits"] -= 1 # Player inflicts sleep (negative for diff)
        if s2 == "frz": feats["early_freeze_hits"] += 1
        if s1 == "frz": feats["early_freeze_hits"] -= 1
    feats["first_cc_winner"] = float(first_cc or 0)
    return feats


In [119]:
# Identify if Hyper Beam resulted in a KO for either player within the timeline.
def hyper_beam_ko_flags(timeline):
    o = {"p1_hb_ko":0.0,"p2_hb_ko":0.0}
    for turn in timeline or []:
        m1 = (turn.get("p1_move_details") or {})
        m2 = (turn.get("p2_move_details") or {})
        # Check if player 1 used Hyper Beam and opponent fainted
        if str(m1.get("name","")).lower().replace(" ","").replace("-","")=="hyperbeam" and (turn.get("p2_pokemon_state") or {}).get("status")=="fnt":
            o["p1_hb_ko"]=1.0
        # Check if player 2 used Hyper Beam and player 1 fainted
        if str(m2.get("name","")).lower().replace(" ","").replace("-","")=="hyperbeam" and (turn.get("p1_pokemon_state") or {}).get("status")=="fnt":
            o["p2_hb_ko"]=1.0
    return o


In [120]:
# Encode player team types and opponent lead types as binary features.
def encode_types_block(player_team: List[Dict[str, Any]], opponent_lead: Dict[str, Any]) -> Dict[str,float]:
    feats = {}
    player_types=[]
    for p in (player_team or []):
        for t in p.get("types") or []:
            tl = str(t or "").lower()
            if tl and tl in ALL_TYPES:
                player_types.append(tl)
    # Binary features for player 1 having each type
    for t in ALL_TYPES:
        feats[f"p1_has_{t}"] = float(player_types.count(t))
    # Binary features for opponent's lead having each type
    opp_types = [str(t or "").lower() for t in (opponent_lead or {}).get("types", []) if str(t or "").lower() in ALL_TYPES]
    for t in ALL_TYPES:
        feats[f"p2_lead_is_{t}"] = 1.0 if t in opp_types else 0.0
    return feats


In [121]:
# Summarize base stats for the player's team and compare them to the opponent's lead Pokémon.
def stat_summary_block(player_team: List[Dict[str, Any]], opponent_lead: Dict[str, Any]) -> Dict[str,float]:
    feats={}
    if not player_team: return feats
    cols = ["base_hp","base_atk","base_def","base_spa","base_spd","base_spe"]
    df = pd.DataFrame([{c:(p.get(c,0) or 0) for c in cols} for p in player_team])
    lead = {c:(opponent_lead.get(c,0) or 0) for c in cols}
    for c in cols:
        feats[f"p1_avg_{c[5:]}"] = float(df[c].mean()) # Player 1 average stat
        feats[f"lead_diff_{c[5:]}"] = float((df.iloc[0][c] if len(df)>0 else 0) - lead[c]) # Difference between P1's first pokemon and P2 lead
    feats["p1_total_avg"] = float(df.sum(axis=1).mean()) # Player 1 average total stats
    feats["p2_lead_total"] = float(sum(lead.values())) # Opponent lead total stats
    feats["total_diff"] = feats["p1_total_avg"] - feats["p2_lead_total"]
    feats["speed_adv"] = float(df["base_spe"].mean() - lead["base_spe"]) # Player 1 average speed advantage
    feats["p1_fastest_spe"] = float(df["base_spe"].max()) # Player 1 fastest pokemon speed
    feats["p1_bulk_sum95"] = float(np.percentile(df["base_hp"]+df["base_def"]+df["base_spd"], 95)) # Player 1 95th percentile bulk
    return feats


In [122]:
# Extract features related to the type and speed matchup between the player's team and the opponent's lead Pokémon.
def lead_matchup_block(player_team, opponent_lead) -> Dict[str,float]:
    feats={}
    if not player_team or not opponent_lead: return feats
    opp_types = [t for t in opponent_lead.get("types",[]) if t!="notype"]
    mults=[]
    for p in player_team:
        for t in p.get("types") or []:
            if t!="notype":
                mults.append(type_multiplier(t, opp_types)) # Type multipliers for player's types vs opponent lead
    if mults:
        feats["lead_type_avg"] = float(np.mean(mults))
        feats["lead_type_max"] = float(np.max(mults))
        feats["lead_type_se"]  = float(sum(1 for m in mults if m>=2.0)) # Count super-effective hits
        feats["lead_type_res"] = float(sum(1 for m in mults if m<=0.5)) # Count resisted hits
    fastest = max((p.get("base_spe",0) for p in player_team), default=0)
    feats["lead_speed_gap_max"] = float(fastest - (opponent_lead.get("base_spe",0) or 0)) # Speed gap with fastest P1 and P2 lead
    return feats


In [123]:
# Build a dictionary mapping Pokemon names to their detailed information from the training data.
# This allows for quick lookup of Pokemon stats and types.
POKEMON_BY_NAME = {}
for b in train_data:
    for p in b.get("p1_team_details") or []:
        nm = str(p.get("name","")).lower()
        if nm and nm not in POKEMON_BY_NAME:
            POKEMON_BY_NAME[nm] = p


In [124]:

# Approximate the opponent's full team based on Pokemon seen in the battle timeline.
# This helps infer the full composition of the opponent's team.
def approximate_p2_team(timeline: List[Dict[str, Any]]) -> List[Dict[str,Any]]:
    team, seen = [], set()
    for nm in species_seen_in_timeline(timeline):
        if nm in POKEMON_BY_NAME and nm not in seen: # Only add if details are known and not already added
            team.append(POKEMON_BY_NAME[nm]); seen.add(nm)
    return team


In [125]:

# Extract features describing overall type advantages and disadvantages between teams.
# This assesses the offensive and defensive capabilities of each team based on types.
def type_matchup_block(player_team, opponent_team_approx) -> Dict[str,float]:
    feats={}
    p_off, o_off = [], [] # Player 1 offense, Opponent offense
    for atk in (player_team or []):
        atk_types = [t for t in atk.get("types",[]) if t!="notype"]
        for d in (opponent_team_approx or []):
            def_types = [t for t in d.get("types",[]) if t!="notype"]
            for t in atk_types:
                p_off.append(type_multiplier(t, def_types)) # Player's offensive type multipliers
    for atk in (opponent_team_approx or []):
        atk_types = [t for t in atk.get("types",[]) if t!="notype"]
        for d in (player_team or []):
            def_types = [t for t in d.get("types",[]) if t!="notype"]
            for t in atk_types:
                o_off.append(type_multiplier(t, def_types)) # Opponent's offensive type multipliers
    if p_off:
        feats["p1_type_avg"] = float(np.mean(p_off))
        feats["p1_type_max"] = float(np.max(p_off))
        feats["p1_type_se"]  = float(sum(1 for m in p_off if m>=2.0))
        feats["p1_type_res"] = float(sum(1 for m in p_off if m<=0.5))
    if o_off:
        feats["p2_type_avg"] = float(np.mean(o_off))
        feats["p2_type_se"]  = float(sum(1 for m in o_off if m>=2.0))
    feats["type_adv"] = feats.get("p1_type_avg",1.0) - feats.get("p2_type_avg",1.0) # Overall type advantage
    feats["type_se_diff"] = feats.get("p1_type_se",0.0) - feats.get("p2_type_se",0.0) # Super effective hit difference
    return feats


In [126]:
# Calculate differences in average base stats between the player's team and the approximated opponent's team.
# This provides a direct comparison of raw power and bulk.

def base_stat_diffs_vs_team(player_team: List[Dict[str, Any]], opponent_team_approx: List[Dict[str, Any]]) -> Dict[str, float]:
    feats = {}
    keys = ["hp","atk","def","spa","spd","spe","special"]
    for k in keys:
        feats[f"base_{k}_diff_vs_team"] = 0.0
    feats["p2_seen_count"] = float(len(opponent_team_approx or []))
    if not player_team or not opponent_team_approx:
        return feats

    cols = ["base_hp","base_atk","base_def","base_spa","base_spd","base_spe"]

    p1_df = pd.DataFrame([{c: float(p.get(c, 0) or 0) for c in cols} for p in player_team])
    p2_df = pd.DataFrame([{c: float(p.get(c, 0) or 0) for c in cols} for p in opponent_team_approx])

    p1_avg = p1_df.mean(axis=0)
    p2_avg = p2_df.mean(axis=0)

    # Calculate stat differences
    feats["base_hp_diff_vs_team"]  = float(p1_avg["base_hp"]  - p2_avg["base_hp"])
    feats["base_atk_diff_vs_team"] = float(p1_avg["base_atk"] - p2_avg["base_atk"])
    feats["base_def_diff_vs_team"] = float(p1_avg["base_def"] - p2_avg["base_def"])
    feats["base_spa_diff_vs_team"] = float(p1_avg["base_spa"] - p2_avg["base_spa"])
    feats["base_spd_diff_vs_team"] = float(p1_avg["base_spd"] - p2_avg["base_spd"])
    feats["base_spe_diff_vs_team"] = float(p1_avg["base_spe"] - p2_avg["base_spe"])

    p1_special = 0.5 * (p1_avg["base_spa"] + p1_avg["base_spd"])
    p2_special = 0.5 * (p2_avg["base_spa"] + p2_avg["base_spd"])
    feats["special_diff_vs_team"] = float(p1_special - p2_special)

    # Max speed and bulk differences
    feats["base_spe_max_gap_vs_team"] = float(p1_df["base_spe"].max() - p2_df["base_spe"].max())
    feats["base_bulk95_gap_vs_team"] = float(
        np.percentile(p1_df["base_hp"]+p1_df["base_def"]+p1_df["base_spd"], 95)
        - np.percentile(p2_df["base_hp"]+p2_df["base_def"]+p2_df["base_spd"], 95)
    )
    return feats



In [127]:
# Generate binary features indicating the presence of specific top species
# in the player's team or among the opponent's seen Pokemon.
def bag_of_species_features(player_team, opponent_seen) -> Dict[str,float]:
    feats={}
    player_names = [str(p.get("name","")).lower() for p in (player_team or [])]
    player_set = set(player_names)
    for s in TOP_SPECIES:
        feats[f"p1_has_{s}"] = 1.0 if s in player_set else 0.0 # Does player 1 have this species?
        feats[f"p2_seen_{s}"] = 1.0 if s in opponent_seen else 0.0 # Has this species been seen on opponent's side?
    return feats



In [128]:
# Calculate basic statistics (mean, std, min, max) for a given array.
# Returns default values if the array is empty.
def window_stats(arr):
    if len(arr)==0:
        return {"mean":0.0,"std":0.0,"min":0.0,"max":0.0}
    return {"mean": float(np.mean(arr)), "std": float(np.std(arr)), "min": float(np.min(arr)), "max": float(np.max(arr))}


In [129]:
# Extract features describing HP differences and their trends over different time windows in the battle.
# This captures dynamic shifts in battle advantage.
def hp_windows_features(timeline):
    t = timeline or []
    p_hp = np.array([safe_float((tr.get("p1_pokemon_state") or {}).get("hp_pct",1.0)) for tr in t], dtype=float)
    o_hp = np.array([safe_float((tr.get("p2_pokemon_state") or {}).get("hp_pct",1.0)) for tr in t], dtype=float)
    diff = p_hp - o_hp # HP difference (P1 - P2)
    deriv = np.diff(diff) if len(diff) >= 2 else np.array([], dtype=float) # Rate of change of HP difference
    def band(a,b):
        sw = window_stats(diff[a:b] if len(diff) else np.array([]))
        sd = window_stats(deriv[a:b] if len(deriv) else np.array([]))
        return {
            f"hp_diff_{a}_{b}_mean": sw["mean"],
            f"hp_diff_{a}_{b}_std":  sw["std"],
            f"hp_diff_{a}_{b}_min":  sw["min"],
            f"hp_diff_{a}_{b}_max":  sw["max"],
            f"hp_mom_{a}_{b}_mean":  sd["mean"],
            f"hp_mom_{a}_{b}_std":   sd["std"],
        }
    feats = {}
    # Extract stats for different time windows
    feats.update(band(0, min(3, len(diff))))
    feats.update(band(0, min(5, len(diff))))
    feats.update(band(5, min(10, len(diff))))
    return feats


In [130]:
# Identify the timing (turn number) of the first paralysis, sleep, or freeze
# inflicted or received by each player, and calculate their differences.
def first_status_timing_features(timeline):
    t = timeline or []
    INF = 999.0 # Use a large number to represent status not inflicted/received
    def find_first(status, side):
        for i, tr in enumerate(t, start=1):
            st = (tr.get(f"{side}_pokemon_state") or {}).get("status")
            if st == status: return float(i)
        return INF
    feats={}
    for st in ("par","slp","frz"):
        o_turn = find_first(st, "p2") # Turn opponent inflicts status on player
        p_turn = find_first(st, "p1") # Turn player inflicts status on opponent
        feats[f"first_{st}_we_inflict"] = o_turn
        feats[f"first_{st}_we_receive"] = p_turn
        feats[f"first_{st}_diff"] = p_turn - o_turn
    # More explicit diff features
    feats["first_par_diff"] = feats.get("first_par_we_receive",999.0) - feats.get("first_par_we_inflict",999.0)
    feats["first_slp_diff"] = feats.get("first_slp_we_receive",999.0) - feats.get("first_slp_we_inflict",999.0)
    feats["first_frz_diff"] = feats.get("first_frz_we_receive",999.0) - feats.get("first_frz_we_inflict",999.0)
    return feats


Transform raw battle data into meaningful features

In [131]:
# Extract core timeline features such as battle length, faints, HP differentials,
# offensive metrics (base power, effective damage), boosts, and status turns.
def timeline_core_features(timeline) -> Dict[str,float]:
    feats = {}
    t = timeline or []
    if not t:
        # Default features for empty timelines
        feats.update({
            "battle_len":0.0,"faint_diff":0.0,"hp_diff_final":0.0,"hp_auc":0.0,
            "time_in_lead":0.0,"first_faint_adv":0.0,"effective_damage_diff":0.0,
            "base_power_diff":0.0,"boost_adv":0.0,"status_adv":0.0,
            "eff_damage_ratio":1.0,"bp_ratio":1.0,
            "p1_switches":0.0, "p2_switches":0.0, "switch_diff":0.0,
            "p1_status_turns":0.0, "p2_status_turns":0.0,
            "p1_faints":0.0, "p2_faints":0.0,
            "hp_diff_std":0.0, "hp_diff_deriv_abs_mean":0.0,
            "p1_time_below_50":0.0, "p2_time_below_50":0.0, "below50_diff":0.0
        })
        return feats
    feats["battle_len"] = float(len(t))
    p_sw = o_sw = 0 # Player switches
    prev_p = prev_o = None
    auc = 0.0; lead = 0.0 # Area under curve for HP diff, time in lead
    p_f = o_f = 0 # Faints
    p_st = o_st = 0 # Status turns
    p_bp = o_bp = 0.0 # Base power dealt
    p_eff = o_eff = 0.0 # Effective damage dealt
    p_boost = o_boost = 0.0 # Stat boosts
    p_f_first = math.inf; o_f_first = math.inf # First faint turn

    for idx, turn in enumerate(t, start=1):
        ps = turn.get("p1_pokemon_state") or {}
        os = turn.get("p2_pokemon_state") or {}
        pm = turn.get("p1_move_details")
        om = turn.get("p2_move_details")
        pn = ps.get("name"); on = os.get("name")

        # Count switches
        if prev_p is not None and pn and pn != prev_p: p_sw += 1
        if prev_o is not None and on and on != prev_o: o_sw += 1
        prev_p, prev_o = pn, on

        php = safe_float(ps.get("hp_pct",1.0), 1.0)
        ohp = safe_float(os.get("hp_pct",1.0), 1.0)
        d = php - ohp # Current HP difference
        auc += d # Accumulate for AUC
        if d > 0: lead += 1 # Accumulate time in lead

        # Count status turns
        pstatus = ps.get("status"); ostatus = os.get("status")
        if pstatus and pstatus not in ("nostatus","fnt"): p_st += 1
        if ostatus and ostatus not in ("nostatus","fnt"): o_st += 1
        if pstatus == "fnt":
            p_f += 1
            if p_f_first == math.inf: p_f_first = idx
        if ostatus == "fnt":
            o_f += 1
            if o_f_first == math.inf: o_f_first = idx

        # Accumulate stat boosts
        p_boost += sum_boosts(ps.get("boosts"))
        o_boost += sum_boosts(os.get("boosts"))

        # Calculate base power and effective damage
        if pm and float(pm.get("base_power") or 0) > 0:
            adj = stab_adjusted_bp(pm, str(pn or "").lower())
            mt = str(pm.get("type") or "").lower()
            otypes = [x for x in os.get("types",[]) if x!="notype"]
            p_bp += adj
            p_eff += adj * type_multiplier(mt, otypes)
        if om and float(om.get("base_power") or 0) > 0:
            adj = stab_adjusted_bp(om, str(on or "").lower())
            mt = str(om.get("type") or "").lower()
            ptypes = [x for x in ps.get("types",[]) if x!="notype"]
            o_bp += adj
            o_eff += adj * type_multiplier(mt, ptypes)

    feats["p1_switches"] = float(p_sw)
    feats["p2_switches"] = float(o_sw)
    feats["switch_diff"] = float(o_sw - p_sw)
    feats["hp_auc"] = float(auc)
    feats["time_in_lead"] = float(lead)
    # Final HP difference at the end of the battle
    feats["hp_diff_final"] = float(
        safe_float((t[-1].get("p1_pokemon_state") or {}).get("hp_pct",1.0)) -
        safe_float((t[-1].get("p2_pokemon_state") or {}).get("hp_pct",1.0))
    )
    feats["p1_status_turns"] = float(p_st)
    feats["p2_status_turns"] = float(o_st)
    feats["status_adv"] = float(o_st - p_st)
    feats["p1_faints"] = float(p_f)
    feats["p2_faints"] = float(o_f)
    feats["faint_diff"] = float(o_f - p_f)
    feats["base_power_diff"] = float(p_bp - o_bp)
    feats["effective_damage_diff"] = float(p_eff - o_eff)
    feats["boost_adv"] = float(p_boost - o_boost)

    pft = p_f_first if p_f_first < math.inf else 999.0
    oft = o_f_first if o_f_first < math.inf else 999.0
    feats["first_faint_adv"] = float(oft - pft)

    p_series = np.array([safe_float((tr.get("p1_pokemon_state") or {}).get("hp_pct",1.0)) for tr in t])
    o_series = np.array([safe_float((tr.get("p2_pokemon_state") or {}).get("hp_pct",1.0)) for tr in t])
    diff_series = p_series - o_series
    feats["hp_diff_std"] = float(np.std(diff_series)) if len(diff_series)>1 else 0.0
    feats["hp_diff_deriv_abs_mean"] = float(np.mean(np.abs(np.diff(diff_series)))) if len(diff_series)>2 else 0.0

    # Calculate time spent below certain HP thresholds
    def time_below(arr, thr):
      if len(arr)==0: return 0.0
      return float(np.mean(arr <= thr))

    feats["p1_time_below_50"] = time_below(p_series, 0.5)
    feats["p2_time_below_50"] = time_below(o_series, 0.5)
    feats["below50_diff"] = feats["p2_time_below_50"] - feats["p1_time_below_50"]
    feats["eff_damage_ratio"] = (p_eff + 1e-3) / (o_eff + 1e-3)
    feats["bp_ratio"] = (p_bp + 1e-3) / (o_bp + 1e-3)
    return feats




In [132]:
# Create interaction features by combining existing features to capture more complex battle dynamics.
def interaction_features(fd: Dict[str,float]) -> Dict[str,float]:
    out={}
    # Example interaction: Faint difference combined with final HP difference
    if "faint_diff" in fd and "hp_diff_final" in fd:
        out["faint_hp_compound"] = fd["faint_diff"] * fd["hp_diff_final"]
    # Status advantage combined with effective damage difference
    if "status_adv" in fd and "effective_damage_diff" in fd:
        out["status_damage_compound"] = fd["status_adv"] * fd["effective_damage_diff"]
    # Type advantage combined with effective damage difference
    if "type_adv" in fd and "effective_damage_diff" in fd:
        out["type_damage_synergy"] = fd["type_adv"] * fd["effective_damage_diff"]
    # HP area under curve combined with time in lead for control score
    if "hp_auc" in fd and "time_in_lead" in fd:
        out["lead_control_score"] = fd["hp_auc"] * (1.0 + 0.01 * fd["time_in_lead"])
    return out


In [133]:
# Extract features related to player's Pokémon speed relative to opponent's lead,
# indicating potential for

In [134]:
# Add compact polynomial features and apply clipping to some features to handle outliers.
def add_compact_polys(df: pd.DataFrame) -> pd.DataFrame:
    new_df = df.copy()
    # Calculate faint ratio, handling division by zero
    if {"p1_faints","p2_faints"}.issubset(new_df.columns):
        new_df["faint_ratio_safe"] = (new_df["p2_faints"] + 0.5) / (new_df["p1_faints"] + 0.5)
    # Calculate lead share (proportion of battle in lead), handling division by zero
    if {"time_in_lead","battle_len"}.issubset(new_df.columns):
        new_df["lead_share"] = np.where(new_df["battle_len"] > 0, new_df["time_in_lead"] / new_df["battle_len"], 0.0)
    # Normalize HP Area Under Curve
    if {"hp_auc","battle_len"}.issubset(new_df.columns):
        new_df["hp_auc_norm"] = np.where(new_df["battle_len"] > 0, new_df["hp_auc"]/new_df["battle_len"], 0.0)
    # Clip certain columns to the 1st and 99th percentiles to reduce outlier impact
    for col in ("hp_diff_std","hp_diff_deriv_abs_mean","effective_damage_diff","hp_auc","lead_control_score"):
        if col in new_df.columns:
            new_df[col] = np.clip(new_df[col], np.percentile(new_df[col],1), np.percentile(new_df[col],99))
    # Define columns for polynomial feature generation
    poly_cols = [c for c in [
        "lead_share","hp_auc_norm","effective_damage_diff","base_power_diff",
        "type_adv","lead_control_score","faint_diff","status_adv","lead_speed_gap_max",
        "eff_damage_ratio","bp_ratio","below50_diff"
    ] if c in new_df.columns]
    # Generate polynomial features if relevant columns exist
    if poly_cols:
        poly = PolynomialFeatures(degree=2, include_bias=False)
        feats = poly.fit_transform(new_df[poly_cols])
        names = [f"poly_{n}" for n in poly.get_feature_names_out(poly_cols)]
        new_df = pd.concat([new_df, pd.DataFrame(feats, columns=names, index=new_df.index)], axis=1)
    return new_df


Captures who's winning right now

In [135]:
# Calculate momentum features based on HP differences over various turn windows.
def momentum_features(timeline):
    feats={}
    timeline = timeline or []
    if len(timeline) < 1:
        # Initialize momentum features to 0 if timeline is too short
        for cp in [5,10,15,20,25]:
            feats[f"momentum_{cp}"]=0.0
            feats[f"momentum_trend_{cp}"]=0.0
        return feats
    for cp in [5,10,15,20,25]: # Iterate through different capture points (turn cutoffs)
        if cp <= len(timeline):
            turns = timeline[:cp] # Consider turns up to the current capture point
            diffs=[]
            for tr in turns:
                p1 = safe_float((tr.get("p1_pokemon_state") or {}).get("hp_pct",1.0))
                p2 = safe_float((tr.get("p2_pokemon_state") or {}).get("hp_pct",1.0))
                diffs.append(p1 - p2) # Calculate HP difference (P1 - P2)
            # Calculate mean HP difference for the last few turns (momentum)
            feats[f"momentum_{cp}"] = float(np.mean(diffs[-3:])) if len(diffs)>=1 else 0.0
            # Calculate trend of HP difference for the last few turns (momentum trend)
            feats[f"momentum_trend_{cp}"] = float(np.mean(np.diff(diffs[-4:]))) if len(diffs) > 3 else 0.0
        else:
            # Set momentum features to 0 if the timeline doesn't reach the capture point
            feats[f"momentum_{cp}"]=0.0
            feats[f"momentum_trend_{cp}"]=0.0
    return feats


In [136]:
# Extract features related to comebacks and largest HP swings in a battle.
def critical_turn_features(timeline):
    feats={"p1_comebacks":0.0,"p2_comebacks":0.0,"largest_swing_p1":0.0,"largest_swing_p2":0.0}
    timeline = timeline or []
    if len(timeline) < 2: return feats # Need at least two turns to calculate changes
    diffs=[]
    for tr in timeline:
        p1 = safe_float((tr.get("p1_pokemon_state") or {}).get("hp_pct",1.0))
        p2 = safe_float((tr.get("p2_pokemon_state") or {}).get("hp_pct",1.0))
        diffs.append(p1 - p2) # Store HP differences for each turn
    changes = np.diff(diffs) # Calculate turn-over-turn changes in HP difference
    feats["largest_swing_p1"] = float(np.max(changes)) if len(changes)>0 else 0.0 # Largest positive swing for P1
    feats["largest_swing_p2"] = float(np.min(changes)) if len(changes)>0 else 0.0 # Largest negative swing for P1 (positive for P2)
    # Count comebacks where the lead changes from negative to positive for P1 or vice versa
    for i in range(1, len(diffs)):
        if diffs[i-1] < 0 and diffs[i] > 0: feats["p1_comebacks"] += 1
        elif diffs[i-1] > 0 and diffs[i] < 0: feats["p2_comebacks"] += 1
    return feats


In [137]:
# Calculate features related to super-effective and resisted hits for both players.
def move_effectiveness_features(timeline):
    feats={}
    p1_se = p2_se = 0 # Player 1 and Player 2 super-effective hits
    p1_res = p2_res = 0 # Player 1 and Player 2 resisted hits
    for tr in timeline or []:
        p1m = tr.get("p1_move_details"); p2m = tr.get("p2_move_details")
        p1s = tr.get("p1_pokemon_state") or {}
        p2s = tr.get("p2_pokemon_state") or {}
        # Check P1's move effectiveness
        if p1m and float(p1m.get("base_power",0)) > 0:
            mtype = str(p1m.get("type","")).lower()
            def_types = [t for t in p2s.get("types", []) if t != "notype"]
            mult = type_multiplier(mtype, def_types)
            if mult >= 2.0: p1_se += 1 # Super-effective hit
            elif mult <= 0.5: p1_res += 1 # Resisted hit
        # Check P2's move effectiveness
        if p2m and float(p2m.get("base_power",0)) > 0:
            mtype = str(p2m.get("type","")).lower()
            def_types = [t for t in p1s.get("types", []) if t != "notype"]
            mult = type_multiplier(mtype, def_types)
            if mult >= 2.0: p2_se += 1
            elif mult <= 0.5: p2_res += 1
    feats["effective_hits_diff"] = float(p1_se - p2_se) # Difference in super-effective hits
    feats["resisted_hits_diff"] = float(p2_res - p1_res) # Difference in resisted hits
    feats["hit_quality_score"] = feats["effective_hits_diff"] - feats["resisted_hits_diff"] # Combined quality score
    return feats


In [138]:
def crit_pressure_features(player_team, opponent_lead):
    sp = [p.get("base_spe",0) for p in (player_team or [])]
    p80 = np.percentile(sp, 80) if sp else 0.0
    gap = (max(sp) if sp else 0.0) - float((opponent_lead or {}).get("base_spe",0))
    return {"crit_pressure_p80": float(p80), "lead_crit_gap": float(gap)}

In [139]:
# Orchestrates the extraction of a comprehensive set of features from a single battle record.
def extract_example_features(battle: Dict[str,Any], max_turn=None) -> Dict[str,float]:
    feats = {"battle_id": battle.get("battle_id")}
    pteam = battle.get("p1_team_details") or []
    opp_lead = battle.get("p2_lead_details") or {}
    timeline = battle.get("battle_timeline") or []
    if max_turn is not None:
        timeline = [t for t in timeline if int(t.get("turn",0)) <= max_turn] # Truncate timeline if max_turn is specified
    opp_approx = approximate_p2_team(timeline) # Approximate opponent's team from timeline

    # Add various feature blocks
    feats.update(prior_features_for_lead((opp_lead or {}).get("name","")))
    feats.update(encode_types_block(pteam, opp_lead))
    feats.update(stat_summary_block(pteam, opp_lead))
    feats.update(lead_matchup_block(pteam, opp_lead))
    feats.update(base_stat_diffs_vs_team(pteam, opp_approx))
    feats.update(type_matchup_block(pteam, opp_approx))
    feats.update(timeline_core_features(timeline))
    feats.update(interaction_features(feats))
    feats.update(bag_of_species_features(pteam, species_seen_in_timeline(timeline)))
    feats.update(timeline_move_counts(timeline))
    feats.update(move_buckets_features(timeline))
    feats.update(status_turn_counts(timeline))
    feats.update(early_cc_features(timeline, window_size=5))
    feats.update(hyper_beam_ko_flags(timeline))
    feats.update(crit_pressure_features(pteam, opp_lead)) # This function is not defined in the provided notebook. Assuming it exists elsewhere.
    feats.update(hp_windows_features(timeline))
    feats.update(first_status_timing_features(timeline))
    feats.update(momentum_features(timeline))
    feats.update(critical_turn_features(timeline))
    feats.update(move_effectiveness_features(timeline))

    feats.update(timeline_additions(timeline))
    return feats


In [140]:
# Perform parallel feature extraction for a list of battles, handling train/test data and polynomial feature generation.
def parallel_feature_extraction(battles, is_train=True, n_jobs=-1, max_turn=None):
    if n_jobs == -1:
        n_jobs = multiprocessing.cpu_count() # Use all available CPU cores by default
    # Extract features for each battle in parallel using joblib
    rows = Parallel(n_jobs=n_jobs, backend='loky')(
        delayed(extract_example_features)(b, max_turn=max_turn) for b in tqdm(battles, desc=f"Feature extraction (max_turn={max_turn})")
    )
    y = []
    if is_train:
        y = [1 if b.get("player_won") else 0 for b in battles] # Extract target variable for training data
    df = pd.DataFrame(rows)
    ids = df["battle_id"].values
    X = df.drop(columns=["battle_id"]).fillna(0.0) # Drop battle_id and fill NaN values with 0.0
    X = add_compact_polys(X) # Add polynomial features
    return (X, np.array(y), ids) if is_train else (X, ids)


In [141]:
# Apply Out-of-Fold (OOF) target encoding for presence-based features.
# This helps prevent data leakage by computing target statistics only on out-of-fold data.
def target_encode_presence_oof(X: pd.DataFrame, y: np.ndarray, prefix="p1_has_", folds=5):
    skf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=RANDOM_STATE)
    cols = [c for c in X.columns if c.startswith(prefix)] # Identify columns to be target encoded
    if not cols: return X
    te = pd.DataFrame(0.5, index=X.index, columns=[c+"_te" for c in cols]) # Initialize target encoding DataFrame
    for tr_idx, va_idx in skf.split(X, y):
        Xtr = X.iloc[tr_idx][cols]; ytr = y[tr_idx]
        means = {}
        for c in cols:
            # Calculate mean of target for rows where the feature is present (greater than 0)
            pos = (((Xtr[c] > 0).astype(int)) & (ytr == 1)).sum()
            cnt = (Xtr[c] > 0).sum()
            means[c] = (pos + 1.0) / (cnt + 2.0) if cnt > 0 else 0.5 # Apply Laplace smoothing
        for c in cols:
            # Apply the calculated mean to the validation set, or 0.5 if feature is absent
            te.loc[te.index[va_idx], c+"_te"] = np.where(X.iloc[va_idx][c] > 0, means[c], 0.5)
    return pd.concat([X, te], axis=1) # Concatenate original features with target-encoded features


In [142]:
# Apply target encoding to test data using statistics learned from the training data.
def target_encode_presence_test(Xtr: pd.DataFrame, ytr: np.ndarray, Xte: pd.DataFrame, prefix="p1_has_"):
    cols = [c for c in Xtr.columns if c.startswith(prefix)] # Identify columns used for target encoding in training data
    for c in cols:
        if c not in Xte.columns:
            Xte[c] = 0.0 # Add missing columns to test data, initializing to 0.0
    means = {}
    for c in cols:
        # Calculate mean of target for rows where the feature is present in training data
        pos = (((Xtr[c] > 0).astype(int)) & (ytr == 1)).sum()
        cnt = (Xtr[c] > 0).sum()
        means[c] = (pos + 1.0) / (cnt + 2.0) if cnt > 0 else 0.5 # Apply Laplace smoothing
    for c in cols:
        # Apply the calculated mean to the test data, or 0.5 if feature is absent
        Xte[c+"_te"] = np.where(Xte[c] > 0, means[c], 0.5)
    return Xte


In [143]:
# Prune features with low variance or high correlation to reduce dimensionality and multicollinearity.
def prune_low_var_high_corr(features: pd.DataFrame, correlation_cutoff=0.995, variance_cutoff=1e-10):
    # Filter out features with variance below the cutoff
    filtered = features.loc[:, features.var(numeric_only=True) > variance_cutoff]
    # Calculate absolute correlation matrix
    corr = filtered.corr(numeric_only=True).abs()
    # Select upper triangle of correlation matrix
    up = corr.where(np.triu(np.ones(corr.shape), k=1).astype(bool))
    # Identify columns to drop based on high correlation
    drop = [c for c in up.columns if any(up[c] > correlation_cutoff)]
    return filtered.drop(columns=drop, errors="ignore") # Drop identified columns


In [144]:
# Get probability predictions from a model, handling different types of model outputs.
def model_proba(model, X):
    if hasattr(model, "predict_proba"):
        return model.predict_proba(X)[:,1] # Return probability of the positive class
    if hasattr(model, "decision_function"):
        d = model.decision_function(X) # Convert decision function output to probabilities
        return 1.0 / (1.0 + np.exp(-d))
    pred = model.predict(X).astype(float) # Fallback for models without predict_proba/decision_function
    return pred * 0.999 + 0.0005 # Scale to approximate probabilities


In [145]:
# Fit a list of models to the provided data.
def fit_models(model_list, X, y):
    fitted=[]
    for m in tqdm(model_list, desc="Training base models"):
        m.fit(X, y) # Train each model
        fitted.append(m)
    return fitted


In [146]:

def stack_probas(model_list, X) -> np.ndarray:
    # Stack probability predictions from multiple models into a single NumPy array.
    return np.vstack([model_proba(m, X) for m in model_list])


For each model select most predictive features to prevent overfitting and generate unbiased predictions on training data using cross-validation.

In [147]:
# Perform Out-of-Fold (OOF) stacking with inner feature selection for base models.
# This method ensures that feature selection is done independently for each fold,
# preventing data leakage and producing unbiased OOF predictions.
def oof_stack_with_inner_fs(model_builder, X, y, k_features=450, folds=5):
    skf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=RANDOM_STATE)
    num_models = len(model_builder()) # Get the number of base models
    N = len(y)
    oof = np.zeros((num_models, N), dtype=float) # Array to store OOF predictions
    selected_cols_union = set() # To store the union of selected columns across all folds

    for _, (tr, va) in enumerate(tqdm(skf.split(X, y), total=folds, desc="OOF Stacking + inner FS")):
        vt = VarianceThreshold(1e-6) # Remove features with very low variance
        Xtr_vt = vt.fit_transform(X.iloc[tr])
        cols_vt = X.columns[vt.get_support()]

        skb = SelectKBest(mutual_info_classif, k=min(k_features, len(cols_vt))) # Select top K features based on mutual information
        skb.fit(Xtr_vt, y[tr])
        cols_fold = cols_vt[skb.get_support()] # Features selected for the current fold
        selected_cols_union.update(cols_fold) # Add to the union of selected columns

        Xtr = X.iloc[tr][cols_fold] # Training data for the current fold with selected features
        Xva = X.iloc[va][cols_fold] # Validation data for the current fold with selected features

        fold_models = model_builder() # Re-initialize models for each fold to ensure independence
        for m in fold_models:
            m.fit(Xtr, y[tr]) # Train base models on the training fold
        fold_probs = [model_proba(m, Xva) for m in fold_models] # Get predictions on the validation fold
        for mi, pr in enumerate(fold_probs):
            oof[mi, va] = pr # Store OOF predictions

    selected_cols_union = list(sorted(selected_cols_union)) # Convert to sorted list
    return oof, selected_cols_union


Find optimal probability threshold for converting predictions to binary decisions

In [148]:
# Find the optimal probability threshold for converting continuous predictions into binary classifications.
def tune_threshold(probs: np.ndarray, y: np.ndarray, low=0.2, high=0.8, steps=601) -> float:
    grid = np.linspace(low, high, steps) # Create a grid of possible thresholds
    best_t, best_acc = 0.5, -1.0 # Initialize best threshold and accuracy
    for t in grid:
        acc = accuracy_score(y, (probs >= t).astype(int)) # Calculate accuracy for current threshold
        if acc > best_acc:
            best_acc, best_t = acc, float(t) # Update if a better threshold is found
    return best_t


Split features into 2 groups to create model diversity.

In [149]:
# Builds feature views (subsets of features) to create diversity among base models.
def build_feature_views(df: pd.DataFrame) -> Dict[str, List[str]]:
    cols = list(df.columns)

    views = {
        "timeline": [],
        "combat": [],
    }

    # Assign features to 'timeline' or 'combat' view based on their prefixes
    for c in cols:
        if c.startswith((
            "battle_len", "hp_", "hp_diff_", "hp_mom_",
            "time_in_lead", "lead_share", "hp_auc", "hp_auc_norm",
            "lead_control_score", "switch_", "p1_time_below_50",
            "p2_time_below_50", "below50_diff",
            "faint_diff", "p1_faints", "p2_faints",
            "status_adv", "p1_status_turns", "p2_status_turns",
            "first_faint_adv", "first_par_", "first_slp_", "first_frz_",
            "early_status_score", "early_sleep_hits", "early_freeze_hits",
            "p1_hb_ko", "p2_hb_ko",
            "p1_life_turn30", "p2_life_turn30", "life_diff_turn30",
            "par_diff", "slp_diff", "frz_diff",
            "first_cc_winner"
        )):
            views["timeline"].append(c)
        else:
            views["combat"].append(c)

    # Remove any empty views
    views = {name: v for name, v in views.items() if len(v) > 0}

    print("\n" + "="*60)
    print("FEATURE VIEWS:")
    print("="*60)
    for name, v in views.items():
        print(f"  {name:15}: {len(v):4} features")
    print("="*60 + "\n")

    return views


In [150]:

# Execute the feature extraction, preprocessing, and base model training for a given turn cutoff.
def run_one_cutoff(max_turn, builder):
    # Extract features for training and testing data
    Xtr, ytr, _ = parallel_feature_extraction(
        train_data, is_train=True, n_jobs=-1, max_turn=max_turn
    )
    Xte, ids = parallel_feature_extraction(
        test_data, is_train=False, n_jobs=-1, max_turn=max_turn
    )

    # Apply target encoding to training data (Out-of-Fold to prevent leakage)
    Xtr = target_encode_presence_oof(Xtr, ytr, prefix="p1_has_")
    Xtr = target_encode_presence_oof(Xtr, ytr, prefix="p2_seen_")

    # Reindex test data to match training columns and apply target encoding
    Xte = Xte.reindex(columns=Xtr.columns, fill_value=0.0)
    Xte = target_encode_presence_test(Xtr, ytr, Xte, prefix="p1_has_")
    Xte = target_encode_presence_test(Xtr, ytr, Xte, prefix="p2_seen_")

    # Prune features in training data based on variance and correlation
    Xtr = prune_low_var_high_corr(Xtr)
    # Reindex test data again to match the pruned training columns
    Xte = Xte.reindex(columns=Xtr.columns, fill_value=0.0)

    views = build_feature_views(Xtr) # Define feature views for diverse base models

    all_oof_blocks = [] # Stores OOF predictions from base models
    all_train_blocks = [] # Stores predictions on full training set from base models
    all_test_blocks = [] # Stores predictions on test set from base models

    for view_name, view_cols in views.items():
        print(f"\n=== View '{view_name}' with {len(view_cols)} features ===")

        Xtr_view = Xtr[view_cols]
        Xte_view = Xte[view_cols]

        # Perform OOF stacking with inner feature selection for the current view
        base_oof_view, selected_cols_union = oof_stack_with_inner_fs(
            builder,
            Xtr_view,
            ytr,
            k_features=min(800, len(view_cols)),
            folds=5
        )

        selected_cols_union = list(selected_cols_union)
        Xtr_sel_view = Xtr_view[selected_cols_union]
        Xte_sel_view = Xte_view.reindex(columns=selected_cols_union, fill_value=0.0)

        print(f"View '{view_name}': {len(selected_cols_union)} columns after inner FS")

        # Train final base models for the current view on the full training set (with selected features)
        final_base_models_view = fit_models(builder(), Xtr_sel_view, ytr)

        # Get predictions from the final base models
        train_base_probs_view = stack_probas(final_base_models_view, Xtr_sel_view)
        test_base_probs_view  = stack_probas(final_base_models_view, Xte_sel_view)

        all_oof_blocks.append(base_oof_view)
        all_train_blocks.append(train_base_probs_view)
        all_test_blocks.append(test_base_probs_view)

    # Concatenate predictions from all views
    base_oof_probs = np.vstack(all_oof_blocks)
    train_base_probs = np.vstack(all_train_blocks)
    test_base_probs  = np.vstack(all_test_blocks)

    print(f"\n[cutoff={max_turn}] Total base models after views: {base_oof_probs.shape[0]}")

    return {
        "Xtr_sel": Xtr,
        "Xte_sel": Xte,
        "ytr": ytr,
        "test_ids": ids,
        "base_oof_probs": base_oof_probs,
        "train_base_probs": train_base_probs,
        "test_base_probs": test_base_probs,
    }


In [151]:
# Recalculate and print the size of the tracked moves vocabulary.
TRACK_MOVES, MOVE_FREQ = build_move_vocab_by_coverage(train_data, target_coverage=0.985, min_freq=3)
print(f"TRACK_MOVES size: {len(TRACK_MOVES)}")


TRACK_MOVES size: 33


-----

In [152]:
# Define a function to construct a list of diverse base models for stacking.
def build_base_models():

    models = [
        # Ridge Classifier with StandardScaler for regularization
        Pipeline([("scaler", StandardScaler()), ("ridge", RidgeClassifierCV(alphas=np.logspace(-3, 3, 20)))]),

        # Quadratic Discriminant Analysis with a regularization parameter
        Pipeline([("scaler", StandardScaler()), ("qda", QuadraticDiscriminantAnalysis(reg_param=0.01))]),

        # Logistic Regression with L2 regularization and increased max_iter
        Pipeline([("scaler", StandardScaler()), ("logreg", LogisticRegression(C=1.0, max_iter=2000, random_state=RANDOM_STATE))]),

        # Gradient Boosting Classifier (tree-based ensemble)
        GradientBoostingClassifier(n_estimators=300, learning_rate=0.05, max_depth=5, random_state=RANDOM_STATE),

        # Support Vector Classifier with RBF kernel and probability calibration
        Pipeline([("scaler", StandardScaler()), ("svc", SVC(kernel="rbf", C=2.5, gamma="scale", probability=True, random_state=RANDOM_STATE))]),

        # Random Forest Classifier with specified estimators, depth, and min_samples_split
        RandomForestClassifier(n_estimators=500, max_depth=15, min_samples_split=10, n_jobs=-1, random_state=RANDOM_STATE),
        # AdaBoost Classifier (boosting ensemble)
        AdaBoostClassifier(n_estimators=200, learning_rate=0.8, random_state=RANDOM_STATE),

        # Calibrated Extra Trees Classifier for robust probability predictions
        CalibratedClassifierCV(
            estimator=ExtraTreesClassifier(
                n_estimators=1200, max_depth=None, max_features=0.6,
                min_samples_split=4, min_samples_leaf=2, bootstrap=True,
                n_jobs=-1, random_state=RANDOM_STATE
            ),
            method="isotonic", cv=3
        ),

        # Histogram-based Gradient Boosting Classifier for speed and performance
        HistGradientBoostingClassifier(max_iter=500, learning_rate=0.03, max_depth=10, l2_regularization=1.0, random_state=RANDOM_STATE),

        # Linear Discriminant Analysis
        Pipeline([("scaler", StandardScaler()), ("lda", LinearDiscriminantAnalysis())]), # Adding LDA based on output from m3Nv2gTivPMc

    ]

    return tuple(models)


In [153]:
# Assign the base model builder function to a variable 'builder' for later use.
builder = build_base_models


In [None]:
# Iterate through defined turn cutoffs, running the feature extraction and base model training pipeline for each.
per_cut = []
for N in TURN_CUTOFFS:
  per_cut.append(run_one_cutoff(N, builder))


-----

In [155]:
# Aggregate Out-of-Fold (OOF), training, and test predictions from all cutoffs and base models.
base_oof_all = np.vstack([d["base_oof_probs"] for d in per_cut])
base_train_all = np.vstack([d["train_base_probs"] for d in per_cut])
base_test_all = np.vstack([d["test_base_probs"] for d in per_cut])


In [156]:
# Extract the true labels for training and battle IDs for testing from the first cutoff's results.
y_train = per_cut[0]["ytr"]
test_ids = per_cut[0]["test_ids"]


In [157]:
# Calculate and print the dimensions of the stacking ensemble for clarity.
n_cutoffs = len(TURN_CUTOFFS)
n_base_models = 10 # Updated to 10 as LDA was added in build_base_models
n_views = 2
n_total_models = base_oof_all.shape[0]

print(f"Turn cutoffs: {TURN_CUTOFFS}")
print(f"Base model types: {n_base_models}")
print(f"Feature views: {n_views}")
print(f"Total models: {n_total_models} ({n_cutoffs} \u00d7 {n_base_models} \u00d7 {n_views})")


Turn cutoffs: [None]
Base model types: 10
Feature views: 2
Total models: 20 (1 × 10 × 2)


In [158]:
# Print summary information about the number of base models for the stacking ensemble.
print(f"Turn cutoffs: {TURN_CUTOFFS}")
print(f"Base models per cutoff: {n_base_models}") # This variable was defined as 10 now, reflecting the added LDA model
print(f"Total models: {n_total_models}")


Turn cutoffs: [None]
Base models per cutoff: 10
Total models: 20


In [159]:
# Evaluate the accuracy of each individual base model's OOF predictions.
cutoff_names = [f"full" if c is None else f"t{c}" for c in TURN_CUTOFFS]
model_names_base = ['Ridge', 'QDA', 'LogReg', 'GB', 'SVC', 'RandomF', 'AdaB', 'ExtraT', 'HistGB', 'LDA'] # Added LDA
view_names = ['timeline', 'combat']

for cutoff_idx, cutoff_name in enumerate(cutoff_names):
    for view_idx, view_name in enumerate(view_names):
        print(f"\nCutoff: {cutoff_name}, View: {view_name}")
        for model_idx, model_name in enumerate(model_names_base):
            # Calculate the global index for the current base model's predictions
            global_idx = cutoff_idx * (n_base_models * n_views) + view_idx * n_base_models + model_idx

            if global_idx < n_total_models:
                preds = (base_oof_all[global_idx] >= 0.5).astype(int) # Convert probabilities to binary predictions
                acc = accuracy_score(y_train, preds) # Calculate accuracy
                print(f"  {model_name:10} @ {view_name:12}: {acc:.4f}")



Cutoff: full, View: timeline
  Ridge      @ timeline    : 0.8201
  QDA        @ timeline    : 0.7816
  LogReg     @ timeline    : 0.8193
  GB         @ timeline    : 0.8159
  SVC        @ timeline    : 0.8106
  RandomF    @ timeline    : 0.8109
  AdaB       @ timeline    : 0.8134
  ExtraT     @ timeline    : 0.8115
  HistGB     @ timeline    : 0.8184
  LDA        @ timeline    : 0.8205

Cutoff: full, View: combat
  Ridge      @ combat      : 0.8362
  QDA        @ combat      : 0.7883
  LogReg     @ combat      : 0.8324
  GB         @ combat      : 0.8282
  SVC        @ combat      : 0.8282
  RandomF    @ combat      : 0.8142
  AdaB       @ combat      : 0.8258
  ExtraT     @ combat      : 0.8224
  HistGB     @ combat      : 0.8309
  LDA        @ combat      : 0.8334


In [160]:

from scipy.stats import spearmanr

# Calculate pairwise Spearman correlations between base models' OOF predictions.
correlations = []
for i in range(n_total_models):
    for j in range(i+1, n_total_models):
        corr, _ = spearmanr(base_oof_all[i], base_oof_all[j]) # Spearman correlation for rank agreement
        correlations.append(corr)

avg_corr = np.mean(correlations)
min_corr = np.min(correlations)
max_corr = np.max(correlations)

print(f"Average pairwise correlation: {avg_corr:.3f}")
print(f"Min correlation: {min_corr:.3f}")
print(f"Max correlation: {max_corr:.3f}")


Average pairwise correlation: 0.888
Min correlation: 0.719
Max correlation: 1.000


In [161]:
# Prepare the input matrix for the meta-model, concatenating base model predictions with a bias term.
def meta_matrix(base_probs: np.ndarray) -> np.ndarray:
    M = base_probs.T # Transpose to have (num_samples, num_models)
    M = np.concatenate([M, np.ones((M.shape[0], 1))], axis=1) # Add a bias column of ones
    return M


In [162]:
# Create the meta-feature matrices for OOF and test predictions.
X_meta_oof = meta_matrix(base_oof_all)
X_meta_test = meta_matrix(base_test_all)


In [163]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import accuracy_score, roc_auc_score
from scipy.stats import spearmanr
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict, StratifiedKFold


In [187]:

# Train the meta-model (Logistic Regression) using cross-validation to get OOF predictions
# and then fit on the full dataset to predict on the test data.
for C in [0.1, 0.2, 0.3, 0.5, 0.7, 1.0]:
    meta_model = LogisticRegression(
        penalty='l2',
        C=C,
        fit_intercept=True,
        solver='lbfgs',
        max_iter=5000,
        random_state=RANDOM_STATE
    )

    meta_oof_probs = cross_val_predict(
        meta_model, X_meta_oof, y_train,
        cv=10,
        method='predict_proba'
    )[:, 1]

    thr = tune_threshold(meta_oof_probs, y_train, low=0.3, high=0.7, steps=401)
    acc = accuracy_score(y_train, (meta_oof_probs >= thr).astype(int))
    auc = roc_auc_score(y_train, meta_oof_probs)

    print(f"C={C}: acc={acc:.4f}, auc={auc:.4f}, thr={thr:.3f}")



# Train the final meta-model on the entire OOF meta-features
meta_model.fit(X_meta_oof, y_train)
# Get predictions on the test set using the trained meta-model
meta_test_probs = meta_model.predict_proba(X_meta_test)[:, 1]


C=0.1: acc=0.8401, auc=0.9063, thr=0.488
C=0.2: acc=0.8397, auc=0.9066, thr=0.484
C=0.3: acc=0.8398, auc=0.9067, thr=0.542
C=0.5: acc=0.8399, auc=0.9069, thr=0.497
C=0.7: acc=0.8402, auc=0.9070, thr=0.493
C=1.0: acc=0.8404, auc=0.9071, thr=0.508


In [188]:
# Calculate the average of all base models' OOF predictions and evaluate its accuracy and AUC.
base_oof_avg = np.mean(base_oof_all, axis=0)
base_acc = accuracy_score(y_train, (base_oof_avg >= 0.5).astype(int))
base_auc = roc_auc_score(y_train, base_oof_avg)


In [189]:
# Tune the optimal threshold for the meta-model's OOF predictions and evaluate its accuracy and AUC.
global_thr = tune_threshold(meta_oof_probs, y_train, low=0.3, high=0.7, steps=401)
meta_acc = accuracy_score(y_train, (meta_oof_probs >= global_thr).astype(int))
meta_auc = roc_auc_score(y_train, meta_oof_probs)


In [190]:
# Print the performance comparison between the average base model and the meta-model.
print(f"   Base OOF: {base_acc:.4f} (AUC: {base_auc:.4f})")
print(f"   Meta OOF @ {global_thr:.3f}: {meta_acc:.4f} (AUC: {meta_auc:.4f})")
print(f"   Improvement: {meta_acc - base_acc:+.4f} ({(meta_acc/base_acc - 1)*100:+.1f}%)")


   Base OOF: 0.8334 (AUC: 0.9008)
   Meta OOF @ 0.508: 0.8404 (AUC: 0.9071)
   Improvement: +0.0070 (+0.8%)


In [191]:
# Print the final meta-model OOF accuracy.
print(f"\n{meta_acc:.4f}")



0.8404


In [192]:

# Generate final predictions on the test set and save them to a CSV file.
final_preds = (meta_test_probs >= global_thr).astype(int) # Apply the optimal threshold to test predictions

submission = pd.DataFrame({"battle_id": test_ids, "player_won": final_preds})
submission.to_csv(SUBMISSION_PATH, index=False)

print(f"\nSubmission saved: {SUBMISSION_PATH}")
print(f"Test predicted wins: {np.sum(final_preds)}/{len(final_preds)} ({np.mean(final_preds)*100:.1f}%)")



Submission saved: /content/drive/MyDrive/challenge ds/data/test_res.csv
Test predicted wins: 2471/5000 (49.4%)


In [193]:
# Calculate and print the average pairwise Spearman correlation among base model OOF predictions.
correlations = []
for i in range(n_total_models):
    for j in range(i+1, n_total_models):
        corr, _ = spearmanr(base_oof_all[i], base_oof_all[j])
        correlations.append(corr)


avg_corr = np.mean(correlations)
print(f"Average pairwise correlation: {avg_corr:.3f}")


Average pairwise correlation: 0.888


In [194]:
# Perform cross-validated threshold tuning to estimate robust test accuracy.
def oof_threshold_cv(probs, y, outer_folds=5, low=0.3, high=0.7, steps=201):
    skf = StratifiedKFold(n_splits=outer_folds, shuffle=True, random_state=RANDOM_STATE)
    accs = []

    for tr, va in skf.split(probs.reshape(-1,1), y):
        # Tune threshold on training fold and evaluate on validation fold
        thr = tune_threshold(probs[tr], y[tr], low=low, high=high, steps=steps)
        preds = (probs[va] >= thr).astype(int)
        accs.append(accuracy_score(y[va], preds))

    return np.mean(accs), np.std(accs)

# Estimate the final test accuracy using cross-validated threshold tuning
mean_cv_acc, std_cv_acc = oof_threshold_cv(meta_oof_probs, y_train)
print(f"CV estimate of final test accuracy: {mean_cv_acc:.4f} \u00b1 {std_cv_acc:.4f}")



CV estimate of final test accuracy: 0.8378 ± 0.0048
