In [1]:
from typing import Any
import json
import os
import re
from pprint import pprint
from warnings import simplefilter
import numpy as np
import pandas as pd
from cleantext import clean

from sklearn.preprocessing import StandardScaler

from sklearn.svm import LinearSVC
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier

from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import StratifiedKFold, cross_validate

simplefilter("ignore")
pd.set_option("display.max_columns", 0)

Since the GPL-licensed package `unidecode` is not installed, using Python's `unicodedata` package which yields worse results.


### 1. Loading and Inspecting the Data

When you create a notebook within a Kaggle competition, the competition's data is automatically attached and available in the `../input/` directory.

The dataset is in a `.jsonl` format, which means each line is a separate JSON object. This is great because we can process it one line at a time without needing to load the entire large file into memory.

Let's write a simple loop to load the training data and inspect the first battle.

In [2]:
COMPETITION_NAME = 'fds-pokemon-battles-prediction-2025'
DATA_PATH = os.getcwd() #os.path.join('../input', COMPETITION_NAME)
UNTOUCHED = {'battle_id', 'player_won'}
INFORMATIVE = {
    "p1_unique_pokemon",
    "p2_unique_pokemon",
    "final_p1_hp",  
    "p1_fainted_count", 
    "p1_turns_statused", 
    "p1_missed_turns", 
    "p2_turns_statused",
    "p2_missed_turns", 
    "battle_id",
    "player_won"
}

train_file_path = os.path.join(DATA_PATH, 'train.jsonl')
test_file_path = os.path.join(DATA_PATH, 'test.jsonl')

print(f"Loading data from '{train_file_path}'...")
try:
    with open(train_file_path, 'r', encoding="utf-8") as f:
        train_data = [json.loads(line) for line in f]

    print(f"Successfully loaded {len(train_data)} battles.")

    #print("\n--- Structure of the first train battle: ---")
    if train_data:
        first_battle = train_data[0]
        
        battle_for_display = first_battle.copy()
        battle_for_display['battle_timeline'] = battle_for_display.get('battle_timeline', [])[:2] # Show first 2 turns
        
        #pprint(battle_for_display)
        if len(first_battle.get('battle_timeline', [])) > 3:
            print("    ...")
            print("    (battle_timeline has been truncated for display)")

except FileNotFoundError:
    print(f"ERROR: Could not find the training file at '{train_file_path}'.")
    print("Please make sure you have added the competition data to this notebook.")

Loading data from 'c:\Users\Stefano\Desktop\pokemon-challenge\train.jsonl'...
Successfully loaded 10000 battles.
    ...
    (battle_timeline has been truncated for display)


### 2. Basic Feature Engineering

A successful model will likely require creating many complex features. For this starter notebook, however, we will create a very simple feature set based **only on the initial team stats**. This will be enough to train a model and generate a submission file.

It's up to you to engineer more powerful features!

In [3]:
def features_check(data: dict) -> None:
    print("All battles have at least one turn: ", all(all(turn for turn in battle.get('battle_timeline', False)) for battle in data))
    print("All battles' turns have at least one P1 move: ", 
        all((
            any((turn.get("p1_move_details", False) for turn in battle.get('battle_timeline', False))) for battle in data
        ))
    )
    print("All battles' turns have at least one P2 move: ", 
        all((
            any((turn.get("p2_move_details", False) for turn in battle.get('battle_timeline', False))) for battle in data
        ))
    )
    print("player_won feature always exists: ", all(('player_won' in battle for battle in data)))
    print("P1 Team always exists: ", all(battle.get('p1_team_details', False) for battle in data))
    print("P2 Team always exists: ", all(battle.get('p2_team_details', False) for battle in data))
    
    return None

In [4]:
# ------------------------------------------------------------
# Aggregate collected per-player stats
# ------------------------------------------------------------
def agg_pokemons_stats(prefix: str, stats: dict[str, Any]):
    return {
        f"{prefix}_mean_power": np.mean(stats["powers"]) if stats["powers"] else 0,
        f"{prefix}_mean_accuracy": np.mean(stats["accuracy"]) if stats["accuracy"] else 0,
        f"{prefix}_lost_hp": stats["lost_hp"],
        f"{prefix}_turns_statused": stats["turns_statused"],
        f"{prefix}_missed_turns": stats["missed_turns"],
        f"{prefix}_switches": stats["switches"],
        f"{prefix}_net_boost": stats["net_boost"],
    }

In [5]:
eps = 1e-6

# ------------------------------------------------------------
# Main Feature Engineering Function
# ------------------------------------------------------------
def create_features(data: list[dict]) -> pd.DataFrame:  # Function takes a list of battle dicts and returns a pandas DataFrame
    # Generates a comprehensive feature set for each battle
    # The function derives team-level aggregates, lead-pokemon attributes, timeline-derived dynamics, HP/status/effect/boost tracking, per-pokemon presence flags, and more

    # ------------------------------------------------------------
    # Collect all unique Player-1 Pokémon names across dataset
    # This allows creating consistent one-hot "p1_has_<pkm>" features.
    # ------------------------------------------------------------
    unique_p1_names = set()  # Set to accumulate every distinct p1 Pokémon name observed in the dataset
    for battle in data:  # Iterate each battle record (each element is expected to be a dict)
        for p in battle.get('p1_team_details', []) or []:  # Iterate the declared team details for player 1 (if any)
            if p and 'name' in p: unique_p1_names.add(p['name'])  # Add pokemon name if present to ensure presence features

        for timeline_entry in battle.get('battle_timeline', []) or []:  # Iterate timeline to capture leads introduced only in timeline
            if name := timeline_entry.get('p1_pokemon_state', {}).get('name'):  # Use walrus operator to capture name if it exists, and if so, adds it to the pokemon names set
                unique_p1_names.add(name)

    unique_name_to_feat = {  # Map raw Pokémon name with its respective feature column name (one-hot)
        name: f"p1_has_{clean(name, lower=True, normalize_whitespace=True)}" 
        for name in unique_p1_names
    }  # clean() normalizes names for safe column keys (lowercase, trim spaces, etc.)

    # Feature rows
    feature_list = []  # List that will collect a dict of features per battle, converted to DataFrame and returned at the end by the function

    # ------------------------------------------------------------
    # Process each battle
    # ------------------------------------------------------------
    for battle in data:  # Loop over battles again to build a dataframe record for each battle
        features = {}  # Dict to accumulate features for the current battle

        # Initialize all p1_has_<pkm> = False by default
        features.update({feat_key: False for feat_key in unique_name_to_feat.values()}) # For each pokemon we set by default its non-presence in the battle

        # --------------------------------------------------------
        # (A) TEAM-LEVEL FEATURES
        # --------------------------------------------------------
        if p1_team := battle.get('p1_team_details', []):  # If team details exist, compute aggregated base-stat features and presence flags
            # Mark pokemons' presence features
            for p in p1_team:
                if p and 'name' in p and p['name'] in unique_name_to_feat: # If the pokemon name is within the names set we created before then set one-hot True for Pokémon that are on p1 team
                    features[unique_name_to_feat[p['name']]] = True

            # Compute team base-stat means — use numpy nanmean to be robust to missing or zero entries
            features.update({
                'p1_mean_hp': np.nanmean([p.get('base_hp', 0)  for p in p1_team]), # Mean base HP for p1 team
                'p1_mean_spe': np.nanmean([p.get('base_spe', 0) for p in p1_team]), # Mean base Speed
                'p1_mean_atk': np.nanmean([p.get('base_atk', 0) for p in p1_team]), # Mean base Attack
                'p1_mean_def': np.nanmean([p.get('base_def', 0) for p in p1_team]), # Mean base Defense
                'p1_mean_special':np.nanmean([p.get('base_spa', 0) for p in p1_team]), # Mean base Special Attack
            })

        else:
            # Default zero-values if team structure is not provided to avoid missing columns downstream
            features.update({
                'p1_mean_hp': 0,
                'p1_mean_spe': 0,
                'p1_mean_atk': 0,
                'p1_mean_def': 0,
                'p1_mean_special': 0,
            })

        # --------------------------------------------------------
        # (B) LEAD-POKÉMON FEATURES
        # --------------------------------------------------------
        timeline = battle.get("battle_timeline", [])  # timeline is the turn-by-turn snapshot list (may be empty)
        first_turn = timeline[0] if timeline else {}  # first_turn is used to infer initial lead Pokémon

        p1_lead = first_turn.get("p1_pokemon_state", {}).get("name", "")  # p1 lead pokemon name (empty string if unknown)
        p2_lead_details = battle.get("p2_lead_details", {})  # p2 lead pokemon details may be provided separately in some exports

        # Extract P1 lead stats by searching the declared team for the lead's base stats
        p1_lead_stats = {}  # Start with empty stats; populate only if lead is found in p1_team
        for p in p1_team:
            if p.get('name') == p1_lead:
                p1_lead_stats = {
                    'p1_lead_hp': p.get('base_hp', 0),  # Base HP of lead
                    'p1_lead_atk': p.get('base_atk', 0),  # Base Attack of lead
                    'p1_lead_def': p.get('base_def', 0),  # Base Defense of lead
                    'p1_lead_special': p.get('base_spa', 0),  # Base Special Attack of lead
                    'p1_lead_spe': p.get('base_spe', 0),  # Base Speed of lead
                }
                break  # Stop searching once we found the lead entry
        features.update(p1_lead_stats)  # Insert found lead pokemon stats into the features dict

        # P2 lead stats — if external details provided, use them, also compute simple lead speed advantage flag
        if p2_lead_details:
            features['p2_lead_hp'] = p2_lead_details.get('base_hp', 0)  # p2 lead pokemon base HP (fallback 0)
            features['p2_lead_spe'] = p2_lead_details.get('base_spe', 0)  # p2 lead pokemon speed
            features['p2_lead_atk'] = p2_lead_details.get('base_atk', 0)  # p2 lead pokemon attack
            features['p2_lead_def'] = p2_lead_details.get('base_def', 0)  # p2 lead pokemon defense
            features['p2_lead_special'] = p2_lead_details.get('base_spa', 0)  # p2 lead pokemon special attack
            # spe_lead_adv is 1 if p1 lead's base speed strictly greater than p2 lead pokemon's base speed, else 0
            features['spe_lead_adv'] = int(p1_lead_stats.get('p1_lead_spe', 0) > p2_lead_details.get('base_spe', 0))
        else:
            features['spe_lead_adv'] = 0  # Default to no advantage if p2 lead details aren't available

        # --------------------------------------------------------
        # (C) TIMELINE-BASED FEATURES
        # --------------------------------------------------------
        # Initialize per-player dynamic stats to keep per-turn aggregates organized and consistent
        def init_stats():  # init_stats() returns a tracking dict template for a player's dynamic values through the timeline
            return {
                "powers": [],  # List of move base powers seen over timeline (for damage potential)
                "accuracy": [],  # List of move accuracies attempted
                "hp_t0": {},  # Map pokemon_name to the latest observed hp_pct (for per-pokemon hp tracking)
                "lost_hp": 0,  # Accumulated hp% lost across all Pokémon (sum of deltas)
                "turns_statused": 0,  # Total turns player had any status (e.g., brn, psn, par)
                "missed_turns": 0,  # Number of turns with no move details (could indicate skip or missing data)
                "priority": 0,  #Placeholder for priority move use counts (not filled here but reserved)
                "switches": 0,  # Count of in-battle Pokémon switches observed
                "net_boost": 0,  # Cumulative net boosts across tracked stats
                "base_boosts": {k: 0 for k in ["atk", "def", "spa", "spd", "spe"]},  # Last-observed boost baseline
                "status_turns": {},  # Map status to the total turns that status was active on any pokemon
                "effect_turns": {},  # Map effect to the total turns that effect was active (e.g., substitute)
                "hp_loss_while_statused": 0,  # HP lost while a status was active (to measure status impact)
                "hp_loss_while_effect": 0,  # HP lost while effects were active (to measure effect damage)
            }

        p1_stats = init_stats()  # Initialize dynamic stats tracking dict for player 1
        p2_stats = init_stats()  # Initialize dynamic stats tracking dict for player 2

        if timeline:  # Only populate timeline-derived features if we actually have per-turn snapshots
            p1_names, p2_names = [], []  # Lists to record the sequence of Pokémon names seen for each player
            p2_hp_deltas = []  # List of HP percentage drops applied to p2 between consecutive turns (for mean damage)

            prev_p1_hp, prev_p2_hp = None, None  # Previous-turn hp_pct for p1 and p2 (used to compute instantaneous loss)
            prev_p1_status, prev_p2_status = None, None  # Previous-turn status strings (used to attribute loss to statuses)
            prev_p1_effects, prev_p2_effects = set(), set()  # Previous-turn effects sets (used to attribute loss to effects)

            # Iterate over turns
            for turn_idx, turn in enumerate(timeline):  # Loop through timeline with index to allow comparing to previous turn
                p1_state = turn.get("p1_pokemon_state", {})  # Snapshot dict for p1's active pokemon state at this turn
                p2_state = turn.get("p2_pokemon_state", {})  # Snapshot dict for p2's active pokemon state at this turn

                p1_name = p1_state.get("name", "")  # Active p1 pokemon name for this turn (empty string fallback)
                p2_name = p2_state.get("name", "")  # Active p2 pokemon name for this turn (empty string fallback)
                p1_hp = p1_state.get("hp_pct", 1.0)  # Active p1 hp as a fraction of max (default 1.0 if missing)
                p2_hp = p2_state.get("hp_pct", 1.0)  # Active p2 hp as a fraction of max
                p1_status = p1_state.get("status", "nostatus")  # Status string or "nostatus"
                p2_status = p2_state.get("status", "nostatus")  # Status string or "nostatus"
                p1_effects = set(p1_state.get("effects", ["noeffect"]))  # Convert effects list into a set
                p2_effects = set(p2_state.get("effects", ["noeffect"]))  # Do the same for p2

                # Track pokemon identity appearances
                if p1_name:
                    p1_names.append(p1_name)  # Append to p1 seen pokemons sequence for tracking
                    if p1_name in unique_name_to_feat:
                        features[unique_name_to_feat[p1_name]] = True  # Turn on one-hot if this pokemon appears at any turn
                if p2_name:
                    p2_names.append(p2_name)  # Append to p2 seen pokemons sequence

                # --------------------------------------------------
                # MOVES AND ACCURACY
                # --------------------------------------------------
                for key, stats in [
                    ("p1_move_details", p1_stats),
                    ("p2_move_details", p2_stats)
                ]:  # Iterate both players' move detail keys with corresponding stat accumulator
                    move = turn.get(key)  # Collect move details dict for this turn (if present)
                    if move:
                        stats["powers"].append(move.get("base_power", 0))  # Record base power (0 if moves like status)
                        stats["accuracy"].append(move.get("accuracy", 0))  # Record accuracy metric when present
                    else:
                        stats["missed_turns"] += 1  # Increment missed_turns where move info is absent (possible skip or missing data)

                # --------------------------------------------------
                # STATUS / EFFECT TRACKING
                # --------------------------------------------------
                if p1_status != "nostatus":
                    p1_stats["turns_statused"] += 1  # Increment aggregate count of turns p1 had any non-empty status
                    p1_stats["status_turns"][p1_status] = p1_stats["status_turns"].get(p1_status, 0) + 1  # Per-status counter
                if p2_status != "nostatus":
                    p2_stats["turns_statused"] += 1  # same for p2
                    p2_stats["status_turns"][p2_status] = p2_stats["status_turns"].get(p2_status, 0) + 1

                # Accumulate effect usage counts
                p1_et = p1_stats["effect_turns"]
                p1_et.update({e: p1_et.get(e, 0) + 1 for e in p1_effects})

                p2_et = p2_stats["effect_turns"]
                p2_et.update({e: p2_et.get(e, 0) + 1 for e in p2_effects})


                # --------------------------------------------------
                # HP TRACKING
                # --------------------------------------------------
                if p1_name in p1_stats["hp_t0"]:
                    delta = p1_stats["hp_t0"][p1_name] - p1_hp  # delta = previous observed hp_pct - current hp_pct
                    if delta > 0:
                        p1_stats["lost_hp"] += delta  # Add to cumulative lost_hp only when actual loss occurred
                p1_stats["hp_t0"][p1_name] = p1_hp  # Update the last-observed hp_pct for this pokemon

                if p2_name in p2_stats["hp_t0"]:
                    delta = p2_stats["hp_t0"][p2_name] - p2_hp
                    if delta > 0:
                        p2_stats["lost_hp"] += delta  # Similar accumulation for p2
                p2_stats["hp_t0"][p2_name] = p2_hp

                # HP loss under status/effect: attribute previous-turn to the current loss to status/effect active in previous turn
                if prev_p1_hp is not None and p1_hp < prev_p1_hp:
                    loss = prev_p1_hp - p1_hp  # Compute amount lost since previous snapshot
                    if prev_p1_status != "nostatus": p1_stats["hp_loss_while_statused"] += loss  # Add to status-attributed loss if previous had status
                    if prev_p1_effects: p1_stats["hp_loss_while_effect"] += loss  # Likewise for effect-attributed loss

                if prev_p2_hp is not None and p2_hp < prev_p2_hp:
                    loss = prev_p2_hp - p2_hp
                    if prev_p2_status != "nostatus": p2_stats["hp_loss_while_statused"] += loss
                    if prev_p2_effects: p2_stats["hp_loss_while_effect"] += loss

                # --------------------------------------------------
                # SWITCHES
                # --------------------------------------------------
                if turn_idx > 0:  # Avoid counting the first turn as a switch (initial send-out isn't a 'switch' event)
                    if p1_name != timeline[turn_idx - 1]['p1_pokemon_state']['name']:
                        p1_stats["switches"] += 1  # Increment when the active p1 pokemon name differs from previous turn
                    if p2_name != timeline[turn_idx - 1]['p2_pokemon_state']['name']:
                        p2_stats["switches"] += 1  # Same logic for p2

                # --------------------------------------------------
                # BOOST TRACKING
                # --------------------------------------------------
                p1_boosts = p1_state.get("boosts", {})  # Dictionary of stat boosts this turn for p1 active pokemon
                p2_boosts = p2_state.get("boosts", {})  # Same for p2

                for stat in ["atk", "def", "spa", "spd", "spe"]:
                    # net_boost accumulates changes relative to the last boosts baseline stored in base_boosts
                    p1_stats["net_boost"] += p1_boosts.get(stat, 0) - p1_stats["base_boosts"].get(stat, 0)
                    p2_stats["net_boost"] += p2_boosts.get(stat, 0) - p2_stats["base_boosts"].get(stat, 0)

                # Store deltas for mean-damage computation: compute p2 hp drop relative to previous turn's p2 hp
                if turn_idx > 0:
                    prev_hp = timeline[turn_idx - 1]['p2_pokemon_state']['hp_pct']  # Previous p2 hp_pct snapshot
                    p2_hp_deltas.append(prev_hp - p2_hp)  # Append observed delta (can be negative if healed)

                # Update previous values for the next-iteration comparisons
                prev_p1_hp = p1_hp  # Set previous hp for next loop iteration
                prev_p2_hp = p2_hp
                prev_p1_status = p1_status  # Remember previous status to attribute hp loss next turn
                prev_p2_status = p2_status
                prev_p1_effects = p1_effects  # Remember previous effects set
                prev_p2_effects = p2_effects
                p1_stats["base_boosts"] = p1_boosts.copy()  # Update baseline boosts to the current turn's boosts
                p2_stats["base_boosts"] = p2_boosts.copy()

            # --------------------------------------------------------
            # Aggregate summary timeline features
            # --------------------------------------------------------
            features['p1_unique_pokemon'] = len(set(p1_names))  # Number of distinct p1 pokémon seen during the battle
            features['p2_unique_pokemon'] = len(set(p2_names))  # Number of distinct p2 pokémon seen during the battle
            features['n_turns'] = len(timeline)  # Total number of turns / snapshots recorded in timeline
            # mean_damage_dealt: mean of positive deltas (damage dealt to p2 between consecutive turns), 0 if no data is available
            features['mean_damage_dealt'] = np.nanmean([d for d in p2_hp_deltas if d > 0]) if p2_hp_deltas else 0

            features['final_p1_hp'] = timeline[-1]['p1_pokemon_state'].get('hp_pct', None)  # Final active p1 hp_pct observed
            features['p1_fainted_count'] = sum(t['p1_pokemon_state']['status'] == 'fnt' for t in timeline)  # Count p1 fnt (fainting) occurrences
            features['p2_fainted_count'] = sum(t['p2_pokemon_state']['status'] == 'fnt' for t in timeline)  # Count p2 fnt (fainting) occurrences

            # HP final states: average of last-observed hp_pct for all seen pokémon of each player
            p1_final_hp = np.nanmean(list(p1_stats["hp_t0"].values())) if p1_stats["hp_t0"] else 1.0  # p1 final hp, fallback 1.0 (full hp)
            p2_final_hp = np.nanmean(list(p2_stats["hp_t0"].values())) if p2_stats["hp_t0"] else 1.0  # p2 final hp, fallback 1.0 (full hp)

            # Per-player mean of final hp% across their pokemon
            features['p1_mean_final_hp'] = p1_final_hp
            features['p2_mean_final_hp'] = p2_final_hp

            # HP-based ratios: normalized measures to compare damage taken vs remaining HP (eps added for numerical stability)
            features['p1_hp_loss_ratio'] = p1_stats["lost_hp"] / (p1_stats["lost_hp"] + p1_final_hp + eps) # p1 lost HPs compared to the total available. Adding 1e-6 to avoid division by zero in case no HPs were lost
            features['p2_hp_loss_ratio'] = p2_stats["lost_hp"] / (p2_stats["lost_hp"] + p2_final_hp + eps) # Same as above, but for p2
            features['hp_diff_lost'] = p2_stats["lost_hp"] - p1_stats["lost_hp"]  # Absolute difference in hp lost totals
            features['hp_diff_final'] = p1_final_hp - p2_final_hp  # Difference in mean final hp% (p1 - p2)
            features['hp_ratio_p1_p2'] = p1_stats["lost_hp"] / (p2_stats["lost_hp"] + eps)  # Ratio of lost hp between players. Adding 1e-6 to avoid division by zero in case no HPs were lost

            # HP stats: mean and min-level snapshots across pokemon observed
            features['p1_mean_hp_pct'] = np.nanmean([v for v in p1_stats["hp_t0"].values() if v > 0]) if p1_stats["hp_t0"] else 1.0
            features['p2_mean_hp_pct'] = np.nanmean([v for v in p2_stats["hp_t0"].values() if v > 0]) if p2_stats["hp_t0"] else 1.0
            features['p1_min_hp'] = np.min(list(p1_stats["hp_t0"].values())) if p1_stats["hp_t0"] else 1.0
            features['p2_min_hp'] = np.min(list(p2_stats["hp_t0"].values())) if p2_stats["hp_t0"] else 1.0

            # Survival: fraction of unique pokémon that remain with hp > 0 at last observation
            p1_alive = sum(1 for v in p1_stats["hp_t0"].values() if v > 0)  # Count alive p1 pokemon at final snapshot
            p2_alive = sum(1 for v in p2_stats["hp_t0"].values() if v > 0)  # Count alive p2 pokemon at final snapshot
            features['p1_survival_ratio'] = p1_alive / (len(p1_stats["hp_t0"]) + eps)  # Divide by total observed pokemon count
            features['p2_survival_ratio'] = p2_alive / (len(p2_stats["hp_t0"]) + eps)
            features['survival_diff'] = features['p1_survival_ratio'] - features['p2_survival_ratio']  # Comparison between survival ratios

            # HP per-pokemon / per-turn normalized metrics for learning algorithms
            features['p1_avg_hp_lost_per_pkm'] = p1_stats["lost_hp"] / (len(p1_stats["hp_t0"]) + eps)
            features['p2_avg_hp_lost_per_pkm'] = p2_stats["lost_hp"] / (len(p2_stats["hp_t0"]) + eps)
            features['p1_hp_loss_per_turn'] = p1_stats["lost_hp"] / (features['n_turns'] + eps)  # Avg lost hp per turn
            features['p2_hp_loss_per_turn'] = p2_stats["lost_hp"] / (features['n_turns'] + eps)
            features['hp_delta_per_turn'] = (p2_stats["lost_hp"] - p1_stats["lost_hp"]) / (features['n_turns'] + eps)
            # hp_relative_strength: normalized difference in final hp averages (signed), small eps avoids div-by-zero
            features['hp_relative_strength'] = (p1_final_hp - p2_final_hp) / (abs(p1_final_hp) + abs(p2_final_hp) + eps)

            # Effect/Status Impacts: ratios describing how much hp loss was attributable to status/effect conditions
            features['p1_hp_loss_status_ratio'] = p1_stats["hp_loss_while_statused"] / (p1_stats["lost_hp"] + eps)
            features['p2_hp_loss_status_ratio'] = p2_stats["hp_loss_while_statused"] / (p2_stats["lost_hp"] + eps)
            features['p1_hp_loss_effect_ratio'] = p1_stats["hp_loss_while_effect"] / (p1_stats["lost_hp"] + eps)
            features['p2_hp_loss_effect_ratio'] = p2_stats["hp_loss_while_effect"] / (p2_stats["lost_hp"] + eps)

            features['hp_diff_statused'] = p2_stats["hp_loss_while_statused"] - p1_stats["hp_loss_while_statused"]  # Net status-attributed loss diff for p1
            features['hp_diff_effected'] = p2_stats["hp_loss_while_effect"] - p1_stats["hp_loss_while_effect"]  # Net effect-attributed diff for p1

            features['p1_resistance_index'] = 1 - features['p1_hp_loss_status_ratio']  # Interpret as fraction of hp not lost to status (p1)
            features['p2_resistance_index'] = 1 - features['p2_hp_loss_status_ratio']  # Interpret as fraction of hp not lost to status (p2)
            features['resistance_diff'] = features['p1_resistance_index'] - features['p2_resistance_index']  # Difference between resistance measures p1 and p2

            # Add individual status/effect counts as explicit features (sparse, but informative)
            features.update({f"p1_pkm_{k}": v for k, v in p1_stats["status_turns"].items()}) # e.g., p1_pkm_brn = number of turns any p1 pokemon was burned
            features.update({f"p2_pkm_{k}": v for k, v in p2_stats["status_turns"].items()})
            features.update({f"p1_pkm_{k}": v for k, v in p1_stats["effect_turns"].items()}) # e.g., p1_pkm_substitute = number of turns substitute was active
            features.update({f"p2_pkm_{k}": v for k, v in p2_stats["effect_turns"].items()})

        else:
            # In case no timeline is present to fill minimal fields with conservative defaults to keep consistent schema
            features.update({
                'p1_unique_pokemon': 0,
                'p2_unique_pokemon': 0,
                'n_turns': 0,
                'mean_damage_dealt': 0,
                'final_p1_hp': None,
                'p1_fainted_count': 0,
                'p2_fainted_count': 0,
            })

        # Append aggregated stats computed per-pokemon (helper function expected to return a dict of additional features)
        features.update(agg_pokemons_stats("p1", p1_stats))  # Merge p1 aggregated per-pokemon stats (function must be defined elsewhere)
        features.update(agg_pokemons_stats("p2", p2_stats))  # Merge p2 aggregated per-pokemon stats

        # Outcome + ID
        features['battle_id'] = battle.get('battle_id')  # Preserve original battle identifier for traceability
        if 'player_won' in battle:
            features['player_won'] = int(battle['player_won'])  # Convert boolean to integer label (1/0) for modelling

        feature_list.append(features)  # Add this battle's feature dict to the master list


    feats_df = pd.DataFrame(feature_list).fillna(0) # Construct final DataFrame and fill remaining NaNs with zeros for model compatibility

    # Faint advantage
    feats_df["pkm_fnt_adv"] = (feats_df["p2_fainted_count"] - feats_df["p1_fainted_count"]) / (feats_df["p1_fainted_count"] + feats_df["p2_fainted_count"] + eps)

    # Lost HP advantage
    feats_df["lost_hp_adv"] = (feats_df["p2_lost_hp"] - feats_df["p1_lost_hp"]) / (feats_df["p1_lost_hp"] + feats_df["p2_lost_hp"] + eps)

    # Missed turns advantage
    feats_df["missed_turns_adv"] = (feats_df["p2_missed_turns"] - feats_df["p1_missed_turns"]) / (feats_df["p1_missed_turns"] + feats_df["p2_missed_turns"] + eps)

    # Pokémon without status advantage
    # (you track these implicitly — we compute from status_turns)
    feats_df["p1_pkm_nostatus"] = feats_df.get("p1_pkm_nostatus", 0)
    feats_df["p2_pkm_nostatus"] = feats_df.get("p2_pkm_nostatus", 0)

    feats_df["pkm_nostatus_adv"] = (feats_df["p1_pkm_nostatus"] - feats_df["p2_pkm_nostatus"]) / (feats_df["p1_pkm_nostatus"] + feats_df["p2_pkm_nostatus"] + eps)

    # Sleep disadvantage (sleep = 'slp')
    feats_df["p1_pkm_slp"] = feats_df.get("p1_pkm_slp", 0)
    feats_df["p2_pkm_slp"] = feats_df.get("p2_pkm_slp", 0)

    feats_df["pkm_slp_adv"] = (feats_df["p2_pkm_slp"] - feats_df["p1_pkm_slp"]) / (feats_df["p1_pkm_slp"] + feats_df["p2_pkm_slp"] + eps)

    # Freeze disadvantage
    feats_df["p2_pkm_frz"] = feats_df.get("p2_pkm_frz", 0)
    feats_df["pkm_frz_adv"] = -feats_df["p2_pkm_frz"] / (feats_df["p2_pkm_frz"] + 1 + eps)

    # Unique Pokémon disadvantage
    feats_df["unique_pokemon_adv"] = (feats_df["p2_unique_pokemon"] - feats_df["p1_unique_pokemon"]) / (feats_df["p1_unique_pokemon"] + feats_df["p2_unique_pokemon"] + eps)

    # Switch disadvantage
    feats_df["switches_adv"] = -feats_df["p2_switches"] / (feats_df["p2_switches"] + 1 + eps)

    adv_cols = [c for c in feats_df.columns if c.endswith("_adv")]
    feats_df["overall_advantage"] = feats_df[adv_cols].mean(axis=1)

    return feats_df

In [6]:
print("Processing training data...")
train_feats_df = create_features(train_data)

print("\nProcessing test data...")
with open(test_file_path, 'r', encoding="utf-8") as f:
    test_data = [json.loads(line) for line in f]
        
test_feats_df = create_features(test_data)

Processing training data...

Processing test data...


In [7]:
keepers = list(set(train_feats_df.columns).difference(UNTOUCHED)) #Keeping only the features which need to be scaled

In [8]:
#Scaling features
scaler = StandardScaler(with_mean=True, with_std=True)

train_feats_df[keepers] = scaler.fit_transform(train_feats_df[keepers])
test_feats_df[keepers] = scaler.fit_transform(test_feats_df[keepers])

In [9]:
print("\nTraining dataset preview:")
display(train_feats_df.head())
display(train_feats_df.describe())
display(train_feats_df.dtypes)

print("\nTesting dataset preview:")
display(test_feats_df.head())
display(test_feats_df.describe())
display(test_feats_df.dtypes)

print(list(test_feats_df.columns))


Training dataset preview:


Unnamed: 0,p1_has_charizard,p1_has_alakazam,p1_has_dragonite,p1_has_victreebel,p1_has_golem,p1_has_jolteon,p1_has_exeggutor,p1_has_tauros,p1_has_lapras,p1_has_starmie,p1_has_persian,p1_has_jynx,p1_has_slowbro,p1_has_gengar,p1_has_zapdos,p1_has_chansey,p1_has_cloyster,p1_has_articuno,p1_has_snorlax,p1_has_rhydon,p1_mean_hp,p1_mean_spe,p1_mean_atk,p1_mean_def,p1_mean_special,p1_lead_hp,p1_lead_atk,p1_lead_def,p1_lead_special,p1_lead_spe,p2_lead_hp,p2_lead_spe,p2_lead_atk,p2_lead_def,p2_lead_special,spe_lead_adv,p1_unique_pokemon,p2_unique_pokemon,n_turns,mean_damage_dealt,...,p2_mean_power,p2_mean_accuracy,p2_lost_hp,p2_turns_statused,p2_missed_turns,p2_switches,p2_net_boost,battle_id,player_won,p2_pkm_substitute,p2_pkm_reflect,p1_pkm_frz,p1_pkm_wrap,p1_pkm_tox,p1_pkm_psn,p1_pkm_confusion,p1_pkm_substitute,p2_pkm_confusion,p1_pkm_clamp,p2_pkm_clamp,p2_pkm_tox,p2_pkm_psn,p2_pkm_brn,p2_pkm_wrap,p1_pkm_brn,p1_pkm_typechange,p2_pkm_typechange,p1_pkm_firespin,p2_pkm_firespin,pkm_fnt_adv,lost_hp_adv,missed_turns_adv,p1_pkm_nostatus,p2_pkm_nostatus,pkm_nostatus_adv,pkm_slp_adv,pkm_frz_adv,unique_pokemon_adv,switches_adv,overall_advantage
0,-0.09259,1.051737,-0.199834,-0.202524,-0.371718,-0.317357,0.573809,0.341423,-0.341606,1.082819,-0.187036,-0.559949,-0.343246,-0.576273,-0.603074,0.42403,-0.402467,-0.193247,0.445173,-0.595522,0.202093,0.520813,-0.732064,-0.745443,0.644639,-0.50823,0.553031,1.039088,-0.562587,0.763073,-0.279683,0.490498,0.642099,1.185993,-0.741369,-0.747885,-1.404495,-1.33184,0.0,-0.30049,...,0.446467,0.785598,0.029295,1.042992,2.443488,0.622462,-0.136512,0,1,-0.166375,-0.464671,-0.32638,-0.041399,-0.105891,-0.071695,-0.168464,-0.173161,-0.16849,-0.049726,-0.052151,-0.108606,-0.079622,-0.06164,-0.041126,-0.064922,-0.014144,-0.014144,-0.011768,-0.010946,1.859545,0.381153,2.07285,0.0,0.0,0.0,0.005948,-3.228641,0.036636,-0.522986,0.197016
1,-0.09259,-0.950808,-0.199834,-0.202524,-0.371718,-0.317357,0.573809,0.341423,-0.341606,-0.923516,-0.187036,1.785878,2.91336,-0.576273,-0.603074,0.42403,-0.402467,-0.193247,0.445173,-0.595522,0.761596,-1.738011,-0.732064,-0.492591,-0.759727,-0.429812,-0.327042,-0.661341,-0.806141,0.078764,-0.450188,0.723525,-0.711192,-0.611967,1.072092,-0.747885,0.91852,0.950271,0.0,-1.373696,...,0.170319,0.322559,0.539208,-1.090316,0.158909,0.622462,-1.382055,1,1,-0.166375,-0.464671,-0.32638,-0.041399,-0.105891,-0.071695,-0.168464,-0.173161,-0.16849,-0.049726,-0.052151,-0.108606,-0.079622,-0.06164,-0.041126,-0.064922,-0.014144,-0.014144,-0.011768,-0.010946,-0.737944,0.338204,-0.027299,0.0,0.0,0.0,0.268123,0.416382,0.036636,-0.522986,-0.198934
2,-0.09259,-0.950808,-0.199834,-0.202524,-0.371718,-0.317357,0.573809,0.341423,-0.341606,-0.923516,-0.187036,-0.559949,-0.343246,1.73529,-0.603074,0.42403,-0.402467,-0.193247,0.445173,1.679199,0.823762,-1.224642,0.906915,0.097399,-0.759727,0.040699,1.25709,1.039088,0.655186,-1.289855,6.199509,-2.538851,-3.147117,-2.409928,-0.482303,1.337103,-2.566002,-1.33184,0.0,-0.64976,...,-0.30951,-0.358604,-0.268309,0.509665,0.485277,-1.822126,-0.136512,2,1,13.933237,-0.464671,-0.32638,-0.041399,-0.105891,-0.071695,-0.168464,-0.173161,-0.16849,-0.049726,-0.052151,-0.108606,-0.079622,-0.06164,-0.041126,-0.064922,-0.014144,-0.014144,-0.011768,-0.010946,-0.737942,0.845914,1.448012,0.0,0.0,0.0,0.333667,0.416382,1.216275,2.077149,1.225181
3,-0.09259,-0.950808,-0.199834,-0.202524,-0.371718,-0.317357,0.573809,0.341423,-0.341606,-0.923516,-0.187036,-0.559949,-0.343246,1.73529,1.658171,0.42403,-0.402467,-0.193247,0.445173,-0.595522,0.637262,0.007444,-0.029644,-0.492591,1.112761,-0.50823,0.201002,0.188873,0.89874,0.591996,0.231832,0.257471,1.99539,1.635483,-2.295764,-0.747885,-0.242987,-1.33184,0.0,0.162461,...,1.549122,-0.440752,0.70981,-1.090316,-0.493828,-0.774445,-0.136512,3,1,-0.166375,0.56081,-0.32638,-0.041399,-0.105891,-0.071695,-0.168464,-0.173161,-0.16849,-0.049726,-0.052151,-0.108606,-0.079622,-0.06164,-0.041126,-0.064922,-0.014144,-0.014144,-0.011768,-0.010946,-0.737944,0.17199,-0.568247,0.0,0.0,0.0,1.316821,0.416382,-0.88086,0.326038,0.185251
4,-0.09259,1.051737,-0.199834,-0.202524,-0.371718,-0.317357,0.573809,0.341423,-0.341606,-0.923516,-0.187036,-0.559949,-0.343246,-0.576273,-0.603074,0.42403,2.484673,-0.193247,0.445173,-0.595522,0.07776,-0.403251,-0.263784,0.855957,0.293548,-0.586649,-0.327042,-0.321255,1.142295,0.934151,-0.279683,0.490498,0.642099,1.185993,-0.741369,1.337103,-0.242987,-0.190785,0.0,0.322979,...,-0.686812,0.363574,0.812171,1.93187,-0.820197,-0.425219,-0.136512,4,1,-0.166375,0.048069,-0.32638,-0.041399,-0.105891,-0.071695,-0.168464,-0.173161,-0.16849,-0.049726,-0.052151,-0.108606,-0.079622,-0.06164,-0.041126,-0.064922,-0.014144,-0.014144,-0.011768,-0.010946,-0.737942,0.611928,-0.027299,0.0,0.0,0.0,0.005948,0.416382,0.036636,0.034186,0.466819


Unnamed: 0,p1_has_charizard,p1_has_alakazam,p1_has_dragonite,p1_has_victreebel,p1_has_golem,p1_has_jolteon,p1_has_exeggutor,p1_has_tauros,p1_has_lapras,p1_has_starmie,p1_has_persian,p1_has_jynx,p1_has_slowbro,p1_has_gengar,p1_has_zapdos,p1_has_chansey,p1_has_cloyster,p1_has_articuno,p1_has_snorlax,p1_has_rhydon,p1_mean_hp,p1_mean_spe,p1_mean_atk,p1_mean_def,p1_mean_special,p1_lead_hp,p1_lead_atk,p1_lead_def,p1_lead_special,p1_lead_spe,p2_lead_hp,p2_lead_spe,p2_lead_atk,p2_lead_def,p2_lead_special,spe_lead_adv,p1_unique_pokemon,p2_unique_pokemon,n_turns,mean_damage_dealt,...,p2_mean_power,p2_mean_accuracy,p2_lost_hp,p2_turns_statused,p2_missed_turns,p2_switches,p2_net_boost,battle_id,player_won,p2_pkm_substitute,p2_pkm_reflect,p1_pkm_frz,p1_pkm_wrap,p1_pkm_tox,p1_pkm_psn,p1_pkm_confusion,p1_pkm_substitute,p2_pkm_confusion,p1_pkm_clamp,p2_pkm_clamp,p2_pkm_tox,p2_pkm_psn,p2_pkm_brn,p2_pkm_wrap,p1_pkm_brn,p1_pkm_typechange,p2_pkm_typechange,p1_pkm_firespin,p2_pkm_firespin,pkm_fnt_adv,lost_hp_adv,missed_turns_adv,p1_pkm_nostatus,p2_pkm_nostatus,pkm_nostatus_adv,pkm_slp_adv,pkm_frz_adv,unique_pokemon_adv,switches_adv,overall_advantage
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,...,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,-1.4210850000000002e-17,-7.105427e-18,-7.105427000000001e-17,-3.1263880000000006e-17,4.9737990000000006e-17,7.673862e-17,9.80549e-17,1.875833e-16,4.973799e-18,0.0,4.8316910000000003e-17,6.394885e-18,4.8316910000000003e-17,3.0908610000000003e-17,-1.421085e-18,-7.531753000000001e-17,4.1211480000000007e-17,1.278977e-17,-4.4053650000000004e-17,1.016076e-16,-4.185097e-16,-9.471535e-16,7.389644e-17,3.140599e-16,1.67546e-15,-3.7658760000000006e-17,3.268497e-17,-3.268497e-17,2.422951e-16,1.854517e-16,5.115908e-17,1.605827e-16,1.012523e-16,7.105426999999999e-19,2.785328e-16,-3.1263880000000006e-17,-1.136868e-16,-2.842171e-16,0.0,5.909939e-16,...,1.620037e-16,-1.056577e-15,-4.5474740000000006e-17,-9.094947000000001e-17,1.364242e-16,3.183231e-16,-2.2737370000000003e-17,4999.5,0.5,1.5631940000000003e-17,-1.0835780000000001e-17,-2.842171e-18,2.131628e-18,2.2737370000000003e-17,-1.9895200000000002e-17,-2.557954e-17,1.421085e-18,-5.542233e-17,-7.460699e-18,-2.0605740000000003e-17,9.947598e-18,-2.842171e-18,-3.1796790000000005e-17,6.394885e-18,-1.634248e-17,1.0302870000000002e-17,1.0302870000000002e-17,3.5527139999999997e-19,6.394885e-18,-2.103206e-16,2.2737370000000003e-17,2.2737370000000003e-17,0.0,0.0,0.0,1.136868e-17,0.0,4.5474740000000006e-17,-2.620482e-15,-1.023182e-16
std,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,0.0,1.00005,...,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,2886.89568,0.500025,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,1.00005,0.0,0.0,0.0,1.00005,1.00005,1.00005,1.00005,1.00005
min,-0.09258979,-0.9508084,-0.1998335,-0.2025242,-0.3717181,-0.317357,-1.74274,-2.928915,-0.3416059,-0.923516,-0.1870358,-0.5599488,-0.3432463,-0.5762725,-0.6030741,-2.358321,-0.4024674,-0.193247,-2.246319,-0.595522,-3.714422,-3.58614,-3.073463,-2.262559,-3.100336,-0.6650673,-1.911174,-1.681599,-3.241686,-2.145242,-0.620693,-3.470958,-3.147117,-2.409928,-3.591093,-0.7478854,-4.889016,-4.755008,0.0,-3.404248,...,-3.154841,-24.70957,-2.63399,-1.979194,-2.12567,-2.869807,-5.118686,0.0,0.0,-0.1663754,-0.4646711,-0.32638,-0.04139927,-0.1058914,-0.07169523,-0.1684638,-0.1731605,-0.1684904,-0.04972632,-0.0521513,-0.108606,-0.0796216,-0.06163991,-0.04112586,-0.06492211,-0.01414355,-0.01414355,-0.01176778,-0.01094606,-0.7379443,-2.986614,-3.272985,0.0,0.0,0.0,-1.304925,-3.326101,-5.861558,-1.463987,-2.407285
25%,-0.09258979,-0.9508084,-0.1998335,-0.2025242,-0.3717181,-0.317357,0.5738092,0.3414233,-0.3416059,-0.923516,-0.1870358,-0.5599488,-0.3432463,-0.5762725,-0.6030741,0.4240304,-0.4024674,-0.193247,0.4451727,-0.595522,-0.2952418,-0.8139467,-0.7320642,-0.7454431,-0.6426962,-0.5082303,-0.3270423,-0.6613411,-0.8061411,-1.289855,-0.4501879,-0.4416091,-0.7111924,-0.6119674,-0.7413688,-0.7478854,-0.2429873,-0.1907845,0.0,-0.6681183,...,-0.6610847,-0.4070766,-0.6891268,-0.7347645,-0.8201966,-0.7744455,-0.1365116,2499.75,0.0,-0.1663754,-0.4646711,-0.32638,-0.04139927,-0.1058914,-0.07169523,-0.1684638,-0.1731605,-0.1684904,-0.04972632,-0.0521513,-0.108606,-0.0796216,-0.06163991,-0.04112586,-0.06492211,-0.01414355,-0.01414355,-0.01176778,-0.01094606,-0.7379439,-0.6872752,-0.6764362,0.0,0.0,0.0,-1.066584,0.416382,-0.7140428,-0.5229858,-0.7591487
50%,-0.09258979,-0.9508084,-0.1998335,-0.2025242,-0.3717181,-0.317357,0.5738092,0.3414233,-0.3416059,-0.923516,-0.1870358,-0.5599488,-0.3432463,-0.5762725,-0.6030741,0.4240304,-0.4024674,-0.193247,0.4451727,-0.595522,0.2642603,0.007443852,-0.2637844,-0.1554538,0.176517,-0.4298118,-0.3270423,-0.3212553,-0.3190321,0.5919961,-0.2796828,0.4904982,0.1007823,0.06226787,0.5539605,-0.7478854,-0.2429873,-0.1907845,0.0,-0.06707954,...,0.06870744,0.1939377,-0.04083921,-0.02366194,-0.1674597,-0.07599176,-0.1365116,4999.5,0.5,-0.1663754,-0.4646711,-0.32638,-0.04139927,-0.1058914,-0.07169523,-0.1684638,-0.1731605,-0.1684904,-0.04972632,-0.0521513,-0.108606,-0.0796216,-0.06163991,-0.04112586,-0.06492211,-0.01414355,-0.01414355,-0.01176778,-0.01094606,-0.7379422,-0.04513739,-0.02729913,0.0,0.0,0.0,0.005948293,0.416382,0.03663642,-0.1928099,-0.0229336
75%,-0.09258979,1.051737,-0.1998335,-0.2025242,-0.3717181,-0.317357,0.5738092,0.3414233,-0.3416059,1.082819,-0.1870358,-0.5599488,-0.3432463,-0.5762725,1.658171,0.4240304,-0.4024674,-0.193247,0.4451727,1.679199,0.6372618,0.5208129,0.5557053,0.5188198,0.6446389,0.0406992,0.553031,1.039088,0.8987405,0.7630735,-0.1091778,0.723525,0.6420988,1.185993,1.072092,1.337103,0.91852,0.9502713,0.0,0.5626859,...,0.7192934,0.6389051,0.6732656,0.6874406,0.4852773,0.6224619,-0.1365116,7499.25,1.0,-0.1663754,-0.2083009,-0.32638,-0.04139927,-0.1058914,-0.07169523,-0.1684638,-0.1731605,-0.1684904,-0.04972632,-0.0521513,-0.108606,-0.0796216,-0.06163991,-0.04112586,-0.06492211,-0.01414355,-0.01414355,-0.01176778,-0.01094606,0.7463351,0.6536333,0.7217052,0.0,0.0,0.0,1.025516,0.416382,0.7873157,0.3260379,0.7021701
max,10.80033,1.051737,5.004165,4.937683,2.69021,3.151026,0.5738092,0.3414233,2.92735,1.082819,5.346569,1.785878,2.91336,1.73529,1.658171,0.4240304,2.484673,5.174725,0.4451727,1.679199,1.694099,4.319744,4.62974,4.227324,3.687431,2.471673,2.630004,4.269903,1.142295,1.276306,6.199509,1.189579,3.835866,5.45615,1.072092,1.337103,0.91852,0.9502713,0.0,7.189937,...,4.33588,1.108321,3.98287,2.998524,7.665383,7.257772,7.33675,9999.0,1.0,30.38279,6.970067,10.03254,57.40257,35.91158,32.37658,13.11965,19.56827,14.25836,37.87713,36.13447,32.60403,26.40897,43.29365,57.02347,46.45248,70.70361,70.70361,98.05309,99.49873,4.457032,3.996694,3.218386,0.0,0.0,0.0,1.316821,0.416382,5.93483,14.33492,3.812364


p1_has_charizard      float64
p1_has_alakazam       float64
p1_has_dragonite      float64
p1_has_victreebel     float64
p1_has_golem          float64
                       ...   
pkm_slp_adv           float64
pkm_frz_adv           float64
unique_pokemon_adv    float64
switches_adv          float64
overall_advantage     float64
Length: 129, dtype: object


Testing dataset preview:


Unnamed: 0,p1_has_charizard,p1_has_alakazam,p1_has_dragonite,p1_has_victreebel,p1_has_golem,p1_has_jolteon,p1_has_exeggutor,p1_has_tauros,p1_has_lapras,p1_has_persian,p1_has_starmie,p1_has_jynx,p1_has_slowbro,p1_has_gengar,p1_has_zapdos,p1_has_chansey,p1_has_cloyster,p1_has_articuno,p1_has_snorlax,p1_has_rhydon,p1_mean_hp,p1_mean_spe,p1_mean_atk,p1_mean_def,p1_mean_special,p1_lead_hp,p1_lead_atk,p1_lead_def,p1_lead_special,p1_lead_spe,p2_lead_hp,p2_lead_spe,p2_lead_atk,p2_lead_def,p2_lead_special,spe_lead_adv,p1_unique_pokemon,p2_unique_pokemon,n_turns,mean_damage_dealt,...,p2_lost_hp,p2_turns_statused,p2_missed_turns,p2_switches,p2_net_boost,battle_id,p2_pkm_frz,p1_pkm_wrap,p2_pkm_clamp,p2_pkm_wrap,p1_pkm_brn,p1_pkm_confusion,p2_pkm_fnt,p1_pkm_clamp,p1_pkm_tox,p1_pkm_psn,p2_pkm_reflect,p1_pkm_reflect,p2_pkm_substitute,p2_pkm_tox,p2_pkm_brn,p2_pkm_confusion,p2_pkm_psn,p1_pkm_substitute,p1_pkm_firespin,p2_pkm_firespin,p1_pkm_typechange,p2_pkm_typechange,p1_pkm_disable,pkm_fnt_adv,lost_hp_adv,missed_turns_adv,p1_pkm_nostatus,p2_pkm_nostatus,pkm_nostatus_adv,pkm_slp_adv,pkm_frz_adv,unique_pokemon_adv,switches_adv,overall_advantage
0,-0.101514,-0.95616,-0.204655,-0.192689,-0.362245,-0.316784,0.558562,0.353354,-0.348682,5.101669,-0.936041,-0.541596,-0.354428,1.687932,-0.601687,0.438707,-0.402132,-0.202524,0.459609,-0.590593,0.347635,0.297212,-0.497947,-0.937347,-0.327487,2.42747,-1.90309,-1.690965,-0.344096,-1.460522,-0.111797,1.206605,0.120196,0.067841,-0.216179,-0.763728,-0.25292,-0.210231,0.0,0.358902,...,0.325505,1.0109,-0.172866,-0.40312,-0.130808,0,-0.317795,-0.051956,-0.049493,-0.044594,-0.060414,-0.168902,-0.602802,-0.046761,-0.105941,-0.081073,-0.450774,-0.480383,-0.160492,-0.113318,-0.071229,-0.179306,-0.072233,-0.145773,-0.014144,-0.014144,-0.014144,-0.014144,0.0,-0.725001,-0.098008,-0.836934,0.0,0.0,0.0,-0.3198,0.407015,0.029265,0.025819,-0.784265
1,-0.101514,-0.95616,4.886269,5.189698,-0.362245,3.156725,-1.790311,0.353354,-0.348682,-0.196014,1.068329,-0.541596,-0.354428,-0.592441,-0.601687,-2.279427,2.486746,-0.202524,-2.175763,-0.590593,-3.135315,2.448247,2.49983,2.635469,-0.209235,-0.435025,0.209725,0.193802,-0.096794,1.272235,-0.437793,0.730007,-0.697689,-0.607419,1.098777,1.309367,-1.414171,0.926151,0.0,-1.754321,...,-0.262702,0.313102,3.749966,-0.054702,-0.130808,1,-0.317795,-0.051956,-0.049493,-0.044594,-0.060414,-0.168902,-0.602802,-0.046761,-0.105941,-0.081073,-0.450774,-0.480383,-0.160492,-0.113318,-0.071229,-0.179306,-0.072233,-0.145773,-0.014144,-0.014144,-0.014144,-0.014144,0.0,-0.725,1.741145,2.284686,0.0,0.0,0.0,1.332289,0.407015,1.697766,-0.220225,2.095922
2,-0.101514,-0.95616,-0.204655,5.189698,-0.362245,-0.316784,-1.790311,0.353354,-0.348682,-0.196014,-0.936041,-0.541596,-0.354428,-0.592441,-0.601687,0.438707,2.486746,-0.202524,0.459609,1.693215,0.531594,-1.751393,1.825911,1.784799,-2.456024,2.42747,-1.90309,-1.690965,-0.344096,-1.460522,-0.437793,0.730007,-0.697689,-0.607419,1.098777,-0.763728,-0.25292,0.926151,0.0,-2.355355,...,-1.185299,1.0109,-0.499769,-0.40312,1.240344,2,1.906624,3.606893,3.005633,16.943684,-0.060414,-0.168902,-0.602802,-0.046761,-0.105941,-0.081073,-0.450774,-0.480383,-0.160492,-0.113318,-0.071229,-0.179306,-0.072233,-0.145773,-0.014144,-0.014144,-0.014144,-0.014144,0.0,-0.725,0.232402,-0.027625,0.0,0.0,0.0,1.332288,-2.665707,0.787674,0.025819,-0.098038
3,-0.101514,-0.95616,-0.204655,-0.192689,-0.362245,3.156725,0.558562,-2.830019,-0.348682,-0.196014,-0.936041,1.846394,-0.354428,-0.592441,-0.601687,0.438707,2.486746,-0.202524,0.459609,-0.590593,0.102357,-0.52223,-1.078912,0.083458,0.263773,-0.435025,-0.318479,-0.66291,-0.8387,0.076654,2.985166,-3.559383,2.573852,0.292928,-2.583099,1.309367,-2.575421,-0.210231,0.0,0.457355,...,0.256305,1.0109,-0.172866,-1.448375,-0.130808,3,-0.317795,-0.051956,-0.049493,-0.044594,38.458309,1.938863,-0.602802,-0.046761,-0.105941,-0.081073,-0.450774,-0.480383,-0.160492,-0.113318,-0.071229,-0.179306,-0.072233,-0.145773,-0.014144,-0.014144,-0.014144,-0.014144,0.0,1.899957,-0.022916,1.590992,0.0,0.0,0.0,1.332289,0.407015,2.114891,1.354457,2.405248
4,-0.101514,-0.95616,-0.204655,-0.192689,-0.362245,-0.316784,0.558562,0.353354,-0.348682,-0.196014,1.068329,-0.541596,-0.354428,1.687932,-0.601687,0.438707,-0.402132,-0.202524,0.459609,-0.590593,0.286316,0.297212,-0.381754,-0.512012,0.500277,-0.51239,0.561861,1.050514,-0.591398,0.759843,-0.274795,0.253408,0.120196,0.067841,0.835786,1.309367,-0.25292,0.926151,0.0,1.209074,...,0.31759,-0.384696,-0.172866,0.642135,-0.130808,4,-0.317795,-0.051956,-0.049493,-0.044594,-0.060414,-0.168902,1.163909,23.333727,-0.105941,-0.081073,-0.450774,-0.480383,-0.160492,-0.113318,-0.071229,-0.179306,-0.072233,-0.145773,-0.014144,-0.014144,-0.014144,-0.014144,0.0,0.587478,-0.067408,-0.027625,0.0,0.0,0.0,1.332289,0.407015,0.787674,-0.578108,1.465312


Unnamed: 0,p1_has_charizard,p1_has_alakazam,p1_has_dragonite,p1_has_victreebel,p1_has_golem,p1_has_jolteon,p1_has_exeggutor,p1_has_tauros,p1_has_lapras,p1_has_persian,p1_has_starmie,p1_has_jynx,p1_has_slowbro,p1_has_gengar,p1_has_zapdos,p1_has_chansey,p1_has_cloyster,p1_has_articuno,p1_has_snorlax,p1_has_rhydon,p1_mean_hp,p1_mean_spe,p1_mean_atk,p1_mean_def,p1_mean_special,p1_lead_hp,p1_lead_atk,p1_lead_def,p1_lead_special,p1_lead_spe,p2_lead_hp,p2_lead_spe,p2_lead_atk,p2_lead_def,p2_lead_special,spe_lead_adv,p1_unique_pokemon,p2_unique_pokemon,n_turns,mean_damage_dealt,...,p2_lost_hp,p2_turns_statused,p2_missed_turns,p2_switches,p2_net_boost,battle_id,p2_pkm_frz,p1_pkm_wrap,p2_pkm_clamp,p2_pkm_wrap,p1_pkm_brn,p1_pkm_confusion,p2_pkm_fnt,p1_pkm_clamp,p1_pkm_tox,p1_pkm_psn,p2_pkm_reflect,p1_pkm_reflect,p2_pkm_substitute,p2_pkm_tox,p2_pkm_brn,p2_pkm_confusion,p2_pkm_psn,p1_pkm_substitute,p1_pkm_firespin,p2_pkm_firespin,p1_pkm_typechange,p2_pkm_typechange,p1_pkm_disable,pkm_fnt_adv,lost_hp_adv,missed_turns_adv,p1_pkm_nostatus,p2_pkm_nostatus,pkm_nostatus_adv,pkm_slp_adv,pkm_frz_adv,unique_pokemon_adv,switches_adv,overall_advantage
count,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,...,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0
mean,1.705303e-17,-5.684342e-17,3.1263880000000006e-17,-5.684342e-17,-8.526513e-17,-5.684342e-18,2.842171e-17,-1.250555e-16,-2.7711170000000003e-17,4.4053650000000004e-17,-6.821210000000001e-17,-3.1263880000000006e-17,0.0,4.263256e-18,1.136868e-17,1.847411e-17,-8.526513e-17,1.705303e-17,-2.2737370000000003e-17,2.806644e-17,-9.123369e-16,1.355005e-15,3.637979e-16,-7.560175e-16,-1.540457e-15,-8.526513e-17,1.421085e-16,-9.947598e-18,3.396394e-16,-2.4158450000000003e-17,-4.8316910000000003e-17,-3.467449e-16,-2.2737370000000003e-17,-9.166001e-17,1.634248e-17,-3.1263880000000006e-17,-5.002221e-16,4.774847e-16,0.0,3.11573e-16,...,9.094947000000001e-17,4.5474740000000006e-17,-1.136868e-16,-2.2737370000000003e-17,3.4106050000000003e-17,2499.5,0.0,-1.278977e-17,4.263256e-18,-5.684342e-18,-1.4210850000000002e-17,-1.4210850000000002e-17,2.557954e-17,-1.278977e-17,-1.136868e-17,5.684342e-18,5.2580160000000004e-17,1.136868e-17,7.105427e-18,0.0,8.526513e-18,-3.4106050000000003e-17,-1.136868e-17,2.2737370000000003e-17,7.105427e-18,9.947598e-18,1.421085e-18,1.421085e-18,0.0012,4.774847e-16,-4.5474740000000006e-17,0.0,0.0,0.0,0.0,0.0,0.0,-4.5474740000000006e-17,1.455192e-15,-1.818989e-16
std,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,0.0,1.0001,...,1.0001,1.0001,1.0001,1.0001,1.0001,1443.520003,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,1.0001,0.084853,1.0001,1.0001,1.0001,0.0,0.0,0.0,1.0001,1.0001,1.0001,1.0001,1.0001
min,-0.1015141,-0.95616,-0.2046551,-0.1926895,-0.3622454,-0.316784,-1.790311,-2.830019,-0.3486821,-0.1960143,-0.9360409,-0.5415962,-0.354428,-0.5924409,-0.6016865,-2.279427,-0.4021319,-0.2025242,-2.175763,-0.5905926,-3.638136,-3.595138,-2.821806,-2.213353,-3.520292,-0.6671191,-1.90309,-1.690965,-3.311721,-2.143711,-0.6007913,-3.559383,-3.151344,-2.408115,-3.635064,-0.7637281,-4.897923,-4.755759,0.0,-3.40779,...,-2.661667,-1.954741,-2.134282,-2.842048,-4.244263,0.0,-0.317795,-0.05195565,-0.04949304,-0.04459423,-0.06041357,-0.1689022,-0.6028019,-0.04676098,-0.1059408,-0.08107306,-0.4507735,-0.4803827,-0.1604918,-0.113318,-0.07122943,-0.1793064,-0.07223342,-0.145773,-0.01414355,-0.01414355,-0.01414355,-0.01414355,0.0,-0.7250017,-2.994953,-3.264861,0.0,0.0,0.0,-1.311053,-3.433889,-5.929666,-1.532461,-2.534371
25%,-0.1015141,-0.95616,-0.2046551,-0.1926895,-0.3622454,-0.316784,0.5585621,0.3533545,-0.3486821,-0.1960143,-0.9360409,-0.5415962,-0.354428,-0.5924409,-0.6016865,0.4387068,-0.4021319,-0.2025242,0.4596089,-0.5905926,-0.3268798,-0.8295211,-0.7303329,-0.7672129,-0.6822433,-0.5123896,-0.3184786,-0.6629101,-0.5913982,-1.289725,-0.4377932,-0.4614907,-0.6976887,-0.6074195,-0.742161,-0.7637281,-0.2529204,-0.2102307,0.0,-0.6656967,...,-0.7315808,-0.7335948,-0.4997688,-0.7515382,-0.1308079,1249.75,-0.317795,-0.05195565,-0.04949304,-0.04459423,-0.06041357,-0.1689022,-0.6028019,-0.04676098,-0.1059408,-0.08107306,-0.4507735,-0.4803827,-0.1604918,-0.113318,-0.07122943,-0.1793064,-0.07223342,-0.145773,-0.01414355,-0.01414355,-0.01414355,-0.01414355,0.0,-0.7250013,-0.7081538,-0.675073,0.0,0.0,0.0,-1.046719,0.407015,-0.7291446,-0.5781077,-0.7726071
50%,-0.1015141,-0.95616,-0.2046551,-0.1926895,-0.3622454,-0.316784,0.5585621,0.3533545,-0.3486821,-0.1960143,-0.9360409,-0.5415962,-0.354428,-0.5924409,-0.6016865,0.4387068,-0.4021319,-0.2025242,0.4596089,-0.5905926,0.2863157,0.09235111,-0.2655612,-0.1717436,0.263773,-0.4350249,-0.3184786,-0.3202253,-0.3440961,0.5890457,-0.2747952,0.4917071,0.1201964,0.06784119,0.5727947,-0.7637281,-0.2529204,-0.2102307,0.0,-0.08543411,...,-0.04445279,-0.03579702,-0.1728661,-0.05470167,-0.1308079,2499.5,-0.317795,-0.05195565,-0.04949304,-0.04459423,-0.06041357,-0.1689022,-0.6028019,-0.04676098,-0.1059408,-0.08107306,-0.4507735,-0.4803827,-0.1604918,-0.113318,-0.07122943,-0.1793064,-0.07223342,-0.145773,-0.01414355,-0.01414355,-0.01414355,-0.01414355,0.0,-0.7249995,-0.04417714,-0.027625,0.0,0.0,0.0,0.010618,0.407015,0.02926485,-0.2202254,-0.02297294
75%,-0.1015141,1.04585,-0.2046551,-0.1926895,-0.3622454,-0.316784,0.5585621,0.3533545,-0.3486821,-0.1960143,1.068329,-0.5415962,-0.354428,1.687932,1.661995,0.4387068,-0.4021319,-0.2025242,0.4596089,1.693215,0.654233,0.5020721,0.5477892,0.5087928,0.6185291,0.02916341,0.5618608,1.050514,0.8924143,0.759843,-0.1117971,0.7300065,0.6654531,1.193276,1.098777,1.309367,0.9083302,0.9261513,0.0,0.5642958,...,0.6830435,0.6620007,0.4809392,0.6421349,-0.1308079,3749.25,-0.317795,-0.05195565,-0.04949304,-0.04459423,-0.06041357,-0.1689022,1.163909,-0.04676098,-0.1059408,-0.08107306,-0.1866381,0.0309347,-0.1604918,-0.113318,-0.07122943,-0.1793064,-0.07223342,-0.145773,-0.01414355,-0.01414355,-0.01414355,-0.01414355,0.0,0.5874778,0.65981,0.719429,0.0,0.0,0.0,1.001871,0.407015,0.7876743,0.3421611,0.7105995
max,9.850848,1.04585,4.886269,5.189698,2.76056,3.156725,0.5585621,0.3533545,2.867942,5.101669,1.068329,1.846394,2.821449,1.687932,1.661995,0.4387068,2.486746,4.937683,0.4596089,1.693215,1.696665,5.009003,3.894145,4.251743,3.220074,2.42747,2.639462,4.30602,1.139716,1.272235,5.919131,1.206605,3.882468,5.469927,1.098777,1.309367,0.9083302,0.9261513,0.0,5.896058,...,3.598104,3.104293,7.672798,6.216828,8.096102,4999.0,10.8043,36.53653,36.61202,44.54964,38.45831,13.18027,6.464043,35.02397,23.17775,22.27363,7.209154,6.422402,19.72696,23.646843,31.8592,13.37209,26.57124,28.55777,70.70361,70.70361,70.70361,70.70361,6.0,4.524914,4.018115,3.20961,0.0,0.0,0.0,1.332289,0.407015,5.988196,15.52658,3.557247


p1_has_charizard      float64
p1_has_alakazam       float64
p1_has_dragonite      float64
p1_has_victreebel     float64
p1_has_golem          float64
                       ...   
pkm_slp_adv           float64
pkm_frz_adv           float64
unique_pokemon_adv    float64
switches_adv          float64
overall_advantage     float64
Length: 129, dtype: object

['p1_has_charizard', 'p1_has_alakazam', 'p1_has_dragonite', 'p1_has_victreebel', 'p1_has_golem', 'p1_has_jolteon', 'p1_has_exeggutor', 'p1_has_tauros', 'p1_has_lapras', 'p1_has_persian', 'p1_has_starmie', 'p1_has_jynx', 'p1_has_slowbro', 'p1_has_gengar', 'p1_has_zapdos', 'p1_has_chansey', 'p1_has_cloyster', 'p1_has_articuno', 'p1_has_snorlax', 'p1_has_rhydon', 'p1_mean_hp', 'p1_mean_spe', 'p1_mean_atk', 'p1_mean_def', 'p1_mean_special', 'p1_lead_hp', 'p1_lead_atk', 'p1_lead_def', 'p1_lead_special', 'p1_lead_spe', 'p2_lead_hp', 'p2_lead_spe', 'p2_lead_atk', 'p2_lead_def', 'p2_lead_special', 'spe_lead_adv', 'p1_unique_pokemon', 'p2_unique_pokemon', 'n_turns', 'mean_damage_dealt', 'final_p1_hp', 'p1_fainted_count', 'p2_fainted_count', 'p1_mean_final_hp', 'p2_mean_final_hp', 'p1_hp_loss_ratio', 'p2_hp_loss_ratio', 'hp_diff_lost', 'hp_diff_final', 'hp_ratio_p1_p2', 'p1_mean_hp_pct', 'p2_mean_hp_pct', 'p1_min_hp', 'p2_min_hp', 'p1_survival_ratio', 'p2_survival_ratio', 'survival_diff', 'p1_av

### 3. Training Models

In [10]:
# Define predictor features (X) and target (y)
X_train = train_feats_df[keepers] #[keepers]
print(len(train_feats_df.columns))
y_train = train_feats_df['player_won']

X_test = test_feats_df[keepers] #[keepers]

print("Training...")
model = XGBClassifier(
    random_state=100,
    n_estimators=200,
    learning_rate=0.05,
    max_depth=3,
    eval_metric='logloss',
    n_jobs=-1
)
model = CatBoostClassifier(iterations=100, learning_rate=0.1, depth=6, verbose=0)
model = LGBMClassifier(
    learning_rate=0.03165579861591166, 
    num_leaves=125,
    max_depth=3, 
    min_child_samples=29, 
    reg_alpha=7.1991672853117725,  
    reg_lambda=3.052954302286083, 
    feature_fraction=0.9504554803940148, 
    bagging_fraction=0.9731871954622454, 
    bagging_freq=6, 
    objective='binary',
    n_estimators=335,
    random_state=100,
    verbose=-1
)
model = LinearSVC(C=0.1, loss='squared_hinge', max_iter=30000)
model.fit(X_train, y_train)
print("Model training complete.")

129
Training...
Model training complete.


In [11]:
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=100)
cv_results = cross_validate(
    model,
    X_train,
    y_train,
    cv=cv,
    scoring={
        "accuracy_score": make_scorer(accuracy_score),
        "precision_score": make_scorer(precision_score),
        "recall_score": make_scorer(recall_score),
        "f1_score": make_scorer(f1_score),
        "roc_auc_score": make_scorer(roc_auc_score)
    },
    return_train_score=True,
    n_jobs=1
)

results_feats_df = pd.DataFrame(cv_results)
display(results_feats_df)

results_feats_df = pd.DataFrame(cv_results)
summary = results_feats_df.filter(regex='(train_|test_)').describe().loc[['mean', 'std']].T
summary.rename(columns={'mean': 'Mean', 'std': 'Std'}, inplace=True)
display(summary)

Unnamed: 0,fit_time,score_time,test_accuracy_score,train_accuracy_score,test_precision_score,train_precision_score,test_recall_score,train_recall_score,test_f1_score,train_f1_score,test_roc_auc_score,train_roc_auc_score
0,0.398241,0.042968,0.836,0.845667,0.834661,0.847442,0.838,0.843111,0.836327,0.845271,0.836,0.845667
1,0.22132,0.008985,0.839,0.846222,0.839679,0.846684,0.838,0.845556,0.838839,0.84612,0.839,0.846222
2,0.242206,0.009999,0.836,0.845222,0.833333,0.846841,0.84,0.842889,0.836653,0.84486,0.836,0.845222
3,0.371167,0.007998,0.828,0.847889,0.821569,0.850145,0.838,0.844667,0.829703,0.847397,0.828,0.847889
4,0.276032,0.007967,0.828,0.846111,0.822835,0.847424,0.836,0.844222,0.829365,0.84582,0.828,0.846111
5,0.333153,0.010749,0.848,0.844444,0.859504,0.845828,0.832,0.842444,0.845528,0.844133,0.848,0.844444
6,0.23161,0.008959,0.842,0.846,0.85041,0.846462,0.83,0.845333,0.840081,0.845897,0.842,0.846
7,0.1992,0.008996,0.859,0.844111,0.859719,0.846188,0.858,0.841111,0.858859,0.843642,0.859,0.844111
8,0.242004,0.008,0.842,0.845667,0.837945,0.847598,0.848,0.842889,0.842942,0.845237,0.842,0.845667
9,0.409208,0.008998,0.845,0.845778,0.845691,0.846548,0.844,0.844667,0.844845,0.845606,0.845,0.845778


Unnamed: 0,Mean,Std
test_accuracy_score,0.8403,0.009298
train_accuracy_score,0.845711,0.001036
test_precision_score,0.840535,0.013411
train_precision_score,0.847116,0.001208
test_recall_score,0.8402,0.008135
train_recall_score,0.843689,0.001422
test_f1_score,0.840314,0.00858
train_f1_score,0.845398,0.001054
test_roc_auc_score,0.8403,0.009298
train_roc_auc_score,0.845711,0.001036


### 4. Creating the Submission File

The competition requires a `.csv` file with two columns: `battle_id` and `player_won`. Let's use our trained model to make predictions on the test set and format them correctly.

In [12]:
print("Generating predictions on the test set...")
submission_feats_df = pd.DataFrame({
    'battle_id': test_feats_df['battle_id'],
    'player_won': model.predict(X_test)
})

submission_feats_df.to_csv('submission.csv', index=False)

print("\n'submission.csv' file created successfully!")
display(submission_feats_df.head())

Generating predictions on the test set...

'submission.csv' file created successfully!


Unnamed: 0,battle_id,player_won
0,0,0
1,1,1
2,2,1
3,3,1
4,4,1


### 5. Submitting Your Results

Once you have generated your `submission.csv` file, there are two primary ways to submit it to the competition.

---

#### Method A: Submitting Directly from the Notebook

This is the standard method for code competitions. It ensures that your submission is linked to the code that produced it, which is crucial for reproducibility.

1.  **Save Your Work:** Click the **"Save Version"** button in the top-right corner of the notebook editor.
2.  **Run the Notebook:** In the pop-up window, select **"Save & Run All (Commit)"** and then click the **"Save"** button. This will run your entire notebook from top to bottom and save the output, including your `submission.csv` file.
3.  **Go to the Viewer:** Once the save process is complete, navigate to the notebook viewer page. 
4.  **Submit to Competition:** In the viewer, find the **"Submit to Competition"** section. This is usually located in the header of the output section or in the vertical "..." menu on the right side of the page. Clicking the **Submit** button this will submit your generated `submission.csv` file.

After submitting, you will see your score in the **"Submit to Competition"** section or in the [Public Leaderboard](https://www.kaggle.com/competitions/fds-pokemon-battles-prediction-2025/leaderboard?).

---

#### Method B: Manual Upload

You can also generate your predictions and submission file using any environment you prefer (this notebook, Google Colab, or your local machine).

1.  **Generate the `submission.csv` file** using your model.
2.  **Download the file** to your computer.
3.  **Navigate to the [Leaderboard Page](https://www.kaggle.com/competitions/fds-pokemon-battles-prediction-2025/leaderboard?)** and click on the **"Submit Predictions"** button.
4.  **Upload Your File:** Drag and drop or select your `submission.csv` file to upload it.

This method is quick, but keep in mind that for the final evaluation, you might be required to provide the code that generated your submission.

Good luck!