In [1]:
import json
import pandas as pd
import os

# --- Define the path to our data ---
COMPETITION_NAME = 'fds-pokemon-battles-prediction-2025'
DATA_PATH = os.path.join('../input', COMPETITION_NAME)

train_file_path = os.path.join(DATA_PATH, 'train.jsonl')
test_file_path = os.path.join(DATA_PATH, 'test.jsonl')
train_data = []

# Read the file line by line
print(f"Loading data from '{train_file_path}'...")
try:
    with open(train_file_path, 'r') as f:
        for line in f:
            # json.loads() parses one line (one JSON object) into a Python dictionary
            train_data.append(json.loads(line))

    print(f"Successfully loaded {len(train_data)} battles.")

    # Let's inspect the first battle to see its structure
    print("\n--- Structure of the first train battle: ---")
    if train_data:
        first_battle = train_data[0]
        
        # To keep the output clean, we can create a copy and truncate the timeline
        battle_for_display = first_battle.copy()
        battle_for_display['battle_timeline'] = battle_for_display.get('battle_timeline', [])[:2] # Show first 2 turns
        
        # Use json.dumps for pretty-printing the dictionary
        print(json.dumps(battle_for_display, indent=4))
        if len(first_battle.get('battle_timeline', [])) > 3:
            print("    ...")
            print("    (battle_timeline has been truncated for display)")


except FileNotFoundError:
    print(f"ERROR: Could not find the training file at '{train_file_path}'.")
    print("Please make sure you have added the competition data to this notebook.")

Loading data from '../input/fds-pokemon-battles-prediction-2025/train.jsonl'...
Successfully loaded 10000 battles.

--- Structure of the first train battle: ---
{
    "player_won": true,
    "p1_team_details": [
        {
            "name": "starmie",
            "level": 100,
            "types": [
                "psychic",
                "water"
            ],
            "base_hp": 60,
            "base_atk": 75,
            "base_def": 85,
            "base_spa": 100,
            "base_spd": 100,
            "base_spe": 115
        },
        {
            "name": "exeggutor",
            "level": 100,
            "types": [
                "grass",
                "psychic"
            ],
            "base_hp": 95,
            "base_atk": 95,
            "base_def": 85,
            "base_spa": 125,
            "base_spd": 125,
            "base_spe": 55
        },
        {
            "name": "chansey",
            "level": 100,
            "types": [
                "normal",

In [2]:
# immune = moves that deal 0x to my pokemons
# weakness = moves that deal 2x to my pokemons
# resistence = moves that deal 1/2x to my pokemons

types_dict = {
    "normal": {
        "immune": ["ghost"],
        "weakness": ["fighting"],
        "resistence": []
    },
    "fire": {
        "immune": [],
        "weakness": ["water", "ground", "rock"],
        "resistence": ["fire", "grass", "bug"]
    },
    "water": {
        "immune": [],
        "weakness": ["electric", "grass"],
        "resistence": ["fire", "water", "ice"]
    },
    "electric": {
        "immune": [],
        "weakness": ["ground"],
        "resistence": ["electric", "flying"]
    },
    "grass": {
        "immune": [],
        "weakness": ["fire", "ice", "poison", "flying", "bug"],
        "resistence": ["water", "electric", "grass", "ground"]
    },
    "ice": {
        "immune": [],
        "weakness": ["fire", "fighting", "rock"],
        "resistence": ["ice"]
    },
    "fighting": {
        "immune": [],
        "weakness": ["flying", "psychic"],
        "resistence": ["bug", "rock"]
    },
    "poison": {
        "immune": [],
        "weakness": ["ground", "psychic", "bug"],
        "resistence": ["fighting", "poison", "grass"]
    },
    "ground": {
        "immune": ["electric"],
        "weakness": ["water", "grass", "ice"],
        "resistence": ["poison", "rock"]
    },
    "flying": {
        "immune": ["ground"],
        "weakness": ["electric", "ice", "rock"],
        "resistence": ["grass", "fighting", "bug"]
    },
    "psychic": {
        "immune": ["ghost"],
        "weakness": ["bug"],
        "resistence": ["fighting", "psychic"]
    },
    "bug": {
        "immune": [],
        "weakness": ["fire", "flying", "rock", "poison"],
        "resistence": ["grass", "fighting", "ground"]
    },
    "rock": {
        "immune": [],
        "weakness": ["water", "grass", "fighting", "ground"],
        "resistence": ["normal", "fire", "poison", "flying"]
    },
    "ghost": {
        "immune": ["normal", "fighting"],
        "weakness": ["ghost"],
        "resistence": ["poison", "bug"]
    },
    "dragon": {
        "immune": [],
        "weakness": ["ice", "dragon"],
        "resistence": ["fire", "water", "electric", "grass"]
    }
}

In [3]:
# Riferimento (fatto da Chat)
def static_features_2(battle: dict, pokedex=None) -> dict:
    """
    Build static, outcome-agnostic features from a single battle JSON object.

    Sections:
      1) Player 1 (P1) team aggregates
      2) Player 2 (P2) lead aggregates
      3) Direct deltas: P1 team avg vs P2 lead
      4) P2 observed roster (all opponents seen in timeline) + their averages
      5) Robust deltas: P1 team avg vs P2 observed averages
      6) Coverage diagnostics (how many P2 mons were actually seen)
      7) Opening matchup (turn-1 speed delta between active leads)
    """
    features = {}

    # -------------------------------------------------------------------------
    # 1) P1 TEAM AGGREGATES
    # -------------------------------------------------------------------------
    p1_team = battle.get("p1_team_details", []) or []

    # Team type diversity (number of distinct elemental types, ignoring "notype")
    features["p1_team_diversity"] = len({
        (t or "").lower()
        for p in p1_team
        for t in (p.get("types") or [])
        if t and (t.lower() != "notype")
    })

    # Average critical rate proxy across team (Gen-style approx: base_spe -> crit rate)
    if p1_team:
        features["p1_avg_crit_rate"] = float(np.mean([crit_rate(p.get("base_spe", 0)) for p in p1_team]))
        # Average base stats across the full P1 team
        for stat in stats:
            features[f"p1_mean_{stat}"] = float(np.mean([p.get(f"base_{stat}", 0) for p in p1_team]))

    # -------------------------------------------------------------------------
    # 2) P2 LEAD AGGREGATES
    # -------------------------------------------------------------------------
    p2_lead = battle.get("p2_lead_details") or {}
    if p2_lead:
        features["p2_crit_rate"] = crit_rate(p2_lead.get("base_spe", 0))
        for stat in stats:
            features[f"p2_lead_{stat}"] = p2_lead.get(f"base_{stat}", 0)

    # -------------------------------------------------------------------------
    # 3) DIRECT DELTAS: P1 TEAM AVG VS P2 LEAD
    #    (early-game signal: opening matchup against the known lead)
    # -------------------------------------------------------------------------
    if p1_team and p2_lead:
        for stat in stats:
            features[f"diff_{stat}"] = features.get(f"p1_mean_{stat}", 0.0) - features.get(f"p2_lead_{stat}", 0.0)

    # -------------------------------------------------------------------------
    # 4) P2 OBSERVED ROSTER (ALL OPPONENTS SEEN IN TIMELINE) + THEIR AVERAGES
    #    Build the set of opponent names seen; fall back to lead if none observed.
    #    Then, fetch their base stats from the pokedex (or from lead as fallback).
    # -------------------------------------------------------------------------
    timeline = battle.get("battle_timeline", []) or []
    p2_seen_names = []
    for turn in timeline:
        name = (turn.get("p2_pokemon_state") or {}).get("name")
        if name:
            p2_seen_names.append(name)

    # At least include the lead if nothing else was seen
    if not p2_seen_names and p2_lead:
        p2_seen_names = [p2_lead.get("name")]

    # Unique preserving order
    p2_seen_unique = list(dict.fromkeys(p2_seen_names))

    # Collect base stat rows for all observed P2 mons
    p2_rows = []
    for name in p2_seen_unique:
        row = None
        if pokedex is not None:
            row = pokedex.get(name)  # expected to contain base_* keys if created via create_pokedex
        # Fallback: if it's the actual lead, reuse lead stats
        if row is None and p2_lead and name == p2_lead.get("name"):
            row = {f"base_{s}": p2_lead.get(f"base_{s}", 0) for s in stats}
        if row:
            p2_rows.append(row)

    # Compute observed averages over P2 roster
    if p2_rows:
        for stat in stats:
            features[f"p2_observed_mean_{stat}"] = float(np.mean([r.get(f"base_{stat}", 0) for r in p2_rows]))

    # -------------------------------------------------------------------------
    # 5) ROBUST DELTAS: P1 TEAM AVG VS P2 OBSERVED AVERAGES
    #    (full-game signal: compares P1 against the actual opposing roster we saw)
    # -------------------------------------------------------------------------
    if p1_team and p2_rows:
        for stat in stats:
            features[f"diff_all_{stat}"] = (
                features.get(f"p1_mean_{stat}", 0.0) - features.get(f"p2_observed_mean_{stat}", 0.0)
            )

    # -------------------------------------------------------------------------
    # 6) COVERAGE DIAGNOSTICS
    #    How much of P2's roster did we see?
    # -------------------------------------------------------------------------
    unique_seen = len(set(p2_seen_unique)) if p2_seen_unique else 0
    features["p2_unique_seen"] = unique_seen
    features["p2_seen_coverage"] = min(unique_seen / 6.0, 1.0)

    # -------------------------------------------------------------------------
    # 7) OPENING MATCHUP: TURN-1 SPEED DELTA (ACTIVE VS ACTIVE)
    #    Uses the actual P1 lead sent out vs the known P2 lead base speed.
    # -------------------------------------------------------------------------
    if p1_team and p2_lead and timeline:
        first_turn = timeline[0]
        p1_active_name = (first_turn.get("p1_pokemon_state") or {}).get("name")
        p1_active = next((p for p in p1_team if p.get("name") == p1_active_name), None)
        if p1_active:
            p1_spe = p1_active.get("base_spe", 0)
            p2_spe = features.get("p2_lead_spe", 0)
            features["spe_diff"] = p1_spe - p2_spe
        else:
            features["spe_diff"] = 0.0

    p1_seen_names = []
    for turn in timeline:
        name = (turn.get("p1_pokemon_state") or {}).get("name")
        if name:
            p1_seen_names.append(name)
    p1_seen_unique = list(dict.fromkeys(p1_seen_names))  # unique, order-preserving

    # -------------------------------------------------------------------------
    # 6b) FULL-TEAM INDICATORS & SYMMETRIC DIFF (NEW)
    # -------------------------------------------------------------------------
    p1_unique_seen = len(set(p1_seen_unique)) if p1_seen_unique else 0
    features["p1_unique_seen"] = p1_unique_seen
    features["p2_full_team_seen"] = 1 if features["p2_unique_seen"] == 6 else 0
    features["p1_full_team_seen"] = 1 if p1_unique_seen == 6 else 0
    features["unique_seen_diff"] = features["p2_unique_seen"] - p1_unique_seen  # (>0 ⇒ P2 used more mons)

    return features

In [4]:
from tqdm.notebook import tqdm
import numpy as np
stats = stats = ["hp", "spe", "atk", "def", "spd", "spa"]

def create_pokedex(data):
    pokedex = {}
    for battle in tqdm(data, desc="Creating pokedex"):
        
        p1_team = battle.get('p1_team_details', [])
        p2_lead = battle.get('p2_lead_details')

        pokemon_list = p1_team + [p2_lead]

        for pokemon in pokemon_list:
            pokemon_name = pokemon.get('name')
            if pokemon_name not in pokedex:
                pokemon_stats = {f'base_{stat}': pokemon.get(f'base_{stat}') for stat in stats}
                
                pokedex[pokemon_name] = pokemon_stats

    return pokedex

def crit_rate(base_speed):
    rate = base_speed * 100 / 512
    return round(rate, 4)

def static_features(battle: dict) -> dict: 

    features = {}

    # --- Player 1 Team Features ---
    p1_team = battle.get('p1_team_details', [])
    #features['p1_team_diversity'] = len(set(t for p in p1_team for t in p.get('types', []) if t != "notype"))
    if p1_team:
        #features['p1_max_crit_rate'] = max([crit_rate(p.get('base_spe', 0)) for p in p1_team])
        
        # Average stats for p1 team
        for stat in stats:
            features[f'p1_mean_{stat}'] = np.mean([p.get(f'base_{stat}', 0) for p in p1_team])

        
    # --- Player 2 Lead Features ---
    p2_lead = battle.get('p2_lead_details')
    if p2_lead:
        #features['p2_lead_crit_rate'] = crit_rate(p2_lead.get('base_spe', 0))
        
        # Stats for lead pokemon p2
        for stat in stats:
            features[f'p2_lead_{stat}'] = p2_lead.get(f'base_{stat}', 0)


    # --- First turn matchup ---
    battle_timeline = battle.get('battle_timeline', [])
    if p1_team and p2_lead and battle_timeline:
        first_turn = battle_timeline[0]
        p1_pokemon_name = first_turn.get('p1_pokemon_state', {}).get('name')

        # Find matching Pokemon in p1_team
        p1_pokemon = next((p for p in p1_team if p.get('name') == p1_pokemon_name), None)

        if p1_pokemon: 
            p1_spe = p1_pokemon.get('base_spe', 0)
            p2_spe = features['p2_lead_spe']
            features['spe_diff'] = p1_spe - p2_spe
        else: 
            features['spe_diff'] = 0.0

    return features

# status info with weights 
def extract_status_features(battle):
    status_weights = {"slp": 4, "frz": 6, "par": 2.5,"tox": 1, "psn": 1,"brn": 1}

    features = {}

    battle_timeline = battle.get('battle_timeline', [])
    p1_score = 0.0
    p2_score = 0.0

    for turn in battle_timeline:
        p1_status = turn.get('p1_pokemon_state', {}).get('status')
        p2_status = turn.get('p2_pokemon_state', {}).get('status')

        if p1_status in status_weights:
            p1_score += status_weights[p1_status]
        if p2_status in status_weights:
            p2_score += status_weights[p2_status]

    features['status_diff'] = p1_score - p2_score
    return features

def first_move_advantage(battle, pokedex):
    features = {}
    return features

    
# TODO - Risistemare
def dynamic_features(battle: dict) -> dict:

    features = {
        'p1_ko_count': 0, 'p2_ko_count': 0, 
    }

    p1_hp_loss = 0.0
    p2_hp_loss = 0.0
    prev_p1_hp = None
    prev_p2_hp = None
    
    battle_timeline = battle.get('battle_timeline', [])

    for turn in battle_timeline:
        p1_pokemon_state = turn.get('p1_pokemon_state', {})
        p2_pokemon_state = turn.get('p2_pokemon_state', {})
        
        p1_status = p1_pokemon_state.get('status', {})
        p2_status = p2_pokemon_state.get('status', {})

        p1_hp = p1_pokemon_state.get('hp_pct', 1.0)
        p2_hp = p2_pokemon_state.get('hp_pct', 1.0)

        # HP loss 
        if prev_p1_hp is not None:
            d = p1_hp - prev_p1_hp
            if d < 0:
                p1_hp_loss += -d
        if prev_p2_hp is not None:
            d = p2_hp - prev_p2_hp
            if d < 0:
                p2_hp_loss += -d

        prev_p1_hp = p1_hp
        prev_p2_hp = p2_hp

        features['p1_hp_loss'] = round(p1_hp_loss * 100, 2) 
        features['p2_hp_loss'] = round(p2_hp_loss * 100, 2)

        # Number of turns with altered status
        if p1_status not in ['nostatus', 'fnt']:
            key = 'p1_bad_status'
            features[key] = features.get(key, 0) + 1

        if p2_status not in ['nostatus', 'fnt']:
            key = 'p2_bad_status'
            features[key] = features.get(key, 0) + 1

        # Number of fainted pokemons
        if p1_status == 'fnt': 
            features['p1_ko_count'] += 1
            
        if p2_status == 'fnt': 
            features['p2_ko_count'] += 1

    return features
    

def create_features(data: list[dict], pokedex) -> pd.DataFrame:
    """
    A very basic feature extraction function.
    It only uses the aggregated base stats of the player's team and opponent's lead.
    """
    feature_list = []
    for battle in tqdm(data, desc="Extracting features"):
        #if battle.get('battle_id') == 4877: continue
        
        features = {}

        features.update(extract_status_features(battle))
        features.update(static_features_2(battle))
        #features.update(static_features(battle))
        features.update(dynamic_features(battle))

        # We also need the ID and the target variable (if it exists)
        features['battle_id'] = battle.get('battle_id')
        if 'player_won' in battle:
            features['player_won'] = int(battle['player_won'])
            
        feature_list.append(features)
        
    return pd.DataFrame(feature_list).fillna(0)


# Create pokedex
print("\nProcessing pokemons...")
pokedex = create_pokedex(train_data)

# Create feature DataFrames for both training and test sets
print("\nProcessing training data...")
train_df = create_features(train_data, pokedex)

print("\nProcessing test data...")
test_data = []
with open(test_file_path, 'r') as f:
    for line in f:
        test_data.append(json.loads(line))
test_df = create_features(test_data, pokedex)

print("\nTraining features preview:")
display(train_df.head(10))
display(train_df.tail(10))


Processing pokemons...


Creating pokedex:   0%|          | 0/10000 [00:00<?, ?it/s]


Processing training data...


Extracting features:   0%|          | 0/10000 [00:00<?, ?it/s]


Processing test data...


Extracting features:   0%|          | 0/5000 [00:00<?, ?it/s]


Training features preview:


Unnamed: 0,status_diff,p1_team_diversity,p1_avg_crit_rate,p1_mean_hp,p1_mean_spe,p1_mean_atk,p1_mean_def,p1_mean_spd,p1_mean_spa,p2_crit_rate,...,p1_full_team_seen,unique_seen_diff,p1_ko_count,p2_ko_count,p1_hp_loss,p2_hp_loss,p2_bad_status,p1_bad_status,battle_id,player_won
0,-63.5,4,15.625,115.833333,80.0,72.5,63.333333,100.0,100.0,22.4609,...,0,0,1,1,315.16,351.56,16.0,6.0,0,1
1,6.0,5,12.044283,123.333333,61.666667,72.5,65.833333,90.0,90.0,23.4375,...,1,0,3,0,452.0,230.0,5.0,8.0,1,1
2,-6.0,7,12.858083,124.166667,65.833333,84.166667,71.666667,90.0,90.0,9.7656,...,0,1,1,0,188.0,208.0,14.0,14.0,2,1
3,22.5,7,14.8112,121.666667,75.833333,77.5,65.833333,103.333333,103.333333,21.4844,...,0,-1,3,0,395.0,303.0,5.0,17.0,3,1
4,-35.0,5,14.160167,114.166667,72.5,75.833333,79.166667,97.5,97.5,22.4609,...,0,0,1,0,285.0,387.0,22.0,8.0,4,1
5,9.5,6,16.601567,103.333333,85.0,70.833333,70.0,100.0,100.0,23.4375,...,1,-1,0,0,203.0,287.0,6.0,11.0,5,1
6,-4.5,8,15.787783,74.166667,80.833333,89.166667,105.833333,99.166667,99.166667,22.4609,...,1,0,3,2,498.0,409.0,11.0,8.0,6,1
7,62.5,6,17.2526,89.166667,88.333333,86.666667,76.666667,103.333333,103.333333,25.3906,...,0,-1,2,0,434.0,69.0,2.0,16.0,7,1
8,-26.5,8,15.787783,74.166667,80.833333,89.166667,105.833333,99.166667,99.166667,18.5547,...,0,1,4,1,503.0,175.0,19.0,9.0,8,1
9,2.5,5,15.136717,120.833333,77.5,75.0,63.333333,104.166667,104.166667,23.4375,...,1,0,2,0,321.03,315.67,7.0,11.0,9,1


Unnamed: 0,status_diff,p1_team_diversity,p1_avg_crit_rate,p1_mean_hp,p1_mean_spe,p1_mean_atk,p1_mean_def,p1_mean_spd,p1_mean_spa,p2_crit_rate,...,p1_full_team_seen,unique_seen_diff,p1_ko_count,p2_ko_count,p1_hp_loss,p2_hp_loss,p2_bad_status,p1_bad_status,battle_id,player_won
9990,26.5,5,14.485683,118.333333,74.166667,74.166667,60.833333,85.833333,85.833333,10.7422,...,1,-1,5,1,606.42,193.77,1.0,8.0,9990,0
9991,23.5,4,15.625,115.833333,80.0,72.5,63.333333,100.0,100.0,18.5547,...,1,-1,3,1,471.0,414.0,6.0,13.0,9991,0
9992,10.5,6,14.322917,122.5,73.333333,75.0,61.666667,97.5,97.5,23.4375,...,0,0,4,1,531.0,334.0,11.0,14.0,9992,0
9993,29.0,4,12.858083,121.666667,65.833333,72.5,67.5,96.666667,96.666667,18.5547,...,1,-1,4,1,472.62,385.58,3.0,11.0,9993,0
9994,14.0,6,12.3698,125.0,63.333333,81.666667,67.5,84.166667,84.166667,18.5547,...,0,-1,2,0,331.33,268.45,3.0,8.0,9994,0
9995,32.5,6,13.020833,124.166667,66.666667,85.833333,75.833333,85.0,85.0,22.4609,...,1,0,2,0,324.0,242.0,6.0,16.0,9995,0
9996,-10.0,8,12.858067,115.0,65.833333,74.166667,75.0,102.5,102.5,22.4609,...,0,1,3,0,416.0,377.0,14.0,10.0,9996,0
9997,25.5,7,15.462233,111.666667,79.166667,78.333333,72.5,100.833333,100.833333,19.5312,...,0,1,1,0,284.0,223.0,9.0,18.0,9997,0
9998,53.5,6,13.020833,124.166667,66.666667,85.833333,75.833333,85.0,85.0,22.4609,...,0,0,3,1,444.0,339.0,0.0,19.0,9998,0
9999,-5.0,7,18.229167,85.833333,93.333333,80.833333,66.666667,91.666667,91.666667,18.5547,...,1,-1,4,2,400.0,486.0,8.0,9.0,9999,0


In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

def get_best_model(X_train, y_train):

    pipe = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000, random_state=42))

    # Define the parameter grid to search
    param_grid = {
        'logisticregression__C': [0.01, 0.1, 1, 10],
        'logisticregression__penalty': ['l1', 'l2'],
        'logisticregression__solver': ['liblinear']
    }

    # Create the GridSearchCV object
    grid_logreg = GridSearchCV(
        estimator=pipe,
        param_grid=param_grid,
        scoring='accuracy',
        n_jobs=4,        # use 4 cores in parallel
        cv=5,            # 5-fold cross-validation, more on this later
        refit=True,      # retrain the best model on the full training set
        return_train_score=True
    )

    # Fit the GridSearchCV object on the training data
    grid_logreg.fit(X_train, y_train)

    # Print the best accuracy score found during grid search
    best_score = grid_logreg.best_score_
    print("Best accuracy score:", best_score)

    # Extract the best hyperparameter combination
    best_params = grid_logreg.best_params_
    print("\nBest hyperparameters:")
    print(best_params)

    return grid_logreg.best_estimator_

kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Define our features (X) and target (y)
features = [col for col in train_df.columns if col not in ['battle_id', 'player_won']]
X_train = train_df[features]
y_train = train_df['player_won']

model = get_best_model(X_train, y_train)

# split()  method generate indices to split data into training and test set.
for count, (train_index, test_index) in enumerate(kf.split(X_train, y_train)):
    print(f'Fold:{count}, Train set: {len(train_index)}, Test set:{len(test_index)}')
    count += 1

# Define test features
X_test = test_df[features]

# Cross validation
score = cross_val_score(model, X_train, y_train, cv=kf, scoring="accuracy")
print(f'Scores for each fold are: {score}')
print(f'Average score: {"{:.2f}".format(score.mean())}')

print("Cross validation complete.")

Best accuracy score: 0.8289

Best hyperparameters:
{'logisticregression__C': 0.1, 'logisticregression__penalty': 'l2', 'logisticregression__solver': 'liblinear'}
Fold:0, Train set: 8000, Test set:2000
Fold:1, Train set: 8000, Test set:2000
Fold:2, Train set: 8000, Test set:2000
Fold:3, Train set: 8000, Test set:2000
Fold:4, Train set: 8000, Test set:2000
Scores for each fold are: [0.8235 0.83   0.832  0.827  0.832 ]
Average score: 0.83
Cross validation complete.


In [6]:
# Train the model
print("Training a simple Logistic Regression model...")
model.fit(X_train, y_train)
print("Model training complete.")


# Make predictions on the test data
print("Generating predictions on the test set...")
test_predictions = model.predict(X_test)

# Create the submission DataFrame
submission_df = pd.DataFrame({
    'battle_id': test_df['battle_id'],
    'player_won': test_predictions
})

# Save the DataFrame to a .csv file
submission_df.to_csv('submission.csv', index=False)

print("\n'submission.csv' file created successfully!")
display(submission_df.head())

Training a simple Logistic Regression model...
Model training complete.
Generating predictions on the test set...

'submission.csv' file created successfully!


Unnamed: 0,battle_id,player_won
0,0,0
1,1,1
2,2,1
3,3,1
4,4,1
