In [6]:
# LOAD JSONS   train_data e test_data

import json
import pandas as pd
import os

# --- Define the path to our data ---
train_file_path = 'train.jsonl'
test_file_path  = 'test.jsonl'

# Number of timeline turns kept when previewing a battle on screen.
PREVIEW_TURNS = 2


def load_jsonl(path):
    """Read a JSON Lines file and return its records as a list.

    The file is decoded as UTF-8 regardless of the platform default, and
    blank / whitespace-only lines (e.g. a trailing newline at end of file)
    are skipped instead of crashing the JSON parser.

    Args:
        path: Path of the ``.jsonl`` file to read.

    Returns:
        A list with one decoded JSON value per non-blank line.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return [json.loads(line) for line in f if line.strip()]


def preview_battle(battle, max_turns=PREVIEW_TURNS):
    """Build a display-friendly copy of a battle with a shortened timeline.

    Args:
        battle: A battle dict; ``battle_timeline`` is optional.
        max_turns: How many timeline entries to keep in the preview.

    Returns:
        A ``(preview, truncated)`` tuple. ``preview`` is a shallow copy of
        ``battle`` whose ``battle_timeline`` holds at most ``max_turns``
        entries; ``truncated`` is True when entries were actually dropped.
        The input dict is not modified.
    """
    timeline = battle.get('battle_timeline', [])
    preview = battle.copy()
    preview['battle_timeline'] = timeline[:max_turns]
    # Compare against the same limit used for slicing so the notice is
    # printed exactly when something was cut.
    return preview, len(timeline) > max_turns


def load_battles(path, split_name, file_label):
    """Load one split, report the outcome, and preview its first battle.

    Args:
        path: Path of the ``.jsonl`` file.
        split_name: Short name used in the success/preview messages
            (``'train'`` or ``'test'``).
        file_label: Wording used in the "file not found" message
            (``'training'`` or ``'test'``).

    Returns:
        The list of loaded battles, or an empty list when the file is
        missing (an error message is printed in that case).
    """
    try:
        battles = load_jsonl(path)
    except FileNotFoundError:
        print(f"❌ ERROR: Could not find the {file_label} file at '{path}'.")
        print("Please make sure you have added the competition data to this notebook.")
        return []

    print(f"✅ Successfully loaded {len(battles)} battles from {split_name}.")

    # Show the structure of the first battle, with a shortened timeline.
    if battles:
        print(f"\n--- Structure of the first {split_name} battle: ---")
        preview, truncated = preview_battle(battles[0])
        print(json.dumps(preview, indent=4))
        if truncated:
            print("    ...")
            print("    (battle_timeline has been truncated for display)")
    return battles


# --- Load TRAIN data ---
print(f"📦 Loading data from '{train_file_path}'...")
train_data = load_battles(train_file_path, 'train', 'training')

# --- Load TEST data ---
print(f"\n📦 Loading data from '{test_file_path}'...")
test_data = load_battles(test_file_path, 'test', 'test')


📦 Loading data from 'train.jsonl'...
✅ Successfully loaded 10000 battles from train.

--- Structure of the first train battle: ---
{
    "player_won": true,
    "p1_team_details": [
        {
            "name": "starmie",
            "level": 100,
            "types": [
                "psychic",
                "water"
            ],
            "base_hp": 60,
            "base_atk": 75,
            "base_def": 85,
            "base_spa": 100,
            "base_spd": 100,
            "base_spe": 115
        },
        {
            "name": "exeggutor",
            "level": 100,
            "types": [
                "grass",
                "psychic"
            ],
            "base_hp": 95,
            "base_atk": 95,
            "base_def": 85,
            "base_spa": 125,
            "base_spd": 125,
            "base_spe": 55
        },
        {
            "name": "chansey",
            "level": 100,
            "types": [
                "normal",
                "notype"
    

In [46]:
#Per vedere i dati in dataframe, divisi in 4 blocchi dove squadra==p1 e pokemon==p2
import pandas as pd

# Base-stat keys copied verbatim from each Pokémon record.
_BASE_STATS = ("base_hp", "base_atk", "base_def", "base_spa", "base_spd", "base_spe")

# (output column suffix, source key) pairs for the per-turn Pokémon state.
_STATE_FIELDS = (
    ("pokemon", "name"),
    ("hp", "hp_pct"),
    ("status", "status"),
    ("effects", "effects"),
    ("boosts", "boosts"),
)

# (output column suffix, source key) pairs for the per-turn move details.
_MOVE_FIELDS = (
    ("name", "name"),
    ("type", "type"),
    ("cat", "category"),
    ("basepow", "base_power"),
    ("acc", "accuracy"),
    ("priority", "priority"),
)

_SIDES = ("p1", "p2")

# Explicit column lists keep the schema stable even when `data` is empty.
_BATTLE_COLUMNS = ["battle_id", "player_won"]
_POKEMON_COLUMNS = ["battle_id", "name", "level", "types", *_BASE_STATS]
_TIMELINE_COLUMNS = (
    ["battle_id", "turn"]
    + [f"{side}_{col}" for side in _SIDES for col, _ in _STATE_FIELDS]
    + [f"{side}_move_{col}" for side in _SIDES for col, _ in _MOVE_FIELDS]
)


def _pokemon_row(battle_id, pokemon):
    """Flatten one Pokémon record into a row tagged with its battle id."""
    row = {
        "battle_id": battle_id,
        "name": pokemon["name"],
        "level": pokemon["level"],
        # Tuples are hashable, so the column can be used for de-duplication.
        "types": tuple(pokemon.get("types", [])),
    }
    row.update((stat, pokemon[stat]) for stat in _BASE_STATS)
    return row


def _timeline_row(battle_id, turn):
    """Flatten one timeline turn (both sides' state and move) into a row."""
    row = {"battle_id": battle_id, "turn": turn["turn"]}
    for side in _SIDES:
        state = turn[f"{side}_pokemon_state"]
        for col, key in _STATE_FIELDS:
            row[f"{side}_{col}"] = state[key]
    for side in _SIDES:
        # Move details are missing/None when the side did not use a move.
        move = turn.get(f"{side}_move_details")
        for col, key in _MOVE_FIELDS:
            row[f"{side}_move_{col}"] = move[key] if move else None
    return row


def create_dataframe(data):
    """Split a list of battle dicts into four flat DataFrames.

    Args:
        data: List of battle dicts. Each must contain ``battle_id``,
            ``p1_team_details`` and ``p2_lead_details``; ``player_won`` and
            ``battle_timeline`` are optional.

    Returns:
        A list ``[df_battle, df_squad, df_pokemon, df_battle_timeline]``:

        * ``df_battle`` - one row per battle (``battle_id``, ``player_won``;
          the latter is None when the key is absent).
        * ``df_squad`` - one row per Pokémon in ``p1_team_details``.
        * ``df_pokemon`` - one row per battle from ``p2_lead_details``.
        * ``df_battle_timeline`` - one row per turn with both sides' state
          and move details (move columns are None when no move was made).

        Every frame always has its full set of columns, even when ``data``
        is empty, so downstream column lookups do not fail.

    Raises:
        KeyError: If a required key is missing from a battle record.
    """
    # General frame with the basic battle info.
    df_battle = pd.DataFrame(
        [
            {"battle_id": b["battle_id"], "player_won": b.get("player_won", None)}
            for b in data
        ],
        columns=_BATTLE_COLUMNS,
    )

    # Pokémon of the player's squad (p1_team_details).
    df_squad = pd.DataFrame(
        [_pokemon_row(b["battle_id"], p) for b in data for p in b["p1_team_details"]],
        columns=_POKEMON_COLUMNS,
    )

    # Details of the opponent's lead Pokémon (p2_lead_details).
    df_pokemon = pd.DataFrame(
        [_pokemon_row(b["battle_id"], b["p2_lead_details"]) for b in data],
        columns=_POKEMON_COLUMNS,
    )

    # Turn-by-turn timeline; a battle without a timeline contributes no rows.
    df_battle_timeline = pd.DataFrame(
        [
            _timeline_row(b["battle_id"], t)
            for b in data
            for t in b.get("battle_timeline", [])
        ],
        columns=_TIMELINE_COLUMNS,
    )

    return [df_battle, df_squad, df_pokemon, df_battle_timeline]

# Build the frames for both splits (train_data and test_data).
# Each list holds, in order: battle, squad, pokemon, timeline.
train_list = create_dataframe(train_data)
test_list = create_dataframe(test_data)

Controllare le categorie degli attacchi presenti nel dataframe

In [54]:
df_battle_timeline = train_list[3]

# Gather the move categories used by both players on every turn.
all_names = [
    *df_battle_timeline["p1_move_cat"],
    *df_battle_timeline["p2_move_cat"],
]

# Drop missing values (turns without a move) and duplicates, then sort alphabetically.
move_catt = sorted({category for category in all_names if pd.notna(category)})

print(f"Numero totale di categorie uniche: {len(move_catt)}")
print(move_catt)  # Prints the whole list of categories

Numero totale di categorie uniche: 3
['PHYSICAL', 'SPECIAL', 'STATUS']


CREATE POKEDEX

In [56]:
def create_pokedex(df_list):
    """Build a name/types lookup table from the squad and lead frames.

    Args:
        df_list: Sequence whose items at index 1 and 2 are the squad and
            opponent-lead DataFrames; both need ``name`` and ``types``
            columns.

    Returns:
        A DataFrame with the columns ``name`` and ``types``, holding one row
        per distinct (name, types) pair in order of first appearance, with a
        fresh 0-based index. The number of rows is also printed.
    """
    squad_frame, lead_frame = df_list[1], df_list[2]

    # Stack both sources, then keep the first occurrence of each (name, types) pair.
    pokedex = (
        pd.concat([squad_frame, lead_frame], ignore_index=True)
        .drop_duplicates(subset=["name", "types"])
        .reset_index(drop=True)
    )
    print(f"Pokedex creato con {len(pokedex)} Pokémon unici ")

    return pokedex[["name", "types"]]


Funzioni per controllare che abbia preso tutti i Pokémon

In [49]:
# Check of the Pokémon names known before the battles.
# Collects every Pokémon name from the squad frame and the opponent-lead frame.
df_squad = train_list[1]
df_pokemon = train_list[2]
all_names = [*df_squad["name"], *df_pokemon["name"]]

# Remove duplicates and sort alphabetically.
unique_names = list(set(all_names))
unique_names.sort()

print(f"Numero totale di Pokémon unici trovati: {len(unique_names)}")
print(unique_names[:20])  # Show the first 20 as a sample

Numero totale di Pokémon unici trovati: 20
['alakazam', 'articuno', 'chansey', 'charizard', 'cloyster', 'dragonite', 'exeggutor', 'gengar', 'golem', 'jolteon', 'jynx', 'lapras', 'persian', 'rhydon', 'slowbro', 'snorlax', 'starmie', 'tauros', 'victreebel', 'zapdos']


In [50]:
# Check of the Pokémon names that appear during the battles.
# Collects every active Pokémon name (p1 and p2) from the timeline frame.
df_battle_timeline = train_list[3]
all_names = [
    *df_battle_timeline["p1_pokemon"],
    *df_battle_timeline["p2_pokemon"],
]

# Remove duplicates and sort alphabetically.
unique_names_battle = list(set(all_names))
unique_names_battle.sort()

print(f"Numero totale di Pokémon unici trovati: {len(unique_names_battle)}")
print(unique_names_battle[:20])  # Show the first 20 as a sample
# True when the timeline names match the pre-battle names exactly.
print(unique_names==unique_names_battle)

Numero totale di Pokémon unici trovati: 20
['alakazam', 'articuno', 'chansey', 'charizard', 'cloyster', 'dragonite', 'exeggutor', 'gengar', 'golem', 'jolteon', 'jynx', 'lapras', 'persian', 'rhydon', 'slowbro', 'snorlax', 'starmie', 'tauros', 'victreebel', 'zapdos']
True


In [57]:
df_all_pokemon = create_pokedex(train_list)
print(df_all_pokemon)

# Check the Pokédex names against the two name lists collected earlier.
all_pokedex_names = sorted({*df_all_pokemon["name"]})
same_as_pre_battle = all_pokedex_names == unique_names
same_as_battle = all_pokedex_names == unique_names_battle
print("Stessi pokemon del pre battaglia: ", same_as_pre_battle)
print("Stessi pokemon della battaglia: ", same_as_battle)

Pokedex creato con 20 Pokémon unici 
          name               types
0      starmie    (psychic, water)
1    exeggutor    (grass, psychic)
2      chansey    (normal, notype)
3      snorlax    (normal, notype)
4       tauros    (normal, notype)
5     alakazam   (notype, psychic)
6         jynx      (ice, psychic)
7      slowbro    (psychic, water)
8       gengar     (ghost, poison)
9       rhydon      (ground, rock)
10      zapdos  (electric, flying)
11    cloyster        (ice, water)
12       golem      (ground, rock)
13     jolteon  (electric, notype)
14    articuno       (flying, ice)
15     persian    (normal, notype)
16      lapras        (ice, water)
17   dragonite    (dragon, flying)
18   charizard      (fire, flying)
19  victreebel     (grass, poison)
Stessi pokemon del pre battaglia:  True
Stessi pokemon della battaglia:  True


TIPI DI POKEMON

In [37]:
def unique_t(lista):
    """Return the sorted list of distinct Pokémon types.

    Args:
        lista: Sequence whose items at index 1 and 2 are DataFrames with a
            ``types`` column, each cell being an iterable of type names.

    Returns:
        Alphabetically sorted list of every type name found in either frame,
        without duplicates.
    """
    lead_types = lista[2]["types"]
    squad_types = lista[1]["types"]

    found = set()
    for type_group in pd.concat([lead_types, squad_types]):
        found.update(type_group)
    return sorted(found)

In [43]:
# Real types only: 'notype' is the placeholder used for a missing second type.
# Filtering (instead of list.remove) does not raise when the placeholder
# happens to be absent from the data.
tipi = [t for t in unique_t(train_list) if t != 'notype']
print(tipi)
print(len(tipi))

['dragon', 'electric', 'fire', 'flying', 'ghost', 'grass', 'ground', 'ice', 'normal', 'poison', 'psychic', 'rock', 'water']
13


DATAFRAME VANTAGGI/SVANTAGGI TIPI

In [44]:
# Type chart initialised to 1.0 in every cell; rows and columns are both
# labelled with the type names in `tipi`.
df_chart = pd.DataFrame(data=1.0, index=tipi, columns=tipi)
print(df_chart)

          dragon  electric  fire  flying  ghost  grass  ground  ice  normal  \
dragon       1.0       1.0   1.0     1.0    1.0    1.0     1.0  1.0     1.0   
electric     1.0       1.0   1.0     1.0    1.0    1.0     1.0  1.0     1.0   
fire         1.0       1.0   1.0     1.0    1.0    1.0     1.0  1.0     1.0   
flying       1.0       1.0   1.0     1.0    1.0    1.0     1.0  1.0     1.0   
ghost        1.0       1.0   1.0     1.0    1.0    1.0     1.0  1.0     1.0   
grass        1.0       1.0   1.0     1.0    1.0    1.0     1.0  1.0     1.0   
ground       1.0       1.0   1.0     1.0    1.0    1.0     1.0  1.0     1.0   
ice          1.0       1.0   1.0     1.0    1.0    1.0     1.0  1.0     1.0   
normal       1.0       1.0   1.0     1.0    1.0    1.0     1.0  1.0     1.0   
poison       1.0       1.0   1.0     1.0    1.0    1.0     1.0  1.0     1.0   
psychic      1.0       1.0   1.0     1.0    1.0    1.0     1.0  1.0     1.0   
rock         1.0       1.0   1.0     1.0    1.0    1