In [None]:
from tqdm.notebook import tqdm
import numpy as np

def create_simple_features(data: list[dict]) -> pd.DataFrame:
    """
    Build a flat feature table with one row per battle.

    For every battle dict in ``data`` the following columns are produced:

    * ``p1_mean_hp``, ``p1_mean_spe``, ``p1_mean_atk``, ``p1_mean_def``:
      mean of the corresponding ``base_*`` stat over ``p1_team_details``
      (a team member missing a stat contributes 0).
    * ``p1_type_<t>`` for every ``t`` in the module-level ``all_types``:
      number of occurrences of type ``t`` across the team divided by the
      team size.
    * ``p2_lead_hp``, ``p2_lead_spe``, ``p2_lead_atk``, ``p2_lead_def``:
      base stats read from ``p2_lead_details`` (0 when a stat is missing).
    * ``battle_id`` and, when the key is present, ``player_won`` cast to int.

    Feature groups whose source is missing or empty are omitted from the row;
    every missing value in the final frame is then replaced with 0.

    Note:
        The global ``all_types`` must be defined before this function is
        called. Types that are not part of ``all_types`` are ignored so the
        set of ``p1_type_*`` columns stays fixed across datasets.

    Args:
        data: Battle records, one dict per battle.

    Returns:
        A DataFrame with one row per battle and NaNs filled with 0.
    """
    # Stat suffixes, in the order the columns are emitted.
    stats = ('hp', 'spe', 'atk', 'def')

    feature_list = []
    for battle in tqdm(data, desc="Extracting features"):
        features = {}

        # --- Player 1 Team Features ---
        p1_team = battle.get('p1_team_details', [])
        if p1_team:
            for stat in stats:
                features[f'p1_mean_{stat}'] = np.mean(
                    [p.get(f'base_{stat}', 0) for p in p1_team]
                )

            # --- Player 1 type composition ---
            type_counts = {t: 0 for t in all_types}
            for p in p1_team:
                for t in p.get('types', []):
                    # A type outside the known vocabulary has no feature
                    # column; skip it instead of raising KeyError.
                    if t in type_counts:
                        type_counts[t] += 1
            # Non-zero here: this branch only runs for a non-empty team.
            team_size = len(p1_team)
            for t in all_types:
                features[f'p1_type_{t}'] = type_counts[t] / team_size

        # --- Player 2 Lead Features ---
        p2_lead = battle.get('p2_lead_details')
        if p2_lead:
            # Base stats of player 2's lead Pokemon
            for stat in stats:
                features[f'p2_lead_{stat}'] = p2_lead.get(f'base_{stat}', 0)

        features['battle_id'] = battle.get('battle_id')
        if 'player_won' in battle:
            features['player_won'] = int(battle['player_won'])

        feature_list.append(features)

    return pd.DataFrame(feature_list).fillna(0)
# Vocabulary of Pokemon types seen on player 1's teams in the training set.
# Sorted so the derived `p1_type_<t>` feature columns have a deterministic
# order. Missing 'p1_team_details' / 'types' keys are tolerated, matching the
# `.get` lookups used in create_simple_features.
all_types = sorted({
    t
    for battle in train_data
    for p in battle.get('p1_team_details', [])
    for t in p.get('types', [])
})



def extract_type_features(team, types=None):
    """
    Return how often each type occurs in ``team``, normalised by team size.

    Args:
        team: Sized collection of Pokemon dicts; each may carry a ``'types'``
            list. Members without that key contribute no types.
        types: Type names to report. Defaults to the module-level
            ``all_types`` when omitted.

    Returns:
        A dict mapping every name in ``types`` to
        ``occurrences across the team / len(team)`` as a float. Types found
        on the team but absent from ``types`` are ignored. An empty team
        yields 0.0 for every type instead of dividing by zero.
    """
    if types is None:
        types = all_types

    type_counts = {t: 0 for t in types}
    for p in team:
        for t in p.get('types', []):
            # Only count types that have a slot in the output.
            if t in type_counts:
                type_counts[t] += 1

    team_size = len(team)
    if team_size == 0:
        return {t: 0.0 for t in type_counts}
    return {t: count / team_size for t, count in type_counts.items()}

# Quick sanity check: compute and print the type features of player 1's team
# from the first training battle.
team = train_data[0]['p1_team_details']
type_features = extract_type_features(team)
print(type_features)

        ########################################################################################
        #####################################ADDITION###########################################
        ########################################################################################






# Create feature DataFrames for both training and test sets
print("Processing training data...")
train_df = create_simple_features(train_data)

print("\nProcessing test data...")
# The test file is read as JSON Lines: one JSON document per line.
# Decode explicitly as UTF-8 rather than relying on the platform's default
# encoding, and skip blank lines, which json.loads would reject.
with open(test_file_path, 'r', encoding='utf-8') as f:
    test_data = [json.loads(line) for line in f if line.strip()]
test_df = create_simple_features(test_data)

print("\nTraining features preview:")
display(train_df.head())