In [1]:
import pandas as pd
import ast  # For safely evaluating string-formatted lists
import sys

def _parse_deck(raw_deck):
    """Return the list encoded in *raw_deck*, or None when it is unusable.

    *raw_deck* is one cell of a ``players_N_spells`` column. Anything that
    ``ast.literal_eval`` rejects (malformed text, or a non-string such as
    NaN) and anything that does not evaluate to a list yields None.
    """
    try:
        deck = ast.literal_eval(raw_deck)
    except (ValueError, SyntaxError, TypeError):
        return None
    return deck if isinstance(deck, list) else None


def _tally_card_outcomes(battles_df, troop_set):
    """Count (arena, card_name, outcome, evo) occurrences over all battles.

    Each battle row contributes the cards of both players. A card entry is
    counted only when it is a tuple of at least three items whose first
    item is in *troop_set*; the third item is recorded as the evo status.
    Returns a ``collections.Counter`` keyed by the four-field tuple.
    """
    from collections import Counter

    tally = Counter()
    for row in battles_df.itertuples():
        # Outcome label per spell column; a winner flag other than 1 is 'Lost'.
        outcomes = {
            'players_0_spells': 'Won' if row.players_0_winner == 1 else 'Lost',
            'players_1_spells': 'Won' if row.players_1_winner == 1 else 'Lost',
        }
        for col_name, outcome in outcomes.items():
            deck = _parse_deck(getattr(row, col_name))
            if deck is None:
                continue
            for card in deck:
                if not (isinstance(card, tuple) and len(card) >= 3):
                    continue
                card_name = card[0]
                if card_name in troop_set:
                    # Counting in place avoids holding one dict per card
                    # occurrence in memory before aggregation.
                    tally[(row.arena, card_name, outcome, card[2])] += 1
    return tally


def arena_win_loss_data_collection():
    """
    Count per-arena wins and losses for every known troop card and save
    the result to a CSV.

    Reads the arena map, the troop-name list and the preprocessed battle
    log from fixed relative paths, counts how often each
    (arena, card_name, outcome, evo) combination occurs across both
    players of every battle, replaces arena IDs with arena names (rows
    whose arena ID is not in the arena map are dropped), and writes the
    counts to ``arenawise_card_win_loss.csv``.

    Returns None. Progress is reported with ``print``; any exception,
    including a missing input file, is caught and printed rather than
    raised.
    """
    key_cols = ['arena', 'card_name', 'outcome', 'evo']
    try:
        # --- 1. Load Arena and Troop Data ---
        arenas_df = pd.read_csv("../#2 Data Storage/Utils/arenas.csv")
        arenas_dict = {
            row.Arena_ID: row.Arena_Name for row in arenas_df.itertuples()
        }

        troop_file_name = '../#2 Data Storage/Utils/troop_name.csv'
        troop_df = pd.read_csv(troop_file_name)
        troop_list = troop_df['Troop_name'].unique()
        print(f"Loaded {len(troop_list)} unique troop names.")
        # A set gives constant-time membership tests inside the hot loop.
        troop_set = set(troop_list)

        # --- 2. Load Battle Log Data ---
        file_name = '../#2 Data Storage/Processed Data/preprocessed_battle_log_full_batch.csv'
        df = pd.read_csv(file_name)
        print("Data loaded. Processing for all troops...")

        # --- 3. Tally card usage for both players of every battle ---
        tally = _tally_card_outcomes(df, troop_set)

        # --- 4. Build the grouped DataFrame and save it ---
        if not tally:
            print("No usage data found for any troops.")
            return

        tally_df = pd.DataFrame(
            [(*key, count) for key, count in tally.items()],
            columns=key_cols + ['count'],
        )
        # Summing the per-key tallies through groupby keeps the same key
        # ordering and missing-key handling as grouping the raw rows.
        grouped_df = tally_df.groupby(key_cols)['count'].sum().reset_index()

        # Map arena IDs to names AFTER grouping
        grouped_df['arena'] = grouped_df['arena'].map(arenas_dict)
        grouped_df = grouped_df.dropna(subset=['arena'])  # Drop any arenas not in our map

        # Re-order columns for clarity
        grouped_df = grouped_df[key_cols + ['count']]

        output_filename = "../#2 Data Storage/Visualization Data/arenawise_card_win_loss.csv"
        grouped_df.to_csv(output_filename, index=False)
        print(f"\nSuccessfully processed data and saved to '{output_filename}'")
        print("\n--- Data Head (Sample) ---")
        print(grouped_df.head())

    except FileNotFoundError as e:
        print(f"Error: A required file was not found. {e}")
    except Exception as e:
        print(f"An error occurred: {e}")

# --- Run the function ---
# Executes the whole load -> count -> save pipeline as soon as this cell runs.
# The function returns nothing; progress and any caught errors are printed.
arena_win_loss_data_collection()

Loaded 121 unique troop names.
Data loaded. Processing for all troops...

Successfully processed data and saved to '../#2 Data Storage/Visualization Data/arenawise_card_win_loss.csv'

--- Data Head (Sample) ---
     arena    card_name outcome  evo  count
0  Arena 1      Archers    Lost    0  10753
1  Arena 1      Archers     Won    0  10420
2  Arena 1       Arrows    Lost    0  11561
3  Arena 1       Arrows     Won    0  10452
4  Arena 1  Baby Dragon    Lost    0      4
