In [4]:
import pandas as pd

In [5]:
# Battles and card_categories
battles_df = pd.read_csv('/content/battles_1st_approach.csv')
card_categories_df = pd.read_csv('/content/card_categories.csv', delimiter=";")

In [6]:
# Sanity check for our card_categories.csv
missing_values_per_column = card_categories_df.isnull().sum()
missing_columns = missing_values_per_column[missing_values_per_column > 0]
print(missing_columns)

print(card_categories_df.columns)

Series([], dtype: int64)
Index(['id', 'name', 'Ground-Single-Attack', 'Ground-Splash-Attack',
       'Air-Ground-Single-Attack', 'Air-Ground-Splash-Attack',
       'Straight-Tower-Building-Attack', 'High-Damage', 'Inferno-Attack',
       'Zap-Attack', 'Detonation-Damage', 'Shielded-Health',
       'Single-Ground-Low-Health', 'Single-Ground-Medium-Health',
       'Single-Ground-High-Health', 'Troop-Ground-Low-Health',
       'Troop-Ground-Medium-Health', 'Single-Air-Medium-Health',
       'Single-Air-High-Health', 'Troop-Air-Low-Health',
       'Troop-Air-Medium-Health', 'Spawnable-Everywhere', 'Spawns-Troops',
       'Building', 'Heal-Skill', 'Slow-Down-Attack', 'Mover-Skill',
       'Low-Ground-Damage-Spell', 'Low-Air-Damage-Spell',
       'Mid-Air-Damage-Spell', 'High-Air-Damage-Spell', 'Utility-Spell',
       'Prone-To-Zap', 'Earthquake-Spell', 'Monk'],
      dtype='object')


In [7]:
# Ensure type matching
card_categories_df['id'] = card_categories_df['id'].astype(int)

# Get all the category columns
category_columns = card_categories_df.columns.difference(['id', 'name'])

# Convert card_categories_df to a dictionary for faster lookup
card_categories = card_categories_df.set_index('id').to_dict(orient='index')

# Initialize new columns for each category for both players
for category in category_columns:
    battles_df[f'p1_{category}'] = 0
    battles_df[f'p2_{category}'] = 0

def update_categories(row):
    # Extract card IDs for both players
    p1_card_ids = [int(col.split('_')[-1]) for col in battles_df.columns if col.startswith('p1_has_card_') and row[col] == 1]
    p2_card_ids = [int(col.split('_')[-1]) for col in battles_df.columns if col.startswith('p2_has_card_') and row[col] == 1]

    # Update categories

    for card_id in p1_card_ids:
        if card_id in card_categories:
            for category in category_columns:
                if card_categories[card_id][category] == 1:
                    row[f'p1_{category}'] += 1

    for card_id in p2_card_ids:
        if card_id in card_categories:
            for category in category_columns:
                if card_categories[card_id][category] == 1:
                    row[f'p2_{category}'] += 1

    return row

battles_df = battles_df.apply(update_categories, axis=1)

In [8]:
pd.set_option('display.max_columns', None)
battles_df.head()

Unnamed: 0,winner,p1_avg_elixir_cost_normalized,p1_elixir_leaked,p1_has_card_26000000,p1_has_card_26000001,p1_has_card_26000002,p1_has_card_26000003,p1_has_card_26000004,p1_has_card_26000005,p1_has_card_26000006,p1_has_card_26000007,p1_has_card_26000008,p1_has_card_26000009,p1_has_card_26000010,p1_has_card_26000011,p1_has_card_26000012,p1_has_card_26000013,p1_has_card_26000014,p1_has_card_26000015,p1_has_card_26000016,p1_has_card_26000017,p1_has_card_26000018,p1_has_card_26000019,p1_has_card_26000020,p1_has_card_26000021,p1_has_card_26000022,p1_has_card_26000023,p1_has_card_26000024,p1_has_card_26000025,p1_has_card_26000026,p1_has_card_26000027,p1_has_card_26000028,p1_has_card_26000029,p1_has_card_26000030,p1_has_card_26000031,p1_has_card_26000032,p1_has_card_26000033,p1_has_card_26000034,p1_has_card_26000035,p1_has_card_26000036,p1_has_card_26000037,p1_has_card_26000038,p1_has_card_26000039,p1_has_card_26000040,p1_has_card_26000041,p1_has_card_26000042,p1_has_card_26000043,p1_has_card_26000044,p1_has_card_26000045,p1_has_card_26000046,p1_has_card_26000047,p1_has_card_26000048,p1_has_card_26000049,p1_has_card_26000050,p1_has_card_26000051,p1_has_card_26000052,p1_has_card_26000053,p1_has_card_26000054,p1_has_card_26000055,p1_has_card_26000056,p1_has_card_26000057,p1_has_card_26000058,p1_has_card_26000059,p1_has_card_26000060,p1_has_card_26000061,p1_has_card_26000062,p1_has_card_26000063,p1_has_card_26000064,p1_has_card_26000065,p1_has_card_26000067,p1_has_card_26000068,p1_has_card_26000069,p1_has_card_26000072,p1_has_card_26000074,p1_has_card_26000077,p1_has_card_26000080,p1_has_card_26000083,p1_has_card_26000084,p1_has_card_26000085,p1_has_card_26000087,p1_has_card_26000093,p1_has_card_26000095,p1_has_card_26000096,p1_has_card_26000097,p1_has_card_26000099,p1_has_card_27000000,p1_has_card_27000001,p1_has_card_27000002,p1_has_card_27000003,p1_has_card_27000004,p1_has_card_27000005,p1_has_card_27000006,p1_has_card_27000007,p1_has_card_27000008,p1_has_card_27000009,p1_has_card_27000010,p1_has_card_27000012,p1_has_card_27000013,p1_has_card_28000000,p1_has_card_28000001,p1_has_card_28000002,p1_has_card_28000003,p1_has_card_28000004,p1_has_card_28000005,p1_has_card_28000006,p1_has_card_28000007,p1_has_card_28000008,p1_has_card_28000009,p1_has_card_28000010,p1_has_card_28000011,p1_has_card_28000012,p1_has_card_28000013,p1_has_card_28000014,p1_has_card_28000015,p1_has_card_28000016,p1_has_card_28000017,p1_has_card_28000018,p1_has_card_28000023,p1_has_card_28000024,p1_has_support_card_159000000,p1_has_support_card_159000001,p1_has_support_card_159000002,p1_has_support_card_159000003,p1_total_win_rate,p2_avg_elixir_cost_normalized,p2_elixir_leaked,p2_has_card_26000000,p2_has_card_26000001,p2_has_card_26000002,p2_has_card_26000003,p2_has_card_26000004,p2_has_card_26000005,p2_has_card_26000006,p2_has_card_26000007,p2_has_card_26000008,p2_has_card_26000009,p2_has_card_26000010,p2_has_card_26000011,p2_has_card_26000012,p2_has_card_26000013,p2_has_card_26000014,p2_has_card_26000015,p2_has_card_26000016,p2_has_card_26000017,p2_has_card_26000018,p2_has_card_26000019,p2_has_card_26000020,p2_has_card_26000021,p2_has_card_26000022,p2_has_card_26000023,p2_has_card_26000024,p2_has_card_26000025,p2_has_card_26000026,p2_has_card_26000027,p2_has_card_26000028,p2_has_card_26000029,p2_has_card_26000030,p2_has_card_26000031,p2_has_card_26000032,p2_has_card_26000033,p2_has_card_26000034,p2_has_card_26000035,p2_has_card_26000036,p2_has_card_26000037,p2_has_card_26000038,p2_has_card_26000039,p2_has_card_26000040,p2_has_card_26000041,p2_has_card_26000042,p2_has_card_26000043,p2_has_card_26000044,p2_has_card_26000045,p2_has_card_26000046,p2_has_card_26000047,p2_has_card_26000048,p2_has_card_26000049,p2_has_card_26000050,p2_has_card_26000051,p2_has_card_26000052,p2_has_card_26000053,p2_has_card_26000054,p2_has_card_26000055,p2_has_card_26000056,p2_has_card_26000057,p2_has_card_26000058,p2_has_card_26000059,p2_has_card_26000060,p2_has_card_26000061,p2_has_card_26000062,p2_has_card_26000063,p2_has_card_26000064,p2_has_card_26000065,p2_has_card_26000067,p2_has_card_26000068,p2_has_card_26000069,p2_has_card_26000072,p2_has_card_26000074,p2_has_card_26000077,p2_has_card_26000080,p2_has_card_26000083,p2_has_card_26000084,p2_has_card_26000085,p2_has_card_26000087,p2_has_card_26000093,p2_has_card_26000095,p2_has_card_26000096,p2_has_card_26000097,p2_has_card_26000099,p2_has_card_27000000,p2_has_card_27000001,p2_has_card_27000002,p2_has_card_27000003,p2_has_card_27000004,p2_has_card_27000005,p2_has_card_27000006,p2_has_card_27000007,p2_has_card_27000008,p2_has_card_27000009,p2_has_card_27000010,p2_has_card_27000012,p2_has_card_27000013,p2_has_card_28000000,p2_has_card_28000001,p2_has_card_28000002,p2_has_card_28000003,p2_has_card_28000004,p2_has_card_28000005,p2_has_card_28000006,p2_has_card_28000007,p2_has_card_28000008,p2_has_card_28000009,p2_has_card_28000010,p2_has_card_28000011,p2_has_card_28000012,p2_has_card_28000013,p2_has_card_28000014,p2_has_card_28000015,p2_has_card_28000016,p2_has_card_28000017,p2_has_card_28000018,p2_has_card_28000023,p2_has_card_28000024,p2_has_support_card_159000000,p2_has_support_card_159000001,p2_has_support_card_159000002,p2_has_support_card_159000003,p2_total_win_rate,p1_Air-Ground-Single-Attack,p2_Air-Ground-Single-Attack,p1_Air-Ground-Splash-Attack,p2_Air-Ground-Splash-Attack,p1_Building,p2_Building,p1_Detonation-Damage,p2_Detonation-Damage,p1_Earthquake-Spell,p2_Earthquake-Spell,p1_Ground-Single-Attack,p2_Ground-Single-Attack,p1_Ground-Splash-Attack,p2_Ground-Splash-Attack,p1_Heal-Skill,p2_Heal-Skill,p1_High-Air-Damage-Spell,p2_High-Air-Damage-Spell,p1_High-Damage,p2_High-Damage,p1_Inferno-Attack,p2_Inferno-Attack,p1_Low-Air-Damage-Spell,p2_Low-Air-Damage-Spell,p1_Low-Ground-Damage-Spell,p2_Low-Ground-Damage-Spell,p1_Mid-Air-Damage-Spell,p2_Mid-Air-Damage-Spell,p1_Monk,p2_Monk,p1_Mover-Skill,p2_Mover-Skill,p1_Prone-To-Zap,p2_Prone-To-Zap,p1_Shielded-Health,p2_Shielded-Health,p1_Single-Air-High-Health,p2_Single-Air-High-Health,p1_Single-Air-Medium-Health,p2_Single-Air-Medium-Health,p1_Single-Ground-High-Health,p2_Single-Ground-High-Health,p1_Single-Ground-Low-Health,p2_Single-Ground-Low-Health,p1_Single-Ground-Medium-Health,p2_Single-Ground-Medium-Health,p1_Slow-Down-Attack,p2_Slow-Down-Attack,p1_Spawnable-Everywhere,p2_Spawnable-Everywhere,p1_Spawns-Troops,p2_Spawns-Troops,p1_Straight-Tower-Building-Attack,p2_Straight-Tower-Building-Attack,p1_Troop-Air-Low-Health,p2_Troop-Air-Low-Health,p1_Troop-Air-Medium-Health,p2_Troop-Air-Medium-Health,p1_Troop-Ground-Low-Health,p2_Troop-Ground-Low-Health,p1_Troop-Ground-Medium-Health,p2_Troop-Ground-Medium-Health,p1_Utility-Spell,p2_Utility-Spell,p1_Zap-Attack,p2_Zap-Attack
0,1.0,0.54,0.01,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.36,0.47,0.02,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.13,2.0,2.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0,2.0,1.0,1.0,0.0,0.0,1.0,0.0,2.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,1.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,2.0
1,1.0,0.54,0.02,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.36,0.5,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.09,2.0,3.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,2.0,3.0,1.0,1.0,0.0,0.0,1.0,0.0,2.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,3.0,4.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,2.0
2,2.0,0.57,0.03,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.19,0.5,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.13,1.0,1.0,1.0,2.0,0.0,1.0,1.0,0.0,0.0,0.0,2.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,2.0,0.0,1.0,0.0,1.0,1.0,1.0,3.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,2.0,0.0,0.0,1.0,0.0
3,1.0,0.57,0.04,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.19,0.47,0.05,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.14,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,2.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,2.0,0.0,1.0,1.0,1.0,1.0,1.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0
4,1.0,0.57,0.01,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.19,0.33,0.02,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.14,1.0,2.0,1.0,0.0,0.0,1.0,1.0,2.0,0.0,0.0,2.0,4.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,2.0,0.0,1.0,0.0,1.0,0.0,1.0,4.0,0.0,0.0,1.0,1.0,0.0,0.0,2.0,1.0,1.0,0.0,1.0,0.0,0.0,3.0,1.0,0.0,0.0,0.0,1.0,0.0


In [9]:
battles_df.to_csv('battles_2nd_approach_not_for_use.csv', index=False)

In [10]:
# Drop one-hot encoded card columns
battles_df = battles_df.drop(columns=[col for col in battles_df.columns if col.startswith('p1_has_card_') or col.startswith('p2_has_card_')])

In [11]:
# For whatevery reasons winner was not of int type anymore
battles_df['winner'] = pd.to_numeric(battles_df['winner'], downcast='integer')

# Same here
columns_to_convert = [col for col in battles_df.columns if col.startswith('p1_has_support_card') or col.startswith('p2_has_support_card')]

for col in columns_to_convert:
    battles_df[col] = pd.to_numeric(battles_df[col], downcast='integer')

In [12]:
print(battles_df.columns)

Index(['winner', 'p1_avg_elixir_cost_normalized', 'p1_elixir_leaked',
       'p1_has_support_card_159000000', 'p1_has_support_card_159000001',
       'p1_has_support_card_159000002', 'p1_has_support_card_159000003',
       'p1_total_win_rate', 'p2_avg_elixir_cost_normalized',
       'p2_elixir_leaked', 'p2_has_support_card_159000000',
       'p2_has_support_card_159000001', 'p2_has_support_card_159000002',
       'p2_has_support_card_159000003', 'p2_total_win_rate',
       'p1_Air-Ground-Single-Attack', 'p2_Air-Ground-Single-Attack',
       'p1_Air-Ground-Splash-Attack', 'p2_Air-Ground-Splash-Attack',
       'p1_Building', 'p2_Building', 'p1_Detonation-Damage',
       'p2_Detonation-Damage', 'p1_Earthquake-Spell', 'p2_Earthquake-Spell',
       'p1_Ground-Single-Attack', 'p2_Ground-Single-Attack',
       'p1_Ground-Splash-Attack', 'p2_Ground-Splash-Attack', 'p1_Heal-Skill',
       'p2_Heal-Skill', 'p1_High-Air-Damage-Spell', 'p2_High-Air-Damage-Spell',
       'p1_High-Damage', 'p2_High-

In [13]:
# Export 2nd approach for EDA
battles_df.to_csv('battles_2nd_approach_EDA.csv', index=False)

In [14]:
# We identify the columns via regex by looking for specific patterns
import re
columns_to_normalize = [col for col in battles_df.columns if re.match(r'p[12]_[A-Z].*', col)]

In [15]:
# min-max normalization
for column in columns_to_normalize:
    min_value = battles_df[column].min()
    max_value = battles_df[column].max()

    if max_value - min_value != 0:
        battles_df[column] = ((battles_df[column] - min_value) / (max_value - min_value)).round(2)
    else:
        battles_df[column] = 0

In [16]:
battles_df.shape

(46534, 81)

In [17]:
battles_df.head()

Unnamed: 0,winner,p1_avg_elixir_cost_normalized,p1_elixir_leaked,p1_has_support_card_159000000,p1_has_support_card_159000001,p1_has_support_card_159000002,p1_has_support_card_159000003,p1_total_win_rate,p2_avg_elixir_cost_normalized,p2_elixir_leaked,p2_has_support_card_159000000,p2_has_support_card_159000001,p2_has_support_card_159000002,p2_has_support_card_159000003,p2_total_win_rate,p1_Air-Ground-Single-Attack,p2_Air-Ground-Single-Attack,p1_Air-Ground-Splash-Attack,p2_Air-Ground-Splash-Attack,p1_Building,p2_Building,p1_Detonation-Damage,p2_Detonation-Damage,p1_Earthquake-Spell,p2_Earthquake-Spell,p1_Ground-Single-Attack,p2_Ground-Single-Attack,p1_Ground-Splash-Attack,p2_Ground-Splash-Attack,p1_Heal-Skill,p2_Heal-Skill,p1_High-Air-Damage-Spell,p2_High-Air-Damage-Spell,p1_High-Damage,p2_High-Damage,p1_Inferno-Attack,p2_Inferno-Attack,p1_Low-Air-Damage-Spell,p2_Low-Air-Damage-Spell,p1_Low-Ground-Damage-Spell,p2_Low-Ground-Damage-Spell,p1_Mid-Air-Damage-Spell,p2_Mid-Air-Damage-Spell,p1_Monk,p2_Monk,p1_Mover-Skill,p2_Mover-Skill,p1_Prone-To-Zap,p2_Prone-To-Zap,p1_Shielded-Health,p2_Shielded-Health,p1_Single-Air-High-Health,p2_Single-Air-High-Health,p1_Single-Air-Medium-Health,p2_Single-Air-Medium-Health,p1_Single-Ground-High-Health,p2_Single-Ground-High-Health,p1_Single-Ground-Low-Health,p2_Single-Ground-Low-Health,p1_Single-Ground-Medium-Health,p2_Single-Ground-Medium-Health,p1_Slow-Down-Attack,p2_Slow-Down-Attack,p1_Spawnable-Everywhere,p2_Spawnable-Everywhere,p1_Spawns-Troops,p2_Spawns-Troops,p1_Straight-Tower-Building-Attack,p2_Straight-Tower-Building-Attack,p1_Troop-Air-Low-Health,p2_Troop-Air-Low-Health,p1_Troop-Air-Medium-Health,p2_Troop-Air-Medium-Health,p1_Troop-Ground-Low-Health,p2_Troop-Ground-Low-Health,p1_Troop-Ground-Medium-Health,p2_Troop-Ground-Medium-Health,p1_Utility-Spell,p2_Utility-Spell,p1_Zap-Attack,p2_Zap-Attack
0,1,0.54,0.01,1,0,0,0,0.36,0.47,0.02,1,0,0,0,0.13,0.4,0.33,0.2,0.2,0.0,0.0,0.2,0.0,0.0,0.0,0.33,0.29,0.25,0.2,0.0,0.0,0.5,0.0,0.5,0.2,0.0,0.0,0.0,0.25,0.0,0.0,0.33,0.67,0.0,0.0,0.0,0.0,0.33,0.0,0.5,0.0,0.0,0.0,0.67,0.0,0.0,0.25,0.0,0.2,0.43,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.2,0.17,0.0,0.0,0.0,0.0,0.33,0.5
1,1,0.54,0.02,1,0,0,0,0.36,0.5,0.01,1,0,0,0,0.09,0.4,0.5,0.2,0.0,0.0,0.2,0.2,0.0,0.0,0.0,0.33,0.43,0.25,0.2,0.0,0.0,0.5,0.0,0.5,0.2,0.0,0.0,0.0,0.25,0.0,0.0,0.33,0.0,0.0,0.0,0.0,0.0,0.33,0.0,0.5,0.0,0.0,0.0,0.67,0.0,0.0,0.0,0.0,0.2,0.43,0.5,0.0,0.0,0.0,0.33,0.0,0.0,0.25,0.2,0.0,0.0,0.0,0.0,0.2,0.17,0.0,0.2,0.0,0.0,0.33,0.5
2,2,0.57,0.03,1,0,0,0,0.19,0.5,0.03,1,0,0,0,0.13,0.2,0.17,0.2,0.4,0.0,0.2,0.2,0.0,0.0,0.0,0.33,0.57,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.4,0.5,0.0,0.33,0.0,0.0,0.5,0.33,0.33,0.0,0.0,0.0,0.25,0.33,0.25,0.0,0.33,1.0,0.0,0.67,0.0,0.33,0.0,0.25,0.2,0.14,0.38,0.0,0.0,0.5,0.0,0.0,0.0,0.5,0.2,0.5,0.0,1.0,1.0,0.0,0.0,0.25,0.4,0.0,0.0,0.33,0.0
3,1,0.57,0.04,1,0,0,0,0.19,0.47,0.05,0,0,1,0,0.14,0.2,0.17,0.2,0.2,0.0,0.0,0.2,0.0,0.0,1.0,0.33,0.29,0.0,0.4,0.0,0.0,0.0,0.0,0.25,0.0,0.5,0.0,0.33,0.25,0.0,0.5,0.33,0.0,0.0,0.0,0.0,0.25,0.33,0.25,0.0,0.0,1.0,0.0,0.67,0.0,0.33,0.25,0.25,0.2,0.14,0.25,0.0,0.0,0.5,0.0,0.0,0.0,0.5,0.2,0.5,0.5,1.0,0.0,0.0,0.17,0.25,0.0,0.0,0.0,0.33,0.25
4,1,0.57,0.01,1,0,0,0,0.19,0.33,0.02,0,1,0,0,0.14,0.2,0.33,0.2,0.0,0.0,0.2,0.2,0.4,0.0,0.0,0.33,0.57,0.0,0.2,0.0,0.0,0.0,0.0,0.25,0.0,0.5,0.0,0.33,0.0,0.0,0.0,0.33,0.67,0.0,0.0,0.0,0.0,0.33,0.0,0.0,0.33,1.0,0.0,0.67,0.0,0.33,0.0,0.25,0.0,0.14,0.5,0.0,0.0,0.5,0.33,0.0,0.0,0.5,0.2,0.5,0.0,1.0,0.0,0.0,0.5,0.25,0.0,0.0,0.0,0.33,0.0


In [None]:
battles_df.to_csv('battles_2nd_approach.csv', index=False)