In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Directory holding the raw TSV exports, relative to the notebook's working
# directory. Built with pathlib so the separator is correct on every OS (the
# previous r"..\data\raw\..." literals only resolved on Windows).
RAW_DIR = Path("..") / "data" / "raw"


def load_raw_table(name):
    """Read ``RAW_DIR/<name>_tables.tsv`` into a DataFrame.

    The file is parsed as tab-separated text with every column kept as ``str``
    and empty fields preserved as ``""`` (``keep_default_na=False``), so no
    value is reinterpreted at load time; type conversion happens downstream.

    Parameters
    ----------
    name : str
        Table name without the ``_tables.tsv`` suffix, e.g. ``"land_units"``.

    Returns
    -------
    pandas.DataFrame
        The table with all columns as strings.
    """
    return pd.read_csv(
        RAW_DIR / f"{name}_tables.tsv",
        sep="\t",
        dtype=str,
        keep_default_na=False,
    )


land_units = load_raw_table("land_units")
main_units = load_raw_table("main_units")
battle_entities = load_raw_table("battle_entities")
melee_weapons = load_raw_table("melee_weapons")
missile_weapons = load_raw_table("missile_weapons")
projectiles = load_raw_table("projectiles")
shields = load_raw_table("unit_shield_types")
armour_types = load_raw_table("unit_armour_types")

In [None]:
# Preview the columns this notebook uses from each of the four tables.
# A notebook cell only renders its LAST bare expression, so every preview is
# passed to display() explicitly (display is provided by the IPython kernel);
# .head() keeps each preview to a few rows.
display(main_units[["unit", "caste"]].head())
display(land_units[["key", "primary_missile_weapon"]].head())
display(missile_weapons[["key", "default_projectile"]].head())
display(projectiles[["key", "effective_range", "damage", "ap_damage", "projectile_number"]].head())

In [13]:
# Projectile columns that are converted to numbers and used for the metrics.
PROJECTILE_STATS = ["effective_range", "damage", "ap_damage", "projectile_number"]


def _clean_table(table, id_col):
    """Return a cleaned copy of a raw table (the input frame is not modified).

    * Empty strings become NaN (the files were read with
      ``keep_default_na=False``, so missing fields arrive as ``""``).
    * Rows whose ``id_col`` value starts with ``"#"`` are dropped; these are
      metadata/header rows such as ``"#projectiles_tables;..."``.

    The id column is passed explicitly because it differs per table:
    ``main_units`` is identified by ``unit``, the other tables by ``key``.
    The original row index labels are kept (no reset).
    """
    cleaned = table.replace({"": np.nan})
    is_metadata = cleaned[id_col].astype(str).str.startswith("#", na=False)
    return cleaned.loc[~is_metadata].copy()


def _lookup(table, key_name, value_cols):
    """Build the right-hand side of a join from ``table``.

    Keeps only ``key`` plus ``value_cols``, renames ``key`` to ``key_name`` and
    drops rows without a key. pandas matches NaN keys against each other, so a
    key-less lookup row would otherwise attach to every unit whose link column
    is empty.
    """
    has_key = table["key"].notna()
    return table.loc[has_key, ["key", *value_cols]].rename(columns={"key": key_name})


# 0-1) Empty strings -> NaN and drop "#..." metadata rows in every table.
#      Rebinding the same names keeps later cells working on the cleaned data
#      and makes this cell safe to re-run.
main_units = _clean_table(main_units, "unit")
land_units = _clean_table(land_units, "key")
missile_weapons = _clean_table(missile_weapons, "key")
projectiles = _clean_table(projectiles, "key")

# 2) main_units -> land_units
#    validate="many_to_one" raises if a lookup key is duplicated instead of
#    silently multiplying unit rows.
m = main_units.merge(
    _lookup(land_units, "land_unit", ["primary_missile_weapon"]),
    on="land_unit",
    how="left",
    validate="many_to_one",
)

# 3) -> missile_weapons
#    The weapon key is copied into its own column so that both
#    "primary_missile_weapon" and "missile_weapon" exist in the result.
m["missile_weapon"] = m["primary_missile_weapon"]
m = m.merge(
    _lookup(missile_weapons, "missile_weapon", ["default_projectile"]),
    on="missile_weapon",
    how="left",
    validate="many_to_one",
)

# 4) -> projectiles
m = m.rename(columns={"default_projectile": "projectile"}).merge(
    _lookup(projectiles, "projectile", PROJECTILE_STATS),
    on="projectile",
    how="left",
    validate="many_to_one",
)

# 5) Convert the projectile stats to numbers; unparseable values become NaN.
for stat in PROJECTILE_STATS:
    m[stat] = pd.to_numeric(m[stat], errors="coerce")

# 6) Simple metrics.
#    A missing or non-positive projectile count is treated as one projectile.
m["projectile_number"] = m["projectile_number"].fillna(1).where(lambda s: s > 0, 1)

m["total_damage"] = m["damage"].fillna(0) + m["ap_damage"].fillna(0)
m["volley_damage"] = m["total_damage"] * m["projectile_number"]

# 7) Keep only rows with a caste and a real (positive) range.
#    NaN > 0 is False, so rows without a range are excluded by the comparison.
ranged_clean = m[m["caste"].notna() & (m["effective_range"] > 0)].copy()

ranged_clean[["unit", "caste", "effective_range", "total_damage", "volley_damage", "projectile_number"]].head()


Unnamed: 0,unit,caste,effective_range,total_damage,volley_damage,projectile_number
2,wh2_dlc09_tmb_art_casket_of_souls_0,chariot,440.0,130.0,910.0,7.0
11,wh2_dlc09_tmb_cav_skeleton_horsemen_archers_0,missile_cavalry,140.0,17.0,17.0,1.0
21,wh2_dlc09_tmb_cha_khatep_3,lord,440.0,130.0,910.0,7.0
46,wh2_dlc09_tmb_inf_skeleton_archers_0,missile_infantry,140.0,11.0,11.0,1.0
47,wh2_dlc09_tmb_inf_skeleton_archers_ror,missile_infantry,160.0,11.0,11.0,1.0


In [14]:
# Sanity check on the ranged-unit table: its (rows, columns) size, then the
# fraction of missing values in each link column of the join chain.
# The shape is passed to display() because a cell only renders its last bare
# expression; as a plain statement it was computed and thrown away.
display(ranged_clean.shape)
ranged_clean[["primary_missile_weapon", "missile_weapon", "projectile"]].isna().mean()


primary_missile_weapon    0.0
missile_weapon            0.0
projectile                0.0
dtype: float64

In [16]:
# Number of main_units rows per caste, most frequent first (at most 30 shown).
main_units.loc[:, "caste"].value_counts().iloc[:30]


caste
lord                  772
hero                  524
melee_infantry        329
monster               250
missile_infantry      177
monstrous_infantry    131
melee_cavalry         127
warmachine             79
chariot                74
war_beast              54
monstrous_cavalry      43
missile_cavalry        33
generic                 2
Name: count, dtype: int64

In [17]:
# List every column available in main_units (keys() returns the column index).
main_units.keys()


Index(['unit', 'additional_building_requirement', 'campaign_cap', 'caste',
       'create_time', 'is_naval', 'land_unit', 'num_men', 'multiplayer_cap',
       'multiplayer_cost', 'naval_unit', 'num_ships', 'min_men_per_ship',
       'max_men_per_ship', 'recruitment_cost', 'upkeep_cost', 'weight',
       'resource_requirement', 'special_edition_mask', 'in_encyclopedia',
       'audio_voiceover_culture', 'ui_unit_group_land', 'tier',
       'is_high_threat', 'porthole_camera', 'mount',
       'use_hitpoints_in_campaign', 'porthole_composite_scene', 'melee_cp',
       'missile_cp', 'can_siege', 'audio_voiceover_culture_override',
       'restrict_xp_gain_in_campaign', 'audio_voiceover_actor_group',
       'food_cost', 'has_spoken_vo', 'is_monstrous', 'multiplayer_qb_cap',
       'vo_is_dragon', 'vo_is_dinosaur', 'optional_ui_element',
       'barrier_health', 'can_be_bribed', 'point_allowance_weight',
       'is_renown'],
      dtype='str')