In [1]:
import json
import re
import fitz
from datetime import datetime, timezone
import pandas as pd
import os

In [51]:
from pathlib import Path

def find_all_pdfs(root_ordner):
    return [str(p) for p in Path(root_ordner).rglob('*.pdf')]

main_folder = os.path.join("..", "sample_roster", "bbtc_pl_2025")
bbtc_pl_2025_roster = find_all_pdfs(main_folder)

main_folder = os.path.join("..", "sample_roster", "bbtc_pl_2025_matched_played")
bbtc_pl_2025_matched_played5_roster = find_all_pdfs(main_folder)

main_folder = os.path.join("..", "sample_roster", "bbtc_pl_eurobowl_2025")
eurobowl_2025_roster = find_all_pdfs(main_folder)


test_roster_paths = bbtc_pl_2025_roster + bbtc_pl_2025_matched_played5_roster + eurobowl_2025_roster
print(len(test_roster_paths))

8


In [52]:
test_roster_paths[-1].split("\\")

['..', 'sample_roster', 'bbtc_pl_eurobowl_2025', 'EB - Orcs - Akorus.pdf']

In [4]:
# Python
import fitz  # PyMuPDF

# Open the PDF
doc = fitz.open(test_roster_paths[4])

team_data = {}

extraction_step = 'Race'
for page_number, page in enumerate(doc, start=1):
    # Extract text in blocks/spans with details
    blocks = page.get_text("dict")["blocks"]
    
    for block in blocks:
        if "lines" in block:
            for line in block["lines"]:
                for span in line["spans"]:
                    text = span["text"]
                    color = span["color"]  # RGB, float 0-1 representation
                    # print(f"Page {page_number}: '{text}' Color: {color}")
                    
                    if extraction_step == 'Race':
                        if text == 'COACH NAME':
                            extraction_step = 'Coach'
                        if 'Race' in team_data:
                            team_data


In [53]:
# Python
import fitz  # PyMuPDF

def load_roster(roster_path):
    doc = fitz.open(roster_path)

    roster_path_split = roster_path.split("\\")
    pdf_name = roster_path_split[-1]
    
    return {
        "full_path": roster_path,
        "pdf_name": pdf_name,
        "loaded_pdf": doc
    }

def detect_roster_type(loaded_roster):
    doc = loaded_roster["loaded_pdf"]
    for page_number, page in enumerate(doc, start=1):
        # Extract text in blocks/spans with details
        blocks = page.get_text("dict")["blocks"]
        started_summary = False
    
        for block in blocks:
            if "lines" in block:
                for line in block["lines"]:
                    for span in line["spans"]:
                        text = span["text"]
                        if text == "SUMMARY":
                            started_summary = True
                        elif started_summary:
                            print(text)
                            if text == "Skill Points":
                                return "bbtc_pl_2025_matched_played"
                            elif text == "Players cost":
                                return "bbtc_pl_2025"
                            elif "Option:" in text:
                                return "bbtc_pl_eurobowl_2025"

        raise RuntimeError(f"Could not identify roster_type for {loaded_roster['full_path']}")


SUMMARY_STEP_MAPPING = {
    "bbtc_pl_2025_matched_played": [
        'Skill Points',
        None,
        'Secondary skills',
        None,
        'Star players',
        None,
    ],
    "bbtc_pl_2025": [
        'Players cost',
        None,
        'Skills cost',
        None,
        'Inducement cost',
        None,
        'Sideline cost',
        None,
        'Primary skills',
        None,
        'Secondary skills',
        None
    ],
    "bbtc_pl_eurobowl_2025": [
        'Players cost',
        None,
        'Skills cost',
        None,
        'Inducement cost',
        None,
        'Sideline cost',
        None,
        'Primary skills',
        None,
        'Secondary skills',
        None
    ],
}


def process_team_pdf(roster_path):
    # Open the PDF
    loaded_roster = load_roster(roster_path)
    doc = loaded_roster["loaded_pdf"]
    pdf_type = detect_roster_type(loaded_roster)
    team_data = {
        'pdf_name': loaded_roster["pdf_name"],
        'pdf_type': pdf_type
    }

    print(team_data)
    
    extraction_step = 'Race'
    for page_number, page in enumerate(doc, start=1):
        # Extract text in blocks/spans with details
        blocks = page.get_text("dict")["blocks"]

        for block in blocks:
            if "lines" in block:
                for line in block["lines"]:
                    for span in line["spans"]:
                        text = span["text"]
                        color = span["color"]  # RGB, float 0-1 representation
                        # print(f"Page {page_number}: '{text}' Color: {color}")
                        # Adjust extraction step
                        if text == 'SIDELINE':
                            extraction_step = 'Sideline'
                            sideline_ctr = 0
                        elif text == 'INDUCEMENTS':
                            extraction_step = 'Inducements'
                            next_name = None
                        elif text == 'SUMMARY':
                            summary_ctr = 0
                            extraction_step = 'Summary'
                            summary_ctr = -1
    
                        if extraction_step == 'Race':
                            if text == 'COACH NAME':
                                extraction_step = 'Coach'
                                continue
                            if 'Race' in team_data:
                                team_data['Race'] += ' ' + text
                            else:
                                team_data['Race'] = text

                        elif extraction_step == 'Coach':
                            team_data['Coach'] = text
                            extraction_step = 'Team'

                        elif extraction_step == 'Team':
                            if text == 'TEAM NAME':
                                continue
                            elif text == 'SIDELINE':
                                extraction_step = 'Sideline'
                                sideline_ctr = 0
                                
                            elif 'Team' in team_data:
                                team_data['Team'] += ' ' + text
                            else:
                                team_data['Team'] = text

                        elif extraction_step == 'Sideline':
                            sideline_properties = [
                                'Apothecary',
                                'Assistant coaches',
                                'Cheerleaders',
                                'Dedicated fans',
                                'Re-rolls',
                            ]

                            if sideline_ctr >= len(sideline_properties):
                                extraction_step = 'Inducements'
                                continue
                            next_sideline = sideline_properties[sideline_ctr]
                            
                            if text == 'SIDELINE':
                                continue
                            elif text in sideline_properties:
                                if (sideline_ctr == 0) and (text != 'Apothecary'):
                                    sideline_ctr += 1
                                continue
                            else:
                                # print('SAVE', next_sideline, text)
                                team_data[f'Sideline - {next_sideline}'] = text
                                sideline_ctr += 1

                        elif extraction_step == 'Inducements':
                            if text in ['SUMMARY', 'No inducements', 'LEAGUES & SPECIAL']:
                                summary_ctr = 0
                                extraction_step = 'Summary'
                                summary_ctr = -1
                                continue
                            if text == 'INDUCEMENTS':
                                continue
                            if next_name is None:
                                next_name = text
                            else:
                                team_data[f'Inducement - {next_name}'] = text
                                next_name = None

                        elif extraction_step == 'Summary':
                            print(f"SUMMARY {summary_ctr} | {text}")
                            summary_steps = SUMMARY_STEP_MAPPING[pdf_type]
                            if summary_ctr == len(summary_steps):
                                extraction_step = 'Players'
                                player_ctr = -1
                                continue

                            if text == 'SUMMARY':
                                summary_ctr = 0
                                continue
                            elif summary_ctr == -1:
                                continue

                            if pdf_type == "bbtc_pl_eurobowl_2025":
                                if summary_ctr == 0:
                                    team_data[f'Summary - Option'] = text.split(": ")[0]
                                    extraction_step = 'Players'
                                    player_ctr = -1
                                    continue
                                else:
                                    raise NotImplementedError()
                            else:
                                if (summary_ctr % 2) == 1:
                                    team_data[f'Summary - {summary_steps[summary_ctr - 1]}'] = text
                            summary_ctr += 1
    
                        elif extraction_step == 'Players':
                            if text == 'COST':
                                team_data['Players'] = []
                                player_ctr = 1
                                next_player_property = 'Name'
                                current_player = {
                                    'ctr': player_ctr,
                                    'position_name': None,
                                    'primary_1': None,
                                    'primary_2': None,
                                    'secondary_1': None,
                                    'secondary_2': None,
                                    'star': False,
                                }
                                continue
                            elif player_ctr == -1:
                                continue

                            if re.fullmatch(r"\b\d+k\b", text):
                                team_data['Players'].append(current_player)
                                player_ctr += 1
                                current_player = {
                                    'ctr': player_ctr,
                                    'position_name': None,
                                    'primary_1': None,
                                    'primary_2': None,
                                    'secondary_1': None,
                                    'secondary_2': None,
                                    'star': False,
                                }
                                next_player_property = 'Name'

                            elif next_player_property == 'Name':
                                current_player['position_name'] = " ".join(text.split()[1:])
                                next_player_property = 'Skills'

                            elif next_player_property == 'Skills':
                                if color == 681912:
                                    if current_player['primary_1'] is None:
                                        current_player['primary_1'] = text.strip().strip(',')
                                    elif current_player['primary_2'] is None:
                                        current_player['primary_2'] = text.strip().strip(',')
                                    else:
                                        raise RuntimeError(f'Unexpected Skill | Color: {color} - Text: {text}')
                                if color == 4822027:
                                    if current_player['secondary_1'] is None:
                                        current_player['secondary_1'] = text.strip().strip(',')
                                    elif current_player['secondary_2'] is None:
                                        current_player['secondary_2'] = text.strip().strip(',')
                                    else:
                                        raise RuntimeError(f'Unexpected Skill | Color: {color} - Text: {text}')
                            
                            if text == 'Special skill: ':
                                current_player['star'] = True
                                      
    return team_data
                                    
                                
                            
processed_roster = []
for roster_path in test_roster_paths:
    team_data = process_team_pdf(roster_path)
    processed_roster.append(team_data)
    # print(json.dumps(team_data, indent=4))

Players cost
{'pdf_name': 'High Elfs - Akorus.pdf', 'pdf_type': 'bbtc_pl_2025'}
SUMMARY -1 | LEAGUES & SPECIAL
SUMMARY -1 | RULES
SUMMARY -1 | • Elven Kingdom League
SUMMARY -1 | SUMMARY
SUMMARY 0 | Players cost
SUMMARY 1 | 955k
SUMMARY 2 | Skills cost
SUMMARY 3 | 200k
SUMMARY 4 | Inducement cost
SUMMARY 5 | 0k
SUMMARY 6 | Sideline cost
SUMMARY 7 | 210k
SUMMARY 8 | Primary skills
SUMMARY 9 | 7
SUMMARY 10 | Secondary skills
SUMMARY 11 | 0
SUMMARY 12 | # POSITION
Players cost
{'pdf_name': 'Khorne - Akorus - vertical.pdf', 'pdf_type': 'bbtc_pl_2025'}
SUMMARY -1 | RULES
SUMMARY -1 | • Brawlin' Brutes
SUMMARY -1 | • Favoured of Khorne
SUMMARY -1 | • Chaos Clash
SUMMARY -1 | SUMMARY
SUMMARY 0 | Players cost
SUMMARY 1 | 970k
SUMMARY 2 | Skills cost
SUMMARY 3 | 260k
SUMMARY 4 | Inducement cost
SUMMARY 5 | 25k
SUMMARY 6 | Sideline cost
SUMMARY 7 | 170k
SUMMARY 8 | Primary skills
SUMMARY 9 | 7
SUMMARY 10 | Secondary skills
SUMMARY 11 | 1
SUMMARY 12 | # POSITION
Players cost
{'pdf_name': 'Khorne 

In [54]:
for roster_path in test_roster_paths:
    loaded_roster = load_roster(roster_path)
    doc = loaded_roster["loaded_pdf"]
    result = detect_roster_type(loaded_roster)
    print(roster_path, "|", result)

Players cost
..\sample_roster\bbtc_pl_2025\High Elfs - Akorus.pdf | bbtc_pl_2025
Players cost
..\sample_roster\bbtc_pl_2025\Khorne - Akorus - vertical.pdf | bbtc_pl_2025
Players cost
..\sample_roster\bbtc_pl_2025\Khorne - Akorus.pdf | bbtc_pl_2025
Players cost
..\sample_roster\bbtc_pl_2025\Necro - Schlachtenlenker.pdf | bbtc_pl_2025
Players cost
..\sample_roster\bbtc_pl_2025\Nurgle - Akorus.pdf | bbtc_pl_2025
Players cost
..\sample_roster\bbtc_pl_2025\Vamps - Akorus.pdf | bbtc_pl_2025
Skill Points
..\sample_roster\bbtc_pl_2025_matched_played\Zons - Test.pdf | bbtc_pl_2025_matched_played
Option: Hiring Legends
..\sample_roster\bbtc_pl_eurobowl_2025\EB - Orcs - Akorus.pdf | bbtc_pl_eurobowl_2025


In [49]:
processed_roster[0]

{'pdf_name': 'EB - Orcs - Akorus.pdf',
 'pdf_type': 'bbtc_pl_eurobowl_2025',
 'Race': 'Orc',
 'Coach': 'Akorus',
 'Team': 'EB - Orcs Test',
 'Sideline - Apothecary': 'No',
 'Sideline - Assistant coaches': '0',
 'Sideline - Cheerleaders': '0',
 'Sideline - Dedicated fans': '0',
 'Sideline - Re-rolls': '0',
 "Inducement - Blitzer's Best Kegs": '1',
 'Inducement - Weather Mage': '1',
 'Inducement - Dodgy League Rep': '1',
 'Summary - Option': 'Option',
 'Players': [{'ctr': 1,
   'position_name': 'Big Un Blocker',
   'primary_1': 'Block',
   'primary_2': None,
   'secondary_1': None,
   'secondary_2': None,
   'star': False},
  {'ctr': 2,
   'position_name': 'Big Un Blocker',
   'primary_1': None,
   'primary_2': None,
   'secondary_1': None,
   'secondary_2': None,
   'star': False},
  {'ctr': 3,
   'position_name': 'Goblin Lineman',
   'primary_1': None,
   'primary_2': None,
   'secondary_1': None,
   'secondary_2': None,
   'star': False},
  {'ctr': 4,
   'position_name': 'Orc Blitzer'

In [64]:
def flatten_players(processed_roster):
    rows = []
    error_rows = []
    for team_data in processed_roster:
        if 'Players' in team_data:
            for player in team_data['Players']:
                prefix = f'player_{player["ctr"]}'
                for key, value in player.items():
                    if key == 'ctr':
                        continue
                    
                    team_data[f'{prefix}_{key}'] = value
            
            del team_data['Players']
            rows.append(team_data)
        elif 'player_11_star' in team_data:
            rows.append(team_data)
        else:
            error_rows.append(team_data)

    return {
        "rows": rows,
        "error_rows": error_rows
    }

flattend_rows = flatten_players(processed_roster)

print('Rows', len(flattend_rows["rows"]))
print('Errors', len(flattend_rows["error_rows"]))

Rows 2
Errors 0


In [57]:
df = pd.DataFrame(rows)
df.head(5)

Unnamed: 0,pdf_name,pdf_type,Race,Coach,Team,Sideline - Apothecary,Sideline - Assistant coaches,Sideline - Cheerleaders,Sideline - Dedicated fans,Sideline - Re-rolls,...,player_13_primary_2,player_13_secondary_1,player_13_secondary_2,player_13_star,Summary - Skill Points,Summary - Star players,Inducement - Blitzer's Best Kegs,Inducement - Weather Mage,Inducement - Dodgy League Rep,Summary - Option
0,High Elfs - Akorus.pdf,bbtc_pl_2025,High Elf,Akorus,Emerald - High Elfs,Yes,1,0,0,3,...,,,,,,,,,,
1,Khorne - Akorus - vertical.pdf,bbtc_pl_2025,Khorne,Schlachtenlenker,Emerald - Khorne,Yes,0,0,0,2,...,,,,,,,,,,
2,Khorne - Akorus.pdf,bbtc_pl_2025,Khorne,Akorus,Emerald - Khorne,Yes,1,0,0,2,...,,,,,,,,,,
3,Necro - Schlachtenlenker.pdf,bbtc_pl_2025,Necromantic Horror,Schlachtenlenker,Emerald - Necro,,0,0,0,3,...,,,,,,,,,,
4,Nurgle - Akorus.pdf,bbtc_pl_2025,Nurgle,Akorus,Emerald - Nurgle,,0,0,0,3,...,,,,False,,,,,,


In [56]:
df_error = pd.DataFrame(error_rows)
df_error

In [None]:
df_emerald_bowl_processing = df[['PDF Name', 'Race', 'Coach', 'Team',
       'Summary - Players cost', 'Summary - Skills cost',
       'Summary - Inducement cost', 'Summary - Sideline cost',
       'Summary - Primary skills', 'Summary - Secondary skills']]
df_emerald_bowl_processing['emerald_bowl_skill_cost'] = df_emerald_bowl_processing['Summary - Primary skills'].astype(int) * 20  + df_emerald_bowl_processing['Summary - Secondary skills'].astype(int) * 30
df_emerald_bowl_processing

In [None]:
now_utc = datetime.now(timezone.utc)
timestamp_utc_str = now_utc.strftime("%Y%m%dT%H%M%S")
df.to_csv(f'roster_extracted_{timestamp_utc_str}.csv')

In [None]:
now_utc = datetime.now(timezone.utc)
timestamp_utc_str = now_utc.strftime("%Y%m%dT%H%M%S")
df_update.to_csv(f'eb_roster_update_{timestamp_utc_str}.csv')

In [None]:
df_error = pd.DataFrame(error_rows)
now_utc = datetime.now(timezone.utc)
timestamp_utc_str = now_utc.strftime("%Y%m%dT%H%M%S")
df_error = pd.DataFrame(error_rows).to_csv(f'eb_roster_errors_{timestamp_utc_str}.csv')
len(error_rows)

### Team Event

In [108]:
main_folder = os.path.join("..", "sample_roster", "bbtc_pl_2025_team_event")
bbtc_pl_2025_team_event_roster = find_all_pdfs(main_folder)
print(f"Found {len(bbtc_pl_2025_team_event_roster)} roster pdfs")

processed_roster = []
for roster_path in bbtc_pl_2025_team_event_roster:
    team_data = process_team_pdf(roster_path)
    
    team_data["event_team"] = os.path.basename(os.path.dirname(roster_path))
    processed_roster.append(team_data)
    # print(json.dumps(team_data, indent=4))

df_team_event = pd.DataFrame(processed_roster)
df_team_event

Found 4 roster pdfs
Players cost
{'pdf_name': 'Tschirgant_Snotlinge_17.pdf', 'pdf_type': 'bbtc_pl_2025'}
SUMMARY -1 | RULES
SUMMARY -1 | • Underworld Challenge
SUMMARY -1 | • Bribery and Corruption
SUMMARY -1 | • Low Cost Linemen
SUMMARY -1 | • Swarming
SUMMARY -1 | SUMMARY
SUMMARY 0 | Players cost
SUMMARY 1 | 860k
SUMMARY 2 | Skills cost
SUMMARY 3 | 160k
SUMMARY 4 | Inducement cost
SUMMARY 5 | 200k
SUMMARY 6 | Sideline cost
SUMMARY 7 | 190k
SUMMARY 8 | Primary skills
SUMMARY 9 | 1
SUMMARY 10 | Secondary skills
SUMMARY 11 | 3
SUMMARY 12 | # POSITION
Players cost
{'pdf_name': 'Vamps - Akorus.pdf', 'pdf_type': 'bbtc_pl_2025'}
SUMMARY -1 | RULES
SUMMARY -1 | • Masters of Undeath
SUMMARY -1 | • Sylvanian Spotlight
SUMMARY -1 | SUMMARY
SUMMARY 0 | Players cost
SUMMARY 1 | 920k
SUMMARY 2 | Skills cost
SUMMARY 3 | 150k
SUMMARY 4 | Inducement cost
SUMMARY 5 | 25k
SUMMARY 6 | Sideline cost
SUMMARY 7 | 240k
SUMMARY 8 | Primary skills
SUMMARY 9 | 6
SUMMARY 10 | Secondary skills
SUMMARY 11 | 0
SUM

Unnamed: 0,pdf_name,pdf_type,Race,Coach,Team,Sideline - Apothecary,Sideline - Assistant coaches,Sideline - Cheerleaders,Sideline - Dedicated fans,Sideline - Re-rolls,...,Inducement - Riotous Rookies,Summary - Players cost,Summary - Skills cost,Summary - Inducement cost,Summary - Sideline cost,Summary - Primary skills,Summary - Secondary skills,Players,event_team,Inducement - Team Mascot
0,Tschirgant_Snotlinge_17.pdf,bbtc_pl_2025,Snotling,Tschirgant,Getting Games in C,Yes,0,0,0,2,...,1.0,860k,160k,200k,190k,1,3,"[{'ctr': 1, 'position_name': 'Glart Smashrip',...",Another Team,
1,Vamps - Akorus.pdf,bbtc_pl_2025,Vampire,Akorus,Emerald - Vamps,No,0,0,0,4,...,,920k,150k,25k,240k,6,0,"[{'ctr': 1, 'position_name': 'Vampire Thrower'...",Another Team,1.0
2,High Elfs - Akorus.pdf,bbtc_pl_2025,High Elf,Akorus,Emerald - High Elfs,Yes,1,0,0,3,...,,955k,200k,0k,210k,7,0,"[{'ctr': 1, 'position_name': 'High Elf Blitzer...",Munich RumBBLerooskie,
3,Khorne - Akorus - vertical.pdf,bbtc_pl_2025,Khorne,Schlachtenlenker,Emerald - Khorne,Yes,0,0,0,2,...,,970k,260k,25k,170k,7,1,"[{'ctr': 1, 'position_name': 'Bloodspawn', 'pr...",Munich RumBBLerooskie,1.0


In [109]:
df_team_event.columns

Index(['pdf_name', 'pdf_type', 'Race', 'Coach', 'Team',
       'Sideline - Apothecary', 'Sideline - Assistant coaches',
       'Sideline - Cheerleaders', 'Sideline - Dedicated fans',
       'Sideline - Re-rolls', 'Inducement - Bribes',
       'Inducement - Riotous Rookies', 'Summary - Players cost',
       'Summary - Skills cost', 'Summary - Inducement cost',
       'Summary - Sideline cost', 'Summary - Primary skills',
       'Summary - Secondary skills', 'Players', 'event_team',
       'Inducement - Team Mascot'],
      dtype='object')

#### Emerald Bowl Post Processing

In [110]:
TIER_MAPPING = {
    1: [
        "Amazons", "Lizardmen", "Necromantic Horror",
        "Old World Alliance", "Wood Elf"
    ],
    2: [
        "Dark Elf", "Norse", "Orcs", "Shambling Undead",
        "Skaven", "Tomb Kings", "Underworld Denizens"
    ],
    3: [
        "Bretonnian", "Dwarves", "Elfen Union", "High Elf",
        "Humans", "Nurgle", "Slann", "Vampire"
    ],
    4: [
        "Black Orks", "Chaos Chosen", "Chaos Dwarves",
        "Chaos Renegades", "Gnomes", "Goblins",
        "Imperial Nobility", "Khorne"
    ],
    5: [
        "Halfling", "Ogre", "Snotling"
    ],
}

TIER_EXTRA_GOLD = {
    1: 110,
    2: 130,
    3: 140,
    4: 150,
    5: 170,
}

TIER_ALLOWED_SECONDARY = {
    1: 0,
    2: 0,
    3: 1,
    4: 2,
    5: 3,
}

COMBINED_TIER_EXTRA_GOLD = {
    3: 0,
    4: 25,
    5: 35,
    6: 40,
    7: 50,
    8: 60,
    9: 70,
    10: 90
}


ALLOWED_INDUCEMENTS = {
    "Part-time Assistant Coaches": 5,
    "Temp Agency Cheerleaders ": 5,
    "Team Mascot": 1,
    "Blitzer's Best Kegs": 2,
    "Bribes": 3,
    "Mortuary Assistant": 1,
    "Plague Doctor": 1,
    "Riotous Rookies": 1,
    "Halfling Masterchef": 1,
}

df_team_event = df_team_event.sort_values(by="event_team")
df_team_event['emerald_bowl_skill_cost'] = df_team_event['Summary - Primary skills'].astype(int) * 20  + df_team_event['Summary - Secondary skills'].astype(int) * 30

race_to_tier = {
    race: tier
    for tier, races in TIER_MAPPING.items()
    for race in races
}

# Create the new column
df_team_event['emerald_bowl_tier'] = df_team_event['Race'].map(race_to_tier)
unknown = df_team_event[df_team_event['emerald_bowl_tier'].isna()]['Race'].unique()
if len(unknown) > 0:
    print("Unknown races:", unknown)

"""df_team_event = df_team_event[[
        'event_team', 'pdf_name', 'Coach', 'Race',  'Team', 'emerald_bowl_tier',
       'Summary - Players cost',
       'Summary - Skills cost', 'Summary - Inducement cost',
       'Summary - Sideline cost', 'Summary - Primary skills',
       'Summary - Secondary skills', 'emerald_bowl_skill_cost']]
"""
cost_cols = [
    "Summary - Players cost",
    "Summary - Skills cost",
    "Summary - Inducement cost",
    "Summary - Sideline cost",
]
df_team_event[cost_cols] = (
    df_team_event[cost_cols]
    .replace("k", "", regex=True)   # remove the 'k'
    .astype(int)                    # convert to integer
)
df_team_event["total_team_cost"] = df_team_event[[
    "emerald_bowl_skill_cost",
    "Summary - Players cost",
    "Summary - Inducement cost",
    "Summary - Sideline cost",
]].sum(axis=1)
df_team_event['emerald_bowl_tier_extra_gold'] = df_team_event['emerald_bowl_tier'].map(TIER_EXTRA_GOLD)
df_team_event["base_total_cost"] = 1150 + df_team_event['emerald_bowl_tier_extra_gold']
df_team_event["requied_extra_gold"] = df_team_event["total_team_cost"] - df_team_event["base_total_cost"]
df_team_event["verify_skill_cost"] = df_team_event["emerald_bowl_skill_cost"] <= (df_team_event["emerald_bowl_tier_extra_gold"] + df_team_event["requied_extra_gold"])
df_team_event["verify_seconday_skills"] = df_team_event["Summary - Secondary skills"].astype(int) <= df_team_event['emerald_bowl_tier'].map(TIER_ALLOWED_SECONDARY)
# TODO VERIFY STAR_PLAYERS
# TODO VERIFY INDUCEMENTS
def verify_inducements(row):
    print(row)
    return True

df_team_event["verify_inducements"] = df_team_event.apply(verify_inducements, axis=1)

df_team_event["row"] = (
    df_team_event.groupby("event_team").cumcount() + 1
)

df_wide = (
    df_team_event
    .set_index(["event_team", "row"])
    .unstack("row")
)

df_wide.columns = [
    f"{row}_{col}"
    for col, row in df_wide.columns
]

df_wide = df_wide.reset_index()

row_counts = df_team_event.groupby("event_team").size()

df_wide = df_wide.merge(
    row_counts.rename("num_rows"),
    on="event_team",
    how="left"
)

df_wide["event_team_combined_tier"] = df_wide["1_emerald_bowl_tier"] + df_wide["2_emerald_bowl_tier"]
df_wide["event_team_total_cost"] = df_wide["1_total_team_cost"] + df_wide["2_total_team_cost"]
df_wide["event_team_requied_extra_gold"] = df_wide["1_requied_extra_gold"] + df_wide["2_requied_extra_gold"]
df_wide["event_team_combined_tier_extra_gold"] = df_wide['event_team_combined_tier'].map(COMBINED_TIER_EXTRA_GOLD)

# VERIFICATION
df_wide["verify_used_max_allowed_gold"] = df_wide["event_team_requied_extra_gold"] <= df_wide["event_team_combined_tier_extra_gold"]
df_wide["verify_skill_cost"] = df_wide["1_verify_skill_cost"] & df_wide["2_verify_skill_cost"]
df_wide["verify_seconday_skills"] = df_wide["1_verify_seconday_skills"] & df_wide["2_verify_seconday_skills"]




df_wide

pdf_name                                              Tschirgant_Snotlinge_17.pdf
pdf_type                                                             bbtc_pl_2025
Race                                                                     Snotling
Coach                                                                  Tschirgant
Team                                                           Getting Games in C
Sideline - Apothecary                                                         Yes
Sideline - Assistant coaches                                                    0
Sideline - Cheerleaders                                                         0
Sideline - Dedicated fans                                                       0
Sideline - Re-rolls                                                             2
Inducement - Bribes                                                             1
Inducement - Riotous Rookies                                                    1
Summary - Player

Unnamed: 0,event_team,1_pdf_name,2_pdf_name,1_pdf_type,2_pdf_type,1_Race,2_Race,1_Coach,2_Coach,1_Team,...,1_verify_inducements,2_verify_inducements,num_rows,event_team_combined_tier,event_team_total_cost,event_team_requied_extra_gold,event_team_combined_tier_extra_gold,verify_used_max_allowed_gold,verify_skill_cost,verify_seconday_skills
0,Another Team,Tschirgant_Snotlinge_17.pdf,Vamps - Akorus.pdf,bbtc_pl_2025,bbtc_pl_2025,Snotling,Vampire,Tschirgant,Akorus,Getting Games in C,...,True,True,2,8,2665,55,60,True,True,True
1,Munich RumBBLerooskie,High Elfs - Akorus.pdf,Khorne - Akorus - vertical.pdf,bbtc_pl_2025,bbtc_pl_2025,High Elf,Khorne,Akorus,Schlachtenlenker,Emerald - High Elfs,...,True,True,2,7,2640,50,50,True,True,True


In [111]:
df_team_event

Unnamed: 0,pdf_name,pdf_type,Race,Coach,Team,Sideline - Apothecary,Sideline - Assistant coaches,Sideline - Cheerleaders,Sideline - Dedicated fans,Sideline - Re-rolls,...,emerald_bowl_skill_cost,emerald_bowl_tier,total_team_cost,emerald_bowl_tier_extra_gold,base_total_cost,requied_extra_gold,verify_skill_cost,verify_seconday_skills,verify_inducements,row
0,Tschirgant_Snotlinge_17.pdf,bbtc_pl_2025,Snotling,Tschirgant,Getting Games in C,Yes,0,0,0,2,...,110,5,1360,170,1320,40,True,True,True,1
1,Vamps - Akorus.pdf,bbtc_pl_2025,Vampire,Akorus,Emerald - Vamps,No,0,0,0,4,...,120,3,1305,140,1290,15,True,True,True,2
2,High Elfs - Akorus.pdf,bbtc_pl_2025,High Elf,Akorus,Emerald - High Elfs,Yes,1,0,0,3,...,140,3,1305,140,1290,15,True,True,True,1
3,Khorne - Akorus - vertical.pdf,bbtc_pl_2025,Khorne,Schlachtenlenker,Emerald - Khorne,Yes,0,0,0,2,...,170,4,1335,150,1300,35,True,True,True,2


In [112]:
df_team_event.columns 

Index(['pdf_name', 'pdf_type', 'Race', 'Coach', 'Team',
       'Sideline - Apothecary', 'Sideline - Assistant coaches',
       'Sideline - Cheerleaders', 'Sideline - Dedicated fans',
       'Sideline - Re-rolls', 'Inducement - Bribes',
       'Inducement - Riotous Rookies', 'Summary - Players cost',
       'Summary - Skills cost', 'Summary - Inducement cost',
       'Summary - Sideline cost', 'Summary - Primary skills',
       'Summary - Secondary skills', 'Players', 'event_team',
       'Inducement - Team Mascot', 'emerald_bowl_skill_cost',
       'emerald_bowl_tier', 'total_team_cost', 'emerald_bowl_tier_extra_gold',
       'base_total_cost', 'requied_extra_gold', 'verify_skill_cost',
       'verify_seconday_skills', 'verify_inducements', 'row'],
      dtype='object')