In [258]:
import json
import re
import fitz
from datetime import datetime, timezone
import pandas as pd
import os
import numpy as np
import math

In [51]:
from pathlib import Path

def find_all_pdfs(root_ordner):
    return [str(p) for p in Path(root_ordner).rglob('*.pdf')]

main_folder = os.path.join("..", "sample_roster", "bbtc_pl_2025")
bbtc_pl_2025_roster = find_all_pdfs(main_folder)

main_folder = os.path.join("..", "sample_roster", "bbtc_pl_2025_matched_played")
bbtc_pl_2025_matched_played5_roster = find_all_pdfs(main_folder)

main_folder = os.path.join("..", "sample_roster", "bbtc_pl_eurobowl_2025")
eurobowl_2025_roster = find_all_pdfs(main_folder)


test_roster_paths = bbtc_pl_2025_roster + bbtc_pl_2025_matched_played5_roster + eurobowl_2025_roster
print(len(test_roster_paths))

8


In [52]:
test_roster_paths[-1].split("\\")

['..', 'sample_roster', 'bbtc_pl_eurobowl_2025', 'EB - Orcs - Akorus.pdf']

In [4]:
# Python
import fitz  # PyMuPDF

# Open the PDF
doc = fitz.open(test_roster_paths[4])

team_data = {}

extraction_step = 'Race'
for page_number, page in enumerate(doc, start=1):
    # Extract text in blocks/spans with details
    blocks = page.get_text("dict")["blocks"]
    
    for block in blocks:
        if "lines" in block:
            for line in block["lines"]:
                for span in line["spans"]:
                    text = span["text"]
                    color = span["color"]  # RGB, float 0-1 representation
                    # print(f"Page {page_number}: '{text}' Color: {color}")
                    
                    if extraction_step == 'Race':
                        if text == 'COACH NAME':
                            extraction_step = 'Coach'
                        if 'Race' in team_data:
                            team_data


In [53]:
# Python
import fitz  # PyMuPDF

def load_roster(roster_path):
    doc = fitz.open(roster_path)

    roster_path_split = roster_path.split("\\")
    pdf_name = roster_path_split[-1]
    
    return {
        "full_path": roster_path,
        "pdf_name": pdf_name,
        "loaded_pdf": doc
    }

def detect_roster_type(loaded_roster):
    doc = loaded_roster["loaded_pdf"]
    for page_number, page in enumerate(doc, start=1):
        # Extract text in blocks/spans with details
        blocks = page.get_text("dict")["blocks"]
        started_summary = False
    
        for block in blocks:
            if "lines" in block:
                for line in block["lines"]:
                    for span in line["spans"]:
                        text = span["text"]
                        if text == "SUMMARY":
                            started_summary = True
                        elif started_summary:
                            print(text)
                            if text == "Skill Points":
                                return "bbtc_pl_2025_matched_played"
                            elif text == "Players cost":
                                return "bbtc_pl_2025"
                            elif "Option:" in text:
                                return "bbtc_pl_eurobowl_2025"

        raise RuntimeError(f"Could not identify roster_type for {loaded_roster['full_path']}")


SUMMARY_STEP_MAPPING = {
    "bbtc_pl_2025_matched_played": [
        'Skill Points',
        None,
        'Secondary skills',
        None,
        'Star players',
        None,
    ],
    "bbtc_pl_2025": [
        'Players cost',
        None,
        'Skills cost',
        None,
        'Inducement cost',
        None,
        'Sideline cost',
        None,
        'Primary skills',
        None,
        'Secondary skills',
        None
    ],
    "bbtc_pl_eurobowl_2025": [
        'Players cost',
        None,
        'Skills cost',
        None,
        'Inducement cost',
        None,
        'Sideline cost',
        None,
        'Primary skills',
        None,
        'Secondary skills',
        None
    ],
}


def process_team_pdf(roster_path):
    # Open the PDF
    loaded_roster = load_roster(roster_path)
    doc = loaded_roster["loaded_pdf"]
    pdf_type = detect_roster_type(loaded_roster)
    team_data = {
        'pdf_name': loaded_roster["pdf_name"],
        'pdf_type': pdf_type
    }

    print(team_data)
    
    extraction_step = 'Race'
    for page_number, page in enumerate(doc, start=1):
        # Extract text in blocks/spans with details
        blocks = page.get_text("dict")["blocks"]

        for block in blocks:
            if "lines" in block:
                for line in block["lines"]:
                    for span in line["spans"]:
                        text = span["text"]
                        color = span["color"]  # RGB, float 0-1 representation
                        # print(f"Page {page_number}: '{text}' Color: {color}")
                        # Adjust extraction step
                        if text == 'SIDELINE':
                            extraction_step = 'Sideline'
                            sideline_ctr = 0
                        elif text == 'INDUCEMENTS':
                            extraction_step = 'Inducements'
                            next_name = None
                        elif text == 'SUMMARY':
                            summary_ctr = 0
                            extraction_step = 'Summary'
                            summary_ctr = -1
    
                        if extraction_step == 'Race':
                            if text == 'COACH NAME':
                                extraction_step = 'Coach'
                                continue
                            if 'Race' in team_data:
                                team_data['Race'] += ' ' + text
                            else:
                                team_data['Race'] = text

                        elif extraction_step == 'Coach':
                            team_data['Coach'] = text
                            extraction_step = 'Team'

                        elif extraction_step == 'Team':
                            if text == 'TEAM NAME':
                                continue
                            elif text == 'SIDELINE':
                                extraction_step = 'Sideline'
                                sideline_ctr = 0
                                
                            elif 'Team' in team_data:
                                team_data['Team'] += ' ' + text
                            else:
                                team_data['Team'] = text

                        elif extraction_step == 'Sideline':
                            sideline_properties = [
                                'Apothecary',
                                'Assistant coaches',
                                'Cheerleaders',
                                'Dedicated fans',
                                'Re-rolls',
                            ]

                            if sideline_ctr >= len(sideline_properties):
                                extraction_step = 'Inducements'
                                continue
                            next_sideline = sideline_properties[sideline_ctr]
                            
                            if text == 'SIDELINE':
                                continue
                            elif text in sideline_properties:
                                if (sideline_ctr == 0) and (text != 'Apothecary'):
                                    sideline_ctr += 1
                                continue
                            else:
                                # print('SAVE', next_sideline, text)
                                team_data[f'Sideline - {next_sideline}'] = text
                                sideline_ctr += 1

                        elif extraction_step == 'Inducements':
                            if text in ['SUMMARY', 'No inducements', 'LEAGUES & SPECIAL']:
                                summary_ctr = 0
                                extraction_step = 'Summary'
                                summary_ctr = -1
                                continue
                            if text == 'INDUCEMENTS':
                                continue
                            if next_name is None:
                                next_name = text
                            else:
                                team_data[f'Inducement - {next_name}'] = text
                                next_name = None

                        elif extraction_step == 'Summary':
                            print(f"SUMMARY {summary_ctr} | {text}")
                            summary_steps = SUMMARY_STEP_MAPPING[pdf_type]
                            if summary_ctr == len(summary_steps):
                                extraction_step = 'Players'
                                player_ctr = -1
                                continue

                            if text == 'SUMMARY':
                                summary_ctr = 0
                                continue
                            elif summary_ctr == -1:
                                continue

                            if pdf_type == "bbtc_pl_eurobowl_2025":
                                if summary_ctr == 0:
                                    team_data[f'Summary - Option'] = text.split(": ")[0]
                                    extraction_step = 'Players'
                                    player_ctr = -1
                                    continue
                                else:
                                    raise NotImplementedError()
                            else:
                                if (summary_ctr % 2) == 1:
                                    team_data[f'Summary - {summary_steps[summary_ctr - 1]}'] = text
                            summary_ctr += 1
    
                        elif extraction_step == 'Players':
                            if text == 'COST':
                                team_data['Players'] = []
                                player_ctr = 1
                                next_player_property = 'Name'
                                current_player = {
                                    'ctr': player_ctr,
                                    'position_name': None,
                                    'primary_1': None,
                                    'primary_2': None,
                                    'secondary_1': None,
                                    'secondary_2': None,
                                    'star': False,
                                }
                                continue
                            elif player_ctr == -1:
                                continue

                            if re.fullmatch(r"\b\d+k\b", text):
                                team_data['Players'].append(current_player)
                                player_ctr += 1
                                current_player = {
                                    'ctr': player_ctr,
                                    'position_name': None,
                                    'primary_1': None,
                                    'primary_2': None,
                                    'secondary_1': None,
                                    'secondary_2': None,
                                    'star': False,
                                }
                                next_player_property = 'Name'

                            elif next_player_property == 'Name':
                                current_player['position_name'] = " ".join(text.split()[1:])
                                next_player_property = 'Skills'

                            elif next_player_property == 'Skills':
                                if color == 681912:
                                    if current_player['primary_1'] is None:
                                        current_player['primary_1'] = text.strip().strip(',')
                                    elif current_player['primary_2'] is None:
                                        current_player['primary_2'] = text.strip().strip(',')
                                    else:
                                        raise RuntimeError(f'Unexpected Skill | Color: {color} - Text: {text}')
                                if color == 4822027:
                                    if current_player['secondary_1'] is None:
                                        current_player['secondary_1'] = text.strip().strip(',')
                                    elif current_player['secondary_2'] is None:
                                        current_player['secondary_2'] = text.strip().strip(',')
                                    else:
                                        raise RuntimeError(f'Unexpected Skill | Color: {color} - Text: {text}')
                            
                            if text == 'Special skill: ':
                                current_player['star'] = True
                                      
    return team_data
                                    
                                
                            
processed_roster = []
for roster_path in test_roster_paths:
    team_data = process_team_pdf(roster_path)
    processed_roster.append(team_data)
    # print(json.dumps(team_data, indent=4))

Players cost
{'pdf_name': 'High Elfs - Akorus.pdf', 'pdf_type': 'bbtc_pl_2025'}
SUMMARY -1 | LEAGUES & SPECIAL
SUMMARY -1 | RULES
SUMMARY -1 | • Elven Kingdom League
SUMMARY -1 | SUMMARY
SUMMARY 0 | Players cost
SUMMARY 1 | 955k
SUMMARY 2 | Skills cost
SUMMARY 3 | 200k
SUMMARY 4 | Inducement cost
SUMMARY 5 | 0k
SUMMARY 6 | Sideline cost
SUMMARY 7 | 210k
SUMMARY 8 | Primary skills
SUMMARY 9 | 7
SUMMARY 10 | Secondary skills
SUMMARY 11 | 0
SUMMARY 12 | # POSITION
Players cost
{'pdf_name': 'Khorne - Akorus - vertical.pdf', 'pdf_type': 'bbtc_pl_2025'}
SUMMARY -1 | RULES
SUMMARY -1 | • Brawlin' Brutes
SUMMARY -1 | • Favoured of Khorne
SUMMARY -1 | • Chaos Clash
SUMMARY -1 | SUMMARY
SUMMARY 0 | Players cost
SUMMARY 1 | 970k
SUMMARY 2 | Skills cost
SUMMARY 3 | 260k
SUMMARY 4 | Inducement cost
SUMMARY 5 | 25k
SUMMARY 6 | Sideline cost
SUMMARY 7 | 170k
SUMMARY 8 | Primary skills
SUMMARY 9 | 7
SUMMARY 10 | Secondary skills
SUMMARY 11 | 1
SUMMARY 12 | # POSITION
Players cost
{'pdf_name': 'Khorne 

In [54]:
for roster_path in test_roster_paths:
    loaded_roster = load_roster(roster_path)
    doc = loaded_roster["loaded_pdf"]
    result = detect_roster_type(loaded_roster)
    print(roster_path, "|", result)

Players cost
..\sample_roster\bbtc_pl_2025\High Elfs - Akorus.pdf | bbtc_pl_2025
Players cost
..\sample_roster\bbtc_pl_2025\Khorne - Akorus - vertical.pdf | bbtc_pl_2025
Players cost
..\sample_roster\bbtc_pl_2025\Khorne - Akorus.pdf | bbtc_pl_2025
Players cost
..\sample_roster\bbtc_pl_2025\Necro - Schlachtenlenker.pdf | bbtc_pl_2025
Players cost
..\sample_roster\bbtc_pl_2025\Nurgle - Akorus.pdf | bbtc_pl_2025
Players cost
..\sample_roster\bbtc_pl_2025\Vamps - Akorus.pdf | bbtc_pl_2025
Skill Points
..\sample_roster\bbtc_pl_2025_matched_played\Zons - Test.pdf | bbtc_pl_2025_matched_played
Option: Hiring Legends
..\sample_roster\bbtc_pl_eurobowl_2025\EB - Orcs - Akorus.pdf | bbtc_pl_eurobowl_2025


In [49]:
processed_roster[0]

{'pdf_name': 'EB - Orcs - Akorus.pdf',
 'pdf_type': 'bbtc_pl_eurobowl_2025',
 'Race': 'Orc',
 'Coach': 'Akorus',
 'Team': 'EB - Orcs Test',
 'Sideline - Apothecary': 'No',
 'Sideline - Assistant coaches': '0',
 'Sideline - Cheerleaders': '0',
 'Sideline - Dedicated fans': '0',
 'Sideline - Re-rolls': '0',
 "Inducement - Blitzer's Best Kegs": '1',
 'Inducement - Weather Mage': '1',
 'Inducement - Dodgy League Rep': '1',
 'Summary - Option': 'Option',
 'Players': [{'ctr': 1,
   'position_name': 'Big Un Blocker',
   'primary_1': 'Block',
   'primary_2': None,
   'secondary_1': None,
   'secondary_2': None,
   'star': False},
  {'ctr': 2,
   'position_name': 'Big Un Blocker',
   'primary_1': None,
   'primary_2': None,
   'secondary_1': None,
   'secondary_2': None,
   'star': False},
  {'ctr': 3,
   'position_name': 'Goblin Lineman',
   'primary_1': None,
   'primary_2': None,
   'secondary_1': None,
   'secondary_2': None,
   'star': False},
  {'ctr': 4,
   'position_name': 'Orc Blitzer'

In [64]:
def flatten_players(processed_roster):
    rows = []
    error_rows = []
    for team_data in processed_roster:
        if 'Players' in team_data:
            for player in team_data['Players']:
                prefix = f'player_{player["ctr"]}'
                for key, value in player.items():
                    if key == 'ctr':
                        continue
                    
                    team_data[f'{prefix}_{key}'] = value
            
            del team_data['Players']
            rows.append(team_data)
        elif 'player_11_star' in team_data:
            rows.append(team_data)
        else:
            error_rows.append(team_data)

    return {
        "rows": rows,
        "error_rows": error_rows
    }

flattend_rows = flatten_players(processed_roster)

print('Rows', len(flattend_rows["rows"]))
print('Errors', len(flattend_rows["error_rows"]))

Rows 2
Errors 0


In [57]:
df = pd.DataFrame(rows)
df.head(5)

Unnamed: 0,pdf_name,pdf_type,Race,Coach,Team,Sideline - Apothecary,Sideline - Assistant coaches,Sideline - Cheerleaders,Sideline - Dedicated fans,Sideline - Re-rolls,...,player_13_primary_2,player_13_secondary_1,player_13_secondary_2,player_13_star,Summary - Skill Points,Summary - Star players,Inducement - Blitzer's Best Kegs,Inducement - Weather Mage,Inducement - Dodgy League Rep,Summary - Option
0,High Elfs - Akorus.pdf,bbtc_pl_2025,High Elf,Akorus,Emerald - High Elfs,Yes,1,0,0,3,...,,,,,,,,,,
1,Khorne - Akorus - vertical.pdf,bbtc_pl_2025,Khorne,Schlachtenlenker,Emerald - Khorne,Yes,0,0,0,2,...,,,,,,,,,,
2,Khorne - Akorus.pdf,bbtc_pl_2025,Khorne,Akorus,Emerald - Khorne,Yes,1,0,0,2,...,,,,,,,,,,
3,Necro - Schlachtenlenker.pdf,bbtc_pl_2025,Necromantic Horror,Schlachtenlenker,Emerald - Necro,,0,0,0,3,...,,,,,,,,,,
4,Nurgle - Akorus.pdf,bbtc_pl_2025,Nurgle,Akorus,Emerald - Nurgle,,0,0,0,3,...,,,,False,,,,,,


In [56]:
df_error = pd.DataFrame(error_rows)
df_error

In [None]:
df_emerald_bowl_processing = df[['PDF Name', 'Race', 'Coach', 'Team',
       'Summary - Players cost', 'Summary - Skills cost',
       'Summary - Inducement cost', 'Summary - Sideline cost',
       'Summary - Primary skills', 'Summary - Secondary skills']]
df_emerald_bowl_processing['emerald_bowl_skill_cost'] = df_emerald_bowl_processing['Summary - Primary skills'].astype(int) * 20  + df_emerald_bowl_processing['Summary - Secondary skills'].astype(int) * 30
df_emerald_bowl_processing

In [None]:
now_utc = datetime.now(timezone.utc)
timestamp_utc_str = now_utc.strftime("%Y%m%dT%H%M%S")
df.to_csv(f'roster_extracted_{timestamp_utc_str}.csv')

In [None]:
now_utc = datetime.now(timezone.utc)
timestamp_utc_str = now_utc.strftime("%Y%m%dT%H%M%S")
df_update.to_csv(f'eb_roster_update_{timestamp_utc_str}.csv')

In [None]:
df_error = pd.DataFrame(error_rows)
now_utc = datetime.now(timezone.utc)
timestamp_utc_str = now_utc.strftime("%Y%m%dT%H%M%S")
df_error = pd.DataFrame(error_rows).to_csv(f'eb_roster_errors_{timestamp_utc_str}.csv')
len(error_rows)

### Team Event

In [248]:
main_folder = os.path.join("..", "sample_roster", "bbtc_pl_2025_team_event")
bbtc_pl_2025_team_event_roster = find_all_pdfs(main_folder)
print(f"Found {len(bbtc_pl_2025_team_event_roster)} roster pdfs")

processed_roster = []
for roster_path in bbtc_pl_2025_team_event_roster:
    team_data = process_team_pdf(roster_path)
    
    team_data["event_team"] = os.path.basename(os.path.dirname(roster_path))
    processed_roster.append(team_data)
    # print(json.dumps(team_data, indent=4))

df_team_event = pd.DataFrame(processed_roster)
df_team_event

Found 8 roster pdfs
Players cost
{'pdf_name': 'Tschirgant_Snotlinge_17.pdf', 'pdf_type': 'bbtc_pl_2025'}
SUMMARY -1 | RULES
SUMMARY -1 | • Underworld Challenge
SUMMARY -1 | • Bribery and Corruption
SUMMARY -1 | • Low Cost Linemen
SUMMARY -1 | • Swarming
SUMMARY -1 | SUMMARY
SUMMARY 0 | Players cost
SUMMARY 1 | 860k
SUMMARY 2 | Skills cost
SUMMARY 3 | 160k
SUMMARY 4 | Inducement cost
SUMMARY 5 | 200k
SUMMARY 6 | Sideline cost
SUMMARY 7 | 190k
SUMMARY 8 | Primary skills
SUMMARY 9 | 1
SUMMARY 10 | Secondary skills
SUMMARY 11 | 3
SUMMARY 12 | # POSITION
Players cost
{'pdf_name': 'Vamps - Akorus.pdf', 'pdf_type': 'bbtc_pl_2025'}
SUMMARY -1 | RULES
SUMMARY -1 | • Masters of Undeath
SUMMARY -1 | • Sylvanian Spotlight
SUMMARY -1 | SUMMARY
SUMMARY 0 | Players cost
SUMMARY 1 | 920k
SUMMARY 2 | Skills cost
SUMMARY 3 | 150k
SUMMARY 4 | Inducement cost
SUMMARY 5 | 25k
SUMMARY 6 | Sideline cost
SUMMARY 7 | 240k
SUMMARY 8 | Primary skills
SUMMARY 9 | 6
SUMMARY 10 | Secondary skills
SUMMARY 11 | 0
SUM

Unnamed: 0,pdf_name,pdf_type,Race,Coach,Team,Sideline - Apothecary,Sideline - Assistant coaches,Sideline - Cheerleaders,Sideline - Dedicated fans,Sideline - Re-rolls,...,Inducement - Riotous Rookies,Summary - Players cost,Summary - Skills cost,Summary - Inducement cost,Summary - Sideline cost,Summary - Primary skills,Summary - Secondary skills,Players,event_team,Inducement - Team Mascot
0,Tschirgant_Snotlinge_17.pdf,bbtc_pl_2025,Snotling,Tschirgant,Getting Games in C,Yes,0,0,0,2,...,1.0,860k,160k,200k,190k,1,3,"[{'ctr': 1, 'position_name': 'Glart Smashrip',...",Another Team,
1,Vamps - Akorus.pdf,bbtc_pl_2025,Vampire,Akorus,Emerald - Vamps,No,0,0,0,4,...,,920k,150k,25k,240k,6,0,"[{'ctr': 1, 'position_name': 'Vampire Thrower'...",Another Team,1.0
2,High Elfs - Akorus.pdf,bbtc_pl_2025,High Elf,Akorus,Emerald - High Elfs,Yes,1,0,0,3,...,,955k,200k,0k,210k,7,0,"[{'ctr': 1, 'position_name': 'High Elf Blitzer...",Munich RumBBLerooskie,
3,Khorne - Akorus - vertical.pdf,bbtc_pl_2025,Khorne,Schlachtenlenker,Emerald - Khorne,Yes,0,0,0,2,...,,970k,260k,25k,170k,7,1,"[{'ctr': 1, 'position_name': 'Bloodspawn', 'pr...",Munich RumBBLerooskie,1.0
4,Khorne - Schlachtenlenker.pdf,bbtc_pl_2025,Khorne,Schlachtenlenker,Emerald - Khorne,Yes,0,0,0,2,...,,970k,260k,25k,170k,7,1,"[{'ctr': 1, 'position_name': 'Bloodspawn', 'pr...",Munich RumBBLerooskie - v2,1.0
5,Nurgle - Akorus.pdf,bbtc_pl_2025,Nurgle,Akorus,Emerald - Nurgle,,0,0,0,3,...,,960k,200k,25k,180k,7,0,"[{'ctr': 1, 'position_name': 'Rotspawn', 'prim...",Munich RumBBLerooskie - v2,1.0
6,Orcs - Goblin as captain.pdf,bbtc_pl_2025,Orc,Akorus,EB - Orcs Test,No,0,0,0,0,...,,795k,90k,0k,0k,2,1,"[{'ctr': 1, 'position_name': 'Big Un Blocker',...",Team Captains,
7,Orcs - Thrower as captain.pdf,bbtc_pl_2025,Orc,Akorus,EB - Orcs Test,No,0,0,0,0,...,,795k,50k,0k,0k,2,0,"[{'ctr': 1, 'position_name': 'Big Un Blocker',...",Team Captains,


In [249]:
df_team_event.columns

Index(['pdf_name', 'pdf_type', 'Race', 'Coach', 'Team',
       'Sideline - Apothecary', 'Sideline - Assistant coaches',
       'Sideline - Cheerleaders', 'Sideline - Dedicated fans',
       'Sideline - Re-rolls', 'Inducement - Bribes',
       'Inducement - Riotous Rookies', 'Summary - Players cost',
       'Summary - Skills cost', 'Summary - Inducement cost',
       'Summary - Sideline cost', 'Summary - Primary skills',
       'Summary - Secondary skills', 'Players', 'event_team',
       'Inducement - Team Mascot'],
      dtype='object')

#### Emerald Bowl Post Processing

In [256]:
TIER_MAPPING = {
    1: [
        "Amazons", "Lizardmen", "Necromantic Horror",
        "Old World Alliance", "Wood Elf"
    ],
    2: [
        "Dark Elf", "Norse", "Orc", "Shambling Undead",
        "Skaven", "Tomb Kings", "Underworld Denizens"
    ],
    3: [
        "Bretonnian", "Dwarves", "Elfen Union", "High Elf",
        "Human", "Nurgle", "Slann", "Vampire"
    ],
    4: [
        "Black Orc", "Chaos Chosen", "Chaos Dwarves",
        "Chaos Renegades", "Gnomes", "Goblins",
        "Imperial Nobility", "Khorne"
    ],
    5: [
        "Halfling", "Ogre", "Snotling"
    ],
}

TIER_EXTRA_GOLD = {
    1: 110,
    2: 130,
    3: 140,
    4: 150,
    5: 170,
}

TIER_ALLOWED_SECONDARY = {
    1: 0,
    2: 0,
    3: 1,
    4: 2,
    5: 3,
}

COMBINED_TIER_EXTRA_GOLD = {
    3: 0,
    4: 25,
    5: 35,
    6: 40,
    7: 50,
    8: 60,
    9: 70,
    10: 90
}


ALLOWED_INDUCEMENTS = {
    "Part-time Assistant Coaches": 5,
    "Temp Agency Cheerleaders ": 5,
    "Team Mascot": 1,
    "Blitzer's Best Kegs": 2,
    "Bribes": 3,
    "Mortuary Assistant": 1,
    "Plague Doctor": 1,
    "Riotous Rookies": 1,
    "Halfling Masterchef": 1,
}

ALLOWED_STARS = [
    "Rodney Roachbait",
    "Akhorne The Squirrel",
    "Barik Farblast",
    "Fungus the Loon",
    "Swiftvine Glimmershard",
    "Randolph Backstabber",
    "Glart Smashrip",
]

df_team_event = df_team_event.sort_values(by="event_team")
def team_captain(row):
    captain = None
    if row['Race'] in ['Human', 'Orc']:
        for player in row['Players']:
            skills = [
                player["primary_1"],
                player["primary_2"],
                player["secondary_1"],
                player["secondary_2"],
            ]
            count_skills = sum(x is not None for x in skills)
            if 'Pro' in skills:
                if count_skills == 2:
                    return player
                elif 'Pro' in [player["secondary_1"], player["secondary_2"]]:
                    return player
                else:
                    captain = player
    return captain
df_team_event["team_captain"] = df_team_event.apply(team_captain, axis=1)

def team_captain_pro_as_secondary(row):
    captain = row['team_captain']
    if captain is not None:
        print([captain["secondary_1"], captain["secondary_2"]])
        return 'Pro' in [captain["secondary_1"], captain["secondary_2"]]
    return False
df_team_event["team_captain_pro_as_secondary"] = df_team_event.apply(team_captain_pro_as_secondary, axis=1) 

def calculate_skill_cost(row):
    skill_cost = int(row['Summary - Primary skills']) * 20  + int(row['Summary - Secondary skills']) * 30
    captain = row['team_captain'] 
    if captain is not None :
        if 'Pro' in [captain["secondary_1"], captain["secondary_2"]]:
            skill_cost -= 30
        else:
            skill_cost -= 20
    return skill_cost
df_team_event["emerald_bowl_skill_cost"] = df_team_event.apply(calculate_skill_cost, axis=1)  

race_to_tier = {
    race: tier
    for tier, races in TIER_MAPPING.items()
    for race in races
}

# Create the new column
df_team_event['emerald_bowl_tier'] = df_team_event['Race'].map(race_to_tier)
unknown = df_team_event[df_team_event['emerald_bowl_tier'].isna()]['Race'].unique()
if len(unknown) > 0:
    print("Unknown races:", unknown)

"""df_team_event = df_team_event[[
        'event_team', 'pdf_name', 'Coach', 'Race',  'Team', 'emerald_bowl_tier',
       'Summary - Players cost',
       'Summary - Skills cost', 'Summary - Inducement cost',
       'Summary - Sideline cost', 'Summary - Primary skills',
       'Summary - Secondary skills', 'emerald_bowl_skill_cost']]
"""
cost_cols = [
    "Summary - Players cost",
    "Summary - Skills cost",
    "Summary - Inducement cost",
    "Summary - Sideline cost",
]
df_team_event[cost_cols] = (
    df_team_event[cost_cols]
    .replace("k", "", regex=True)   # remove the 'k'
    .astype(int)                    # convert to integer
)
df_team_event["total_team_cost"] = df_team_event[[
    "emerald_bowl_skill_cost",
    "Summary - Players cost",
    "Summary - Inducement cost",
    "Summary - Sideline cost",
]].sum(axis=1)
df_team_event['emerald_bowl_tier_extra_gold'] = df_team_event['emerald_bowl_tier'].map(TIER_EXTRA_GOLD)
df_team_event["base_total_cost"] = 1150 + df_team_event['emerald_bowl_tier_extra_gold']
df_team_event["required_extra_gold"] = (
    df_team_event["total_team_cost"] 
    - df_team_event["base_total_cost"]
).apply(lambda x: max(x, 0))
df_team_event["verify_skill_cost"] = df_team_event["emerald_bowl_skill_cost"] <= (df_team_event["emerald_bowl_tier_extra_gold"] + df_team_event["required_extra_gold"])
# TODO ENSURE TEAM CAPTAIN SECONDARY DOES NOT COUNT
df_team_event["verify_seconday_skills"] = (
    (df_team_event["Summary - Secondary skills"].astype(int) - df_team_event["team_captain_pro_as_secondary"]) 
    <= df_team_event['emerald_bowl_tier'].map(TIER_ALLOWED_SECONDARY)
)
# TODO VERIFY STAR_PLAYERS
def get_all_stars(player_list):
    stars = []
    for player in player_list:
        if player["star"]:
            stars.append(player["position_name"])
    return stars
df_team_event["star_players"] = df_team_event["Players"].apply(get_all_stars)
df_team_event["count_star_players"] = df_team_event["star_players"].apply(lambda x: len(x))
def verify_star_players(row):
    if row["count_star_players"] > 2:
        return False
    if row["count_star_players"] > 0:
        if row['emerald_bowl_tier'] != 5:
            return False
        stars = row["star_players"]
        matching_stars = [s for s in stars if (s in ALLOWED_STARS)]
        return len(matching_stars) == row["count_star_players"]

    return True
df_team_event["verify_star_players"] = df_team_event.apply(verify_star_players, axis=1)        

def verify_no_skill_stack(row):
    player_list = row["Players"]

    captain = row['team_captain']

    for player in player_list:
        skills = [
            player["primary_1"],
            player["primary_2"],
            player["secondary_1"],
            player["secondary_2"],
        ]
        count_skills = sum(x is not None for x in skills)
        if count_skills > 1:
            if captain is not None:
                if player['ctr'] != captain['ctr']:
                    return False
            else:
                return False
    return True
df_team_event["verify_no_skill_stack"] = df_team_event.apply(verify_no_skill_stack, axis=1)

# TODO VERIFY EXTRA GOLD PROPERLY SPLIT
for col in df_team_event.columns:
    if col.startswith("Inducement -"):
        df_team_event[col] = df_team_event[col].fillna(0).astype(int)
def verify_inducements(row):
    row_dict = row.to_dict()
    for key, value in row_dict.items():

        if key.startswith("Inducement - "):
            inducement_type = key.replace("Inducement - ", "").strip()
            if inducement_type not in ALLOWED_INDUCEMENTS:
                print("FALSE", key, value, type(value))
                return False
            max_value = ALLOWED_INDUCEMENTS[inducement_type]
            if max_value < value:
                return False

    return True

df_team_event["verify_inducements"] = df_team_event.apply(verify_inducements, axis=1)

df_team_event["row"] = (
    df_team_event.groupby("event_team").cumcount() + 1
)

df_wide = (
    df_team_event
    .set_index(["event_team", "row"])
    .unstack("row")
)

df_wide.columns = [
    f"{row}_{col}"
    for col, row in df_wide.columns
]

df_wide = df_wide.reset_index()

row_counts = df_team_event.groupby("event_team").size()

df_wide = df_wide.merge(
    row_counts.rename("num_rows"),
    on="event_team",
    how="left"
)

df_wide["event_team_combined_tier"] = df_wide["1_emerald_bowl_tier"] + df_wide["2_emerald_bowl_tier"]
df_wide["event_team_total_cost"] = df_wide["1_total_team_cost"] + df_wide["2_total_team_cost"]
df_wide["event_team_required_extra_gold"] = df_wide["1_required_extra_gold"] + df_wide["2_required_extra_gold"]
df_wide["event_team_combined_tier_extra_gold"] = df_wide['event_team_combined_tier'].map(COMBINED_TIER_EXTRA_GOLD)

# VERIFICATION
df_wide["verify_used_max_allowed_gold"] = df_wide["event_team_required_extra_gold"] <= df_wide["event_team_combined_tier_extra_gold"]
df_wide["verify_skill_cost"] = df_wide["1_verify_skill_cost"] & df_wide["2_verify_skill_cost"]
df_wide["verify_seconday_skills"] = df_wide["1_verify_seconday_skills"] & df_wide["2_verify_seconday_skills"]
df_wide["verify_inducements"] = df_wide["1_verify_inducements"] & df_wide["2_verify_inducements"]
df_wide["verify_no_skill_stack"] = df_wide["1_verify_no_skill_stack"] & df_wide["2_verify_no_skill_stack"]
df_wide["verify_star_players"] = df_wide["1_verify_star_players"] & df_wide["2_verify_star_players"]
df_wide["verify_min_tier3_combined"] = df_wide["event_team_combined_tier"] >= 3

df_wide["roster_okay"] = df_wide["verify_used_max_allowed_gold"] \
                         & df_wide["verify_skill_cost"] \
                         & df_wide["verify_seconday_skills"] \
                         & df_wide["verify_inducements"] \
                         & df_wide["verify_no_skill_stack"] \
                         & df_wide["verify_star_players"] \
                         & df_wide["verify_min_tier3_combined"] 


df_final = df_wide[[
    'event_team',
    '1_Coach', '2_Coach',
    '1_pdf_name', '2_pdf_name', 
    '1_Race', '2_Race',
    'roster_okay',
    'verify_used_max_allowed_gold',
    'verify_skill_cost',
    'verify_seconday_skills',
    'verify_inducements',
    'verify_no_skill_stack',
    'verify_star_players',
    '1_verify_seconday_skills', '2_verify_seconday_skills',
    '1_verify_inducements', '2_verify_inducements',
]]
df_final

['Pro', None]
[None, None]


Unnamed: 0,event_team,1_Coach,2_Coach,1_pdf_name,2_pdf_name,1_Race,2_Race,roster_okay,verify_used_max_allowed_gold,verify_skill_cost,verify_seconday_skills,verify_inducements,verify_no_skill_stack,verify_star_players,1_verify_seconday_skills,2_verify_seconday_skills,1_verify_inducements,2_verify_inducements
0,Another Team,Tschirgant,Akorus,Tschirgant_Snotlinge_17.pdf,Vamps - Akorus.pdf,Snotling,Vampire,True,True,True,True,True,True,True,True,True,True,True
1,Munich RumBBLerooskie,Akorus,Schlachtenlenker,High Elfs - Akorus.pdf,Khorne - Akorus - vertical.pdf,High Elf,Khorne,True,True,True,True,True,True,True,True,True,True,True
2,Munich RumBBLerooskie - v2,Schlachtenlenker,Akorus,Khorne - Schlachtenlenker.pdf,Nurgle - Akorus.pdf,Khorne,Nurgle,True,True,True,True,True,True,True,True,True,True,True
3,Team Captains,Akorus,Akorus,Orcs - Goblin as captain.pdf,Orcs - Thrower as captain.pdf,Orc,Orc,True,True,True,True,True,True,True,True,True,True,True


In [257]:
df_team_event[["pdf_name", "team_captain_pro_as_secondary", "team_captain"]]

Unnamed: 0,pdf_name,team_captain_pro_as_secondary,team_captain
0,Tschirgant_Snotlinge_17.pdf,False,
1,Vamps - Akorus.pdf,False,
2,High Elfs - Akorus.pdf,False,
3,Khorne - Akorus - vertical.pdf,False,
4,Khorne - Schlachtenlenker.pdf,False,
5,Nurgle - Akorus.pdf,False,
6,Orcs - Goblin as captain.pdf,True,"{'ctr': 4, 'position_name': 'Goblin Lineman', ..."
7,Orcs - Thrower as captain.pdf,False,"{'ctr': 9, 'position_name': 'Orc Thrower', 'pr..."


In [255]:
for _, row in df_team_event[["pdf_name", "team_captain_pro_as_secondary", "team_captain"]].iterrows():
    print(row['team_captain'])
    print()

None
None
None
None
None
None
{'ctr': 4, 'position_name': 'Goblin Lineman', 'primary_1': 'Dirty Player', 'primary_2': None, 'secondary_1': 'Pro', 'secondary_2': None, 'star': False}
{'ctr': 9, 'position_name': 'Orc Thrower', 'primary_1': 'Pro', 'primary_2': None, 'secondary_1': None, 'secondary_2': None, 'star': False}


In [232]:
now_utc = datetime.now(timezone.utc)
timestamp_utc_str = now_utc.strftime("%Y%m%dT%H%M%S")
df_final.to_csv(f'emerald_bowl_roster_eval_{timestamp_utc_str}.csv', index=False)

In [267]:
for _, row in df_team_event.iterrows():
    pass

In [268]:
row.to_dict()

{'pdf_name': 'Orcs - Thrower as captain.pdf',
 'pdf_type': 'bbtc_pl_2025',
 'Race': 'Orc',
 'Coach': 'Akorus',
 'Team': 'EB - Orcs Test',
 'Sideline - Apothecary': 'No',
 'Sideline - Assistant coaches': '0',
 'Sideline - Cheerleaders': '0',
 'Sideline - Dedicated fans': '0',
 'Sideline - Re-rolls': '0',
 'Inducement - Bribes': 0,
 'Inducement - Riotous Rookies': 0,
 'Summary - Players cost': 795,
 'Summary - Skills cost': 50,
 'Summary - Inducement cost': 0,
 'Summary - Sideline cost': 0,
 'Summary - Primary skills': '2',
 'Summary - Secondary skills': '0',
 'Players': [{'ctr': 1,
   'position_name': 'Big Un Blocker',
   'primary_1': None,
   'primary_2': None,
   'secondary_1': None,
   'secondary_2': None,
   'star': False},
  {'ctr': 2,
   'position_name': 'Big Un Blocker',
   'primary_1': None,
   'primary_2': None,
   'secondary_1': None,
   'secondary_2': None,
   'star': False},
  {'ctr': 3,
   'position_name': 'Goblin Lineman',
   'primary_1': None,
   'primary_2': None,
   'se

In [169]:
df_team_event[["Players"]]
def verify_skill_stack(player_list):
    for player in player_list:
        skills = [
            player["primary_1"],
            player["primary_2"],
            player["secondary_1"],
            player["secondary_2"],
        ]
        count_skills = sum(x is not None for x in skills)
        if count_skills > 1:
            return False
    return True

['Glart Smashrip', 'Fungus the Loon']

In [168]:
player

[{'ctr': 1,
  'position_name': 'Glart Smashrip',
  'primary_1': None,
  'primary_2': None,
  'secondary_1': None,
  'secondary_2': None,
  'star': True},
 {'ctr': 2,
  'position_name': 'Fungus the Loon',
  'primary_1': None,
  'primary_2': None,
  'secondary_1': None,
  'secondary_2': None,
  'star': True},
 {'ctr': 3,
  'position_name': 'Trained Troll',
  'primary_1': None,
  'primary_2': None,
  'secondary_1': 'Block',
  'secondary_2': None,
  'star': False},
 {'ctr': 4,
  'position_name': 'Trained Troll',
  'primary_1': None,
  'primary_2': None,
  'secondary_1': 'Block',
  'secondary_2': None,
  'star': False},
 {'ctr': 5,
  'position_name': 'Pump Wagon',
  'primary_1': None,
  'primary_2': None,
  'secondary_1': 'Pro',
  'secondary_2': None,
  'star': False},
 {'ctr': 6,
  'position_name': 'Pump Wagon',
  'primary_1': None,
  'primary_2': None,
  'secondary_1': None,
  'secondary_2': None,
  'star': False},
 {'ctr': 7,
  'position_name': 'Fungus Flinga',
  'primary_1': 'Accurate',

In [149]:
df_wide.columns

Index(['event_team', '1_pdf_name', '2_pdf_name', '1_pdf_type', '2_pdf_type',
       '1_Race', '2_Race', '1_Coach', '2_Coach', '1_Team', '2_Team',
       '1_Sideline - Apothecary', '2_Sideline - Apothecary',
       '1_Sideline - Assistant coaches', '2_Sideline - Assistant coaches',
       '1_Sideline - Cheerleaders', '2_Sideline - Cheerleaders',
       '1_Sideline - Dedicated fans', '2_Sideline - Dedicated fans',
       '1_Sideline - Re-rolls', '2_Sideline - Re-rolls',
       '1_Inducement - Bribes', '2_Inducement - Bribes',
       '1_Inducement - Riotous Rookies', '2_Inducement - Riotous Rookies',
       '1_Summary - Players cost', '2_Summary - Players cost',
       '1_Summary - Skills cost', '2_Summary - Skills cost',
       '1_Summary - Inducement cost', '2_Summary - Inducement cost',
       '1_Summary - Sideline cost', '2_Summary - Sideline cost',
       '1_Summary - Primary skills', '2_Summary - Primary skills',
       '1_Summary - Secondary skills', '2_Summary - Secondary skills',

In [118]:
dir(row)

['Coach',
 'Players',
 'Race',
 'T',
 'Team',
 '_AXIS_LEN',
 '_AXIS_ORDERS',
 '_AXIS_TO_AXIS_NUMBER',
 '_HANDLED_TYPES',
 '__abs__',
 '__add__',
 '__and__',
 '__annotations__',
 '__array__',
 '__array_priority__',
 '__array_ufunc__',
 '__bool__',
 '__class__',
 '__column_consortium_standard__',
 '__contains__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__divmod__',
 '__doc__',
 '__eq__',
 '__finalize__',
 '__firstlineno__',
 '__float__',
 '__floordiv__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__ifloordiv__',
 '__imod__',
 '__imul__',
 '__init__',
 '__init_subclass__',
 '__int__',
 '__invert__',
 '__ior__',
 '__ipow__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__ixor__',
 '__le__',
 '__len__',
 '__lt__',
 '__matmul__',
 '__mod__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__nonzero__',
 '__or__',
 '__pandas_

# Dask Application

In [348]:
import dash_bootstrap_components as dbc
from dash import Dash, dcc, html, Input, Output
from dash import dash_table
import plotly.express as px
import pandas as pd

# ----------------------------------------------------------------
# 0. Assumption: df_team_event and df_wide already exist in scope
# ----------------------------------------------------------------
# df_team_event:
#   columns like ["Race", "Coach", "Team", "Players", ...]
#   "Players" is a list[dict] with keys:
#       "ctr", "position_name", "star",
#       "primary_1", "primary_2", "secondary_1", "secondary_2"
#
# df_wide:
#   columns like ["event_team", "event_team_combined_tier", ...]


# ------------------------------------------------------------
# 1. Transform players list into table rows (Single Teams tab)
# ------------------------------------------------------------
def transform_players(players):
    rows = []
    for p in players:
        name = p["position_name"] + (" [STAR]" if p["star"] else "")
        skills = [
            p["primary_1"],
            p["primary_2"],
            p["secondary_1"],
            p["secondary_2"],
        ]
        skills = [s for s in skills if s is not None]

        rows.append({
            "#": p["ctr"],
            "Name": name,
            "Skills": ", ".join(skills) if skills else "-"
        })
    return rows


# ------------------------------------------------------------
# 2. Build metadata info panel (grouped + cleaned labels)
# ------------------------------------------------------------
def metadata_panel(meta):
    team_fields = ["Race", "Coach", "Team"]

    sideline_fields = [
        "Sideline - Apothecary",
        "Sideline - Assistant coaches",
        "Sideline - Cheerleaders",
        "Sideline - Dedicated fans",
        "Sideline - Re-rolls"
    ]

    inducement_fields = [
        "Inducement - Bribes",
        "Inducement - Riotous Rookies",
        "Inducement - Team Mascot"
    ]

    def build_section(title, fields):
        rows = [
            dbc.Row(
                dbc.Col(html.H5(title), width=12),
                className="mt-2 mb-2"
            )
        ]
        for key in fields:
            if key in meta:
                if meta[key] in [0, "", "0"]:
                    continue
                clean_key = key.split(" - ")[-1]
                rows.append(
                    dbc.Row([
                        dbc.Col(html.Strong(clean_key + ":"), width=5),
                        dbc.Col(html.Span(str(meta[key])), width=7)
                    ], className="mb-1")
                )
        return rows

    all_rows = []
    all_rows += build_section("Team Info", team_fields)
    all_rows += build_section("Sideline Staff", sideline_fields)
    all_rows += build_section("Inducements", inducement_fields)

    return dbc.Card(
        dbc.CardBody(all_rows),
        style={"backgroundColor": "#222", "color": "white", "border": "1px solid #444"},
        className="mb-4"
    )


# ------------------------------------------------------------
# 3. Build team table layout (Single Teams tab)
# ------------------------------------------------------------
def team_table(team_rows):
    if not team_rows:
        return html.Div("No players for this team.")

    return dash_table.DataTable(
        data=team_rows,
        columns=[
            {"name": "#", "id": "#"},
            {"name": "Name", "id": "Name"},
            {"name": "Skills", "id": "Skills"},
        ],
        sort_action="native",
        filter_action="native",
        page_size=20,
        style_table={"overflowX": "auto"},
        style_header={
            "backgroundColor": "rgb(50,50,50)",
            "color": "white",
            "fontWeight": "bold",
            "border": "1px solid #444"
        },
        style_cell={
            "backgroundColor": "rgb(30,30,30)",
            "color": "white",
            "border": "1px solid #444",
            "padding": "8px",
            "textAlign": "left"
        },
        style_cell_conditional=[
            {
                "if": {"column_id": "#"},
                "width": "60px",
                "maxWidth": "60px",
                "minWidth": "60px",
                "textAlign": "center"
            }
        ],
        style_data_conditional=[
            {
                "if": {"filter_query": "{Name} contains '[STAR]'"},
                "backgroundColor": "rgb(70,50,0)",
                "color": "gold",
                "fontWeight": "bold"
            }
        ]
    )


# ------------------------------------------------------------
# 4. Dash App (df_team_event and df_wide must already exist)
# ------------------------------------------------------------
app = Dash(__name__, external_stylesheets=[dbc.themes.CYBORG])

all_coaches = sorted(df_team_event["Coach"].dropna().unique())
all_races = sorted(df_team_event["Race"].dropna().unique())


# ------------------------------------------------------------
# 5. Build Overview Tab Charts
# ------------------------------------------------------------
race_counts = df_team_event["Race"].value_counts().reset_index()
fig_race = px.bar(
    race_counts,
    x="Race",
    y="count",
    title="Teams per Race"
).update_layout(
    paper_bgcolor="#222",
    plot_bgcolor="#222",
    font_color="white"
)

team_combo_counts = df_wide["event_team"].value_counts().reset_index()
fig_team_combo = px.bar(
    team_combo_counts,
    x="event_team",
    y="count",
    title="Team Combinations per Event Team"
).update_layout(
    paper_bgcolor="#222",
    plot_bgcolor="#222",
    font_color="white"
)

tier_counts = df_wide["event_team_combined_tier"].value_counts().reset_index()
fig_tiers = px.bar(
    tier_counts,
    x="event_team_combined_tier",
    y="count",
    title="Combined Tiers Across Event Teams"
).update_layout(
    paper_bgcolor="#222",
    plot_bgcolor="#222",
    font_color="white"
)


# ------------------------------------------------------------
# 6. Race Stats: explode players into long format
# ------------------------------------------------------------
def explode_players(df):
    rows = []
    for _, row in df.iterrows():
        race = row["Race"]
        team = row["Team"]
        coach = row["Coach"]
        players = row["Players"]

        for p in players:
            base_name = p["position_name"]
            skills = [
                p["primary_1"],
                p["primary_2"],
                p["secondary_1"],
                p["secondary_2"],
            ]
            skills = [s for s in skills if s is not None]

            rows.append({
                "Race": race,
                "Team": team,
                "Coach": coach,
                "PlayerName": base_name,
                "Skills": skills
            })
    return pd.DataFrame(rows)


df_players_long = explode_players(df_team_event)


# ------------------------------------------------------------
# 7. Layout (Tabs reordered + Race Stats added)
# ------------------------------------------------------------
tab_style = {
    "backgroundColor": "#111",
    "color": "white",
    "padding": "10px",
    "fontWeight": "bold",
    "border": "1px solid #444",
}

tab_selected_style = {
    "backgroundColor": "#333",
    "color": "gold",
    "padding": "10px",
    "fontWeight": "bold",
    "border": "1px solid gold",
}

app.layout = dbc.Container([
    dcc.Tabs([

        # --------------------------------------------------------
        # Overview FIRST
        # --------------------------------------------------------
        dcc.Tab(
            label="Overview",
            children=[
                html.H2("Event Overview", className="mt-3"),
                html.H4("Teams per Race"),
                dcc.Graph(id="chart-race-count", figure=fig_race),
                html.H4("Team Combinations per Event Team"),
                dcc.Graph(id="chart-team-combinations", figure=fig_team_combo),
                html.H4("Combined Tiers per Event Team"),
                dcc.Graph(id="chart-combined-tiers", figure=fig_tiers),
            ],
            style=tab_style,
            selected_style=tab_selected_style
        ),

        # --------------------------------------------------------
        # Race Stats SECOND
        # --------------------------------------------------------
        dcc.Tab(
            label="Race Stats",
            children=[
                html.H2("Race Statistics", className="mt-3"),

                html.Label("Select Race:", style={"marginTop": "10px"}),
                dcc.Dropdown(
                    id="race-filter",
                    options=[{"label": r, "value": r} for r in all_races],
                    value=all_races[0] if all_races else None,
                    clearable=False,
                    style={"width": "300px", "marginBottom": "20px"}
                ),

                html.H4("Player Types Summary"),
                html.Div(id="race-player-types-table"),

                html.H4("Skill Frequency for Race"),
                dcc.Graph(id="race-skills-bar"),

                html.H4("Skills per Player Type"),
                html.Div(id="race-skills-pivot-table")
            ],
            style=tab_style,
            selected_style=tab_selected_style
        ),

        # --------------------------------------------------------
        # Single Teams THIRD
        # --------------------------------------------------------
        dcc.Tab(
            label="Single Teams",
            children=[
                html.H2("Team Overview", className="mt-3"),
                html.Label("Select Coach:", style={"marginTop": "10px"}),
                dcc.Dropdown(
                    id="coach-filter",
                    options=[{"label": c, "value": c} for c in all_coaches],
                    value=all_coaches[0] if all_coaches else None,
                    clearable=False,
                    style={"width": "300px", "marginBottom": "20px"}
                ),
                html.Div(id="metadata-panel"),
                html.Div(id="team-table")
            ],
            style=tab_style,
            selected_style=tab_selected_style
        ),

    ])
], fluid=True)


# ------------------------------------------------------------
# 8. Callbacks
# ------------------------------------------------------------

# --- Single Team View ---
@app.callback(
    Output("metadata-panel", "children"),
    Output("team-table", "children"),
    Input("coach-filter", "value")
)
def update_team_view(selected_coach):
    if selected_coach is None:
        return html.Div("No coach selected."), html.Div("")

    df_filtered = df_team_event[df_team_event["Coach"] == selected_coach]

    if df_filtered.empty:
        return html.Div("No data for this coach."), html.Div("")

    row = df_filtered.iloc[0]
    meta = row.to_dict()
    players = row["Players"]

    team_rows = transform_players(players)

    return metadata_panel(meta), team_table(team_rows)


# --- Race Stats View ---
@app.callback(
    Output("race-player-types-table", "children"),
    Output("race-skills-bar", "figure"),
    Output("race-skills-pivot-table", "children"),
    Input("race-filter", "value")
)
def update_race_stats(selected_race):
    if selected_race is None:
        return html.Div("No race selected."), px.Figure(), html.Div("")

    df_race = df_players_long[df_players_long["Race"] == selected_race]

    if df_race.empty:
        return html.Div("No data for this race."), px.Figure(), html.Div("No data.")

    # --------------------------------------------------------
    # 1. Player Types Table: Count, min, max, average per PlayerName
    # --------------------------------------------------------
    per_team = (
        df_race.groupby(["Team", "PlayerName"])
        .size()
        .reset_index(name="Count")
    )

    summary = (
        per_team.groupby("PlayerName")["Count"]
        .agg(["count", "min", "max", "mean"])
        .reset_index()
        .rename(columns={
            "count": "TotalCount",
            "min": "MinPerTeam",
            "max": "MaxPerTeam",
            "mean": "AvgPerTeam"
        })
            .sort_values("PlayerName", ascending=False)
    )

    player_types_table = dash_table.DataTable(
        data=summary.to_dict("records"),
        columns=[{"name": c, "id": c} for c in summary.columns],
        page_size=20,
        style_table={"overflowX": "auto"},
        style_header={
            "backgroundColor": "rgb(50,50,50)",
            "color": "white",
            "fontWeight": "bold",
            "border": "1px solid #444"
        },
        style_cell={
            "backgroundColor": "rgb(30,30,30)",
            "color": "white",
            "border": "1px solid #444",
            "padding": "8px",
            "textAlign": "left"
        }
    )

    # --------------------------------------------------------
    # 2. Bar chart of all skills added for that race
    # --------------------------------------------------------
    all_skills = (
        df_race["Skills"]
        .explode()
        .dropna()
        .value_counts()
        .reset_index()
    )

    fig_skills = px.bar(
        all_skills,
        x="Skills",
        y="count",
        title=f"Skill Frequency for {selected_race}"
    ).update_layout(
        paper_bgcolor="#222",
        plot_bgcolor="#222",
        font_color="white"
    )

    # --------------------------------------------------------
    # 3. Pivot table: counts of skills per position
    #    PlayerName | Skill1 | Skill2 | ...
    # --------------------------------------------------------
    df_expanded = df_race.explode("Skills")

    if df_expanded["Skills"].notna().any():
        pivot = (
            df_expanded
                .groupby(["PlayerName", "Skills"])
                .size()
                .reset_index(name="Count")
                .sort_values(["PlayerName", "Count"], ascending=[True, False])
        )
    else:
        pivot = pd.DataFrame({"PlayerName": df_expanded["PlayerName"].unique()})
    def compute_group_styles(df, group_column):
        """
        Creates conditional formatting rules that:
        1. Hide repeated text in the grouping column
        2. Add a full-row top border when the value changes
        """
        hide_rules = []
        border_rules = []
    
        for i in range(len(df)):
            if i > 0 and df.iloc[i][group_column] == df.iloc[i-1][group_column]:
                # Same as previous → hide text in the grouping column
                hide_rules.append({
                    "if": {"row_index": i, "column_id": group_column},
                    "color": "rgba(0,0,0,0)",
                    "textShadow": "0 0 0 transparent"
                })
            else:
                # Different from previous → add a full-row border
                border_rules.append({
                    "if": {"row_index": i, "column_id": "all"},
                    "borderTop": "3px solid #888"
                })
    
        return hide_rules, border_rules
    
    hide_rules, border_rules = compute_group_styles(pivot, "PlayerName")

    pivot_table = dash_table.DataTable(
        data=pivot.to_dict("records"),
        columns=[{"name": c, "id": c} for c in pivot.columns],
        page_size=20,
        style_table={"overflowX": "auto"},
        style_header={
            "backgroundColor": "rgb(50,50,50)",
            "color": "white",
            "fontWeight": "bold",
            "border": "1px solid #444"
        },
        style_cell={
            "backgroundColor": "rgb(30,30,30)",
            "color": "white",
            "border": "1px solid #444",
            "padding": "8px",
            "textAlign": "left"
        },
        style_data_conditional=[
            *hide_rules,     # hide repeated text
            *border_rules    # add grouping borders
        ]
    )


    return player_types_table, fig_skills, pivot_table


# ------------------------------------------------------------
# 9. Run (Jupyter-friendly)
# ------------------------------------------------------------
app.run(jupyter_mode="external")


Dash app running on http://127.0.0.1:8050/


In [346]:
df_expanded = df_race.explode("Skills")

if df_expanded["Skills"].notna().any():
    pivot = (
        df_expanded
        .pivot_table(
            index="PlayerName",
            columns="Skills",
            aggfunc="size",
            fill_value=0
        )
        .reset_index()
    )
else:
    pivot = pd.DataFrame({"PlayerName": df_expanded["PlayerName"].unique()})

    

In [341]:
pivot = (
    df_expanded
        .groupby(["PlayerName", "Skills"])
        .size()
        .reset_index(name="Count")
        .sort_values("PlayerName", ascending=False)
)
pivot


Unnamed: 0,PlayerName,Skills,Count
2,Trained Troll,Block,2
1,Pump Wagon,Pro,1
0,Fungus Flinga,Accurate,1


In [271]:
df_team_event.columns

Index(['pdf_name', 'pdf_type', 'Race', 'Coach', 'Team',
       'Sideline - Apothecary', 'Sideline - Assistant coaches',
       'Sideline - Cheerleaders', 'Sideline - Dedicated fans',
       'Sideline - Re-rolls', 'Inducement - Bribes',
       'Inducement - Riotous Rookies', 'Summary - Players cost',
       'Summary - Skills cost', 'Summary - Inducement cost',
       'Summary - Sideline cost', 'Summary - Primary skills',
       'Summary - Secondary skills', 'Players', 'event_team',
       'Inducement - Team Mascot', 'team_captain',
       'team_captain_pro_as_secondary', 'emerald_bowl_skill_cost',
       'emerald_bowl_tier', 'total_team_cost', 'emerald_bowl_tier_extra_gold',
       'base_total_cost', 'required_extra_gold', 'verify_skill_cost',
       'verify_seconday_skills', 'star_players', 'count_star_players',
       'verify_star_players', 'verify_no_skill_stack', 'verify_inducements',
       'row'],
      dtype='object')

In [275]:
df_wide.columns

Index(['event_team', '1_pdf_name', '2_pdf_name', '1_pdf_type', '2_pdf_type',
       '1_Race', '2_Race', '1_Coach', '2_Coach', '1_Team', '2_Team',
       '1_Sideline - Apothecary', '2_Sideline - Apothecary',
       '1_Sideline - Assistant coaches', '2_Sideline - Assistant coaches',
       '1_Sideline - Cheerleaders', '2_Sideline - Cheerleaders',
       '1_Sideline - Dedicated fans', '2_Sideline - Dedicated fans',
       '1_Sideline - Re-rolls', '2_Sideline - Re-rolls',
       '1_Inducement - Bribes', '2_Inducement - Bribes',
       '1_Inducement - Riotous Rookies', '2_Inducement - Riotous Rookies',
       '1_Summary - Players cost', '2_Summary - Players cost',
       '1_Summary - Skills cost', '2_Summary - Skills cost',
       '1_Summary - Inducement cost', '2_Summary - Inducement cost',
       '1_Summary - Sideline cost', '2_Summary - Sideline cost',
       '1_Summary - Primary skills', '2_Summary - Primary skills',
       '1_Summary - Secondary skills', '2_Summary - Secondary skills',