In [2]:
import json
import re
import fitz
from datetime import datetime, timezone
import pandas as pd
import os
import numpy as np
import math

In [3]:
from pathlib import Path

def find_all_pdfs(root_ordner):
    return [str(p) for p in Path(root_ordner).rglob('*.pdf')]

main_folder = os.path.join("..", "sample_roster", "bbtc_pl_2025")
bbtc_pl_2025_roster = find_all_pdfs(main_folder)

main_folder = os.path.join("..", "sample_roster", "bbtc_pl_2025_matched_played")
bbtc_pl_2025_matched_played5_roster = find_all_pdfs(main_folder)

main_folder = os.path.join("..", "sample_roster", "bbtc_pl_eurobowl_2025")
eurobowl_2025_roster = find_all_pdfs(main_folder)


test_roster_paths = bbtc_pl_2025_roster + bbtc_pl_2025_matched_played5_roster + eurobowl_2025_roster
print(len(test_roster_paths))

8


In [4]:
test_roster_paths[-1].split("\\")

['..', 'sample_roster', 'bbtc_pl_eurobowl_2025', 'EB - Orcs - Akorus.pdf']

In [5]:
# Python
import fitz  # PyMuPDF

# Open the PDF
doc = fitz.open(test_roster_paths[4])

team_data = {}

extraction_step = 'Race'
for page_number, page in enumerate(doc, start=1):
    # Extract text in blocks/spans with details
    blocks = page.get_text("dict")["blocks"]
    
    for block in blocks:
        if "lines" in block:
            for line in block["lines"]:
                for span in line["spans"]:
                    text = span["text"]
                    color = span["color"]  # RGB, float 0-1 representation
                    # print(f"Page {page_number}: '{text}' Color: {color}")
                    
                    if extraction_step == 'Race':
                        if text == 'COACH NAME':
                            extraction_step = 'Coach'
                        if 'Race' in team_data:
                            team_data


In [83]:
# Python
import fitz  # PyMuPDF

def load_roster(roster_path):
    doc = fitz.open(roster_path)

    roster_path_split = roster_path.split("\\")
    pdf_name = roster_path_split[-1]
    
    return {
        "full_path": roster_path,
        "pdf_name": pdf_name,
        "loaded_pdf": doc
    }

def detect_roster_type(loaded_roster):
    doc = loaded_roster["loaded_pdf"]
    for page_number, page in enumerate(doc, start=1):
        # Extract text in blocks/spans with details
        blocks = page.get_text("dict")["blocks"]
        started_summary = False
    
        for block in blocks:
            if "lines" in block:
                for line in block["lines"]:
                    for span in line["spans"]:
                        text = span["text"]
                        if text == "SUMMARY":
                            started_summary = True
                        elif started_summary:
                            # print(text)
                            if text == "Skill Points":
                                return "bbtc_pl_2025_matched_played"
                            elif text == "Players cost":
                                return "bbtc_pl_2025"
                            elif "Option:" in text:
                                return "bbtc_pl_eurobowl_2025"

        raise RuntimeError(f"Could not identify roster_type for {loaded_roster['full_path']}")


SUMMARY_STEP_MAPPING = {
    "bbtc_pl_2025_matched_played": [
        'Skill Points',
        None,
        'Secondary skills',
        None,
        'Star players',
        None,
    ],
    "bbtc_pl_2025": [
        'Players cost',
        None,
        'Skills cost',
        None,
        'Inducement cost',
        None,
        'Sideline cost',
        None,
        'Primary skills',
        None,
        'Secondary skills',
        None
    ],
    "bbtc_pl_eurobowl_2025": [
        'Players cost',
        None,
        'Skills cost',
        None,
        'Inducement cost',
        None,
        'Sideline cost',
        None,
        'Primary skills',
        None,
        'Secondary skills',
        None
    ],
}


def process_team_pdf(roster_path):
    # Open the PDF
    loaded_roster = load_roster(roster_path)
    doc = loaded_roster["loaded_pdf"]
    pdf_type = detect_roster_type(loaded_roster)
    team_data = {
        'pdf_name': loaded_roster["pdf_name"],
        'pdf_type': pdf_type
    }

    print(team_data)
    
    extraction_step = 'Race'
    for page_number, page in enumerate(doc, start=1):
        # Extract text in blocks/spans with details
        blocks = page.get_text("dict")["blocks"]

        for block in blocks:
            if "lines" in block:
                for line in block["lines"]:
                    for span in line["spans"]:
                        text = span["text"]
                        color = span["color"]  # RGB, float 0-1 representation
                        # print(f"Page {page_number}: '{text}' Color: {color}")
                        # Adjust extraction step
                        if text == 'SIDELINE':
                            extraction_step = 'Sideline'
                            sideline_ctr = 0
                        elif text == 'INDUCEMENTS':
                            extraction_step = 'Inducements'
                            next_name = None
                        elif text == 'SUMMARY':
                            summary_ctr = 0
                            extraction_step = 'Summary'
                            summary_ctr = -1
    
                        if extraction_step == 'Race':
                            if text == 'COACH NAME':
                                extraction_step = 'Coach'
                                continue
                            if 'Race' in team_data:
                                team_data['Race'] += ' ' + text
                            else:
                                team_data['Race'] = text

                        elif extraction_step == 'Coach':
                            team_data['Coach'] = text
                            extraction_step = 'Team'

                        elif extraction_step == 'Team':
                            if text == 'TEAM NAME':
                                continue
                            elif text == 'SIDELINE':
                                extraction_step = 'Sideline'
                                sideline_ctr = 0
                                
                            elif 'Team' in team_data:
                                team_data['Team'] += ' ' + text
                            else:
                                team_data['Team'] = text

                        elif extraction_step == 'Sideline':
                            sideline_properties = [
                                'Apothecary',
                                'Assistant coaches',
                                'Cheerleaders',
                                'Dedicated fans',
                                'Re-rolls',
                            ]

                            if sideline_ctr >= len(sideline_properties):
                                extraction_step = 'Inducements'
                                continue
                            next_sideline = sideline_properties[sideline_ctr]
                            
                            if text == 'SIDELINE':
                                continue
                            elif text in sideline_properties:
                                if (sideline_ctr == 0) and (text != 'Apothecary'):
                                    sideline_ctr += 1
                                continue
                            else:
                                # print('SAVE', next_sideline, text)
                                team_data[f'Sideline - {next_sideline}'] = text
                                sideline_ctr += 1

                        elif extraction_step == 'Inducements':
                            if text in ['SUMMARY', 'No inducements', 'LEAGUES & SPECIAL']:
                                summary_ctr = 0
                                extraction_step = 'Summary'
                                summary_ctr = -1
                                continue
                            if text == 'INDUCEMENTS':
                                continue
                            if next_name is None:
                                next_name = text
                            else:
                                team_data[f'Inducement - {next_name}'] = text
                                next_name = None

                        elif extraction_step == 'Summary':
                            # print(f"SUMMARY {summary_ctr} | {text}")
                            summary_steps = SUMMARY_STEP_MAPPING[pdf_type]
                            if summary_ctr == len(summary_steps):
                                extraction_step = 'Players'
                                player_ctr = -1
                                continue

                            if text == 'SUMMARY':
                                summary_ctr = 0
                                continue
                            elif summary_ctr == -1:
                                continue

                            if pdf_type == "bbtc_pl_eurobowl_2025":
                                if summary_ctr == 0:
                                    team_data[f'Summary - Option'] = text.split(": ")[0]
                                    extraction_step = 'Players'
                                    player_ctr = -1
                                    continue
                                else:
                                    raise NotImplementedError()
                            else:
                                if (summary_ctr % 2) == 1:
                                    team_data[f'Summary - {summary_steps[summary_ctr - 1]}'] = text
                            summary_ctr += 1
    
                        elif extraction_step == 'Players':
                            if text == 'COST':
                                team_data['Players'] = []
                                player_ctr = 1
                                next_player_property = 'Name'
                                current_player = {
                                    'ctr': player_ctr,
                                    'position_name': None,
                                    'primary_1': None,
                                    'primary_2': None,
                                    'secondary_1': None,
                                    'secondary_2': None,
                                    'star': False,
                                }
                                continue
                            elif player_ctr == -1:
                                continue

                            if re.fullmatch(r"\b\d+k\b", text):
                                team_data['Players'].append(current_player)
                                player_ctr += 1
                                current_player = {
                                    'ctr': player_ctr,
                                    'position_name': None,
                                    'primary_1': None,
                                    'primary_2': None,
                                    'secondary_1': None,
                                    'secondary_2': None,
                                    'star': False,
                                }
                                next_player_property = 'Name'

                            elif next_player_property == 'Name':
                                current_player['position_name'] = " ".join(text.split()[1:])
                                next_player_property = 'Skills'

                            elif next_player_property == 'Skills':
                                if color == 681912:
                                    skill_name = text.strip().strip(',')
                                    if skill_name == "Put the":
                                        skill_name = "Put the "
                                    elif skill_name == "Boot In":
                                        continue
                                    if current_player['primary_1'] is None:
                                        current_player['primary_1'] = skill_name
                                    elif current_player['primary_2'] is None:
                                        current_player['primary_2'] = skill_name
                                    else:
                                        raise RuntimeError(f'Unexpected Skill | Color: {color} - Text: {text}')
                                if color == 4822027:
                                    if current_player['secondary_1'] is None:
                                        current_player['secondary_1'] = text.strip().strip(',')
                                    elif current_player['secondary_2'] is None:
                                        current_player['secondary_2'] = text.strip().strip(',')
                                    else:
                                        raise RuntimeError(f'Unexpected Skill | Color: {color} - Text: {text}')
                            
                            if text == 'Special skill: ':
                                current_player['star'] = True
                                      
    return team_data
                                    
                                
                            
processed_roster = []
for roster_path in test_roster_paths:
    team_data = process_team_pdf(roster_path)
    processed_roster.append(team_data)
    # print(json.dumps(team_data, indent=4))

{'pdf_name': 'High Elfs - Akorus.pdf', 'pdf_type': 'bbtc_pl_2025'}
{'pdf_name': 'Khorne - Akorus - vertical.pdf', 'pdf_type': 'bbtc_pl_2025'}
{'pdf_name': 'Khorne - Akorus.pdf', 'pdf_type': 'bbtc_pl_2025'}
{'pdf_name': 'Necro - Schlachtenlenker.pdf', 'pdf_type': 'bbtc_pl_2025'}
{'pdf_name': 'Nurgle - Akorus.pdf', 'pdf_type': 'bbtc_pl_2025'}
{'pdf_name': 'Vamps - Akorus.pdf', 'pdf_type': 'bbtc_pl_2025'}
{'pdf_name': 'Zons - Test.pdf', 'pdf_type': 'bbtc_pl_2025_matched_played'}
{'pdf_name': 'EB - Orcs - Akorus.pdf', 'pdf_type': 'bbtc_pl_eurobowl_2025'}


In [9]:
for roster_path in test_roster_paths:
    loaded_roster = load_roster(roster_path)
    doc = loaded_roster["loaded_pdf"]
    result = detect_roster_type(loaded_roster)
    print(roster_path, "|", result)

..\sample_roster\bbtc_pl_2025\High Elfs - Akorus.pdf | bbtc_pl_2025
..\sample_roster\bbtc_pl_2025\Khorne - Akorus - vertical.pdf | bbtc_pl_2025
..\sample_roster\bbtc_pl_2025\Khorne - Akorus.pdf | bbtc_pl_2025
..\sample_roster\bbtc_pl_2025\Necro - Schlachtenlenker.pdf | bbtc_pl_2025
..\sample_roster\bbtc_pl_2025\Nurgle - Akorus.pdf | bbtc_pl_2025
..\sample_roster\bbtc_pl_2025\Vamps - Akorus.pdf | bbtc_pl_2025
..\sample_roster\bbtc_pl_2025_matched_played\Zons - Test.pdf | bbtc_pl_2025_matched_played
..\sample_roster\bbtc_pl_eurobowl_2025\EB - Orcs - Akorus.pdf | bbtc_pl_eurobowl_2025


In [10]:
processed_roster[0]

{'pdf_name': 'High Elfs - Akorus.pdf',
 'pdf_type': 'bbtc_pl_2025',
 'Race': 'High Elf',
 'Coach': 'Akorus',
 'Team': 'Emerald - High Elfs',
 'Sideline - Apothecary': 'Yes',
 'Sideline - Assistant coaches': '1',
 'Sideline - Cheerleaders': '0',
 'Sideline - Dedicated fans': '0',
 'Sideline - Re-rolls': '3',
 'Summary - Players cost': '955k',
 'Summary - Skills cost': '200k',
 'Summary - Inducement cost': '0k',
 'Summary - Sideline cost': '210k',
 'Summary - Primary skills': '7',
 'Summary - Secondary skills': '0',
 'Players': [{'ctr': 1,
   'position_name': 'High Elf Blitzer',
   'primary_1': 'Dodge',
   'primary_2': None,
   'secondary_1': None,
   'secondary_2': None,
   'star': False},
  {'ctr': 2,
   'position_name': 'High Elf Blitzer',
   'primary_1': 'Dodge',
   'primary_2': None,
   'secondary_1': None,
   'secondary_2': None,
   'star': False},
  {'ctr': 3,
   'position_name': 'High Elf Thrower',
   'primary_1': 'Block',
   'primary_2': None,
   'secondary_1': None,
   'seconda

In [11]:
def flatten_players(processed_roster):
    rows = []
    error_rows = []
    for team_data in processed_roster:
        if 'Players' in team_data:
            for player in team_data['Players']:
                prefix = f'player_{player["ctr"]}'
                for key, value in player.items():
                    if key == 'ctr':
                        continue
                    
                    team_data[f'{prefix}_{key}'] = value
            
            del team_data['Players']
            rows.append(team_data)
        elif 'player_11_star' in team_data:
            rows.append(team_data)
        else:
            error_rows.append(team_data)

    return {
        "rows": rows,
        "error_rows": error_rows
    }

flattend_rows = flatten_players(processed_roster)

print('Rows', len(flattend_rows["rows"]))
print('Errors', len(flattend_rows["error_rows"]))

Rows 8
Errors 0


In [12]:
df = pd.DataFrame(rows)
df.head(5)

NameError: name 'rows' is not defined

In [13]:
df_error = pd.DataFrame(error_rows)
df_error

NameError: name 'error_rows' is not defined

In [14]:
df_emerald_bowl_processing = df[['PDF Name', 'Race', 'Coach', 'Team',
       'Summary - Players cost', 'Summary - Skills cost',
       'Summary - Inducement cost', 'Summary - Sideline cost',
       'Summary - Primary skills', 'Summary - Secondary skills']]
df_emerald_bowl_processing['emerald_bowl_skill_cost'] = df_emerald_bowl_processing['Summary - Primary skills'].astype(int) * 20  + df_emerald_bowl_processing['Summary - Secondary skills'].astype(int) * 30
df_emerald_bowl_processing

NameError: name 'df' is not defined

In [15]:
now_utc = datetime.now(timezone.utc)
timestamp_utc_str = now_utc.strftime("%Y%m%dT%H%M%S")
df.to_csv(f'roster_extracted_{timestamp_utc_str}.csv')

NameError: name 'df' is not defined

In [None]:
now_utc = datetime.now(timezone.utc)
timestamp_utc_str = now_utc.strftime("%Y%m%dT%H%M%S")
df_update.to_csv(f'eb_roster_update_{timestamp_utc_str}.csv')

In [None]:
df_error = pd.DataFrame(error_rows)
now_utc = datetime.now(timezone.utc)
timestamp_utc_str = now_utc.strftime("%Y%m%dT%H%M%S")
df_error = pd.DataFrame(error_rows).to_csv(f'eb_roster_errors_{timestamp_utc_str}.csv')
len(error_rows)

### Team Event

In [84]:
main_folder = os.path.join("..", "sample_roster", "bbtc_pl_2025_team_event")
main_folder = "C:\\Users\\maxph\\Downloads\\wetransfer_emerald-roster_2026-01-06_1104\\Emerald Bowl Roster"
bbtc_pl_2025_team_event_roster = find_all_pdfs(main_folder)
print(f"Found {len(bbtc_pl_2025_team_event_roster)} roster pdfs")

processed_roster = []
for roster_path in bbtc_pl_2025_team_event_roster:
    try:
        team_data = process_team_pdf(roster_path)
        
        team_data["event_team"] = os.path.basename(os.path.dirname(roster_path))
        processed_roster.append(team_data)
    except:
        print(f"ERROR: Processing {roster_path}")
    # print(json.dumps(team_data, indent=4))

df_team_event = pd.DataFrame(processed_roster)
df_team_event.shape

Found 108 roster pdfs
{'pdf_name': 'Emeraldbowl26_Angeblich_13006.pdf', 'pdf_type': 'bbtc_pl_2025'}
{'pdf_name': 'Emeraldbowl26_Orinox_24725.pdf', 'pdf_type': 'bbtc_pl_2025'}
{'pdf_name': 'Berlin_Sin_LexusD (Al-Axe Dynasty).pdf', 'pdf_type': 'bbtc_pl_2025'}
{'pdf_name': 'Leipzig 2026 Roegger79.pdf', 'pdf_type': 'bbtc_pl_2025'}
{'pdf_name': 'Leipzig-oventa-humans.pdf', 'pdf_type': 'bbtc_pl_2025'}
{'pdf_name': 'Wurstzone-Nord-Leipzig-Melmoth.pdf', 'pdf_type': 'bbtc_pl_2025'}
{'pdf_name': 'Emerald Candlejack Amazons +10k.pdf', 'pdf_type': 'bbtc_pl_2025'}
{'pdf_name': 'Emerald Topas Human +15K.pdf', 'pdf_type': 'bbtc_pl_2025'}
{'pdf_name': 'BellyBrassknuckle_ChaosRenegade_Maskotchen.pdf', 'pdf_type': 'bbtc_pl_2025'}
{'pdf_name': 'N8wache_Bretonen.pdf', 'pdf_type': 'bbtc_pl_2025'}
{'pdf_name': 'Bash_und_Arm_Bar_Carabor_BO_Miskatonic_Madness.pdf', 'pdf_type': 'bbtc_pl_2025'}
{'pdf_name': "Bash_und_Arm_Bar_scoolov_Chaos_Chosen_Archaon's_Dream.pdf", 'pdf_type': 'bbtc_pl_2025'}
{'pdf_name': 'Be

(108, 25)

In [85]:
df_team_event.columns

Index(['pdf_name', 'pdf_type', 'Race', 'Coach', 'Team',
       'Sideline - Apothecary', 'Sideline - Assistant coaches',
       'Sideline - Cheerleaders', 'Sideline - Dedicated fans',
       'Sideline - Re-rolls', 'Inducement - Blitzer's Best Kegs',
       'Summary - Players cost', 'Summary - Skills cost',
       'Summary - Inducement cost', 'Summary - Sideline cost',
       'Summary - Primary skills', 'Summary - Secondary skills', 'Players',
       'event_team', 'Inducement - Team Mascot', 'Inducement - Bribes',
       'Inducement - Temp Agency Cheerleaders',
       'Inducement - Part-time Assistant Coaches 1',
       'Inducement - Halfling Master Chef', 'Inducement - Riotous Rookies'],
      dtype='object')

#### Emerald Bowl Post Processing

In [97]:
TIER_MAPPING = {
    1: [
        "Amazon", "Lizardmen", "Necromantic Horror",
        "Old World Alliance", "Wood Elf"
    ],
    2: [
        "Dark Elf", "Norse", "Orc", "Shambling Undead",
        "Skaven", "Tomb Kings", "Underworld Denizens"
    ],
    3: [
        "Bretonnian", "Dwarf", "Elfen Union", "High Elf",
        "Human", "Nurgle", "Slann", "Vampire"
    ],
    4: [
        "Black Orc", "Chaos Chosen", "Chaos Dwarf",
        "Chaos Renegade", "Gnomes", "Goblin",
        "Imperial Nobility", "Khorne"
    ],
    5: [
        "Halfling", "Ogre", "Snotling"
    ],
}


TIER_EXTRA_GOLD = {
    1: 110,
    2: 130,
    3: 140,
    4: 150,
    5: 170,
}

TIER_ALLOWED_SECONDARY = {
    1: 0,
    2: 0,
    3: 1,
    4: 2,
    5: 3,
}

COMBINED_TIER_EXTRA_GOLD = {
    3: 0,
    4: 25,
    5: 35,
    6: 40,
    7: 50,
    8: 60,
    9: 70,
    10: 90
}


ALLOWED_INDUCEMENTS = {
    "Part-time Assistant Coaches": 5,
    "Temp Agency Cheerleaders": 5,
    "Team Mascot": 1,
    "Blitzer's Best Kegs": 2,
    "Bribes": 3,
    "Mortuary Assistant": 1,
    "Plague Doctor": 1,
    "Riotous Rookies": 1,
    "Halfling Master Chef": 1,
}
# [Glart Smashrip, Fungus the Loon]           2
# [Scrappa Sorehead]                          1
# [Glart Smashrip, Scrappa Sorehead]          1
# [Puggy Baconbreath, Rumbelow Sheepskin]     1
# [Crumbleberry, Grak, Rodney Roachbait]      1
# [Akhorne the Squirrel, Grim Ironjaw]        1
# [Fungus the Loon, Glart Smashrip]           1
# [Rumbelow Sheepskin, Grombrindal]           1
ALLOWED_STARS = [
    "Rodney Roachbait",
    "Akhorne the Squirrel",
    "Barik Farblast",
    "Fungus the Loon",
    "Swiftvine Glimmershard",
    "Randolph Backstabber",
    "Glart Smashrip",
    "Grim Ironjaw",
    "Rodney Roachbait",
    "Crumbleberry", "Grak",
    "Puggy Baconbreath",
    "Scrappa Sorehead",
    "Rumbelow Sheepskin",
    "Grombrindal"
]

df_team_event = df_team_event.sort_values(by="event_team")
def team_captain(row):
    captain = None
    if row['Race'] in ['Human', 'Orc']:
        for player in row['Players']:
            skills = [
                player["primary_1"],
                player["primary_2"],
                player["secondary_1"],
                player["secondary_2"],
            ]
            count_skills = sum(x is not None for x in skills)
            if 'Pro' in skills:
                if count_skills == 2:
                    return player
                elif 'Pro' in [player["secondary_1"], player["secondary_2"]]:
                    return player
                else:
                    captain = player
    return captain
df_team_event["team_captain"] = df_team_event.apply(team_captain, axis=1)

def team_captain_pro_as_secondary(row):
    captain = row['team_captain']
    if captain is not None:
        print([captain["secondary_1"], captain["secondary_2"]])
        return 'Pro' in [captain["secondary_1"], captain["secondary_2"]]
    return False
df_team_event["team_captain_pro_as_secondary"] = df_team_event.apply(team_captain_pro_as_secondary, axis=1) 

def calculate_skill_cost(row):
    skill_cost = int(row['Summary - Primary skills']) * 20  + int(row['Summary - Secondary skills']) * 30
    captain = row['team_captain'] 
    if captain is not None :
        if 'Pro' in [captain["secondary_1"], captain["secondary_2"]]:
            skill_cost -= 30
        else:
            skill_cost -= 20
    return skill_cost
df_team_event["emerald_bowl_skill_cost"] = df_team_event.apply(calculate_skill_cost, axis=1)  

race_to_tier = {
    race: tier
    for tier, races in TIER_MAPPING.items()
    for race in races
}

# Create the new column
df_team_event['emerald_bowl_tier'] = df_team_event['Race'].map(race_to_tier)
unknown = df_team_event[df_team_event['emerald_bowl_tier'].isna()]['Race'].unique()
if len(unknown) > 0:
    print("Unknown races:", unknown)

"""df_team_event = df_team_event[[
        'event_team', 'pdf_name', 'Coach', 'Race',  'Team', 'emerald_bowl_tier',
       'Summary - Players cost',
       'Summary - Skills cost', 'Summary - Inducement cost',
       'Summary - Sideline cost', 'Summary - Primary skills',
       'Summary - Secondary skills', 'emerald_bowl_skill_cost']]
"""
cost_cols = [
    "Summary - Players cost",
    "Summary - Skills cost",
    "Summary - Inducement cost",
    "Summary - Sideline cost",
]
df_team_event[cost_cols] = (
    df_team_event[cost_cols]
    .replace("k", "", regex=True)   # remove the 'k'
    .astype(int)                    # convert to integer
)
df_team_event["total_team_cost"] = df_team_event[[
    "emerald_bowl_skill_cost",
    "Summary - Players cost",
    "Summary - Inducement cost",
    "Summary - Sideline cost",
]].sum(axis=1)
df_team_event['emerald_bowl_tier_extra_gold'] = df_team_event['emerald_bowl_tier'].map(TIER_EXTRA_GOLD)
df_team_event["base_total_cost"] = 1150 + df_team_event['emerald_bowl_tier_extra_gold']
df_team_event["required_extra_gold"] = (
    df_team_event["total_team_cost"] 
    - df_team_event["base_total_cost"]
).apply(lambda x: max(x, 0))
# +5 is buffer to allow 5k gold of unused team gold
df_team_event["verify_skill_cost"] = df_team_event["emerald_bowl_skill_cost"] <= (df_team_event["emerald_bowl_tier_extra_gold"] + df_team_event["required_extra_gold"] + 5)
# TODO ENSURE TEAM CAPTAIN SECONDARY DOES NOT COUNT
df_team_event["verify_seconday_skills"] = (
    (df_team_event["Summary - Secondary skills"].astype(int) - df_team_event["team_captain_pro_as_secondary"]) 
    <= df_team_event['emerald_bowl_tier'].map(TIER_ALLOWED_SECONDARY)
)
# TODO VERIFY STAR_PLAYERS
def get_all_stars(player_list):
    stars = []
    for player in player_list:
        if player["star"]:
            stars.append(player["position_name"])
    return stars
df_team_event["star_players"] = df_team_event["Players"].apply(get_all_stars)
def count_stars(star_players):
    return len([s for s in star_players if s != "Grak"])
df_team_event["count_star_players"] = df_team_event["star_players"].apply(count_stars)
def verify_star_players(row):
    if row["count_star_players"] > 2:
        return False
    if row["count_star_players"] > 0:
        if row['emerald_bowl_tier'] != 5:
            return False
        stars = row["star_players"]
        matching_stars = [s for s in stars if (s in ALLOWED_STARS)]
        return len(matching_stars) == len(stars)

    return True
df_team_event["verify_star_players"] = df_team_event.apply(verify_star_players, axis=1)        

def verify_no_skill_stack(row):
    player_list = row["Players"]

    captain = row['team_captain']

    for player in player_list:
        skills = [
            player["primary_1"],
            player["primary_2"],
            player["secondary_1"],
            player["secondary_2"],
        ]
        count_skills = sum(x is not None for x in skills)
        if count_skills > 1:
            if captain is not None:
                if player['ctr'] != captain['ctr']:
                    return False
            else:
                return False
    return True
df_team_event["verify_no_skill_stack"] = df_team_event.apply(verify_no_skill_stack, axis=1)

for col in df_team_event.columns:
    if col.startswith("Inducement -"):
        try:
            df_team_event[col] = df_team_event[col].fillna(0).astype(int)
        except:
            print(df_team_event[col].value_counts())
           # raise RuntimeError(col)
def verify_inducements(row):
    row_dict = row.to_dict()
    for key, value in row_dict.items():
        if key == "Inducement - Part-time Assistant Coaches 1":
            print("ERROR: SKIPPING | Inducement - Part-time Assistant Coaches 1")
            continue
        if key.startswith("Inducement - "):
            inducement_type = key.replace("Inducement - ", "").strip()
            if inducement_type not in ALLOWED_INDUCEMENTS:
                print("FALSE", key, value, type(value))
                return False
            max_value = ALLOWED_INDUCEMENTS[inducement_type]
            if max_value < value:
                return False

    return True

df_team_event["verify_inducements"] = df_team_event.apply(verify_inducements, axis=1)
    

df_team_event["row"] = (
    df_team_event.groupby("event_team").cumcount() + 1
)

df_wide = (
    df_team_event
    .set_index(["event_team", "row"])
    .unstack("row")
)

df_wide.columns = [
    f"{row}_{col}"
    for col, row in df_wide.columns
]

df_wide = df_wide.reset_index()

row_counts = df_team_event.groupby("event_team").size()

df_wide = df_wide.merge(
    row_counts.rename("num_rows"),
    on="event_team",
    how="left"
)


df_wide["event_team_combined_tier"] = df_wide["1_emerald_bowl_tier"] + df_wide["2_emerald_bowl_tier"]
df_wide["event_team_total_cost"] = df_wide["1_total_team_cost"] + df_wide["2_total_team_cost"]
df_wide["event_team_required_extra_gold"] = df_wide["1_required_extra_gold"] + df_wide["2_required_extra_gold"]
df_wide["event_team_combined_tier_extra_gold"] = df_wide['event_team_combined_tier'].map(COMBINED_TIER_EXTRA_GOLD)

# VERIFICATION
# TODO VERIFY EXTRA GOLD PROPERLY SPLIT
def verify_proper_split(row):
    if row["event_team_combined_tier"] == 3:
        return (df_wide["1_required_extra_gold"]) == 0 and (df_wide["2_required_extra_gold"] == 0)

    return (df_wide["1_required_extra_gold"]) > 0 and (df_wide["2_required_extra_gold"] > 0)

df_wide["verify_proper_split"] = df_wide.apply(verify_proper_split, axis=1)
df_wide["verify_used_max_allowed_gold"] = df_wide["event_team_required_extra_gold"] <= df_wide["event_team_combined_tier_extra_gold"]
df_wide["verify_skill_cost"] = df_wide["1_verify_skill_cost"] & df_wide["2_verify_skill_cost"]
df_wide["verify_seconday_skills"] = df_wide["1_verify_seconday_skills"] & df_wide["2_verify_seconday_skills"]
df_wide["verify_inducements"] = df_wide["1_verify_inducements"] & df_wide["2_verify_inducements"]
df_wide["verify_no_skill_stack"] = df_wide["1_verify_no_skill_stack"] & df_wide["2_verify_no_skill_stack"]
df_wide["verify_star_players"] = df_wide["1_verify_star_players"] & df_wide["2_verify_star_players"]
df_wide["verify_min_tier3_combined"] = df_wide["event_team_combined_tier"] >= 3

df_wide["roster_okay"] = df_wide["verify_used_max_allowed_gold"] \
                         & df_wide["verify_skill_cost"] \
                         & df_wide["verify_seconday_skills"] \
                         & df_wide["verify_inducements"] \
                         & df_wide["verify_no_skill_stack"] \
                         & df_wide["verify_star_players"] \
                         & df_wide["verify_min_tier3_combined"] \
                         & df_wide["verify_proper_split"]


df_final = df_wide[[
    'event_team',
    '1_Coach', '2_Coach',
    '1_pdf_name', '2_pdf_name', 
    '1_Race', '2_Race',
    'roster_okay',
    'verify_used_max_allowed_gold',
    'verify_skill_cost',
    'verify_seconday_skills',
    'verify_inducements',
    'verify_no_skill_stack',
    'verify_star_players',
    '1_verify_seconday_skills', '2_verify_seconday_skills',
    '1_verify_inducements', '2_verify_inducements',
]]
df_final

[None, None]
[None, None]
[None, None]
[None, None]
[None, None]
[None, None]
[None, None]
[None, None]
[None, None]
[None, None]
[None, None]
Inducement - Part-time Assistant Coaches 1
Temp Agency Cheerleaders    2
Name: count, dtype: int64
ERROR: SKIPPING | Inducement - Part-time Assistant Coaches 1
ERROR: SKIPPING | Inducement - Part-time Assistant Coaches 1
ERROR: SKIPPING | Inducement - Part-time Assistant Coaches 1
ERROR: SKIPPING | Inducement - Part-time Assistant Coaches 1
ERROR: SKIPPING | Inducement - Part-time Assistant Coaches 1
ERROR: SKIPPING | Inducement - Part-time Assistant Coaches 1
ERROR: SKIPPING | Inducement - Part-time Assistant Coaches 1
ERROR: SKIPPING | Inducement - Part-time Assistant Coaches 1
ERROR: SKIPPING | Inducement - Part-time Assistant Coaches 1
ERROR: SKIPPING | Inducement - Part-time Assistant Coaches 1
ERROR: SKIPPING | Inducement - Part-time Assistant Coaches 1
ERROR: SKIPPING | Inducement - Part-time Assistant Coaches 1
ERROR: SKIPPING | Induceme

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [98]:
df_team_event["star_players"].value_counts()

star_players
[]                                         98
[Glart Smashrip, Fungus the Loon]           2
[Scrappa Sorehead]                          1
[Glart Smashrip, Scrappa Sorehead]          1
[Puggy Baconbreath, Rumbelow Sheepskin]     1
[Akhorne the Squirrel, Grim Ironjaw]        1
[Crumbleberry, Grak, Rodney Roachbait]      1
[Rumbelow Sheepskin, Puggy Baconbreath]     1
[Fungus the Loon, Glart Smashrip]           1
[Rumbelow Sheepskin, Grombrindal]           1
Name: count, dtype: int64

In [99]:
df_final["roster_okay"].value_counts()

roster_okay
True    54
Name: count, dtype: int64

In [89]:
df_wide[~df_final["roster_okay"]]

Unnamed: 0,event_team,1_pdf_name,2_pdf_name,1_pdf_type,2_pdf_type,1_Race,2_Race,1_Coach,2_Coach,1_Team,...,event_team_required_extra_gold,event_team_combined_tier_extra_gold,verify_used_max_allowed_gold,verify_skill_cost,verify_seconday_skills,verify_inducements,verify_no_skill_stack,verify_star_players,verify_min_tier3_combined,roster_okay
9,Chaos Brothers,Emerald Bowl 2026 Burke.pdf,Emerald Bowl 2026 Torkan.pdf,bbtc_pl_2025,bbtc_pl_2025,Imperial Nobility,Orc,Burke 16473,Torkan - 29397,Wardens of the Shire,...,35,40,True,False,True,True,True,True,True,False
27,Last Minute Dudes,Eichborndamm Eagles - pedda - 20090.pdf,Team_EB2026_Borsti.pdf,bbtc_pl_2025,bbtc_pl_2025,Wood Elf,Imperial Nobility,pedda - 20090,Borsti - 35743,Eichborndamm Eagles,...,30,35,True,False,True,True,True,True,True,False


In [71]:
df_team_event.columns

Index(['pdf_name', 'pdf_type', 'Race', 'Coach', 'Team',
       'Sideline - Apothecary', 'Sideline - Assistant coaches',
       'Sideline - Cheerleaders', 'Sideline - Dedicated fans',
       'Sideline - Re-rolls', 'Inducement - Blitzer's Best Kegs',
       'Summary - Players cost', 'Summary - Skills cost',
       'Summary - Inducement cost', 'Summary - Sideline cost',
       'Summary - Primary skills', 'Summary - Secondary skills', 'Players',
       'event_team', 'Inducement - Team Mascot', 'Inducement - Bribes',
       'Inducement - Temp Agency Cheerleaders',
       'Inducement - Part-time Assistant Coaches 1',
       'Inducement - Halfling Master Chef', 'Inducement - Riotous Rookies',
       'team_captain', 'team_captain_pro_as_secondary',
       'emerald_bowl_skill_cost', 'emerald_bowl_tier', 'total_team_cost',
       'emerald_bowl_tier_extra_gold', 'base_total_cost',
       'required_extra_gold', 'verify_skill_cost', 'verify_seconday_skills',
       'star_players', 'count_star_playe

In [90]:
df_team_event[df_team_event["event_team"]=='Last Minute Dudes'][["pdf_name", 
       'team_captain', 'team_captain_pro_as_secondary',
       'emerald_bowl_skill_cost', 'emerald_bowl_tier', 'total_team_cost',
       'emerald_bowl_tier_extra_gold', 'base_total_cost',
       'required_extra_gold', 'verify_skill_cost', 'verify_seconday_skills',
       'star_players', 'count_star_players', 'verify_star_players',
       'verify_no_skill_stack', 'verify_inducements']]

Unnamed: 0,pdf_name,team_captain,team_captain_pro_as_secondary,emerald_bowl_skill_cost,emerald_bowl_tier,total_team_cost,emerald_bowl_tier_extra_gold,base_total_cost,required_extra_gold,verify_skill_cost,verify_seconday_skills,star_players,count_star_players,verify_star_players,verify_no_skill_stack,verify_inducements
54,Eichborndamm Eagles - pedda - 20090.pdf,,False,120,1,1265,110,1260,5,False,True,[],0,True,True,True
55,Team_EB2026_Borsti.pdf,,False,150,4,1325,150,1300,25,True,True,[],0,True,True,True


In [49]:
df_team_event[df_team_event["Inducement - Part-time Assistant Coaches 1"].notna()]

Unnamed: 0,pdf_name,pdf_type,Race,Coach,Team,Sideline - Apothecary,Sideline - Assistant coaches,Sideline - Cheerleaders,Sideline - Dedicated fans,Sideline - Re-rolls,...,base_total_cost,required_extra_gold,verify_skill_cost,verify_seconday_skills,star_players,count_star_players,verify_star_players,verify_no_skill_stack,verify_inducements,row
28,Leipzig Cindy.pdf,bbtc_pl_2025,Skaven,Stoffelst,Stoffelst Harzer,No,0,0,0,3,...,1280,20,True,True,[],0,True,True,True,1
34,Korbi Skaven Emerald Bowl.pdf,bbtc_pl_2025,Skaven,Korbi14,Blackriver Skullcrushers,Yes,0,0,0,3,...,1280,15,True,True,[],0,True,True,True,1


In [372]:
df_filtered = df_team_event[
    df_team_event['Summary - Primary skills'].isna() |
    df_team_event['Summary - Secondary skills'].isna()
]
df_filtered[['pdf_name', 'pdf_type', 'Summary - Primary skills', 'Summary - Secondary skills']]

Unnamed: 0,pdf_name,pdf_type,Summary - Primary skills,Summary - Secondary skills
71,NAFL II - nooX1e - Nurgle's Rotters.pdf,bbtc_pl_2025_matched_played,,0/3
75,K4S4_Undead.pdf,bbtc_pl_eurobowl_2025,,
81,Emerald26_Rocksteady&Bebop_MrQuause.pdf,bbtc_pl_2025_matched_played,,0/2
92,EmB2026-The_Orkazons_Amazons_Brutal_Actress_Si...,bbtc_pl_eurobowl_2025,,
93,EmB2026-The_Orkazons_Orcs_Da_Waaghdanzers.pdf,bbtc_pl_eurobowl_2025,,


In [363]:
df_team_event[["pdf_name", "team_captain_pro_as_secondary", "team_captain"]]

Unnamed: 0,pdf_name,team_captain_pro_as_secondary,team_captain
0,Emeraldbowl26_Angeblich_13006.pdf,False,"{'ctr': 4, 'position_name': 'Human Blitzer', '..."
1,Emeraldbowl26_Orinox_24725.pdf,False,
2,Berlin_Sin_LexusD (Al-Axe Dynasty).pdf,False,
3,Leipzig 2026 Roegger79.pdf,False,
4,Leipzig-oventa-humans.pdf,False,"{'ctr': 2, 'position_name': 'Human Blitzer', '..."
...,...,...,...
102,Goaty-Nurgle.pdf,False,
104,blackpanther Emerald Bowl 2026.pdf,False,
105,EmeraldBowl_Kaneeda.pdf,False,
55,Bogen Emerald 2026.pdf,False,


In [232]:
now_utc = datetime.now(timezone.utc)
timestamp_utc_str = now_utc.strftime("%Y%m%dT%H%M%S")
df_final.to_csv(f'emerald_bowl_roster_eval_{timestamp_utc_str}.csv', index=False)

In [268]:
row.to_dict()

{'pdf_name': 'Orcs - Thrower as captain.pdf',
 'pdf_type': 'bbtc_pl_2025',
 'Race': 'Orc',
 'Coach': 'Akorus',
 'Team': 'EB - Orcs Test',
 'Sideline - Apothecary': 'No',
 'Sideline - Assistant coaches': '0',
 'Sideline - Cheerleaders': '0',
 'Sideline - Dedicated fans': '0',
 'Sideline - Re-rolls': '0',
 'Inducement - Bribes': 0,
 'Inducement - Riotous Rookies': 0,
 'Summary - Players cost': 795,
 'Summary - Skills cost': 50,
 'Summary - Inducement cost': 0,
 'Summary - Sideline cost': 0,
 'Summary - Primary skills': '2',
 'Summary - Secondary skills': '0',
 'Players': [{'ctr': 1,
   'position_name': 'Big Un Blocker',
   'primary_1': None,
   'primary_2': None,
   'secondary_1': None,
   'secondary_2': None,
   'star': False},
  {'ctr': 2,
   'position_name': 'Big Un Blocker',
   'primary_1': None,
   'primary_2': None,
   'secondary_1': None,
   'secondary_2': None,
   'star': False},
  {'ctr': 3,
   'position_name': 'Goblin Lineman',
   'primary_1': None,
   'primary_2': None,
   'se

In [169]:
df_team_event[["Players"]]
def verify_skill_stack(player_list):
    for player in player_list:
        skills = [
            player["primary_1"],
            player["primary_2"],
            player["secondary_1"],
            player["secondary_2"],
        ]
        count_skills = sum(x is not None for x in skills)
        if count_skills > 1:
            return False
    return True

['Glart Smashrip', 'Fungus the Loon']

# Dask Application

In [35]:
import dash_bootstrap_components as dbc
from dash import Dash, dcc, html, Input, Output
from dash import dash_table
import plotly.express as px
import pandas as pd

# ----------------------------------------------------------------
# 0. Assumption: df_team_event and df_wide already exist in scope
# ----------------------------------------------------------------

# ------------------------------------------------------------
# 1. Transform players list into table rows (Single Teams tab)
# ------------------------------------------------------------
def transform_players(players):
    rows = []
    for p in players:
        name = p["position_name"] + (" [STAR]" if p["star"] else "")
        skills = [
            p["primary_1"],
            p["primary_2"],
            p["secondary_1"],
            p["secondary_2"],
        ]
        skills = [s for s in skills if s is not None]

        rows.append({
            "#": p["ctr"],
            "Name": name,
            "Skills": ", ".join(skills) if skills else "-"
        })
    return rows


# ------------------------------------------------------------
# 2. Build metadata info panel (grouped + cleaned labels)
# ------------------------------------------------------------
def metadata_panel(meta):
    team_fields = ["Race", "Coach", "Team"]

    sideline_fields = [
        "Sideline - Apothecary",
        "Sideline - Assistant coaches",
        "Sideline - Cheerleaders",
        "Sideline - Dedicated fans",
        "Sideline - Re-rolls"
    ]

    inducement_fields = [
        "Inducement - Bribes",
        "Inducement - Riotous Rookies",
        "Inducement - Team Mascot"
    ]

    def build_section(title, fields):
        rows = [
            dbc.Row(
                dbc.Col(html.H5(title), width=12),
                className="mt-2 mb-2"
            )
        ]

        for key in fields:
            if key in meta:
                value = meta[key]
                if value in [0, "", "0"]:
                    continue

                clean_key = key.split(" - ")[-1]

                # Coach hyperlink
                if clean_key == "Coach":
                    coach_name = str(value)
                    naf_url = (
                        "https://member.thenaf.net/index.php"
                        f"?module=NAF&type=coachpage&coach={coach_name}"
                    )
                    value_component = html.A(
                        coach_name,
                        href=naf_url,
                        target="_blank",
                        style={"color": "gold", "textDecoration": "underline"}
                    )
                else:
                    value_component = html.Span(str(value))

                rows.append(
                    dbc.Row([
                        dbc.Col(html.Strong(clean_key + ":"), width=5),
                        dbc.Col(value_component, width=7)
                    ], className="mb-1")
                )

        return rows

    all_rows = []
    all_rows += build_section("Team Info", team_fields)
    all_rows += build_section("Sideline Staff", sideline_fields)
    all_rows += build_section("Inducements", inducement_fields)

    return dbc.Card(
        dbc.CardBody(all_rows),
        style={"backgroundColor": "#222", "color": "white", "border": "1px solid #444"},
        className="mb-4"
    )


# ------------------------------------------------------------
# 3. Build team table layout (Single Teams tab)
# ------------------------------------------------------------
def team_table(team_rows):
    if not team_rows:
        return html.Div("No players for this team.")

    return dash_table.DataTable(
        data=team_rows,
        columns=[
            {"name": "#", "id": "#"},
            {"name": "Name", "id": "Name"},
            {"name": "Skills", "id": "Skills"},
        ],
        sort_action="native",
        filter_action="native",
        page_size=20,
        style_table={"overflowX": "auto"},
        style_header={
            "backgroundColor": "rgb(50,50,50)",
            "color": "white",
            "fontWeight": "bold",
            "border": "1px solid #444"
        },
        style_cell={
            "backgroundColor": "rgb(30,30,30)",
            "color": "white",
            "border": "1px solid #444",
            "padding": "8px",
            "textAlign": "left"
        },
        style_cell_conditional=[
            {
                "if": {"column_id": "#"},
                "width": "60px",
                "maxWidth": "60px",
                "minWidth": "60px",
                "textAlign": "center"
            }
        ],
        style_data_conditional=[
            {
                "if": {"filter_query": "{Name} contains '[STAR]'"},
                "backgroundColor": "rgb(70,50,0)",
                "color": "gold",
                "fontWeight": "bold"
            }
        ]
    )


# ------------------------------------------------------------
# 4. Dash App
# ------------------------------------------------------------
app = Dash(__name__, external_stylesheets=[dbc.themes.CYBORG])

all_coaches = sorted(df_team_event["Coach"].dropna().unique())
all_races = sorted(df_team_event["Race"].dropna().unique())


# ------------------------------------------------------------
# 5. Overview Tab Charts
# ------------------------------------------------------------
race_counts = df_team_event["Race"].value_counts().reset_index()
fig_race = px.bar(
    race_counts,
    x="Race",
    y="count",
    title="Teams per Race"
).update_layout(
    paper_bgcolor="#222",
    plot_bgcolor="#222",
    font_color="white"
)

team_combo_counts = df_wide["event_team"].value_counts().reset_index()
fig_team_combo = px.bar(
    team_combo_counts,
    x="event_team",
    y="count",
    title="Team Combinations per Event Team"
).update_layout(
    paper_bgcolor="#222",
    plot_bgcolor="#222",
    font_color="white"
)

tier_counts = df_wide["event_team_combined_tier"].value_counts().reset_index()
fig_tiers = px.bar(
    tier_counts,
    x="event_team_combined_tier",
    y="count",
    title="Combined Tiers Across Event Teams"
).update_layout(
    paper_bgcolor="#222",
    plot_bgcolor="#222",
    font_color="white"
)


# ------------------------------------------------------------
# 6. Race Stats: explode players into long format
# ------------------------------------------------------------
def explode_players(df):
    rows = []
    for _, row in df.iterrows():
        race = row["Race"]
        team = row["Team"]
        coach = row["Coach"]
        players = row["Players"]

        for p in players:
            base_name = p["position_name"]
            skills = [
                p["primary_1"],
                p["primary_2"],
                p["secondary_1"],
                p["secondary_2"],
            ]
            skills = [s for s in skills if s is not None]

            rows.append({
                "Race": race,
                "Team": team,
                "Coach": coach,
                "PlayerName": base_name,
                "Skills": skills
            })
    return pd.DataFrame(rows)


df_players_long = explode_players(df_team_event)


# ------------------------------------------------------------
# 7. Layout (Tabs reordered + Race Stats updated)
# ------------------------------------------------------------
tab_style = {
    "backgroundColor": "#111",
    "color": "white",
    "padding": "10px",
    "fontWeight": "bold",
    "border": "1px solid #444",
}

tab_selected_style = {
    "backgroundColor": "#333",
    "color": "gold",
    "padding": "10px",
    "fontWeight": "bold",
    "border": "1px solid gold",
}

app.layout = dbc.Container([
    dcc.Tabs([

        # Overview
        dcc.Tab(
            label="Overview",
            children=[
                html.H2("Event Overview", className="mt-3"),
                html.H4("Teams per Race"),
                dcc.Graph(id="chart-race-count", figure=fig_race),
                html.H4("Team Combinations per Event Team"),
                dcc.Graph(id="chart-team-combinations", figure=fig_team_combo),
                html.H4("Combined Tiers per Event Team"),
                dcc.Graph(id="chart-combined-tiers", figure=fig_tiers),
            ],
            style=tab_style,
            selected_style=tab_selected_style
        ),

        # Race Stats
        dcc.Tab(
            label="Race Stats",
            children=[
                html.H2("Race Statistics", className="mt-3"),

                html.Label("Select Race:", style={"marginTop": "10px"}),
                dcc.Dropdown(
                    id="race-filter",
                    options=[{"label": r, "value": r} for r in all_races],
                    value=all_races[0] if all_races else None,
                    clearable=False,
                    style={"width": "300px", "marginBottom": "20px"}
                ),

                html.H4("Coaches Playing This Race"),
                html.Div(id="race-coaches-list"),

                html.H4("Player Types Summary"),
                html.Div(id="race-player-types-table"),

                html.H4("Skill Frequency for Race"),
                dcc.Graph(id="race-skills-bar"),

                html.H4("Skills per Player Type"),
                html.Div(id="race-skills-pivot-table")
            ],
            style=tab_style,
            selected_style=tab_selected_style
        ),

        # Single Teams
        dcc.Tab(
            label="Single Teams",
            children=[
                html.H2("Team Overview", className="mt-3"),
                html.Label("Select Coach:", style={"marginTop": "10px"}),
                dcc.Dropdown(
                    id="coach-filter",
                    options=[{"label": c, "value": c} for c in all_coaches],
                    value=all_coaches[0] if all_coaches else None,
                    clearable=False,
                    style={"width": "300px", "marginBottom": "20px"}
                ),
                html.Div(id="metadata-panel"),
                html.Div(id="team-table")
            ],
            style=tab_style,
            selected_style=tab_selected_style
        ),

    ])
], fluid=True)


# ------------------------------------------------------------
# 8. Helper for pivot table styling
# ------------------------------------------------------------
def compute_group_styles(df, group_column):
    hide_rules = []
    border_rules = []

    for i in range(len(df)):
        if i > 0 and df.iloc[i][group_column] == df.iloc[i-1][group_column]:
            hide_rules.append({
                "if": {"row_index": i, "column_id": group_column},
                "color": "rgba(0,0,0,0)",
                "textShadow": "0 0 0 transparent"
            })
        else:
            border_rules.append({
                "if": {"row_index": i, "column_id": "all"},
                "borderTop": "3px solid #888"
            })

    return hide_rules, border_rules


# ------------------------------------------------------------
# 9. Callbacks
# ------------------------------------------------------------

# --- Single Team View ---
@app.callback(
    Output("metadata-panel", "children"),
    Output("team-table", "children"),
    Input("coach-filter", "value")
)
def update_team_view(selected_coach):
    if selected_coach is None:
        return html.Div("No coach selected."), html.Div("")

    df_filtered = df_team_event[df_team_event["Coach"] == selected_coach]

    if df_filtered.empty:
        return html.Div("No data for this coach."), html.Div("")

    row = df_filtered.iloc[0]
    meta = row.to_dict()
    players = row["Players"]

    team_rows = transform_players(players)

    return metadata_panel(meta), team_table(team_rows)


# --- Race Stats View ---
@app.callback(
    Output("race-coaches-list", "children"),
    Output("race-player-types-table", "children"),
    Output("race-skills-bar", "figure"),
    Output("race-skills-pivot-table", "children"),
    Input("race-filter", "value")
)
def update_race_stats(selected_race):
    if selected_race is None:
        return html.Div("No race selected."), html.Div(""), px.Figure(), html.Div("")

    df_race = df_players_long[df_players_long["Race"] == selected_race]

    if df_race.empty:
        return html.Div("No data for this race."), html.Div(""), px.Figure(), html.Div("")

    # --- Coaches list ---
    coaches = sorted(df_race["Coach"].dropna().unique())
    coach_list_component = html.Ul([
        html.Li(
            html.A(
                c,
                href=f"https://member.thenaf.net/index.php?module=NAF&type=coachpage&coach={c}",
                target="_blank",
                style={"color": "gold", "textDecoration": "underline"}
            )
        )
        for c in coaches
    ])

    # --- Player Types Summary ---
    per_team = (
        df_race.groupby(["Team", "PlayerName"])
        .size()
        .reset_index(name="Count")
    )

    summary = (
        per_team.groupby("PlayerName")["Count"]
        .agg(["count", "min", "max", "mean"])
        .reset_index()
        .rename(columns={
            "count": "TotalCount",
            "min": "MinPerTeam",
            "max": "MaxPerTeam",
            "mean": "AvgPerTeam"
        })
        .sort_values("PlayerName", ascending=False)
    )

    player_types_table = dash_table.DataTable(
        data=summary.to_dict("records"),
        columns=[{"name": c, "id": c} for c in summary.columns],
        page_size=20,
        style_table={"overflowX": "auto"},
        style_header={
            "backgroundColor": "rgb(50,50,50)",
            "color": "white",
            "fontWeight": "bold",
            "border": "1px solid #444"
        },
        style_cell={
            "backgroundColor": "rgb(30,30,30)",
            "color": "white",
            "border": "1px solid #444",
            "padding": "8px",
            "textAlign": "left"
        }
    )

    # --- Skill Frequency ---
    all_skills = (
        df_race["Skills"]
        .explode()
        .dropna()
        .value_counts()
        .reset_index()
    )

    fig_skills = px.bar(
        all_skills,
        x="Skills",
        y="count",
        title=f"Skill Frequency for {selected_race}"
    ).update_layout(
        paper_bgcolor="#222",
        plot_bgcolor="#222",
        font_color="white"
    )

    # --- Pivot Table ---
    df_expanded = df_race.explode("Skills")

    if df_expanded["Skills"].notna().any():
        pivot = (
            df_expanded
                .groupby(["PlayerName", "Skills"])
                .size()
                .reset_index(name="Count")
                .sort_values(["PlayerName", "Count"], ascending=[True, False])
        )
    else:
        pivot = pd.DataFrame({"PlayerName": df_expanded["PlayerName"].unique()})

    hide_rules, border_rules = compute_group_styles(pivot, "PlayerName")

    pivot_table = dash_table.DataTable(
        data=pivot.to_dict("records"),
        columns=[{"name": c, "id": c} for c in pivot.columns],
        page_size=20,
        style_table={"overflowX": "auto"},
        style_header={
            "backgroundColor": "rgb(50,50,50)",
            "color": "white",
            "fontWeight": "bold",
            "border": "1px solid #444"
        },
        style_cell={
            "backgroundColor": "rgb(30,30,30)",
            "color": "white",
            "border": "1px solid #444",
            "padding": "8px",
            "textAlign": "left"
        },
        style_data_conditional=[
            *hide_rules,
            *border_rules
        ]
    )

    return coach_list_component, player_types_table, fig_skills, pivot_table


# ------------------------------------------------------------
# 10. Run (Jupyter-friendly)
# ------------------------------------------------------------
app.run(jupyter_mode="external")


Dash app running on http://127.0.0.1:8050/


In [36]:
import dash_bootstrap_components as dbc
from dash import Dash, dcc, html, Input, Output
from dash import dash_table
import plotly.express as px
import pandas as pd

# ----------------------------------------------------------------
# 0. Assumption: df_team_event and df_wide already exist in scope
# ----------------------------------------------------------------

# ------------------------------------------------------------
# 1. Transform players list into table rows (Single Teams tab)
# ------------------------------------------------------------
def transform_players(players):
    rows = []
    for p in players:
        name = p["position_name"] + (" [STAR]" if p["star"] else "")
        skills = [
            p["primary_1"],
            p["primary_2"],
            p["secondary_1"],
            p["secondary_2"],
        ]
        skills = [s for s in skills if s is not None]

        rows.append({
            "#": p["ctr"],
            "Name": name,
            "Skills": ", ".join(skills) if skills else "-"
        })
    return rows


# ------------------------------------------------------------
# 2. Build metadata info panel (grouped + cleaned labels)
# ------------------------------------------------------------
def metadata_panel(meta):
    team_fields = ["Race", "Coach", "Team"]

    sideline_fields = [
        "Sideline - Apothecary",
        "Sideline - Assistant coaches",
        "Sideline - Cheerleaders",
        "Sideline - Dedicated fans",
        "Sideline - Re-rolls"
    ]

    inducement_fields = [
        "Inducement - Bribes",
        "Inducement - Riotous Rookies",
        "Inducement - Team Mascot"
    ]

    def build_section(title, fields):
        rows = [
            dbc.Row(
                dbc.Col(html.H5(title), width=12),
                className="mt-2 mb-2"
            )
        ]

        for key in fields:
            if key in meta:
                value = meta[key]
                if value in [0, "", "0"]:
                    continue

                clean_key = key.split(" - ")[-1]

                # Coach hyperlink
                if clean_key == "Coach":
                    coach_name = str(value)
                    naf_url = (
                        "https://member.thenaf.net/index.php"
                        f"?module=NAF&type=coachpage&coach={coach_name}"
                    )
                    value_component = html.A(
                        coach_name,
                        href=naf_url,
                        target="_blank",
                        style={"color": "gold", "textDecoration": "underline"}
                    )
                else:
                    value_component = html.Span(str(value))

                rows.append(
                    dbc.Row([
                        dbc.Col(html.Strong(clean_key + ":"), width=5),
                        dbc.Col(value_component, width=7)
                    ], className="mb-1")
                )

        return rows

    all_rows = []
    all_rows += build_section("Team Info", team_fields)
    all_rows += build_section("Sideline Staff", sideline_fields)
    all_rows += build_section("Inducements", inducement_fields)

    return dbc.Card(
        dbc.CardBody(all_rows),
        style={"backgroundColor": "#222", "color": "white", "border": "1px solid #444"},
        className="mb-4"
    )


# ------------------------------------------------------------
# 3. Build team table layout (Single Teams tab)
# ------------------------------------------------------------
def team_table(team_rows):
    if not team_rows:
        return html.Div("No players for this team.")

    return dash_table.DataTable(
        data=team_rows,
        columns=[
            {"name": "#", "id": "#"},
            {"name": "Name", "id": "Name"},
            {"name": "Skills", "id": "Skills"},
        ],
        sort_action="native",
        filter_action="native",
        page_size=20,
        style_table={"overflowX": "auto"},
        style_header={
            "backgroundColor": "rgb(50,50,50)",
            "color": "white",
            "fontWeight": "bold",
            "border": "1px solid #444"
        },
        style_cell={
            "backgroundColor": "rgb(30,30,30)",
            "color": "white",
            "border": "1px solid #444",
            "padding": "8px",
            "textAlign": "left"
        },
        style_cell_conditional=[
            {
                "if": {"column_id": "#"},
                "width": "60px",
                "maxWidth": "60px",
                "minWidth": "60px",
                "textAlign": "center"
            }
        ],
        style_data_conditional=[
            {
                "if": {"filter_query": "{Name} contains '[STAR]'"},
                "backgroundColor": "rgb(70,50,0)",
                "color": "gold",
                "fontWeight": "bold"
            }
        ]
    )


# ------------------------------------------------------------
# 4. Dash App
# ------------------------------------------------------------
app = Dash(__name__, external_stylesheets=[dbc.themes.CYBORG])

all_coaches = sorted(df_team_event["Coach"].dropna().unique())
all_races = sorted(df_team_event["Race"].dropna().unique())


# ------------------------------------------------------------
# 5. Overview Tab Charts
# ------------------------------------------------------------
race_counts = df_team_event["Race"].value_counts().reset_index()
fig_race = px.bar(
    race_counts,
    x="Race",
    y="count",
    title="Teams per Race"
).update_layout(
    paper_bgcolor="#222",
    plot_bgcolor="#222",
    font_color="white"
)

team_combo_counts = df_wide["event_team"].value_counts().reset_index()
fig_team_combo = px.bar(
    team_combo_counts,
    x="event_team",
    y="count",
    title="Team Combinations per Event Team"
).update_layout(
    paper_bgcolor="#222",
    plot_bgcolor="#222",
    font_color="white"
)

tier_counts = df_wide["event_team_combined_tier"].value_counts().reset_index()
fig_tiers = px.bar(
    tier_counts,
    x="event_team_combined_tier",
    y="count",
    title="Combined Tiers Across Event Teams"
).update_layout(
    paper_bgcolor="#222",
    plot_bgcolor="#222",
    font_color="white"
)


# ------------------------------------------------------------
# 6. Race Stats: explode players into long format
# ------------------------------------------------------------
def explode_players(df):
    rows = []
    for _, row in df.iterrows():
        race = row["Race"]
        team = row["Team"]
        coach = row["Coach"]
        players = row["Players"]

        for p in players:
            base_name = p["position_name"]
            skills = [
                p["primary_1"],
                p["primary_2"],
                p["secondary_1"],
                p["secondary_2"],
            ]
            skills = [s for s in skills if s is not None]

            rows.append({
                "Race": race,
                "Team": team,
                "Coach": coach,
                "PlayerName": base_name,
                "Skills": skills
            })
    return pd.DataFrame(rows)


df_players_long = explode_players(df_team_event)


# ------------------------------------------------------------
# 7. Layout (Tabs reordered + Race Stats updated)
# ------------------------------------------------------------
tab_style = {
    "backgroundColor": "#111",
    "color": "white",
    "padding": "10px",
    "fontWeight": "bold",
    "border": "1px solid #444",
}

tab_selected_style = {
    "backgroundColor": "#333",
    "color": "gold",
    "padding": "10px",
    "fontWeight": "bold",
    "border": "1px solid gold",
}

app.layout = dbc.Container([
    dcc.Tabs([

        # Overview
        dcc.Tab(
            label="Overview",
            children=[
                html.H2("Event Overview", className="mt-3"),
                html.H4("Teams per Race"),
                dcc.Graph(id="chart-race-count", figure=fig_race),
                html.H4("Team Combinations per Event Team"),
                dcc.Graph(id="chart-team-combinations", figure=fig_team_combo),
                html.H4("Combined Tiers per Event Team"),
                dcc.Graph(id="chart-combined-tiers", figure=fig_tiers),
            ],
            style=tab_style,
            selected_style=tab_selected_style
        ),

        # Race Stats
        dcc.Tab(
            label="Race Stats",
            children=[
                html.H2("Race Statistics", className="mt-3"),

                html.Label("Select Race:", style={"marginTop": "10px"}),
                dcc.Dropdown(
                    id="race-filter",
                    options=[{"label": r, "value": r} for r in all_races],
                    value=all_races[0] if all_races else None,
                    clearable=False,
                    style={"width": "300px", "marginBottom": "20px"}
                ),

                html.H4("Coaches Playing This Race"),
                html.Div(id="race-coaches-list"),

                html.H4("Player Types Summary"),
                html.Div(id="race-player-types-table"),

                html.H4("Skill Frequency for Race"),
                dcc.Graph(id="race-skills-bar"),

                html.H4("Skills per Player Type"),
                html.Div(id="race-skills-pivot-table")
            ],
            style=tab_style,
            selected_style=tab_selected_style
        ),

        # Single Teams
        dcc.Tab(
            label="Single Teams",
            children=[

                # NEW BETA WARNING
                html.Div(
                    "This application is in Beta state and might contain data errors, "
                    "please ensure to review the actual roster of your opponent.",
                    style={
                        "backgroundColor": "#552",
                        "color": "gold",
                        "padding": "10px",
                        "border": "1px solid gold",
                        "marginTop": "15px",
                        "marginBottom": "20px",
                        "fontWeight": "bold",
                        "borderRadius": "5px"
                    }
                ),

                html.H2("Team Overview", className="mt-3"),

                html.Label("Select Coach:", style={"marginTop": "10px"}),
                dcc.Dropdown(
                    id="coach-filter",
                    options=[{"label": c, "value": c} for c in all_coaches],
                    value=all_coaches[0] if all_coaches else None,
                    clearable=False,
                    style={"width": "300px", "marginBottom": "20px"}
                ),

                html.Div(id="metadata-panel"),
                html.Div(id="team-table")
            ],
            style=tab_style,
            selected_style=tab_selected_style
        ),

    ])
], fluid=True)


# ------------------------------------------------------------
# 8. Helper for pivot table styling
# ------------------------------------------------------------
def compute_group_styles(df, group_column):
    hide_rules = []
    border_rules = []

    for i in range(len(df)):
        if i > 0 and df.iloc[i][group_column] == df.iloc[i-1][group_column]:
            hide_rules.append({
                "if": {"row_index": i, "column_id": group_column},
                "color": "rgba(0,0,0,0)",
                "textShadow": "0 0 0 transparent"
            })
        else:
            border_rules.append({
                "if": {"row_index": i, "column_id": "all"},
                "borderTop": "3px solid #888"
            })

    return hide_rules, border_rules


# ------------------------------------------------------------
# 9. Callbacks
# ------------------------------------------------------------

# --- Single Team View ---
@app.callback(
    Output("metadata-panel", "children"),
    Output("team-table", "children"),
    Input("coach-filter", "value")
)
def update_team_view(selected_coach):
    if selected_coach is None:
        return html.Div("No coach selected."), html.Div("")

    df_filtered = df_team_event[df_team_event["Coach"] == selected_coach]

    if df_filtered.empty:
        return html.Div("No data for this coach."), html.Div("")

    row = df_filtered.iloc[0]
    meta = row.to_dict()
    players = row["Players"]

    team_rows = transform_players(players)

    return metadata_panel(meta), team_table(team_rows)


# --- Race Stats View ---
@app.callback(
    Output("race-coaches-list", "children"),
    Output("race-player-types-table", "children"),
    Output("race-skills-bar", "figure"),
    Output("race-skills-pivot-table", "children"),
    Input("race-filter", "value")
)
def update_race_stats(selected_race):
    if selected_race is None:
        return html.Div("No race selected."), html.Div(""), px.Figure(), html.Div("")

    df_race = df_players_long[df_players_long["Race"] == selected_race]

    if df_race.empty:
        return html.Div("No data for this race."), html.Div(""), px.Figure(), html.Div("")

    # --- Coaches list ---
    coaches = sorted(df_race["Coach"].dropna().unique())
    coach_list_component = html.Ul([
        html.Li(
            html.A(
                c,
                href=f"https://member.thenaf.net/index.php?module=NAF&type=coachpage&coach={c}",
                target="_blank",
                style={"color": "gold", "textDecoration": "underline"}
            )
        )
        for c in coaches
    ])

    # --- Player Types Summary ---
    per_team = (
        df_race.groupby(["Team", "PlayerName"])
        .size()
        .reset_index(name="Count")
    )

    summary = (
        per_team.groupby("PlayerName")["Count"]
        .agg(["count", "min", "max", "mean"])
        .reset_index()
        .rename(columns={
            "count": "TotalCount",
            "min": "MinPerTeam",
            "max": "MaxPerTeam",
            "mean": "AvgPerTeam"
        })
        .sort_values("PlayerName", ascending=False)
    )

    player_types_table = dash_table.DataTable(
        data=summary.to_dict("records"),
        columns=[{"name": c, "id": c} for c in summary.columns],
        page_size=20,
        style_table={"overflowX": "auto"},
        style_header={
            "backgroundColor": "rgb(50,50,50)",
            "color": "white",
            "fontWeight": "bold",
            "border": "1px solid #444"
        },
        style_cell={
            "backgroundColor": "rgb(30,30,30)",
            "color": "white",
            "border": "1px solid #444",
            "padding": "8px",
            "textAlign": "left"
        }
    )

    # --- Skill Frequency ---
    all_skills = (
        df_race["Skills"]
        .explode()
        .dropna()
        .value_counts()
        .reset_index()
    )

    fig_skills = px.bar(
        all_skills,
        x="Skills",
        y="count",
        title=f"Skill Frequency for {selected_race}"
    ).update_layout(
        paper_bgcolor="#222",
        plot_bgcolor="#222",
        font_color="white"
    )

    # --- Pivot Table ---
    df_expanded = df_race.explode("Skills")

    if df_expanded["Skills"].notna().any():
        pivot = (
            df_expanded
                .groupby(["PlayerName", "Skills"])
                .size()
                .reset_index(name="Count")
                .sort_values(["PlayerName", "Count"], ascending=[True, False])
        )
    else:
        pivot = pd.DataFrame({"PlayerName": df_expanded["PlayerName"].unique()})

    hide_rules, border_rules = compute_group_styles(pivot, "PlayerName")

    pivot_table = dash_table.DataTable(
        data=pivot.to_dict("records"),
        columns=[{"name": c, "id": c} for c in pivot.columns],
        page_size=20,
        style_table={"overflowX": "auto"},
        style_header={
            "backgroundColor": "rgb(50,50,50)",
            "color": "white",
            "fontWeight": "bold",
            "border": "1px solid #444"
        },
        style_cell={
            "backgroundColor": "rgb(30,30,30)",
            "color": "white",
            "border": "1px solid #444",
            "padding": "8px",
            "textAlign": "left"
        },
        style_data_conditional=[
            *hide_rules,
            *border_rules
        ]
    )

    return coach_list_component, player_types_table, fig_skills, pivot_table


# ------------------------------------------------------------
# 10. Run (Jupyter-friendly)
# ------------------------------------------------------------
app.run(jupyter_mode="external")


Dash app running on http://127.0.0.1:8050/


In [346]:
df_expanded = df_race.explode("Skills")

if df_expanded["Skills"].notna().any():
    pivot = (
        df_expanded
        .pivot_table(
            index="PlayerName",
            columns="Skills",
            aggfunc="size",
            fill_value=0
        )
        .reset_index()
    )
else:
    pivot = pd.DataFrame({"PlayerName": df_expanded["PlayerName"].unique()})

    

In [379]:
pivot = (
    df_expanded
        .groupby(["PlayerName", "Skills"])
        .size()
        .reset_index(name="Count")
        .sort_values("PlayerName", ascending=False)
)
pivot


Unnamed: 0,PlayerName,Skills,Count
2,Trained Troll,Block,2
1,Pump Wagon,Pro,1
0,Fungus Flinga,Accurate,1


In [271]:
df_team_event.columns

Index(['pdf_name', 'pdf_type', 'Race', 'Coach', 'Team',
       'Sideline - Apothecary', 'Sideline - Assistant coaches',
       'Sideline - Cheerleaders', 'Sideline - Dedicated fans',
       'Sideline - Re-rolls', 'Inducement - Bribes',
       'Inducement - Riotous Rookies', 'Summary - Players cost',
       'Summary - Skills cost', 'Summary - Inducement cost',
       'Summary - Sideline cost', 'Summary - Primary skills',
       'Summary - Secondary skills', 'Players', 'event_team',
       'Inducement - Team Mascot', 'team_captain',
       'team_captain_pro_as_secondary', 'emerald_bowl_skill_cost',
       'emerald_bowl_tier', 'total_team_cost', 'emerald_bowl_tier_extra_gold',
       'base_total_cost', 'required_extra_gold', 'verify_skill_cost',
       'verify_seconday_skills', 'star_players', 'count_star_players',
       'verify_star_players', 'verify_no_skill_stack', 'verify_inducements',
       'row'],
      dtype='object')

In [275]:
df_wide.columns

Index(['event_team', '1_pdf_name', '2_pdf_name', '1_pdf_type', '2_pdf_type',
       '1_Race', '2_Race', '1_Coach', '2_Coach', '1_Team', '2_Team',
       '1_Sideline - Apothecary', '2_Sideline - Apothecary',
       '1_Sideline - Assistant coaches', '2_Sideline - Assistant coaches',
       '1_Sideline - Cheerleaders', '2_Sideline - Cheerleaders',
       '1_Sideline - Dedicated fans', '2_Sideline - Dedicated fans',
       '1_Sideline - Re-rolls', '2_Sideline - Re-rolls',
       '1_Inducement - Bribes', '2_Inducement - Bribes',
       '1_Inducement - Riotous Rookies', '2_Inducement - Riotous Rookies',
       '1_Summary - Players cost', '2_Summary - Players cost',
       '1_Summary - Skills cost', '2_Summary - Skills cost',
       '1_Summary - Inducement cost', '2_Summary - Inducement cost',
       '1_Summary - Sideline cost', '2_Summary - Sideline cost',
       '1_Summary - Primary skills', '2_Summary - Primary skills',
       '1_Summary - Secondary skills', '2_Summary - Secondary skills',