In [13]:
from fasthtml.common import *
from fasthtml.jupyter import JupyUvi, HTMX
from dataclasses import dataclass
from datetime import datetime
from math import ceil
import pandas as pd

In [14]:
# Open the 'trfc.db' database; `db` is the handle the query cells below call
# `.execute()` on. `database` is not imported by name in this notebook —
# presumably it arrives via the `fasthtml.common` star import (TODO confirm).
db = database('trfc.db')

In [15]:
# `fast_app()` returns a pair, unpacked here as `app` and `rt`. Neither name is
# referenced by any cell visible in this part of the notebook.
app, rt = fast_app()

In [16]:
# Filter settings that drive every query below. Kept in one mapping so the
# whole selection can be changed from a single cell.
user_inputs = dict(
    # Bounds compared against seasons.ssn_start in the queries below.
    min_season=1921,
    max_season=2024,
    # League tiers and named competitions to include (matched with OR).
    league_tiers=[2, 3, 4, 5],
    inc_play_offs=1,
    generic_comps=[
        'Anglo-Italian Cup',
        "Associate Members' Cup",
        'FA Cup',
        'FA Trophy',
        "Full Members' Cup",
        'League Cup',
        'War League',
    ],
    pens_as_draw=1,
    # Values matched against results.venue.
    venues=['H', 'A', 'N'],
    min_games=10,
)

In [17]:
# Lift the individual settings out of `user_inputs` into their own names so
# the cells below can refer to them directly. The key order in the inner tuple
# must line up with the target names on the left.
(
    min_season,
    max_season,
    league_tiers,
    inc_play_offs,
    pens_as_draw,
    venues,
    min_games,
) = (
    user_inputs[setting]
    for setting in (
        'min_season',
        'max_season',
        'league_tiers',
        'inc_play_offs',
        'pens_as_draw',
        'venues',
        'min_games',
    )
)

In [18]:
# Extra WHERE fragment for play-off handling: rows flagged `c.is_playoff = 1`
# are excluded only when inc_play_offs is 0; otherwise the fragment is empty
# and adds nothing to the query.
po_filter = 'AND COALESCE(c.is_playoff, 0) != 1' if inc_play_offs == 0 else ''

In [19]:
# One '?' bind placeholder per selected venue, comma-separated, ready to drop
# into the `r.venue IN (...)` clause of the queries below.
venue_placeholders = ','.join('?' * len(venues))

In [20]:
# Comma-separated '?' placeholders for the league-tier list; left empty when
# no tiers are selected so the tier clause can be skipped entirely.
tier_placeholders = ''
if user_inputs['league_tiers']:
    tier_placeholders = ','.join('?' for _ in user_inputs['league_tiers'])

# Same again for the named competitions.
comp_placeholders = ''
if user_inputs['generic_comps']:
    comp_placeholders = ','.join('?' for _ in user_inputs['generic_comps'])

In [21]:
# Combine the league-tier and competition restrictions into one optional SQL
# fragment. A game passes if it matches EITHER list, so the clauses are OR-ed
# inside a single parenthesised AND term. A clause is only emitted when its
# placeholder string is non-empty; with no clauses the fragment stays empty.
filters = [
    clause
    for clause, placeholders in (
        (f'r.league_tier IN ({tier_placeholders})', tier_placeholders),
        (f'r.generic_comp IN ({comp_placeholders})', comp_placeholders),
    )
    if placeholders
]
tier_comp_filter = ('AND (' + ' OR '.join(filters) + ')') if filters else ''

In [22]:
# Take the `pens_as_draw` setting from `user_inputs` rather than pinning it to
# 1, so a value changed in the inputs cell is not silently overridden here.
pens_as_draw = user_inputs['pens_as_draw']

In [23]:
# Season-by-season totals of yellow and red cards under the active filters.
# Both CTEs apply the same season-range / venue / play-off / competition
# filters, one to `cards_yellow` and one to `cards_red`. The outer SELECT
# left-joins both onto `seasons`, so a season in range with no matching cards
# still appears, with zero counts.
query = f'''
    WITH yellow_counts AS (
        SELECT 
            r.season,
            COUNT(*) as yellow_cards
        FROM cards_yellow cy
        JOIN results r ON cy.game_date = r.game_date
        LEFT JOIN cup_game_details c ON r.game_date = c.game_date
        LEFT JOIN seasons s ON r.season = s.season
        WHERE s.ssn_start >= ?
            AND s.ssn_start <= ?
            AND r.venue IN ({venue_placeholders})
            {po_filter}
            {tier_comp_filter}
        GROUP BY r.season
    ),
    red_counts AS (
        SELECT 
            r.season,
            COUNT(*) as red_cards
        FROM cards_red cr
        JOIN results r ON cr.game_date = r.game_date
        LEFT JOIN cup_game_details c ON r.game_date = c.game_date
        LEFT JOIN seasons s ON r.season = s.season
        WHERE s.ssn_start >= ?
            AND s.ssn_start <= ?
            AND r.venue IN ({venue_placeholders})
            {po_filter}
            {tier_comp_filter}
        GROUP BY r.season
    )
    SELECT 
        s.season,
        COALESCE(yc.yellow_cards, 0) as yellow_cards,
        COALESCE(rc.red_cards, 0) as red_cards,
        COALESCE(yc.yellow_cards, 0) + COALESCE(rc.red_cards, 0) as total_cards
    FROM seasons s
    LEFT JOIN yellow_counts yc ON s.season = yc.season
    LEFT JOIN red_counts rc ON s.season = rc.season
    WHERE s.ssn_start >= ?
        AND s.ssn_start <= ?
    ORDER BY s.season DESC
'''

# Bind values for ONE copy of the shared filter block, in placeholder order:
# season range, venues, then league tiers and competitions (each contributes
# nothing when its list is empty, matching the omitted SQL clause).
base_params = [min_season, max_season]
base_params += venues
base_params += user_inputs['league_tiers'] or []
base_params += user_inputs['generic_comps'] or []

# The filter block occurs once per CTE, so its values are bound twice; the
# outer WHERE needs only the season range.
params = base_params * 2 + [min_season, max_season]

results = db.execute(query, tuple(params))
# Column names come from the first field of each cursor description entry.
df = pd.DataFrame(
    results.fetchall(),
    columns=[column[0] for column in results.description],
)

df.head(10)

Unnamed: 0,season,yellow_cards,red_cards,total_cards
0,2024/25,64,2,66
1,2023/24,104,6,110
2,2022/23,84,3,87
3,2021/22,99,6,105
4,2020/21,81,2,83
5,2019/20,81,7,88
6,2018/19,91,3,94
7,2017/18,76,6,82
8,2016/17,96,5,101
9,2015/16,76,3,79


In [27]:
# Yellow cards per player per season under the active filters, ordered with
# the highest counts first.
#
# Rows are grouped on `p.player_id` as well as the name: grouping on the name
# alone would merge the bookings of two different players who share a name in
# the same season.
query = f'''
    SELECT 
        r.season,
        p.player_name,
        COUNT(*) as yellow_cards
    FROM cards_yellow cy
    JOIN results r ON cy.game_date = r.game_date
    JOIN players p ON cy.player_id = p.player_id
    LEFT JOIN cup_game_details c ON r.game_date = c.game_date
    LEFT JOIN seasons s ON r.season = s.season
    WHERE s.ssn_start >= ?
        AND s.ssn_start <= ?
        AND r.venue IN ({venue_placeholders})
        {po_filter}
        {tier_comp_filter}
    GROUP BY r.season, p.player_id, p.player_name
    ORDER BY yellow_cards DESC
'''

# Always-present bind values, in placeholder order: season range, then venues.
base_params = [
    min_season,
    max_season,
    *venues
]

# Full bind list for the single filter block in this query. The tier and
# competition values are appended only when their lists are non-empty, which
# mirrors how `tier_comp_filter` omits the corresponding SQL clause; `or []`
# also keeps a missing (None) list from raising on unpacking.
params = [
    *base_params,
    *(user_inputs['league_tiers'] or []),
    *(user_inputs['generic_comps'] or []),
]

results = db.execute(query, tuple(params))
# Column names come from the first field of each cursor description entry.
df = pd.DataFrame(results.fetchall(), columns=[d[0] for d in results.description])

# Spot-check a single season.
df.query("season=='2017/18'")

Unnamed: 0,season,player_name,yellow_cards
18,2017/18,Jay Harris,11
33,2017/18,Connor Jennings,10
34,2017/18,Oliver Norburn,10
110,2017/18,Andy Cook,7
111,2017/18,Steve McNulty,7
204,2017/18,James Norwood,5
275,2017/18,Adam Buxton,4
276,2017/18,Jeff Hughes,4
354,2017/18,Scott Davies,3
485,2017/18,Dylan Mottley-Henry,2
