In [2]:
from fasthtml.common import *
from fasthtml.jupyter import JupyUvi, HTMX
from dataclasses import dataclass
from datetime import datetime
from math import ceil
import pandas as pd

In [3]:
# Open the 'trfc.db' database that every query in this notebook runs against.
# `database` is not defined in this notebook; presumably it comes from the
# `fasthtml.common` star import. The path is relative, so it is resolved
# against the kernel's current working directory.
db = database('trfc.db')

In [4]:
# Create the application object and `rt` (presumably the route decorator —
# confirm against the FastHTML docs) using fast_app()'s default settings.
app, rt = fast_app()

In [5]:
# Filter settings that drive the queries below. Every key is read by a later
# cell, so this is the single place to change what the report covers.
user_inputs = dict(
    # Bounds compared against seasons.ssn_start in the query's WHERE clause.
    min_season=1921,
    max_season=2024,
    # League tiers and cup competitions to include (matched with OR).
    league_tiers=[2, 3, 4, 5],
    # 0 excludes play-off games; any other value keeps them.
    inc_play_offs=1,
    generic_comps=[
        'Anglo-Italian Cup',
        "Associate Members' Cup",
        'FA Cup',
        'FA Trophy',
        "Full Members' Cup",
        'League Cup',
    ],
    pens_as_draw=0,
    # Venue codes matched against results.venue.
    venues=['H', 'A', 'N'],
    # Minimum number of games a season needs to appear in the output.
    min_games=10,
)

In [6]:
# Pull the individual settings out of `user_inputs` into module-level names.
# A missing key raises KeyError here rather than further down in the query.
(
    min_season,
    max_season,
    league_tiers,
    inc_play_offs,
    pens_as_draw,
    venues,
    min_games,
) = (
    user_inputs[key]
    for key in (
        'min_season',
        'max_season',
        'league_tiers',
        'inc_play_offs',
        'pens_as_draw',
        'venues',
        'min_games',
    )
)

In [7]:
# SQL fragment that drops play-off games when inc_play_offs is 0; otherwise
# no extra condition is added. COALESCE treats a NULL is_playoff as 0.
po_filter = '' if inc_play_offs != 0 else 'AND COALESCE(c.is_playoff, 0) != 1'

In [8]:
# One '?' bind marker per selected venue, comma-separated, for `IN (...)`.
venue_placeholders = ','.join(['?'] * len(venues))

In [9]:
# Comma-separated '?' markers for the selected league tiers; an empty string
# when no tiers are selected.
if user_inputs['league_tiers']:
    tier_placeholders = ','.join('?' for _ in user_inputs['league_tiers'])
else:
    tier_placeholders = ''

# Same again for the selected generic (cup) competitions.
if user_inputs['generic_comps']:
    comp_placeholders = ','.join('?' for _ in user_inputs['generic_comps'])
else:
    comp_placeholders = ''

In [10]:
# Build the "league tier OR cup competition" condition. A game is kept when it
# matches either list; when both placeholder strings are empty no condition is
# added at all.
if not (tier_placeholders or comp_placeholders):
    tier_comp_filter = ''
else:
    filters = [
        f'r.{column} IN ({marks})'
        for column, marks in (
            ('league_tier', tier_placeholders),
            ('generic_comp', comp_placeholders),
        )
        if marks
    ]
    tier_comp_filter = f"AND ({' OR '.join(filters)})"

In [11]:
# NOTE(review): this replaces the value copied from user_inputs['pens_as_draw']
# (0) and leaves the user_inputs dict itself unchanged, so the two now
# disagree. None of the cells visible here read pens_as_draw — confirm whether
# this override is intentional or a leftover experiment.
pens_as_draw = 1

In [20]:
# Per-season attacking and defensive summary for the games selected by the
# filters built in the earlier cells.
#
# * `scorer_counts` (CTE) counts the distinct goalscorers in each season.
# * The outer query aggregates one row per game from `results`.
#
# Both parts apply the same WHERE conditions, so the same parameter list is
# bound twice. Only '?' placeholder lists and fixed SQL fragments are
# interpolated into the text; every value is passed as a bound parameter.
#
# The outer query deliberately does NOT join manager_reigns / managers: no
# manager column is selected, and a date-range join would duplicate any game
# whose date falls inside more than one reign (the range test is inclusive at
# both ends), inflating every SUM, skewing the AVGs and the HAVING COUNT(*).
query = f'''
    WITH scorer_counts AS (
        SELECT
            r.season,
            COUNT(DISTINCT g.player_id) AS unique_scorers
        FROM results r
        LEFT JOIN goals g ON r.game_date = g.game_date
        LEFT JOIN cup_game_details c ON r.game_date = c.game_date
        LEFT JOIN seasons s ON r.season = s.season
        WHERE s.ssn_start >= ?
            AND s.ssn_start <= ?
            AND r.venue IN ({venue_placeholders})
            {po_filter}
            {tier_comp_filter}
        GROUP BY r.season
    )
    SELECT
        r.season,
        SUM(r.goals_for >= 1) AS scored_one_plus,
        AVG(r.goals_for) AS avg_goals_for,
        COALESCE(sc.unique_scorers, 0) AS unique_scorers,
        SUM(r.goals_for == 0) AS blanks,
        SUM(r.goals_against == 0) AS clean_sheets,
        SUM(r.outcome == 'W' AND r.goals_against == 0) AS wins_to_nil,
        AVG(r.goals_against) AS avg_goals_against
    FROM results r
    LEFT JOIN cup_game_details c ON r.game_date = c.game_date
    LEFT JOIN seasons s ON r.season = s.season
    LEFT JOIN scorer_counts sc ON r.season = sc.season
    WHERE s.ssn_start >= ?
        AND s.ssn_start <= ?
        AND r.venue IN ({venue_placeholders})
        {po_filter}
        {tier_comp_filter}
    GROUP BY r.season
    HAVING COUNT(*) >= ?
    ORDER BY r.season DESC
'''

# Values for one copy of the WHERE clause, in the order its placeholders
# appear: season bounds, venues, league tiers, then generic competitions.
# `or []` mirrors the placeholder cell, which emits no markers for an empty
# or missing list.
base_params = [
    min_season,
    max_season,
    *venues,
    *(user_inputs['league_tiers'] or []),
    *(user_inputs['generic_comps'] or []),
]

# The WHERE clause appears in the CTE and again in the outer query, followed
# by the single HAVING threshold.
params = [
    *base_params,  # CTE
    *base_params,  # outer query
    min_games,     # HAVING COUNT(*) >= ?
]

# Catch a placeholder/parameter mismatch here with a readable message. The
# query text contains no literal '?' other than bind markers.
assert query.count('?') == len(params), (
    f"query has {query.count('?')} placeholders but {len(params)} parameters were built"
)

results = db.execute(query, tuple(params))

# Column names come from the cursor so they always match the SELECT list.
df = pd.DataFrame(
    results.fetchall(),
    columns=[d[0] for d in results.description],
)

df.head(10)

Unnamed: 0,season,scored_one_plus,avg_goals_for,unique_scorers,blanks,clean_sheets,wins_to_nil,avg_goals_against
0,2024/25,16,0.962963,12,11,6,3,1.555556
1,2023/24,37,1.384615,12,15,9,7,1.557692
2,2022/23,39,1.056604,15,14,18,15,1.169811
3,2021/22,38,1.245283,18,15,23,20,0.962264
4,2020/21,39,1.266667,16,21,22,14,1.1
5,2019/20,29,1.244444,15,16,5,3,1.866667
6,2018/19,45,1.344828,15,13,23,17,1.310345
7,2017/18,40,1.692308,16,12,19,15,1.115385
8,2016/17,48,1.789474,17,9,23,20,0.912281
9,2015/16,37,1.285714,19,12,19,14,1.020408
