In [1]:
from fasthtml.common import *
from fasthtml.jupyter import JupyUvi, HTMX
from dataclasses import dataclass
from datetime import datetime
from math import ceil
import pandas as pd

In [2]:
# Open the database file 'trfc.db' (path is relative to the notebook's working directory).
# NOTE(review): `database` arrives via `from fasthtml.common import *` — presumably
# fastlite's SQLite helper; confirm. Later cells only rely on `db.execute(sql, params)`.
db = database('trfc.db')

In [3]:
# Build the web app object and its companion `rt` handle with default settings.
# NOTE(review): `rt` is presumably the route decorator — confirm. Neither name is
# used by the query-building cells that follow in this part of the notebook.
app, rt = fast_app()

In [4]:
# Filter settings that are bound as SQL parameters by the query cells below.
user_inputs = dict(
    min_season=1921,         # lower bound for the first four characters of r.season
    max_season=2024,         # upper bound for the first four characters of r.season
    inc_play_offs=1,         # NOTE(review): not referenced by the query in this notebook section
    pens_as_draw=1,          # 1 = classify by r.outcome only; 0 = let c.pens_outcome decide
    venues=['H', 'A', 'N'],  # values allowed for r.venue
    min_games=10,            # HAVING COUNT(*) >= min_games
)

In [5]:
# Columns the aggregate query can be grouped by. The chosen value is interpolated
# directly into the SQL text (SELECT and GROUP BY), so it must be one of these
# trusted identifiers and never raw user input.
FOCUS_OPTIONS = ('m.manager_name', 'r.season', 'r.opposition')

# Active grouping column: head-to-head record by opposition.
focus = FOCUS_OPTIONS[2]

In [6]:
# Aggregate record (P/W/D/L, goals, win %) grouped by the `focus` column.
#
# Positional parameters, in order:
#   1-8   the 0/1 "pens as draw" flag, two per CASE expression (W, D, L, win_pc)
#   9-10  minimum and maximum season start year
#   11    minimum number of games for a group to be reported
#
# Flag semantics, as encoded in the CASE expressions:
#   flag = 1 -> every game is classified by r.outcome alone
#   flag = 0 -> a game with a non-NULL c.pens_outcome is classified by that value
#               (so it is never a draw); other games are classified by r.outcome
#
# The win_pc CASE is parenthesised exactly like the W CASE so that both columns
# always agree; without the extra parentheses the second disjunct is not guarded
# by `? = 0` because AND binds tighter than OR.
#
# `focus` is interpolated into the SQL text and must be a trusted column name.
query = f'''
    SELECT
        {focus},
        COUNT(*) as P,
        COUNT(
            CASE WHEN
                (? = 0 AND ((c.pens_outcome IS NULL AND r.outcome = 'W') OR (c.pens_outcome IS NOT NULL AND c.pens_outcome = 'W')))
            OR
                (? = 1 AND r.outcome = 'W')
            THEN 1 END) as W,
        COUNT(
            CASE WHEN
                (? = 0 AND (c.pens_outcome IS NULL AND r.outcome = 'D'))
            OR
                (? = 1 AND r.outcome = 'D')
            THEN 1 END) as D,
        COUNT(
            CASE WHEN
                (? = 0 AND ((c.pens_outcome IS NULL AND r.outcome = 'L') OR (c.pens_outcome IS NOT NULL AND c.pens_outcome = 'L')))
            OR
                (? = 1 AND r.outcome = 'L')
            THEN 1 END) as L,
        SUM(r.goals_for) as GF,
        SUM(r.goals_against) as GA,
        SUM(r.goals_for) - SUM(r.goals_against) as GD,
        ROUND(CAST(COUNT(
            CASE WHEN
                (? = 0 AND ((c.pens_outcome IS NOT NULL AND c.pens_outcome = 'W') OR (c.pens_outcome IS NULL AND r.outcome = 'W')))
            OR
                (? = 1 AND r.outcome = 'W')
            THEN 1 END) AS FLOAT) / COUNT(*) * 100, 1) as win_pc
    FROM results r
    LEFT JOIN cup_game_details c ON r.game_date = c.game_date
    LEFT JOIN manager_reigns mr ON r.game_date >= mr.mgr_date_from
        AND (r.game_date <= mr.mgr_date_to OR mr.mgr_date_to IS NULL)
    LEFT JOIN managers m ON mr.manager_id = m.manager_id
    LEFT JOIN league_tiers lt ON r.season = lt.season AND r.game_type = 'League'
    WHERE CAST(SUBSTRING(r.season, 1, 4) AS INTEGER) >= ?
        AND CAST(SUBSTRING(r.season, 1, 4) AS INTEGER) <= ?
        AND r.venue IN ('H', 'A', 'N')
    GROUP BY {focus}
    HAVING COUNT(*) >= ?
    ORDER BY P DESC
'''

In [7]:
# Run the hand-written `query` with the penalty flag taken from `user_inputs`.
# The SQL expects the flag eight times, then min season, max season, min games.
pens_flag = user_inputs['pens_as_draw']
query_params = (pens_flag,) * 8 + (
    user_inputs['min_season'],
    user_inputs['max_season'],
    user_inputs['min_games'],
)
results = db.execute(query, query_params)

# Column headers come from the cursor metadata, rows from the result set.
df = pd.DataFrame(
    results.fetchall(),
    columns=[col[0] for col in results.description],
)

# Spot-check a single opponent.
df.query("opposition=='Barnsley'")

Unnamed: 0,opposition,P,W,D,L,GF,GA,GD,win_pc
29,Barnsley,56,14,18,24,71.0,87.0,-16.0,25.0


In [8]:
# Re-run the same query with the flag forced to 0, so games that have a
# penalty outcome are classified by it instead of by r.outcome.
pens_as_draw = 0

query_params = (pens_as_draw,) * 8 + (
    user_inputs['min_season'],
    user_inputs['max_season'],
    user_inputs['min_games'],
)
results = db.execute(query, query_params)

# Column headers come from the cursor metadata, rows from the result set.
df = pd.DataFrame(
    results.fetchall(),
    columns=[col[0] for col in results.description],
)

# Same spot-check as before, for comparison.
df.query("opposition=='Barnsley'")

Unnamed: 0,opposition,P,W,D,L,GF,GA,GD,win_pc
29,Barnsley,56,15,17,24,71.0,87.0,-16.0,26.8


In [9]:
def _outcome_case(outcome, pens_decide=True):
    """Return a SQL CASE expression that yields 1 for games counted as `outcome`.

    The fragment contains exactly two `?` placeholders, both bound to the 0/1
    "pens as draw" flag:

    * flag = 1: the game counts when ``r.outcome`` equals `outcome`.
    * flag = 0: a game without a penalty outcome counts when ``r.outcome``
      equals `outcome`; when `pens_decide` is true, a game with a penalty
      outcome counts when ``c.pens_outcome`` equals `outcome`.

    `outcome` is embedded in the SQL text, so pass only fixed literals.
    """
    without_pens = f"(c.pens_outcome IS NULL AND r.outcome = '{outcome}')"
    if pens_decide:
        with_pens = f"(c.pens_outcome IS NOT NULL AND c.pens_outcome = '{outcome}')"
        flag_off = f"({without_pens} OR {with_pens})"
    else:
        flag_off = without_pens
    # Both disjuncts are explicitly parenthesised: AND binds tighter than OR,
    # so the `? = 0` guard must wrap the whole flag-off condition.
    return (
        "CASE WHEN "
        f"(? = 0 AND {flag_off}) "
        "OR "
        f"(? = 1 AND r.outcome = '{outcome}') "
        "THEN 1 END"
    )


def get_query(focus, user_inputs):
    """Build the aggregate results query grouped by `focus`.

    Args:
        focus: Column expression to select and group by (e.g. ``'r.opposition'``).
            It is interpolated into the SQL text, so it must come from a
            trusted, fixed set of column names and never from raw user input.
        user_inputs: Mapping that must contain a non-empty ``'venues'`` sequence;
            one ``?`` placeholder is generated per venue.

    Returns:
        SQL text with positional ``?`` placeholders in this order: eight
        "pens as draw" flags (two each for W, D, L and win_pc), minimum season,
        maximum season, one per venue, and the minimum game count.

    Raises:
        ValueError: If ``user_inputs`` has no venues selected.
    """
    if not user_inputs.get('venues'):
        raise ValueError("At least one venue must be selected")

    venue_placeholders = ','.join('?' for _ in user_inputs['venues'])

    # W and win_pc share one expression so the two columns cannot disagree.
    wins = _outcome_case('W')
    # With the flag off, a game that has a penalty outcome is never a draw.
    draws = _outcome_case('D', pens_decide=False)
    losses = _outcome_case('L')

    return f'''
        SELECT
            {focus},
            COUNT(*) as P,
            COUNT({wins}) as W,
            COUNT({draws}) as D,
            COUNT({losses}) as L,
            SUM(r.goals_for) as GF,
            SUM(r.goals_against) as GA,
            SUM(r.goals_for) - SUM(r.goals_against) as GD,
            ROUND(CAST(COUNT({wins}) AS FLOAT) / COUNT(*) * 100, 1) as win_pc
        FROM results r
        LEFT JOIN cup_game_details c ON r.game_date = c.game_date
        LEFT JOIN manager_reigns mr ON r.game_date >= mr.mgr_date_from
            AND (r.game_date <= mr.mgr_date_to OR mr.mgr_date_to IS NULL)
        LEFT JOIN managers m ON mr.manager_id = m.manager_id
        LEFT JOIN league_tiers lt ON r.season = lt.season AND r.game_type = 'League'
        WHERE CAST(SUBSTRING(r.season, 1, 4) AS INTEGER) >= ?
            AND CAST(SUBSTRING(r.season, 1, 4) AS INTEGER) <= ?
            AND r.venue IN ({venue_placeholders})
        GROUP BY {focus}
        HAVING COUNT(*) >= ?
        ORDER BY P DESC
    '''

def execute_query(cursor, focus, user_inputs):
    """Run the aggregate query once and return its result column by column.

    Args:
        cursor: Object whose ``execute(sql, params)`` returns something exposing
            ``description`` and ``fetchall()`` (the notebook passes ``db``).
        focus: Grouping column, forwarded to ``get_query``.
        user_inputs: Mapping with ``'pens_as_draw'``, ``'min_season'``,
            ``'max_season'``, ``'venues'`` and ``'min_games'``.

    Returns:
        Dict mapping each result column name to the list of that column's
        values, in row order.

    Raises:
        ValueError: Propagated from ``get_query`` when no venue is selected.
    """
    query = get_query(focus, user_inputs)

    # Parameter order must mirror the placeholders produced by get_query:
    # the flag twice for each of W, D, L and win_pc (8), the season range (2),
    # one value per venue, then the HAVING threshold (1).
    pens_flag = user_inputs['pens_as_draw']
    params = [
        *[pens_flag] * 8,
        user_inputs['min_season'],
        user_inputs['max_season'],
        *user_inputs['venues'],
        user_inputs['min_games'],
    ]

    # Execute a single time; column names and rows come from the same cursor.
    result = cursor.execute(query, params)
    col_names = [col[0] for col in result.description]
    data = result.fetchall()
    return {name: [row[i] for row in data] for i, name in enumerate(col_names)}

In [10]:
# Filter settings for the function-based run; here penalty outcomes decide the
# result (pens_as_draw=0).
user_inputs = dict(
    min_season=1921,
    max_season=2024,
    inc_play_offs=0,  # NOTE(review): not referenced by get_query / execute_query
    pens_as_draw=0,
    venues=['H', 'A', 'N'],
    min_games=10,
)

# Column-oriented dict from execute_query -> DataFrame, then spot-check one opponent.
focus_stats = pd.DataFrame(execute_query(db, focus, user_inputs))
focus_stats.query("opposition=='Barnsley'")

Unnamed: 0,opposition,P,W,D,L,GF,GA,GD,win_pc
29,Barnsley,56,15,17,24,71.0,87.0,-16.0,26.8
