In [1]:
import json
from pathlib import Path

import pandas as pd

from data_prep import *
from charts import *
from video_analysis import *
from team_sheets import *

# Load the datasets used throughout the notebook. None of these loader functions
# is defined in the notebook itself; presumably they come from the star-imported
# project modules (data_prep, charts, video_analysis, team_sheets) — TODO confirm.

# Team-sheet data; passed to captains_chart and results_chart further down.
game_df = team_sheets()
# Player-level frame derived from game_df; filtered by the "Player" column below.
players_df = players(game_df)
# Aggregated frame derived from players_df; used by the per-player squad pie chart.
players_agg_df = players_agg(players_df)
# Input for the per-season lineout charts.
lineouts_df = lineouts()
# Input for the cards and points-scorers charts.
pitchero_df = pitchero_stats()
# Input for the set-piece head-to-head chart.
set_piece_df = set_piece_results()
# Input for the video "game stats" charts.
analysis = game_stats()

### Season Summaries

In [12]:
seasons = [
    "2021/22",
    "2022/23",
    "2023/24",
    "2024/25",
]

# Seasons for which no 2nd XV lineout chart is generated.
seasons_without_2s_lineouts = {"2021/22"}

# Root folder for every exported chart; per-season charts go in a sub-folder.
charts_root = Path("Charts")


def _save_chart(chart, path):
    """Save an Altair chart to `path`, creating the parent folder if needed.

    `chart.save` does not create missing directories, so without this the
    export fails on a fresh checkout or for a newly added season.
    """
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)
    chart.save(str(path))


# Lineouts: one sub-folder per season, with "/" replaced by "-" (e.g. "2021-22").
for season in seasons:
    season_dir = charts_root / season.replace("/", "-")
    _save_chart(lineout_chart(1, season, df=lineouts_df), season_dir / "1s-lineouts.html")
    if season not in seasons_without_2s_lineouts:
        _save_chart(lineout_chart(2, season, df=lineouts_df), season_dir / "2s-lineouts.html")

# Trends
_save_chart(plot_games_by_player(df=players_df), charts_root / "appearances.html")
_save_chart(plot_starts_by_position(df=players_df), charts_root / "positions.html")
_save_chart(captains_chart(df=game_df), charts_root / "captains.html")
_save_chart(card_chart(df=pitchero_df), charts_root / "cards.html")
_save_chart(points_scorers_chart(df=pitchero_df), charts_root / "points.html")
_save_chart(results_chart(df=game_df), charts_root / "results.html")
_save_chart(set_piece_h2h_chart(df=set_piece_df), charts_root / "set-piece.html")

# Video stats
_save_chart(game_stats_charts(df=analysis), charts_root / "game-stats.html")

In [13]:
import os
from bs4 import BeautifulSoup

# Directory holding the exported chart HTML files (searched recursively).
charts_dir = './Charts/'

# Google Fonts stylesheet for the 'Lato' family referenced by the CSS below.
font_href = "https://fonts.googleapis.com/css?family=Lato:100,300,400,700,900"

# CSS appended to each chart page to style the Vega binding widgets.
css_to_add = '''
    .vega-bindings {
      border: 2px solid black;
      background-color: #e5e4e7;
      color: #202946;
      position: absolute;
      top: 0;
      right: 100px;
      display: block;
      justify-content: center;
      gap: 20px;
      padding: 10px;
      margin-top: 10px;
      font-size: large;
    }

    .vega-bind {
      padding: 10px;
      padding-top: 5px;
    }

    .vega-bind-name {
      font-family: 'Lato', sans-serif;
      font-weight: bold;
      font-size: larger;
      color: #202946; 
    }

    .vega-bind-radio input {
      width: 1rem;
      height: 1rem;
    }

    .vega-bind-radio label {
      font-family: 'Lato', sans-serif;
      display: flex;
      padding: 0.1rem;
      cursor: pointer;
      transition: all 0.3s;
      font-size: medium;
    }

    .vega-bind-radio input:checked+label {
      background-color: #202946;
      color: #e5e4e7;
    }
'''

# Walk every HTML file under the charts directory and inject the styling.
# The injection is idempotent: files that are not regenerated between runs
# would otherwise gain another copy of the CSS and font link on every run.
for root, _dirs, files in os.walk(charts_dir):
    for file in files:
        if not file.endswith('.html'):
            continue

        file_path = os.path.join(root, file)

        with open(file_path, 'r', encoding='utf-8') as f:
            soup = BeautifulSoup(f, 'html.parser')

        # Both the <style> fallback and the font <link> are attached to <head>.
        if soup.head is None:
            print(f"Skipped {file_path} (no <head> element)")
            continue

        changed = False

        # Reuse the first <style> tag, or create one when the page has none.
        style_tag = soup.find('style')
        if style_tag is None:
            style_tag = soup.new_tag('style')
            soup.head.append(style_tag)

        # Only append the CSS if this exact block is not already present.
        if css_to_add.strip() not in style_tag.decode_contents():
            style_tag.append(css_to_add)
            changed = True

        # Only add the font stylesheet link once.
        if soup.find("link", href=font_href) is None:
            soup.head.append(soup.new_tag("link", rel="stylesheet", href=font_href))
            changed = True

        if not changed:
            print(f"Already styled {file_path}")
            continue

        # Write the modified HTML back to the file
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(str(soup))
        print(f"Updated {file_path}")


Updated ./Charts/appearances.html
Updated ./Charts/1s-lineouts.html
Updated ./Charts/positions.html
Updated ./Charts/set-piece.html
Updated ./Charts/2s-lineouts.html
Updated ./Charts/points.html
Updated ./Charts/cards.html
Updated ./Charts/results.html
Updated ./Charts/captains.html
Updated ./Charts/game-stats.html
Updated ./Charts/2021-22/1s-lineouts.html
Updated ./Charts/2024-25/1s-lineouts.html
Updated ./Charts/2024-25/2s-lineouts.html
Updated ./Charts/2023-24/1s-lineouts.html
Updated ./Charts/2023-24/2s-lineouts.html
Updated ./Charts/2022-23/1s-lineouts.html
Updated ./Charts/2022-23/2s-lineouts.html


# Individual Player Stats

In [16]:
def player_data(p):
    """Return one player's rows from each player-level frame.

    Args:
        p: Player name, compared for equality against the "Player" column.

    Returns:
        dict with keys "pitchero", "pdf" and "p_agg" (in that order), holding
        the matching rows of pitchero_df, players_df and players_agg_df.
    """
    sources = {
        "pitchero": pitchero_df,
        "pdf": players_df,
        "p_agg": players_agg_df,
    }
    return {key: frame[frame["Player"] == p] for key, frame in sources.items()}

# Unpacked in dict order: a = pitchero rows, b = players_df rows, c = aggregated rows.
# `b` is displayed by a later cell, so these names are kept as-is.
a, b, c = player_data("Sam Lindsay-McCall").values()

In [109]:
p = "Dan Billin"

def squad_pie(p):
    """Pie chart of a player's total games, split by squad.

    Built from players_agg_df: rows are restricted to the given player and the
    arc sizes are sum(TotalGames) per Squad. Two text layers are drawn on top:
    the game count inside each arc and a "<Squad> XV" label outside it.

    Args:
        p: Player name to match exactly against the "Player" column.

    Returns:
        A layered Altair chart (arc + two text layers).
    """
    base = (
        alt.Chart(players_agg_df)
        # A field predicate instead of an interpolated expression string, so
        # names containing an apostrophe cannot break the filter.
        .transform_filter(alt.FieldEqualPredicate(field="Player", equal=p))
        .transform_calculate(label="datum.Squad + ' XV'")
        .encode(
            theta=alt.Theta("sum(TotalGames)").stack(True),
            color=alt.Color(
                "Squad:N",
                scale=squad_scale,
                legend=alt.Legend(title=None, labelExpr="datum.label + ' XV'"),
            ),
        )
    )

    pie = base.mark_arc(outerRadius=120, opacity=0.8)

    # Game count, drawn in white inside each arc.
    text1 = base.mark_text(radius=75, size=36).encode(
        theta=alt.Theta("sum(TotalGames)", stack=True),
        text=alt.Text("sum(TotalGames)"),
        detail="Squad:N",
        color=alt.value("white"),
    )

    # Squad label, drawn outside the arc (radius larger than the pie's outerRadius).
    text2 = base.mark_text(radius=150, size=24).encode(
        theta=alt.Theta("sum(TotalGames)", stack=True),
        text=alt.Text("label:N"),
        detail="Squad:N",
    )

    return pie + text1 + text2

position_order = ["Prop", "Hooker", "Second Row", "Back Row", "Scrum Half", "Fly Half", "Centre", "Back Three"]
# NOTE(review): not referenced by any code visible in this notebook — confirm whether it is still needed.
position_color = ["#202947", "#146f14", "#981515", "#b03030"]

def position_pie(p):
    """Pie chart of a player's appearances, split by position.

    Built from players_df: rows are restricted to the given player and to rows
    with a valid Position; arc sizes are row counts per Position. The count is
    drawn in white inside each arc.

    Args:
        p: Player name to match exactly against the "Player" column.

    Returns:
        A layered Altair chart (arc + text layer).
    """
    base = (
        alt.Chart(players_df)
        # Index of the position within position_order.
        # NOTE(review): `posi` is computed but not used by any encoding here.
        .transform_calculate(posi=f"indexof({position_order}, datum.Position)")
        # A field predicate instead of an interpolated expression string, so
        # names containing an apostrophe cannot break the filter.
        .transform_filter(alt.FieldEqualPredicate(field="Player", equal=p))
        # Drop rows without a position.
        .transform_filter("isValid(datum.Position)")
        .encode(
            theta=alt.Theta("count()").stack(True),
            color=alt.Color(
                "Position:N",
                legend=alt.Legend(title=None, orient="bottom", offset=40),
                scale=alt.Scale(),
            ),
        )
    )

    pie = base.mark_arc(outerRadius=120)
    text = base.mark_text(radius=75, size=36).encode(
        theta=alt.Theta("count()", stack=True),
        text=alt.Text("count()"),
        color=alt.value("white"),
        detail="Position:N",
    )

    # Both layers inherit the player filter from `base`, so no second filter is needed.
    return pie + text

def games(p):
    """Bar chart of games played over time for one player, coloured by position.

    NOTE(review): this reads "Date" and "Position" from players_agg_df; the
    players_df preview in this notebook shows no "Date" column — confirm that
    players_agg_df actually provides both fields.

    Args:
        p: Player name to match exactly against the "Player" column.

    Returns:
        An Altair bar chart.
    """
    bar = (
        alt.Chart(players_agg_df)
        # A field predicate instead of an interpolated expression string, so
        # names containing an apostrophe cannot break the filter.
        .transform_filter(alt.FieldEqualPredicate(field="Player", equal=p))
        .encode(
            x=alt.X("Date:T", title="Date"),
            y=alt.Y("count()", title="Games Played"),
            color=alt.Color("Position:N", scale=position_scale),
        )
        .mark_bar()
    )
    # The chart was previously built but never returned, so callers got None.
    return bar

position_pie(p)
# squad_pie(p)


In [96]:
# `b` is the "pdf" entry (players_df rows) unpacked from player_data("Sam Lindsay-McCall") earlier in the notebook.
b

Unnamed: 0,Squad,Season,Competition,GameType,Opposition,Home/Away,PF,PA,Result,Captain,VC1,VC2,Number,Player,Position,PositionType
422,1st,2021/22,Friendly,Friendly,Metropolitan Police,H,29,28,W,Jack Andrews,,,4,Sam Lindsay-McCall,Second Row,Forwards
423,1st,2021/22,Friendly,Friendly,London Irish,A,10,33,L,Jack Andrews,Sam Lindsay-McCall,,4,Sam Lindsay-McCall,Second Row,Forwards
424,1st,2021/22,Friendly,Friendly,Horsham,A,26,56,L,Jack Andrews,James Funnell,,4,Sam Lindsay-McCall,Second Row,Forwards
425,1st,2021/22,Friendly,Friendly,Purley John Fisher,H,19,33,L,Jack Andrews,James Funnell,,4,Sam Lindsay-McCall,Second Row,Forwards
426,1st,2021/22,Sussex 1,League,Eastbourne,H,47,0,W,Jack Andrews,,,4,Sam Lindsay-McCall,Second Row,Forwards
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2289,1st,2023/24,Counties 1 Surrey/Sussex,League,Eastbourne,A,24,26,L,Ryland Thomas,Dan Poulton,,17,Sam Lindsay-McCall,,Bench
2313,1st,2024/25,Counties 1 Surrey/Sussex,League,Eastbourne,A,36,33,W,Ryland Thomas,Chris May-Miller,,17,Sam Lindsay-McCall,,Bench
2314,1st,2024/25,Counties 1 Surrey/Sussex,League,Trinity,H,14,48,L,Ryland Thomas,,,17,Sam Lindsay-McCall,,Bench
2315,1st,2024/25,Counties 1 Surrey/Sussex,League,Old Rutlishians,H,19,23,L,Ryland Thomas,,,17,Sam Lindsay-McCall,,Bench


# Team Sheets

In [None]:
def team_sheet_chart(
        squad=1, 
        names=None, 
        captain=None, 
        vc=None, 
        opposition=None, 
        home=True, 
        competition="Counties 1 Sussex",
        season="2023/24"
    ):
    """Build the team-sheet lineup chart from the "team-sheet-lineup.json" template.

    With `names=None` the line-up is taken from the last row returned by
    team_sheets() for the requested squad; in that case captain, vc, season and
    competition are read from that row and the corresponding arguments are
    ignored. Otherwise `names` is used directly, numbered from 1 in list order.

    Args:
        squad: 1 for the 1st XV; any other value is labelled "2nd".
        names: Player names in shirt-number order, or None to use the latest
            recorded team sheet.
        captain: Player name to mark "C" (only used when `names` is given).
        vc: Player name to mark "VC" (only used when `names` is given).
        opposition: Opposition name for the title (only used when `names` is given).
        home: True for "H", False for "A" in the title (only used when `names` is given).
        competition: Competition shown in the subtitle.
        season: Season shown in the subtitle.

    Returns:
        An Altair chart created from the filled-in template dict.
    """
    squad_label = "1st" if squad == 1 else "2nd"

    if names is None:
        # Use the requested squad (this was previously hard-coded to squad=1).
        df = team_sheets(squad=squad)

        # Last row as dict
        team = df.iloc[-1].to_dict()

        # Same "<opposition> (<H/A>)" spacing as the explicit-names branch below.
        label = f'{squad_label} XV vs {team["Opposition"]} ({team["Home/Away"]})'
        captain = team["Captain"]
        # NOTE(review): frames previewed in this notebook expose "VC1"/"VC2" rather
        # than "VC"; fall back to "VC1" when "VC" is absent — confirm intended column.
        vc = team["VC"] if "VC" in team else team.get("VC1")
        season = team["Season"]
        competition = team["Competition"]

        # Keep only shirt-number columns that hold a player. NaN is truthy, so
        # missing values need an explicit check to be dropped.
        team = {
            int(k): v
            for k, v in team.items()
            if str(k).isnumeric() and pd.notna(v) and v
        }

        # Convert team to dataframe with Number and Player columns
        team = pd.DataFrame(list(team.items()), columns=["Number", "Player"])

    else:
        label = f'{squad_label} XV vs {opposition} ({"H" if home else "A"})'

        # Number the supplied names 1..len(names) in list order.
        team = pd.DataFrame({"Player": names, "Number": range(1, len(names) + 1)})

    # (shirt number, x, y) position for each shirt number 1-23.
    coords = pd.DataFrame(
        [
            (1, 10, 81), (2, 25, 81), (3, 40, 81),
            (4, 18, 69), (5, 32, 69),
            (6, 6, 61), (7, 44, 61), (8, 25, 56),
            (9, 20, 42), (10, 38, 36),
            (11, 8, 18), (12, 56, 30), (13, 74, 24), (14, 92, 18), (15, 50, 10),
            (16, 80, 82), (17, 80, 74), (18, 80, 66), (19, 80, 58),
            (20, 80, 50), (21, 80, 42), (22, 80, 34), (23, 80, 26),
        ],
        columns=["n", "x", "y"],
    )
    # Inner join: players whose number has no coordinates are dropped.
    team = team.merge(coords, left_on="Number", right_on="n", how="inner").drop(columns="n")

    # Add captain (C) and vice captain (VC) else None
    team["Captain"] = team["Player"].apply(lambda x: "C" if x == captain else "VC" if x == vc else None)

    # Split each name on spaces into a list of words.
    team["Player"] = team["Player"].str.split(" ")

    with open("team-sheet-lineup.json", encoding="utf-8") as f:
        chart = json.load(f)
    chart["data"]["values"] = team.to_dict(orient="records")
    chart["title"]["text"] = label
    chart["title"]["subtitle"] = f"{season} - {competition}"

    # The y2 of the first layer's mark grows by 64 per player beyond the first 15.
    n_replacements = len(team) - 15

    y = 126 + (n_replacements * 64)
    chart["layer"][0]["mark"]["y2"] = y
    return alt.Chart.from_dict(chart)

In [96]:
# Inspect the pitchero_df rows for a single player.
pitchero_df[pitchero_df["Player"]=="Sam Lindsay-McCall"]

Unnamed: 0,Player,Season,Squad,TotalGames,Player_join,A,T,Con,PK,DG,YC,RC,Points,PPG,Tries,Cons,Pens,Cards
9,Sam Lindsay-McCall,2023/24,1st,23,S Lindsay-Mccall,,,,,,,,,,,,,
11,Sam Lindsay-McCall,2021/22,2nd,2,S Lindsay-Mccall,,,,,,,,,,,,,
51,Sam Lindsay-McCall,2024/25,2nd,1,S Lindsay-Mccall,,,,,,,,,,,,,
198,Sam Lindsay-McCall,2021/22,1st,15,S Lindsay-Mccall,,,,,,,,,,,,,
200,Sam Lindsay-McCall,2024/25,1st,15,S Lindsay-Mccall,,,,,,,,,,,,,
270,Sam Lindsay-McCall,2022/23,2nd,1,S Lindsay-Mccall,,,,,,,,,,,,,
470,Sam Lindsay-McCall,2022/23,1st,15,S Lindsay-Mccall,,,,,,,,,,,,,


In [7]:
# Inspect the pitchero_df rows whose Player_join value is "S Lindsay", ordered by season then squad.
pitchero_df[pitchero_df["Player_join"]=="S Lindsay"].sort_values(["Season","Squad"])

Unnamed: 0,Player,Season,Squad,TotalGames,Player_join,A,T,Con,PK,DG,YC,RC,Points,PPG,Tries,Cons,Pens
464,Sam Lindsay-McCall,2021/22,1st,15,S Lindsay,,,,,,,,,,,,
348,Sam Lindsay-McCall,2021/22,2nd,2,S Lindsay,,,,,,,,,,,,
18,Sam Lindsay-McCall,2022/23,1st,15,S Lindsay,,,,,,,,,,,,
405,Sam Lindsay-McCall,2022/23,2nd,1,S Lindsay,,,,,,,,,,,,
346,Sam Lindsay-McCall,2023/24,1st,23,S Lindsay,,,,,,,,,,,,
466,Sam Lindsay-McCall,2024/25,1st,15,S Lindsay,,,,,,,,,,,,
388,Sam Lindsay-McCall,2024/25,2nd,1,S Lindsay,,,,,,,,,,,,
