In [106]:
from data_prep import *
from charts import *
import pandas as pd

game_df = team_sheets()
players_df = players(game_df)
players_agg_df = players_agg(players_df)
lineouts_df = lineouts()
pitchero_df = pitchero_stats()
set_piece_df = set_piece_results()

# Season Summaries

In [107]:
seasons = [
    "2021/22", 
    "2022/23", 
    "2023/24", 
    "2024/25"
]

for season in seasons:
    print(season)

    # # Starts (by Position)
    plot_starts_by_position(1, season=season, fb_only=False, df=players_df).save(f"Charts/{season.replace('/','-')}/1s-squad-position.html")
    plot_starts_by_position(2, season=season, fb_only=False, df=players_df).save(f"Charts/{season.replace('/','-')}/2s-squad-position.html")
    # # # Appearances
    # plot_games_by_player(0, min=10, season=season, df=players_df).save(f"Charts/{season.replace('/','-')}/appearances.html")
    # plot_games_by_player(1, min=5, season=season, df=players_df).save(f"Charts/{season.replace('/','-')}/1s-appearances.html")
    # plot_games_by_player(2, min=5, season=season, df=players_df).save(f"Charts/{season.replace('/','-')}/2s-appearances.html")
    # # Points Scorers
    # points_scorers_chart(1, season, df=pitchero_df).save(f"Charts/{season.replace('/','-')}/1s-points.html")
    # points_scorers_chart(2, season, df=pitchero_df).save(f"Charts/{season.replace('/','-')}/2s-points.html")
    # # Captains
    # if season != ["2021/22", "2022/23"]:
    #     captains_chart(season, df=game_df).save(f"Charts/{season.replace('/','-')}/captains.html")
    # # Cards
    # card_chart(0, season, df=pitchero_df).save(f"Charts/{season.replace('/','-')}/cards.html")
    # Results
    # results_chart(1, season, df=game_df).save(f"Charts/{season.replace('/','-')}/1s-results.html")
    # results_chart(2, season, df=game_df).save(f"Charts/{season.replace('/','-')}/2s-results.html")

    # # Lineouts
    # lineout_chart(1, season, df=lineouts_df).save(f"Charts/{season.replace('/','-')}/1s-lineouts.html")
    # if season != "2021/22":
    #     lineout_chart(2, season, df=lineouts_df).save(f"Charts/{season.replace('/','-')}/2s-lineouts.html")

2021/22
2022/23
2023/24
2024/25


# Trends


In [108]:
plot_games_by_player(0, min=10, season=None, agg=True, df=players_df).save("Charts/by-season/appearances.html")
captains_chart(season=None, df=game_df).save("Charts/by-season/captains.html")
card_chart(season=None, df=pitchero_df).save("Charts/by-season/cards.html")
points_scorers_chart(squad=0, season=None, df=pitchero_df).save("Charts/by-season/points.html")

# Set Piece

In [109]:
# Lineouts
for squad in [1, 2]:
    types = ["Numbers", "Area", "Hooker", "Jumper", "Setup", "Call", "Movement"]
    charts = []
    for i,t in enumerate(types):
        min = 5 if t in ["Hooker", "Jumper", "Call"] else 1
        chart = count_success_chart(t, squad, as_dict=True, min=min)
        filters = [{"filter": {"param": f"select{f}"}} for f in types if f != t]
        chart["transform"] = filters + chart["transform"]
        chart["title"]["fontSize"] = 36
        chart["title"]["color"] = "#202946"

        charts.append(alt.Chart.from_dict(chart))

    chart = (
        alt.vconcat(*charts)
        .resolve_scale(color="independent")
        .properties(
            title=alt.Title(
                text=f"{'1st' if squad==1 else '2nd'} XV Lineout Stats", 
                subtitle="Click on any bar to filter the other charts accordingly", 
                subtitleFontSize=14
            )
        )
        .save(f"Charts/by-season/{squad}s-lineouts.html")
    )

# Set Piece Head-to-Head
for squad in [1,2]:
    for event in ["scrum", "lineout"]:
        chart = set_piece_h2h_charts(squad, event, df=set_piece_df)
        chart.save(f"Charts/by-season/{squad}s-{event}-h2h.html")

KeyboardInterrupt: 

# 1st XV Game Analysis

In [125]:
pd.options.display.max_columns = None

analysis = sheet[0].batch_get(['B4:AZ'])[0]

df = pd.DataFrame(analysis, columns=analysis.pop(0))
df

id_cols = ["Date", "Opposition", "Home/Away", "PF", "PA"]
poss_terr = df[id_cols + ["BIP Time", "Poss (%)", "Terr (%)", "Own 22m (%)", "22m - Half (%)", "Half - 22m (%)", "Opp 22m (%)"]]

# Convert Date (7 Sep 2024) to vega-compatible date
poss_terr.loc[:,"Date"] = pd.to_datetime(poss_terr["Date"], format="%d %b %Y")

# Convert all percentage columns to floats
for c in poss_terr.columns:
    if "%" in c:
        poss_terr.loc[:,c] = poss_terr[c].str.replace("%", "").astype(float)*0.01

poss_terr = poss_terr.melt(id_vars=id_cols, var_name="Metric", value_name="Value")

poss_terr = poss_terr[~poss_terr["Metric"].isin(["Poss (%)", "Terr (%)", "BIP Time"])]

# Create text label position value for each metric
# Each Date has 4 metrics (Own 22m, 22m - Half, Half - 22m, Opp 22m) which add to 1
# The text label for Own 22 should be 0.5 * Value
# The text label for 22m - Half should be 0.5 * Value + the previous value (Own 22)
# The text label for Half - 22m should be 0.5 * Value + both previous values (Own 22, 22m - Half)
# The text label for Opp 22 should be 0.5 * Value + all previous values (Own 22, 22m - Half, Half - 22m)

# Add cumulative sum of Value column in each Date group
poss_terr["Cumulative"] = poss_terr.groupby(id_cols)["Value"].cumsum()

poss_terr

TypeError: cumsum is not supported for object dtype

In [132]:
poss_terr.sort_values(["Date"])

Unnamed: 0,Date,Opposition,Home/Away,PF,PA,Metric,Value
24,2024-09-07,London Cornish,H,16,18,Own 22m (%),0.09
48,2024-09-07,London Cornish,H,16,18,Opp 22m (%),0.14
32,2024-09-07,London Cornish,H,16,18,22m - Half (%),0.33
40,2024-09-07,London Cornish,H,16,18,Half - 22m (%),0.45
25,2024-10-12,Hove,H,21,34,Own 22m (%),0.17
49,2024-10-12,Hove,H,21,34,Opp 22m (%),0.24
41,2024-10-12,Hove,H,21,34,Half - 22m (%),0.32
33,2024-10-12,Hove,H,21,34,22m - Half (%),0.27
26,2024-10-19,Twickenham,A,33,41,Own 22m (%),0.11
42,2024-10-19,Twickenham,A,33,41,Half - 22m (%),0.3


In [112]:

# Altair stacked bar chart with own 22m, 22m - Half, Half - 22m, Opp 22m stacked along the y axis, and Opposition on the x axis
chart = (
    alt.Chart(poss_terr)
    .transform_filter("datum.Metric != 'BIP Time'")
    .transform_filter("datum.Metric != 'Poss (%)'")
    .transform_filter("datum.Metric != 'Terr (%)'")
    .mark_bar()
.encode(
        y=alt.Y("Opposition:N", title="Opposition"),
        x=alt.X("sum(Value):Q", title="Percentage", stack="normalize"),
        color=alt.Color(
            "Metric:N", 
            title="Territory", 
            scale=alt.Scale(
                domain=["Own 22m (%)", "22m - Half (%)", "Half - 22m (%)", "Opp 22m (%)"],
                range=["#981515", "#da8", "#ad8", "#146f14"]
            )
        ),
        order=alt.Order("stack_order:O"),
        tooltip=[
            alt.Tooltip("Opposition:N"),
            alt.Tooltip("Metric:N", title="Territory"),
            alt.Tooltip("Value:Q", title="Percentage", format=".0%")
        ]
    )
    .properties(
        title="Territory Stats",
        width=600,
        height=alt.Step(40)
    )
)
chart

# # Add text overlay to bars showing the percentage - the text should be in the exact centre of the bar (i.e. (bottom + top)/2)
# text = (
#     alt.Chart(poss_terr)
#     .transform_filter("datum.Metric != 'BIP Time'")
#     .transform_filter("datum.Metric != 'Poss (%)'")
#     .transform_filter("datum.Metric != 'Terr (%)'")
#     .mark_text(font="Lato, sans-serif", fontSize=20)
#     .encode(
#         y=alt.Y("Opposition:N"),
#         x=alt.X("sum(Value):Q"),
#         text=alt.Text("Value:Q", format=".0%"),
#         color=alt.Color("Metric:N", scale=alt.Scale(domain=["Own 22m (%)", "22m - Half (%)", "Half - 22m (%)", "Opp 22m (%)"], range=["white", "black", "black", "white"])),
#         order=alt.Order("stack_order:O")
#     )
# )
# (chart + text).resolve_scale(color="independent")

In [None]:
#

# Team Sheets

In [None]:
def team_sheet_chart(
        squad=1, 
        names=None, 
        captain=None, 
        vc=None, 
        opposition=None, 
        home=True, 
        competition="Counties 1 Sussex",
        season="2023/24"
    ):

    if names is None:
        df = team_sheets(squad=1) 

        # Last row as dict
        team = df.iloc[-1].to_dict()


        label = f'{"1st" if squad==1 else "2nd"} XV vs {team["Opposition"]}({team["Home/Away"]})'
        captain = team["Captain"]
        vc = team["VC"]
        season = team["Season"]
        competition = team["Competition"]

        # Keep keys that can be converted to integers
        team = {int(k): v for k, v in team.items() if k.isnumeric() and v}

        # Convert team to dataframe with Number and Player columns
        team = pd.DataFrame(team.items(), columns=["Number", "Player"])

    else:
        label = f'{"1st" if squad==1 else "2nd"} XV vs {opposition} ({"H" if home else "A"})'

        # Convert names to Player column of a dataframe with Number column (1-len(names))
        team = pd.DataFrame({"Player": names, "Number": range(1, len(names)+1)})

    coords = pd.DataFrame([
                {"n": 1, "x": 10, "y": 81},
                {"n": 2, "x": 25, "y": 81},
                {"n": 3, "x": 40, "y": 81},
                {"n": 4, "x": 18, "y": 69},
                {"n": 5, "x": 32, "y": 69},
                {"n": 6, "x": 6, "y": 61},
                {"n": 7, "x": 44, "y": 61},
                {"n": 8, "x": 25, "y": 56},
                {"n": 9, "x": 20, "y": 42},
                {"n": 10, "x": 38, "y": 36},
                {"n": 11, "x": 8, "y": 18},
                {"n": 12, "x": 56, "y": 30},
                {"n": 13, "x": 74, "y": 24},
                {"n": 14, "x": 92, "y": 18},
                {"n": 15, "x": 50, "y": 10},
                {"n": 16, "x": 80, "y": 82},
                {"n": 17, "x": 80, "y": 74},
                {"n": 18, "x": 80, "y": 66},
                {"n": 19, "x": 80, "y": 58},
                {"n": 20, "x": 80, "y": 50},
                {"n": 21, "x": 80, "y": 42},
                {"n": 22, "x": 80, "y": 34},
                {"n": 23, "x": 80, "y": 26},
            ])
    team = team.merge(coords, left_on="Number", right_on="n", how="inner").drop(columns="n")

    # Add captain (C) and vice captain (VC) else None
    team["Captain"] = team["Player"].apply(lambda x: "C" if x == captain else "VC" if x == vc else None)

    team["Player"] = team["Player"].str.split(" ")

    team.to_dict(orient="records")

    with open("team-sheet-lineup.json") as f:
        chart = json.load(f)
    chart["data"]["values"] = team.to_dict(orient="records")
    chart["title"]["text"] = label
    chart["title"]["subtitle"] = f"{season} - {competition}"

    n_replacements = len(team) - 15
    
    y = 126 + (n_replacements * 64)
    chart["layer"][0]["mark"]["y2"] = y
    # return chart
    return alt.Chart.from_dict(chart)

In [None]:
names = [
    "Guy Collins",
    "Ben Tottman",
    "Josh Brimecombe",
    "Ollie Toogood",
    "Sam Lindsay-McCall",
    "Josh Weaver",
    "Ryland Thomas",
    "Dan Billin",
    "Chris Taylor",
    "Luke Maker",
    "Jake Radcliffe",
    "Chris May-Miller",
    "Ted Hardisty",
    "Ali Moffatt",
    "Noah Roberts",
    "Or Shay",
    "Tom Mooney",
    "Jack Billin",
]

team_sheet_chart(names=names, captain="Ryland", opposition="London Cornish", competition="Counties 1 Surrey/Sussex")

In [None]:
df1 = players()
df1 = df1[["Number", "Player", "Position"]]
df1 = df1[df1["Number"]<=15]

df1 = df1.groupby("Player").agg({"Position": lambda x: list(set(x))})
df1["N"] = df1["Position"].apply(len)
df1[df1["N"]>1].sort_values("N", ascending=False)

In [None]:


import requests
from bs4 import BeautifulSoup
import json

# URL of the EGRFC team statistics page
URL = "https://www.egrfc.com/teams/142069/statistics"

# Fetch and parse the page
def fetch_player_data(url):
    response = requests.get(url)
    if response.status_code != 200:
        print("Failed to fetch the webpage.")
        return []

    soup = BeautifulSoup(response.text, "html.parser")
    
    table = soup.find_all("div", {"class": "no-grid-hide"})[0]

    headers = [h.text for h in table.find_all("div", {"class": "league-table__header"})]
    players = [p.text for p in table.find_all("div", "sc-bwzfXH iWTrZm")]
    positions = [p.text for p in table.find_all("div", "sc-bwzfXH fGYXYx")]
    
    players = [{"Player": p, "Position": [pos]} for p, pos in zip(players, positions)]

    return players

# Save to JSON file
def save_to_json(data, filename):
    with open(filename, "w") as f:
        json.dump(data, f, indent=4)
    print(f"Player data saved to {filename}")

if __name__ == "__main__":
    player_data = fetch_player_data(URL)
    if player_data:
        save_to_json(player_data, "players.json")
