In [2]:
from data_prep import *
from charts import *
from players import *
from video_analysis import *
from team_sheets import *

# Load data
game_df = team_sheets()
players_df = players(game_df)
players_agg_df = players_agg(players_df)
lineouts_df = lineouts()
set_piece_df = set_piece_results()
analysis = game_stats()
players_table = players_table_data(players_df, players_agg_df)
 
# # Save data
# game_df.to_csv('data/game.csv', index=False)
# players_df.to_csv('data/players.csv', index=False)
# players_agg_df.to_csv('data/players_agg.csv', index=False)
# lineouts_df.to_csv('data/lineouts.csv', index=False)
# set_piece_df.to_csv('data/set_piece.csv', index=False)
# analysis.to_csv('data/analysis.csv', index=False)
# update_season_summaries(game_df, seasons=["2024/25"])

# # One-off charts (only source data needs updating)
# captains_chart(file='Charts/captains.html')
# results_chart(file='Charts/results.html')
# plot_games_by_player(file='Charts/appearances.html')
# plot_starts_by_position(file='Charts/positions.html')
# card_chart(file='Charts/cards.html')
# points_scorers_chart(file='Charts/points.html')
# team_sheets_chart(file='Charts/team-sheets.html')
# set_piece_h2h_chart(file='Charts/set-piece.html')
# squad_continuity_chart(file='Charts/continuity.html')

# # Self-contained charts (chart needs updating)
# game_stats_charts(analysis, file='Charts/video_analysis.html')
# lineout_success(types=types, file='Charts/lineouts.html')

KeyError: 'Season'

In [17]:
seasons = ["2021/22", "2022/23", "2023/24", "2024/25"]
seasons_hist = ["2016/17", "2017/18", "2018/19", "2019/20"]

game_df = team_sheets()
players_df = players(game_df)
players_agg_df = players_agg(players_df)

def plot_games_by_player(min=5, df=None, file=None):

    c = alt.Color("Squad:N", scale=squad_scale, legend=None)

    season_selection = alt.param(
        bind=alt.binding_radio(options=[*seasons_hist, *seasons, "Total"], name="Season"), 
        value="Total" 
    )
    squad_selection = alt.param(
        bind=alt.binding_radio(options=["1st", "2nd", "Total"], name="Squad"),
        value="Total"
    )

    min_selection = alt.param(
        bind=alt.binding_range(name="Minimum Games", min=1, max=20, step=1),
        value=min
    )

    chart = (
        alt.Chart(df if df is not None else {"name": "df", "url":'https://raw.githubusercontent.com/samnlindsay/egrfc-stats/main/data/players_agg.json',"format":{'type':"json"}})
        .mark_bar(strokeWidth=2)
        .encode(
            x=alt.X("sum(TotalGames):Q", axis=alt.Axis(title=None, orient="top")),
            y=alt.Y("Player:N", sort="-x", title=None),
            color=c,
            tooltip=[
                "Player:N", 
                "Squad:N",
                alt.Tooltip("sum(TotalGames):Q", title="Games"), 
            ]
        )
        # .transform_filter(legend)
        .add_params(season_selection, squad_selection, min_selection)
        .resolve_scale(y="independent")
        .transform_filter(f"datum.Season == {season_selection.name} | {season_selection.name} == 'Total'")
        .transform_filter(f"datum.Squad == {squad_selection.name} | {squad_selection.name} == 'Total'")
        .transform_aggregate(TotalGames="sum(TotalGames)", groupby=["Player", "Squad"])
        .transform_filter(f"datum.TotalGames >= {min}")
        .properties(
            title=alt.Title(
                text=f"Appearances",
                subtitle=f"Minimum {min} appearances. Lighter shaded bars represent bench appearances.",
                subtitleFontStyle="italic"  
            ),
            width=400,
            height=alt.Step(15)
        )
    )
    if file:
        chart.save(file)
        hack_params_css(file)

    return chart

plot_games_by_player(df=players_agg_df)

In [18]:
pitchero_stats()

KeyboardInterrupt: 

In [None]:
players_agg_df.to_json('data/players_agg.json', orient='records')

In [14]:

pitchero_df = pd.read_json('data/pitchero.json')
pitchero_df[pitchero_df['Player_join'] == 'H Weller']

Unnamed: 0,A,T,Con,PK,DG,YC,RC,Points,PPG,Season,Squad,Tries,Cons,Pens,Player_join
593,2,1,0,0,0,0,0,5,2.5,2019/20,2nd,5,0,0,H Weller
594,2,1,4,0,0,0,0,13,6.5,2019/20,2nd,5,8,0,H Weller
675,3,0,0,0,0,0,0,0,0.0,2021/22,2nd,0,0,0,H Weller
754,1,0,0,0,0,0,0,0,0.0,2022/23,2nd,0,0,0,H Weller


In [6]:
players_agg_df.columns

Index(['Squad', 'Season', 'Player', 'CupStarts', 'CupBench', 'LeagueStarts',
       'LeagueBench', 'FriendlyStarts', 'FriendlyBench', 'CompetitiveStarts',
       'CompetitiveBench', 'TotalStarts', 'TotalBench', 'TotalGames',
       'MostCommonPosition', 'MostCommonPositionType', 'Player_join', 'A', 'T',
       'Con', 'PK', 'DG', 'YC', 'RC', 'Points', 'PPG', 'Tries', 'Cons',
       'Pens'],
      dtype='object')

In [5]:
pitchero_df.columns

Index(['A', 'T', 'Con', 'PK', 'DG', 'YC', 'RC', 'Points', 'PPG', 'Season',
       'Squad', 'Tries', 'Cons', 'Pens', 'Player_join', 'Player'],
      dtype='object')

# Individual Player Stats

- Table:
  - Position
  - Debut
  - Total Games (this season/total)
  - Tries (this season/total)
- Charts:
  - Games per season (color by squad)
  - Games by position (color by squad)
  - Tries/points per season

In [None]:
current_season = max(players_agg_df["Season"])

# Position dictionary
d = {
    1: "Prop",
    2: "Hooker",
    3: "Prop",
    4: "Second Row",
    5: "Second Row",
    6: "Flanker",
    7: "Flanker",
    8: "Number 8",
    9: "Scrum Half",
    10: "Fly Half",
    11: "Wing",
    12: "Centre",
    13: "Centre",
    14: "Wing",
    15: "Full Back",
}


# Total tries per player (current season)
def totals(df):
    totals = (
        df.groupby("Player")
        .agg({"T": "sum", "TotalGames": "sum"})
        .rename(columns={"TotalGames": "Games", "T": "Tries"})
        .reset_index()
        .astype({"Tries": "int", "Games": "int"})
    )

    games_by_squad = (
        df.groupby(["Player", "Squad"])
        .agg({"TotalGames": "sum"})
        .reset_index()
        .pivot(index="Player", columns="Squad", values="TotalGames")
        .reset_index()
        .rename(columns={"1st": "Games1", "2nd": "Games2"})
        .fillna(0)
        .astype({"Games1": "int", "Games2": "int"})
    )

    totals = totals.merge(games_by_squad, on="Player").fillna(0)

    return totals

def get_positions(df, by=None):

    df["Position"] = df.apply(lambda x: d.get(x["Number"]), axis=1)


    df = (
        df.groupby(["Player", "Position", by] if by else ["Player", "Position"])
        .agg({"PF": "count"})
        .reset_index()
        .sort_values(["Player", "PF"], ascending=[True, False])
    )

    df = df[df['PF'] >= 1]

    return df

def debuts(df):

    df = df.sort_values(["Player", "Squad", "GameSort"])

    debut = df.groupby(["Player","Squad"]).agg({"GameID": "first", "Season": "first"}).reset_index()
    debut["Debut1"] = list(zip(debut["GameID"], debut["Season"]))
    debut = debut[debut["Squad"] == "1st"].drop(columns=["Squad", "GameID", "Season"])


    first_season = df.groupby("Player").agg({"Season": "min"}).reset_index()
    first_season = first_season.rename(columns={"Season": "FirstSeason"})

    debuts = first_season.merge(debut, on="Player", how="left")

    return debuts

def players_table_data():

    positions = get_positions(players_df)
    positions = positions[
        positions.groupby('Player')['PF'].transform('sum') * 0.2 <= positions[~positions['Position'].isna()]['PF']
    ]
    positions = positions.groupby('Player').agg({'Position': lambda x: ' / '.join(x)}).reset_index()

    df_current = totals(players_agg_df[players_agg_df["Season"] == current_season])
    df_total = totals(players_agg_df).rename(columns={
        "Tries": "TotalTries", 
        "Games": "TotalGames", 
        "Games1": "TotalGames1", 
        "Games2": "TotalGames2"
    })

    debuts_df = debuts(players_df)

    df = (
        df_total
        .merge(df_current, on="Player", how="left").fillna(0)
        .merge(positions, on="Player", how="left")
        .merge(debuts_df, on="Player", how="left")
        .astype({"Tries": "int", "Games": "int", "Games1": "int", "Games2": "int"})
    )

    df.to_json("data/player_table.json", orient="records", indent=2)

    return df

df = players_table_data()
df


Unnamed: 0,Player,TotalTries,TotalGames,TotalGames1,TotalGames2,Tries,Games,Games1,Games2,Position,FirstSeason,Debut1
0,Aaron Boczek,10,40,37,3,0,4,1,3,Number 8 / Flanker,2022/23,"(Lewes (A), 2022/23)"
1,Aaron Stonell,0,1,0,1,0,0,0,0,Prop,2022/23,
2,Alex Jackson,2,5,4,1,0,0,0,0,Number 8,2021/22,"(Crawley (H), 2021/22)"
3,Alex Leadbeater,2,9,1,8,0,0,0,0,Centre / Wing,2021/22,"(London Irish (A), 2021/22)"
4,Alex Maynard,2,2,0,2,0,0,0,0,Prop,2021/22,
...,...,...,...,...,...,...,...,...,...,...,...,...
186,Tommy Nasta,0,1,0,1,0,1,0,1,Wing,2024/25,
187,Will Bramwell,0,10,4,6,0,3,1,2,Second Row / Flanker,2023/24,"(Horsham (A), 2023/24)"
188,Will Burch,0,4,0,4,0,0,0,0,Prop / Hooker,2021/22,
189,Will Roberts,29,38,10,28,6,13,6,7,Wing,2022/23,"(Burgess Hill (A), 2022/23)"


In [5]:
df[df["Player"] == "Dan Poulton"]

Unnamed: 0,Player,TotalTries,TotalGames,TotalGames1,TotalGames2,Tries,Games,Games1,Games2,Position,FirstSeason,Debut1
37,Dan Poulton,3,53,53,0,0,5,5,0,Fly Half,2021/22,"(Crawley (H), 2021/22)"


In [None]:
p = "Dan Billin"

def squad_pie(p):
    base = (
        alt.Chart(players_agg_df).encode(
            theta=alt.Theta("sum(TotalGames)").stack(True),
            color=alt.Color("Squad:N", scale=squad_scale, legend=alt.Legend(title=None, labelExpr="datum.label + ' XV'"))
        )
        .transform_filter(f"datum.Player === '{p}'")
        .transform_calculate(label="datum.Squad + ' XV'")
    )

    pie = base.mark_arc(outerRadius=120, opacity=0.8)
    text1 = base.mark_text(radius=75, size=36).encode(
        theta=alt.Theta("sum(TotalGames)", stack=True),
        text=alt.Text("sum(TotalGames)"), 
        detail="Squad:N",
        color=alt.value("white")
    )
    text2 = base.mark_text(radius=150, size=24).encode(
        theta=alt.Theta("sum(TotalGames)", stack=True),
        text=alt.Text("label:N"),
        detail="Squad:N",
    )

    return pie + text1 + text2

position_order = ["Prop", "Hooker", "Second Row", "Back Row", "Scrum Half", "Fly Half", "Centre", "Back Three"]
position_color = ["#202947", "#146f14", "#981515", "#b03030"]

def position_pie(p):
    base = (
        alt.Chart(players_df)
        .transform_calculate(posi=f"indexof({position_order}, datum.Position)")
        .encode(
            theta=alt.Theta("count()").stack(True),
            color=alt.Color(
                "Position:N"
                legend=alt.Legend(title=None, orient="bottom", offset=40), 
                scale=alt.Scale()
            )
        )
        .transform_filter(f"datum.Player === '{p}' & isValid(datum.Position)")
    )

    pie = base.mark_arc(outerRadius=120)
    text = base.mark_text(radius=75, size=36).encode(
        theta=alt.Theta("count()", stack=True),
        text=alt.Text("count()"), 
        color=alt.value("white"),
        detail="Position:N"
    )

    return (pie + text).transform_filter(f"datum.Player === '{p}'")

def games(p):
    bar = (
        alt.Chart(players_agg_df).encode(
            x=alt.X("Date:T", title="Date"),
            y=alt.Y("count()", title="Games Played"),
            color=alt.Color("Position:N", scale=position_scale)
        )
        .transform_filter(f"datum.Player === '{p}'")
        .mark_bar()
    )

position_pie(p)
# squad_pie(p)


SyntaxError: invalid syntax. Perhaps you forgot a comma? (2585547723.py, line 38)

In [None]:
b

Unnamed: 0,Squad,Season,Competition,GameType,Opposition,Home/Away,PF,PA,Result,Captain,VC1,VC2,Number,Player,Position,PositionType
422,1st,2021/22,Friendly,Friendly,Metropolitan Police,H,29,28,W,Jack Andrews,,,4,Sam Lindsay-McCall,Second Row,Forwards
423,1st,2021/22,Friendly,Friendly,London Irish,A,10,33,L,Jack Andrews,Sam Lindsay-McCall,,4,Sam Lindsay-McCall,Second Row,Forwards
424,1st,2021/22,Friendly,Friendly,Horsham,A,26,56,L,Jack Andrews,James Funnell,,4,Sam Lindsay-McCall,Second Row,Forwards
425,1st,2021/22,Friendly,Friendly,Purley John Fisher,H,19,33,L,Jack Andrews,James Funnell,,4,Sam Lindsay-McCall,Second Row,Forwards
426,1st,2021/22,Sussex 1,League,Eastbourne,H,47,0,W,Jack Andrews,,,4,Sam Lindsay-McCall,Second Row,Forwards
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2289,1st,2023/24,Counties 1 Surrey/Sussex,League,Eastbourne,A,24,26,L,Ryland Thomas,Dan Poulton,,17,Sam Lindsay-McCall,,Bench
2313,1st,2024/25,Counties 1 Surrey/Sussex,League,Eastbourne,A,36,33,W,Ryland Thomas,Chris May-Miller,,17,Sam Lindsay-McCall,,Bench
2314,1st,2024/25,Counties 1 Surrey/Sussex,League,Trinity,H,14,48,L,Ryland Thomas,,,17,Sam Lindsay-McCall,,Bench
2315,1st,2024/25,Counties 1 Surrey/Sussex,League,Old Rutlishians,H,19,23,L,Ryland Thomas,,,17,Sam Lindsay-McCall,,Bench


# Team Sheets

In [None]:
def team_sheet_chart(
        squad=1, 
        names=None, 
        captain=None, 
        vc=None, 
        opposition=None, 
        home=True, 
        competition="Counties 1 Sussex",
        season="2023/24"
    ):

    if names is None:
        df = team_sheets()    

        # Last row as dict
        team = df.iloc[-1].to_dict()


        label = f'{"1st" if squad==1 else "2nd"} XV vs {team["Opposition"]}({team["Home/Away"]})'
        captain = team["Captain"]
        vc = team["VC1"]
        season = team["Season"]
        competition = team["Competition"]

        # Keep keys that can be converted to integers
        team = {int(k): v for k, v in team.items() if k.isnumeric() and v}

        # Convert team to dataframe with Number and Player columns
        team = pd.DataFrame(team.items(), columns=["Number", "Player"])

    else:
        label = f'{"1st" if squad==1 else "2nd"} XV vs {opposition} ({"H" if home else "A"})'

        # Convert names to Player column of a dataframe with Number column (1-len(names))
        team = pd.DataFrame({"Player": names, "Number": range(1, len(names)+1)})

    coords = pd.DataFrame([
                {"n": 1, "x": 10, "y": 81},
                {"n": 2, "x": 25, "y": 81},
                {"n": 3, "x": 40, "y": 81},
                {"n": 4, "x": 18, "y": 69},
                {"n": 5, "x": 32, "y": 69},
                {"n": 6, "x": 6, "y": 61},
                {"n": 7, "x": 44, "y": 61},
                {"n": 8, "x": 25, "y": 56},
                {"n": 9, "x": 20, "y": 42},
                {"n": 10, "x": 38, "y": 36},
                {"n": 11, "x": 8, "y": 18},
                {"n": 12, "x": 56, "y": 30},
                {"n": 13, "x": 74, "y": 24},
                {"n": 14, "x": 92, "y": 18},
                {"n": 15, "x": 50, "y": 10},
                {"n": 16, "x": 80, "y": 82},
                {"n": 17, "x": 80, "y": 74},
                {"n": 18, "x": 80, "y": 66},
                {"n": 19, "x": 80, "y": 58},
                {"n": 20, "x": 80, "y": 50},
                {"n": 21, "x": 80, "y": 42},
                {"n": 22, "x": 80, "y": 34},
                {"n": 23, "x": 80, "y": 26},
            ])
    team = team.merge(coords, left_on="Number", right_on="n", how="inner").drop(columns="n")

    # Add captain (C) and vice captain (VC) else None
    team["Captain"] = team["Player"].apply(lambda x: "C" if x == captain else "VC" if x == vc else None)

    team["Player"] = team["Player"].str.split(" ")

    team.to_dict(orient="records")

    with open("team-sheet-lineup.json") as f:
        chart = json.load(f)
    chart["data"]["values"] = team.to_dict(orient="records")
    chart["title"]["text"] = label
    chart["title"]["subtitle"] = f"{season} - {competition}"

    n_replacements = len(team) - 15
    
    y = 126 + (n_replacements * 64)
    chart["layer"][0]["mark"]["y2"] = y
    # return chart
    return alt.Chart.from_dict(chart)

team_sheet_chart()

In [None]:
pitchero_df[pitchero_df["Player"]=="Sam Lindsay-McCall"]

Unnamed: 0,Player,Season,Squad,TotalGames,Player_join,A,T,Con,PK,DG,YC,RC,Points,PPG,Tries,Cons,Pens,Cards
9,Sam Lindsay-McCall,2023/24,1st,23,S Lindsay-Mccall,,,,,,,,,,,,,
11,Sam Lindsay-McCall,2021/22,2nd,2,S Lindsay-Mccall,,,,,,,,,,,,,
51,Sam Lindsay-McCall,2024/25,2nd,1,S Lindsay-Mccall,,,,,,,,,,,,,
198,Sam Lindsay-McCall,2021/22,1st,15,S Lindsay-Mccall,,,,,,,,,,,,,
200,Sam Lindsay-McCall,2024/25,1st,15,S Lindsay-Mccall,,,,,,,,,,,,,
270,Sam Lindsay-McCall,2022/23,2nd,1,S Lindsay-Mccall,,,,,,,,,,,,,
470,Sam Lindsay-McCall,2022/23,1st,15,S Lindsay-Mccall,,,,,,,,,,,,,


In [None]:
pitchero_df[pitchero_df["Player_join"]=="S Lindsay"].sort_values(["Season","Squad"])

Unnamed: 0,Player,Season,Squad,TotalGames,Player_join,A,T,Con,PK,DG,YC,RC,Points,PPG,Tries,Cons,Pens
464,Sam Lindsay-McCall,2021/22,1st,15,S Lindsay,,,,,,,,,,,,
348,Sam Lindsay-McCall,2021/22,2nd,2,S Lindsay,,,,,,,,,,,,
18,Sam Lindsay-McCall,2022/23,1st,15,S Lindsay,,,,,,,,,,,,
405,Sam Lindsay-McCall,2022/23,2nd,1,S Lindsay,,,,,,,,,,,,
346,Sam Lindsay-McCall,2023/24,1st,23,S Lindsay,,,,,,,,,,,,
466,Sam Lindsay-McCall,2024/25,1st,15,S Lindsay,,,,,,,,,,,,
388,Sam Lindsay-McCall,2024/25,2nd,1,S Lindsay,,,,,,,,,,,,
