In [None]:
# Add ./python/ to path
import sys
sys.path.append('python/')

from data_prep import *
from charts import *
from players import *
from video_analysis import *
from team_sheets import *

import gspread
from google.oauth2.service_account import Credentials   
import pandas as pd

# Authorise a gspread client from the service-account key file and open the workbook.
scope = ['https://spreadsheets.google.com/feeds','https://www.googleapis.com/auth/drive']
creds = Credentials.from_service_account_file("client_secret.json", scopes=scope)
client = gspread.authorize(creds)
sheet_url = "https://docs.google.com/spreadsheets/d/1keX2eGbyiBejpfMPMbL7aXYLy7IDJZDBXQqiKVQavz0/edit?gid=257605888#gid=257605888t"
my_sheet = client.open_by_url(sheet_url)

# Read columns A-S of the first worksheet once: row 1 is the header and the
# remaining rows are the games. Fetching 'A1:S' alone replaces the previous
# pair of lookups/requests ('A2:S' for the rows and 'A1:S' for the header),
# so header and rows always come from the same snapshot of the sheet.
games_rows = my_sheet.get_worksheet(0).batch_get(['A1:S'])[0]
df = pd.DataFrame(games_rows[1:], columns=games_rows[0])

# Shorten the season label: keep characters 2-4 plus the final two characters
# (the displayed values look like "15/16").
df["Season"] = df["Season"].apply(lambda x: str(x)[2:5] + str(x)[-2:])
df

Unnamed: 0,Season,Date,Club,Team,Competition,Stage,Opposition,Home/Away,Venue,Result,F,A,Position,Time,Start,MOTM,YC,Try,Notes
0,15/16,2015-11-21,Ealing,2nd,MMT 2,,Rosslyn Park IVs (4th),A,Away,L,5,40,Lock,80,TRUE,FALSE,FALSE,0,
1,15/16,2015-11-28,Ealing,Exiles,MMT Vase,,Uxbridge 2nd,A,Away,L,0,17,No. 8,80,TRUE,FALSE,FALSE,0,
2,15/16,2015-12-05,Ealing,Exiles,MMT 5,,Twickenham Bulldogs (2nd),H,Ealing,L,15,29,Lock,80,TRUE,FALSE,FALSE,0,
3,15/16,2015-12-12,Ealing,Exiles,MMT 5,,Hammersmith & Fulham 5th,A,Away,L,26,38,Lock,80,TRUE,FALSE,FALSE,0,
4,15/16,2015-12-19,Ealing,Exiles,Friendly,,High Wycombe 3rd,A,Away,W,12,5,Lock,80,TRUE,FALSE,FALSE,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
198,24/25,2025-04-05,East Grinstead,1st,Counties 1 Surrey/Sussex,,London Cornish,A,Away,D,21,21,Lock,80,TRUE,FALSE,FALSE,0,
199,24/25,2025-04-12,East Grinstead,1st,Counties 1 Plate South,,Haywards Heath,H,Home,W,45,37,Lock,80,TRUE,FALSE,FALSE,0,
200,24/25,2025-05-03,East Grinstead,1st,Counties 1 Plate South,,Ashford,A,Away,L,16,27,Lock,80,TRUE,FALSE,FALSE,0,
201,25/26,2025-08-23,East Grinstead,1st,Friendly,,Beccehamians,H,Home,L,19,24,Lock,40,TRUE,FALSE,FALSE,0,


In [11]:
# Read columns A-F of the second worksheet once: row 1 is the header and the
# remaining rows are the team-sheet entries. A single 'A1:F' read replaces the
# previous two lookups/requests ('A2:F' for rows, 'A1:F' for the header).
mates_rows = my_sheet.get_worksheet(1).batch_get(['A1:F'])[0]
team_mates = pd.DataFrame(mates_rows[1:], columns=mates_rows[0])

team_mates

Unnamed: 0,Club,Team,name,Date,cap,pos
0,East Grinstead,1st,Guy Collins,2025-09-20,FALSE,1
1,East Grinstead,1st,Ben Tottman,2025-09-20,FALSE,2
2,East Grinstead,1st,Josh Brimecombe,2025-09-20,FALSE,3
3,East Grinstead,1st,Sam Lindsay-McCall,2025-09-20,FALSE,4
4,East Grinstead,1st,John Peaty,2025-09-20,FALSE,5
...,...,...,...,...,...,...
3813,Ealing,2nd,Nick Yau,2015-11-21,FALSE,14
3814,Ealing,2nd,Eoghan O'Sullivan,2015-11-21,FALSE,15
3815,Ealing,2nd,Sam Lindsay,2015-11-21,FALSE,16
3816,Ealing,2nd,Harry Charalanbous,2015-11-21,FALSE,17


In [13]:
import altair as alt

# Fixed colour per club, shared by every chart that colours by Club.
team_scale = alt.Scale(domain=["Ealing", "East Grinstead"], range=['#146f14', '#202947']) 

# Maps the numeric `pos` value to a position group. Values without an entry
# (the team sheets also contain 16, 17, ...) get no position group.
d = {
    1: "Prop",
    2: "Hooker",
    3: "Prop",
    4: "Second Row",
    5: "Second Row",
    6: "Back Row",
    7: "Back Row",
    8: "Back Row",
    9: "Scrum Half",
    10: "Fly Half",
    11: "Back Three",
    12: "Centre",
    13: "Centre",
    14: "Back Three",
    15: "Back Three",
}

# Team mates table without the two 'Sam Lindsay' spellings (presumably the
# notebook owner's own entries - verify). Built as one chain that returns a new
# frame, so `team_mates` is never modified and no filtered slice is mutated in
# place (the previous rename(inplace=True) / column assignment on a slice is the
# chained-assignment pattern pandas warns about).
tdf = (
    team_mates
    .loc[~team_mates['name'].isin(['Sam Lindsay', 'Sam Lindsay-McCall'])]
    .rename(columns={"name": "Player"})
    # Non-numeric `pos` values become NaN instead of raising.
    .assign(pos=lambda frame: pd.to_numeric(frame["pos"], errors='coerce'))
    # d.get returns None for NaN and for any value missing from `d`.
    .assign(Position=lambda frame: frame["pos"].apply(d.get))
)

# Number of games per club, used in chart subtitles.
counts = df.value_counts('Club').to_dict()

def team_mates_chart(df, min_games=40):
    """Build a horizontal bar chart of the most frequent team mates.

    Rows are counted per (Player, Club) pair and only pairs with strictly more
    than ``min_games`` rows are drawn, sorted by descending count.

    Args:
        df: Table with one row per player appearance; must provide the
            ``Player`` and ``Club`` columns used by the encodings.
        min_games: Exclusive lower bound on the number of rows a player/club
            pair needs in order to be shown. Defaults to 40, the value that
            was previously hard-coded.

    Returns:
        The Altair chart object.

    Note:
        Reads the notebook-level ``team_scale`` (club colours) and ``counts``
        (games per club, used in the subtitle). A club missing from ``counts``
        is reported as 0 games instead of raising ``KeyError``.
    """
    ealing_games = counts.get('Ealing', 0)
    east_grinstead_games = counts.get('East Grinstead', 0)

    chart = alt.Chart(df).mark_bar().encode(
        y=alt.Y('Player:N', sort='-x', title=None),
        x=alt.X('countX:Q', title='Games', axis=alt.Axis(orient='top')),
        color=alt.Color('Club:N', scale=team_scale, legend=alt.Legend(title=None, orient='bottom-right')),
        tooltip=['Player', 'Club', alt.Tooltip('countX:Q', title="Games")]
    ).transform_aggregate(
        # One output row per player/club pair holding its number of input rows.
        countX='count()',
        groupby=['Player', 'Club']
    ).transform_filter(
        f"datum.countX > {min_games}"
    ).properties(
        title=alt.Title(
            text='Top Team Mates',
            subtitle=[
                f"Out of {ealing_games} games for Ealing", f"and {east_grinstead_games} games for East Grinstead"],
        ),
        width=400,
        height=alt.Step(20),
    )

    return chart

def games_minutes_chart(df, show_mins=True):
    """Build the per-season games chart, optionally overlaid with minutes played.

    Args:
        df: Games table; the encodings read its ``Season``, ``Club`` and
            ``Time`` columns.
        show_mins: When true, a black line of ``sum(Time)`` per season is
            layered on top with its own right-hand axis (domain 0-2000) and
            the games axis spans 0-40; otherwise only the bars and their
            labels are drawn and the games axis spans 0-30.

    Returns:
        The layered Altair chart. Uses the notebook-level ``team_scale`` for
        the club colours.
    """
    games_axis_max = 40 if show_mins else 30

    # Shared encoding for the bars and their labels: games per season.
    per_season = alt.Chart(df).encode(
        x=alt.X('Season:N'),
        y=alt.Y(
            "count():Q",
            title="Games",
            axis=alt.Axis(orient='left'),
            scale=alt.Scale(domain=[0, games_axis_max]),
        ),
        tooltip=[
            'Season',
            alt.Tooltip('count():Q', title="Games"),
            alt.Tooltip('sum(Time):Q', title="Minutes"),
        ],
    )

    bars = per_season.mark_bar().encode(
        color=alt.Color('Club:N', scale=team_scale, legend=None),
    )
    # NOTE(review): this label shows the game count although its title says
    # "Minutes played" - confirm the intended title.
    bar_labels = per_season.mark_text(dy=-10, fontSize=20, fontWeight="bold").encode(
        text=alt.Text("count():Q", title="Minutes played"),
    )

    layered = bars + bar_labels
    if show_mins:
        minutes_line = alt.Chart(df).mark_line(
            color="black",
            opacity=0.8,
            point=alt.OverlayMarkDef(filled=True, fill="black", size=50),
        ).encode(
            x=alt.X('Season:N'),
            y=alt.Y(
                "sum(Time):Q",
                title="Minutes",
                axis=alt.Axis(orient='right'),
                scale=alt.Scale(domain=[0, 2000]),
            ),
        )
        layered = layered + minutes_line

    heading = alt.Title(text=f'Games{" / Minutes" if show_mins else ""} per season')
    # Each layer keeps its own y scale so the games and minutes axes differ.
    return layered.resolve_scale(y='independent').properties(
        title=heading,
        width=alt.Step(50),
        height=280,
    )

games_minutes_chart(df, show_mins=True)

In [14]:
# Three most frequent team mates per position group. Selecting the "Player"
# column before apply keeps the grouping column out of each group's frame,
# which avoids the pandas deprecation warning about apply operating on the
# grouping columns; the resulting Position / Player / count table is unchanged.
tdf.groupby('Position')[['Player']].apply(lambda group: group.value_counts("Player").head(3)).reset_index()

  tdf.groupby('Position').apply(lambda x: x.value_counts("Player").head(3)).reset_index()


Unnamed: 0,Position,Player,count
0,Back Row,Ryland Thomas,43
1,Back Row,Aaron Boczek,32
2,Back Row,Connor Leggat,28
3,Back Three,Jake Radcliffe,51
4,Back Three,Ali Moffatt,38
5,Back Three,Oli Billin,23
6,Centre,Max Crawley-Moore,53
7,Centre,Chris May-Miller,21
8,Centre,Ryan Walsh,15
9,Fly Half,Dan Poulton,40


In [15]:
# Shared encoding for the bars and their labels: games per season, left axis.
base = alt.Chart(df).encode(
    x=alt.X('Season:N'),
    y=alt.Y(
        "count():Q",
        title="Games",
        axis=alt.Axis(orient='left'),
        scale=alt.Scale(domain=[0, 40]),
    ),
)
games = base.mark_bar().encode(
    color=alt.Color('Club:N', scale=team_scale, legend=None),
)
# NOTE(review): the label shows the game count although its title says
# "Minutes played" - confirm the intended title.
games_text = base.mark_text(dy=-10).encode(
    text=alt.Text("count():Q", title="Minutes played"),
)

# Minutes per season as a black line with point markers, on a right-hand axis.
mins = alt.Chart(df).mark_line(
    color="black",
    opacity=0.8,
    point=alt.OverlayMarkDef(filled=True, fill="black", size=50),
).encode(
    x=alt.X('Season:N'),
    y=alt.Y(
        "sum(Time):Q",
        title="Minutes",
        axis=alt.Axis(orient='right'),
        scale=alt.Scale(domain=[0, 2000]),
    ),
)

# Bars, labels and line layered in that order; independent y scales keep the
# games and minutes axes separate.
(alt.layer(games, games_text) + mins).properties(
    title=alt.Title(text='Games / Minutes per season'),
    width=alt.Step(50),
).resolve_scale(y='independent')

In [16]:
base = alt.Chart(df).encode(
    x = alt.X('Season:N',),
    y = alt.Y("sum(Try):Q", title="Tries scored", axis=alt.Axis(orient='left', format=".0f")),
)

tries = base.mark_bar().encode(
    color=alt.Color('Club:N', scale=team_scale, legend=None)
)
tries_text = base.mark_text(dy=-10).encode(
    text = alt.Text("average(Try):Q", title="Tries", format=".2f"),
)
(tries + tries_text).properties(width=alt.Step(50), title=alt.Title(
        text='Tries per season',
        subtitle="Label shows average tries per game"
    ))

In [17]:
# Split the Position column on "/" into a primary (pos1) and an optional
# secondary (pos2) position. n=1 caps the split at two parts and reindex pins
# the result to exactly two columns, so the assignment no longer fails when no
# row contains "/" (one column) or when a row contains more than one "/".
position_parts = (
    df['Position']
    .str.split("/", n=1, expand=True)
    .reindex(columns=[0, 1])
    .set_axis(['pos1', 'pos2'], axis=1)
)
df[['pos1', 'pos2']] = position_parts

# Melt to one row per (game, position) and drop the empty secondary positions.
df_melt = pd.melt(df, id_vars=['Season', 'Club'], value_vars=['pos1', 'pos2'], value_name='pos').dropna()

# One row of bars per season, counting games at each position.
alt.Chart(df_melt).mark_bar().encode(
    y=alt.Y('pos:N', title=None, sort=["Lock", "Flanker", "No. 8"]),
    x=alt.X('count()', title="Games"),
    row=alt.Row('Season', title=None, header=alt.Header(labelAngle=0, labelAlign='left', labelFontStyle="bold"), spacing=10),
    color=alt.Color('Club:N', scale=team_scale, legend=None),
).resolve_scale(y='independent').properties(
    title=alt.Title(
        text='Games by position',
        offset=20,
        anchor='middle',
    ),
    width=300,
)

In [8]:
# One thin bar per game, spanning from the F value to the A value on the y axis
# and coloured by result.
# NOTE(review): the colour domain lists only "L" and "W", while the games table
# also contains "D" results - confirm how draws should be shown.
alt.Chart(df).mark_bar(width=3).encode(
    x=alt.X('Date:T', scale=alt.Scale(nice=False)),
    y=alt.Y('F:Q', scale=alt.Scale(nice=False), title='Points for and against'),
    y2=alt.Y2('A:Q'),
    color=alt.Color(
        'Result:N',
        legend=alt.Legend(
            direction="horizontal",
            fillColor="white",
            orient='top-left',
            padding=10,
            strokeColor="black",
            symbolSize=150,
        ),
        scale=alt.Scale(domain=["L", "W"], range=['red', 'darkgreen']),
    ),
    tooltip=['Season', 'Club', 'Team', 'Result', 'F', 'A'],
).properties(
    width=1000,
    title=alt.Title(text='Points difference per game', anchor='middle', offset=20),
)