In [1]:
# Reload imported modules before each cell runs, so edits to helper code are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# NOTE(review): redundant directly after %load_ext above; kept unchanged.
%reload_ext autoreload

from IPython.display import Javascript, display, clear_output, HTML

# Stretch the notebook container to the full browser width.
display(HTML("<style>.container { width:100% !important;}</style>"))

import pandas as pd

# Let pandas render up to 500 columns / 500 rows before truncating a frame.
pd.set_option("display.max_columns", 500)
pd.set_option("display.max_rows", 500)

In [2]:
# Execute the data-loading notebook inside this kernel.
# NOTE(review): get_fixtures_df, get_teams_df, get_entries and typing.Dict are
# used below but never defined in this notebook; presumably they come from here.
%run data_loading.ipynb

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
from typing import Literal

In [4]:
# Load the raw fixture list and the team lookup table.
# (Both loaders are defined outside this notebook.)
fixtures_df = get_fixtures_df()
teams_df = get_teams_df()

In [5]:
def merge_fixtures_and_teams(
    fixtures_df: pd.DataFrame, teams_df: pd.DataFrame
) -> pd.DataFrame:
    """Attach home and away team names to every fixture row.

    Args:
        fixtures_df: One row per match; must contain ``home_team_id`` and
            ``away_team_id``.
        teams_df: Team lookup; must contain ``team_id``, ``team_short_name``
            and ``team_name``. Any other columns are ignored.

    Returns:
        A new frame with the fixture columns followed by
        ``home_team_short_name``, ``home_team_name``,
        ``away_team_short_name`` and ``away_team_name``. Rows are kept even
        when a team id has no match (left join).
    """
    team_fields = ("team_id", "team_short_name", "team_name")
    merged_df = fixtures_df
    for side in ("home", "away"):
        # Prefix the lookup columns so they line up with the fixture's side.
        side_columns = {field: f"{side}_{field}" for field in team_fields}
        side_teams_df = teams_df.rename(columns=side_columns)
        side_teams_df = side_teams_df[list(side_columns.values())].copy()
        merged_df = merged_df.merge(
            side_teams_df, how="left", on=[f"{side}_team_id"]
        )
    return merged_df

In [6]:
# Add home/away team names and short names to each fixture.
# NOTE(review): this rebinds fixtures_df, so the cell is not idempotent; re-running
# it without re-running In [4] merges the name columns a second time.
fixtures_df = merge_fixtures_and_teams(fixtures_df=fixtures_df, teams_df=teams_df)

In [7]:
# Transposed preview of the first five merged fixtures (one column per match).
fixtures_df.head().T

Unnamed: 0,0,1,2,3,4
global_match_id,2561895,2561896,2561897,2561900,2561899
match_id,1,2,3,6,5
game_week,1,1,1,1,1
is_finished,False,False,False,False,False
is_finished_provisional,False,False,False,False,False
match_start_time,2025-08-15 19:00:00+00:00,2025-08-16 11:30:00+00:00,2025-08-16 14:00:00+00:00,2025-08-16 14:00:00+00:00,2025-08-16 14:00:00+00:00
is_provisional_start_time,False,False,False,False,False
is_started,False,False,False,False,False
home_team_id,12,2,6,18,17
away_team_id,4,15,10,3,19


In [8]:
import numpy as np

In [9]:
def get_team_fixtures_df(fixtures_df: pd.DataFrame) -> pd.DataFrame:
    """Reshape match-level fixtures into one row per team per match.

    Every fixture yields two rows, one from the home side's point of view and
    one from the away side's.

    Args:
        fixtures_df: Fixtures already merged with team names; must contain
            ``match_id``, ``game_week``, ``is_finished``, ``is_started``,
            ``match_start_time`` and the ``home_*`` / ``away_*`` columns for
            ``team_id``, ``team_name``, ``team_short_name`` and ``team_score``.

    Returns:
        A frame sorted by ``match_id`` with the home row first, holding the
        shared match columns plus ``team_id``, ``team_name``,
        ``team_short_name``, ``goals_for``, ``goals_against``, ``home_away``,
        ``result`` ("W"/"D"/"L", or None when none of the score comparisons
        holds) and ``points`` (3/1/0, or NaN in that same case).
    """
    shared_columns = [
        "match_id",
        "game_week",
        "is_finished",
        "is_started",
        "match_start_time",
    ]
    per_side_frames = []
    for side, opponent in (("home", "away"), ("away", "home")):
        # Maps this side's columns to side-agnostic names; dict order fixes
        # the output column order.
        side_columns = {
            f"{side}_team_id": "team_id",
            f"{side}_team_name": "team_name",
            f"{side}_team_short_name": "team_short_name",
            f"{side}_team_score": "goals_for",
            f"{opponent}_team_score": "goals_against",
        }
        side_df = (
            fixtures_df[shared_columns + list(side_columns)]
            .rename(columns=side_columns)
            .copy()
        )
        side_df["home_away"] = side

        scored = side_df["goals_for"]
        conceded = side_df["goals_against"]
        outcomes = [scored > conceded, scored == conceded, scored < conceded]
        # Rows where no comparison is true fall through to the defaults.
        side_df["result"] = np.select(
            condlist=outcomes, choicelist=["W", "D", "L"], default=None
        )
        side_df["points"] = np.select(
            condlist=outcomes, choicelist=[3, 1, 0], default=np.nan
        )
        per_side_frames.append(side_df)

    combined_df = pd.concat(per_side_frames)
    # Descending on home_away puts "home" ahead of "away" within a match.
    return combined_df.sort_values(
        by=["match_id", "home_away"], ascending=[True, False], ignore_index=True
    )

In [10]:
# Reshape to one row per team per match (two rows for every fixture).
team_fixtures_df = get_team_fixtures_df(fixtures_df=fixtures_df)

In [11]:
# Display the per-team fixture rows; scores, result and points are empty while
# no match has been played.
team_fixtures_df

Unnamed: 0,match_id,game_week,is_finished,is_started,match_start_time,team_id,team_name,team_short_name,goals_for,goals_against,home_away,result,points
0,1,1,False,False,2025-08-15 19:00:00+00:00,12,Liverpool,LIV,,,home,,
1,1,1,False,False,2025-08-15 19:00:00+00:00,4,Bournemouth,BOU,,,away,,
2,2,1,False,False,2025-08-16 11:30:00+00:00,2,Aston Villa,AVL,,,home,,
3,2,1,False,False,2025-08-16 11:30:00+00:00,15,Newcastle,NEW,,,away,,
4,3,1,False,False,2025-08-16 14:00:00+00:00,6,Brighton,BHA,,,home,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
755,378,38,False,False,2026-05-24 15:00:00+00:00,7,Chelsea,CHE,,,away,,
756,379,38,False,False,2026-05-24 15:00:00+00:00,18,Spurs,TOT,,,home,,
757,379,38,False,False,2026-05-24 15:00:00+00:00,9,Everton,EVE,,,away,,
758,380,38,False,False,2026-05-24 15:00:00+00:00,19,West Ham,WHU,,,home,,


In [12]:
def get_league_table_df(
    team_fixtures_df: pd.DataFrame,
    started_or_finished: Literal["started", "finished"] = "started",
) -> pd.DataFrame:
    """Aggregate per-team fixture rows into a ranked league table.

    Args:
        team_fixtures_df: One row per team per match, with at least
            ``match_id``, ``home_away``, ``team_name``, ``result``,
            ``points``, ``goals_for``, ``goals_against`` and the boolean
            ``is_started`` / ``is_finished`` columns.
        started_or_finished: Which flag (``is_started`` or ``is_finished``)
            decides that a match counts as played, for ``matches_played``
            and for ``form``.

    Returns:
        One row per team with ``team_name``, ``matches_played``, ``won``,
        ``drawn``, ``lost``, ``points``, ``goals_for``, ``goals_against``,
        ``form`` (list of up to the five most recent results among played
        matches, oldest first), ``goal_difference`` and ``position``. Rows are
        sorted by points, then goal difference, then goals for, all
        descending; ``position`` numbers them 1..n for however many teams are
        present.

    Note:
        ``won`` / ``drawn`` / ``lost`` / ``points`` and the goal totals are
        computed from every row that carries a result, independently of
        ``started_or_finished``.
    """
    played_col = f"is_{started_or_finished}"
    team_fixtures_df = team_fixtures_df.sort_values(
        by=["match_id", "home_away"], ascending=[True, False], ignore_index=True
    )
    # Mask over the freshly reset index. Each group's "result" Series keeps a
    # subset of that index, so the mask can be aligned by label.
    played_mask = team_fixtures_df[played_col]

    def _recent_form(results: pd.Series) -> list:
        # Last five results of matches that count as played, in match order.
        return results[played_mask.loc[results.index]].tail(5).tolist()

    league_table_df = team_fixtures_df.groupby(
        by=["team_name"], as_index=False
    ).agg(
        matches_played=(played_col, "sum"),
        won=("result", lambda results: (results == "W").sum()),
        drawn=("result", lambda results: (results == "D").sum()),
        lost=("result", lambda results: (results == "L").sum()),
        points=("points", "sum"),
        goals_for=("goals_for", "sum"),
        goals_against=("goals_against", "sum"),
        form=("result", _recent_form),
    )
    league_table_df["goal_difference"] = (
        league_table_df["goals_for"] - league_table_df["goals_against"]
    )
    league_table_df = league_table_df.sort_values(
        by=["points", "goal_difference", "goals_for"],
        ascending=False,
        ignore_index=True,
    )
    # Number the rows from the actual table length rather than assuming 20
    # teams, so any number of teams is accepted.
    league_table_df["position"] = range(1, len(league_table_df) + 1)
    return league_table_df

In [13]:
# Current league table, counting started matches (the function's default).
league_table_df = get_league_table_df(team_fixtures_df=team_fixtures_df)

In [14]:
# Display the table; with no matches played every team is level on zero, so
# the rows appear in alphabetical order.
league_table_df

Unnamed: 0,team_name,matches_played,won,drawn,lost,points,goals_for,goals_against,form,goal_difference,position
0,Arsenal,0,0,0,0,0.0,0,0,[],0,1
1,Aston Villa,0,0,0,0,0.0,0,0,[],0,2
2,Bournemouth,0,0,0,0,0.0,0,0,[],0,3
3,Brentford,0,0,0,0,0.0,0,0,[],0,4
4,Brighton,0,0,0,0,0.0,0,0,[],0,5
5,Burnley,0,0,0,0,0.0,0,0,[],0,6
6,Chelsea,0,0,0,0,0.0,0,0,[],0,7
7,Crystal Palace,0,0,0,0,0.0,0,0,[],0,8
8,Everton,0,0,0,0,0.0,0,0,[],0,9
9,Fulham,0,0,0,0,0.0,0,0,[],0,10


In [15]:
# Badge image URL (48x48 PNG) for each club, keyed by the team_name value used
# in the fixtures data; looked up with Series.map on home_team_name and
# away_team_name, so keys must match those names exactly.
# Holds 23 clubs, i.e. more than the 20 that appear in one league table.
TEAM_BADGES = {
    "Arsenal": "https://ssl.gstatic.com/onebox/media/sports/logos/4us2nCgl6kgZc0t3hpW75Q_48x48.png",
    "Aston Villa": "https://ssl.gstatic.com/onebox/media/sports/logos/uyNNelfnFvCEnsLrUL-j2Q_48x48.png",
    "Bournemouth": "https://ssl.gstatic.com/onebox/media/sports/logos/IcOt-hrK04B-RlRwI3R0yA_48x48.png",
    "Brentford": "https://ssl.gstatic.com/onebox/media/sports/logos/QOUce0WQBYqnkSmN6_TxGA_48x48.png",
    "Brighton": "https://ssl.gstatic.com/onebox/media/sports/logos/EKIe0e-ZIphOcfQAwsuEEQ_48x48.png",
    "Burnley": "https://ssl.gstatic.com/onebox/media/sports/logos/teLLSaMXim_8BA1d93sMng_48x48.png",
    "Chelsea": "https://ssl.gstatic.com/onebox/media/sports/logos/fhBITrIlbQxhVB6IjxUO6Q_48x48.png",
    "Crystal Palace": "https://ssl.gstatic.com/onebox/media/sports/logos/8piQOzndGmApKYTcvyN9vA_48x48.png",
    "Everton": "https://ssl.gstatic.com/onebox/media/sports/logos/C3J47ea36cMBc4XPbp9aaA_48x48.png",
    "Fulham": "https://ssl.gstatic.com/onebox/media/sports/logos/Gh7_5p3n364p4vxeM8FhNg_48x48.png",
    "Ipswich": "https://ssl.gstatic.com/onebox/media/sports/logos/56vquJBk5U16Dng7txLXCw_48x48.png",
    "Leeds": "https://ssl.gstatic.com/onebox/media/sports/logos/5dqfOKpjjW6EwTAx_FysKQ_48x48.png",
    "Leicester": "https://ssl.gstatic.com/onebox/media/sports/logos/UDYY4FSlty6fXFBzvFfcyw_48x48.png",
    "Liverpool": "https://ssl.gstatic.com/onebox/media/sports/logos/nGfV05dipbAc7zzojivKew_48x48.png",
    "Man City": "https://ssl.gstatic.com/onebox/media/sports/logos/z44l-a0W1v5FmgPnemV6Xw_48x48.png",
    "Man Utd": "https://ssl.gstatic.com/onebox/media/sports/logos/udQ6ns69PctCv143h-GeYw_48x48.png",
    "Newcastle": "https://ssl.gstatic.com/onebox/media/sports/logos/96CcNNQ0AYDAbssP0V9LuQ_48x48.png",
    "Nott'm Forest": "https://ssl.gstatic.com/onebox/media/sports/logos/Zr6FbE-8pDH7UBpWCO8U9A_48x48.png",
    "Southampton": "https://ssl.gstatic.com/onebox/media/sports/logos/y1V4sm2SEBiWUPRIYl5rfg_48x48.png",
    "Spurs": "https://ssl.gstatic.com/onebox/media/sports/logos/k3Q_mKE98Dnohrcea0JFgQ_48x48.png",
    "Sunderland": "https://ssl.gstatic.com/onebox/media/sports/logos/CQFeTfHrtxqgr3VKWtTwfA_48x48.png",
    "West Ham": "https://ssl.gstatic.com/onebox/media/sports/logos/bXkiyIzsbDip3x2FFcUU3A_48x48.png",
    "Wolves": "https://ssl.gstatic.com/onebox/media/sports/logos/-WjHLbBIQO9xE2e2MW3OPQ_48x48.png",
}

In [16]:
# Transposed preview of the first five per-team fixture rows.
team_fixtures_df.head().T

Unnamed: 0,0,1,2,3,4
match_id,1,1,2,2,3
game_week,1,1,1,1,1
is_finished,False,False,False,False,False
is_started,False,False,False,False,False
match_start_time,2025-08-15 19:00:00+00:00,2025-08-15 19:00:00+00:00,2025-08-16 11:30:00+00:00,2025-08-16 11:30:00+00:00,2025-08-16 14:00:00+00:00
team_id,12,4,2,15,6
team_name,Liverpool,Bournemouth,Aston Villa,Newcastle,Brighton
team_short_name,LIV,BOU,AVL,NEW,BHA
goals_for,,,,,
goals_against,,,,,


In [17]:
def get_team_position_vs_game_week(
    team_fixtures_df: pd.DataFrame, max_game_week: int = 38
) -> pd.DataFrame:
    """Track each team's league position after every game week.

    For each game week ``w`` from 1 to ``max_game_week`` a league table is
    built from all fixtures with ``game_week <= w`` (via
    ``get_league_table_df`` with its default settings) and the resulting
    positions are collected side by side.

    Args:
        team_fixtures_df: One row per team per match; must contain
            ``game_week`` plus everything ``get_league_table_df`` needs.
        max_game_week: Last game week to include. Defaults to 38, the value
            that used to be hard-coded here.

    Returns:
        A frame with ``team_name`` followed by one ``game_week_<w>`` column
        per game week holding that team's position.

    Raises:
        ValueError: If ``max_game_week`` is smaller than 1.
    """
    if max_game_week < 1:
        raise ValueError("max_game_week must be at least 1")

    weekly_position_dfs = []
    for game_week in range(1, max_game_week + 1):
        fixtures_to_date_df = team_fixtures_df[
            team_fixtures_df["game_week"] <= game_week
        ].copy()
        week_league_table_df = get_league_table_df(
            team_fixtures_df=fixtures_to_date_df
        )
        # Index by team so that concat lines the weekly columns up per team.
        weekly_position_dfs.append(
            week_league_table_df.set_index("team_name")[["position"]].rename(
                columns={"position": f"game_week_{game_week}"}
            )
        )
    return pd.concat(weekly_position_dfs, axis=1).reset_index()

In [18]:
# League position of every team after each game week.
team_position_vs_game_week = get_team_position_vs_game_week(team_fixtures_df=team_fixtures_df)

In [19]:
# Load the prediction entries (get_entries is defined outside this notebook).
# Used below as a mapping of entry name -> DataFrame with "Team" and "Position".
entries = get_entries()

In [20]:
def get_entries_position_df(
    teams_df: pd.DataFrame, entires: Dict[str, pd.DataFrame]
) -> pd.DataFrame:
    """Collect every entry's predicted position for each team in one frame.

    Args:
        teams_df: Team lookup with ``team_id``, ``team_name`` and
            ``team_short_name``.
        entires: Mapping of entry name to that entry's prediction frame, which
            must have ``Team`` and ``Position`` columns. The misspelled
            parameter name is kept because callers pass it by keyword.

    Returns:
        One row per team with ``team_id``, ``team_name``, ``team_short_name``
        followed by one column per entry (sorted by entry name) holding the
        predicted position. Teams an entry does not mention get NaN in that
        entry's column (left join on ``team_name``).
    """
    entries_position_df = teams_df[["team_id", "team_name", "team_short_name"]].copy()
    # Iterate the argument itself. The body used to read a global named
    # `entries`, so whatever the caller passed in was silently ignored.
    for entry_name, entry_df in entires.items():
        entry_to_merge = (
            entry_df[["Team", "Position"]]
            .rename(columns={"Team": "team_name", "Position": entry_name})
            .copy()
        )
        entries_position_df = entries_position_df.merge(
            entry_to_merge, how="left", on=["team_name"]
        )
    # Park the identifying columns in the index so that only the entry columns
    # get sorted alphabetically.
    entries_position_df = (
        entries_position_df.set_index(["team_id", "team_name", "team_short_name"])
        .sort_index(axis="columns")
        .reset_index()
    )
    return entries_position_df

In [21]:
# Predicted position of each team for every entry.
# NOTE(review): the keyword is spelled "entires" to match the function signature.
entries_position_df = get_entries_position_df(teams_df=teams_df, entires=entries)

In [22]:
# Display the predictions: one row per team, one column per entry.
entries_position_df

Unnamed: 0,team_id,team_name,team_short_name,TRich
0,1,Arsenal,ARS,1
1,2,Aston Villa,AVL,2
2,3,Burnley,BUR,6
3,4,Bournemouth,BOU,3
4,5,Brentford,BRE,4
5,6,Brighton,BHA,5
6,7,Chelsea,CHE,7
7,8,Crystal Palace,CRY,8
8,9,Everton,EVE,9
9,10,Fulham,FUL,10


In [23]:
def get_entry_score_vs_game_week(
    team_fixtures_df: pd.DataFrame, entries_position_df: pd.DataFrame
) -> pd.DataFrame:
    """Score every entry's predictions against the table after each game week.

    An entry's score for a game week is the sum, over all teams, of the
    absolute difference between the team's actual position and the position
    the entry predicted. Lower is better.

    Args:
        team_fixtures_df: One row per team per match (see
            ``get_team_fixtures_df``).
        entries_position_df: Three identifying columns followed by one
            prediction column per entry (see ``get_entries_position_df``).

    Returns:
        A frame with ``entry_name`` plus one ``GW<n>`` column for each game
        week that has started, sorted ascending by the last column.
    """
    # Every column after the three identifying ones is an entry's prediction.
    entry_names = entries_position_df.columns[3:]
    scores_df = pd.DataFrame({"entry_name": entry_names})

    last_game_week = team_fixtures_df["game_week"].max()
    for week in range(1, last_game_week + 1):
        score_col = f"GW{week}"
        fixtures_to_date_df = team_fixtures_df[
            team_fixtures_df["game_week"] <= week
        ].copy()
        table_df = get_league_table_df(
            team_fixtures_df=fixtures_to_date_df,
            started_or_finished="started",
        )
        # No team has played as many matches as there are weeks so far, so
        # this game week has not started yet: stop here.
        if table_df["matches_played"].max() < week:
            break

        with_predictions_df = table_df[["team_name", "position"]].merge(
            entries_position_df, how="left", on="team_name"
        )
        # Long format: one row per (team, entry) pair.
        long_df = with_predictions_df.melt(
            id_vars=["team_name", "position"],
            value_vars=entry_names,
            var_name="entry_name",
            value_name="prediction",
        )
        long_df[score_col] = abs(long_df["position"] - long_df["prediction"])
        week_scores_df = long_df.groupby(by=["entry_name"], as_index=False)[
            score_col
        ].sum()
        scores_df = scores_df.merge(week_scores_df, how="left", on=["entry_name"])

    latest_col = scores_df.columns[-1]
    return scores_df.sort_values(by=latest_col)

In [24]:
# Score every entry against the league table after each started game week.
# NOTE(review): the variable name carries the function's "get_" prefix although
# it holds the resulting DataFrame.
get_entry_score_vs_game_week_df = get_entry_score_vs_game_week(team_fixtures_df, entries_position_df)

In [25]:
# Display the scores; only entry_name is present while no game week has started.
get_entry_score_vs_game_week_df

Unnamed: 0,entry_name
0,TRich


In [54]:
# Prototype of the per-game-week preparation for game week 1: kick-off times
# in UK local time (tz-naive), badge URLs, and date / HH:MM helper columns.
game_week_fixtures = (
    fixtures_df.loc[fixtures_df["game_week"] == 1]
    .copy()
    .assign(
        # Each step sees the columns produced by the steps above it.
        match_start_time=lambda df: df["match_start_time"]
        .dt.tz_convert("Europe/London")
        .dt.tz_localize(None),
        home_team_badge=lambda df: df["home_team_name"].map(TEAM_BADGES),
        away_team_badge=lambda df: df["away_team_name"].map(TEAM_BADGES),
        match_start_date=lambda df: df["match_start_time"].dt.date,
        match_start_time_short=lambda df: df["match_start_time"]
        .dt.time.astype(str)
        .str[:5],
    )
)

In [59]:
# Group the game week's fixtures by calendar day: keys look like "Fri 15 Aug",
# values are lists of fixture records (one dict per match).
{
    pd.to_datetime(date).strftime("%a %d %b"): group.to_dict(orient="records")
    for date, group in game_week_fixtures.groupby("match_start_date")
}

{'Fri 15 Aug': [{'global_match_id': 2561895,
   'match_id': 1,
   'game_week': 1,
   'is_finished': False,
   'is_finished_provisional': False,
   'match_start_time': Timestamp('2025-08-15 20:00:00'),
   'is_provisional_start_time': False,
   'is_started': False,
   'home_team_id': 12,
   'away_team_id': 4,
   'home_team_score': None,
   'away_team_score': None,
   'minutes_played': 0,
   'stats': [],
   'home_team_difficulty': 3,
   'away_team_difficulty': 5,
   'pulse_id': 124791,
   'home_team_short_name': 'LIV',
   'home_team_name': 'Liverpool',
   'away_team_short_name': 'BOU',
   'away_team_name': 'Bournemouth',
   'home_team_badge': 'https://ssl.gstatic.com/onebox/media/sports/logos/nGfV05dipbAc7zzojivKew_48x48.png',
   'away_team_badge': 'https://ssl.gstatic.com/onebox/media/sports/logos/IcOt-hrK04B-RlRwI3R0yA_48x48.png',
   'match_start_date': datetime.date(2025, 8, 15),
   'match_start_time_short': '20:00'}],
 'Sat 16 Aug': [{'global_match_id': 2561896,
   'match_id': 2,
   '

In [95]:
def create_fixture_table_html(fixtures_df: pd.DataFrame, game_week: int) -> str:
    """Render one game week's fixtures as an HTML table, grouped by day.

    Each day gets a header row with the date (e.g. "Fri 22 Aug"), followed by
    one row per fixture: home name, home badge, kick-off time (HH:MM, UK local
    time), away badge, away name.

    Args:
        fixtures_df: Fixtures merged with team names; must contain
            ``game_week``, a timezone-aware ``match_start_time``,
            ``home_team_name`` and ``away_team_name``.
        game_week: The game week to render.

    Returns:
        An HTML fragment: a ``<style>`` block followed by
        ``<table id="fixtureTable">``. Team names and badge URLs are
        HTML-escaped. A team without an entry in ``TEAM_BADGES`` gets an empty
        badge cell.
    """
    # Function-scope import: a module-level `html` name could be shadowed by
    # notebook variables, and this keeps the cell self-contained.
    from html import escape

    def _badge_img(badge_url) -> str:
        # Teams missing from TEAM_BADGES map to NaN (a float), which used to be
        # rendered as <img src="nan">; emit nothing for those instead.
        if not isinstance(badge_url, str):
            return ""
        return f'<img src="{escape(badge_url, quote=True)}" width="30px">'

    game_week_fixtures = fixtures_df[
        fixtures_df["game_week"] == game_week
    ].copy()
    # Show kick-off in UK local time; drop the tz so .dt.time/.dt.date are naive.
    game_week_fixtures["match_start_time"] = (
        game_week_fixtures["match_start_time"]
        .dt.tz_convert("Europe/London")
        .dt.tz_localize(None)
    )
    game_week_fixtures["home_team_badge"] = game_week_fixtures["home_team_name"].map(TEAM_BADGES)
    game_week_fixtures["away_team_badge"] = game_week_fixtures["away_team_name"].map(TEAM_BADGES)
    game_week_fixtures["match_start_date"] = game_week_fixtures["match_start_time"].dt.date
    # "HH:MM:SS" -> "HH:MM"
    game_week_fixtures["match_start_time_short"] = (
        game_week_fixtures["match_start_time"].dt.time.astype(str).str[:5]
    )
    # groupby sorts its keys, so days come out in chronological order.
    game_week_fixtures_records = {
        pd.to_datetime(date).strftime("%a %d %b"): group.to_dict(orient="records")
        for date, group in game_week_fixtures.groupby("match_start_date")
    }
    # NOTE(review): the selectors below are not scoped to #fixtureTable, so they
    # also restyle any other table on the page the fragment is rendered into.
    html_parts = [
        """
    <style>
    table {
        width: auto;
        border-collapse: collapse;
        font-family: Arial, sans-serif;
    }

    td {
        font-size: 16px;
        color: #37003c;
        background-color: #ffffff;
    }

    tr:hover td {
        background-color: #f5f5f5 !important;
    }

    td.col-date {
        width: 300px;
        font-size: 20px;
        font-weight: bold;
    }

    td.col-name {
        width: 150px;
    }

    td.col-badge {
        width: 30px;
    }

    td.col-time {
        width: 60px;
        font-size: 18px;
        font-weight: bold;
    }

    </style>
    """,
        """
    <table id="fixtureTable">
        <tbody>
    """,
    ]
    for date, fixtures in game_week_fixtures_records.items():
        html_parts.append(f"""
            <tr>
                <td class="col-date" style="text-align: left">{date}</td>
                <td class="col-name"></td>
                <td class="col-badge"></td>
                <td class="col-time"></td>
                <td class="col-badge"></td>
                <td class="col-name"></td>
                <td class="col-date"></td>
            </tr>
        """)
        for fixture in fixtures:
            # quote=False leaves apostrophes alone (e.g. "Nott'm Forest") while
            # still neutralising <, > and & inside text content.
            home_name = escape(str(fixture["home_team_name"]), quote=False)
            away_name = escape(str(fixture["away_team_name"]), quote=False)
            home_badge = _badge_img(fixture["home_team_badge"])
            away_badge = _badge_img(fixture["away_team_badge"])
            html_parts.append(f"""
                <tr>
                    <td class="col-date"></td>
                    <td class="col-name" style="text-align: right">{home_name}</td>
                    <td class="col-badge">{home_badge}</td>
                    <td class="col-time" style="text-align: center">{fixture['match_start_time_short']}</td>
                    <td class="col-badge">{away_badge}</td>
                    <td class="col-name" style="text-align: left">{away_name}</td>
                    <td class="col-date"></td>
                </tr>
            """)
    html_parts.append("""
        </tbody>
    </table>
    """)
    return "".join(html_parts)

In [96]:
# Build the fixture-table HTML for game week 2.
test = create_fixture_table_html(fixtures_df, 2)

In [97]:
# Render the generated HTML inline.
HTML(test)

0,1,2,3,4,5,6
Fri 22 Aug,,,,,,
,West Ham,,20:00,,Chelsea,
Sat 23 Aug,,,,,,
,Man City,,12:30,,Spurs,
,Bournemouth,,15:00,,Wolves,
,Brentford,,15:00,,Aston Villa,
,Burnley,,15:00,,Sunderland,
,Arsenal,,17:30,,Leeds,
Sun 24 Aug,,,,,,
,Crystal Palace,,14:00,,Nott'm Forest,


In [102]:
# Scratch check: joining a single date yields it unchanged (no separator).
" - ".join(["Fri 15 Aug"])

'Fri 15 Aug'

In [101]:
# Scratch check: two dates are joined into a "first - last" range label.
" - ".join(["Fri 15 Aug", "Mon 18 Aug"])

'Fri 15 Aug - Mon 18 Aug'

In [133]:
# Scratch check: same join with a tuple instead of a list.
" - ".join(("Fri 15 Aug", "Mon 18 Aug"))

'Fri 15 Aug - Mon 18 Aug'

In [108]:
# Static mock-up of a matchweek header: title plus the date-range label.
HTML("""
<div 
  style="text-align: center; font-family: Arial, sans-serif;">
  <div style="font-weight: bold; font-size: 24px; color: #37003c;">Matchweek 1</div>
  <div style="font-size: 18px; color: #b9a4ba;">Fri 15 Aug - Mon 18 Aug</div>
</div>
""")

In [111]:
import datetime

In [130]:
# Scratch: current time as a naive datetime (no timezone attached).
datetime.datetime.now()

datetime.datetime(2025, 7, 30, 19, 19, 25, 181241)

In [131]:
# Today's date in UK local time, formatted as YYYY-MM-DD.
pd.Timestamp.now(tz="Europe/London").strftime("%Y-%m-%d")

'2025-07-30'

In [128]:
# Fixtures kicking off today or later.
# The cutoff is computed here rather than read from `test`: that name holds the
# fixture-table HTML string built above, so parsing it as a date fails on a
# fresh Restart & Run All.
today_london = pd.Timestamp.now(tz="Europe/London").strftime("%Y-%m-%d")
fixtures_df[fixtures_df["match_start_time"] >= today_london]

Unnamed: 0,global_match_id,match_id,game_week,is_finished,is_finished_provisional,match_start_time,is_provisional_start_time,is_started,home_team_id,away_team_id,home_team_score,away_team_score,minutes_played,stats,home_team_difficulty,away_team_difficulty,pulse_id,home_team_short_name,home_team_name,away_team_short_name,away_team_name
0,2561895,1,1,False,False,2025-08-15 19:00:00+00:00,False,False,12,4,,,0,[],3,5,124791,LIV,Liverpool,BOU,Bournemouth
1,2561896,2,1,False,False,2025-08-16 11:30:00+00:00,False,False,2,15,,,0,[],3,4,124792,AVL,Aston Villa,NEW,Newcastle
2,2561897,3,1,False,False,2025-08-16 14:00:00+00:00,False,False,6,10,,,0,[],3,3,124793,BHA,Brighton,FUL,Fulham
3,2561900,6,1,False,False,2025-08-16 14:00:00+00:00,False,False,18,3,,,0,[],2,3,124796,TOT,Spurs,BUR,Burnley
4,2561899,5,1,False,False,2025-08-16 14:00:00+00:00,False,False,17,19,,,0,[],2,2,124795,SUN,Sunderland,WHU,West Ham
5,2561901,7,1,False,False,2025-08-16 16:30:00+00:00,False,False,20,13,,,0,[],4,3,124797,WOL,Wolves,MCI,Man City
6,2561902,8,1,False,False,2025-08-17 13:00:00+00:00,False,False,7,8,,,0,[],3,4,124798,CHE,Chelsea,CRY,Crystal Palace
7,2561898,4,1,False,False,2025-08-17 13:00:00+00:00,False,False,16,5,,,0,[],3,3,124794,NFO,Nott'm Forest,BRE,Brentford
8,2561903,9,1,False,False,2025-08-17 15:30:00+00:00,False,False,14,1,,,0,[],4,3,124799,MUN,Man Utd,ARS,Arsenal
9,2561904,10,1,False,False,2025-08-18 19:00:00+00:00,False,False,11,9,,,0,[],2,2,124800,LEE,Leeds,EVE,Everton


In [110]:
# All distinct game weeks present in the fixture list, in order of appearance.
list(fixtures_df["game_week"].unique())

[1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38]