In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell execution and edits to project code are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# NOTE(review): looks redundant directly after %load_ext above — confirm and drop.
%reload_ext autoreload

from IPython.display import Javascript, display, clear_output, HTML

# Inject CSS that forces elements with the `.container` class to full width
# (presumably the classic Notebook page container — verify for other frontends).
display(HTML("<style>.container { width:100% !important;}</style>"))

import pandas as pd

# Let pandas render up to 500 columns / 500 rows before truncating the display.
pd.set_option("display.max_columns", 500)
pd.set_option("display.max_rows", 500)

In [2]:
%run data_loading.ipynb

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
from typing import Literal

In [4]:
fixtures_df = get_fixtures_df()
teams_df = get_teams_df()

In [5]:
def merge_fixtures_and_teams(
    fixtures_df: pd.DataFrame, teams_df: pd.DataFrame
) -> pd.DataFrame:
    """Attach home and away team names to every fixture.

    For each side ("home", then "away") the ``team_id``, ``team_short_name``
    and ``team_name`` columns of ``teams_df`` are prefixed with the side and
    left-joined onto the fixtures via ``<side>_team_id``.

    Args:
        fixtures_df: Fixtures with ``home_team_id`` and ``away_team_id`` columns.
        teams_df: Teams with ``team_id``, ``team_short_name`` and ``team_name``
            columns; any other columns are ignored.

    Returns:
        A new frame containing the fixture columns followed by
        ``home_team_short_name``, ``home_team_name``, ``away_team_short_name``
        and ``away_team_name``. Fixtures without a matching team keep their
        row and get missing values (left join).
    """
    team_columns = ("team_id", "team_short_name", "team_name")
    merged_df = fixtures_df
    for side in ("home", "away"):
        prefixed_names = {column: f"{side}_{column}" for column in team_columns}
        renamed_teams_df = teams_df.rename(columns=prefixed_names)
        side_teams_df = renamed_teams_df[list(prefixed_names.values())]
        merged_df = merged_df.merge(
            side_teams_df, on=[f"{side}_team_id"], how="left"
        )
    return merged_df

In [6]:
fixtures_df = merge_fixtures_and_teams(fixtures_df=fixtures_df, teams_df=teams_df)

In [7]:
fixtures_df.head().T

Unnamed: 0,0,1,2,3,4
global_match_id,2561895,2561896,2561897,2561900,2561899
match_id,1,2,3,6,5
game_week,1,1,1,1,1
is_finished,False,False,False,False,False
is_finished_provisional,False,False,False,False,False
match_start_time,2025-08-15T19:00:00Z,2025-08-16T11:30:00Z,2025-08-16T14:00:00Z,2025-08-16T14:00:00Z,2025-08-16T14:00:00Z
is_provisional_start_time,False,False,False,False,False
is_started,False,False,False,False,False
home_team_id,12,2,6,18,17
away_team_id,4,15,10,3,19


In [8]:
import numpy as np

In [9]:
def get_team_fixtures_df(fixtures_df: pd.DataFrame) -> pd.DataFrame:
    """Reshape one-row-per-match fixtures into one row per team per match.

    Every fixture yields two rows: one from the home side's point of view and
    one from the away side's. Side-specific columns are renamed to neutral
    names (``team_id``, ``team_name``, ``team_short_name``, ``goals_for``,
    ``goals_against``) and three columns are added:

    * ``home_away``: "home" or "away".
    * ``result``: "W", "D" or "L" from comparing goals for/against, ``None``
      when none of the comparisons holds (e.g. scores are missing).
    * ``points``: 3, 1 or 0 for W/D/L, NaN when there is no result.

    Args:
        fixtures_df: Fixtures with ``match_id``, ``game_week``, ``is_finished``,
            ``is_started``, ``match_start_time`` plus, for both "home" and
            "away", ``<side>_team_id``, ``<side>_team_name``,
            ``<side>_team_short_name`` and ``<side>_team_score``.

    Returns:
        A new frame sorted by ``match_id`` with the home row before the away
        row of each match, and a fresh 0..n-1 index.
    """
    shared_columns = [
        "match_id",
        "game_week",
        "is_finished",
        "is_started",
        "match_start_time",
    ]
    outcome_labels = ["W", "D", "L"]
    outcome_points = [3, 1, 0]

    per_side_frames = []
    for side, opponent in (("home", "away"), ("away", "home")):
        # Insertion order of this mapping fixes the output column order.
        neutral_names = {
            f"{side}_team_id": "team_id",
            f"{side}_team_name": "team_name",
            f"{side}_team_short_name": "team_short_name",
            f"{side}_team_score": "goals_for",
            f"{opponent}_team_score": "goals_against",
        }
        side_df = fixtures_df[shared_columns + list(neutral_names)].rename(
            columns=neutral_names
        )
        side_df["home_away"] = side

        scored = side_df["goals_for"]
        conceded = side_df["goals_against"]
        # Comparisons against NaN are all False, so rows without scores fall
        # through to the `default` value of each np.select call below.
        outcome_masks = [scored > conceded, scored == conceded, scored < conceded]
        side_df["result"] = np.select(outcome_masks, outcome_labels, default=None)
        side_df["points"] = np.select(outcome_masks, outcome_points, default=np.nan)

        per_side_frames.append(side_df)

    stacked_df = pd.concat(per_side_frames)
    # Descending on `home_away` puts "home" ahead of "away" within a match.
    return stacked_df.sort_values(
        by=["match_id", "home_away"], ascending=[True, False], ignore_index=True
    )

In [10]:
team_fixtures_df = get_team_fixtures_df(fixtures_df=fixtures_df)

In [11]:
team_fixtures_df

Unnamed: 0,match_id,game_week,is_finished,is_started,match_start_time,team_id,team_name,team_short_name,goals_for,goals_against,home_away,result,points
0,1,1,False,False,2025-08-15T19:00:00Z,12,Liverpool,LIV,,,home,,
1,1,1,False,False,2025-08-15T19:00:00Z,4,Bournemouth,BOU,,,away,,
2,2,1,False,False,2025-08-16T11:30:00Z,2,Aston Villa,AVL,,,home,,
3,2,1,False,False,2025-08-16T11:30:00Z,15,Newcastle,NEW,,,away,,
4,3,1,False,False,2025-08-16T14:00:00Z,6,Brighton,BHA,,,home,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
755,378,38,False,False,2026-05-24T15:00:00Z,7,Chelsea,CHE,,,away,,
756,379,38,False,False,2026-05-24T15:00:00Z,18,Spurs,TOT,,,home,,
757,379,38,False,False,2026-05-24T15:00:00Z,9,Everton,EVE,,,away,,
758,380,38,False,False,2026-05-24T15:00:00Z,19,West Ham,WHU,,,home,,


In [12]:
def get_league_table_df(
    team_fixtures_df: pd.DataFrame,
    started_or_finished: Literal["started", "finished"] = "started",
) -> pd.DataFrame:
    """Aggregate per-team fixture rows into a ranked league table.

    Args:
        team_fixtures_df: One row per team per match with at least the columns
            ``match_id``, ``home_away``, ``team_name``, ``result``, ``points``,
            ``goals_for``, ``goals_against`` and ``is_<started_or_finished>``.
        started_or_finished: Selects the boolean status column
            (``is_started`` or ``is_finished``) used for ``matches_played``
            and for deciding which results appear in ``form``.

    Returns:
        One row per ``team_name`` with ``matches_played``, ``won``, ``drawn``,
        ``lost``, ``points``, ``goals_for``, ``goals_against``, ``form`` (list
        of up to the five most recent results among counted matches, oldest
        first), ``goal_difference`` and ``position``. Rows are ordered by
        points, then goal difference, then goals for, all descending, and
        ``position`` runs from 1 to the number of teams present.

    Raises:
        KeyError: If a required column, including the selected status column,
            is missing.

    Note:
        ``won``/``drawn``/``lost``, ``points`` and the goal totals are taken
        over every row that has a result, independent of the status flag;
        only ``matches_played`` and ``form`` depend on it.
    """
    status_column = f"is_{started_or_finished}"

    # Chronological order (home row before away row) so `.tail(5)` in the form
    # helper picks the most recent matches; the fresh 0..n-1 index keeps the
    # status mask below uniquely addressable by label.
    team_fixtures_df = team_fixtures_df.sort_values(
        by=["match_id", "home_away"], ascending=[True, False], ignore_index=True
    )
    counts_towards_form = team_fixtures_df[status_column]

    def _recent_form(results: pd.Series) -> list:
        # Align the frame-wide status mask to this team's rows explicitly
        # before filtering, instead of relying on implicit reindexing.
        counted = results[counts_towards_form.loc[results.index]]
        return counted.tail(5).tolist()

    league_table_df = team_fixtures_df.groupby(by=["team_name"], as_index=False).agg(
        matches_played=(status_column, "sum"),
        won=("result", lambda results: (results == "W").sum()),
        drawn=("result", lambda results: (results == "D").sum()),
        lost=("result", lambda results: (results == "L").sum()),
        points=("points", "sum"),
        goals_for=("goals_for", "sum"),
        goals_against=("goals_against", "sum"),
        form=("result", _recent_form),
    )
    league_table_df["goal_difference"] = (
        league_table_df["goals_for"] - league_table_df["goals_against"]
    )
    league_table_df = league_table_df.sort_values(
        by=["points", "goal_difference", "goals_for"],
        ascending=False,
        ignore_index=True,
    )
    # Number positions from the actual team count rather than assuming 20, so
    # partial data (or a different league size) no longer raises.
    league_table_df["position"] = range(1, len(league_table_df) + 1)
    return league_table_df

In [13]:
league_table_df = get_league_table_df(team_fixtures_df=team_fixtures_df)

In [14]:
# Badge image URL for each club (48x48 PNGs served from ssl.gstatic.com),
# keyed by team name.
# The keys look like the `team_name` values in the fixtures data (e.g. "Spurs",
# "Man Utd") — TODO confirm every team_name has an entry before indexing.
# NOTE(review): not referenced by any of the cells visible here.
TEAM_BADGES = {
    "Arsenal": "https://ssl.gstatic.com/onebox/media/sports/logos/4us2nCgl6kgZc0t3hpW75Q_48x48.png",
    "Aston Villa": "https://ssl.gstatic.com/onebox/media/sports/logos/uyNNelfnFvCEnsLrUL-j2Q_48x48.png",
    "Bournemouth": "https://ssl.gstatic.com/onebox/media/sports/logos/IcOt-hrK04B-RlRwI3R0yA_48x48.png",
    "Brentford": "https://ssl.gstatic.com/onebox/media/sports/logos/QOUce0WQBYqnkSmN6_TxGA_48x48.png",
    "Brighton": "https://ssl.gstatic.com/onebox/media/sports/logos/EKIe0e-ZIphOcfQAwsuEEQ_48x48.png",
    "Burnley": "https://ssl.gstatic.com/onebox/media/sports/logos/teLLSaMXim_8BA1d93sMng_48x48.png",
    "Chelsea": "https://ssl.gstatic.com/onebox/media/sports/logos/fhBITrIlbQxhVB6IjxUO6Q_48x48.png",
    "Crystal Palace": "https://ssl.gstatic.com/onebox/media/sports/logos/8piQOzndGmApKYTcvyN9vA_48x48.png",
    "Everton": "https://ssl.gstatic.com/onebox/media/sports/logos/C3J47ea36cMBc4XPbp9aaA_48x48.png",
    "Fulham": "https://ssl.gstatic.com/onebox/media/sports/logos/Gh7_5p3n364p4vxeM8FhNg_48x48.png",
    "Ipswich": "https://ssl.gstatic.com/onebox/media/sports/logos/56vquJBk5U16Dng7txLXCw_48x48.png",
    "Leeds": "https://ssl.gstatic.com/onebox/media/sports/logos/5dqfOKpjjW6EwTAx_FysKQ_48x48.png",
    "Leicester": "https://ssl.gstatic.com/onebox/media/sports/logos/UDYY4FSlty6fXFBzvFfcyw_48x48.png",
    "Liverpool": "https://ssl.gstatic.com/onebox/media/sports/logos/nGfV05dipbAc7zzojivKew_48x48.png",
    "Man City": "https://ssl.gstatic.com/onebox/media/sports/logos/z44l-a0W1v5FmgPnemV6Xw_48x48.png",
    "Man Utd": "https://ssl.gstatic.com/onebox/media/sports/logos/udQ6ns69PctCv143h-GeYw_48x48.png",
    "Newcastle": "https://ssl.gstatic.com/onebox/media/sports/logos/96CcNNQ0AYDAbssP0V9LuQ_48x48.png",
    "Nott'm Forest": "https://ssl.gstatic.com/onebox/media/sports/logos/Zr6FbE-8pDH7UBpWCO8U9A_48x48.png",
    "Southampton": "https://ssl.gstatic.com/onebox/media/sports/logos/y1V4sm2SEBiWUPRIYl5rfg_48x48.png",
    "Spurs": "https://ssl.gstatic.com/onebox/media/sports/logos/k3Q_mKE98Dnohrcea0JFgQ_48x48.png",
    "Sunderland": "https://ssl.gstatic.com/onebox/media/sports/logos/CQFeTfHrtxqgr3VKWtTwfA_48x48.png",
    "West Ham": "https://ssl.gstatic.com/onebox/media/sports/logos/bXkiyIzsbDip3x2FFcUU3A_48x48.png",
    "Wolves": "https://ssl.gstatic.com/onebox/media/sports/logos/-WjHLbBIQO9xE2e2MW3OPQ_48x48.png",
}

In [15]:
team_fixtures_df.head().T

Unnamed: 0,0,1,2,3,4
match_id,1,1,2,2,3
game_week,1,1,1,1,1
is_finished,False,False,False,False,False
is_started,False,False,False,False,False
match_start_time,2025-08-15T19:00:00Z,2025-08-15T19:00:00Z,2025-08-16T11:30:00Z,2025-08-16T11:30:00Z,2025-08-16T14:00:00Z
team_id,12,4,2,15,6
team_name,Liverpool,Bournemouth,Aston Villa,Newcastle,Brighton
team_short_name,LIV,BOU,AVL,NEW,BHA
goals_for,,,,,
goals_against,,,,,


In [23]:
def get_team_position_vs_game_week(
    team_fixtures_df: pd.DataFrame, max_game_week: int = 38
) -> pd.DataFrame:
    """Build a wide table of each team's league position after every game week.

    For each game week ``w`` from 1 to ``max_game_week`` the league table is
    computed (via ``get_league_table_df`` with its default settings) from all
    fixture rows whose ``game_week`` is at most ``w``, and the resulting
    ``position`` column is stored as ``game_week_<w>``.

    Args:
        team_fixtures_df: One row per team per match, including a
            ``game_week`` column and whatever ``get_league_table_df`` needs.
        max_game_week: Last game week to include. Defaults to 38, the value
            that was previously hard-coded.

    Returns:
        A frame with a ``team_name`` column followed by one
        ``game_week_<w>`` column per game week.

    Raises:
        ValueError: If ``max_game_week`` is smaller than 1.
    """
    if max_game_week < 1:
        raise ValueError(f"max_game_week must be at least 1, got {max_game_week}")

    weekly_position_dfs = []
    for game_week in range(1, max_game_week + 1):
        # Cumulative view of the season: every fixture up to this game week.
        fixtures_to_date_df = team_fixtures_df[
            team_fixtures_df["game_week"] <= game_week
        ].copy()
        week_league_table_df = get_league_table_df(
            team_fixtures_df=fixtures_to_date_df
        )
        weekly_position_dfs.append(
            week_league_table_df.set_index("team_name")[["position"]].rename(
                columns={"position": f"game_week_{game_week}"}
            )
        )

    # Indexed by team_name, so the per-week columns align on team when joined.
    team_position_vs_game_week_df = pd.concat(weekly_position_dfs, axis=1)
    return team_position_vs_game_week_df.reset_index()

In [24]:
team_position_vs_game_week = get_team_position_vs_game_week(team_fixtures_df=team_fixtures_df)

In [25]:
team_position_vs_game_week

Unnamed: 0,team_name,game_week_1,game_week_2,game_week_3,game_week_4,game_week_5,game_week_6,game_week_7,game_week_8,game_week_9,game_week_10,game_week_11,game_week_12,game_week_13,game_week_14,game_week_15,game_week_16,game_week_17,game_week_18,game_week_19,game_week_20,game_week_21,game_week_22,game_week_23,game_week_24,game_week_25,game_week_26,game_week_27,game_week_28,game_week_29,game_week_30,game_week_31,game_week_32,game_week_33,game_week_34,game_week_35,game_week_36,game_week_37,game_week_38
0,Arsenal,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
1,Aston Villa,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
2,Bournemouth,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3
3,Brentford,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4
4,Brighton,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
5,Burnley,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6
6,Chelsea,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
7,Crystal Palace,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8
8,Everton,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9
9,Fulham,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10
