In [1]:
import os
from pathlib import Path

import polars as pl

# Work from the project root so the relative "data/..." paths used by the
# loading cells resolve. The guard keeps this cell idempotent: without it,
# every re-run would climb one directory higher and break those paths.
if not Path("data").is_dir():
    os.chdir(Path.cwd().parent)

In [2]:
# Read the teams snapshot and build an id -> short_name lookup from it.
df_teams = pl.read_json("data/teams/2025-07-25.json")
team_mappings = {
    team_id: short_name
    for team_id, short_name in df_teams.select("id", "short_name").iter_rows()
}

In [3]:
# Read the fixtures snapshot and preview its first rows.
df_fixtures = pl.read_json("data/fixtures/2025-07-23.json")
df_fixtures.head()

code,event,finished,finished_provisional,id,kickoff_time,minutes,provisional_start_time,pulse_id,started,stats,team_a,team_a_difficulty,team_a_score,team_h,team_h_difficulty,team_h_score
i64,i64,bool,bool,i64,str,i64,bool,i64,bool,list[null],i64,i64,null,i64,i64,null
2561895,1,False,False,1,"""2025-08-15T19:00:00Z""",0,False,124791,False,[],4,5,,12,3,
2561896,1,False,False,2,"""2025-08-16T11:30:00Z""",0,False,124792,False,[],15,4,,2,3,
2561897,1,False,False,3,"""2025-08-16T14:00:00Z""",0,False,124793,False,[],10,3,,6,3,
2561900,1,False,False,6,"""2025-08-16T14:00:00Z""",0,False,124796,False,[],3,3,,18,2,
2561899,1,False,False,5,"""2025-08-16T14:00:00Z""",0,False,124795,False,[],19,2,,17,2,


In [4]:
# Long-format view of the home/away difficulty ratings, restricted to
# event 1 fixtures in which team id 1 plays (home or away).
(
    df_fixtures.select(
        "event", "team_a", "team_a_difficulty", "team_h", "team_h_difficulty"
    )
    .unpivot(
        index=["event", "team_a", "team_h"],
        on=["team_h_difficulty", "team_a_difficulty"],
    )
    .filter(
        # Multiple predicates passed to filter() are AND-ed together.
        pl.col("event") == 1,
        (pl.col("team_h") == 1) | (pl.col("team_a") == 1),
    )
)

event,team_a,team_h,variable,value
i64,i64,i64,str,i64
1,1,14,"""team_h_difficulty""",4
1,1,14,"""team_a_difficulty""",3


In [5]:
def get_team_fixtures(
    fixtures: pl.DataFrame,
    team: int,
    mappings: "dict[int, str] | None" = None,
) -> pl.DataFrame:
    """Build one team's fixture list, seen from that team's point of view.

    Args:
        fixtures: Fixture table with ``event``, ``team_h``, ``team_a``,
            ``team_h_difficulty`` and ``team_a_difficulty`` columns.
        team: Numeric id of the team whose fixtures are wanted.
        mappings: Optional team id -> short name lookup. When omitted, the
            notebook-level ``team_mappings`` dict is used, which keeps
            existing two-argument calls working unchanged.

    Returns:
        A frame with one row per fixture involving ``team`` (in the input's
        row order) and the columns ``gameweek``, ``team``, ``opponent``,
        ``home`` and ``difficulty``.

    Note:
        Names are resolved with ``replace_strict``, so a team id that is
        missing from the mapping fails loudly instead of producing a null.
    """
    if mappings is None:
        mappings = team_mappings

    # True on rows where `team` is the home side. A null comparison result is
    # treated as "not home", matching what a when/otherwise fallback would do.
    is_home = (pl.col("team_h") == team).fill_null(False)
    is_away = pl.col("team_a") == team

    return fixtures.filter(is_home | is_away).select(
        pl.col("event").alias("gameweek"),
        pl.when(is_home)
        .then(pl.col("team_h"))
        .otherwise(pl.col("team_a"))
        .replace_strict(mappings)
        .alias("team"),
        pl.when(is_home)
        .then(pl.col("team_a"))
        .otherwise(pl.col("team_h"))
        .replace_strict(mappings)
        .alias("opponent"),
        is_home.alias("home"),
        # Each fixture carries one rating per side; keep the one for `team`.
        pl.when(is_home)
        .then(pl.col("team_h_difficulty"))
        .otherwise(pl.col("team_a_difficulty"))
        .alias("difficulty"),
    )

In [6]:
# One fixture table per team, keyed by the team's short name.
team_fixtures = {
    short_name: get_team_fixtures(df_fixtures, team=team_id)
    for team_id, short_name in team_mappings.items()
}

In [7]:
# Show the fixture table stored under the "CHE" short name.
team_fixtures["CHE"]

gameweek,team,opponent,home,difficulty
i64,str,str,bool,i64
1,"""CHE""","""CRY""",true,3
2,"""CHE""","""WHU""",false,2
3,"""CHE""","""FUL""",true,3
4,"""CHE""","""BRE""",false,3
5,"""CHE""","""MUN""",false,3
…,…,…,…,…
34,"""CHE""","""BHA""",false,3
35,"""CHE""","""NFO""",true,3
36,"""CHE""","""LIV""",false,5
37,"""CHE""","""TOT""",true,3


In [None]:
def avg_difficulty(
    team_fixtures: pl.DataFrame, n_gameweeks: int, current_gw: int = 1
) -> pl.Series:
    """Average fixture difficulty over a window of upcoming gameweeks.

    The window covers gameweeks ``current_gw`` through
    ``current_gw + n_gameweeks - 1`` inclusive, and every fixture that falls
    inside it contributes equally to the mean.

    Args:
        team_fixtures: A single team's fixtures with ``gameweek`` and
            ``difficulty`` columns.
        n_gameweeks: Number of gameweeks in the window.
        current_gw: First gameweek of the window.

    Returns:
        A one-element Series named ``avg_difficulty``. The value is null when
        no fixture falls inside the window.
    """
    last_gw = current_gw + n_gameweeks - 1

    # Aggregate over the window directly instead of reading a running average
    # off the row for the last gameweek. That approach depends on the rows
    # being sorted, returns nothing when the team has no fixture in the last
    # gameweek, and returns several rows (one a partial average) when the
    # team plays twice in it.
    return (
        team_fixtures.filter(pl.col("gameweek").is_between(current_gw, last_gw))
        .select(pl.col("difficulty").mean().alias("avg_difficulty"))
        .get_column("avg_difficulty")
    )

In [19]:
# Average difficulty for "CHE" over a 3-gameweek window starting at gameweek 3.
avg_difficulty(team_fixtures=team_fixtures["CHE"], n_gameweeks=3, current_gw=3)

avg_difficulty
f64
3.0


In [10]:
# Show the "CHE" fixture table again (same lookup as an earlier cell).
team_fixtures["CHE"]

gameweek,team,opponent,home,difficulty
i64,str,str,bool,i64
1,"""CHE""","""CRY""",true,3
2,"""CHE""","""WHU""",false,2
3,"""CHE""","""FUL""",true,3
4,"""CHE""","""BRE""",false,3
5,"""CHE""","""MUN""",false,3
…,…,…,…,…
34,"""CHE""","""BHA""",false,3
35,"""CHE""","""NFO""",true,3
36,"""CHE""","""LIV""",false,5
37,"""CHE""","""TOT""",true,3
