In [124]:
import requests
import pandas as pd
import datetime as dt
from bs4 import BeautifulSoup
import glom

INITIAL_RATING = 1500
WEIGHT = 400
K_FACTOR = 16

def get_football_matches(season: str = "2023-2024"):
    url = f"https://fbref.com/en/comps/9/{season}/schedule/{season}-Premier-League-Scores-and-Fixtures"
    response = requests.get(url)
    matches = (
        pd.read_html(response.content)[0]
        .assign(datetime=lambda df: pd.to_datetime(df["Date"] + " " + df["Time"]))
        .dropna(subset=["Score"])
        .drop(["Match Report", "Notes"], axis=1)
        .assign(season=season)
        .assign(elo_home_pre=None)
        .assign(elo_away_pre=None)
        .assign(elo_home_post=None)
        .assign(elo_away_post=None)
        .assign(Wk=lambda df: df["Wk"].astype(int).astype(str))
    )

    return matches

def expected_scores(rating_x, rating_y):
    """
    https://en.wikipedia.org/wiki/Elo_rating_system
    'For each `weight` rating points of advantage over the opponent, 
    the expected score is magnified ten times in comparison to the opponent's expected score.'
    """
    q_x = 10 ** (rating_x / WEIGHT)
    q_y = 10 ** (rating_y / WEIGHT)

    e_x = q_x / (q_x + q_y)
    e_y = q_y / (q_x + q_y)

    return e_x, e_y


def update_ratings(rating_x, rating_y, result_x):
    result_y = 1 - result_x
    e_x, e_y = expected_scores(rating_x, rating_y)

    rating_x_new = rating_x + K_FACTOR * (result_x - e_x)
    rating_y_new = rating_y + K_FACTOR * (result_y - e_y)

    return rating_x_new, rating_y_new

def get_result(goals_x, goals_y, xg_x=None, xg_y=None):
    if goals_x > goals_y:
        return 1
    elif goals_x == goals_y:
        return 0.5
    else:
        return 0

In [125]:
matches = get_football_matches()


In [178]:
import numpy as np
matches_h = (
    matches
    .assign(team=lambda df: df["Home"])
    .assign(opponent=lambda df: df["Away"])
    .assign(venue="home")
    .assign(team_g=lambda df: df["Score"].str.split("–").str.get(0))
    .assign(opponent_g=lambda df: df["Score"].str.split("–").str.get(1))
    .assign(team_xg=lambda df: df["xG"])
    .assign(opponent_xg=lambda df: df["xG.1"])
)
matches_a = (
    matches
    .assign(team=lambda df: df["Away"])
    .assign(opponent=lambda df: df["Home"])
    .assign(venue="away")
    .assign(team_g=lambda df: df["Score"].str.split("–").str.get(1))
    .assign(opponent_g=lambda df: df["Score"].str.split("–").str.get(0))
    .assign(team_xg=lambda df: df["xG.1"])
    .assign(opponent_xg=lambda df: df["xG"])
)

matches_ha = (
    pd.concat([matches_h, matches_a])
    .assign(game_number=lambda df: df.groupby(["team", "season"])["datetime"].rank(method="first", ascending=True).astype(int))
    .sort_values(["game_number"])
)

In [179]:
my_dict = {
    row["team"]: {
        row["season"]: {
        }
    }

    for _, row in matches_ha.iterrows()
}

In [180]:
for _, row in matches_ha.iterrows():
    try:
        print(glom.glom(my_dict, f"{row['team']}.{row['season']}"))
        team_past_game_number = max(glom.glom(my_dict, f"{row['team']}.{row['season']}").keys())
    except ValueError:
        team_past_game_number = None

    try:
        opponent_past_game_number = max(glom.glom(my_dict, f"{row['opponent']}.{row['season']}").keys())
    except ValueError:
        opponent_past_game_number = None
        
    elo_team_pre = glom.glom(my_dict, f"{row['team']}.{row['season']}.{str(team_past_game_number)}.elo_team_post", default=INITIAL_RATING)
    elo_opponent_pre = glom.glom(my_dict, f"{row['opponent']}.{row['season']}.{str(opponent_past_game_number)}.elo_team_post", default=INITIAL_RATING)
    result = get_result(row["team_g"], row["opponent_g"])
    elo_team_post, elo_opponent_post = update_ratings(elo_team_pre, elo_opponent_pre, result)
    my_dict[row["team"]][row["season"]][str(row["game_number"])] = {
        "team_g": row["team_g"],
        "opponent": row["opponent"],
        "opponent_g": row["opponent_g"],
        "team_xg": row["team_xg"],
        "opponent_xg": row["opponent_xg"],
        "elo_team_pre": elo_team_pre,
        "elo_opponent_pre": elo_opponent_pre,
        "elo_team_post": elo_team_post,
        "elo_opponent_post": elo_opponent_post
    }

    # my_dict[row["opponent"]][row["season"]][str(row["game_number"])] = {
    #     "team_g": row["team_g"],
    #     "opponent": row["opponent"],
    #     "opponent_g": row["opponent_g"],
    #     "team_xg": row["team_xg"],
    #     "opponent_xg": row["opponent_xg"],
    #     "elo_team_pre": elo_opponent_pre,
    #     "elo_opponent_pre": elo_team_pre,
    #     "elo_team_post": elo_opponent_post,
    #     "elo_opponent_post": elo_team_post
    # }

{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'1': {'team_g': '0', 'opponent': 'Manchester City', 'opponent_g': '3', 'team_xg': 0.3, 'opponent_xg': 1.9, 'elo_team_pre': 1500, 'elo_opponent_pre': 1500, 'elo_team_post': 1492.0, 'elo_opponent_post': 1508.0}}
{'1': {'team_g': '0', 'opponent': 'Crystal Palace', 'opponent_g': '1', 'team_xg': 0.5, 'opponent_xg': 1.9, 'elo_team_pre': 1500, 'elo_opponent_pre': 1508.0, 'elo_team_post': 1492.1841742594847, 'elo_opponent_post': 1515.8158257405153}}
{'1': {'team_g': '2', 'opponent': 'Tottenham', 'opponent_g': '2', 'team_xg': 2.2, 'opponent_xg': 1.3, 'elo_team_pre': 1500, 'elo_opponent_pre': 1500.0, 'elo_team_post': 1500.0, 'elo_opponent_post': 1500.0}}
{'1': {'team_g': '1', 'opponent': 'West Ham', 'opponent_g': '1', 'team_xg': 1.3, 'opponent_xg': 1.1, 'elo_team_pre': 1500, 'elo_opponent_pre': 1500.0, 'elo_team_post': 1500.0, 'elo_opponent_post': 1500.0}}
{'1': {'team_g': '4', 'opponent': 'Luton Town', 'opponent_g': '1', 'team_xg': 4.

In [181]:
my_dict["Arsenal"]

{'2023-2024': {'1': {'team_g': '2',
   'opponent': 'Tottenham',
   'opponent_g': '2',
   'team_xg': 2.2,
   'opponent_xg': 1.3,
   'elo_team_pre': 1500,
   'elo_opponent_pre': 1500.0,
   'elo_team_post': 1500.0,
   'elo_opponent_post': 1500.0},
  '2': {'team_g': '3',
   'opponent': 'Fulham',
   'opponent_g': '0',
   'team_xg': 3.7,
   'opponent_xg': 0.5,
   'elo_team_pre': 1500.0,
   'elo_opponent_pre': 1508.0,
   'elo_team_post': 1508.1841742594847,
   'elo_opponent_post': 1499.8158257405153},
  '3': {'team_g': '1',
   'opponent': 'Crystal Palace',
   'opponent_g': '1',
   'team_xg': 1.0,
   'opponent_xg': 0.8,
   'elo_team_pre': 1508.1841742594847,
   'elo_opponent_pre': 1500.1842695532575,
   'elo_team_post': 1508.0000021930573,
   'elo_opponent_post': 1500.368441619685},
  '4': {'team_g': '2',
   'opponent': 'Bournemouth',
   'opponent_g': '2',
   'team_xg': 2.9,
   'opponent_xg': 2.1,
   'elo_team_pre': 1508.0000021930573,
   'elo_opponent_pre': 1485.082413600163,
   'elo_team_pos