In [25]:
import datetime as dt
import io

import pandas as pd
import requests
from bs4 import BeautifulSoup

def get_football_matches(season: str = "2023-2024", timeout: float = 30.0):
    """
    Download the Premier League scores-and-fixtures table for `season` from fbref.com.

    Parameters
    ----------
    season : str
        Season slug as it appears in the fbref URL, e.g. "2023-2024".
    timeout : float
        Seconds `requests` waits for the server before giving up.

    Returns
    -------
    pandas.DataFrame
        The first HTML table on the page, limited to rows that have a "Score",
        with the "Match Report" and "Notes" columns removed and a `datetime`
        column built from "Date" and "Time".

    Raises
    ------
    requests.HTTPError
        If the server responds with an error status (e.g. blocked or rate limited).
    requests.Timeout
        If no response arrives within `timeout` seconds.
    """
    url = f"https://fbref.com/en/comps/9/{season}/schedule/{season}-Premier-League-Scores-and-Fixtures"
    response = requests.get(url, timeout=timeout)
    # Fail loudly here rather than letting read_html choke on an error page.
    response.raise_for_status()

    matches = (
        # Wrap the payload in a file-like object: passing literal HTML
        # to read_html is deprecated in recent pandas versions.
        pd.read_html(io.BytesIO(response.content))[0]
        .assign(datetime=lambda df: pd.to_datetime(df["Date"] + " " + df["Time"]))
        # Rows without a score are dropped (presumably unplayed fixtures and
        # spacer rows -- confirm against the page layout).
        .dropna(subset=["Score"])
        .drop(["Match Report", "Notes"], axis=1)
    )

    return matches

def init_elo(teams, starting_value=1500):
    """
    Build the initial ratings table: every entry of `teams` becomes a key
    mapped to `starting_value`.
    """
    return dict.fromkeys(teams, starting_value)

def expected_scores(rating_x, rating_y, weight=400):
    """
    Expected scores of two players under the Elo model.

    See https://en.wikipedia.org/wiki/Elo_rating_system: every `weight`
    rating points of advantage multiplies a player's expected score by ten
    relative to the opponent's.

    Returns the pair `(e_x, e_y)`; the two values are each player's share of
    the combined strength `10 ** (rating / weight)`.
    """
    strength_x, strength_y = (
        10 ** (rating / weight) for rating in (rating_x, rating_y)
    )
    total = strength_x + strength_y

    return strength_x / total, strength_y / total


def update_ratings(rating_x, rating_y, result_x, k_factor=16, weight=400):
    """
    Return the new ratings `(rating_x_new, rating_y_new)` after one game.

    Parameters
    ----------
    rating_x, rating_y
        Current ratings of the two sides.
    result_x
        Score achieved by x in the game (e.g. 1 for a win, 0.5 for a draw,
        0 for a loss); y's score is taken to be `1 - result_x`.
    k_factor
        Multiplier applied to (actual score - expected score), so it is the
        largest number of points a rating can move in one game when the
        result lies in [0, 1].
    weight
        Scale forwarded to `expected_scores`, so ratings can be updated on
        the same scale used to compute expectations.
    """
    result_y = 1 - result_x
    e_x, e_y = expected_scores(rating_x, rating_y, weight)

    # Each side moves in proportion to how much it out- or under-performed
    # its expected score.
    rating_x_new = rating_x + k_factor * (result_x - e_x)
    rating_y_new = rating_y + k_factor * (result_y - e_y)

    return rating_x_new, rating_y_new

In [22]:
# Fetch the fixture rows that have a score for the default season ("2023-2024").
# Note: this issues an HTTP request to fbref.com every time the cell runs.
matches = get_football_matches()

In [27]:
# Seed a rating for every club that appears in the fixture list. Home and Away
# are combined because a club that has so far only played away would otherwise
# get no entry; Home names come first, so the key order is unchanged whenever
# every club already appears in the Home column.
elo = init_elo(pd.concat([matches["Home"], matches["Away"]]).unique())
elo

{'Burnley': 1500,
 'Arsenal': 1500,
 'Everton': 1500,
 'Sheffield Utd': 1500,
 'Brighton': 1500,
 'Bournemouth': 1500,
 'Newcastle Utd': 1500,
 'Brentford': 1500,
 'Chelsea': 1500,
 'Manchester Utd': 1500,
 "Nott'ham Forest": 1500,
 'Fulham': 1500,
 'Liverpool': 1500,
 'Wolves': 1500,
 'Tottenham': 1500,
 'Manchester City': 1500,
 'Aston Villa': 1500,
 'West Ham': 1500,
 'Crystal Palace': 1500,
 'Luton Town': 1500}