In [1]:
from datetime import date
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
from autoscout import preprocess
from autoscout.util import load_csv

from ratings.team import RatingSystem

Load the match summary data

Competition scorelines and expected goals (xG) data can be downloaded from [fbref](https://fbref.com) using [autoscout](https://github.com/olliestanley/autoscout/).

In [2]:
# One DataFrame per season file
seasons = (2021, 2022, 2023)
dfs = [load_csv(f"data/raw/epl_{season}_matches.csv") for season in seasons]

# Combined DataFrame, built in a single chain:
#   1. merge the per-season frames,
#   2. drop every row that has any missing value,
#   3. drop rows whose "referee" text contains the character "0"
#      (presumably placeholder / non-match rows - TODO confirm),
#   4. renumber the index from 0.
df = (
    preprocess.combine_data(dfs)
    .dropna(axis=0, how="any")
    .loc[lambda frame: ~frame["referee"].str.contains("0")]
    .reset_index(drop=True)
)

# Split the "score" text on the en dash into home and away goal columns.
# NOTE(review): str.split produces strings, so both new columns hold text and
# are not converted to numbers here - confirm downstream code expects that.
df[["home_goals", "away_goals"]] = df["score"].str.split("–", expand=True)

Develop a rating system

In [3]:
def ratings(
    data: pd.DataFrame,
    k: int,
    baseline: float,
    home_advantage: float,
    default_rating: float,
) -> RatingSystem:
    """Build a ``RatingSystem`` and run it over ``data``.

    The four configuration values are forwarded positionally to
    ``RatingSystem`` in the order ``(k, baseline, home_advantage,
    default_rating)``. The new system then processes the dataset, fits its
    forecast model and forecasts the dataset, using the same ``data`` for
    each of the three steps.

    Args:
        data: Match DataFrame handed to every step.
        k: First constructor argument of ``RatingSystem``.
        baseline: Second constructor argument of ``RatingSystem``.
        home_advantage: Third constructor argument of ``RatingSystem``.
        default_rating: Fourth constructor argument of ``RatingSystem``.
            (The exact meaning of these four values is defined by
            ``RatingSystem``, which is not visible in this notebook.)

    Returns:
        The ``RatingSystem`` instance after the three calls above.
    """
    rating_system = RatingSystem(k, baseline, home_advantage, default_rating)

    # The calls are kept in this order: process, then fit, then forecast.
    rating_system.process_dataset(data)
    rating_system.fit_forecast_model(data)
    rating_system.forecast_dataset(data)

    return rating_system

Different systems can track performance over different time scales; a single system is built below

In [4]:
# Tunable values passed to the rating system
baseline = 2.8
home_advantage = 0.06

# Build the system over the full match DataFrame, naming each argument
system = ratings(
    df,
    k=64,
    baseline=baseline,
    home_advantage=home_advantage,
    default_rating=1000,
)

Use the system to predict

In [5]:
def predict_match_as_of(df, system, home_team, away_team, date_str):
    """Predict a fixture from each team's ratings before ``date_str``.

    Looks up a pair of ratings for the home team and then for the away team
    via ``system.get_team_ratings_before_date``, and feeds the four values to
    ``system.predict_match_from_ratings``.

    Args:
        df: Match data passed through unchanged to the ratings lookup.
        system: Object providing ``get_team_ratings_before_date`` and
            ``predict_match_from_ratings``.
        home_team: Name of the home team.
        away_team: Name of the away team.
        date_str: Date passed through unchanged to the ratings lookup.

    Returns:
        A ``(home_pred, away_pred)`` tuple holding the two values produced by
        ``system.predict_match_from_ratings``.
    """

    def _ratings_before(team):
        # Each lookup is expected to yield exactly two values; the names
        # suggest attack and defence ratings (presumably - confirm).
        attack, defence = system.get_team_ratings_before_date(df, team, date_str)
        return attack, defence

    # Home side is looked up first, then the away side.
    home_attack, home_defence = _ratings_before(home_team)
    away_attack, away_defence = _ratings_before(away_team)

    expected_home, expected_away = system.predict_match_from_ratings(
        home_attack, home_defence, away_attack, away_defence
    )
    return expected_home, expected_away

In [6]:
# Fixture to predict - edit these values to choose a different match
home_team = "Manchester Utd"
away_team = "Brighton"

# Today's date as text, in the same YYYY-MM-DD form that str() of a date gives
date_str = date.today().isoformat()

Predict results based on long-term performance

In [7]:
# Predicted values for each side, using ratings from before `date_str`
home_pred, away_pred = predict_match_as_of(
    df, system, home_team, away_team, date_str
)
print(
    f"\nPredicted xG: {home_team} {round(home_pred, 2)} - {round(away_pred, 2)} {away_team}"
)

# The first element of the forecast result unpacks into three values,
# reported below as home win / draw / away win probabilities
forecast = system.forecast_match_from_predictions(home_pred, away_pred)
home_prob, draw_prob, away_prob = forecast[0]
print(
    f"Win Probabilities: {home_team} {round(home_prob, 3)} - Draw {round(draw_prob, 3)} - {away_team} {round(away_prob, 3)}"
)


Predicted xG: Manchester Utd 1.36 - 1.43 Brighton
Win Probabilities: Manchester Utd 0.347 - Draw 0.252 - Brighton 0.401
