### Imports

In [1]:
import sys
from pathlib import Path
import os
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import requests
from bs4 import BeautifulSoup
from datetime import datetime, timedelta


In [2]:
# Make the repository root importable so the `src` package can be imported.
# NOTE(review): assumes the kernel's working directory sits one level below the
# project root (e.g. a notebooks/ folder) — confirm.
project_root = Path.cwd().parent

# Guard the append so re-running this cell does not pile up duplicate entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

In [3]:
from src.modeling.mcmc import (
    load_data,
    prepare_data,
    build_model,
    sample_model,
    predict_match
)

### Set Gameweek

In [4]:
# Gameweek whose fixtures are scraped and predicted in the cells below.
# This is the only value that needs editing to run the notebook for another round.
target_gameweek = 12

### Get Gameweek Matches

In [5]:
# Translation table from the team names shown on the FBref schedule page to the
# hyphenated names used for the same clubs in the rest of this notebook.
# Names without an entry are passed through unchanged by the scraper.
team_name_mapping = {
    "Nott'ham Forest": "Nottingham-Forest",
    "Ipswich Town": "Ipswich-Town",
    "Leicester City": "Leicester-City",
    "Tottenham": "Tottenham-Hotspur",
    "Manchester City": "Manchester-City",
    "Newcastle Utd": "Newcastle-United",
    "West Ham": "West-Ham-United",
    "Aston Villa": "Aston-Villa",
    "Brighton": "Brighton-and-Hove-Albion",
    "Crystal Palace": "Crystal-Palace",
    "Wolves": "Wolverhampton-Wanderers",
    "Manchester Utd": "Manchester-United",
}

def get_premier_league_matches_by_gameweek(
    target_gameweek,
    url="https://fbref.com/en/comps/9/schedule/Premier-League-Scores-and-Fixtures",
    table_id="sched_2024-2025_9_1",
    timeout=30,
):
    """Scrape the FBref Premier League schedule and return one gameweek's fixtures.

    Args:
        target_gameweek: Gameweek number to keep. Compared against the integer
            value of each row's gameweek cell.
        url: Schedule page to download.
        table_id: HTML ``id`` of the schedule table on that page. The default
            is the 2024-2025 table, so it has to be overridden for any other
            season.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        pandas.DataFrame with the columns ``Gameweek``, ``Date``, ``Home Team``
        and ``Away Team``, one row per matching fixture in page order.
        ``Gameweek`` holds the cell text as scraped. Team names are translated
        through the module-level ``team_name_mapping``; names without an entry
        are kept as scraped. The frame is empty when no row matches.

    Raises:
        requests.RequestException: The download failed, timed out, or returned
            an HTTP error status.
        ValueError: The page does not contain the expected schedule table.
    """
    response = requests.get(url, timeout=timeout)
    # Fail on 4xx/5xx here instead of trying to parse an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    # Locate the schedule table; a missing table or body means the page layout
    # (or the season-specific id) is not what this scraper expects.
    table = soup.find("table", {"id": table_id})
    table_body = table.find("tbody") if table is not None else None
    if table_body is None:
        raise ValueError(
            f"Schedule table with id '{table_id}' was not found at {url}"
        )

    matches = []
    for row in table_body.find_all("tr"):
        # Rows without a numeric gameweek cell are not fixtures; skip them.
        gameweek_cell = row.find("th", {"data-stat": "gameweek"})
        gameweek_text = gameweek_cell.get_text(strip=True) if gameweek_cell else ""
        if not gameweek_text.isdigit() or int(gameweek_text) != target_gameweek:
            continue

        # Prefer the linked date text; fall back to the cell's own text so a
        # date cell without a link does not crash the scrape.
        date_cell = row.find("td", {"data-stat": "date"})
        date_link = date_cell.find("a")
        date_source = date_link if date_link is not None else date_cell
        date = date_source.get_text(strip=True)

        # Strip whitespace so the mapping lookup sees the bare team name.
        home_team = row.find("td", {"data-stat": "home_team"}).get_text(strip=True)
        away_team = row.find("td", {"data-stat": "away_team"}).get_text(strip=True)

        # Translate names via team_name_mapping, keeping unmapped names as-is.
        home_team = team_name_mapping.get(home_team, home_team)
        away_team = team_name_mapping.get(away_team, away_team)

        matches.append([gameweek_text, date, home_team, away_team])

    return pd.DataFrame(matches, columns=["Gameweek", "Date", "Home Team", "Away Team"])

# Fetch the fixtures for the configured gameweek.
gameweek_matches = get_premier_league_matches_by_gameweek(target_gameweek)

# The scraper always returns a DataFrame (never None), so "nothing found" shows
# up as an empty frame; report that explicitly instead of printing an empty table.
if gameweek_matches.empty:
    print(f"No fixtures found for gameweek {target_gameweek}")
else:
    print(gameweek_matches)


  Gameweek        Date         Home Team                 Away Team
0       12  2024-11-23    Leicester-City                   Chelsea
1       12  2024-11-23           Arsenal         Nottingham-Forest
2       12  2024-11-23       Bournemouth  Brighton-and-Hove-Albion
3       12  2024-11-23       Aston-Villa            Crystal-Palace
4       12  2024-11-23           Everton                 Brentford
5       12  2024-11-23            Fulham   Wolverhampton-Wanderers
6       12  2024-11-23   Manchester-City         Tottenham-Hotspur
7       12  2024-11-24       Southampton                 Liverpool
8       12  2024-11-24      Ipswich-Town         Manchester-United
9       12  2024-11-25  Newcastle-United           West-Ham-United


### Model

In [6]:
# Load the match history and convert it into the structure the model consumes.
# Both helpers are imported from src.modeling.mcmc; their inputs and outputs
# are not visible in this notebook.
df = load_data()
data = prepare_data(df)

# Build the model from the prepared data and sample from it.
# `trace` and `data` are reused by the prediction cell further down.
# NOTE(review): no random seed is set anywhere in this notebook, so the
# predictions may differ between runs unless sample_model seeds itself — confirm.
model = build_model(data)
trace = sample_model(model)

Loaded 220 rows from match_logs


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [home_advantage, attack, defense, beta_home_xG, beta_away_xG, beta_home_possession, beta_away_possession, recent_form_coefficient]


Output()

Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 3 seconds.


### Get Predictions

In [7]:
# Fixtures to predict, reduced to {'home_team', 'away_team'} records.
weekend_matches = (
    gameweek_matches[['Home Team', 'Away Team']]
    .rename(columns={'Home Team': 'home_team', 'Away Team': 'away_team'})
    .to_dict(orient='records')
)


def _prediction_row(fixture):
    """Run predict_match for one fixture and flatten the result into a table row."""
    outcome = predict_match(
        trace=trace,
        home_team=fixture['home_team'],
        away_team=fixture['away_team'],
        data=data
    )
    return {
        'Home Team': fixture['home_team'],
        'Away Team': fixture['away_team'],
        'Home Win Probability': outcome['home_win_prob'],
        'Draw Probability': outcome['draw_prob'],
        'Away Win Probability': outcome['away_win_prob'],
        'Expected Home Goals': outcome['expected_home_goals'],
        'Expected Away Goals': outcome['expected_away_goals']
    }


# One row per fixture, in the same order as the scraped schedule.
predictions = [_prediction_row(fixture) for fixture in weekend_matches]

# Tabulate the rows so the notebook renders them as a table.
predictions_df = pd.DataFrame(predictions)

# Last expression of the cell: shows the table.
predictions_df

Unnamed: 0,Home Team,Away Team,Home Win Probability,Draw Probability,Away Win Probability,Expected Home Goals,Expected Away Goals
0,Leicester-City,Chelsea,0.332,0.19475,0.47325,1.726235,2.15764
1,Arsenal,Nottingham-Forest,0.47025,0.252,0.27775,1.514715,1.113619
2,Bournemouth,Brighton-and-Hove-Albion,0.375625,0.237875,0.3865,1.434536,1.458887
3,Aston-Villa,Crystal-Palace,0.499875,0.285875,0.21425,1.360182,0.738601
4,Everton,Brentford,0.3915,0.221625,0.386875,1.664283,1.638222
5,Fulham,Wolverhampton-Wanderers,0.633625,0.18,0.186375,2.46183,1.19834
6,Manchester-City,Tottenham-Hotspur,0.545625,0.2145,0.239875,1.992382,1.202165
7,Southampton,Liverpool,0.124375,0.248125,0.6275,0.542276,1.623342
8,Ipswich-Town,Manchester-United,0.320625,0.294875,0.3845,1.060373,1.139111
9,Newcastle-United,West-Ham-United,0.5565,0.230875,0.212625,1.752529,0.916287
