In [51]:
import pandas as pd
import numpy as np
from scipy.stats import norm
import utils    

# Calculate Standings

In [69]:
data = pd.read_csv('data/24_25.csv')
data["Date"] = pd.to_datetime(data["Date"], format="%d/%m/%Y")
data = data.sort_values(by=["Date", "Time"]).reset_index(drop=True)
data

Unnamed: 0,Div,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,...,B365CAHH,B365CAHA,PCAHH,PCAHA,MaxCAHH,MaxCAHA,AvgCAHH,AvgCAHA,BFECAHH,BFECAHA
0,E0,2024-08-16,20:00,Man United,Fulham,1,0,H,0,0,...,1.86,2.07,1.83,2.11,1.88,2.11,1.82,2.05,1.90,2.08
1,E0,2024-08-17,12:30,Ipswich,Liverpool,0,2,A,0,0,...,2.05,1.88,2.04,1.90,2.20,2.00,1.99,1.88,2.04,1.93
2,E0,2024-08-17,15:00,Arsenal,Wolves,2,0,H,1,0,...,2.02,1.91,2.00,1.90,2.05,1.93,1.99,1.87,2.02,1.96
3,E0,2024-08-17,15:00,Everton,Brighton,0,3,A,0,1,...,1.87,2.06,1.86,2.07,1.92,2.10,1.83,2.04,1.88,2.11
4,E0,2024-08-17,15:00,Newcastle,Southampton,1,0,H,1,0,...,1.87,2.06,1.88,2.06,1.89,2.10,1.82,2.05,1.89,2.10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135,E0,2024-12-04,19:30,Southampton,Chelsea,1,5,A,1,3,...,2.07,1.86,2.07,1.87,2.07,1.90,2.01,1.86,2.07,1.92
136,E0,2024-12-04,20:15,Arsenal,Man United,2,0,H,0,0,...,2.07,1.86,2.07,1.86,2.07,1.93,1.99,1.87,2.06,1.93
137,E0,2024-12-04,20:15,Aston Villa,Brentford,3,1,H,3,0,...,1.85,2.08,1.85,2.08,1.88,2.08,1.84,2.02,1.92,2.07
138,E0,2024-12-05,19:30,Fulham,Brighton,3,1,H,1,0,...,1.94,1.99,1.94,1.98,1.96,2.00,1.92,1.96,1.97,2.01


In [70]:
t = len(data)
standings = utils.calculate_standings(data, 0, t)
standings

Unnamed: 0,Team,Points,Goal Difference
0,Liverpool,35,18
1,Chelsea,28,16
2,Arsenal,28,14
3,Man City,26,6
4,Brighton,23,3
5,Fulham,22,2
6,Nott'm Forest,22,0
7,Aston Villa,22,-1
8,Bournemouth,21,2
9,Tottenham,20,13


In [71]:
remaining = pd.read_csv('data/24_25_remaining.csv')
remaining

Unnamed: 0,HomeTeam,AwayTeam
0,Everton,Liverpool
1,Aston Villa,Southampton
2,Brentford,Newcastle
3,Crystal Palace,Man City
4,Man Utd,Nott'm Forest
...,...,...
235,Newcastle United,Everton
236,Nottingham Forest,Chelsea
237,Southampton,Arsenal
238,Spurs,Brighton


In [72]:
# Concatenate remaining data to the original data
data = pd.concat([data, remaining], ignore_index=True)
data

Unnamed: 0,Div,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,...,B365CAHH,B365CAHA,PCAHH,PCAHA,MaxCAHH,MaxCAHA,AvgCAHH,AvgCAHA,BFECAHH,BFECAHA
0,E0,2024-08-16,20:00,Man United,Fulham,1.0,0.0,H,0.0,0.0,...,1.86,2.07,1.83,2.11,1.88,2.11,1.82,2.05,1.90,2.08
1,E0,2024-08-17,12:30,Ipswich,Liverpool,0.0,2.0,A,0.0,0.0,...,2.05,1.88,2.04,1.90,2.20,2.00,1.99,1.88,2.04,1.93
2,E0,2024-08-17,15:00,Arsenal,Wolves,2.0,0.0,H,1.0,0.0,...,2.02,1.91,2.00,1.90,2.05,1.93,1.99,1.87,2.02,1.96
3,E0,2024-08-17,15:00,Everton,Brighton,0.0,3.0,A,0.0,1.0,...,1.87,2.06,1.86,2.07,1.92,2.10,1.83,2.04,1.88,2.11
4,E0,2024-08-17,15:00,Newcastle,Southampton,1.0,0.0,H,1.0,0.0,...,1.87,2.06,1.88,2.06,1.89,2.10,1.82,2.05,1.89,2.10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
375,,NaT,,Newcastle United,Everton,,,,,,...,,,,,,,,,,
376,,NaT,,Nottingham Forest,Chelsea,,,,,,...,,,,,,,,,,
377,,NaT,,Southampton,Arsenal,,,,,,...,,,,,,,,,,
378,,NaT,,Spurs,Brighton,,,,,,...,,,,,,,,,,


# Implement Probit Regression Model for Monte Carlo Simulation

In [73]:
def generate_match_outcome(c_1: float, c_neg1: float, p: float):
    p_1 = norm.cdf(p - c_1)
    p_0 = norm.cdf(p - c_neg1) - norm.cdf(p - c_1)
    p_neg1 = norm.cdf(c_neg1 - p)

    probs = [p_neg1, p_0, p_1]

    outcome = np.random.choice([-1, 0, 1], p=probs)
    return outcome