In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy import stats

In [4]:
# Download the match results and keep only the two team names and the
# full-time goal columns, renamed to something readable.
# NOTE(review): the variable name says "1718" but the URL path segment is
# "2122" — confirm which season is intended before renaming either one.
epl_1718 = (
    pd.read_csv("https://www.football-data.co.uk/mmz4281/2122/E0.csv")
    [['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG']]
    .rename(columns={'FTHG': 'HomeGoals', 'FTAG': 'AwayGoals'})
)
epl_1718.head()

Unnamed: 0,HomeTeam,AwayTeam,HomeGoals,AwayGoals
0,Brentford,Arsenal,2,0
1,Man United,Leeds,5,1
2,Burnley,Brighton,1,2
3,Chelsea,Crystal Palace,3,0
4,Everton,Southampton,3,1


In [5]:
# Reshape to long format: every match contributes two rows, one from the
# home side's point of view (home=1) and one from the away side's (home=0).
home_rows = (
    epl_1718[['HomeTeam', 'AwayTeam', 'HomeGoals']]
    .rename(columns={'HomeTeam': 'team', 'AwayTeam': 'opponent', 'HomeGoals': 'goals'})
    .assign(home=1)
)
away_rows = (
    epl_1718[['AwayTeam', 'HomeTeam', 'AwayGoals']]
    .rename(columns={'AwayTeam': 'team', 'HomeTeam': 'opponent', 'AwayGoals': 'goals'})
    .assign(home=0)
)
# The original index labels are kept, so each label appears once per half.
goal_model_data = pd.concat([home_rows, away_rows])
goal_model_data

Unnamed: 0,team,opponent,goals,home
0,Brentford,Arsenal,2,1
1,Man United,Leeds,5,1
2,Burnley,Brighton,1,1
3,Chelsea,Crystal Palace,3,1
4,Everton,Southampton,3,1
...,...,...,...,...
375,Man United,Crystal Palace,0,0
376,Southampton,Leicester,1,0
377,Wolves,Liverpool,1,0
378,Aston Villa,Man City,2,0


In [6]:
# Poisson regression of goals scored on a home indicator plus categorical
# terms for the scoring team and for the opponent.
poisson_model = smf.glm(
    formula="goals ~ home + team + opponent",
    data=goal_model_data,
    family=sm.families.Poisson(),
).fit()

print(poisson_model.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                  goals   No. Observations:                  760
Model:                            GLM   Df Residuals:                      720
Model Family:                 Poisson   Df Model:                           39
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -1075.0
Date:                Sun, 22 Oct 2023   Deviance:                       794.70
Time:                        16:14:32   Pearson chi2:                     694.
No. Iterations:                     5   Pseudo R-squ. (CS):             0.2519
Covariance Type:            nonrobust                                         
                                 coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------------------
Intercept           

In [7]:
# Expected goals for Arsenal when playing Southampton at home.
arsenal_home_fixture = pd.DataFrame(
    {'team': 'Arsenal', 'opponent': 'Southampton', 'home': 1},
    index=[1],
)
poisson_model.predict(arsenal_home_fixture)

1    2.133813
dtype: float64

In [8]:
# Expected goals for Southampton when playing away at Arsenal.
southampton_away_fixture = pd.DataFrame(
    {'team': 'Southampton', 'opponent': 'Arsenal', 'home': 0},
    index=[1],
)
poisson_model.predict(southampton_away_fixture)

1    0.962409
dtype: float64

In [12]:
def simulate_match(foot_model, homeTeam, awayTeam, max_goals=10):
    """Return the matrix of scoreline probabilities for one fixture.

    The model is asked for each side's expected number of goals. Each side's
    goal count is then treated as a Poisson variable with that mean, and the
    two probability vectors are combined with an outer product (i.e. the two
    goal counts are multiplied as if independent).

    Parameters
    ----------
    foot_model : object
        Fitted model whose ``predict`` accepts a one-row DataFrame with the
        columns ``team``, ``opponent`` and ``home`` and returns an object
        exposing ``.values`` (for example a pandas Series).
    homeTeam : str
        Team playing at home; must be a label the model can predict for.
    awayTeam : str
        Team playing away; must be a label the model can predict for.
    max_goals : int, default 10
        Largest goal count considered for each side. Probability mass above
        this value is dropped, so the matrix sums to slightly less than 1.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(max_goals + 1, max_goals + 1)`` where entry
        ``[i, j]`` is the probability that the home side scores ``i`` goals
        and the away side scores ``j``.
    """
    def expected_goals(team, opponent, at_home):
        # One-row frame in the layout the model was fitted on.
        fixture = pd.DataFrame(
            data={'team': team, 'opponent': opponent, 'home': at_home},
            index=[1],
        )
        return foot_model.predict(fixture).values[0]

    home_goals_avg = expected_goals(homeTeam, awayTeam, 1)
    away_goals_avg = expected_goals(awayTeam, homeTeam, 0)

    # Evaluate the pmf for all goal counts 0..max_goals in one vectorized call
    # per side instead of one scalar call per goal count.
    goal_counts = np.arange(0, max_goals + 1)
    home_probs = stats.poisson.pmf(goal_counts, home_goals_avg)
    away_probs = stats.poisson.pmf(goal_counts, away_goals_avg)

    # Rows index home goals, columns index away goals.
    return np.outer(home_probs, away_probs)
# Scoreline probabilities for Arsenal (home) v Southampton (away); print only
# the corner covering 0-4 goals for each side.
ars_sou = simulate_match(
    foot_model=poisson_model,
    homeTeam='Arsenal',
    awayTeam='Southampton',
    max_goals=10,
)
print(ars_sou[:5, :5])

[[0.04521972 0.04351987 0.02094196 0.00671824 0.00161642]
 [0.09649042 0.09286326 0.04468622 0.01433548 0.00344915]
 [0.10294625 0.09907641 0.04767602 0.01529461 0.00367992]
 [0.07322268 0.07047018 0.03391057 0.01087861 0.00261742]
 [0.03906088 0.03759254 0.0180897  0.00580323 0.00139627]]
