In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn
from scipy.stats import poisson,skellam

In [2]:
# Read the results CSV, keep only the team, goals and result columns, and
# give the abbreviated column names descriptive labels -- all in one chain so
# the cell produces `epl_1920` in a single assignment.
epl_1920 = (
    pd.read_csv("http://football-data.co.uk/mmz4281/1920/E0.csv")
    .loc[:, ['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG', 'FTR']]
    .rename(columns={'FTHG': 'HomeGoals', 'FTAG': 'AwayGoals', 'FTR': 'Result'})
)
epl_1920.head()

Unnamed: 0,HomeTeam,AwayTeam,HomeGoals,AwayGoals,Result
0,Liverpool,Norwich,4,1,H
1,West Ham,Man City,0,5,A
2,Bournemouth,Sheffield United,1,1,D
3,Burnley,Southampton,3,0,H
4,Crystal Palace,Everton,0,0,D


In [3]:
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Reshape the results so every match contributes two rows: one from the home
# side's point of view (home=1) and one from the away side's (home=0). Both
# views share the column names team / opponent / goals.
home_rows = (
    epl_1920[['HomeTeam', 'AwayTeam', 'HomeGoals']]
    .assign(home=1)
    .rename(columns={'HomeTeam': 'team', 'AwayTeam': 'opponent', 'HomeGoals': 'goals'})
)
away_rows = (
    epl_1920[['AwayTeam', 'HomeTeam', 'AwayGoals']]
    .assign(home=0)
    .rename(columns={'AwayTeam': 'team', 'HomeTeam': 'opponent', 'AwayGoals': 'goals'})
)
goal_model_data = pd.concat([home_rows, away_rows])

# Poisson GLM of goals on the home flag plus team and opponent terms.
poisson_model = smf.glm(
    formula="goals ~ home + team + opponent",
    data=goal_model_data,
    family=sm.families.Poisson(),
).fit()
poisson_model.summary()

0,1,2,3
Dep. Variable:,goals,No. Observations:,506.0
Model:,GLM,Df Residuals:,466.0
Model Family:,Poisson,Df Model:,39.0
Link Function:,log,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-697.98
Date:,"Sat, 15 Feb 2020",Deviance:,468.63
Time:,10:26:30,Pearson chi2:,391.0
No. Iterations:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.2012,0.254,0.794,0.427,-0.296,0.698
team[T.Aston Villa],-0.0302,0.251,-0.120,0.904,-0.522,0.462
team[T.Bournemouth],-0.2914,0.266,-1.097,0.273,-0.812,0.229
team[T.Brighton],-0.1236,0.253,-0.488,0.625,-0.619,0.372
team[T.Burnley],-0.1556,0.260,-0.598,0.550,-0.665,0.354
team[T.Chelsea],0.2736,0.235,1.164,0.244,-0.187,0.734
team[T.Crystal Palace],-0.4126,0.275,-1.501,0.133,-0.951,0.126
team[T.Everton],-0.0062,0.247,-0.025,0.980,-0.491,0.479
team[T.Leicester],0.4879,0.224,2.177,0.029,0.049,0.927


In [4]:
def simulate_match(foot_model, homeTeam, awayTeam, max_goals=10):
    """Return the joint scoreline probability matrix for one fixture.

    The expected goals of each side are obtained from ``foot_model`` and the
    two goal counts are treated as independent Poisson variables, so the
    probability of a scoreline is the product of the two marginal
    probabilities.

    Parameters
    ----------
    foot_model : object
        Fitted model whose ``predict`` accepts a one-row DataFrame with the
        columns ``team``, ``opponent`` and ``home`` and returns an object
        exposing ``.values`` (e.g. a pandas Series).
    homeTeam, awayTeam : str
        Team labels, passed to the model as the ``team`` / ``opponent``
        values.
    max_goals : int, default 10
        Largest goal count considered for each side. Scorelines beyond it are
        not represented, so the matrix sums to slightly less than 1.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(max_goals + 1, max_goals + 1)`` where entry
        ``[i, j]`` is the probability that the home side scores ``i`` and the
        away side scores ``j``.
    """
    def expected_goals(team, opponent, home):
        # One-row frame in the layout the model's formula expects.
        fixture = pd.DataFrame(
            data={'team': team, 'opponent': opponent, 'home': home},
            index=[1])
        return foot_model.predict(fixture).values[0]

    home_goals_avg = expected_goals(homeTeam, awayTeam, 1)
    away_goals_avg = expected_goals(awayTeam, homeTeam, 0)

    # Evaluate each Poisson pmf once over the whole 0..max_goals range
    # instead of calling it separately for every goal count.
    goal_counts = np.arange(max_goals + 1)
    home_probs = poisson.pmf(goal_counts, home_goals_avg)
    away_probs = poisson.pmf(goal_counts, away_goals_avg)

    # Rows index home goals, columns index away goals.
    return np.outer(home_probs, away_probs)

In [5]:
# Scoreline probabilities for Southampton (home) v Burnley (away), capped at
# nine goals per side.
sou_burn = simulate_match(
    poisson_model,
    homeTeam='Southampton',
    awayTeam='Burnley',
    max_goals=9,
)
sou_burn

array([[6.06683556e-02, 7.87306175e-02, 5.10851997e-02, 2.20981173e-02,
        7.16929939e-03, 1.86075051e-03, 4.02455929e-04, 7.46108045e-05,
        1.21030038e-05, 1.74514732e-06],
       [9.12823200e-02, 1.18459011e-01, 7.68633912e-02, 3.32490867e-02,
        1.07870120e-02, 2.79970706e-03, 6.05539915e-04, 1.12260292e-04,
        1.82103216e-05, 2.62576914e-06],
       [6.86722251e-02, 8.91174094e-02, 5.78247803e-02, 2.50134831e-02,
        8.11513246e-03, 2.10623605e-03, 4.55551232e-04, 8.44540769e-05,
        1.36997318e-05, 1.97538154e-06],
       [3.44416787e-02, 4.46957001e-02, 2.90012811e-02, 1.25451934e-02,
        4.07004119e-03, 1.05635583e-03, 2.28475910e-04, 4.23568651e-05,
        6.87092578e-06, 9.90727416e-07],
       [1.29553385e-02, 1.68124188e-02, 1.09089170e-02, 4.71891132e-03,
        1.53095794e-03, 3.97351345e-04, 8.59418842e-05, 1.59326591e-05,
        2.58451889e-06, 3.72665025e-07],
       [3.89855088e-03, 5.05923253e-03, 3.28273691e-03, 1.42002587e-03,
   

In [6]:
# Home win: cells strictly below the diagonal (home goals > away goals).
np.tril(sou_burn, k=-1).sum()

0.4212364558205641

In [7]:
# Draw: cells on the diagonal (equal goals for both sides).
np.diag(sou_burn).sum()

0.2511546222571516

In [8]:
# Away win: cells strictly above the diagonal (away goals > home goals).
np.triu(sou_burn, k=1).sum()

0.3276035597467952

In [9]:
# Scoreline probabilities for Norwich (home) v Liverpool (away), capped at
# nine goals per side.
Nor_Liv = simulate_match(
    poisson_model,
    homeTeam='Norwich',
    awayTeam='Liverpool',
    max_goals=9,
)
Nor_Liv

array([[2.86814367e-02, 8.87382645e-02, 1.37274846e-01, 1.41572773e-01,
        1.09503948e-01, 6.77594388e-02, 3.49404569e-02, 1.54433164e-02,
        5.97256122e-03, 2.05318522e-03],
       [1.31240062e-02, 4.06047139e-02, 6.28140055e-02, 6.47806442e-02,
        5.01066422e-02, 3.10052563e-02, 1.59879987e-02, 7.06652814e-03,
        2.73291506e-03, 9.39493228e-04],
       [3.00263094e-03, 9.28992021e-03, 1.43711664e-02, 1.48211121e-02,
        1.14638588e-02, 7.09366792e-03, 3.65788150e-03, 1.61674535e-03,
        6.25261464e-04, 2.14945909e-04],
       [4.57979696e-04, 1.41695564e-03, 2.19197848e-03, 2.26060696e-03,
        1.74853810e-03, 1.08196976e-03, 5.57922533e-04, 2.46595922e-04,
        9.53687154e-05, 3.27848690e-05],
       [5.23904052e-05, 1.62092077e-04, 2.50750507e-04, 2.58601235e-04,
        2.00023320e-04, 1.23771501e-04, 6.38233264e-05, 2.82092425e-05,
        1.09096663e-05, 3.75041205e-06],
       [4.79454366e-06, 1.48339669e-05, 2.29476036e-05, 2.36660683e-05,
   

In [10]:
# Rows of Nor_Liv are Norwich (home) goals and columns are Liverpool (away)
# goals, so below-diagonal = Norwich win, diagonal = draw, above = Liverpool win.
for label, prob in (
    ('Norwich win Prob', np.tril(Nor_Liv, k=-1).sum()),
    ('Draw Prob', np.diag(Nor_Liv).sum()),
    ('Liverpool win Prob', np.triu(Nor_Liv, k=1).sum()),
):
    print(label)
    print(prob)

Norwich win Prob
0.030299702846216286
Draw Prob
0.08612973292634786
Liverpool win Prob
0.8821894597843525


In [11]:
# Rows of sou_burn are Southampton (home) goals and columns are Burnley (away)
# goals, so below-diagonal = Southampton win, diagonal = draw, above = Burnley win.
for label, prob in (
    ('Southampton win Prob', np.tril(sou_burn, k=-1).sum()),
    ('Draw Prob', np.diag(sou_burn).sum()),
    ('Burnley win Prob', np.triu(sou_burn, k=1).sum()),
):
    print(label)
    print(prob)

Southampton win Prob
0.4212364558205641
Draw Prob
0.2511546222571516
Burnley win Prob
0.3276035597467952
