# Can Poisson distribution be used for betting?

In [183]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import seaborn
from scipy.stats import poisson,skellam

league = pd.read_csv('liga_pt_bet.csv')
league = league[['HomeTeam','AwayTeam','FTHG','FTAG']].rename(columns={'FTHG': 'Home goals', 'FTAG': 'Away goals', 'AwayTeam': 'Away team', 'HomeTeam': 'Home team' })

In [184]:
original_league = league
league_till_last_round = league[-9:]
league = league[:-9]
league.mean()


Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.



Home goals    1.407407
Away goals    1.215488
dtype: float64

In [185]:
league[['Home goals','Away goals']].max()

Home goals    7
Away goals    7
dtype: int64

In [186]:
poisson.pmf(3,1.407407)*100

11.37309785715584

In [188]:
import plotly.graph_objects as go

#Y-Axis ticks for the line
max_goals = max(league[['Home goals','Away goals']].max())+1
goals = []
for i in range(max_goals):
  goals.append(i)
  goals[i] = str(goals[i])

#Poisson predictions as percentage
prob_poisson_home = []
prob_poisson_away = []
for i in range(max_goals):
    prob_poisson_home.append(poisson.pmf(i, league.mean().to_list()[0])*100)
    prob_poisson_away.append(poisson.pmf(i, league.mean().to_list()[1])*100)

fig = go.Figure()
fig.add_trace(go.Histogram(
    x=league['Home goals'],
    histnorm='percent',
    name='Home goals',
    marker_color='#E13C3C',
    opacity=0.65
))
fig.add_trace(go.Histogram(
    x=league['Away goals'],
    histnorm='percent',
    name='Away goals',
    marker_color='#4C3CE1',
    opacity=0.65
))

fig.add_trace(go.Scatter(x=goals, y=prob_poisson_home,
                    mode='lines+markers',
                    line = dict(color='#E13C3C', width=4, dash='dash'),
                    name='Home goals - Poisson', ))

fig.add_trace(go.Scatter(x=goals, y=prob_poisson_away,
                    mode='lines+markers',
                    line = dict(color='#4C3CE1', width=4, dash='dash'),
                    name='Away goals - Poisson'))

fig.update_layout(title='Real nº goals per match vs Poisson prediction\n         Liga Portugal 2021/22',
                    title_font_family="Calibri",
                    title_font_color="black",
                   xaxis_title='Goals in a match',
                   yaxis_title='% Probability',
                    barmode='group')

fig.update_layout({
'plot_bgcolor': 'rgba(255,250,250 250)',
'paper_bgcolor': 'rgba(255,250,250,250)',
})




Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.


Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.



## Predict a match of the Premier League

In [189]:
import statsmodels.api as sm
import statsmodels.formula.api as smf

model_data = pd.concat([league[['Home team','Away team','Home goals']].assign(home=1).rename(
            columns={'Home team':'team', 'Away team':'opponent','Home goals':'goals'}),
           league[['Away team','Home team','Away goals']].assign(home=0).rename(
            columns={'Away team':'team', 'Home team':'opponent','Away goals':'goals'})])

poisson_model = smf.glm(formula="goals ~ home + team + opponent", data=model_data, 
                        family=sm.families.Poisson()).fit()
poisson_model.summary()

0,1,2,3
Dep. Variable:,goals,No. Observations:,594.0
Model:,GLM,Df Residuals:,558.0
Model Family:,Poisson,Df Model:,35.0
Link Function:,log,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-796.11
Date:,"Wed, 05 Oct 2022",Deviance:,544.46
Time:,18:05:30,Pearson chi2:,474.0
No. Iterations:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0436,0.234,0.186,0.852,-0.415,0.502
team[T.Belenenses],-0.2707,0.277,-0.977,0.329,-0.814,0.273
team[T.Benfica],0.8868,0.216,4.103,0.000,0.463,1.310
team[T.Boavista],0.2077,0.246,0.843,0.399,-0.275,0.690
team[T.Estoril],0.1424,0.248,0.575,0.565,-0.343,0.628
team[T.Famalicao],0.3128,0.240,1.306,0.192,-0.157,0.782
team[T.Gil Vicente],0.4142,0.234,1.769,0.077,-0.045,0.873
team[T.Guimaraes],0.3775,0.236,1.598,0.110,-0.085,0.840
team[T.Maritimo],0.2412,0.243,0.991,0.322,-0.236,0.718


In [190]:
import math
benfica_home_goals = math.exp(0.0436+0.8868-0.5195+0.1496)
porto_away_goals = math.exp(0.0436+0.9801-0.8177)

display(benfica_home_goals, porto_away_goals)

1.7515480554167886

1.2287532039953122

## Testing the last matchday

In [191]:
league_till_last_round = league_till_last_round.reset_index()

In [151]:
def simulate_match(foot_model, homeTeam, awayTeam, max_goals=10):
    home_goals_avg = foot_model.predict(pd.DataFrame(data={'team': homeTeam, 
                                                            'opponent': awayTeam,'home':1},
                                                      index=[1])).values[0]
    away_goals_avg = foot_model.predict(pd.DataFrame(data={'team': awayTeam, 
                                                            'opponent': homeTeam,'home':0},
                                                      index=[1])).values[0]
    team_pred = [[poisson.pmf(i, team_avg) for i in range(0, max_goals+1)] for team_avg in [home_goals_avg, away_goals_avg]]
    return(np.outer(np.array(team_pred[0]), np.array(team_pred[1])))


In [153]:
from numpy import unravel_index

home_match = []
away_match = []
home_goals = []
away_goals = []
pred_list_home = []
pred_list_away = []

for n in range(0,len(league_till_last_round['Home team'])):

    a = simulate_match(poisson_model, league_till_last_round['Home team'].loc[n], league_till_last_round['Away team'].loc[n], max_goals=7)
    b = unravel_index(a.argmax(), a.shape)
    
    home_match.append(league_till_last_round['Home team'].loc[n])
    away_match.append(league_till_last_round['Away team'].loc[n])
    home_goals.append(league_till_last_round['Home goals'].loc[n])
    away_goals.append(league_till_last_round['Away goals'].loc[n])
    pred_list_home.append(b[0])
    pred_list_away.append(b[1])
    

final_df = pd.DataFrame({'Home': home_match, 'Away': away_match,
                         'Home Goals': home_goals, 'Away Goals': away_goals,
                         'Home Prediction': pred_list_home, 'Away Prediction': pred_list_away})

In [154]:
def transform_df(mod_df):
    mod_df.loc[(mod_df['Home Goals'] > mod_df['Away Goals']), '1x2'] = '1'
    mod_df.loc[(mod_df['Home Goals'] == mod_df['Away Goals']), '1x2'] = 'X'
    mod_df.loc[(mod_df['Home Goals'] < mod_df['Away Goals']), '1x2'] = '2'
    
    mod_df['Home Prediction Int'] = mod_df['Home Prediction'].round(0).astype(int)
    mod_df['Away Prediction Int'] = mod_df['Away Prediction'].round(0).astype(int)
    mod_df.loc[(mod_df['Home Prediction Int'] > mod_df['Away Prediction Int']), '1x2_Pred'] = '1'
    mod_df.loc[(mod_df['Home Prediction Int'] == mod_df['Away Prediction Int']), '1x2_Pred'] = 'X'
    mod_df.loc[(mod_df['Home Prediction Int'] < mod_df['Away Prediction Int']), '1x2_Pred'] = '2'
    
    mod_df['Exact_Res'] = 'NO'
    mod_df.loc[(mod_df['Home Goals'] == mod_df['Home Prediction Int']) & 
               (mod_df['Away Goals'] == mod_df['Away Prediction Int']), 'Exact_Res'] = 'YES'
    
    mod_df.loc[mod_df['1x2'] == mod_df['1x2_Pred'], 'Match'] = 'Yes'
    mod_df.loc[mod_df['1x2'] != mod_df['1x2_Pred'], 'Match'] = 'No'
    return mod_df

In [155]:
transform_df(final_df)

Unnamed: 0,Home,Away,Home Goals,Away Goals,Home Prediction,Away Prediction,1x2,Home Prediction Int,Away Prediction Int,1x2_Pred,Exact_Res,Match
0,Pacos Ferreira,Benfica,0,2,0,2,2,0,2,2,YES,Yes
1,Arouca,Belenenses,0,0,1,0,X,1,0,1,NO,No
2,Moreirense,Vizela,4,1,1,1,1,1,1,X,NO,No
3,Tondela,Boavista,2,2,1,1,X,1,1,X,NO,Yes
4,Maritimo,Portimonense,0,1,1,0,2,1,0,1,NO,No
5,Porto,Estoril,2,0,2,0,1,2,0,1,YES,Yes
6,Sp Lisbon,Santa Clara,4,0,2,0,1,2,0,1,NO,Yes
7,Famalicao,Sp Braga,3,2,0,1,1,0,1,2,NO,No
8,Guimaraes,Gil Vicente,5,0,1,1,1,1,1,X,NO,No


In [202]:
matching_results = final_df.groupby('Match')['Match'].count()
# accuracy
accuracy = matching_results[0]*100/(matching_results[0] + matching_results[1])
accuracy_exact_result = len(final_df[final_df['Exact_Res'] == 'YES'])*100 / len(final_df)

print(f'Winning game accuracy: {accuracy}% \nExact result accuracy: {accuracy_exact_result}%')


Winning game accuracy: 55.55555555555556% 
Exact result accuracy: 22.22222222222222%
