# Can Poisson distribution be used for betting?

In [50]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import seaborn
from scipy.stats import poisson,skellam

#https://www.football-data.co.uk/portugalm.php
league = pd.read_csv('P1.csv')
league = league[['HomeTeam','AwayTeam','FTHG','FTAG']].rename(columns={'FTHG': 'Home goals', 'FTAG': 'Away goals', 'AwayTeam': 'Away team', 'HomeTeam': 'Home team' })

In [71]:
league

Unnamed: 0,Home team,Away team,Home goals,Away goals
0,Benfica,Arouca,4,0
1,Rio Ave,Vizela,0,1
2,Estoril,Famalicao,2,0
3,Porto,Maritimo,5,1
4,Santa Clara,Casa Pia,0,0
...,...,...,...,...
76,Pacos Ferreira,Guimaraes,0,1
77,Boavista,Maritimo,1,1
78,Casa Pia,Vizela,0,1
79,Sp Braga,Chaves,0,1


In [52]:
#original_league = league
#league_till_last_round = league[-9:]
#league = league[:-9]
league.mean()


Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.



Home goals    1.395062
Away goals    1.098765
dtype: float64

In [72]:
league_till_last_round = pd.read_excel('/content/Jornada_10_ligapt.xlsx')
league_till_last_round.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 0 to 8
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Home team   9 non-null      object 
 1   Away team   9 non-null      object 
 2   Home goals  0 non-null      float64
 3   Away goals  0 non-null      float64
dtypes: float64(2), object(2)
memory usage: 416.0+ bytes


In [54]:
league[['Home goals','Away goals']].max()

Home goals    5
Away goals    6
dtype: int64

In [55]:
poisson.pmf(3,1.395062)*100

11.214025738951833

In [56]:
import plotly.graph_objects as go

#Y-Axis ticks for the line
max_goals = max(league[['Home goals','Away goals']].max())+1
goals = []
for i in range(max_goals):
  goals.append(i)
  goals[i] = str(goals[i])

#Poisson predictions as percentage
prob_poisson_home = []
prob_poisson_away = []
for i in range(max_goals):
    prob_poisson_home.append(poisson.pmf(i, league.mean().to_list()[0])*100)
    prob_poisson_away.append(poisson.pmf(i, league.mean().to_list()[1])*100)

fig = go.Figure()
fig.add_trace(go.Histogram(
    x=league['Home goals'],
    histnorm='percent',
    name='Home goals',
    marker_color='#E13C3C',
    opacity=0.65
))
fig.add_trace(go.Histogram(
    x=league['Away goals'],
    histnorm='percent',
    name='Away goals',
    marker_color='#4C3CE1',
    opacity=0.65
))

fig.add_trace(go.Scatter(x=goals, y=prob_poisson_home,
                    mode='lines+markers',
                    line = dict(color='#E13C3C', width=4, dash='dash'),
                    name='Home goals - Poisson', ))

fig.add_trace(go.Scatter(x=goals, y=prob_poisson_away,
                    mode='lines+markers',
                    line = dict(color='#4C3CE1', width=4, dash='dash'),
                    name='Away goals - Poisson'))

fig.update_layout(title='Real nº goals per match vs Poisson prediction\n         Liga Portugal 2021/22',
                    title_font_family="Calibri",
                    title_font_color="black",
                   xaxis_title='Goals in a match',
                   yaxis_title='% Probability',
                    barmode='group')

fig.update_layout({
'plot_bgcolor': 'rgba(255,250,250 250)',
'paper_bgcolor': 'rgba(255,250,250,250)',
})




Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.


Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.



## Predict a match of the Premier League

In [57]:
import statsmodels.api as sm
import statsmodels.formula.api as smf

model_data = pd.concat([league[['Home team','Away team','Home goals']].assign(home=1).rename(
            columns={'Home team':'team', 'Away team':'opponent','Home goals':'goals'}),
           league[['Away team','Home team','Away goals']].assign(home=0).rename(
            columns={'Away team':'team', 'Home team':'opponent','Away goals':'goals'})])

poisson_model = smf.glm(formula="goals ~ home + team + opponent", data=model_data, 
                        family=sm.families.Poisson()).fit()
poisson_model.summary()

0,1,2,3
Dep. Variable:,goals,No. Observations:,162.0
Model:,GLM,Df Residuals:,126.0
Model Family:,Poisson,Df Model:,35.0
Link Function:,log,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-198.56
Date:,"Mon, 17 Oct 2022",Deviance:,133.13
Time:,20:50:46,Pearson chi2:,116.0
No. Iterations:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.6546,0.419,1.561,0.119,-0.168,1.477
team[T.Benfica],0.4853,0.383,1.268,0.205,-0.265,1.236
team[T.Boavista],-0.4116,0.462,-0.892,0.373,-1.316,0.493
team[T.Casa Pia],-0.4002,0.459,-0.872,0.383,-1.299,0.499
team[T.Chaves],-0.1723,0.491,-0.351,0.726,-1.134,0.790
team[T.Estoril],0.0869,0.442,0.197,0.844,-0.780,0.954
team[T.Famalicao],-0.5499,0.517,-1.064,0.287,-1.563,0.463
team[T.Gil Vicente],-0.4860,0.482,-1.008,0.313,-1.431,0.459
team[T.Guimaraes],-0.3883,0.496,-0.783,0.434,-1.361,0.584


In [28]:
import math
benfica_home_goals = math.exp(0.0436+0.8868-0.5195+0.1496)
porto_away_goals = math.exp(0.0436+0.9801-0.8177)

display(benfica_home_goals, porto_away_goals)

1.7515480554167886

1.2287532039953122

## Testing the last matchday

In [66]:
league_till_last_round = league_till_last_round.reset_index()

In [73]:
def simulate_match(foot_model, homeTeam, awayTeam, max_goals=10):
    home_goals_avg = foot_model.predict(pd.DataFrame(data={'team': homeTeam, 
                                                            'opponent': awayTeam,'home':1},
                                                      index=[1])).values[0]
    away_goals_avg = foot_model.predict(pd.DataFrame(data={'team': awayTeam, 
                                                            'opponent': homeTeam,'home':0},
                                                      index=[1])).values[0]
    team_pred = [[poisson.pmf(i, team_avg) for i in range(0, max_goals+1)] for team_avg in [home_goals_avg, away_goals_avg]]
    return(np.outer(np.array(team_pred[0]), np.array(team_pred[1])))


In [74]:
from numpy import unravel_index

home_match = []
away_match = []
home_goals = []
away_goals = []
pred_list_home = []
pred_list_away = []

for n in range(0,len(league_till_last_round['Home team'])):

    a = simulate_match(poisson_model, league_till_last_round['Home team'].loc[n], league_till_last_round['Away team'].loc[n], max_goals=7)
    b = unravel_index(a.argmax(), a.shape)
    
    home_match.append(league_till_last_round['Home team'].loc[n])
    away_match.append(league_till_last_round['Away team'].loc[n])
    home_goals.append(league_till_last_round['Home goals'].loc[n])
    away_goals.append(league_till_last_round['Away goals'].loc[n])
    pred_list_home.append(b[0])
    pred_list_away.append(b[1])
    

final_df = pd.DataFrame({'Home': home_match, 'Away': away_match,
                         'Home Goals': home_goals, 'Away Goals': away_goals,
                         'Home Prediction': pred_list_home, 'Away Prediction': pred_list_away})

In [75]:
league_till_last_round

Unnamed: 0,Home team,Away team,Home goals,Away goals
0,Porto,Benfica,,
1,Famalicao,Pacos Ferreira,,
2,Estoril,Sp Braga,,
3,Sp Lisbon,Casa Pia,,
4,Vizela,Santa Clara,,
5,Maritimo,Arouca,,
6,Chaves,Gil Vicente,,
7,Guimaraes,Boavista,,
8,Rio Ave,Portimonense,,


In [76]:
def transform_df(mod_df):
    mod_df.loc[(mod_df['Home Goals'] > mod_df['Away Goals']), '1x2'] = '1'
    mod_df.loc[(mod_df['Home Goals'] == mod_df['Away Goals']), '1x2'] = 'X'
    mod_df.loc[(mod_df['Home Goals'] < mod_df['Away Goals']), '1x2'] = '2'
    
    mod_df['Home Prediction Int'] = mod_df['Home Prediction'].round(0).astype(int)
    mod_df['Away Prediction Int'] = mod_df['Away Prediction'].round(0).astype(int)
    mod_df.loc[(mod_df['Home Prediction Int'] > mod_df['Away Prediction Int']), '1x2_Pred'] = '1'
    mod_df.loc[(mod_df['Home Prediction Int'] == mod_df['Away Prediction Int']), '1x2_Pred'] = 'X'
    mod_df.loc[(mod_df['Home Prediction Int'] < mod_df['Away Prediction Int']), '1x2_Pred'] = '2'
    
    mod_df['Exact_Res'] = 'NO'
    mod_df.loc[(mod_df['Home Goals'] == mod_df['Home Prediction Int']) & 
               (mod_df['Away Goals'] == mod_df['Away Prediction Int']), 'Exact_Res'] = 'YES'
    
    mod_df.loc[mod_df['1x2'] == mod_df['1x2_Pred'], 'Match'] = 'Yes'
    mod_df.loc[mod_df['1x2'] != mod_df['1x2_Pred'], 'Match'] = 'No'
    return mod_df

In [77]:
transform_df(final_df)[['Home','Away','Home Prediction','Away Prediction','1x2_Pred']]

Unnamed: 0,Home,Away,Home Prediction,Away Prediction,1x2_Pred
0,Porto,Benfica,1,0,1
1,Famalicao,Pacos Ferreira,1,0,1
2,Estoril,Sp Braga,1,1,X
3,Sp Lisbon,Casa Pia,1,0,1
4,Vizela,Santa Clara,1,0,1
5,Maritimo,Arouca,1,2,2
6,Chaves,Gil Vicente,1,0,1
7,Guimaraes,Boavista,1,0,1
8,Rio Ave,Portimonense,1,0,1


In [None]:
matching_results = final_df.groupby('Match')['Match'].count()
# accuracy
accuracy = matching_results[0]*100/(matching_results[0] + matching_results[1])
accuracy_exact_result = len(final_df[final_df['Exact_Res'] == 'YES'])*100 / len(final_df)

print(f'Winning game accuracy: {accuracy}% \nExact result accuracy: {accuracy_exact_result}%')


Winning game accuracy: 55.55555555555556% 
Exact result accuracy: 22.22222222222222%
