In [70]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import poisson
# Load the raw international match results into a DataFrame.
data = pd.read_csv(filepath_or_buffer='World Football Results 2018 to 2022 - data.csv')

In [71]:
# Print the frame's column names, dtypes and non-null counts.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4096 entries, 0 to 4095
Data columns (total 23 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   year         4096 non-null   int64  
 1   date         4096 non-null   object 
 2   tournament   4096 non-null   object 
 3   h_team       4096 non-null   object 
 4   a_team       4096 non-null   object 
 5   h_elo        4096 non-null   int64  
 6   a_elo        4096 non-null   int64  
 7   h_score      4096 non-null   int64  
 8   a_score      4096 non-null   int64  
 9   location     4096 non-null   object 
 10  neutral      4096 non-null   object 
 11  Unnamed: 11  0 non-null      float64
 12  Unnamed: 12  0 non-null      float64
 13  Unnamed: 13  0 non-null      float64
 14  Unnamed: 14  0 non-null      float64
 15  Unnamed: 15  0 non-null      float64
 16  Unnamed: 16  0 non-null      float64
 17  Unnamed: 17  0 non-null      float64
 18  Unnamed: 18  0 non-null      float64
 19  Unname

In [72]:
# Remove only the columns that are completely empty (the "Unnamed: N"
# columns reported with 0 non-null values above).  The default how='any'
# would also silently discard a real column that has a single missing value.
data = data.dropna(axis=1, how='all')

In [73]:
# Count the missing values remaining in each column.
data.isna().sum()

year          0
date          0
tournament    0
h_team        0
a_team        0
h_elo         0
a_elo         0
h_score       0
a_score       0
location      0
neutral       0
dtype: int64

In [74]:
# Number of matches recorded per tournament, most frequent first.
data.loc[:, 'tournament'].value_counts()

Friendly                                  1296
World Cup qualifier                        579
African Nations Cup qualifier              272
European Championship qualifier            212
European Nations League C                  142
World Cup and Asian Cup qualifier          134
Friendly tournament                        129
World Cup                                  128
European Nations League A                  119
European Nations League B                  113
African Nations Cup                        104
COSAFA Cup                                  90
European Nations League D                   84
CONCACAF Nations League qualifier           68
CONCACAF Championship                       62
CONCACAF Nations League B                   62
Copa América                                54
Asian Cup qualifier                         53
European Championship                       51
Asian Cup                                   51
CONCACAF Nations League C                   40
Southeast Asi

In [75]:
# Match counts for every tournament whose name contains "Asian Championship"
# (case-insensitive).
data.loc[
    data['tournament'].str.contains('Asian Championship', case=False),
    'tournament',
].value_counts()

Southeast Asian Championship              37
South Asian Championship                  23
West Asian Championship                   17
East Asian Championship qualifier         12
East Asian Championship                    6
Southeast Asian Championship qualifier     4
Name: tournament, dtype: int64

In [76]:
# Keep only matches whose tournament name contains
# "Southeast Asian Championship" (case-insensitive; the qualifier rows match too).
data = data.loc[
    data['tournament'].str.contains('Southeast Asian Championship', case=False)
]

In [77]:
# One frame per side of the fixture: that side's team name plus both scores.
data_home = data.loc[:, ['h_team', 'h_score', 'a_score']]
data_away = data.loc[:, ['a_team', 'h_score', 'a_score']]

In [78]:
# Put both frames on a common schema seen from the team's own perspective:
# for the home side h_score is "scored"; for the away side it is "conceded".
data_home = data_home.rename(
    {'h_team': 'Team', 'h_score': 'GoalsScored', 'a_score': 'GoalsConceded'},
    axis='columns',
)
data_away = data_away.rename(
    {'a_team': 'Team', 'h_score': 'GoalsConceded', 'a_score': 'GoalsScored'},
    axis='columns',
)

In [79]:
# Stack home and away appearances, then average per team to get each team's
# mean goals scored and mean goals conceded per match.
team_strength = (
    pd.concat([data_home, data_away], ignore_index=True)
    .groupby(by='Team')
    .mean()
)
team_strength

Unnamed: 0_level_0,GoalsScored,GoalsConceded
Team,Unnamed: 1_level_1,Unnamed: 2_level_1
Brunei,1.285714,3.285714
Cambodia,1.333333,2.166667
East Timor,1.25,3.375
Indonesia,2.285714,1.285714
Laos,0.5,3.833333
Malaysia,1.7,0.8
Myanmar,1.5,1.5
Philippines,1.555556,1.666667
Singapore,2.0,1.4
Thailand,3.111111,0.777778


In [80]:
def predict_point(home, away, strength=None, max_goals=10):
    """Predict expected points and outcome probabilities for one match.

    Each side's goal count is modelled as an independent Poisson variable.
    A side's rate is its own average goals scored multiplied by the
    opponent's average goals conceded.

    Parameters
    ----------
    home, away : str
        Team names, looked up in the index of the strength table.
    strength : pandas.DataFrame, optional
        Table indexed by team name with 'GoalsScored' and 'GoalsConceded'
        columns.  Defaults to the notebook-level ``team_strength``.
    max_goals : int, optional
        Largest goal count per team included in the scoreline grid
        (default 10).  Scorelines beyond it are ignored, so the three
        probabilities can sum to slightly less than 100.

    Returns
    -------
    list
        ``[(points_home, points_away), (home_win_pct, draw_pct, away_win_pct)]``
        where points are 3 * P(win) + P(draw) and the percentages are in 0-100.
        If either team is missing from the table the result is
        ``[(0, 0), (0, 0, 0)]``.
    """
    if strength is None:
        strength = team_strength

    # Guard clause for unknown teams.  The original fallback referenced
    # probabilities that were never assigned on this path and therefore
    # raised UnboundLocalError instead of returning zeros.
    if home not in strength.index or away not in strength.index:
        return [(0, 0), (0, 0, 0)]

    lamb_home = strength.at[home, 'GoalsScored'] * strength.at[away, 'GoalsConceded']
    lamb_away = strength.at[away, 'GoalsScored'] * strength.at[home, 'GoalsConceded']

    # score_grid[x, y] = P(home scores x) * P(away scores y).
    goals = np.arange(max_goals + 1)
    score_grid = np.outer(poisson.pmf(goals, lamb_home), poisson.pmf(goals, lamb_away))

    prob_draw = float(np.trace(score_grid))               # x == y
    prob_home = float(np.tril(score_grid, k=-1).sum())    # x > y (below diagonal)
    prob_away = float(np.triu(score_grid, k=1).sum())     # x < y (above diagonal)

    points_home = 3 * prob_home + prob_draw
    points_away = 3 * prob_away + prob_draw
    return [(points_home, points_away), (prob_home * 100, prob_draw * 100, prob_away * 100)]

In [81]:
# Example prediction: Vietnam at home against Singapore.
predict_point(home='Vietnam', away='Singapore')

[(2.586021026054972, 0.3051315646918386),
 (82.63122219358318, 10.708436024747654, 6.601573481478735)]

Vietnam is predicted to win the match.
Expected points: Vietnam: 2.59 pts, Singapore: 0.31 pts


Match outcome probabilities: Vietnam win: 82.63%, Draw: 10.71%, Singapore win: 6.60%