In [278]:
import pandas as pd
import pickle
from scipy.stats import poisson

In [279]:
dict_table = pickle.load(open('dict_table','rb'))
historical_data = pd.read_csv('clean_fifa_worldcup_matches.txt')
fixture_data = pd.read_csv('clean_fifa_worldcup_fixture.txt')

In [280]:
dict_table

{'Group A':    Pos         Team  Pld  W  D  L  GF  GA  GD  Pts
 0    1    Qatar (H)    0  0  0  0   0   0   0    0
 1    2      Ecuador    0  0  0  0   0   0   0    0
 2    3      Senegal    0  0  0  0   0   0   0    0
 3    4  Netherlands    0  0  0  0   0   0   0    0,
 'Group B':    Pos           Team  Pld  W  D  L  GF  GA  GD  Pts
 0    1        England    0  0  0  0   0   0   0    0
 1    2           Iran    0  0  0  0   0   0   0    0
 2    3  United States    0  0  0  0   0   0   0    0
 3    4          Wales    0  0  0  0   0   0   0    0,
 'Group C':    Pos          Team  Pld  W  D  L  GF  GA  GD  Pts
 0    1     Argentina    0  0  0  0   0   0   0    0
 1    2  Saudi Arabia    0  0  0  0   0   0   0    0
 2    3        Mexico    0  0  0  0   0   0   0    0
 3    4        Poland    0  0  0  0   0   0   0    0,
 'Group D':    Pos       Team  Pld  W  D  L  GF  GA  GD  Pts
 0    1     France    0  0  0  0   0   0   0    0
 1    2  Australia    0  0  0  0   0   0   0    0
 2    3 

### 1 Calculate Team Strength

In [281]:
home_data = historical_data[['HomeTeam' , 'HomeGoals' , 'AwayGoals']]
away_data = historical_data[['AwayTeam' , 'HomeGoals' , 'AwayGoals']]

In [282]:
home_data = home_data.rename(columns={'HomeTeam':'Team' , 'HomeGoals':'GoalsScored' , 'AwayGoals':'GoalsConceded'})
away_data = away_data.rename(columns={'AwayTeam':'Team' , 'HomeGoals':'GoalsConceded' , 'AwayGoals':'GoalsScored'})

In [283]:
team_strength = pd.concat([home_data, away_data], ignore_index=True).groupby('Team').mean()
team_strength

Unnamed: 0_level_0,GoalsScored,GoalsConceded
Team,Unnamed: 1_level_1,Unnamed: 2_level_1
Algeria,1.000000,1.461538
Angola,0.333333,0.666667
Argentina,1.691358,1.148148
Australia,0.812500,1.937500
Austria,1.482759,1.620690
...,...,...
Uruguay,1.553571,1.321429
Wales,0.800000,0.800000
West Germany,2.112903,1.241935
Yugoslavia,1.666667,1.272727


### 2 Function for predicting points

In [284]:
def predictor(home, away):
    if home in team_strength.index and away in team_strength.index:
        lambda_home = team_strength.at[home, 'GoalsScored'] * team_strength.at[away, 'GoalsConceded']
        lambda_away = team_strength.at[away, 'GoalsScored'] * team_strength.at[home, 'GoalsConceded']
        home_prob, away_prob, draw_prob = 0, 0, 0
        for x in range(0,11):
            for y in range(0,11):
                p = poisson.pmf(x, lambda_home) * poisson.pmf(y, lambda_away)
                if x==y:
                    draw_prob += p
                elif x>y:
                    home_prob += p
                else:
                    away_prob += p
                    
        home_points = 3 * home_prob + draw_prob
        away_points = 3 * away_prob + draw_prob
        return (home_points, away_points)
    else:
        return (0, 0)

### 2.1 Testing function

In [285]:
predictor('Argentina', 'Mexico')
predictor('England', 'United States')

(2.2356147635326007, 0.5922397535606193)

### Predicting FIFA World Cup 2022, Qatar

##### Group Stage

In [286]:
group_fixture = fixture_data[:48].copy()
knockout_fixture = fixture_data[48:56].copy()
quarter_fixture = fixture_data[56:60].copy()
semi_fixture = fixture_data[60:62].copy()
final_fixture = fixture_data[62:].copy()

In [287]:
group_fixture

Unnamed: 0,home,score,away,year
0,Qatar,Match 1,Ecuador,2022
1,Senegal,Match 2,Netherlands,2022
2,Qatar,Match 18,Senegal,2022
3,Netherlands,Match 19,Ecuador,2022
4,Ecuador,Match 35,Senegal,2022
5,Netherlands,Match 36,Qatar,2022
6,England,Match 3,Iran,2022
7,United States,Match 4,Wales,2022
8,Wales,Match 17,Iran,2022
9,England,Match 20,United States,2022


In [288]:
for group in dict_table:
    print(dict_table[f'{group}'])

   Pos         Team  Pld  W  D  L  GF  GA  GD  Pts
0    1    Qatar (H)    0  0  0  0   0   0   0    0
1    2      Ecuador    0  0  0  0   0   0   0    0
2    3      Senegal    0  0  0  0   0   0   0    0
3    4  Netherlands    0  0  0  0   0   0   0    0
   Pos           Team  Pld  W  D  L  GF  GA  GD  Pts
0    1        England    0  0  0  0   0   0   0    0
1    2           Iran    0  0  0  0   0   0   0    0
2    3  United States    0  0  0  0   0   0   0    0
3    4          Wales    0  0  0  0   0   0   0    0
   Pos          Team  Pld  W  D  L  GF  GA  GD  Pts
0    1     Argentina    0  0  0  0   0   0   0    0
1    2  Saudi Arabia    0  0  0  0   0   0   0    0
2    3        Mexico    0  0  0  0   0   0   0    0
3    4        Poland    0  0  0  0   0   0   0    0
   Pos       Team  Pld  W  D  L  GF  GA  GD  Pts
0    1     France    0  0  0  0   0   0   0    0
1    2  Australia    0  0  0  0   0   0   0    0
2    3    Denmark    0  0  0  0   0   0   0    0
3    4    Tunisia    0  

In [289]:
for group in dict_table:
    print(dict_table[group]['Team'].values)

['Qatar (H)' 'Ecuador' 'Senegal' 'Netherlands']
['England' 'Iran' 'United States' 'Wales']
['Argentina' 'Saudi Arabia' 'Mexico' 'Poland']
['France' 'Australia' 'Denmark' 'Tunisia']
['Spain' 'Costa Rica' 'Germany' 'Japan']
['Belgium' 'Canada' 'Morocco' 'Croatia']
['Brazil' 'Serbia' 'Switzerland' 'Cameroon']
['Portugal' 'Ghana' 'Uruguay' 'South Korea']


In [290]:
for group in dict_table:
    group_teams = dict_table[group]['Team'].values
    group_matches = group_fixture[group_fixture['home'].isin(group_teams)]
    for index, row in group_matches.iterrows():
        home, away = row['home'], row['away']
        points_home, points_away = predictor(home, away)
        dict_table[group].loc[dict_table[group]['Team'] == home, 'Pts'] += points_home
        dict_table[group].loc[dict_table[group]['Team'] == away, 'Pts'] += points_away

    dict_table[group] = dict_table[group].sort_values('Pts', ascending=False).reset_index()
    dict_table[group] = dict_table[group][['Team', 'Pts']]
    dict_table[group] = dict_table[group].round(0)

In [291]:
dict_table['Group C']

Unnamed: 0,Team,Pts
0,Argentina,7.0
1,Poland,6.0
2,Mexico,4.0
3,Saudi Arabia,1.0


##### Knockout

In [292]:
knockout_fixture

Unnamed: 0,home,score,away,year
48,Winners Group A,Match 49,Runners-up Group B,2022
49,Winners Group C,Match 50,Runners-up Group D,2022
50,Winners Group D,Match 52,Runners-up Group C,2022
51,Winners Group B,Match 51,Runners-up Group A,2022
52,Winners Group E,Match 53,Runners-up Group F,2022
53,Winners Group G,Match 54,Runners-up Group H,2022
54,Winners Group F,Match 55,Runners-up Group E,2022
55,Winners Group H,Match 56,Runners-up Group G,2022


In [293]:
for group in dict_table:
    group_winner = dict_table[group].loc[0, 'Team']
    runners_up = dict_table[group].loc[1, 'Team']
    knockout_fixture.replace({f'Winners {group}':group_winner,
                                 f'Runners-up {group}':runners_up}, inplace=True)

knockout_fixture['winner'] = '?'
knockout_fixture

Unnamed: 0,home,score,away,year,winner
48,Netherlands,Match 49,Wales,2022,?
49,Argentina,Match 50,Denmark,2022,?
50,France,Match 52,Poland,2022,?
51,England,Match 51,Senegal,2022,?
52,Germany,Match 53,Belgium,2022,?
53,Brazil,Match 54,Uruguay,2022,?
54,Croatia,Match 55,Spain,2022,?
55,Portugal,Match 56,Switzerland,2022,?


In [294]:
def predict_winner(knockout_fixture_updated):
    for index, row in knockout_fixture_updated.iterrows():
        home, away = row['home'], row['away']
        points_home, points_away = predictor(home, away)
        if points_home > points_away:
            winner = home
        else:
            winner = away
        knockout_fixture_updated.loc[index, 'winner'] = winner
    return knockout_fixture_updated

In [295]:
predict_winner(knockout_fixture)

Unnamed: 0,home,score,away,year,winner
48,Netherlands,Match 49,Wales,2022,Netherlands
49,Argentina,Match 50,Denmark,2022,Argentina
50,France,Match 52,Poland,2022,France
51,England,Match 51,Senegal,2022,England
52,Germany,Match 53,Belgium,2022,Germany
53,Brazil,Match 54,Uruguay,2022,Brazil
54,Croatia,Match 55,Spain,2022,Spain
55,Portugal,Match 56,Switzerland,2022,Portugal


In [296]:
quarter_fixture

Unnamed: 0,home,score,away,year
56,Winners Match 53,Match 58,Winners Match 54,2022
57,Winners Match 49,Match 57,Winners Match 50,2022
58,Winners Match 55,Match 60,Winners Match 56,2022
59,Winners Match 51,Match 59,Winners Match 52,2022


##### Quarter Final

In [297]:
def update_table(semi_fixture, final_fixture):
    for index, row in semi_fixture.iterrows():
        winner = semi_fixture.loc[index, 'winner']
        match = semi_fixture.loc[index, 'score']
        final_fixture.replace({f'Winners {match}' : winner}, inplace = True)
    final_fixture['winner'] = '?'
    return final_fixture

In [299]:
update_table(knockout_fixture, quarter_fixture)

Unnamed: 0,home,score,away,year,winner
56,Germany,Match 58,Brazil,2022,?
57,Netherlands,Match 57,Argentina,2022,?
58,Spain,Match 60,Portugal,2022,?
59,England,Match 59,France,2022,?


In [300]:
predict_winner(quarter_fixture)

Unnamed: 0,home,score,away,year,winner
56,Germany,Match 58,Brazil,2022,Brazil
57,Netherlands,Match 57,Argentina,2022,Netherlands
58,Spain,Match 60,Portugal,2022,Portugal
59,England,Match 59,France,2022,France


In [301]:
update_table(quarter_fixture, semi_fixture)

Unnamed: 0,home,score,away,year,winner
60,Netherlands,Match 61,Brazil,2022,?
61,France,Match 62,Portugal,2022,?


In [302]:
predict_winner(semi_fixture)

Unnamed: 0,home,score,away,year,winner
60,Netherlands,Match 61,Brazil,2022,Brazil
61,France,Match 62,Portugal,2022,France


In [307]:
update_table(semi_fixture, final_fixture)

Unnamed: 0,home,score,away,year,winner
62,Losers Match 61,Match 63,Losers Match 62,2022,?
63,Brazil,Match 64,France,2022,?


In [308]:
predict_winner(final_fixture)

Unnamed: 0,home,score,away,year,winner
62,Losers Match 61,Match 63,Losers Match 62,2022,Losers Match 62
63,Brazil,Match 64,France,2022,Brazil
