In [72]:
import pandas as pd
import pickle
from scipy.stats import poisson

In [73]:
# Load the pickled group standings and the two CSV inputs.
# NOTE: pickle.load can execute arbitrary code while deserialising; only load
# 'group_table' if it comes from a trusted source.
with open('group_table', 'rb') as group_table_file:  # closes the handle deterministically
    group_table = pickle.load(group_table_file)
df_historical = pd.read_csv('./historic_matches.csv')
df_fixture = pd.read_csv('./current_cup_fixture.csv')

In [74]:
# Inspect the loaded standings: a dict keyed by group name ('Group A', ...)
# whose values are the per-group tables shown below.
group_table

{'Group A':    Pos         Team  Pld  W  D  L  GF  GA  GD  Pts
 0    1  Netherlands    3  2  1  0   5   1  +4    7
 1    2      Senegal    3  2  0  1   5   4  +1    6
 2    3      Ecuador    3  1  1  1   4   3  +1    4
 3    4    Qatar (H)    3  0  0  3   1   7  −6    0,
 'Group B':    Pos           Team  Pld  W  D  L  GF  GA  GD  Pts
 0    1        England    3  2  1  0   9   2  +7    7
 1    2  United States    3  1  2  0   2   1  +1    5
 2    3           Iran    3  1  0  2   4   7  −3    3
 3    4          Wales    3  0  1  2   1   6  −5    1,
 'Group C':    Pos          Team  Pld  W  D  L  GF  GA  GD  Pts
 0    1     Argentina    3  2  0  1   5   2  +3    6
 1    2        Poland    3  1  1  1   2   2   0    4
 2    3        Mexico    3  1  1  1   2   3  −1    4
 3    4  Saudi Arabia    3  1  0  2   3   5  −2    3,
 'Group D':    Pos       Team  Pld  W  D  L  GF  GA  GD  Pts
 0    1     France    3  2  0  1   6   3  +3    6
 1    2  Australia    3  2  0  1   3   4  −1    6
 2    3 

# 1 Calculate Team Strength

In [75]:
# One frame per side of each historical match: the team column for that side
# plus the two goal-count columns, which both frames share.
df_team1, df_team2 = (
    df_historical[[side, 'goals', 'goalsGotten']] for side in ('team1', 'team2')
)

In [76]:
# Rename columns so each frame describes the match from the listed team's side;
# the two goal columns swap roles between the team1 and team2 frames.
# NOTE(review): presumably `goals` is team1's tally and `goalsGotten` is team2's — confirm against the CSV.
df_home = df_team1.rename(
    columns=dict(team1='Team', goals='GoalsScored', goalsGotten='GoalsConceded')
)
df_away = df_team2.rename(
    columns=dict(team2='Team', goals='GoalsConceded', goalsGotten='GoalsScored')
)

In [77]:
# Stack both per-side frames, then average each team's goals scored and conceded.
df_team_strength = (
    pd.concat((df_home, df_away), ignore_index=True)
    .groupby(by='Team')
    .mean()
)
df_team_strength

Unnamed: 0_level_0,GoalsScored,GoalsConceded
Team,Unnamed: 1_level_1,Unnamed: 2_level_1
Algeria,1.0,1.285714
Angola,0.333333,0.666667
Argentina,1.68,1.0
Australia,1.0,1.882353
Belgium,1.533333,0.733333
Bosnia and Herzegovina,1.333333,1.333333
Brazil,1.64,0.96
Cameroon,0.777778,2.0
Canada,0.666667,2.333333
Chile,1.125,1.125


# 2 Predict Points for a Match

In [78]:
def predict_points(home, away, max_goals=10):
    """Estimate the expected points of both sides in a single match.

    Each side's goal count is modelled as an independent Poisson variable.
    A side's rate is its average ``GoalsScored`` multiplied by the opponent's
    average ``GoalsConceded``, both read from the module-level
    ``df_team_strength`` table.

    Parameters
    ----------
    home, away : str
        Team names, looked up in the index of ``df_team_strength``.
    max_goals : int, optional
        Largest goal count per side included in the score grid. The default
        of 10 reproduces the previously hard-coded ``range(0, 11)``; any
        probability mass above this cap is ignored.

    Returns
    -------
    tuple
        ``(points_home, points_away)``, where each value is
        ``3 * P(win) + P(draw)``. Returns ``(0, 0)`` when either team is
        missing from ``df_team_strength``.
    """
    known_teams = df_team_strength.index
    if home not in known_teams or away not in known_teams:
        return 0, 0

    lamb_home = df_team_strength.at[home, 'GoalsScored'] * df_team_strength.at[away, 'GoalsConceded']
    lamb_away = df_team_strength.at[away, 'GoalsScored'] * df_team_strength.at[home, 'GoalsConceded']

    # The pmf of each goal count depends on one side only, so evaluate it once
    # per side instead of once per cell of the score grid.
    goal_counts = range(max_goals + 1)
    home_pmf = [poisson.pmf(goals, lamb_home) for goals in goal_counts]
    away_pmf = [poisson.pmf(goals, lamb_away) for goals in goal_counts]

    prob_home, prob_away, prob_draw = 0, 0, 0
    for x in goal_counts:
        for y in goal_counts:
            p = home_pmf[x] * away_pmf[y]
            if x == y:
                prob_draw += p
            elif x > y:
                prob_home += p
            else:
                prob_away += p

    points_home = 3 * prob_home + prob_draw
    points_away = 3 * prob_away + prob_draw
    return points_home, points_away


# Test For Winner in Matchup

In [109]:
# Sanity-check the model on one pairing; the result is (points_home, points_away).
predict_points('England', 'France')

(1.1382455184286462, 1.6068296893970255)

In [80]:
# Split the fixture list by position: the first four rows and the next four.
# Copies are taken so later in-place edits do not touch df_fixture itself.
df_fixture_knockout = df_fixture.iloc[:4].copy()
df_fixture_quarter = df_fixture.iloc[4:8].copy()
# Only the last bare expression of a cell is rendered, so show both frames
# explicitly instead of relying on two bare names.
display(df_fixture_knockout, df_fixture_quarter)

Unnamed: 0,team1,score,team2,year
4,Winners Match 53,Match 58,Winners Match 54,2022
5,Netherlands,Match 57,Argentina,2022
6,Winners Match 55,Match 60,Winners Match 56,2022
7,England,Match 59,France,2022


In [81]:
# Add the predicted points of each selected fixture to the group standings,
# then reduce every table to a ranking of Team / Pts.
# NOTE(review): df_fixture_knockout holds fixtures Match 53-56, which look like
# post-group matches; adding their expected points to group totals may be
# unintended — confirm.
for group in group_table:
    standings = group_table[group]
    teams_in_group = standings['Team'].values
    df_knockout = df_fixture_knockout[df_fixture_knockout['team1'].isin(teams_in_group)]

    if not df_knockout.empty:
        # Predicted points are fractional. Cast up front so the `+=` below does
        # not depend on silent int -> float upcasting inside `.loc` assignment,
        # which newer pandas versions deprecate.
        standings['Pts'] = standings['Pts'].astype(float)

    for _, row in df_knockout.iterrows():
        home, away = row['team1'], row['team2']
        points_home, points_away = predict_points(home, away)
        standings.loc[standings['Team'] == home, 'Pts'] += points_home
        standings.loc[standings['Team'] == away, 'Pts'] += points_away

    # Rank on the unrounded totals, then round for presentation.
    group_table[group] = (
        standings.sort_values('Pts', ascending=False)
        .reset_index(drop=True)[['Team', 'Pts']]
        .round(0)
    )

In [82]:
# Inspect one group's table, now reduced to the Team and Pts columns.
group_table['Group A']

Unnamed: 0,Team,Pts
0,Netherlands,7
1,Senegal,6
2,Ecuador,4
3,Qatar (H),0


In [83]:
# Show the first four fixtures (Match 53-56) before any winner is predicted.
df_fixture_knockout

Unnamed: 0,team1,score,team2,year
0,Japan,Match 53,Croatia,2022
1,Brazil,Match 54,South Korea,2022
2,Morocco,Match 55,Spain,2022
3,Portugal,Match 56,Switzerland,2022


In [84]:

# Swap the 'Winners <group>' / 'Runners-up <group>' placeholders in the fixture
# table for the teams ranked first and second in each group table.
for group, standings in group_table.items():
    group_winner, runners_up = standings.loc[[0, 1], 'Team']
    placeholders = {
        f'Winners {group}': group_winner,
        f'Runners-up {group}': runners_up,
    }
    df_fixture_knockout.replace(placeholders, inplace=True)

# No winner has been predicted yet.
df_fixture_knockout['winner'] = '?'
df_fixture_knockout

Unnamed: 0,team1,score,team2,year,winner
0,Japan,Match 53,Croatia,2022,?
1,Brazil,Match 54,South Korea,2022,?
2,Morocco,Match 55,Spain,2022,?
3,Portugal,Match 56,Switzerland,2022,?


In [85]:

def get_winner(df_fixture_updated):
    """Fill the ``winner`` column of a fixture table, one match at a time.

    For every row, ``predict_points`` is called with ``team1`` and ``team2``.
    ``team1`` is recorded as the winner only when its value is strictly
    larger; otherwise ``team2`` is recorded. The table is modified in place
    and the same object is returned.
    """
    fixtures = list(zip(df_fixture_updated.index,
                        df_fixture_updated['team1'],
                        df_fixture_updated['team2']))
    for label, team_one, team_two in fixtures:
        pts_one, pts_two = predict_points(team_one, team_two)
        # Equal values fall through to team2, exactly like a plain if/else.
        df_fixture_updated.loc[label, 'winner'] = team_one if pts_one > pts_two else team_two
    return df_fixture_updated


In [86]:
# Predict a winner for each of these fixtures; get_winner writes the 'winner'
# column of the frame it is given and returns that same frame.
get_winner(df_fixture_knockout)

Unnamed: 0,team1,score,team2,year,winner
0,Japan,Match 53,Croatia,2022,Croatia
1,Brazil,Match 54,South Korea,2022,Brazil
2,Morocco,Match 55,Spain,2022,Spain
3,Portugal,Match 56,Switzerland,2022,Switzerland


In [87]:

def update_table(df_fixture_round_1, df_fixture_round_2):
    """Carry the winners of one round into the next round's fixture table.

    For each row of ``df_fixture_round_1``, every value in
    ``df_fixture_round_2`` equal to ``'Winners <score>'`` is replaced by that
    row's ``winner``. The next round's ``winner`` column is then reset to
    ``'?'``. ``df_fixture_round_2`` is modified in place and returned.
    """
    decided = zip(df_fixture_round_1['score'], df_fixture_round_1['winner'])
    for match_label, advancing_team in decided:
        df_fixture_round_2.replace({f'Winners {match_label}': advancing_team}, inplace=True)
    df_fixture_round_2['winner'] = '?'
    return df_fixture_round_2



In [88]:
# NOTE(review): commented-out next steps, kept for reference. The slice bounds
# 60:62 and 62: do not line up with the [:4] and [4:8] slices taken from
# df_fixture earlier in this notebook — confirm the intended rows before
# re-enabling.
# update_table(df_fixture_knockout, df_fixture_quarter)
# df_fixture_semi = df_fixture[60:62].copy()
#df_fixture_final = df_fixture[62:].copy()