In [18]:
import pandas as pd
import pickle
from scipy.stats import poisson

In [19]:
# Load the files previously cleaned and organised
dict_table = pickle.load(open('dict_table', 'rb'))
df_historical_data = pd.read_csv('clean_uefa_europeancup_historical_data.csv')
df_fixture = pd.read_csv('clean_uefa_europeancup_fixture.csv')

In [20]:
df_home = df_historical_data[['HomeTeam', 'HomeGoals', 'AwayGoals']]
df_away = df_historical_data[['AwayTeam', 'HomeGoals', 'AwayGoals']]

In [21]:
# Rename columns
# The away team will have its goals scored in AwayGoals
df_home = df_home.rename(columns={'HomeTeam': 'Team', 'HomeGoals': 'GoalsScored', 'AwayGoals': 'GoalsConceded'})
df_away = df_away.rename(columns={'AwayTeam': 'Team', 'HomeGoals': 'GoalsConceded', 'AwayGoals': 'GoalsScored'})

In [22]:
dict_table

{'Group A':    Pos         Team  Pld  W  D  L  GF  GA  GD  Pts  \
 0    1  Germany (H)    0  0  0  0   0   0   0    0   
 1    2     Scotland    0  0  0  0   0   0   0    0   
 2    3      Hungary    0  0  0  0   0   0   0    0   
 3    4  Switzerland    0  0  0  0   0   0   0    0   
 
                               Qualification  
 0                 Advance to knockout stage  
 1                 Advance to knockout stage  
 2  Possible knockout stage based on ranking  
 3                                       NaN  ,
 'Group B':    Pos     Team  Pld  W  D  L  GF  GA  GD  Pts  \
 0    1    Spain    0  0  0  0   0   0   0    0   
 1    2  Croatia    0  0  0  0   0   0   0    0   
 2    3    Italy    0  0  0  0   0   0   0    0   
 3    4  Albania    0  0  0  0   0   0   0    0   
 
                               Qualification  
 0                 Advance to knockout stage  
 1                 Advance to knockout stage  
 2  Possible knockout stage based on ranking  
 3                  

In [23]:
# Metric used to differentiate each team (CIS is a region formed after the dissolution of the URSS that participated in 1992)
df_team_strength = pd.concat([df_home, df_away], ignore_index=True).groupby('Team').mean()

In [24]:
df_team_strength

Unnamed: 0_level_0,GoalsScored,GoalsConceded
Team,Unnamed: 1_level_1,Unnamed: 2_level_1
Albania,0.333333,1.0
Austria,0.7,1.2
Belgium,1.409091,1.272727
Bulgaria,0.666667,2.166667
CIS,0.333333,1.333333
Croatia,1.363636,1.272727
Czech Republic,1.241379,1.275862
Czechoslovakia,1.5,1.25
Denmark,1.272727,1.515152
England,1.342105,0.973684


In [25]:
# Method to predict the points each team will get
# lambda will be the median of goals in 90 minutes (one lambda for each team)
def predict_points(home, away):
  if home in df_team_strength.index and away in df_team_strength.index:
    # goals_scored * goals_conceded
    lamb_home = df_team_strength.at[home, 'GoalsScored'] * df_team_strength.at[away, 'GoalsConceded']
    lamb_away = df_team_strength.at[away, 'GoalsScored'] * df_team_strength.at[home, 'GoalsConceded']

    prob_home, prob_away, prob_draw = 0, 0, 0
    # Let's say a team can only score up to 10 goals
    # x is the number of goals scored by the home team
    # y is the number of goals scored by the away team
    for x in range(0, 11):
      for y in range(0, 11):
        p = poisson.pmf(x, lamb_home) * poisson.pmf(y, lamb_away)
        if x == y:
          prob_draw += p
        elif x > y:
          prob_home += p
        else:
          prob_away += p

    points_home = 3 * prob_home + prob_draw
    points_away = 3 * prob_away + prob_draw
    return (points_home, points_away)

  else:
    return (0,0)


In [26]:
# Testing function
# Spain and Italy are in the same group
# predict_points('Spain', 'Italy')
# Portugal and Georgia are in the same group but the latest have never participated before so they do not appear in df_team_strength
# predict_points('Portugal', 'Georgia') Result: (0, 0)
# Serbia has never participated as an independent country too

In [27]:
df_fixture_group_stage = df_fixture[:36].copy()
df_fixture_round_of_16 = df_fixture[36:44].copy()
df_fixture_quarter_finals = df_fixture[44:48].copy()
df_fixture_semi_finals = df_fixture[48:50].copy()
df_fixture_final = df_fixture[50:].copy()

In [28]:
for group in dict_table:
  teams_in_group = dict_table[group]['Team'].values
  # Get the six matches every group has to avoid unnecessary simulations
  df_fixture_six = df_fixture_group_stage[df_fixture_group_stage['home'].isin(teams_in_group)]
  for index, row in df_fixture_six.iterrows():
    home, away = row['home'], row['away']
    points_home, points_away = predict_points(home, away)
    dict_table[group].loc[dict_table[group]['Team'] == home, 'Pts'] += points_home
    dict_table[group].loc[dict_table[group]['Team'] == away, 'Pts'] += points_away

  # The team with the highest score will be at the top of the table
  dict_table[group] = dict_table[group].sort_values('Pts', ascending=False).reset_index()
  dict_table[group] = dict_table[group][['Team', 'Pts']]
  dict_table[group] = dict_table[group].round(0)

In [29]:
df_fixture_round_of_16

Unnamed: 0,home,score,away,year
36,Runner-up Group A,Match 38,Runner-up Group B,2024
37,Winner Group A,Match 37,Runner-up Group C,2024
38,Winner Group C,Match 40,3rd Group D/E/F,2024
39,Winner Group B,Match 39,3rd Group A/D/E/F,2024
40,Runner-up Group D,Match 42,Runner-up Group E,2024
41,Winner Group F,Match 41,3rd Group A/B/C,2024
42,Winner Group E,Match 43,3rd Group A/B/C/D,2024
43,Winner Group D,Match 44,Runner-up Group F,2024


In [48]:
for group in dict_table:
  group_winner = dict_table[group].loc[0, 'Team']
  runner_up = dict_table[group].loc[1, 'Team']

  # Replace group winner and runner-up in the round of 16 fixtures
  df_fixture_round_of_16.replace({f'Winner {group}': group_winner, f'Runner-up {group}': runner_up}, inplace=True)
  # Replace manually the best third-place teams
  df_fixture_round_of_16.replace({f'3rd Group D/E/F': 'Poland'}, inplace=True)
  df_fixture_round_of_16.replace({f'3rd Group A/D/E/F': 'Ukraine'}, inplace=True)
  df_fixture_round_of_16.replace({f'3rd Group A/B/C': 'Slovenia'}, inplace=True)
  df_fixture_round_of_16.replace({f'3rd Group A/B/C/D': 'Croatia'}, inplace=True)

In [47]:
# Add winner column
df_fixture_round_of_16['winner'] = '?'
df_fixture_round_of_16

Unnamed: 0,home,score,away,year,winner
36,Hungary,Match 38,Italy,2024,?
37,Switzerland,Match 37,Denmark,2024,?
38,England,Match 40,Poland,2024,?
39,Spain,Match 39,Ukraine,2024,?
40,France,Match 42,Romania,2024,?
41,Portugal,Match 41,Slovenia,2024,?
42,Belgium,Match 43,Croatia,2024,?
43,Netherlands,Match 44,Czech Republic,2024,?


In [49]:
def get_winner(df_fixture_updated):
  for index, row in df_fixture_updated.iterrows():
    home, away = row['home'], row['away']
    points_home, points_away = predict_points(home, away)
    if points_home > points_away:
      winner = home
    else:
      winner = away
    df_fixture_updated.loc[index, 'winner'] = winner

  return df_fixture_updated

In [51]:
get_winner(df_fixture_round_of_16)

Unnamed: 0,home,score,away,year,winner
36,Hungary,Match 38,Italy,2024,Italy
37,Switzerland,Match 37,Denmark,2024,Denmark
38,England,Match 40,Poland,2024,England
39,Spain,Match 39,Ukraine,2024,Spain
40,France,Match 42,Romania,2024,France
41,Portugal,Match 41,Slovenia,2024,Portugal
42,Belgium,Match 43,Croatia,2024,Belgium
43,Netherlands,Match 44,Czech Republic,2024,Netherlands


In [63]:
def update_table(df_fixture_octavos, df_fixture_cuartos):
  for index, row in df_fixture_octavos.iterrows():
    winner = df_fixture_octavos.loc[index, 'winner']
    match = df_fixture_octavos.loc[index, 'score']
    df_fixture_cuartos.replace({f'Winner {match}': winner}, inplace=True)
  df_fixture_cuartos['winner'] = '?'

  return df_fixture_cuartos

In [68]:
update_table(df_fixture_round_of_16, df_fixture_quarter_finals)

Unnamed: 0,home,score,away,year,winner
44,Spain,Match 45,Denmark,2024,?
45,Portugal,Match 46,France,2024,?
46,England,Match 48,Italy,2024,?
47,Belgium,Match 47,Netherlands,2024,?


In [71]:
get_winner(df_fixture_quarter_finals)

Unnamed: 0,home,score,away,year,winner
44,Spain,Match 45,Denmark,2024,Spain
45,Portugal,Match 46,France,2024,Portugal
46,England,Match 48,Italy,2024,Italy
47,Belgium,Match 47,Netherlands,2024,Netherlands


In [73]:
update_table(df_fixture_quarter_finals, df_fixture_semi_finals)

Unnamed: 0,home,score,away,year,winner
48,Spain,Match 49,Portugal,2024,?
49,Netherlands,Match 50,Italy,2024,?


In [74]:
get_winner(df_fixture_semi_finals)

Unnamed: 0,home,score,away,year,winner
48,Spain,Match 49,Portugal,2024,Spain
49,Netherlands,Match 50,Italy,2024,Italy


In [75]:
update_table(df_fixture_semi_finals, df_fixture_final)

Unnamed: 0,home,score,away,year,winner
50,Spain,Match 51,Italy,2024,?


In [76]:
get_winner(df_fixture_final)

Unnamed: 0,home,score,away,year,winner
50,Spain,Match 51,Italy,2024,Italy
