# Euro 2024 Prediction Model 

In [3]:
import pandas as pd
import numpy as np
import random
import pickle
from sklearn.linear_model import PoissonRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

## Load the Data 

In [5]:
# load the historical data 
historical_data = pd.read_csv("clean_euro_cup_matches.csv")

# Load the fixtures of 2024 
fixtures = pd.read_csv("clean_euro_cup_fixtures.csv")

# load the group stage tables 
with open('dict_table','rb') as file: 
    group_standings = pickle.load(file)
    

## Seperating the fixtures 

In [7]:
# make copy so that original dataset will not be modified
group_stage_fixtures = fixtures[0:36].copy()
RO16_fixtures = fixtures[36:44].copy()
quarter_final_fixtures = fixtures[44:48].copy()
semi_final_fixtures = fixtures[48:50].copy()
final_fixture = fixtures[50:51].copy()

## Calculate Attack and Defence Strength of each Teams

In [9]:
historical_data.columns

Index(['HomeTeam', 'AwayTeam', 'Year', 'HomeGoals', 'AwayGoals', 'TotalGoals'], dtype='object')

In [10]:
team_stats_home = historical_data.groupby('HomeTeam').agg(
    HomeGoalsScored=('HomeGoals', 'mean'),
    HomeGoalsConceded=('AwayGoals', 'mean')
)

team_stats_away = historical_data.groupby('AwayTeam').agg(
    AwayGoalsScored=('AwayGoals', 'mean'),
    AwayGoalsConceded=('HomeGoals', 'mean')
)
team_stats = team_stats_home.join(team_stats_away)


In [11]:
team_stats['AttackStrength'] = (team_stats['HomeGoalsScored'] + team_stats['AwayGoalsScored']) / 2
team_stats['DefenceStrength'] = (team_stats['HomeGoalsConceded'] + team_stats['AwayGoalsConceded']) / 2 

In [12]:
# Fill the Null values with means HomeGoals + Away Goals
overall_attack_strength = (historical_data['HomeGoals'].mean() + historical_data['AwayGoals'].mean()) / 2 
overall_defence_strength = (historical_data['AwayGoals'].mean() + historical_data['HomeGoals'].mean()) / 2
team_stats = team_stats.fillna({'AttackStrength': overall_attack_strength, 'DefenceStrength':overall_defence_strength })

In [13]:
# Normalizing the Attack and Defence strengths by dividing each team strengths with the average of attack strength
#team_stats['AttackStrength'] = team_stats['AttackStrength'] / team_stats['AttackStrength'].mean()
#team_stats['DefenceStrength'] = team_stats['DefenceStrength'] / team_stats['DefenceStrength'].mean() 

In [14]:
# function to add strengths to each teams in group stage fixtures df to help in prediction 
def add_strength(fixture, team_stats):
    group_stage_fixtures = fixture.copy()
    group_stage_fixtures['HomeAttackStrength'] = group_stage_fixtures['home'].map(team_stats['AttackStrength'])
    group_stage_fixtures['HomeDefenceStrength'] = group_stage_fixtures['home'].map(team_stats['DefenceStrength'])
    group_stage_fixtures['AwayAttackStrength'] = group_stage_fixtures['away'].map(team_stats['AttackStrength'])
    group_stage_fixtures['AwayDefenceStrength'] = group_stage_fixtures['away'].map(team_stats['DefenceStrength'])
    return group_stage_fixtures

In [15]:
group_stage_fixtures = add_strength(group_stage_fixtures,team_stats)
group_stage_fixtures

Unnamed: 0,home,away,Year,Match,HomeAttackStrength,HomeDefenceStrength,AwayAttackStrength,AwayDefenceStrength
0,Germany,Scotland,2024,1,1.4,1.1,0.525,1.1
1,Hungary,Switzerland,2024,2,1.321429,1.696429,0.8875,1.3625
2,Germany,Hungary,2024,3,1.4,1.1,1.321429,1.696429
3,Scotland,Switzerland,2024,4,0.525,1.1,0.8875,1.3625
4,Switzerland,Germany,2024,5,0.8875,1.3625,1.4,1.1
5,Scotland,Hungary,2024,6,0.525,1.1,1.321429,1.696429
6,Spain,Croatia,2024,7,1.469231,0.876923,1.341667,1.225
7,Italy,Albania,2024,8,1.14,0.72,0.25,1.0
8,Croatia,Albania,2024,9,1.341667,1.225,0.25,1.0
9,Spain,Italy,2024,10,1.469231,0.876923,1.14,0.72


In [16]:
#filling the Nan values with the overall mean if required
mean_home_attack_strength = group_stage_fixtures['HomeAttackStrength'].mean()
mean_home_defense_strength = group_stage_fixtures['HomeDefenceStrength'].mean()
mean_away_attack_strength = group_stage_fixtures['AwayAttackStrength'].mean()
mean_away_defense_strength = group_stage_fixtures['AwayDefenceStrength'].mean()

In [17]:
# filling the nan values randomly according to the teams as Its the first appearnce of Serbia and Georgia in Euro Cup
group_stage_fixtures['HomeAttackStrength'].fillna(0.5, inplace=True)
group_stage_fixtures['HomeDefenceStrength'].fillna(0.5, inplace=True)
group_stage_fixtures['AwayAttackStrength'].fillna(0.3, inplace=True)
group_stage_fixtures['AwayDefenceStrength'].fillna(0.3,inplace=True)

## Data Modelling 

In [19]:
# Add features so that the model can be trained on that features 
X = historical_data[['HomeTeam','AwayTeam','Year']].copy()

# mapping the teams with their strengths 
X['HomeAttackStrength'] = X['HomeTeam'].map(team_stats['AttackStrength'])
X['HomeDefenceStrength'] = X['HomeTeam'].map(team_stats['DefenceStrength'])
X['AwayAttackStrength'] = X['AwayTeam'].map(team_stats['AttackStrength'])
X['AwayDefenceStrength'] = X['AwayTeam'].map(team_stats['DefenceStrength'])

# One-hot encoding converting ctegorical values like team names to binary 
X = pd.get_dummies(X,columns=['HomeTeam','AwayTeam'])

#Seperate the Target Variables 
y_home = historical_data['HomeGoals']
y_away = historical_data['AwayGoals']

# Scale the data to standardize the data so that the data remains consistent throughout 
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# check for null values 
check_null = X.isna().sum()

# filling the nan values randomly according to the teams as Its the first appearnce of Serbia and Georgia in Euro Cup
X['HomeAttackStrength'].fillna(0.5, inplace=True)
X['HomeDefenceStrength'].fillna(0.5, inplace=True)
X['AwayAttackStrength'].fillna(0.3, inplace=True)
X['AwayDefenceStrength'].fillna(0.3,inplace=True)

## Train the Data 

In [21]:
# splitting the data into training and testing sets
# Test size 20% and training size would be 80% 
X_train, X_test, y_train_home, y_test_home = train_test_split(X_scaled, y_home, test_size=0.2, random_state=42)
X_train, X_test, y_train_away, y_test_away = train_test_split(X_scaled, y_away, test_size=0.2, random_state=42)

## Making a Prediction Model 

In [23]:
# Use poisson regressor to predict 
home_model = PoissonRegressor(max_iter=1000)  
away_model = PoissonRegressor(max_iter=1000)

# fitting the data into the model 
home_model.fit(X_train, y_train_home)
away_model.fit(X_train, y_train_away)

# preparing the data for group_stage prediction 
group_stage_encoded = add_strength(group_stage_fixtures, team_stats)
group_stage_encoded = pd.get_dummies(group_stage_encoded,columns=['home','away'])
group_stage_encoded = group_stage_encoded.reindex(columns=X.columns, fill_value=0)

# check for nan values
check_nan = group_stage_encoded.isna().sum()

#fill the nan values 
group_stage_encoded['HomeAttackStrength'].fillna(0.5, inplace=True)
group_stage_encoded['HomeDefenceStrength'].fillna(0.5, inplace=True)
group_stage_encoded['AwayAttackStrength'].fillna(0.3, inplace=True)
group_stage_encoded['AwayDefenceStrength'].fillna(0.3,inplace=True)

# print(group_stage_encoded.isna().sum())

# Scale the fixtures_encoded data
group_stage_encoded_scaled = scaler.transform(group_stage_encoded)

# predicting goals 
group_stage_fixtures['PredictedHomeGoals'] = home_model.predict(group_stage_encoded_scaled)
group_stage_fixtures['PredictedAwayGoals'] = away_model.predict(group_stage_encoded_scaled)

In [24]:
print(group_stage_fixtures[['home', 'away', 'PredictedHomeGoals', 'PredictedAwayGoals']])

              home            away  PredictedHomeGoals  PredictedAwayGoals
0          Germany        Scotland            1.158356            0.927040
1          Hungary     Switzerland            1.206535            1.211161
2          Germany         Hungary            1.343676            1.119696
3         Scotland     Switzerland            0.887411            1.056507
4      Switzerland         Germany            0.954882            1.256003
5         Scotland         Hungary            0.964312            1.171113
6            Spain         Croatia            1.223271            1.042754
7            Italy         Albania            1.020601            0.787123
8          Croatia         Albania            1.106596            0.905337
9            Spain           Italy            1.078145            0.989700
10         Albania           Spain            0.706768            1.180299
11         Croatia           Italy            1.030387            1.104915
12        Slovenia       

## Group Stage Prediction 

In [26]:
# convert each group into a dataframe 
group_standings_2024 = {group: pd.DataFrame(data).set_index('Team') for group,data in group_standings.items()}

# Replace "Germany (H)" with "Germany" in group standings
for group, standings in group_standings_2024.items():
    standings.index = standings.index.str.replace("Germany (H)", "Germany")

    
# Helper function to find the group of a team which will return the group
def find_group(team, group_standings):
    for group, standings in group_standings.items():
        if team in standings.index:
            return group
    return None

# Updating group standings
for _, match in group_stage_fixtures.iterrows(): #used underscore (_) to ignore the index as match variables return each row data in series form
    home_team = match['home']
    away_team = match['away']
    home_goals = match['PredictedHomeGoals']
    away_goals = match['PredictedAwayGoals']

    # find the group of teams 
    home_group = find_group(home_team, group_standings_2024)
    away_group = find_group(away_team, group_standings_2024)

    # Use loc to search columns by labels and then update it 
    if home_group and away_group:
        # Update For Home Team
        group_standings_2024[home_group].loc[home_team, 'GF'] += home_goals
        group_standings_2024[home_group].loc[home_team, 'GA'] += away_goals
        group_standings_2024[home_group].loc[home_team, 'GD'] += home_goals - away_goals
        group_standings_2024[home_group].loc[home_team, 'Pld'] += 1

        # Update for Away Team
        group_standings_2024[away_group].loc[away_team, 'GF'] += away_goals
        group_standings_2024[away_group].loc[away_team, 'GA'] += home_goals
        group_standings_2024[away_group].loc[away_team, 'GD'] += away_goals - home_goals
        group_standings_2024[away_group].loc[away_team, 'Pld'] += 1

        # Update Points 
        if home_goals > away_goals:
            group_standings_2024[home_group].loc[home_team, 'W'] += 1
            group_standings_2024[home_group].loc[home_team, 'Pts'] += 3
            group_standings_2024[away_group].loc[away_team, 'L'] += 1
        elif away_goals > home_goals:
            group_standings_2024[away_group].loc[away_team, 'W'] += 1
            group_standings_2024[away_group].loc[away_team, 'Pts'] += 3
            group_standings_2024[home_group].loc[home_team, 'L'] += 1
        else:
            group_standings_2024[home_group].loc[home_team, 'D'] += 1
            group_standings_2024[home_group].loc[home_team, 'Pts'] += 1
            group_standings_2024[away_group].loc[away_team, 'D'] += 1
            group_standings_2024[away_group].loc[away_team, 'Pts'] += 1
            
# Sort group standings
for group, standings in group_standings_2024.items():
    group_standings_2024[group] = standings.sort_values(by=['Pts', 'GD', 'GF'], ascending=False).reset_index()

print("\nGroup Stage Standings:")
for group, standings in group_standings_2024.items():
    print(f"\n{group}:")
    print(standings)



Group Stage Standings:

Group A:
          Team  Pos  Pld  W  D  L        GF        GA        GD  Pts
0      Germany    1    3  3  0  0  3.758036  3.001618  0.756418    9
1  Switzerland    2    3  2  0  1  3.222549  3.349950 -0.127401    6
2      Hungary    3    3  1  0  2  3.497345  3.519149 -0.021804    3
3     Scotland    4    3  0  0  3  2.778763  3.385976 -0.607214    0

Group B:
      Team  Pos  Pld  W  D  L        GF        GA        GD  Pts
0    Spain    1    3  3  0  0  3.481716  2.739222  0.742493    9
1    Italy    2    3  2  0  1  3.115216  2.895655  0.219561    6
2  Croatia    3    3  1  0  2  3.179737  3.233523 -0.053787    3
3  Albania    4    3  0  0  3  2.399229  3.307497 -0.908268    0

Group C:
       Team  Pos  Pld  W  D  L        GF        GA        GD Pts
0   England    1    3  3  0  0  3.653804  3.061001  0.592803   9
1    Serbia    4    3  2  0  1  2.905549  2.972698 -0.067148   6
2  Slovenia    3    3  1  0  2  3.766852  3.949063 -0.182211   3
3   Denmark    2

  group_standings_2024[home_group].loc[home_team, 'GF'] += home_goals
  group_standings_2024[home_group].loc[home_team, 'GA'] += away_goals
  group_standings_2024[home_group].loc[home_team, 'GF'] += home_goals
  group_standings_2024[home_group].loc[home_team, 'GA'] += away_goals
  group_standings_2024[home_group].loc[home_team, 'GF'] += home_goals
  group_standings_2024[home_group].loc[home_team, 'GA'] += away_goals
  group_standings_2024[home_group].loc[home_team, 'GF'] += home_goals
  group_standings_2024[home_group].loc[home_team, 'GA'] += away_goals
  group_standings_2024[home_group].loc[home_team, 'GF'] += home_goals
  group_standings_2024[home_group].loc[home_team, 'GA'] += away_goals
  group_standings_2024[home_group].loc[home_team, 'GF'] += home_goals
  group_standings_2024[home_group].loc[home_team, 'GA'] += away_goals


## Ranking Top 4 third-tanked Teams 

In [28]:
third_placed_teams = []
for group,standings in group_standings_2024.items():
    third_placed_teams.append(standings.iloc[2])
third_placed_teams = pd.DataFrame(third_placed_teams).sort_values(by=['Pts','GD','GF'], ascending=False).reset_index()

# best third placed teams 
best_third_placed_teams = third_placed_teams.iloc[:4]['Team'].tolist()
best_third_placed_teams

['Romania', 'Georgia', 'Hungary', 'Croatia']

## Predicting RO16 fixtures

In [30]:
RO16_fixtures

Unnamed: 0,home,away,Year,Match
36,Runner-up Group A,Runner-up Group B,2024,37
37,Winner Group A,Runner-up Group C,2024,38
38,Winner Group C,3rd Group D/E/F,2024,39
39,Winner Group B,3rd Group A/D/E/F,2024,40
40,Runner-up Group D,Runner-up Group E,2024,41
41,Winner Group F,3rd Group A/B/C,2024,42
42,Winner Group E,3rd Group A/B/C/D,2024,43
43,Winner Group D,Runner-up Group F,2024,44


In [31]:
# Making dictionary for RO16 Fixtures to get the teams 
R016_fixtures_dict  = [
    {'Home': group_standings_2024['Group A'].iloc[1]['Team'], 'Away': group_standings_2024['Group B'].iloc[1]['Team'], 'Match': 37},
    {'Home': group_standings_2024['Group A'].iloc[0]['Team'], 'Away': group_standings_2024['Group C'].iloc[1]['Team'], 'Match': 38},
    {'Home':group_standings_2024['Group C'].iloc[0]['Team'], 'Away': best_third_placed_teams[0] , 'Match': 39},
    {'Home':group_standings_2024['Group B'].iloc[0]['Team'], 'Away': best_third_placed_teams[1] , 'Match': 40},
    {'Home': group_standings_2024['Group D'].iloc[1]['Team'], 'Away': group_standings_2024['Group E'].iloc[1]['Team'], 'Match': 41},
    {'Home':group_standings_2024['Group F'].iloc[0]['Team'], 'Away': best_third_placed_teams[2] , 'Match': 42},
    {'Home':group_standings_2024['Group E'].iloc[0]['Team'], 'Away': best_third_placed_teams[3] , 'Match': 43},
    {'Home': group_standings_2024['Group D'].iloc[0]['Team'], 'Away': group_standings_2024['Group F'].iloc[1]['Team'], 'Match': 44}
]

# Convert to DataFrame to help use in preidction function later on
R016_fixtures_df = pd.DataFrame(R016_fixtures_dict)

# Printing the fixtures for RO16
print('The Round of 16 Fixtures are: ')
print()
for fixture in R016_fixtures_dict:
    print(f"Match {fixture['Match']}: {fixture['Home']} vs {fixture['Away']}")

The Round of 16 Fixtures are: 

Match 37: Switzerland vs Italy
Match 38: Germany vs Serbia
Match 39: England vs Romania
Match 40: Spain vs Georgia
Match 41: France vs Ukraine
Match 42: Portugal vs Hungary
Match 43: Belgium vs Croatia
Match 44: Netherlands vs Czech Republic


## Make Extra Time and Penalties function 

In [33]:
# making extra-time and penalties function if homegoals == awaygoals by using random module 
def extra_time_penalties(home_goals,away_goals): 
    # extra-time goals set with low probablity as there are always less chances to score in extra time 
    extra_time_home_goals = np.random.poisson(0.2)
    extra_time_away_goals = np.random.poisson(0.2)
    
    home_goals = home_goals + extra_time_home_goals
    away_goals = away_goals + extra_time_away_goals

    if home_goals == away_goals: 
        penalties_home = 0
        penalties_away = 0 

        for i in range(5):
            # using random.random() to generate a random float number from 0.0-1.0
            if random.random() < 0.75:  # assuming the success rate is 75% 
                penalties_home =+ 1
            if random.random() < 0.75:
                penalties_away =+ 1

        while penalties_home == penalties_away:
            if random.random() < 0.75:
                penalties_home =+ 1
            if random.random() < 0.75:
                penalties_away =+ 1

        if penalties_home > penalties_away:
             winner = 'Home'
        else:
            winner = 'Away'

    else:
      if home_goals > away_goals: 
          winner = 'Home'
      else:
          winner = 'Away'

    return home_goals, away_goals, winner

## Predicting Knockout Stages Function

In [35]:
# predict knockout
def predict_knockout(match_data, home_model, away_model, team_stats):
    results = []

    for index, row in match_data.iterrows():
        home_team = row['Home']
        away_team = row['Away']
        match_id = row['Match']

        # Prepare data for prediction
        fixture = pd.DataFrame([{
            'HomeTeam': home_team,
            'AwayTeam': away_team,
            'Year': 2024 
        }])

        # Map team strengths
        fixture['HomeAttackStrength'] = fixture['HomeTeam'].map(team_stats['AttackStrength'])
        fixture['HomeDefenceStrength'] = fixture['HomeTeam'].map(team_stats['DefenceStrength'])
        fixture['AwayAttackStrength'] = fixture['AwayTeam'].map(team_stats['AttackStrength'])
        fixture['AwayDefenceStrength'] = fixture['AwayTeam'].map(team_stats['DefenceStrength'])

        # Fill NaN values
        fixture['HomeAttackStrength'].fillna(0.75, inplace=True)
        fixture['HomeDefenceStrength'].fillna(0.75, inplace=True)
        fixture['AwayAttackStrength'].fillna(0.5, inplace=True)
        fixture['AwayDefenceStrength'].fillna(0.5, inplace=True)

        # Encode categorical variables
        fixture_encoded = pd.get_dummies(fixture[['HomeTeam', 'AwayTeam', 'Year', 'HomeAttackStrength', 'HomeDefenceStrength', 'AwayAttackStrength', 'AwayDefenceStrength']], columns=['HomeTeam', 'AwayTeam'])

        # Align encoded features with training data
        fixture_encoded = fixture_encoded.reindex(columns=X.columns, fill_value=0)

        # Scale the data
        fixture_encoded_scaled = scaler.transform(fixture_encoded)

        # Predict goals
        home_goals = home_model.predict(fixture_encoded_scaled)[0]
        away_goals = away_model.predict(fixture_encoded_scaled)[0]

        # Determine winner
        if home_goals == away_goals:
            home_goals, away_goals, winner_type = extra_time_penalties(home_goals, away_goals)
            if winner_type == 'Home':
                winner = home_team
            else:
                winner = away_team
        else:
            if home_goals > away_goals:
                winner = home_team
            else:
                winner = away_team

        results.append({
            'Match': match_id,
            'Home': home_team,
            'Away': away_team,
            'HomeGoals': home_goals,
            'AwayGoals': away_goals,
            'Winner': winner
        })

    return results

## Predicting RO16 Results 

In [37]:
# Predict RO16 results
ro16_results = predict_knockout(R016_fixtures_df, home_model, away_model, team_stats)

# Print the results
print("\nRound of 16 Results:")
for result in ro16_results:
    print(result)

# convert it to the dataframe
ro16_results_df = pd.DataFrame(ro16_results)
ro16_results_df


Round of 16 Results:
{'Match': 37, 'Home': 'Switzerland', 'Away': 'Italy', 'HomeGoals': 0.737817887054878, 'AwayGoals': 1.0957069005100852, 'Winner': 'Italy'}
{'Match': 38, 'Home': 'Germany', 'Away': 'Serbia', 'HomeGoals': 1.2463205708010605, 'AwayGoals': 0.9079655614055234, 'Winner': 'Germany'}
{'Match': 39, 'Home': 'England', 'Away': 'Romania', 'HomeGoals': 1.4045219946366077, 'AwayGoals': 0.9405031578872772, 'Winner': 'England'}
{'Match': 40, 'Home': 'Spain', 'Away': 'Georgia', 'HomeGoals': 1.0069845180390629, 'AwayGoals': 0.7002851231029074, 'Winner': 'Spain'}
{'Match': 41, 'Home': 'France', 'Away': 'Ukraine', 'HomeGoals': 1.7370852512777166, 'AwayGoals': 0.6602030665268919, 'Winner': 'France'}
{'Match': 42, 'Home': 'Portugal', 'Away': 'Hungary', 'HomeGoals': 1.455447446297744, 'AwayGoals': 1.3586840759428245, 'Winner': 'Portugal'}
{'Match': 43, 'Home': 'Belgium', 'Away': 'Croatia', 'HomeGoals': 1.1610268996347861, 'AwayGoals': 1.1250783541622689, 'Winner': 'Belgium'}
{'Match': 44

Unnamed: 0,Match,Home,Away,HomeGoals,AwayGoals,Winner
0,37,Switzerland,Italy,0.737818,1.095707,Italy
1,38,Germany,Serbia,1.246321,0.907966,Germany
2,39,England,Romania,1.404522,0.940503,England
3,40,Spain,Georgia,1.006985,0.700285,Spain
4,41,France,Ukraine,1.737085,0.660203,France
5,42,Portugal,Hungary,1.455447,1.358684,Portugal
6,43,Belgium,Croatia,1.161027,1.125078,Belgium
7,44,Netherlands,Czech Republic,1.510979,1.092218,Netherlands


## Predicting Quarter-final 

In [39]:
quarter_final_fixtures

Unnamed: 0,home,away,Year,Match
44,Winner of Match 40,Winner of Match 38,2024,45
45,Winner of Match 42,Winner of Match 41,2024,46
46,Winner of Match 39,Winner of Match 37,2024,47
47,Winner of Match 43,Winner of Match 44,2024,48


In [40]:
quarter_final_fixtures_dict = [
    {'Home': ro16_results[3]['Winner'], 'Away': ro16_results[1]['Winner'], 'Year': 2024, 'Match':45},
    {'Home': ro16_results[5]['Winner'], 'Away': ro16_results[4]['Winner'], 'Year':2024, 'Match': 46},
    {'Home': ro16_results[2]['Winner'], 'Away': ro16_results[0]['Winner'], 'Year':2024, 'Match': 46},
    {'Home': ro16_results[6]['Winner'], 'Away': ro16_results[7]['Winner'], 'Year':2024, 'Match': 46}
]

for fixtures in quarter_final_fixtures_dict:
    print(f"Match {fixtures['Match']}: {fixtures['Home']} vs {fixtures['Away']}")

Match 45: Spain vs Germany
Match 46: Portugal vs France
Match 46: England vs Italy
Match 46: Belgium vs Netherlands


In [41]:
# convert dict to dataframe 
quarter_final_fixtures_df = pd.DataFrame(quarter_final_fixtures_dict)

In [42]:
# quarter-final winners 
quarter_final_results = predict_knockout(quarter_final_fixtures_df, home_model, away_model, team_stats)

# convert it to dataframe 
quarter_final_results_df = pd.DataFrame(quarter_final_results)
quarter_final_results_df

Unnamed: 0,Match,Home,Away,HomeGoals,AwayGoals,Winner
0,45,Spain,Germany,1.141315,0.877429,Spain
1,46,Portugal,France,1.511634,1.100937,Portugal
2,46,England,Italy,0.857922,1.028268,Italy
3,46,Belgium,Netherlands,1.146807,1.329159,Netherlands


## Predicting Semi-final 

In [44]:
semi_final_fixtures

Unnamed: 0,home,away,Year,Match
48,Winner of Match 45,Winner of Match 46,2024,49
49,Winner of Match 48,Winner of Match 47,2024,50


In [45]:
semi_final_fixture_dict = [
    {'Home': quarter_final_results[0]['Winner'], 'Away': quarter_final_results[1]['Winner'], 'Year': 2024, 'Match': 49},
    {'Home': quarter_final_results[3]['Winner'], 'Away': quarter_final_results[2]['Winner'], 'Year': 2024, 'Match': 50}
]
for fixtures in semi_final_fixture_dict:
        print(f"Match {fixtures['Match']}: {fixtures['Home']} vs {fixtures['Away']}")

Match 49: Spain vs Portugal
Match 50: Netherlands vs Italy


In [46]:
# convert it to a dataframe 
semi_final_fixture_df = pd.DataFrame(semi_final_fixture_dict)

In [47]:
# predicting winner 
semi_final_results = predict_knockout(semi_final_fixture_df, home_model, away_model, team_stats)

#convert the results to the data frame 
semi_final_results_df = pd.DataFrame(semi_final_results)
semi_final_results_df

Unnamed: 0,Match,Home,Away,HomeGoals,AwayGoals,Winner
0,49,Spain,Portugal,1.012088,0.882545,Spain
1,50,Netherlands,Italy,1.076791,1.04344,Netherlands


## Predicting Final 

In [49]:
final_fixture

Unnamed: 0,home,away,Year,Match
50,Winner of Match 49,Winner of Match 50,2024,51


In [50]:
final_fixture_dict = [{'Home': semi_final_results[0]['Winner'], 'Away': semi_final_results[1]['Winner'], 'Year': 2024, 'Match': 51}]

In [51]:
final_fixture_df = pd.DataFrame(final_fixture_dict)
final_fixture_df

Unnamed: 0,Home,Away,Year,Match
0,Spain,Netherlands,2024,51


In [52]:
# Predicting Winner
final_results = predict_knockout(final_fixture_df, home_model, away_model, team_stats)

# convert into the dataframe 
final_results_df = pd.DataFrame(final_results)
final_results_df

Unnamed: 0,Match,Home,Away,HomeGoals,AwayGoals,Winner
0,51,Spain,Netherlands,1.070597,1.017981,Spain
