In [6]:
import pandas as pd

In [56]:
# Read the match results and convert the 'date' column to datetimes so that
# later cells can filter on it through the .dt accessor.
df = pd.read_csv("match_results.csv").assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

# Predicted Tournament Winner

In [83]:
# Teams to be rated. The same list seeds the ratings table and restricts the
# match history to games where both sides are listed.
# NOTE(review): looks like the 24 UEFA Euro 2020 finalists in group order
# (four per group) - confirm against the tournament draw.
teams = [
    'Turkey',
    'Italy',
    'Wales',
    'Switzerland',
    'Denmark',
    'Finland',
    'Belgium',
    'Russia',
    'Netherlands',
    'Ukraine',
    'Austria',
    'North Macedonia',
    'England',
    'Croatia',
    'Scotland',
    'Czech Republic',
    'Spain',
    'Sweden',
    'Poland',
    'Slovakia',
    'Hungary',
    'Portugal',
    'France',
    'Germany',
]

In [87]:
def initialize_elo_system(teams, initial_rating):
    """Build the starting ratings table.

    Parameters
    ----------
    teams : iterable
        Team identifiers; each becomes a key of the result. A name listed
        more than once still yields a single entry.
    initial_rating : object
        Value assigned to every team.

    Returns
    -------
    dict
        A new dict mapping each team to ``initial_rating``.
    """
    return {name: initial_rating for name in teams}

In [125]:
def calculate_rankings(matches, ratings, scale_factor=400, k_factor=22.2):
    """Replay matches in row order, applying an Elo update after each one.

    Parameters
    ----------
    matches : pandas.DataFrame
        Must provide 'home_team', 'away_team', 'home_score' and 'away_score'
        columns. Rows are processed in the order given, so the caller is
        responsible for passing them in the intended (chronological) order.
    ratings : dict
        Maps team name to its current rating. Updated in place; every team
        appearing in a scored match must already be a key.
    scale_factor : float, default 400
        Divisor of the rating difference inside the expected-score formula.
    k_factor : float, default 22.2
        Multiplier applied to (actual result - expected score).

    Returns
    -------
    dict
        The same ``ratings`` object that was passed in, after all updates.

    Raises
    ------
    KeyError
        If a team in a scored match is missing from ``ratings``.

    Notes
    -----
    Rows with a missing home or away score (e.g. fixtures that have not been
    played) are skipped. Without that guard none of the win/loss/draw
    comparisons is true for NaN, and the previous match's result would be
    silently reused (or the first row would fail on an unbound variable).
    """
    fixtures = zip(
        matches['home_team'].values,
        matches['away_team'].values,
        matches['home_score'].values,
        matches['away_score'].values,
    )

    for home_team, away_team, home_score, away_score in fixtures:
        # No result recorded for this row: leave both ratings untouched.
        if pd.isna(home_score) or pd.isna(away_score):
            continue

        # Read both ratings before writing either, so the two updates are
        # computed from the pre-match state.
        rating_home, rating_away = ratings[home_team], ratings[away_team]

        # Expected score of each side given the current rating gap.
        p_home = 1 / (1 + 10**((rating_away - rating_home) / scale_factor))
        p_away = 1 / (1 + 10**((rating_home - rating_away) / scale_factor))

        # Actual result: 1 for a win, 0 for a loss, 0.5 each for a draw.
        if home_score > away_score:
            match_result_home, match_result_away = 1, 0
        elif home_score < away_score:
            match_result_home, match_result_away = 0, 1
        else:
            match_result_home, match_result_away = 0.5, 0.5

        ratings[home_team] = rating_home + k_factor * (match_result_home - p_home)
        ratings[away_team] = rating_away + k_factor * (match_result_away - p_away)

    return ratings

In [126]:
# Start every team from the same baseline rating.
ratings = initialize_elo_system(teams, 1200)

# Keep only games between two listed teams that were played in 2016 or later.
both_listed = df['home_team'].isin(teams) & df['away_team'].isin(teams)
recent = df['date'].dt.year >= 2016
# Elo updates are order-dependent, so replay the matches chronologically
# rather than relying on the CSV's row order. The stable sort keeps the file
# order for matches that share a date.
matches = df[both_listed & recent].sort_values('date', kind='stable')

# calculate_rankings updates `ratings` in place and returns that same dict.
new_ratings = calculate_rankings(matches, ratings)

# Ascending by rating, so the highest-rated team is listed last.
rankings = dict(sorted(new_ratings.items(), key=lambda item: item[1]))

In [127]:
# Show the final ratings, ordered from lowest to highest.
rankings

{'Finland': 1106.9160648176423,
 'Hungary': 1137.0885735619433,
 'Slovakia': 1143.136559167868,
 'North Macedonia': 1145.836863022899,
 'Russia': 1145.9700150236488,
 'Czech Republic': 1147.720106975519,
 'Scotland': 1156.1437458786165,
 'Sweden': 1159.9417539436076,
 'Switzerland': 1173.1100439669417,
 'Wales': 1177.3990750993348,
 'Austria': 1182.4672188472784,
 'Poland': 1186.7665477282073,
 'Ukraine': 1193.182745637513,
 'Turkey': 1196.2730739269941,
 'Croatia': 1204.113028313914,
 'Denmark': 1231.7011497532494,
 'Netherlands': 1232.5206854915434,
 'Germany': 1233.4968865903897,
 'England': 1233.501483006158,
 'Italy': 1254.4484473432221,
 'Spain': 1269.474788041637,
 'Belgium': 1287.2334007131576,
 'Portugal': 1288.3652637652528,
 'France': 1313.1924793834614}