# Predicting the UEFA European Football Championship

Tutorial by Aabid Roshan, Sid Joshi, Pranav Sivaraman

In [2]:
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
# Load the raw match results and turn the 'date' column into pandas datetimes,
# which the year filter in calculate_elo relies on (it uses the .dt accessor).
df = pd.read_csv("match_results.csv").assign(
    date=lambda results: pd.to_datetime(results['date'])
)

# Creating the Ranking System

In [4]:
# The 24 national teams that get a rating. The spellings must match the values
# in the match data's 'home_team' / 'away_team' columns, because this list is
# used to filter those columns.
teams = [
    'Turkey', 'Italy', 'Wales', 'Switzerland',
    'Denmark', 'Finland', 'Belgium', 'Russia',
    'Netherlands', 'Ukraine', 'Austria', 'North Macedonia',
    'England', 'Croatia', 'Scotland', 'Czech Republic',
    'Spain', 'Sweden', 'Poland', 'Slovakia',
    'Hungary', 'Portugal', 'France', 'Germany',
]

In [5]:
def initialize_elo_system(teams, initial_rating):
    """Build the starting rating history for every team.

    Args:
        teams: Iterable of team identifiers (used as dictionary keys).
        initial_rating: Rating that every team starts from.

    Returns:
        dict mapping each team to its own one-element list
        ``[initial_rating]``. Each team gets a separate list object, so
        appending to one team's history never affects another's.
    """
    return {team: [initial_rating] for team in teams}

In [6]:
def calculate_probabilities(home_rating, away_rating, scale_factor=400):
    """Return each side's expected result from the two ratings.

    Both sides go through the same logistic formula with the ratings swapped;
    no home-advantage term is applied.

    Args:
        home_rating: Current rating of the home team.
        away_rating: Current rating of the away team.
        scale_factor: Rating difference that corresponds to a tenfold change
            in the odds (default 400).

    Returns:
        tuple ``(p_home, p_away)`` of floats; mathematically they sum to 1.
    """
    def expected_score(own_rating, opponent_rating):
        # Logistic curve on the rating gap, in base 10.
        return 1 / (1 + 10**((opponent_rating - own_rating) / scale_factor))

    return (
        expected_score(home_rating, away_rating),
        expected_score(away_rating, home_rating),
    )

In [7]:
def calculate_rankings(matches, ratings, k_factor=22.2):
    """Replay ``matches`` in row order, appending each side's new Elo rating.

    For every match the expected results come from ``calculate_probabilities``
    applied to both teams' latest ratings. Each team's rating then moves by
    ``k_factor * (actual_result - expected_result)``, where the actual result
    is 1 for a win, 0 for a loss and 0.5 for a draw.

    Matches whose home or away score is missing (NaN) are skipped and leave
    both rating histories untouched.

    Args:
        matches: DataFrame with 'home_team', 'away_team', 'home_score' and
            'away_score' columns. Rows are processed in the order given, so
            the caller decides the chronology.
        ratings: dict mapping team -> list of ratings (latest last), e.g. the
            output of ``initialize_elo_system``. It is modified in place.
        k_factor: Maximum rating change per match (default 22.2).

    Returns:
        The same ``ratings`` dict, with one extra entry per scored match
        appended to each participating team's list.

    Raises:
        KeyError: If a team in ``matches`` has no entry in ``ratings``.
    """
    fixtures = zip(
        matches['home_team'].values,
        matches['away_team'].values,
        matches['home_score'].values,
        matches['away_score'].values,
    )

    for home_team, away_team, home_score, away_score in fixtures:
        # A NaN score fails every comparison below. Without this guard the
        # result variables would be unbound on the first row, or would
        # silently carry over the previous match's outcome on later rows.
        if pd.isna(home_score) or pd.isna(away_score):
            continue

        home_history, away_history = ratings[home_team], ratings[away_team]
        p_home, p_away = calculate_probabilities(home_history[-1], away_history[-1])

        if home_score > away_score:
            match_result_home, match_result_away = 1, 0
        elif home_score < away_score:
            match_result_home, match_result_away = 0, 1
        else:
            match_result_home, match_result_away = 0.5, 0.5

        # Compute both new ratings from the pre-match values before appending.
        new_rating_home = home_history[-1] + k_factor * (match_result_home - p_home)
        new_rating_away = away_history[-1] + k_factor * (match_result_away - p_away)

        home_history.append(new_rating_home)
        away_history.append(new_rating_away)

    return ratings

In [8]:
def calculate_elo(teams, year_range, initial_rating=1200):
    """Compute final Elo ratings for ``teams`` from matches in a year window.

    Reads the notebook-level ``df`` of match results (it must already be
    loaded, with a datetime 'date' column). Only matches where both the home
    and the away side are in ``teams`` are used.

    Args:
        teams: Team names to rate; every team starts at ``initial_rating``.
        year_range: Two-item sequence ``[first_year, last_year]``; both
            endpoints are included.
        initial_rating: Starting rating for every team (default 1200).

    Returns:
        dict mapping team -> final rating, ordered from highest to lowest.
    """
    first_year, last_year = year_range[0], year_range[1]

    between_listed_teams = df['home_team'].isin(teams) & df['away_team'].isin(teams)
    in_year_window = df['date'].dt.year.between(first_year, last_year)

    # Elo updates depend on the order matches are replayed in, so enforce
    # chronological order. The stable sort keeps the file order for matches
    # on the same date and changes nothing if the data is already sorted.
    matches = df[between_listed_teams & in_year_window].sort_values('date', kind='mergesort')

    ratings = initialize_elo_system(teams, initial_rating)
    new_ratings = calculate_rankings(matches, ratings)

    strongest_first = sorted(
        new_ratings.items(), key=lambda item: item[1][-1], reverse=True
    )
    return {team: history[-1] for team, history in strongest_first}

In [11]:
# Rate every team on the matches played from 2006 through 2010 and show the
# resulting table (highest rating first).
rankings = calculate_elo(teams, year_range=[2006, 2010])
rankings

{'Spain': 1364.5183009647797,
 'Netherlands': 1313.7134131866942,
 'Germany': 1311.2241288528364,
 'Croatia': 1240.5535333845992,
 'Italy': 1239.1587366951428,
 'France': 1232.2957884337645,
 'England': 1226.9820451253613,
 'Portugal': 1226.670041047823,
 'Switzerland': 1223.7896858908427,
 'Russia': 1213.045771738059,
 'Turkey': 1205.615986060427,
 'Czech Republic': 1193.2703435177937,
 'Sweden': 1188.5366080369754,
 'Slovakia': 1187.7533660997435,
 'Ukraine': 1183.0565508409186,
 'Denmark': 1165.6846236970146,
 'Finland': 1165.6292344395317,
 'Poland': 1159.2568534609081,
 'Scotland': 1148.6082421430551,
 'North Macedonia': 1138.9454850945222,
 'Hungary': 1138.0076886006218,
 'Belgium': 1118.97073932807,
 'Wales': 1109.6238695003447,
 'Austria': 1105.088963860171}