In [1]:
import pandas as pd
import re
from datetime import datetime

In [2]:
# Load the raw match results table.
df = pd.read_csv("data/results.csv")

In [3]:
# Parse the "date" column into datetimes so the .dt accessor and the date
# comparisons in later cells work on real timestamps.
df["date"] = df["date"].pipe(pd.to_datetime)

In [4]:
# Select the 2024 UEFA Euro matches. This runs before the dropna() in the
# next cell, so rows with missing values are still included here
# (presumably fixtures without a score yet -- TODO confirm).
played_in_2024 = df["date"].dt.year == 2024
is_uefa_euro = df["tournament"] == "UEFA Euro"
euro_games = df[played_in_2024 & is_uefa_euro]

In [5]:
# Drop every row that has a missing value in any column. Rebinding the name
# instead of mutating with inplace=True keeps the step chainable and leaves
# any earlier reference to the frame untouched.
df = df.dropna()

In [6]:
# Keep only matches dated on or after 1 August 2019 and renumber the rows.
on_or_after_cutoff = df["date"] >= "2019-8-1"
df = df.loc[on_or_after_cutoff].reset_index(drop=True)

In [7]:
# Load the FIFA ranking history.
rank = pd.read_csv("data/fifa_ranking-2024-04-04.csv")

# Rewrite the literal date "2024-04-04" in rank_date to today's date.
# NOTE(review): presumably this stretches the latest ranking up to the run
# date so the daily forward-fill in the next cell covers recent matches --
# confirm. It also makes the output depend on the day the notebook is run.
# The date string is computed once so every row gets the same value, and the
# replacement is a vectorized literal (non-regex) substitution.
today_str = datetime.today().strftime('%Y-%m-%d')
rank["rank_date"] = rank["rank_date"].str.replace("2024-04-04", today_str, regex=False)

In [8]:
# Parse ranking dates and keep the same window as the match data.
rank["rank_date"] = pd.to_datetime(rank["rank_date"])
rank = rank[(rank["rank_date"] >= "2019-8-1")].reset_index(drop=True)

# Rename ranking country names, presumably to match the spellings used in
# the match table (TODO confirm). Applied in order as literal substring
# replacements; regex=False makes that explicit instead of relying on a
# pandas-version-dependent default.
country_renames = {
    "Czechia": "Czech Republic",
    "IR Iran": "Iran",
    "Korea Republic": "South Korea",
    "USA": "United States",
}
country_names = rank["country_full"]
for old_name, new_name in country_renames.items():
    country_names = country_names.str.replace(old_name, new_name, regex=False)
rank["country_full"] = country_names

# Expand each country's rankings to one row per calendar day and carry the
# last known values forward. .ffill() replaces fillna(method='ffill'), which
# is deprecated in current pandas.
# NOTE(review): the forward fill runs over the combined result rather than
# per country -- verify no country's first row has missing values that could
# be filled from the preceding country.
rank = (
    rank.set_index(['rank_date'])
    .groupby(['country_full'], group_keys=False)
    .resample('D')
    .first()
    .ffill()
    .reset_index()
)

In [9]:
# Ranking columns attached to each side of a match, and the ranking-side
# join keys that are dropped again after each merge.
rank_view = rank[["country_full", "total_points", "previous_points", "rank", "rank_change", "rank_date"]]
rank_keys = ["rank_date", "country_full"]

# Attach the home team's ranking for the match date. merge() defaults to an
# inner join, so matches without a ranking row for that team/date are dropped.
df_ranked = df.merge(
    rank_view,
    left_on=["date", "home_team"],
    right_on=rank_keys,
).drop(rank_keys, axis=1)

# Attach the away team's ranking; the ranking columns now collide with the
# home ones, so they are disambiguated with the _home / _away suffixes.
df_ranked = df_ranked.merge(
    rank_view,
    left_on=["date", "away_team"],
    right_on=rank_keys,
    suffixes=("_home", "_away"),
).drop(rank_keys, axis=1)

In [10]:
# Write the matches-with-rankings table to CSV, without the row index.
df_ranked.to_csv("data/data_rank.csv", index=False)

In [11]:
# Display the merged frame (rendered below as a truncated head/tail preview).
df_ranked

Unnamed: 0,date,home_team,away_team,home_score,away_score,tournament,city,country,neutral,total_points_home,previous_points_home,rank_home,rank_change_home,total_points_away,previous_points_away,rank_away,rank_change_away
0,2019-09-29,Bangladesh,Bhutan,4.0,1.0,Friendly,Dhaka,Bangladesh,False,912.00,922.00,187.0,5.0,916.00,916.00,185.0,-1.0
1,2019-09-30,Botswana,Liberia,0.0,0.0,Friendly,Gaborone,Botswana,False,1059.00,1069.00,148.0,1.0,1053.00,1044.00,152.0,0.0
2,2019-10-02,Mexico,Trinidad and Tobago,2.0,0.0,Friendly,Toluca,Mexico,False,1603.00,1604.00,12.0,0.0,1226.00,1226.00,100.0,-1.0
3,2019-10-03,Bangladesh,Bhutan,2.0,0.0,Friendly,Dhaka,Bangladesh,False,912.00,922.00,187.0,5.0,916.00,916.00,185.0,-1.0
4,2019-10-05,Jordan,Singapore,0.0,0.0,Friendly,Amman,Jordan,False,1235.00,1229.00,98.0,-1.0,1019.00,999.00,157.0,-5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3839,2024-06-17,Austria,France,0.0,1.0,UEFA Euro,Düsseldorf,Germany,True,1546.10,1546.10,25.0,1.0,1845.44,1845.44,2.0,0.0
3840,2024-06-17,Romania,Ukraine,3.0,0.0,UEFA Euro,Munich,Germany,True,1472.73,1472.73,45.0,2.0,1553.35,1553.35,24.0,2.0
3841,2024-06-17,Belgium,Slovakia,0.0,1.0,UEFA Euro,Frankfurt,Germany,True,1798.46,1798.46,4.0,0.0,1465.73,1465.73,48.0,3.0
3842,2024-06-18,Turkey,Georgia,3.0,1.0,UEFA Euro,Dortmund,Germany,True,1495.94,1505.28,40.0,5.0,1333.76,1312.45,75.0,-2.0


In [14]:
def result_finder(home, away):
    """Classify a match outcome from the two final scores.

    Parameters
    ----------
    home : number
        Goals scored by the home side.
    away : number
        Goals scored by the away side.

    Returns
    -------
    str or None
        "Home" if the home side scored more, "Away" if the away side scored
        more, "Draw" if the scores are equal, and None when either score is
        missing (None / NaN).
    """
    # A missing score compares False against everything, so without this
    # guard it would fall through both comparisons and be labelled "Draw".
    if pd.isna(home) or pd.isna(away):
        return None
    if home > away:
        return "Home"
    if home < away:
        return "Away"
    return "Draw"

# Label every match with its outcome. Walking the two score columns in
# lockstep avoids the per-row Series that DataFrame.apply(axis=1) builds and
# also works when df has no rows.
df["result"] = [
    result_finder(home_goals, away_goals)
    for home_goals, away_goals in zip(df["home_score"], df["away_score"])
]

In [21]:
# Outcome counts for the first 36 UEFA Euro rows in df order.
# NOTE(review): 36 is a magic number -- presumably the number of group-stage
# matches; confirm and consider naming it.
euro_results = df.loc[df["tournament"] == "UEFA Euro", "result"]
euro_results.head(36).value_counts()

Home    17
Away    11
Draw     8
Name: result, dtype: int64

In [6]:
# Write the 2024 UEFA Euro match selection to CSV, without the row index.
euro_games.to_csv("data/euro_matches.csv", index=False)

In [14]:
# Spot-check the ranking rows whose country name contains "Czech".
has_czech = rank["country_full"].str.contains("Czech")
rank.loc[has_czech]

Unnamed: 0,rank_date,rank,country_full,country_abrv,total_points,previous_points,rank_change,confederation
90766,2019-09-19,44.0,Czech Republic,CZE,1441.00,1448.00,1.0,UEFA
90767,2019-09-20,44.0,Czech Republic,CZE,1441.00,1448.00,1.0,UEFA
90768,2019-09-21,44.0,Czech Republic,CZE,1441.00,1448.00,1.0,UEFA
90769,2019-09-22,44.0,Czech Republic,CZE,1441.00,1448.00,1.0,UEFA
90770,2019-09-23,44.0,Czech Republic,CZE,1441.00,1448.00,1.0,UEFA
...,...,...,...,...,...,...,...,...
92491,2024-06-09,40.0,Czech Republic,CZE,1494.04,1494.04,1.0,UEFA
92492,2024-06-10,40.0,Czech Republic,CZE,1494.04,1494.04,1.0,UEFA
92493,2024-06-11,40.0,Czech Republic,CZE,1494.04,1494.04,1.0,UEFA
92494,2024-06-12,40.0,Czech Republic,CZE,1494.04,1494.04,1.0,UEFA
