In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [12]:
def determine_result(row):
    if row["HomeTeamGoals"] > row["AwayTeamGoals"]:
        return "Home Win"
    elif row["HomeTeamGoals"] < row["AwayTeamGoals"]:
        return "Away Win"
    else:
        return "Draw"

def calculate_performance(data, teams):
    data["result"] = data.apply(determine_result, axis=1)

    filtered_data = data[
        (data["HomeTeamName"].isin(teams)) & (data["AwayTeamName"].isin(teams))
    ]

    home_team_performance = (
        filtered_data.groupby("HomeTeamName")
        .agg(
            home_matches_played=("HomeTeamName", "size"),
            home_goals_scored=("HomeTeamGoals", "sum"),
            home_goals_conceded=("AwayTeamGoals", "sum"),
            home_wins=("result", lambda x: (x == "Home Win").sum()),
            home_draws=("result", lambda x: (x == "Draw").sum()),
            home_losses=("result", lambda x: (x == "Away Win").sum()),
        )
        .reset_index()
    )

    away_team_performance = (
        filtered_data.groupby("AwayTeamName")
        .agg(
            away_matches_played=("AwayTeamName", "size"),
            away_goals_scored=("AwayTeamGoals", "sum"),
            away_goals_conceded=("HomeTeamGoals", "sum"),
            away_wins=("result", lambda x: (x == "Away Win").sum()),
            away_draws=("result", lambda x: (x == "Draw").sum()),
            away_losses=("result", lambda x: (x == "Home Win").sum()),
        )
        .reset_index()
    )

    home_team_performance.rename(columns={"HomeTeamName": "Team"}, inplace=True)
    away_team_performance.rename(columns={"AwayTeamName": "Team"}, inplace=True)

    team_performance = pd.merge(
        home_team_performance,
        away_team_performance,
        on="Team"
    )

    team_performance["total_matches_played"] = (
        team_performance["home_matches_played"]
        + team_performance["away_matches_played"]
    )

    team_performance["total_goals_scored"] = (
        team_performance["home_goals_scored"] + team_performance["away_goals_scored"]
    )

    team_performance["total_goals_conceded"] = (
        team_performance["home_goals_conceded"]
        + team_performance["away_goals_conceded"]
    )

    team_performance["total_wins"] = (
        team_performance["home_wins"] + team_performance["away_wins"]
    )

    team_performance["total_draws"] = (
        team_performance["home_draws"] + team_performance["away_draws"]
    )

    team_performance["total_losses"] = (
        team_performance["home_losses"] + team_performance["away_losses"]
    )

    team_performance["win_rate"] = (
        team_performance["total_wins"] / team_performance["total_matches_played"]
    )

    team_performance["avg_goals_scored"] = (
        team_performance["total_goals_scored"]
        / team_performance["total_matches_played"]
    )

    team_performance["avg_goals_conceded"] = (
        team_performance["total_goals_conceded"]
        / team_performance["total_matches_played"]
    )

    most_recent_year = filtered_data["Year"].max()
    recent_data = filtered_data[filtered_data["Year"] == most_recent_year]

    recent_home_performance = (
        recent_data.groupby("HomeTeamName")
        .agg(recent_home_wins=("result", lambda x: (x == "Home Win").sum()))
        .reset_index()
    )

    recent_away_performance = (
        recent_data.groupby("AwayTeamName")
        .agg(recent_away_wins=("result", lambda x: (x == "Away Win").sum()))
        .reset_index()
    )

    recent_home_performance.rename(columns={"HomeTeamName": "Team"}, inplace=True)
    recent_away_performance.rename(columns={"AwayTeamName": "Team"}, inplace=True)

    recent_performance = pd.merge(
        recent_home_performance,
        recent_away_performance,
        on="Team",
        suffixes=("_home", "_away"),
    )

    recent_performance["total_recent_wins"] = (
        recent_performance["recent_home_wins"] + recent_performance["recent_away_wins"]
    )

    full_performance = pd.merge(
        team_performance, recent_performance, on="Team", how="left"
    )

    full_performance["total_recent_wins"].fillna(0, inplace=True)

    full_performance["performance_score"] = (
        full_performance["win_rate"] * 0.6
        + full_performance["total_recent_wins"]
        / full_performance["total_matches_played"] * 0.4
        + full_performance["avg_goals_scored"] * 0.2
        - full_performance["avg_goals_conceded"] * 0.2
    )

    full_performance = full_performance.sort_values(
        by="performance_score", ascending=False
    )

    return full_performance

def predict_winner(performance):
    predicted_winner = performance.sort_values(
        by="performance_score", ascending=False
    ).iloc[0]["Team"]

    return predicted_winner

In [3]:
data = pd.read_csv(
    "https://raw.githubusercontent.com/menene/euro2024/main/uefa_euro_matches.csv",
    encoding="utf8",
)

data["HomeTeamName"] = data["HomeTeamName"].str.rstrip()
data["AwayTeamName"] = data["AwayTeamName"].str.lstrip()

In [5]:
teams = [
    "Albania",
    "Austria",
    "Belgium",
    "Croatia",
    "Czechia",
    "Denmark",
    "England",
    "France",
    "Georgia",
    "Germany",
    "Hungary",
    "Italy",
    "Netherlands",
    "Poland",
    "Portugal",
    "Romania",
    "Scotland",
    "Serbia",
    "Slovakia",
    "Slovenia",
    "Spain",
    "Switzerland",
    "Türkiye",
    "Ukraine",
]

In [13]:
performance = calculate_performance(data, teams)

performance

Unnamed: 0,Team,home_matches_played,home_goals_scored,home_goals_conceded,home_wins,home_draws,home_losses,away_matches_played,away_goals_scored,away_goals_conceded,...,total_wins,total_draws,total_losses,win_rate,avg_goals_scored,avg_goals_conceded,recent_home_wins,recent_away_wins,total_recent_wins,performance_score
6,France,17,29,15,10,4,3,11,13,11,...,15,7,6,0.535714,1.5,0.928571,2.0,1.0,3.0,0.478571
10,Netherlands,10,17,11,5,2,3,9,14,8,...,9,5,5,0.473684,1.631579,1.0,,,0.0,0.410526
12,Portugal,11,17,10,6,4,1,12,17,15,...,11,7,5,0.478261,1.478261,1.086957,1.0,1.0,2.0,0.4
9,Italy,10,11,2,5,5,0,13,10,12,...,9,11,3,0.391304,0.913043,0.608696,1.0,1.0,2.0,0.330435
7,Germany,13,15,10,5,5,3,11,12,15,...,10,6,8,0.416667,1.125,1.041667,,,0.0,0.266667
5,England,8,9,8,3,2,3,14,20,14,...,7,10,5,0.318182,1.318182,1.0,,,0.0,0.254545
3,Croatia,8,11,13,3,1,4,5,4,3,...,5,3,5,0.384615,1.153846,1.230769,,,0.0,0.215385
16,Spain,10,11,7,3,4,3,15,15,17,...,8,9,8,0.32,1.04,0.96,,,0.0,0.208
8,Hungary,4,7,10,1,1,2,2,3,2,...,2,1,3,0.333333,1.666667,2.0,0.0,1.0,1.0,0.2
0,Albania,1,0,1,0,0,1,2,1,2,...,1,0,2,0.333333,0.333333,1.0,0.0,1.0,1.0,0.2


In [14]:
predicted_winner = predict_winner(performance)

print("El ganador de la Euro 2024 será:", predicted_winner)

El ganador de la Euro 2024 será: France
