In [1]:
#imports
import pandas as pd
import pickle
from scipy.stats import poisson

In [2]:
dict_table = pickle.load(open('./data/dict_table','rb'))
df_historical_data = pd.read_csv('./data/clean_copa_america_matches.csv')
df_fixture = pd.read_csv('./data/clean_copa_america_fixture.csv')

In [3]:
df_home = df_historical_data[['HomeTeam', 'HomeGoals', 'AwayGoals']]
df_away = df_historical_data[['AwayTeam', 'HomeGoals', 'AwayGoals']]

df_home = df_home.rename(columns={'HomeTeam':'Team', 'HomeGoals': 'GoalsScored', 'AwayGoals': 'GoalsConceded'})
df_away = df_away.rename(columns={'AwayTeam':'Team', 'HomeGoals': 'GoalsConceded', 'AwayGoals': 'GoalsScored'})

df_team_strength = pd.concat([df_home, df_away], ignore_index=True).groupby(['Team']).mean()
df_team_strength

Unnamed: 0_level_0,GoalsScored,GoalsConceded
Team,Unnamed: 1_level_1,Unnamed: 2_level_1
Argentina,1.797753,0.842697
Bolivia,0.75,1.734375
Brazil,1.969388,0.744898
Chile,1.482353,1.141176
Colombia,1.184783,0.869565
Costa Rica,1.0,1.823529
Ecuador,1.153846,1.646154
Haiti,0.333333,4.0
Honduras,1.166667,0.833333
Jamaica,0.0,1.5


## Predicting Points

In [4]:
def predict_points(home, away):
    if home in df_team_strength.index and away in df_team_strength.index:
        # goals_scored * goals_conceded
        lamb_home = df_team_strength.at[home,'GoalsScored'] * df_team_strength.at[away,'GoalsConceded']
        lamb_away = df_team_strength.at[away,'GoalsScored'] * df_team_strength.at[home,'GoalsConceded']
        prob_home, prob_away, prob_draw = 0, 0, 0
        for x in range(0,11): #number of goals home team
            for y in range(0, 11): #number of goals away team
                p = poisson.pmf(x, lamb_home) * poisson.pmf(y, lamb_away)
                if x == y:
                    prob_draw += p
                elif x > y:
                    prob_home += p
                else:
                    prob_away += p
        
        points_home = 3 * prob_home + prob_draw
        points_away = 3 * prob_away + prob_draw
        return (points_home, points_away)
    else:
        return (0, 0)

In [5]:
print(predict_points('Colombia', 'United States',))
print(predict_points('Argentina', 'Mexico'))
print(predict_points('Brazil', 'Ecuador'))

(2.070552659169355, 0.7125828727996534)
(2.0897847853727654, 0.7210167109987151)
(2.592945157470004, 0.2987257932665714)


In [6]:
df_fixture

Unnamed: 0,home,score,away,year
0,Argentina,Match 1,Canada,2024
1,Peru,Match 2,Chile,2024
2,Peru,Match 10,Canada,2024
3,Chile,Match 9,Argentina,2024
4,Argentina,Match 17,Peru,2024
5,Canada,Match 18,Chile,2024
6,Ecuador,Match 4,Venezuela,2024
7,Mexico,Match 3,Jamaica,2024
8,Ecuador,Match 12,Jamaica,2024
9,Venezuela,Match 11,Mexico,2024


## Predicting Copa America

## Group Stage

In [7]:
df_fixture_group_22 = df_fixture[:24].copy()
df_fixture_quarter = df_fixture[24:28].copy()
df_fixture_semi = df_fixture[28:30].copy()
df_fixture_final = df_fixture[30:].copy()

In [8]:
df_fixture_final

Unnamed: 0,home,score,away,year
30,Loser Match 29,Match 31,Loser Match 30,2024
31,Winner Match 29,Match 32,Winner Match 30,2024


In [10]:
for group in dict_table:
    teams_in_group = dict_table[group]['Team'].values
    df_fixture_group_6 = df_fixture_group_22[df_fixture_group_22['home'].isin(teams_in_group)]
    for index, row in df_fixture_group_6.iterrows():
        home, away = row['home'], row['away']
        points_home, points_away = predict_points(home, away)
        dict_table[group].loc[dict_table[group]['Team'] == home, 'Pts'] += points_home
        dict_table[group].loc[dict_table[group]['Team'] == away, 'Pts'] += points_away

    dict_table[group] = dict_table[group].sort_values('Pts', ascending=False).reset_index()
    dict_table[group] = dict_table[group][['Team', 'Pts']]
    dict_table[group] = dict_table[group].round(0)

  dict_table[group].loc[dict_table[group]['Team'] == home, 'Pts'] += points_home
  dict_table[group].loc[dict_table[group]['Team'] == home, 'Pts'] += points_home
  dict_table[group].loc[dict_table[group]['Team'] == home, 'Pts'] += points_home
  dict_table[group].loc[dict_table[group]['Team'] == home, 'Pts'] += points_home


In [11]:
dict_table

{'Group A':         Team  Pts
 0  Argentina  4.0
 1      Chile  3.0
 2       Peru  2.0
 3     Canada  0.0,
 'Group B':         Team  Pts
 0     Mexico  7.0
 1    Ecuador  6.0
 2  Venezuela  3.0
 3    Jamaica  1.0,
 'Group C':                 Team  Pts
 0            Uruguay  5.0
 1             Panama  2.0
 2            Bolivia  2.0
 3  United States (H)  0.0,
 'Group D':          Team  Pts
 0      Brazil  7.0
 1    Colombia  5.0
 2    Paraguay  3.0
 3  Costa Rica  2.0}

## Quarter Finals

In [12]:
df_fixture_quarter

Unnamed: 0,home,score,away,year
24,Winner Group A,Match 25,Runner-up Group B,2024
25,Winner Group B,Match 26,Runner-up Group A,2024
26,Winner Group C,Match 27,Runner-up Group D,2024
27,Winner Group D,Match 28,Runner-up Group C,2024


In [13]:
for group in dict_table:
    print(group)
    group_winner = dict_table[group].loc[0, 'Team']
    runners_up = dict_table[group].loc[1, 'Team']

    print(group_winner, runners_up)
    
    # Replace the values in df_fixture_quarter
    df_fixture_quarter.replace({f'Winner {group}': group_winner,
                                f'Runner-up {group}': runners_up}, inplace=True)

# After replacing values, set the 'winner' column to '?'
df_fixture_quarter['winner'] = '?'

# Print the DataFrame to verify changes
print(df_fixture_quarter)

Group A
Argentina Chile
Group B
Mexico Ecuador
Group C
Uruguay Panama
Group D
Brazil Colombia
         home     score      away  year winner
24  Argentina  Match 25   Ecuador  2024      ?
25     Mexico  Match 26     Chile  2024      ?
26    Uruguay  Match 27  Colombia  2024      ?
27     Brazil  Match 28    Panama  2024      ?


In [17]:
def get_winner(df_fixture_updated):
    for index, row in df_fixture_updated.iterrows():
        home, away = row['home'], row['away']
        points_home, points_away = predict_points(home, away)
        if points_home > points_away:
            winner = home
        else:
            winner = away
        df_fixture_updated.loc[index, 'winner'] = winner
    return df_fixture_updated

In [18]:
get_winner(df_fixture_quarter)


Unnamed: 0,home,score,away,year,winner
24,Argentina,Match 25,Ecuador,2024,Argentina
25,Mexico,Match 26,Chile,2024,Chile
26,Uruguay,Match 27,Colombia,2024,Uruguay
27,Brazil,Match 28,Panama,2024,Brazil


In [21]:
def update_table(df_fixture_round_1, df_fixture_round_2):
    for index, row in df_fixture_round_1.iterrows():
        winner = df_fixture_round_1.loc[index, 'winner']
        match = df_fixture_round_1.loc[index, 'score']
        df_fixture_round_2.replace({f'Winner {match}':winner}, inplace=True)
    df_fixture_round_2['winner'] = '?'
    return df_fixture_round_2

In [22]:
update_table(df_fixture_quarter, df_fixture_semi)


Unnamed: 0,home,score,away,year,winner
28,Argentina,Match 29,Chile,2024,?
29,Uruguay,Match 30,Brazil,2024,?


In [23]:
get_winner(df_fixture_semi)


Unnamed: 0,home,score,away,year,winner
28,Argentina,Match 29,Chile,2024,Argentina
29,Uruguay,Match 30,Brazil,2024,Brazil


In [24]:
update_table(df_fixture_semi, df_fixture_final)


Unnamed: 0,home,score,away,year,winner
30,Loser Match 29,Match 31,Loser Match 30,2024,?
31,Argentina,Match 32,Brazil,2024,?


In [25]:
get_winner(df_fixture_final)


Unnamed: 0,home,score,away,year,winner
30,Loser Match 29,Match 31,Loser Match 30,2024,Loser Match 30
31,Argentina,Match 32,Brazil,2024,Brazil
