In [1]:
import pandas as pd
import pickle
from scipy.stats import poisson

In [2]:
dict_table=pickle.load(open('dict_table','rb'))
df_historical_data=pd.read_csv('clean_copa_america_matches.csv')
df_fixture=pd.read_csv('clean_copa_america_fixture.csv')

In [3]:
for group_name in dict_table:
    df = dict_table[group_name]
    df['Teams'] = df['Teams'].replace({'Argentina (H)': 'Argentina', 'Colombia (H)': 'Colombia'})
    dict_table[group_name] = df
    
dict_table['Group A']

Unnamed: 0,Pos,Teams,Pld,W,D,L,GF,GA,GD,Pts,Qualification
0,1,Argentina,0,0,0,0,0,0,0,0,Advance to knockout stage
2,3,Bolivia,0,0,0,0,0,0,0,0,Advance to knockout stage
3,4,Uruguay,0,0,0,0,0,0,0,0,Advance to knockout stage
4,5,Chile,0,0,0,0,0,0,0,0,
5,6,Paraguay,0,0,0,0,0,0,0,0,


# 1 Calcular Team Strength

In [4]:
df_home=df_historical_data[['HomeTeam', 'HomeGoals', 'AwayGoals']]
df_away=df_historical_data[['AwayTeam', 'HomeGoals', 'AwayGoals']]

In [5]:
df_home=df_home.rename(columns={'HomeTeam':'Team', 'HomeGoals': 'GoalsScored', 'AwayGoals': 'GoalsConceded'})
df_away=df_away.rename(columns={'AwayTeam':'Team', 'HomeGoals': 'GoalsConceded', 'AwayGoals': 'GoalsScored'})

In [6]:
df_team_strength=pd.concat([df_home, df_away], ignore_index=True).groupby('Team').mean()
df_team_strength

Unnamed: 0_level_0,GoalsScored,GoalsConceded
Team,Unnamed: 1_level_1,Unnamed: 2_level_1
Argentina,2.379888,0.910615
Bolivia,0.951456,2.456311
Brazil,2.285714,1.059524
Chile,1.566474,1.699422
Colombia,1.176991,1.663717
Costa Rica,1.071429,1.928571
Ecuador,1.070796,2.681416
Haiti,0.333333,4.0
Honduras,1.166667,0.833333
Jamaica,0.0,2.0


# 2 Funcion predict_points

In [7]:
def predict_points(home, away):
    if home in df_team_strength.index and away in df_team_strength.index:
        lamb_home = df_team_strength.at[home, 'GoalsScored'] * df_team_strength.at[away, 'GoalsConceded']
        lamb_away = df_team_strength.at[away, 'GoalsScored'] * df_team_strength.at[home, 'GoalsConceded']
        prob_home, prob_away, prob_draw = 0, 0, 0
        for x in range(0, 12):
            for y in range (0,12):
                p = poisson.pmf(x,lamb_home) * poisson.pmf(y, lamb_away)
                if x==y :
                    prob_draw+=p
                elif x>y :
                    prob_home+=p
                else:
                    prob_away+=p
        points_home = 3*prob_home + prob_draw
        points_away = 3*prob_away + prob_draw
        return (points_home, points_away)
    else:
        return (0,0)

## 2.1 Testear funcion

In [8]:
predict_points('Mexico', 'Peru')

(1.6160178742925952, 1.1864265581399516)

# 3 Predict

## 3.1 Fase de grupo

In [9]:
df_fixture_group_24 = df_fixture[:20].copy()
df_fixture_quarter = df_fixture[20:24].copy()
df_fixture_semi = df_fixture[24:26].copy()
df_fixture_final = df_fixture[26:].copy()

In [10]:
for group in dict_table:
    teams_in_group = dict_table[group]['Teams'].values
    df_fixture_group_6 = df_fixture_group_24[df_fixture_group_24['home'].isin(teams_in_group)]
    for index, row in df_fixture_group_6.iterrows():
        home, away = row['home'], row['away']
        points_home, points_away = predict_points(home, away)
        
        dict_table[group].loc[dict_table[group]['Teams'] == home, 'Pts'] += points_home
        dict_table[group].loc[dict_table[group]['Teams'] == away, 'Pts'] += points_away

    dict_table[group] = dict_table[group].sort_values('Pts', ascending=False).reset_index()
    dict_table[group] = dict_table[group][['Teams', 'Pts']]
    dict_table[group] = dict_table[group].round(0)

  dict_table[group].loc[dict_table[group]['Teams'] == home, 'Pts'] += points_home
  dict_table[group].loc[dict_table[group]['Teams'] == home, 'Pts'] += points_home


In [11]:
print(dict_table['Group A'], '\n')
print(dict_table['Group B'])

       Teams   Pts
0  Argentina  10.0
1    Uruguay   8.0
2      Chile   5.0
3   Paraguay   4.0
4    Bolivia   1.0 

       Teams   Pts
0     Brazil  11.0
1       Peru   7.0
2   Colombia   6.0
3    Ecuador   3.0
4  Venezuela   2.0


## 3.2 Cuartos

In [12]:
for group in dict_table:
    group_winner = dict_table[group].loc[0, 'Teams']
    runners_up = dict_table[group].loc[1, 'Teams']
    third_group= dict_table[group].loc[2, 'Teams']
    forth_group=dict_table[group].loc[3, 'Teams']
    df_fixture_quarter.replace({f'Winner {group}': group_winner,
                                f'Runner-up {group}': runners_up,
                                f'3rd {group}': third_group,
                                f'4th {group}': forth_group}, inplace=True)
df_fixture_quarter['winner']='?'
df_fixture_quarter

Unnamed: 0,home,score,away,year,winner
20,Peru,Match 21,Chile,2021,?
21,Brazil,Match 22,Paraguay,2021,?
22,Uruguay,Match 23,Colombia,2021,?
23,Argentina,Match 24,Ecuador,2021,?


In [13]:
def get_winner(df_fixture_updated):
    for index, row in df_fixture_updated.iterrows():
        home, away= row['home'], row['away']
        points_home, points_away = predict_points(home, away)
        if points_home>points_away:
            winner = home
        else:
            winner = away
        df_fixture_updated.loc[index, 'winner']= winner
    return df_fixture_updated

In [14]:
get_winner(df_fixture_quarter)

Unnamed: 0,home,score,away,year,winner
20,Peru,Match 21,Chile,2021,Peru
21,Brazil,Match 22,Paraguay,2021,Brazil
22,Uruguay,Match 23,Colombia,2021,Uruguay
23,Argentina,Match 24,Ecuador,2021,Argentina


## 3.3 Semis

In [15]:
def update_table(df_fixture_round_1, df_fixture_round_2):
    for index, row in df_fixture_round_1.iterrows():
        winner = df_fixture_round_1.loc[index, 'winner']
        match = df_fixture_round_1.loc[index, 'score']
        df_fixture_round_2.replace({f'Winner {match}':winner}, inplace=True)
    df_fixture_round_2['winner'] = '?'
    return df_fixture_round_2

In [16]:
update_table(df_fixture_quarter, df_fixture_semi)

Unnamed: 0,home,score,away,year,winner
24,Brazil,Match 25,Peru,2021,?
25,Argentina,Match 26,Uruguay,2021,?


In [17]:
get_winner(df_fixture_semi)

Unnamed: 0,home,score,away,year,winner
24,Brazil,Match 25,Peru,2021,Brazil
25,Argentina,Match 26,Uruguay,2021,Argentina


## 3.1 Final

In [18]:
update_table(df_fixture_semi, df_fixture_final)

Unnamed: 0,home,score,away,year,winner
26,Loser Match 26,Match 27,Loser Match 25,2021,?
27,Argentina,Match 28,Brazil,2021,?


In [19]:
print(predict_points('Colombia', 'Peru'))
get_winner(df_fixture_final)

(1.041271801937593, 1.7710753332445022)


Unnamed: 0,home,score,away,year,winner
26,Loser Match 26,Match 27,Loser Match 25,2021,Loser Match 25
27,Argentina,Match 28,Brazil,2021,Argentina
