In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import pickle 
from scipy.stats import poisson

In [3]:
dir_datasets = '/content/drive/MyDrive/DataScience/2. Proyecto de Data Science con Python - YT/0.Dataset/{}'
dir_dict = '/content/drive/MyDrive/DataScience/2. Proyecto de Data Science con Python - YT/dict_table.pickle'

dict_table = pickle.load(open(dir_dict, 'rb'))
df_data_historical = pd.read_csv(dir_datasets.format('cleaned_fifa_worldcup_matches.csv'))
df_fixture = pd.read_csv(dir_datasets.format('cleaned_fifa_worldcup_fixture.csv'))

In [4]:
# revisando los archivos
dict_table.keys()
dict_table['Group A']
df_data_historical.head()

Unnamed: 0,HomeTeam,AwayTeam,Year,HomeGoals,AwayGoals,TotalGoals
0,France,Mexico,1930,4,1,5
1,Uruguay,Argentina,1930,4,2,6
2,Uruguay,Yugoslavia,1930,6,1,7
3,Argentina,United States,1930,6,1,7
4,Paraguay,Belgium,1930,1,0,1


# **1.Calcular Team Strenth**

In [5]:
# dividir df en df_home y df_away
df_home = df_data_historical[['HomeTeam', 'HomeGoals', 'AwayGoals']]
df_away = df_data_historical[['AwayTeam', 'HomeGoals', 'AwayGoals']]

In [6]:
# renombrar columnas
df_home = df_home.rename(columns={'HomeTeam': 'Team',
                        'HomeGoals': 'GoalsScored',
                        'AwayGoals': 'GoalsConceded'})

df_away = df_away.rename(columns={'AwayTeam': 'Team',
                        'HomeGoals': 'GoalsConceded',
                        'AwayGoals': 'GoalsScored'})

In [7]:
# concatenar df_home y df_away, hacer group por team y calcular promedio
df_team_strength = pd.concat([df_home, df_away], ignore_index=True).groupby('Team').mean()
df_team_strength

Unnamed: 0_level_0,GoalsScored,GoalsConceded
Team,Unnamed: 1_level_1,Unnamed: 2_level_1
Algeria,1.000000,1.461538
Angola,0.333333,0.666667
Argentina,1.691358,1.148148
Australia,0.812500,1.937500
Austria,1.482759,1.620690
...,...,...
Uruguay,1.553571,1.321429
Wales,0.800000,0.800000
West Germany,2.112903,1.241935
Yugoslavia,1.666667,1.272727


In [None]:
from google.colab import files

df_team_strength.to_csv('df_team_strength.csv', index=False)
files.download('df_team_strength.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# **2.Función predict_points()**

In [8]:
def predict_points(home, away):
  if home in df_team_strength.index and away in df_team_strength.index:
    lamb_home = df_team_strength.at[home, 'GoalsScored'] * df_team_strength.at[away,'GoalsConceded']
    lamb_away = df_team_strength.at[away,'GoalsScored'] * df_team_strength.at[home,'GoalsConceded']
    prob_home, prob_away, prob_draw = 0, 0, 0
    #print(lamb_home)
    #print(lamb_away)
    for x in range(0, 11):
      for y in range(0, 11):
        p = poisson.pmf(x, lamb_home) * poisson.pmf(y, lamb_away)
        if x == y:
          prob_draw += p
        elif x > y:
          prob_home += p
        else:
          prob_away += p

    points_home = 3 * prob_home + prob_draw
    points_away = 3 * prob_away + prob_draw

    
    return (points_home, points_away)
  else:
    return (0, 0)

#predict_points('Argentina', 'Mexico')

**2.1 Testear mi funcion con:
Argentina - Mexico, Inglaterra - USA, Qatar - Ecuador**

In [9]:
print(predict_points('Argentina', 'Mexico'))
print(predict_points('England', 'United States'))
print(predict_points('Qatar (H)', 'Ecuador'))

(2.3129151525530505, 0.5378377125059863)
(2.2356147635326007, 0.5922397535606193)
(0, 0)


# **3. Prediciendo el mundial**

**3.1 Prediciendo la fase de grupos**

In [10]:
# diviendo el fixture en grupos, octavos, cuartos, semis, etc
df_fixture_group_48 = df_fixture[:48].copy()
df_fixture_knockout = df_fixture[48:56].copy()
df_fixture_quarter = df_fixture[56:60].copy()
df_fixture_semi = df_fixture[60:62].copy()
df_fixture_final = df_fixture[62:].copy()

In [11]:
# correr todos los partidos de la fase de grupos y actualizar las tablas de cada grupo
for group in dict_table:
  teams_in_group = dict_table[group]['Team'].values
  df_fixture_group_6 = df_fixture_group_48[df_fixture_group_48['home'].isin(teams_in_group)]
  for index, row in df_fixture_group_6.iterrows():
      home, away = row['home'], row['away']
      points_home, points_away = predict_points(home, away)
      dict_table[group].loc[dict_table[group]['Team'] == home, 'Pts'] += points_home
      dict_table[group].loc[dict_table[group]['Team'] == away, 'Pts'] += points_away

  dict_table[group] = dict_table[group].sort_values('Pts', ascending=False).reset_index()
  dict_table[group] = dict_table[group][['Team', 'Pts']]
  dict_table[group] = dict_table[group].round(0)

In [12]:
# mostrar las tablas actualizadas
dict_table['Group A']
dict_table['Group B']
dict_table['Group C']
dict_table['Group D']
dict_table['Group E']
dict_table['Group F']
dict_table['Group G']
dict_table['Group H']

Unnamed: 0,Team,Pts
0,Portugal,6.0
1,Uruguay,5.0
2,Ghana,4.0
3,South Korea,2.0


**3.2 Prediciendo los octavos de final**

In [13]:
df_fixture_knockout

Unnamed: 0,home,score,away,year
48,Winners Group A,Match 49,Runners-up Group B,2022
49,Winners Group C,Match 50,Runners-up Group D,2022
50,Winners Group D,Match 52,Runners-up Group C,2022
51,Winners Group B,Match 51,Runners-up Group A,2022
52,Winners Group E,Match 53,Runners-up Group F,2022
53,Winners Group G,Match 54,Runners-up Group H,2022
54,Winners Group F,Match 55,Runners-up Group E,2022
55,Winners Group H,Match 56,Runners-up Group G,2022


In [14]:
dict_table['Group A']

Unnamed: 0,Team,Pts
0,Netherlands,4.0
1,Senegal,2.0
2,Ecuador,2.0
3,Qatar (H),0.0


In [16]:
# actualizar el fixture de octavos con el puesto 1 y 2 
for group in dict_table:
  group_winner = dict_table[group].loc[0, 'Team'] # primer puesto
  runner_up = dict_table[group].loc[1, 'Team'] # segundo puesto
  df_fixture_knockout.replace({f'Winners {group}' : group_winner,
                                       f'Runners-up {group}' : runner_up}, 
                              inplace=True)
  df_fixture_knockout['Winner'] = '?'

In [17]:
df_fixture_knockout

Unnamed: 0,home,score,away,year,Winner
48,Netherlands,Match 49,Wales,2022,?
49,Argentina,Match 50,Denmark,2022,?
50,France,Match 52,Poland,2022,?
51,England,Match 51,Senegal,2022,?
52,Germany,Match 53,Belgium,2022,?
53,Brazil,Match 54,Uruguay,2022,?
54,Croatia,Match 55,Spain,2022,?
55,Portugal,Match 56,Switzerland,2022,?


In [18]:
# obtener el ganador de los partidos de octavos
def get_winner(df_fixture_updated):
  for index, row in df_fixture_updated.iterrows():
    home, away = row['home'], row['away']
    points_home, points_away = predict_points(home, away)
    if points_home > points_away:
      winner = home
    else:
      winner = away

    df_fixture_updated.loc[index, 'Winner'] = winner
  
  return df_fixture_updated

In [19]:
# ejecutamos la funcion para que nos indique el ganador
get_winner(df_fixture_knockout)

Unnamed: 0,home,score,away,year,Winner
48,Netherlands,Match 49,Wales,2022,Netherlands
49,Argentina,Match 50,Denmark,2022,Argentina
50,France,Match 52,Poland,2022,France
51,England,Match 51,Senegal,2022,England
52,Germany,Match 53,Belgium,2022,Germany
53,Brazil,Match 54,Uruguay,2022,Brazil
54,Croatia,Match 55,Spain,2022,Spain
55,Portugal,Match 56,Switzerland,2022,Portugal


**3.3 Prediciendo los cuartos de final**

In [22]:
# vamos a crear una funcion para cambiar los ganadores en los dataset correctos
def update_table(df_fixture_round_1, df_fixture_round_2): 
  for index, row in df_fixture_round_1.iterrows():
    winner = df_fixture_round_1.loc[index, 'Winner']
    matchh = df_fixture_round_1.loc[index, 'score']
    df_fixture_round_2.replace({f'Winners {matchh}':winner}, inplace=True)
  df_fixture_round_2['Winner'] = '?'
  return df_fixture_round_2

In [23]:
update_table(df_fixture_knockout, df_fixture_quarter)

Unnamed: 0,home,score,away,year,Winner
56,Germany,Match 58,Brazil,2022,?
57,Netherlands,Match 57,Argentina,2022,?
58,Spain,Match 60,Portugal,2022,?
59,England,Match 59,France,2022,?


In [24]:
get_winner(df_fixture_quarter)

Unnamed: 0,home,score,away,year,Winner
56,Germany,Match 58,Brazil,2022,Brazil
57,Netherlands,Match 57,Argentina,2022,Netherlands
58,Spain,Match 60,Portugal,2022,Portugal
59,England,Match 59,France,2022,France


**3.4 Prediciendo semi finales**

In [25]:
update_table(df_fixture_quarter, df_fixture_semi)

Unnamed: 0,home,score,away,year,Winner
60,Netherlands,Match 61,Brazil,2022,?
61,France,Match 62,Portugal,2022,?


In [26]:
get_winner(df_fixture_semi)

Unnamed: 0,home,score,away,year,Winner
60,Netherlands,Match 61,Brazil,2022,Brazil
61,France,Match 62,Portugal,2022,France


**3.5 Prediciendo la final**

In [28]:
update_table(df_fixture_semi, df_fixture_final)

Unnamed: 0,home,score,away,year,Winner
62,Losers Match 61,Match 63,Losers Match 62,2022,?
63,Brazil,Match 64,France,2022,?


In [29]:
get_winner(df_fixture_final)

Unnamed: 0,home,score,away,year,Winner
62,Losers Match 61,Match 63,Losers Match 62,2022,Losers Match 62
63,Brazil,Match 64,France,2022,Brazil
