In [None]:
import pandas as pd
import pickle
from scipy.stats import poisson

In [None]:
#Load the pickled group tables and the cleaned historical / fixture CSV files
#NOTE(review): pickle.load can execute arbitrary code; only load 'dict_table' from a trusted source
with open('dict_table', 'rb') as tabla: #the context manager closes the file handle once loading is done
    dict_table = pickle.load(tabla)
df_historical_data = pd.read_csv('clean_fifa_worldcup_matches.csv')
df_fixture = pd.read_csv('clean_fifa_worldcup_fixture.csv')

In [None]:
#Inspect the table stored under the 'Group A' key of dict_table
dict_table['Group A']

Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts
0,1,Qatar (H),0,0,0,0,0,0,0,0
1,2,Ecuador,0,0,0,0,0,0,0,0
2,3,Senegal,0,0,0,0,0,0,0,0
3,4,Netherlands,0,0,0,0,0,0,0,0


In [None]:
#Keep, for each perspective (home side / away side), the team name plus both goal columns
df_home = df_historical_data.loc[:, ['HomeTeam', 'HomeGoals', 'AwayGoals']]
df_away = df_historical_data.loc[:, ['AwayTeam', 'HomeGoals', 'AwayGoals']]

In [None]:
#From the home side's view HomeGoals are goals scored and AwayGoals goals conceded;
#from the away side's view it is the other way round
df_home = df_home.rename(
    {'HomeTeam': 'Team', 'HomeGoals': 'Goals scored', 'AwayGoals': 'Goals conceded'},
    axis='columns',
)
df_away = df_away.rename(
    {'AwayTeam': 'Team', 'HomeGoals': 'Goals conceded', 'AwayGoals': 'Goals scored'},
    axis='columns',
)

In [None]:
#Stack the home and away views, group by team and take the mean of goals scored and conceded
df_team_strength = (
    pd.concat((df_home, df_away), ignore_index=True)
    .groupby(by='Team')
    .mean()
)
df_team_strength

Unnamed: 0_level_0,Goals scored,Goals conceded
Team,Unnamed: 1_level_1,Unnamed: 2_level_1
Algeria,1.000000,1.461538
Angola,0.333333,0.666667
Argentina,1.691358,1.148148
Australia,0.812500,1.937500
Austria,1.482759,1.620690
...,...,...
Uruguay,1.553571,1.321429
Wales,0.800000,0.800000
West Germany,2.112903,1.241935
Yugoslavia,1.666667,1.272727


In [None]:
#Poisson distribution: a discrete distribution that describes the number of events occurring in a
#fixed interval or region.
#A discrete variable is a quantitative variable that can only take separate, countable values (0, 1, 2... goals), not any value within a range
#Conditions of the Poisson distribution:
  # 1. The number of events can be counted (goals can be counted)
  # 2. Events occur independently (on paper, one goal does not affect the probability of another goal)
  # 3. The rate at which events occur is constant (the probability of a goal in one match is the same as in another match)
  # 4. Two events cannot occur at the same time (there are no two goals at the same instant)

In [None]:
#Function that predicts the expected points of each side in a match
def predict_points(home, away, team_strength=None, max_goals=10):
    """Return the expected points ``(points_home, points_away)`` for one match.

    Each side's goal count is modelled as an independent Poisson variable whose
    rate is its own average goals scored multiplied by the opponent's average
    goals conceded. The probabilities of every scoreline from 0-0 up to
    ``max_goals``-``max_goals`` are accumulated into home win / draw / away win,
    and the expected points are ``3 * P(win) + P(draw)``.

    Parameters
    ----------
    home, away : labels looked up in the index of the strength table.
    team_strength : DataFrame indexed by team with 'Goals scored' and
        'Goals conceded' columns. Defaults to the notebook-level
        ``df_team_strength``.
    max_goals : highest number of goals per side that is considered
        (default 10, i.e. scorelines 0..10 as before).

    Returns
    -------
    tuple
        ``(points_home, points_away)``; ``(0, 0)`` when either team is missing
        from the strength table.
    """
    if team_strength is None:
        team_strength = df_team_strength

    if home not in team_strength.index or away not in team_strength.index:
        return (0, 0)

    # goals_scored * goals_conceded
    lamb_home = team_strength.at[home, 'Goals scored'] * team_strength.at[away, 'Goals conceded']
    lamb_away = team_strength.at[away, 'Goals scored'] * team_strength.at[home, 'Goals conceded']

    # The pmf of each side does not depend on the other loop variable, so it is
    # evaluated once per side instead of once per scoreline.
    goals = list(range(max_goals + 1))
    pmf_home = poisson.pmf(goals, lamb_home)
    pmf_away = poisson.pmf(goals, lamb_away)

    prob_home, prob_away, prob_draw = 0, 0, 0
    for x in goals: #number of goals home team
        for y in goals: #number of goals away team
            p = pmf_home[x] * pmf_away[y]
            if x == y:
                prob_draw += p
            elif x > y:
                prob_home += p
            else:
                prob_away += p

    points_home = 3 * prob_home + prob_draw
    points_away = 3 * prob_away + prob_draw
    return (points_home, points_away)


In [None]:
#Only the last expression of the cell is displayed, so the result of the Qatar call is not shown
predict_points('Qatar', 'Ecuador') #No historical data for Qatar because this is its first participation
predict_points('Argentina', 'Mexico')

(2.3129151525530505, 0.5378377125059863)

In [None]:
#Split the fixture by stage (rows are selected by position)
df_fixture_group_phase = df_fixture[:48].copy() #group stage: matches 0 to 47
df_fixture_roundof16 = df_fixture[48:56].copy() #round-of-16 matches
df_fixture_quarterfinals = df_fixture[56:60].copy() #quarter-final matches
df_fixture_semifinals = df_fixture[60:62].copy() #semi-final matches
#.copy() here as well: later cells replace values and add a column on this frame, and doing
#that on a bare slice of df_fixture raises SettingWithCopyWarning
df_fixture_final = df_fixture[62:].copy() #rows 62 onwards: the last matches, including the final

In [None]:
#Run the group-stage matches and update the table of each group
#NOTE(review): the Group A table lists the host as 'Qatar (H)'; if the fixture uses plain 'Qatar'
#the isin / equality matches below will never hit that row - confirm the names agree
for group in dict_table:
  table = dict_table[group]
  #Expected points are fractional; store 'Pts' as float so the additions below do not write
  #floats into an integer column (deprecated incompatible-dtype assignment in recent pandas)
  table['Pts'] = table['Pts'].astype(float)
  teams_in_group = table['Team'].values
  #Select the matches of this group by their home side
  df_fixture_group = df_fixture_group_phase[df_fixture_group_phase['home'].isin(teams_in_group)]
  for index, row in df_fixture_group.iterrows():
    home, away = row['home'], row['away']
    points_home, points_away = predict_points(home, away)
    table.loc[table['Team'] == home, 'Pts'] += points_home
    table.loc[table['Team'] == away, 'Pts'] += points_away

  #The team with the most points goes to the top; drop=True discards the old row labels
  table = table.sort_values('Pts', ascending=False).reset_index(drop=True)
  #Keep only the two columns needed afterwards, with points rounded to whole numbers
  dict_table[group] = table[['Team', 'Pts']].round(0)


In [None]:
#Fill the round-of-16 fixture with the first and second place of each group
for group, standings in dict_table.items():
  #Row labels 0 and 1 of each table are read as winner and runner-up (the tables are expected to be sorted by points already)
  group_winner, runner_up = standings.loc[[0, 1], 'Team']
  #Swap the placeholders of this group for the qualified teams
  placeholders = {f'Winners {group}': group_winner, f'Runners-up {group}': runner_up}
  df_fixture_roundof16.replace(placeholders, inplace=True)

In [None]:
#Add the winner column, initialised with a '?' placeholder
df_fixture_roundof16 = df_fixture_roundof16.assign(winner='?')
df_fixture_roundof16

In [None]:
def get_winner(df_fixture_updated):
  """Fill the 'winner' column of a fixture frame in place and return that frame.

  For every row the 'home' and 'away' values are passed to predict_points. The
  home side is recorded as winner only when its predicted points are strictly
  higher; in every other case (including equal points) the away side is
  recorded.
  """
  #Snapshot of (row label, home team, away team) for every match
  fixtures = list(zip(df_fixture_updated.index,
                      df_fixture_updated['home'],
                      df_fixture_updated['away']))
  for match_label, home, away in fixtures:
    points_home, points_away = predict_points(home, away)
    df_fixture_updated.loc[match_label, 'winner'] = home if points_home > points_away else away
  return df_fixture_updated

In [None]:
#Get the winner of each round-of-16 match
get_winner(df_fixture_roundof16)

In [None]:
#Function that takes a finished round and the fixture of the next round
def update_table(df_fixture_octavos, df_fixture_cuartos):
  """Resolve the placeholders of the next round from the results of a finished round.

  For each row of ``df_fixture_octavos`` the value of its 'score' column (shown
  in the notebook as e.g. 'Match 57') identifies the match. Every
  'Winners <match>' entry of ``df_fixture_cuartos`` is replaced by that row's
  'winner'. When the finished round also has 'home' and 'away' columns and the
  winner is one of the two sides, every 'Losers <match>' entry is replaced by
  the other side, so fixtures fed by losing teams are resolved as well.

  ``df_fixture_cuartos`` is modified in place, gets its 'winner' column reset
  to '?', and is also returned.
  """
  placeholders = {}
  for _, played in df_fixture_octavos.iterrows():
    winner = played['winner']
    match = played['score']
    placeholders[f'Winners {match}'] = winner
    #The loser is only known when the winner is one of the two listed sides
    home, away = played.get('home'), played.get('away')
    if winner == home:
      placeholders[f'Losers {match}'] = away
    elif winner == away:
      placeholders[f'Losers {match}'] = home
  if placeholders:
    df_fixture_cuartos.replace(placeholders, inplace=True)
  df_fixture_cuartos['winner'] = '?'
  return df_fixture_cuartos

In [None]:
#With the functions defined so far I can simulate the remaining rounds
update_table(df_fixture_roundof16, df_fixture_quarterfinals)

Unnamed: 0,home,score,away,year,winner
56,Germany,Match 58,Brazil,2022,?
57,Netherlands,Match 57,Argentina,2022,?
58,Spain,Match 60,Portugal,2022,?
59,England,Match 59,France,2022,?


In [None]:
#Winners of the quarter-finals
get_winner(df_fixture_quarterfinals)

Unnamed: 0,home,score,away,year,winner
56,Germany,Match 58,Brazil,2022,Brazil
57,Netherlands,Match 57,Argentina,2022,Netherlands
58,Spain,Match 60,Portugal,2022,Portugal
59,England,Match 59,France,2022,France


In [None]:
#Move the quarter-final winners into the semi-final fixture
update_table(df_fixture_quarterfinals, df_fixture_semifinals)

Unnamed: 0,home,score,away,year,winner
60,Netherlands,Match 61,Brazil,2022,?
61,France,Match 62,Portugal,2022,?


In [None]:
#Winners of the semi-finals
get_winner(df_fixture_semifinals)

Unnamed: 0,home,score,away,year,winner
60,Netherlands,Match 61,Brazil,2022,Brazil
61,France,Match 62,Portugal,2022,France


In [None]:
#Move the semi-final winners into the last fixture frame.
#df_fixture_final is a slice of df_fixture taken without .copy(), so writing to it emits SettingWithCopyWarning.
#The 'Losers Match ...' placeholders of row 62 are left unresolved in the displayed result.
update_table(df_fixture_semifinals, df_fixture_final)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_fixture_cuartos.replace({f'Winners {match}':winner}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_fixture_cuartos['winner'] = '?'


Unnamed: 0,home,score,away,year,winner
62,Losers Match 61,Match 63,Losers Match 62,2022,?
63,Brazil,Match 64,France,2022,?


In [None]:
#Get the winner of the World Cup (row 63, Match 64).
#Row 62 still holds 'Losers Match ...' placeholders in the displayed result, so its 'winner' value is not a real team.
get_winner(df_fixture_final)

Unnamed: 0,home,score,away,year,winner
62,Losers Match 61,Match 63,Losers Match 62,2022,Losers Match 62
63,Brazil,Match 64,France,2022,Brazil
