In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from scipy import stats
from scipy.optimize import minimize
from scipy.stats import poisson


import warnings
from pandas.errors import SettingWithCopyWarning
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)

In [12]:
tabela = pd.read_excel('dados/tabela/tabela-rodada_27.xlsx')
tabela_casa = pd.read_excel('dados/tabela/tabela_casa-rodada_27.xlsx')
historico_casa = pd.read_excel('dados/historico/historicocasa-rodada_27.xlsx')
historico = pd.read_excel('dados/historico/historico-rodada_27.xlsx')
rodadas = pd.read_excel('dados/previsoes/todas_rodadas.xlsx')
rodada_futura = pd.read_excel('dados/previsoes/rodada_futura-rodada-28.xlsx')

rodada_atual = tabela['MP'].max()

In [13]:
def rho_correction(x, y, lambda_x, mu_y, rho):
    if x==0 and y==0:
        return 1- (lambda_x * mu_y * rho)
    elif x==0 and y==1:
        return 1 + (lambda_x * rho)
    elif x==1 and y==0:
        return 1 + (mu_y * rho)
    elif x==1 and y==1:
        return 1 - rho
    else:
        return 1.0

In [14]:
def solve_parameters(dataset, init_vals = None, debug = False, options={'disp': True, 'maxiter':100},
                     constraints = [{'type':'eq', 'fun': lambda x: sum(x[:20])-20}] , **kwargs):
    teams = np.sort(dataset['Equipe'].unique())
    n_teams = len(teams)
    if init_vals is None:
        # random initialisation of model parameters
        init_vals = np.concatenate((np.random.uniform(0,1,(n_teams)), # attack strength
                                      np.random.uniform(0,-1,(n_teams)), # defence strength
                                      np.array([0, 1.0]) # rho (score correction), gamma (home advantage)
                                     ))
    def dc_log_like(x, y, alpha_x, beta_x, alpha_y, beta_y, rho, gamma):
        lambda_x, mu_y = np.exp(alpha_x + beta_y + gamma), np.exp(alpha_y + beta_x) 
        return (np.log(rho_correction(x, y, lambda_x, mu_y, rho)) + 
                np.log(poisson.pmf(x, lambda_x)) + np.log(poisson.pmf(y, mu_y)))

    def estimate_paramters(params):
        score_coefs = dict(zip(teams, params[:n_teams]))
        defend_coefs = dict(zip(teams, params[n_teams:(2*n_teams)]))
        rho, gamma = params[-2:]
        log_like = [dc_log_like(row.GF, row.GA, score_coefs[row.Equipe], defend_coefs[row.Equipe],
                     score_coefs[row.Opponent], defend_coefs[row.Opponent], rho, gamma) for row in dataset.itertuples()]
        return -sum(log_like)
    opt_output = minimize(estimate_paramters, init_vals, options=options, constraints = constraints, **kwargs)
    if debug:
        # sort of hacky way to investigate the output of the optimisation process
        return opt_output
    else:
        return dict(zip(["attack_"+team for team in teams] + 
                        ["defence_"+team for team in teams] +
                        ['rho', 'home_adv'],
                        opt_output.x)) 

In [15]:
params = solve_parameters(historico)

  np.log(poisson.pmf(x, lambda_x)) + np.log(poisson.pmf(y, mu_y)))
  return (np.log(rho_correction(x, y, lambda_x, mu_y, rho)) +


Optimization terminated successfully    (Exit mode 0)
            Current function value: 1471.0368058784172
            Iterations: 58
            Function evaluations: 2585
            Gradient evaluations: 58


In [16]:
def calc_means(param_dict, homeTeam, awayTeam):
    return [np.exp(param_dict['attack_'+homeTeam] + param_dict['defence_'+awayTeam] + param_dict['home_adv']),
            np.exp(param_dict['defence_'+homeTeam] + param_dict['attack_'+awayTeam])]

def dixon_coles_simulate_match(params_dict, homeTeam, awayTeam, max_goals=10):
    team_avgs = calc_means(params_dict, homeTeam, awayTeam)
    team_pred = [[poisson.pmf(i, team_avg) for i in range(0, max_goals+1)] for team_avg in team_avgs]
    output_matrix = np.outer(np.array(team_pred[0]), np.array(team_pred[1]))
    correction_matrix = np.array([[rho_correction(home_goals, away_goals, team_avgs[0],
                                                   team_avgs[1], params['rho']) for away_goals in range(2)]
                                   for home_goals in range(2)])
    output_matrix[:2,:2] = output_matrix[:2,:2] * correction_matrix
    return output_matrix

In [17]:
def inserir_df(matriz, df, time):
  #times = list(matriz.keys())
  
  #for time in times:
  df.loc[df['Home'] == time, 'vitoria_casa'] = np.sum(np.tril(matriz, -1))
  df.loc[df['Home'] == time, 'empate'] = np.sum(np.diag(matriz))
  df.loc[df['Home'] == time, 'vitoria_fora'] = np.sum(np.triu(matriz, 1))
  
  return df

In [31]:
ars_sou_dc = dixon_coles_simulate_match(params, 'Botafogo (RJ)', 'Ath Paranaense', max_goals=10)
rodada_previsoes = inserir_df(ars_sou_dc, rodada_futura, 'Botafogo (RJ)')

In [32]:
rodada_previsoes

Unnamed: 0,Wk,Day,Date,Home,xGCasa,gols_casa,gols_fora,xGFora,Away,vitoria_casa,empate,vitoria_fora
0,28,Sat,2023-10-21,Cuiaba,,,,,Goias,0.395964,0.321754,0.282282
1,28,Sat,2023-10-21,Sao Paulo,,,,,Gremio,0.316229,0.272192,0.411579
2,28,Sat,2023-10-21,Bahia,,,,,Fortaleza,0.28859,0.295081,0.416329
3,28,Sun,2023-10-22,Atletico Mineiro,,,,,Cruzeiro,0.380611,0.403978,0.21541
4,28,Sun,2023-10-22,Internacional,,,,,Santos,0.371898,0.307329,0.320772
5,28,Sun,2023-10-22,Flamengo,,,,,Vasco da Gama,0.528468,0.260888,0.210642
6,28,Sun,2023-10-22,Corinthians,,,,,America (MG),0.579622,0.216269,0.204092
7,28,Sun,2023-10-22,Coritiba,,,,,Palmeiras,0.09637,0.167618,0.735945
8,28,Sun,2023-10-22,Bragantino,,,,,Fluminense,0.465987,0.283301,0.250712
9,28,Sat,2023-10-21,Botafogo (RJ),,,,,Ath Paranaense,0.527832,0.286942,0.185226


In [33]:
rodada_previsoes.to_excel('dados/modelodc3par-28.xlsx')