In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from scipy import stats
from scipy.optimize import minimize


import warnings
from pandas.errors import SettingWithCopyWarning
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)

$$X= ``Número\ de\ gols\ do\ time\ de\ casa (i)" \sim Poisson(\lambda_x)$$
$$Y= ``Número\ de\ gols\ do\ time\ visitante (j)" \sim Poisson(\lambda_y)$$
$\lambda_x = \alpha_i\beta_j\gamma$\
$\lambda_y = \alpha_j\beta_i$

onde:\
$\alpha_i = Ataque\ do\ time\ i$\
$\beta_j = Defesa\ do\ time\ j$\
$\gamma = Fator\ casa\ do\ time\ i$

$$P(X=x)=\frac{(\alpha_i\beta_j\gamma)^{x}e^{-\alpha_i\beta_j\gamma}}{x!}\qquad P(Y=y)=\frac{(\alpha_j\beta_i)^{y}e^{-\alpha_j\beta_i}}{y!}\\$$

$$P(X=x, Y=y)=P(X=x)P(Y=y)=\frac{(\alpha_i\beta_j\gamma)^{x}(\alpha_j\beta_i)^{y} e^{-(\alpha_i\beta_j\gamma+\alpha_j\beta_i)}}{x!y!}$$

# Imports

In [9]:
tabela = pd.read_excel('dados/tabela/tabela-rodada_27.xlsx')
tabela_casa = pd.read_excel('dados/tabela/tabela_casa-rodada_27.xlsx')
historico_casa = pd.read_excel('dados/historico/historicocasa-rodada_27.xlsx')
historico = pd.read_excel('dados/historico/historico-rodada_27.xlsx')
rodadas = pd.read_excel('dados/rodadas/todas_rodadas.xlsx')
rodada_futura = pd.read_excel('dados/rodadas/rodada-28.xlsx')

rodada_atual = tabela['MP'].max()

# Modelo

In [5]:
def parametros_iniciais(par_x, par_y, peso_x, peso_y, fator_casa):
  dem_x = (sum(peso_x[0])+sum(peso_x[1]))
  dem_y = (sum(peso_y[0])+sum(peso_y[1]))
  times, att_i, def_i, casa_i, att_j, def_j = [], [], [], [], [], []


  for time in tabela['Squad']:
    times.append(time)
    att_i.append(float(sum(tabela.loc[tabela['Squad'] == time, par_x[0][i]]*peso_x[0][i] for i in range(len(par_x[0])))/dem_x))
    def_i.append(float(sum(tabela.loc[tabela['Squad'] == time, par_y[1][i]]*peso_y[1][i] for i in range(len(par_y[1])))/dem_y))
    casa_i.append(((1/int(tabela_casa.loc[tabela_casa['Squad'] == time, fator_casa[0]]))*0.5)+1)
    att_j.append(float(sum(tabela.loc[tabela['Squad'] == time, par_y[0][i]]*peso_y[0][i] for i in range(len(par_y[0])))/dem_y))
    def_j.append(float(sum(tabela.loc[tabela['Squad'] == time, par_x[1][i]]*peso_x[1][i] for i in range(len(par_x[1])))/dem_x))
  
  data = {
    'time': times,
    'att_i': att_i,
    'def_i': def_i,
    'casa_i': casa_i,
    'att_j': att_j,
    'def_j': def_j,
  }

  data = pd.DataFrame(data)
  return data

In [6]:
par_x = [['xG_total', 'xG_casa', 'GF_total', 'GF_casa', 'xG_5total', 'xG_5casa', 'GF_5total', 'GF_5casa'], #Ataque casa
        ['GA_total', 'GA_fora', 'GA_5total', 'xG_sofrido_fora', 'xG_5sofrido_fora']]  #Defesa fora
peso_x = [[1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1]]


par_y = [['xG_total', 'xG_fora', 'GF_total', 'GF_fora', 'xG_5total', 'xG_5fora', 'GF_5total', 'GF_5fora'], #Ataque fora
        ['GA_total', 'GA_casa', 'GA_5total', 'xG_sofrido_casa', 'xG_5sofrido_casa']]  #Defesa casa
peso_y = [[1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1]]

fator_casa = ['Rk_casa']

par_iniciais = parametros_iniciais(par_x, par_y, peso_x, peso_y, fator_casa)

In [7]:
def rho_dependencia(x, y, lambda_x, mu_y, rho):
    if x==0 and y==0:
        return 1- (lambda_x * mu_y * rho)
    elif x==0 and y==1:
        return 1 + (lambda_x * rho)
    elif x==1 and y==0:
        return 1 + (mu_y * rho)
    elif x==1 and y==1:
        return 1 - rho
    else:
        return 1.0
    
def log_mv(params):
  alpha_i, beta_j, gamma, alpha_j, beta_i, rho = params
  lambda_x, lambda_y = alpha_i*beta_j*gamma, alpha_j*beta_i
  result = -sum(np.log(rho_dependencia(x_gf, y_ga, lambda_x, lambda_y, rho)) + np.log(stats.poisson.pmf(x_gf, lambda_x)) + np.log(stats.poisson.pmf(y_ga, lambda_y)))
  return result

def emv(jogos_rodada, par_ini):
    jogos = dict(zip(jogos_rodada['Home'].unique(), jogos_rodada['Away'].unique()))
    global x_gf
    global y_ga
    estimados = {}

    for casa, fora in jogos.items():
      x_gf = list(historico_casa[historico_casa['Equipe'] == casa]['GF'])
      y_ga = list(historico_casa[historico_casa['Opponent'] == fora]['GA'])
      a_i = float(par_ini[par_ini['time'] == casa]['att_i'])
      b_j = float(par_ini[par_ini['time'] == fora]['def_j'])
      gam = float(par_ini[par_ini['time'] == casa]['casa_i'])
      a_j = float(par_ini[par_ini['time'] == fora]['att_j'])
      b_i = float(par_ini[par_ini['time'] == casa]['def_i'])
      rho = np.random.uniform(-1, 1, 1)[0]

      if len(x_gf) > len(y_ga):
          x_gf = x_gf[len(x_gf) - len(y_ga):]
      elif len(y_ga) > len(x_gf):
          y_ga = y_ga[len(y_ga) - len(x_gf):]
    
      parametros = np.array([a_i, b_j, gam, a_j, b_i, rho])
    
      output = minimize(log_mv, parametros, options={'disp': True, 'maxiter':10000})
      estimados[casa] = output.x
    return estimados

def matrix_probs(estimados):
  lambdas = {}
  matriz_gols = {}
  rho_matriz = {}

  for time, value in estimados.items():
    lambda_x = value[0]*value[1]*value[2]
    lambda_y = value[3]*value[4]
    lambdas[time] = [[stats.poisson.pmf(i, lambda_x) for i in range(7)], 
                     [stats.poisson.pmf(i, lambda_y) for i in range(7)]]
    matriz_gols[time] = np.outer(lambdas[time][0], lambdas[time][1])
    rho_matriz[time] = np.array([[rho_dependencia(gol_casa, gol_fora, lambda_x, lambda_y, value[5]) for gol_fora in range(2)] for gol_casa in range(2)])
    matriz_gols[time][:2,:2] = matriz_gols[time][:2,:2] * rho_matriz[time]

  return matriz_gols, lambdas, rho_matriz

def inserir_df(matriz, df):
  times = list(matriz.keys())
  
  for time in times:
    df.loc[df['Home'] == time, 'vitoria_casa'] = np.sum(np.tril(matriz[time], -1))
    df.loc[df['Home'] == time, 'empate'] = np.sum(np.diag(matriz[time]))
    df.loc[df['Home'] == time, 'vitoria_fora'] = np.sum(np.triu(matriz[time], 1))
  
  return df

In [8]:
estimados = emv(rodada_futura, par_iniciais)
matriz_gols, lambdas, rhos = matrix_probs(estimados)
rodada_previsoes = inserir_df(matriz_gols, rodada_futura)

Optimization terminated successfully.
         Current function value: 33.616487
         Iterations: 5
         Function evaluations: 63
         Gradient evaluations: 9
Optimization terminated successfully.
         Current function value: 39.834598
         Iterations: 5
         Function evaluations: 63
         Gradient evaluations: 9
         Current function value: 34.220551
         Iterations: 1
         Function evaluations: 776
         Gradient evaluations: 110
Optimization terminated successfully.
         Current function value: 36.839235
         Iterations: 9
         Function evaluations: 77
         Gradient evaluations: 11
         Current function value: 31.786801
         Iterations: 0
         Function evaluations: 109
         Gradient evaluations: 15
Optimization terminated successfully.
         Current function value: 30.128041
         Iterations: 5
         Function evaluations: 56
         Gradient evaluations: 8
Optimization terminated successfully.
      

  result = -sum(np.log(rho_dependencia(x_gf, y_ga, lambda_x, lambda_y, rho)) + np.log(stats.poisson.pmf(x_gf, lambda_x)) + np.log(stats.poisson.pmf(y_ga, lambda_y)))
  res = _minimize_bfgs(fun, x0, args, jac, callback, **options)
  res = _minimize_bfgs(fun, x0, args, jac, callback, **options)
  result = -sum(np.log(rho_dependencia(x_gf, y_ga, lambda_x, lambda_y, rho)) + np.log(stats.poisson.pmf(x_gf, lambda_x)) + np.log(stats.poisson.pmf(y_ga, lambda_y)))


         Current function value: nan
         Iterations: 4
         Function evaluations: 812
         Gradient evaluations: 116


  result = -sum(np.log(rho_dependencia(x_gf, y_ga, lambda_x, lambda_y, rho)) + np.log(stats.poisson.pmf(x_gf, lambda_x)) + np.log(stats.poisson.pmf(y_ga, lambda_y)))
  result = -sum(np.log(rho_dependencia(x_gf, y_ga, lambda_x, lambda_y, rho)) + np.log(stats.poisson.pmf(x_gf, lambda_x)) + np.log(stats.poisson.pmf(y_ga, lambda_y)))
  res = _minimize_bfgs(fun, x0, args, jac, callback, **options)


In [9]:
rodada_futura.to_excel('dados/previsaodc5par_28.xlsx')

In [10]:
rodada_futura

Unnamed: 0,Wk,Day,Date,Home,xGCasa,gols_casa,gols_fora,xGFora,Away,vitoria_casa,empate,vitoria_fora
0,28,Sat,2023-10-21,Cuiaba,,,,,Goias,0.614967,0.043057,0.341542
1,28,Sat,2023-10-21,Sao Paulo,,,,,Gremio,0.430427,0.276699,0.291512
2,28,Sat,2023-10-21,Bahia,,,,,Fortaleza,0.635187,0.102076,0.261176
3,28,Sun,2023-10-22,Atletico Mineiro,,,,,Cruzeiro,0.371903,0.18378,0.443441
4,28,Sun,2023-10-22,Internacional,,,,,Santos,0.465232,0.148561,0.386112
5,28,Sun,2023-10-22,Flamengo,,,,,Vasco da Gama,0.542783,0.11308,0.343387
6,28,Sun,2023-10-22,Corinthians,,,,,America (MG),0.324552,0.48845,0.185637
7,28,Sun,2023-10-22,Coritiba,,,,,Palmeiras,0.154387,0.510014,0.33467
8,28,Sun,2023-10-22,Bragantino,,,,,Fluminense,0.562534,0.061546,0.374449
9,28,Sat,2023-10-21,Botafogo (RJ),,,,,Ath Paranaense,,,


In [39]:
lista = [round(x, 4) for x in list(np.arange(0, 2 + 0.01, 0.01))]
gma = [round(x, 4) for x in list(np.arange(1, 2 + 0.01, 0.01))]

In [40]:
# Crie listas para armazenar os valores de cada coluna
lambda_values = []
alpha_i_values = []
beta_j_values = []
gamma_values = []

for alpha_i in lista:
    for beta_j in lista:
        for gamma in gma:
            lambda_x = np.prod(stats.poisson.pmf(historico_casa['GF'], alpha_i * beta_j * gamma))
            lambda_values.append(lambda_x)
            alpha_i_values.append(alpha_i)
            beta_j_values.append(beta_j)
            gamma_values.append(gamma)
    print(alpha_i, '/', lista[-1])

# Crie o DataFrame no final
estimador = pd.DataFrame({
    'lambda': lambda_values,
    'alpha_i': alpha_i_values,
    'beta_j': beta_j_values,
    'gamma': gamma_values
})


0.0 / 2.0
0.01 / 2.0
0.02 / 2.0
0.03 / 2.0
0.04 / 2.0
0.05 / 2.0
0.06 / 2.0
0.07 / 2.0
0.08 / 2.0
0.09 / 2.0
0.1 / 2.0
0.11 / 2.0
0.12 / 2.0
0.13 / 2.0
0.14 / 2.0
0.15 / 2.0
0.16 / 2.0
0.17 / 2.0
0.18 / 2.0
0.19 / 2.0
0.2 / 2.0
0.21 / 2.0
0.22 / 2.0
0.23 / 2.0
0.24 / 2.0
0.25 / 2.0
0.26 / 2.0
0.27 / 2.0
0.28 / 2.0
0.29 / 2.0
0.3 / 2.0
0.31 / 2.0
0.32 / 2.0
0.33 / 2.0
0.34 / 2.0
0.35 / 2.0
0.36 / 2.0
0.37 / 2.0
0.38 / 2.0
0.39 / 2.0
0.4 / 2.0
0.41 / 2.0
0.42 / 2.0
0.43 / 2.0
0.44 / 2.0
0.45 / 2.0
0.46 / 2.0
0.47 / 2.0
0.48 / 2.0
0.49 / 2.0
0.5 / 2.0
0.51 / 2.0
0.52 / 2.0
0.53 / 2.0
0.54 / 2.0
0.55 / 2.0
0.56 / 2.0
0.57 / 2.0
0.58 / 2.0
0.59 / 2.0
0.6 / 2.0
0.61 / 2.0
0.62 / 2.0
0.63 / 2.0
0.64 / 2.0
0.65 / 2.0
0.66 / 2.0
0.67 / 2.0
0.68 / 2.0
0.69 / 2.0
0.7 / 2.0
0.71 / 2.0
0.72 / 2.0
0.73 / 2.0
0.74 / 2.0
0.75 / 2.0
0.76 / 2.0
0.77 / 2.0
0.78 / 2.0
0.79 / 2.0
0.8 / 2.0
0.81 / 2.0
0.82 / 2.0
0.83 / 2.0
0.84 / 2.0
0.85 / 2.0
0.86 / 2.0
0.87 / 2.0
0.88 / 2.0
0.89 / 2.0
0.9 / 2.0
0.91 / 2.

In [43]:
estimador['lambda'].max()

1.8018589969546852e-176

In [52]:
stats.poisson.pmf(4, 1.8018589969546852)

0.0724660432985796