In [1]:
%reload_ext autoreload
%autoreload 2

In [2]:
import pandas as pd


In [125]:
elo_dif = pd.read_csv("metrics/mov_elo/elo_diff.csv", sep=";")
elo_dif["home"] = 1

In [126]:
elo_dif2 = elo_dif.copy()
elo_dif2["elo_dif"] = elo_dif2["away_elo"] - elo_dif2["home_elo"]
elo_dif2["goal_dif"] = elo_dif2["away_score"] - elo_dif2["home_score"]
elo_dif2["home"] = 0

In [127]:
elo_dif_complete = pd.concat([elo_dif, elo_dif2], axis=0)

In [3]:
import pymc as pm
import numpy as np

In [39]:
elo_dif_complete["elo_dif"] = elo_dif_complete["elo_dif"] / elo_dif_complete["elo_dif"].max()
elo_dif_complete["goal_dif"] = elo_dif_complete["goal_dif"] / elo_dif_complete["goal_dif"].max()

In [40]:
with pm.Model() as mov:
    home_adv = pm.Normal("home_advantage")
    power_three = pm.Normal("power_three", shape=3)
    power = pm.Deterministic("power", power_three[0] + power_three[1] * np.power(elo_dif_complete["elo_dif"].values, 2) + power_three[2] * np.power(elo_dif_complete["elo_dif"].values, 3))
    #power = pm.Normal("power")
    influence = pm.Deterministic("influence", elo_dif_complete["home"].values * home_adv + power)
    
    diff = pm.Normal("diff", influence, observed=elo_dif_complete["goal_dif"].values)

In [41]:
with mov:
    trace = pm.sample(1000, tune=1000, progressbar=True)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [home_advantage, power_three]


Output()

Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 5 seconds.


In [43]:
trace

In [116]:
elo_diff_test = 0.5
home_advantage_test = 0

In [117]:
pred = (trace.posterior.home_advantage[0].values * home_advantage_test + 
        trace.posterior.power_three[0][:, 0].values * elo_diff_test + 
        trace.posterior.power_three[0][:, 1].values * np.power(elo_diff_test, 2) + 
        trace.posterior.power_three[0][:, 2].values * np.power(elo_diff_test, 3))

In [118]:
# mean and std
print(np.mean(pred) * 8)
print(np.percentile(pred, [20, 80]) * 8)

0.4282241247493298
[0.25803698 0.59850493]


In [119]:
import arviz as az
trace.to_netcdf("metrics/mov_elo/mov_elo.nc")

'metrics/mov_elo/mov_elo.nc'

In [4]:
from database_io.db_handler import DB_handler
db = DB_handler()

In [5]:
games = db.games.get_all_games(0.1, last=18000)

In [7]:
# games["minutes_missed"] = (90 - games["minutes"]).clip(lower=0)
games["elo_diff"] = games["elo"] - games["opposition_elo"]
games["goal_diff"] = games["result"].apply(lambda x: int(x.split("-")[0]) - int(x.split("-")[1]))



In [186]:
games["goal_diff"].max()

8

In [8]:
games["elo_diff"] = games["elo_diff"] / (elo_std:=max(games["elo_diff"].max(), abs(games["elo_diff"].min())))
games["goal_diff"] = games["goal_diff"] / (goal_std:=max(games["goal_diff"].max(), abs(games["goal_diff"].min())))
games["minutes"] = games["minutes"] / (min_std:=games["minutes"].max())

In [10]:
print(elo_std, goal_std, min_std)

411.2269214742066 7 107


In [9]:
games

Unnamed: 0,id,minutes,elo,opposition_elo,result,home,elo_diff,goal_diff
0,1643244,0.149533,1474.451012,1366.958340,0-0,1,0.261395,0.000000
1,1643229,0.915888,1523.669914,1386.710162,4-2,1,0.333052,0.285714
2,1643229,0.915888,1558.944291,1386.710162,4-2,1,0.418830,0.285714
3,1643229,0.915888,1521.724306,1386.710162,4-2,1,0.328320,0.285714
4,1643229,0.588785,1531.290999,1383.367928,2-2,1,0.359712,0.000000
...,...,...,...,...,...,...,...,...
17995,1557341,0.887850,1398.701202,1667.333589,2-3,0,-0.653246,-0.142857
17996,1557341,0.887850,1382.349301,1667.333589,2-3,0,-0.693010,-0.142857
17997,1557341,0.887850,1722.643135,1458.776199,3-2,1,0.641658,0.142857
17998,1557341,0.728972,1766.939939,1457.393242,3-2,1,0.752739,0.142857


In [11]:
with pm.Model() as mov2:
    home_adv = pm.Normal("home_advantage")
    power_three = pm.Normal("power_three")
    power = pm.Deterministic("power", power_three * games["elo_diff"].values)
    minutes_inf = pm.Normal("minutes_inf")

    influence = pm.Deterministic("influence", (games["home"].values * home_adv + power) - (minutes_inf * games["minutes"].values))
    
    diff = pm.Normal("diff", influence, observed=games["goal_diff"].values)

In [12]:
with mov2:
    trace = pm.sample(1000, tune=1000, progressbar=True)

Auto-assigning NUTS sampler...


Initializing NUTS using jitter+adapt_diag...


Multiprocess sampling (4 chains in 4 jobs)


NUTS: [home_advantage, power_three, minutes_inf]


Output()

Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 10 seconds.


In [191]:
trace

In [21]:
elo_diff_test = 1
home_advantage_test = 1
minutes_inf_test = 1

pred = (
        (trace.posterior.home_advantage[0].values * home_advantage_test + 
        trace.posterior.power_three[0].values * elo_diff_test)
        - (trace.posterior.minutes_inf[0].values * minutes_inf_test))

# mean and std
print(np.mean(pred) * 7)
print(np.percentile(pred, [20, 80]) * 7)

1.8383097645157815
[1.65502384 2.02303319]


In [20]:
trace.to_netcdf(filename="metrics/mov_elo/mov_elo.nc", groups="posterior")

'metrics/mov_elo/mov_elo.nc'