## AR Mock

### Questions
- What is the complexity of the model? -> Quantify variables and runtime exactly!
- What about the identifiability constraints?

In [1]:
import numpy as np
import pandas as pd
import pymc3 as pm, theano.tensor as tt
import matplotlib.pyplot as plt

%matplotlib inline

In [5]:
columns = ['home_score', 'away_score', 'home_team', 'away_team', 'i_home', 'i_away']

# data = [[2,1,'Arsenal','Liverpool',0,1],[0,2,'Liverpool','Arsenal',1,0],
#         [1,1,'Arsenal','Liverpool',0,1],[2,2,'Liverpool','Arsenal',1,0],
#         [3,1,'Arsenal','Liverpool',0,1],[1,2,'Liverpool','Arsenal',1,0],
#         [1,0,'Arsenal','Liverpool',0,1],[0,2,'Liverpool','Arsenal',1,0]]

# data = [[2,1,'Arsenal','Liverpool',0,1],[0,2,'Liverpool','Arsenal',1,0],
#         [1,1,'Burnley','Stoke City',2,3],[1,0,'Stoke City','Burnley',3,2]]

data = [[2,1,'Arsenal','Liverpool',0,1],[0,2,'Liverpool','Arsenal',1,0]]

df = pd.DataFrame(data=data, columns=columns)

observed_home_goals = df.home_score.values
observed_away_goals = df.away_score.values

home_team = df.i_home.values
away_team = df.i_away.values

num_teams = len(df.i_home.drop_duplicates())
num_games = len(home_team)

print "num_teams: %d, num_games: %d" %(num_teams, num_games)

df

num_teams: 2, num_games: 2


Unnamed: 0,home_score,away_score,home_team,away_team,i_home,i_away
0,2,1,Arsenal,Liverpool,0,1
1,0,2,Liverpool,Arsenal,1,0


In [6]:
with pm.Model() as model:
    # global home and intercept params?
    home = pm.Normal('home', 0, .001)
    intercept = pm.Normal('intercept', 0, .001)
    tau_att =  pm.Exponential('tau_att', 1./.02)
    tau_def =  pm.Exponential('tau_def', 1./.02)

    atts = pm.GaussianRandomWalk('atts', tau_att**-2, shape=[num_teams, num_games])
    defs = pm.GaussianRandomWalk('defs', tau_def**-2, shape=[num_teams, num_games])
    
    home_theta  = tt.exp(intercept + home + atts[home_team] + defs[away_team])
    away_theta  = tt.exp(intercept + atts[away_team] + defs[home_team])

    # likelihood of observed data
    home_points = pm.Poisson('home_points', mu=home_theta, observed=observed_home_goals)
    away_points = pm.Poisson('away_points', mu=away_theta, observed=observed_away_goals)

Applied log-transform to tau_att and added transformed tau_att_log to model.
Applied log-transform to tau_def and added transformed tau_def_log to model.


In [7]:
with model:
    mu, sds, elbo = pm.variational.advi(n=200000)

Iteration 0 [0%]: ELBO = -218383.91
Iteration 20000 [10%]: ELBO = -20.16
Iteration 40000 [20%]: ELBO = -25.14
Iteration 60000 [30%]: ELBO = -39.74
Iteration 80000 [40%]: ELBO = -24.01
Iteration 100000 [50%]: ELBO = -22.94
Iteration 120000 [60%]: ELBO = -28.39
Iteration 140000 [70%]: ELBO = -18.35
Iteration 160000 [80%]: ELBO = -28.09
Iteration 180000 [90%]: ELBO = -22.13
Finished [100%]: ELBO = -43.42


In [None]:
# Try variational inference
with model:
    # Start next run at the last sampled position.
    step = pm.NUTS(scaling=model.dict_to_array(sds), is_cov=True)
    trace = pm.sample(20000, step, start=mu)
    
# with model:
#     start = pm.find_MAP()
#     step = pm.NUTS(state=start)
#     trace = pm.sample(2000, step, start=start)

 [                  0%                  ] 18 of 20000 complete in 44.3 sec

In [None]:
figsize(12,6)
pm.traceplot(trace, ['atts', 'defs'])