## Baio AR-1

In [2]:
import numpy as np
import pandas as pd
import pymc3 as pm, theano.tensor as tt
import matplotlib.pyplot as plt

%matplotlib inline

## Time Series

In [4]:
ts = pd.read_csv('./data/ts14-15.csv')
ts = ts.drop('Unnamed: 0', 1)

# Build team index
teams = ts.home_team.unique()
teams = pd.DataFrame(teams, columns=['team'])
teams['i'] = teams.index

ts = pd.merge(ts, teams, left_on='home_team', right_on='team', how='left')
ts = ts.rename(columns = {'i': 'i_home'}).drop('team', 1)
ts = pd.merge(ts, teams, left_on='away_team', right_on='team', how='left')
ts = ts.rename(columns = {'i': 'i_away'}).drop('team', 1)

# Observed goals stats (Eyeball Poisson)
observed_home_goals = ts.home_score.values
observed_away_goals = ts.away_score.values

home_team = ts.i_home.values
away_team = ts.i_away.values

num_teams = len(ts.i_home.drop_duplicates())
num_games = len(home_team)

ts = ts.sort_values(by='kick_off', ascending=1)
# ts = ts[((ts['home_team'] == 'Arsenal') | (ts['away_team'] == 'Arsenal'))]
ts['t'] = 1
# ts['t'] = ts.groupby('kick_off')['t'].cumsum()
ts['t'] = ts['t'].cumsum()

# ts = ts.set_index(keys='t')
# ts.index.name = None
ts.iloc[0:10,]

Unnamed: 0,home_score,away_score,kick_off,home_team,away_team,i_home,i_away,t
290,0,1,2014-08-16,Stoke City,Aston Villa,15,5,1
56,1,2,2014-08-16,Manchester United,Swansea City,2,19,2
274,0,1,2014-08-16,West Ham United,Tottenham Hotspur,14,8,3
10,2,1,2014-08-16,Arsenal,Crystal Palace,0,11,4
340,0,1,2014-08-16,Queens Park Rangers,Hull City,17,18,5
235,2,2,2014-08-16,Leicester City,Everton,12,7,6
202,2,2,2014-08-16,West Bromwich Albion,Sunderland,10,13,7
62,2,1,2014-08-17,Liverpool,Southampton,3,6,8
84,0,2,2014-08-17,Newcastle United,Manchester City,4,9,9
305,1,3,2014-08-18,Burnley,Chelsea,16,1,10


In [21]:
with pm.Model() as model:    
    # global home and intercept params?
    home = pm.Normal('home', 0, .01)
    intercept = pm.Normal('intercept', 0, .01)
    tau_att =  pm.Exponential('tau_att', 1./.02)
    tau_def =  pm.Exponential('tau_def', 1./.02)

    # team-specific model parameters
    # atts = [pm.GaussianRandomWalk('atts', tau_att**-2, shape=[num_games])]*len(teams)
    # Still center attack and defense parameters?
    atts = pm.GaussianRandomWalk('atts', tau_att**-2, shape=[num_teams, num_games])
    defs = pm.GaussianRandomWalk('defs', tau_def**-2, shape=[num_teams, num_games])
    
    home_theta  = tt.exp(intercept + home + atts[home_team] + defs[away_team])
    away_theta  = tt.exp(intercept + atts[away_team] + defs[home_team])

    # likelihood of observed data
    home_points = pm.Poisson('home_points', mu=home_theta, observed=observed_home_goals)
    away_points = pm.Poisson('away_points', mu=away_theta, observed=observed_away_goals)

Applied log-transform to tau_att and added transformed tau_att_log to model.
Applied log-transform to tau_def and added transformed tau_def_log to model.


In [22]:
# Fit data, starting with the MAP
with model:    
    start = pm.find_MAP()
    step = pm.NUTS(state=start)
    trace = pm.sample(2000, step, start=start)

 [-----------------100%-----------------] 1 of 1 complete in 0.1 sec

In [37]:
model.vars
# pm.traceplot(trace, [home, intercept]);
trace.varnames

['home',
 'tau_att_log',
 'tau_def_log',
 'intercept',
 'atts',
 'defs',
 'tau_att',
 'tau_def']

In [52]:
trace['atts']

array([[[ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        ..., 
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.]]])

In [53]:
trace['defs']

array([[[ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        ..., 
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ...,  0.,  0.,  0.]]])

In [54]:
trace['home']

array([ 0.])

In [55]:
trace['intercept']

array([ 0.])

In [57]:
trace['tau_att']

array([ 0.01386294])

In [58]:
trace['tau_def']

array([ 0.01386294])

In [39]:
observed_away_goals

array([0, 2, 1, 1, 0, 0, 0, 1, 2, 1, 1, 1, 0, 0, 0, 0, 1, 2, 1, 0, 0, 1, 0,
       0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 2, 1, 1, 0, 1, 1, 1, 1, 0,
       2, 1, 0, 1, 0, 1, 1, 1, 0, 0, 2, 2, 2, 2, 0, 1, 1, 1, 2, 1, 1, 3, 2,
       0, 0, 0, 0, 1, 0, 1, 2, 1, 1, 0, 0, 2, 2, 3, 2, 1, 3, 0, 1, 0, 1, 3,
       0, 2, 3, 3, 2, 1, 2, 0, 1, 2, 2, 2, 1, 0, 1, 0, 0, 2, 1, 3, 1, 1, 0,
       1, 2, 2, 0, 1, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 2, 6, 0, 0, 0,
       0, 0, 1, 1, 0, 3, 2, 2, 1, 1, 0, 1, 1, 0, 1, 3, 0, 3, 2, 1, 0, 1, 1,
       1, 0, 3, 1, 2, 2, 1, 0, 0, 2, 2, 1, 0, 1, 0, 2, 0, 0, 1, 0, 0, 0, 2,
       0, 1, 2, 0, 1, 1, 1, 0, 2, 0, 2, 0, 0, 2, 3, 3, 2, 3, 2, 2, 0, 0, 4,
       0, 0, 2, 2, 2, 1, 1, 1, 3, 1, 1, 1, 2, 0, 3, 3, 1, 0, 1, 2, 0, 1, 3,
       3, 3, 0, 0, 0, 2, 2, 1, 1, 1, 0, 1, 1, 2, 1, 0, 0, 2, 0, 1, 1, 0, 4,
       1, 1, 2, 4, 0, 4, 0, 1, 1, 0, 2, 3, 0, 2, 1, 1, 1, 0, 0, 3, 2, 1, 1,
       1, 3, 0, 0, 1, 0, 0, 0, 1, 2, 2, 1, 1, 0, 1, 1, 0, 0, 4, 0, 2, 1, 1,
       2, 2,

In [41]:
away_team

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19,  0,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
       16, 17, 18, 19,  0,  1,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,
       14, 15, 16, 17, 18, 19,  0,  1,  2,  4,  5,  6,  7,  8,  9, 10, 11,
       12, 13, 14, 15, 16, 17, 18, 19,  0,  1,  2,  3,  5,  6,  7,  8,  9,
       10, 11, 12, 13, 14, 15, 16, 17, 18, 19,  0,  1,  2,  3,  4,  6,  7,
        8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,  0,  1,  2,  3,  4,
        5,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,  0,  1,  2,
        3,  4,  5,  6,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,  0,
        1,  2,  3,  4,  5,  6,  7,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
       19,  0,  1,  2,  3,  4,  5,  6,  7,  8, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 11, 12, 13, 14,
       15, 16, 17, 18, 19,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 12,
       13, 14, 15, 16, 17