# Hierarchical Modelling of the Rugby Premiership

In [None]:
import pandas as pd
import numpy as np
import pymc3 as pm
import theano.tensor as tt

ImportError: cannot import name 'logsumexp'

## Simple model 
Inspired by the [football model by Daniel Weitzenfeld](http://danielweitzenfeld.github.io/passtheroc/blog/2014/10/28/bayes-premier-league/).

First we need to load the data and restrict our attention to the 2016-2017 season.

In [14]:
# Load the match history (the first CSV column is the row index), parse the
# match dates, and keep only the 2016-2017 season for now: strictly after
# 2016-08-01 and up to and including 2017-08-01.
df = (
    pd.read_csv('expected_points/match_results.csv', index_col=0)
    .assign(date=lambda matches: pd.to_datetime(matches['date']))
    .loc[lambda matches: matches['date'].gt('2016-08-01') & matches['date'].le('2017-08-01')]
)
df.head()

Unnamed: 0,home_team,date,away_team,home_score,away_score,away_tries,away_pens,home_tries,home_pens
457,Sale Sharks,2016-09-09,Harlequins,19.0,10.0,1.0,1.0,1.0,4.0
458,Bath Rugby,2016-09-10,Newcastle Falcons,58.0,5.0,1.0,0.0,7.0,1.0
459,Leicester Tigers,2016-09-10,Wasps,22.0,34.0,4.0,2.0,2.0,4.0
460,Bristol Rugby,2016-09-11,Northampton Saints,10.0,32.0,4.0,2.0,1.0,1.0
461,Exeter Chiefs,2016-09-11,Saracens,13.0,34.0,5.0,1.0,1.0,2.0


In [15]:
# Lookup table with one row per distinct home team, in order of first
# appearance; column 'i' repeats the row position as the team's index.
teams = (
    pd.DataFrame({'team': df['home_team'].unique()})
    .assign(i=lambda lookup: lookup.index)
)
teams.head()

Unnamed: 0,team,i
0,Sale Sharks,0
1,Bath Rugby,1
2,Leicester Tigers,2
3,Bristol Rugby,3
4,Exeter Chiefs,4


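Now we'll merge the team indices back into the match data, adding an `i_home` and an `i_away` column for the home and away team of each match.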

In [17]:
# Attach each side's team index to every match.
# The left merges keep every match row. After each join the lookup's own
# 'team' key column is redundant, so it is dropped by keyword (`columns=`):
# the positional form `drop('team', 1)` is rejected by pandas >= 2.0.
df = (
    df
    .merge(teams, left_on='home_team', right_on='team', how='left')
    .rename(columns={'i': 'i_home'})
    .drop(columns='team')
    .merge(teams, left_on='away_team', right_on='team', how='left')
    .rename(columns={'i': 'i_away'})
    .drop(columns='team')
)
df.head()

Unnamed: 0,home_team,date,away_team,home_score,away_score,away_tries,away_pens,home_tries,home_pens,i_home,i_away
0,Sale Sharks,2016-09-09,Harlequins,19.0,10.0,1.0,1.0,1.0,4.0,0,10
1,Bath Rugby,2016-09-10,Newcastle Falcons,58.0,5.0,1.0,0.0,7.0,1.0,1,6
2,Leicester Tigers,2016-09-10,Wasps,22.0,34.0,4.0,2.0,2.0,4.0,2,7
3,Bristol Rugby,2016-09-11,Northampton Saints,10.0,32.0,4.0,2.0,1.0,1.0,3,9
4,Exeter Chiefs,2016-09-11,Saracens,13.0,34.0,5.0,1.0,1.0,2.0,4,5


We need to convert the data into arrays of the same length so that PyMC3 can work with them.

In [19]:
# Observed scores as plain arrays, one entry per match.
observed_home_score = df['home_score'].values
observed_away_score = df['away_score'].values

# Team index of each side, in the same row order as the score arrays.
home_team = df['i_home'].values
away_team = df['i_away'].values

# Problem sizes: distinct home-team indices and number of matches.
num_teams = df['i_home'].unique().size
num_games = home_team.shape[0]

Generate some initial values (not necessary)

In [20]:
# Attack start values: log of the mean score each team achieved as the away side.
g = df.groupby('i_away')
att_starting_points = g['away_score'].mean().pipe(np.log)

# Defence start values: negated log of the mean away score recorded against
# each team in its home matches.
# NOTE(review): in the model, a larger `defs` value raises the opponent's
# expected score, so the sign used here may be inverted -- confirm before
# passing these as start values.
g = df.groupby('i_home')
def_starting_points = np.negative(np.log(g['away_score'].mean()))

### Build the model

In [24]:
# Poisson model of match scores with per-team attack and defence effects.
# Each side's expected score is exp(linear predictor); the home side's
# predictor additionally includes the shared `home` term.
with pm.Model() as model:
    # global model parameters
    home = pm.Flat('home') # home-advantage term, added only to the home side's predictor; flat prior
    # Scale parameters of the team-level attack / defence effects (half-Student-t, nu=3, sigma=2.5).
    sd_att = pm.HalfStudentT('sd_att', nu=3, sigma=2.5)
    sd_def = pm.HalfStudentT('sd_def', nu=3, sigma=2.5)
    intercept = pm.Flat('intercept')  # shared baseline in both predictors; flat prior

    # team-specific model parameters
    # One zero-mean normal effect per team (length num_teams), sharing the scales above.
    atts_star = pm.Normal("atts_star", mu=0, sigma=sd_att, shape=num_teams)
    defs_star = pm.Normal("defs_star", mu=0, sigma=sd_def, shape=num_teams)

    # Centre the effects by subtracting their mean, so `atts` and `defs` each average to zero.
    atts = pm.Deterministic('atts', atts_star - tt.mean(atts_star))
    defs = pm.Deterministic('defs', defs_star - tt.mean(defs_star))
    # Per-match expected scores: index the team effects by each match's home/away
    # team index and exponentiate the sum. A team's `atts` enters its own
    # predictor; its `defs` enters the opponent's predictor.
    home_theta = tt.exp(intercept + home + atts[home_team] + defs[away_team])
    away_theta = tt.exp(intercept + atts[away_team] + defs[home_team])

    # likelihood of observed data
    # Poisson likelihoods with the per-match means above, conditioned on the observed score arrays.
    home_points = pm.Poisson('home_points', mu=home_theta, observed=observed_home_score)
    away_points = pm.Poisson('away_points', mu=away_theta, observed=observed_away_score)

  'The MCMC() syntax is deprecated. Please pass in nodes explicitly via M = MCMC(input).')


AttributeError: __enter__