In [None]:
import configobj
from crate import client
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use("nbagg")
import matplotlib.pyplot as plt

import pymc3 as pm
import theano.tensor as tt

from teams import nba_teams
# Mapping imported from the local `teams` module. index_teams() iterates its
# keys and indexes the corresponding values (team_keys[key]).
team_keys = nba_teams

def index_teams(team_keys):
    """Assign a consecutive integer position to every team.

    Parameters
    ----------
    team_keys : mapping
        Iterated key by key; the *value* stored under each key
        (``team_keys[key]``) is what gets indexed.

    Returns
    -------
    dict
        ``{team_keys[key]: position}`` where ``position`` is the zero-based
        iteration order of ``key``. When two keys share a value, the later
        position overwrites the earlier one.
    """
    return dict(
        (team_keys[key], position) for position, key in enumerate(team_keys)
    )
    
# Lookup from team identifier to integer position; later cells use it to
# turn the team columns of the query result into integer indices.
team_index = index_teams(team_keys)

# Parenthesised single-argument print: same output on Python 2, and valid
# syntax on Python 3 (the bare print statement is not).
print(team_index)

In [None]:
# Use the last N games of every team to predict the next game.
# For each team we query its `num_games` most recent games (home or away)
# played before `date`, joined with the final score and the betting line,
# and stack the per-team results into a single frame.

# NOTE(review): absolute, user-specific path -- this cell only runs on a
# machine that has this exact file.
config = configobj.ConfigObj("/Users/smacmullin/sports/crate.ini")
crate_host = config["crate"]["host_url"]

connection = client.connect(crate_host)
print(connection.client._active_servers)
cursor = connection.cursor()

# Exclusive upper bound, compared numerically against "GameDate"
# (looks like YYYYMMDD -- confirm against the table schema).
date = 20151120

# N: how many of each team's most recent games to fetch.
num_games = 3

# The statement is built once. Team and date are bound as DB-API parameters
# ('?' placeholders) instead of being spliced into the SQL text, so quoting
# is handled by the driver. Only the integer LIMIT is formatted in.
sql = '''
SELECT
nba.games."GameId" as "GameId",
nba.games."GameDate" as "GameDate",
nba.games."HomeTeam" as "HomeTeam",
nba.games."AwayTeam" as "AwayTeam",
nba.results."AwayScore" as "AwayScore",
nba.results."HomeScore" as "HomeScore",
nba.lines."HomeSpread" as "HomeSpread",
nba.lines."OverUnder" as "OverUnder"
FROM nba.games, nba.lines, nba.results
WHERE nba.games."GameId" = nba.results."GameId"
AND nba.games."GameId" = nba.lines."GameId"
AND (nba.games."HomeTeam" = ? or nba.games."AwayTeam" = ?)
AND nba.games."GameDate" < ?
ORDER BY nba.games."GameDate" DESC
LIMIT %d
''' % num_games

# One query per team; the team is bound twice (home side and away side).
frames = [
    pd.read_sql(sql, connection, params=[team, team, date])
    for team in team_index
]

# A game between two queried teams is returned once for each of them.
# Keep a single row per GameId so no game is counted twice downstream,
# and rebuild the index so row labels are unique.
df = (
    pd.concat(frames, ignore_index=True)
    .drop_duplicates(subset="GameId")
    .reset_index(drop=True)
)

print(df)

In [None]:
# Number of distinct teams; sizes the per-team parameter vectors in the model.
num_teams = len(team_index)

# Translate the team identifier of every game row into its integer position.
home_team = list(team_index[name] for name in df['HomeTeam'].values)
away_team = list(team_index[name] for name in df['AwayTeam'].values)

# Final scores of every game row, as arrays, used as the observed data.
observed_away_score = df['AwayScore'].values
observed_home_score = df['HomeScore'].values

In [None]:
# Log-linear Poisson model of the points scored by each side of a game.
# The log of the expected score is an intercept plus the scoring team's
# offense skill minus the opposing team's defense skill; the home side also
# gets a shared `baseline_home` term.
with pm.Model() as model:
    # global model parameters
    baseline_home = pm.Normal('baseline_home', 0., tau=0.01)
    tau_offense = pm.Gamma('tau_offense', .1, .1)  # tau for a normal distribution is 1/sigma**2
    tau_defense = pm.Gamma('tau_defense', .1, .1)
    intercept = pm.Normal('intercept', 4.4, tau=0.1)

    # team-specific model parameters (one entry per team)
    offense_skills = pm.Normal("offense_skills",
                               mu=0.0,
                               tau=tau_offense,
                               shape=num_teams)

    defense_skills = pm.Normal("defense_skills",
                               mu=0.0,
                               tau=tau_defense,
                               shape=num_teams)

    # Subtract the mean so each skill is expressed relative to the average
    # across all teams.
    offense_skill = pm.Deterministic('offense_skill', offense_skills - tt.mean(offense_skills))
    defense_skill = pm.Deterministic('defense_skill', defense_skills - tt.mean(defense_skills))

    # Expected scores. The operands are symbolic Theano expressions, so use
    # the Theano op (tt.exp) rather than the NumPy ufunc.
    home_theta = tt.exp(intercept + baseline_home + offense_skill[home_team] - defense_skill[away_team])
    away_theta = tt.exp(intercept + offense_skill[away_team] - defense_skill[home_team])

    # likelihood of observed data
    home_points = pm.Poisson('home_points', mu=home_theta, observed=observed_home_score)
    away_points = pm.Poisson('away_points', mu=away_theta, observed=observed_away_score)

In [None]:
# Fit the model: find a starting point, sample with NUTS, and plot the trace.
with model:
    # Maximum a posteriori estimate, used below as the sampler's start point.
    start = pm.find_MAP()
    # NOTE(review): this passes the MAP point through NUTS' `state` keyword.
    # PyMC3 tutorials of this era pass it as `scaling=start` instead --
    # confirm which was intended for the installed PyMC3 version.
    step = pm.NUTS(state=start)
    # Draw 1000 samples with the NUTS step method, starting from `start`.
    trace = pm.sample(1000, step, start=start)
    # Plot the sampled trace and show the figure.
    pm.traceplot(trace)
    plt.show()