In [None]:
import configobj
from crate import client
from fuzzywuzzy import process
from fuzzywuzzy import fuzz
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use("nbagg")
import matplotlib.pyplot as plt

import pymc3 as pm
import theano.tensor as tt

from teams import nba_teams
# Alias for the team mapping imported from the local `teams` module.
# index_teams() below iterates over it and reads team_keys[key] for each key.
team_keys = nba_teams

def index_teams(team_keys):
    """Assign a zero-based position to every value stored in ``team_keys``.

    Parameters
    ----------
    team_keys : mapping
        Iterated in its natural order; for each key, the stored value
        ``team_keys[key]`` becomes a key of the result.

    Returns
    -------
    dict
        ``{team_keys[key]: position}``, where ``position`` is the place of
        ``key`` in the iteration order.  When several keys share a value,
        the position of the last one is kept.
    """
    return {team_keys[name]: position for position, name in enumerate(team_keys)}
    
# Build the value -> position lookup once, when this cell runs.
team_index = index_teams(team_keys)

def moneyline_from_implied_odds(p):
    """Convert an implied win probability into an American moneyline.

    Parameters
    ----------
    p : float
        Implied probability of the outcome; must lie strictly between 0 and 1.

    Returns
    -------
    int
        ``100 * (1 - p) / p`` (a positive line) when ``p < 0.5``, otherwise
        ``-100 * p / (1 - p)`` (a negative line).  The result is truncated
        toward zero by ``int``.

    Raises
    ------
    ValueError
        If ``p`` is not strictly inside ``(0, 1)``.  Without this check,
        ``p == 0`` and ``p == 1`` fail with a bare ZeroDivisionError and
        values outside the interval silently produce meaningless lines.
    """
    # The chained comparison is also False for NaN, so NaN is rejected here.
    if not 0.0 < p < 1.0:
        raise ValueError(
            "implied probability must be strictly between 0 and 1, got %r" % (p,)
        )

    if p < 0.5:
        # Underdog: payout on a 100-unit stake.
        return int(-1.*(100. * (p-1.0))/p)
    else:
        # Favourite (or even money): stake required to win 100 units, negated.
        return int((100.*p)/(p-1.0))

In [None]:
# Connection settings are read from an ini file.
# NOTE(review): this is an absolute, user-specific path, so the cell only runs on
# a machine that has this exact file; consider moving the location into a
# configurable setting.
config = configobj.ConfigObj("/Users/smacmullin/sports/crate.ini")
crate_host = config["crate"]["host_url"]

connection = client.connect(crate_host)
print(connection.client._active_servers)
cursor = connection.cursor()

# Training set: games joined to their results and betting lines on GameId,
# restricted to GameDate strictly between 20151026 and 20151117
# (presumably YYYYMMDD integers -- confirm against the table schema).
sql = '''
SELECT 
nba.games."GameId" as "GameId", 
nba.games."GameDate" as "GameDate", 
nba.games."HomeTeam" as "HomeTeam", 
nba.games."AwayTeam" as "AwayTeam",
nba.results."AwayScore" as "AwayScore", 
nba.results."HomeScore" as "HomeScore",
nba.lines."HomeSpread" as "HomeSpread", 
nba.lines."OverUnder" as "OverUnder"
FROM nba.games, nba.lines, nba.results
WHERE nba.games."GameId" = nba.results."GameId"
AND nba.games."GameId" = nba.lines."GameId"
AND nba.games."GameDate" > 20151026 AND nba.games."GameDate" < 20151117
ORDER BY nba.games."GameDate"
LIMIT 30000
'''

df = pd.read_sql(sql, connection)

# Index every team that appears in the window, whether at home or away.
# Building the index from home teams alone leaves a team that has only played
# away with a NaN index, which cannot be used to index the skill vector.
# pd.unique keeps first-appearance order, so home teams receive the same indices
# as before and away-only teams are appended after them.
team_names = pd.unique(np.concatenate([df['HomeTeam'].values, df['AwayTeam'].values]))
teams = pd.DataFrame(team_names, columns=['Teams'])
teams['i'] = teams.index

# Map team code -> integer index directly. This avoids the duplicate
# Teams_x / Teams_y columns that merging `teams` into `df` twice would create.
team_lookup = teams.set_index('Teams')['i']
df['i_home'] = df['HomeTeam'].map(team_lookup)
df['i_away'] = df['AwayTeam'].map(team_lookup)

observed_home_score = df['HomeScore'].values
observed_away_score = df['AwayScore'].values

# Integer index arrays consumed by the model cell.
home_team = df['i_home'].values
away_team = df['i_away'].values

# One latent skill per indexed team (home or away).
num_teams = len(teams)

print(df.head())

In [None]:
# Poisson model of game scores: every team has one latent skill, and the
# log of a side's expected score is a home/away baseline plus the skill
# difference between the two teams.

with pm.Model() as model:

    # global model parameters
    # `tau` (precision) is passed by keyword: the second positional parameter
    # of pm.Normal is the precision in some PyMC3 releases and the standard
    # deviation in others, and 0.0005 as a standard deviation would pin both
    # baselines to 0.
    baseline_home = pm.Normal('baseline_home', mu=0., tau=0.0005)
    baseline_away = pm.Normal('baseline_away', mu=0., tau=0.0005)
    tau = pm.Gamma('tau', alpha=1., beta=2.)  # tau for a normal distribution is 1/sigma**2

    # team-specific model parameters
    team_skills = pm.Normal("team_skills",
                            mu=0.0,
                            tau=tau,
                            shape=num_teams)

    # Centre the skills so they are identified relative to the league mean.
    team_skill = pm.Deterministic('team_skill', team_skills - tt.mean(team_skills))

    # `home_team` / `away_team` must be the integer index arrays built by the
    # data-loading cell. Later cells rebind these names to team-code strings,
    # so re-run the loading cell before re-running this one.
    home_theta = tt.exp(baseline_home + team_skill[home_team] - team_skill[away_team])
    away_theta = tt.exp(baseline_away + team_skill[away_team] - team_skill[home_team])

    # likelihood of observed data
    home_points = pm.Poisson('home_points', mu=home_theta, observed=observed_home_score)
    away_points = pm.Poisson('away_points', mu=away_theta, observed=observed_away_score)

In [None]:
with model:
    # Start the sampler from the MAP estimate.
    start = pm.find_MAP()
    # NOTE(review): the MAP point is passed to NUTS as `state=`; confirm against
    # the installed PyMC3 version that this is the intended keyword.
    step = pm.NUTS(state=start)
    trace = pm.sample(5000, step, start=start)
    pm.traceplot(trace)
    plt.show()

# Label each row with the team code only. `teams.values` is a 2-D array of
# [code, index] rows, which would render each tick label as a whole row.
pm.forestplot(trace, ylabels=teams['Teams'].values, varnames=['team_skills'], main="Team Skill")
plt.show()

# #write the trace to a file
# with open("/Users/smacmullin/mlb/modeltrace_mlb_db_060116_071516.pkl","wb") as fp:
#     cPickle.dump(trace,fp,cPickle.HIGHEST_PROTOCOL)

# import the model

In [None]:
# import cPickle
# model = pm.Model()
# with open("/Users/smacmullin/mlb/modeltrace_2016_june.pkl","rb") as fp:
#     with model:
#         trace = cPickle.load(fp)

# Pull the sampled values out of the trace.
# NOTE: these names rebind the symbolic variables of the same names created in
# the model cell; from here on they hold sampled values, not model nodes.
baseline_home = trace['baseline_home']
baseline_away = trace['baseline_away']
#print teams.values
team_skills_likelihood = trace['team_skill']

# team code -> list with that team's 'team_skill' value from every draw.
# Each row of `teams.values` is (code, index).
team_skill = {
    team_code: [draw[team_pos] for draw in team_skills_likelihood]
    for team_code, team_pos in teams.values
}

# simulate the outcome of a single game

In [None]:
# Matchup to simulate, identified by team code.
# NOTE: these rebind `away_team` / `home_team`, which held the integer index
# arrays used when the model was built.
away_team = 'NYK'
home_team = 'SAS'

# Per-draw Poisson rates for each side.
home_theta = np.exp(baseline_home + team_skill[home_team] - team_skill[away_team])
away_theta = np.exp(baseline_away + team_skill[away_team] - team_skill[home_team])

# One simulated score per draw (home first, then away).
home_scores = np.random.poisson(home_theta)
away_scores = np.random.poisson(away_theta)

predicted_home_score = np.mean(home_scores)
stdev_home_score = np.std(home_scores)
predicted_away_score = np.mean(away_scores)
stdev_away_score = np.std(away_scores)

#predict the score
print("Predicted Away Score (%s): %s +/- %s" % (away_team, predicted_away_score, stdev_away_score))
print("Predicted Home Score (%s): %s +/- %s" % (home_team, predicted_home_score, stdev_home_score))

#predict the spread (away minus home, so negative means the home side wins)
spread_draws = away_scores - home_scores
predicted_spread = np.mean(spread_draws)
std_spread = np.std(spread_draws)
print("Predicted Home Spread: %s +/- %s" % (predicted_spread, std_spread))

#predict the o/u (combined score of both sides)
total_draws = away_scores + home_scores
predicted_ou = np.mean(total_draws)
std_ou = np.std(total_draws)
print("Predicted Over Under: %s +/- %s" % (predicted_ou, std_ou))

In [None]:
# query for a test set of games
# The lower bound is inclusive (>=). The training query stops at
# GameDate < 20151117, so a strict '>' here would leave games played on
# 20151117 in neither the training set nor the test set.
sql = '''
SELECT 
nba.games."GameId" as "GameId", 
nba.games."GameDate" as "GameDate", 
nba.games."HomeTeam" as "HomeTeam", 
nba.games."AwayTeam" as "AwayTeam",
nba.results."AwayScore" as "AwayScore", 
nba.results."HomeScore" as "HomeScore",
nba.lines."HomeSpread" as "HomeSpread", 
nba.lines."OverUnder" as "OverUnder"
FROM nba.games, nba.lines, nba.results
WHERE nba.games."GameId" = nba.results."GameId"
AND nba.games."GameId" = nba.lines."GameId"
AND nba.games."GameDate" >= 20151117 AND nba.games."GameDate" < 20151124
ORDER BY nba.games."GameDate"
LIMIT 30000
'''
# NOTE: this rebinds `df`, which previously held the training frame.
df = pd.read_sql(sql, connection)
print(df)
# One dict per game, keyed by the column aliases selected above.
test_records = df.to_dict('records')

In [None]:
# Score the model against the closing lines of the test games: for each game,
# simulate the score once per draw, then check whether the side the model
# prefers (over/under and against the spread) matches what actually happened.
ou_counter = 0.0
spread_counter = 0.0

for game in test_records:

    away_team = game["AwayTeam"]
    home_team = game["HomeTeam"]

    #theta
    home_theta = np.exp(baseline_home + team_skill[home_team] - team_skill[away_team])
    away_theta = np.exp(baseline_away + team_skill[away_team] - team_skill[home_team])

    home_scores = np.random.poisson(home_theta)
    away_scores = np.random.poisson(away_theta)

    # Mean simulated margin (away minus home) and mean simulated total.
    predicted_spread = np.mean(away_scores - home_scores)
    predicted_ou = np.mean(away_scores + home_scores)

    # over/under validation
    # NOTE(review): a total exactly equal to the line (a push) is scored as
    # "under" here; confirm whether pushes should be excluded instead.
    ou_bet = predicted_ou > game['OverUnder']
    ou_outcome = (game['HomeScore'] + game['AwayScore']) > game['OverUnder']
    if ou_outcome == ou_bet:
        ou_counter += 1.0

    #     spread: H -10 predicts home to win by 10
    #     prediction H -12 predicts home to win by 12
    #     actual away (120) - home (140) = -20

    # spread bet validation
    # NOTE(review): a margin exactly equal to the spread (a push) is scored as
    # the home side failing to cover.
    home_bet = predicted_spread < game["HomeSpread"]
    home_outcome = (game['AwayScore'] - game['HomeScore']) < game["HomeSpread"]
    if home_bet == home_outcome:
        spread_counter += 1.0

# Fraction of games where the model picked the right side. The guard covers an
# empty test set, which would otherwise raise ZeroDivisionError.
num_games = len(test_records)
if num_games:
    print(ou_counter / num_games)
    print(spread_counter / num_games)
else:
    print("No test games to validate against")