In [1]:
from scipy.stats import norm
import sys
import os
import pandas as pd
import numpy as np
from scipy import optimize


# Preseason win-total betting lines and the 2023 schedule, read from the
# mathletics-book/Gambling repository.
dflines = pd.read_csv("https://raw.githubusercontent.com/mathletics-book/Gambling/main/extra/NFL_totalwins_betting.csv")
df = pd.read_csv("https://raw.githubusercontent.com/mathletics-book/Gambling/main/extra/nfl_2023_schedule.csv")

# Schedule columns naming the visiting and the home team.
col1, col2 = "VisTm", "HomeTm"

# A team's position in the betting-lines file is its integer id; the schedule
# gets that id for the away (aidx) and home (hidx) side of every row.
# teams.index raises ValueError for a name that is missing from the lines file.
teams = dflines["Team"].tolist()
df['aidx'] = df[col1].map(teams.index)
df['hidx'] = df[col2].map(teams.index)

# Independent copy so columns can be added without touching df.
# NOTE(review): only weeks 1-17 are kept - confirm the schedule file does not
# number an 18th regular-season week that should count toward the win totals.
reg_season = df.loc[df['Week'] <= 17].copy()

# Length of the rating vector.
# NOTE(review): expected to equal len(teams) - confirm the lines file has 32 rows.
n_teams = 32
# Home-field advantage in points; based on the home edge from Sagarin ratings
# for the 2022 season.
home_edge = 2

def rtg_constr(x):
    """Return the arithmetic mean of the rating vector ``x``.

    Passed to the optimiser as an equality constraint, i.e. the solver drives
    this value to zero.
    """
    ratings = np.asarray(x)
    return ratings.mean()

def obj(x, games=None, lines=None, hfa=None, num_teams=None, margin_sd=14.5):
    """Squared error between projected win totals and the betting lines.

    For every game the projected home margin is ``hfa + x[hidx] - x[aidx]``.
    The home win probability is the chance that a Normal(margin, margin_sd)
    draw exceeds 0.5; the away side gets the complement.  Summing these
    probabilities per team gives a projected win total, and the return value
    is the sum over teams of ``(line - projected wins) ** 2``.

    Parameters
    ----------
    x : array-like of float
        Candidate ratings, indexed by the integer team ids stored in the
        ``hidx`` / ``aidx`` columns.
    games : pandas.DataFrame, optional
        Schedule with integer ``hidx`` and ``aidx`` columns.  Defaults to the
        module-level ``reg_season``.
    lines : pandas.Series or array-like, optional
        Win-total line per team id.  Defaults to ``dflines["Line"]``.
    hfa : float, optional
        Points added to the home side.  Defaults to the module-level
        ``home_edge``.
    num_teams : int, optional
        Length of the win-total vector.  Defaults to the module-level
        ``n_teams``.
    margin_sd : float, optional
        Standard deviation of the scoring margin (14.5 by default).

    Returns
    -------
    float
        Total squared error for the rating vector ``x``.

    Side effects
    ------------
    Overwrites the ``proj``, ``hwinpr`` and ``awinpr`` columns of ``games``
    on every call, so they reflect whichever ``x`` was evaluated last.
    """
    # Globals are resolved at call time so optimize.minimize can keep calling
    # obj(x) with a single argument.
    if games is None:
        games = reg_season
    if lines is None:
        lines = dflines["Line"]
    if hfa is None:
        hfa = home_edge
    if num_teams is None:
        num_teams = n_teams

    ratings = np.asarray(x, dtype=float)
    # Positional arrays: the frame is a filtered slice, so its index labels are
    # not guaranteed to run 0..n-1 and must not be used to address rows.
    home = games['hidx'].to_numpy(dtype=int)
    away = games['aidx'].to_numpy(dtype=int)

    proj = hfa + ratings[home] - ratings[away]
    home_win = 1 - norm.cdf(0.5, proj, margin_sd)
    away_win = 1 - home_win

    # Kept for compatibility with code that inspects these columns.
    games['proj'] = proj
    games['hwinpr'] = home_win
    games['awinpr'] = away_win

    # Per-team expected wins in one pass; minlength keeps teams that have no
    # game in `games` at zero wins.
    w = (np.bincount(away, weights=away_win, minlength=num_teams)
         + np.bincount(home, weights=home_win, minlength=num_teams))
    return ((lines - w) ** 2).sum()


# Start every team at a rating of zero.
x0 = np.zeros(n_teams)

# Minimise obj with SLSQP under the equality constraint rtg_constr(x) == 0.
res = optimize.minimize(
    obj,
    x0,
    method="SLSQP",
    constraints=[{'type': 'eq', 'fun': rtg_constr}],
    options={'maxiter': 10000},
)

print(res.success, res.message)
print("                Team   Rating   Line")

# Print one row per team and keep the fitted rating under the team name.
preseason_ratings = {}
for t, rating, line in zip(dflines["Team"], res.x, dflines["Line"]):
    print("{:>20s}    {:.2f}    {:.1f}".format(t, rating, line))
    preseason_ratings[t] = rating


True Optimization terminated successfully
                Team   Rating   Line
  Kansas City Chiefs    8.61    11.5
    Baltimore Ravens    5.03    10.5
 San Francisco 49ers    4.38    10.5
  New Orleans Saints    -0.54    9.5
      Dallas Cowboys    3.06    9.5
Tampa Bay Buccaneers    -5.56    6.5
    Seattle Seahawks    0.01    8.5
  Indianapolis Colts    -6.06    6.5
 Philadelphia Eagles    7.56    11.5
 Pittsburgh Steelers    -0.74    8.5
       Buffalo Bills    6.39    10.5
   Minnesota Vikings    0.26    8.5
New England Patriots    -0.46    7.5
   Green Bay Packers    -2.87    7.5
    Cleveland Browns    1.73    9.5
    Los Angeles Rams    -5.42    6.5
    Tennessee Titans    -3.36    7.5
       Chicago Bears    -3.34    7.5
Los Angeles Chargers    3.43    9.5
   Arizona Cardinals    -9.93    4.5
     Atlanta Falcons    -3.03    8.5
      Denver Broncos    1.11    8.5
      Houston Texans    -6.03    6.5
   Las Vegas Raiders    -3.31    6.5
       Detroit Lions    2.01    9.5
   

In [2]:
import arviz
import pymc3 as pm

# Only games from weeks strictly before `week` are used as observations.
week = 3

data = df[df['Week'] < week]

# Response: GH minus GA for each game (presumably home and away points -
# verify against the schedule file), kept as an (n_games, 1) column vector.
y = np.array(data.GH-data.GA).reshape((len(data),1))

# Design matrix: +1 in the home team's column, -1 in the away team's column.
# Rows are addressed by position because `data` is a filtered slice whose index
# labels are not guaranteed to run 0..n-1.  The extra last column (PHFA) stays
# all zeros; home-field advantage enters the model through the intercept.
X = np.zeros((len(y),len(teams)+1))
game_rows = np.arange(len(data))
X[game_rows, data['hidx'].to_numpy(dtype=int)] = 1
X[game_rows, data['aidx'].to_numpy(dtype=int)] = -1

# Model variable names are the team names with the spaces removed.
team_cols = [t.replace(" ", "") for t in teams]

X = pd.DataFrame(X)
X.columns = team_cols + ['PHFA']
X['pts'] = y.ravel()

with pm.Model() as normal_model:

    sigma = pm.distributions.continuous.HalfCauchy('sigma', beta=10, testval=1.)
    # assume home field prior of 2 points
    intercept = pm.distributions.continuous.Normal('HFA',2, sigma =3)
    # assume a prior rating for every team based on Vegas total wins
    x_coeff = [pm.distributions.continuous.Normal(name, preseason_ratings[team], sigma=3)
               for name, team in zip(team_cols, teams)]

    # Expected margin: home-field intercept plus (home rating - away rating).
    mu = intercept + sum([coeff*X[name] for coeff, name in zip(x_coeff, team_cols)])
    likelihood = pm.distributions.continuous.Normal('y', mu=mu, sigma=sigma, observed=X['pts'])

    # Fixed seed so that Restart & Run All reproduces the same posterior draws.
    nfl_trace = pm.sample(2000, chains = 5, random_seed=2023)

    # Summarise inside the model context: when called outside it, ArviZ logs
    # "No model on context stack" while converting the trace.
    nfl_summary = arviz.summary(nfl_trace)

nfl_summary


  return wrapped_(*args_, **kwargs_)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (5 chains in 4 jobs)
NUTS: [JacksonvilleJaguars, WashingtonCommanders, CincinnatiBengals, CarolinaPanthers, MiamiDolphins, NewYorkJets, NewYorkGiants, DetroitLions, LasVegasRaiders, HoustonTexans, DenverBroncos, AtlantaFalcons, ArizonaCardinals, LosAngelesChargers, ChicagoBears, TennesseeTitans, LosAngelesRams, ClevelandBrowns, GreenBayPackers, NewEnglandPatriots, MinnesotaVikings, BuffaloBills, PittsburghSteelers, PhiladelphiaEagles, IndianapolisColts, SeattleSeahawks, TampaBayBuccaneers, DallasCowboys, NewOrleansSaints, SanFrancisco49ers, BaltimoreRavens, KansasCityChiefs, HFA, sigma]


Sampling 5 chains for 1_000 tune and 2_000 draw iterations (5_000 + 10_000 draws total) took 44 seconds.
Got error No model on context stack. trying to find log_likelihood in translation.


Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
HFA,0.189,1.764,-3.362,3.329,0.012,0.019,20972.0,8092.0,1.0
KansasCityChiefs,8.251,2.844,2.876,13.432,0.02,0.016,20769.0,7285.0,1.0
BaltimoreRavens,5.477,2.837,0.008,10.654,0.02,0.017,21047.0,7315.0,1.0
SanFrancisco49ers,5.331,2.906,-0.222,10.838,0.02,0.018,20296.0,7485.0,1.0
NewOrleansSaints,-0.65,2.792,-6.152,4.362,0.018,0.03,24434.0,6989.0,1.0
DallasCowboys,6.168,2.876,0.659,11.478,0.021,0.017,18017.0,7960.0,1.0
TampaBayBuccaneers,-4.45,2.831,-9.584,0.982,0.019,0.018,23164.0,7405.0,1.0
SeattleSeahawks,-0.736,2.83,-6.06,4.667,0.019,0.031,21666.0,7225.0,1.0
IndianapolisColts,-5.598,2.884,-11.039,-0.221,0.02,0.017,20765.0,7062.0,1.0
PhiladelphiaEagles,7.302,2.794,2.026,12.357,0.019,0.016,21462.0,7184.0,1.0
