In [1]:
import pandas as pd
import pymc as pm
import arviz as az
import numpy as np
import ipywidgets

import pytensor
# Blank out PyTensor's C++ compiler setting. Per the PyTensor configuration
# docs an empty `cxx` makes PyTensor skip C compilation and fall back to its
# Python implementations, which is typically much slower.
# NOTE(review): presumably a workaround for a missing/broken compiler on the
# authoring machine -- confirm it is still wanted before long sampling runs.
pytensor.config.cxx = ""




In [2]:
# Read the master table, then keep only the rows whose 'opponent' is present.
df = (
    pd.read_csv('data/master_df.csv')
    .loc[lambda frame: frame['opponent'].notna()]
)

In [3]:
# Encode categorical variables as integer indices.
def _encode_labels(column):
    """Return ``(labels, codes)`` for a categorical column.

    ``labels`` is an array of the distinct values in order of first
    appearance (the same order ``Series.unique()`` gives) and ``codes`` is a
    Series, aligned with ``column``'s index, holding each row's position in
    ``labels``.

    Raises:
        ValueError: if the column contains missing values, which have no
            position in ``labels`` and would otherwise be encoded as -1.
    """
    # A single hash-based pass instead of scanning the label array per row.
    codes, labels = pd.factorize(column)
    if (codes < 0).any():
        raise ValueError(f"column {column.name!r} contains missing labels")
    return np.asarray(labels), pd.Series(codes, index=column.index, name=column.name)


teams, team_idx = _encode_labels(df['team'])
opponents, opp_idx = _encode_labels(df['opponent'])

In [None]:
# Fixed seed so a fresh "Restart & Run All" reproduces the same posterior draws.
RANDOM_SEED = 42

# Plain integer/float arrays for tensor indexing and the likelihood, rather
# than pandas objects that carry a (filtered, non-contiguous) row index.
team_positions = np.asarray(team_idx)
opp_positions = np.asarray(opp_idx)
ppp_observed = df['ppp_off_team'].to_numpy()

with pm.Model() as model:
    # League average PPP (fixed effect)
    mu = pm.Normal('mu', mu=1, sigma=0.5)

    # Random effects for teams (offense); the spread is learned from the data
    sigma_team = pm.HalfNormal('sigma_team', sigma=0.5)
    team_offense = pm.Normal('team_offense', mu=0, sigma=sigma_team, shape=len(teams))

    # Random effects for opponents (defense); the spread is learned from the data
    sigma_opp = pm.HalfNormal('sigma_opp', sigma=0.5)
    opp_defense = pm.Normal('opp_defense', mu=0, sigma=sigma_opp, shape=len(opponents))

    # Expected PPP for each row: baseline plus the offense effect of the row's
    # team, minus the defense effect of the row's opponent
    ppp_hat = mu + team_offense[team_positions] - opp_defense[opp_positions]

    # Likelihood
    sigma = pm.HalfNormal('sigma', sigma=0.1)
    y = pm.Normal('y', mu=ppp_hat, sigma=sigma, observed=ppp_observed)

    # Sample posterior
    trace = pm.sample(
        1000,
        tune=500,
        target_accept=0.9,
        progressbar=True,
        random_seed=RANDOM_SEED,
    )

# Examine results
az.summary(trace, var_names=['mu', 'team_offense', 'opp_defense'])

Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [mu, sigma_team, team_offense, sigma_opp, opp_defense, sigma]


Output()

In [8]:
# Take the team labels from the same frame and column that `team_idx` was
# encoded from, so that label order matches the model's `team_offense` axis.
# (No `data` frame is defined in this notebook; using it fails on a fresh kernel.)
teams = df['team'].unique()

In [None]:
# Label -> position lookup along the team axis of the posterior.
team_idx_map = dict(zip(teams, range(len(teams))))
team_name = "A"  # example

# Posterior mean of the baseline term
mu_mean = trace.posterior['mu'].mean().item()

# Draws of this team's offensive effect, and their mean (expected rating)
off_samples = trace.posterior['team_offense'].sel(
    team_offense_dim_0=team_idx_map[team_name]
)
off_mean = off_samples.mean().item()

# Adjusted offensive efficiency (points per possession): baseline + team effect
adj_off_eff = mu_mean + off_mean

print(f"{team_name} adjusted offensive efficiency: {adj_off_eff:.2f} points per possession")


A adjusted offensive efficiency: 1.08 points per possession


In [14]:
# Take the opponent labels from the same frame and column that `opp_idx` was
# encoded from, so that label order matches the model's `opp_defense` axis.
# (No `data` frame is defined in this notebook; using it fails on a fresh kernel.)
opponents = df['opponent'].unique()
opp_idx_map = {opp: i for i, opp in enumerate(opponents)}

opp_name = "X"  # example
def_samples = trace.posterior['opp_defense'].sel(opp_defense_dim_0=opp_idx_map[opp_name])

# Baseline recomputed here so this cell does not rely on an earlier cell's variable.
mu_mean = trace.posterior['mu'].mean().item()
def_mean = def_samples.mean().item()

# The model subtracts the opponent's defense effect from expected PPP, so the
# adjusted figure is baseline minus that effect (lower = fewer points allowed).
adj_def_eff = (mu_mean - def_mean)

print(f"{opp_name} adjusted defensive efficiency: {adj_def_eff:.2f} points per possession")

X adjusted defensive efficiency: 1.04 points per possession


In [16]:
# Posterior means, reduced once over all chains and draws instead of being
# recomputed inside a per-label loop.
mu_mean = trace.posterior['mu'].mean().item()
team_off_means = trace.posterior['team_offense'].mean(dim=('chain', 'draw'))
opp_def_means = trace.posterior['opp_defense'].mean(dim=('chain', 'draw'))

# Adjusted offensive efficiency per team: baseline + team offense effect.
adj_off_eff = {
    team: mu_mean + team_off_means.sel(team_offense_dim_0=i).item()
    for team, i in team_idx_map.items()
}

# Adjusted defensive efficiency per opponent: baseline - opponent defense effect.
adj_def_eff = {
    opp: mu_mean - opp_def_means.sel(opp_defense_dim_0=i).item()
    for opp, i in opp_idx_map.items()
}

# Offensive table, best offense first (displayed as the cell's output).
pd.DataFrame({
    'Team': list(adj_off_eff.keys()),
    'AdjOffEff': list(adj_off_eff.values())
}).sort_values('AdjOffEff', ascending=False)



Unnamed: 0,Team,AdjOffEff
0,A,1.075613
2,C,1.054261
1,B,1.034636
