In [1]:
import pandas as pd
import numpy as np
import pymc as pm
import altair as alt
import arviz as az
import pymc_extras as pmx
import pytensor.tensor as pt

# Empty module-level dict.
# NOTE(review): not referenced in the cells visible here; presumably filled in later — confirm.
t_dict = dict()

In [None]:
# Overall team stats: load the CSV, then relabel the column that was read in
# as 'Unnamed: 0' to 'team'.
stats = pd.read_csv('./data/basketball_results/team_statistics.csv')
stats = stats.rename(columns={'Unnamed: 0': 'team'})
stats

In [None]:
# Granular match results: parse the timestamp column and add the
# home-minus-away score margin as 'diff'.
results = (
    pd.read_csv('./data/basketball_results/all_matches.csv')
    .assign(
        date_time=lambda df: pd.to_datetime(df['date_time']),
        diff=lambda df: df['home_score'] - df['away_score'],
    )
)
results.head()

In [None]:
# Get the list of every team that appears as home or away side.
# Sorted so the team -> index mapping (and the model's "teams" coord) is the
# same on every kernel restart; plain set iteration order is arbitrary and
# changes between Python sessions because string hashing is randomised.
teams = sorted(set(results["home_team"]) | set(results["away_team"]))
n_teams = len(teams)

# Encode teams as integer indices (position in the sorted `teams` list)
team_idx = {team: i for i, team in enumerate(teams)}
results["home_idx"] = results["home_team"].map(team_idx)
results["away_idx"] = results["away_team"].map(team_idx)

# Add binary outcome variable: True when the home side scored strictly more
results['home_win'] = results['home_score'] > results['away_score']

results[['home_team','home_idx','home_score','away_team','away_idx','away_score','diff','home_win']].head()

In [None]:
# Win/loss model with team strengths (Elo-style)
with pm.Model(coords={"teams": teams}) as model:
    # Latent strength for each team (one entry per "teams" coordinate)
    team_strengths = pm.ZeroSumNormal("team_strengths", sigma=30, dims="teams")

    # Expected log-odds of a home win: home strength minus away strength
    mu = (team_strengths[results["home_idx"].values]
          - team_strengths[results["away_idx"].values])

    # Observed win/loss. `mu` is already on the log-odds scale, so pass it as
    # `logit_p` rather than squashing it through a sigmoid first.
    # NOTE(review): the variable name says "score_diff" but it holds the binary
    # home_win outcome; name kept so existing references to the trace still work.
    pm.Bernoulli("score_diff_obs", logit_p=mu, observed=results["home_win"])

    # Sampling. `target_accept` is the sampler's documented keyword for the
    # acceptance-rate target; a fixed seed keeps re-runs reproducible.
    trace = pm.sample(draws=2000, target_accept=0.9, random_seed=42)
    pm.compute_log_likelihood(trace, extend_inferencedata=True)
    pm.sample_posterior_predictive(trace, extend_inferencedata=True, random_seed=42)

print(pm.summary(trace))
pm.plot_trace(trace)


In [None]:
# Posterior analysis: forest plot of the team strengths with chains combined
forest_options = dict(var_names=["team_strengths"], combined=True, textsize=10)
az.plot_forest(trace, **forest_options)

### 👇 Improve!