In [None]:
import os

import aesara.tensor as at
import arviz as az
import numpy as np
import pandas as pd
import pymc as pm
from sklearn.preprocessing import LabelEncoder, StandardScaler

from draft_optimizer.src.utils import DATA_DIR

In [None]:
def trace_helper(trace, var_names):
    """Show sampler diagnostics and trace plots for the selected variables.

    Parameters
    ----------
    trace
        Sampling result; passed unchanged to ``az.summary`` and ``az.plot_trace``.
    var_names
        Names of the variables to summarise and plot.

    Returns
    -------
    None
        The function only produces notebook output.
    """
    # Diagnostics-only summary table, rendered through the notebook's rich display.
    diagnostics = az.summary(trace, var_names=var_names, kind="diagnostics")
    display(diagnostics)

    # Trace plots for the same variables; compact=False is forwarded to ArviZ as-is.
    az.plot_trace(trace, var_names=var_names, compact=False)

In [None]:
# Load data
# The league/year pair selects the export directory under DATA_DIR.
league_id = 88497130
year = 2022
league_dir = os.path.join(DATA_DIR, f"espn_{league_id}_{year}")

# Read the three raw CSV tables (pro teams, pro schedule, pro players) in one pass.
teams_raw, schedule_raw, players_raw = (
    pd.read_csv(os.path.join(league_dir, filename))
    for filename in ("pro_teams.csv", "pro_schedule.csv", "pro_players.csv")
)

In [None]:
# Get maps: team abbreviation -> team id
team_map = teams_raw.set_index("abbrev")["id"]

# Prepare data
# Player table: attach the team id looked up from the abbreviation, then keep
# only the columns used downstream (with "id" renamed to "player_id").
players_df = (
    players_raw
    .assign(team_id=players_raw["pro_team"].map(team_map))
    [["id", "position", "proj_points", "team_id"]]
    .rename(columns={"id": "player_id"})
)

# One row per (scheduled game, player on the home team); the opponent is the away team.
home_data = (
    schedule_raw[["home_id", "away_id", "week"]]
    .merge(players_df, left_on="home_id", right_on="team_id")
    .assign(home=True, opponent_id=lambda games: games["away_id"])
)

# Mirror image for players on the away team; the opponent is the home team.
away_data = (
    schedule_raw[["home_id", "away_id", "week"]]
    .merge(players_df, left_on="away_id", right_on="team_id")
    .assign(home=False, opponent_id=lambda games: games["home_id"])
)

# Stack both sides, drop the join keys, discard rows with any missing value,
# and order by week then player.
all_data = (
    pd.concat([home_data, away_data], axis=0)
    .drop(columns=["team_id", "home_id", "away_id"])
    .dropna(how="any")
    .sort_values(["week", "player_id"])
)

print(all_data.shape)
all_data.head()

# All Weeks, One Position
* Will probably diverge a lot, because there is only a single data point for each player

In [None]:
# Subset data: one position, and only rows with a strictly positive projection
pos = "QB"
data = all_data.loc[(all_data["position"] == pos) & (all_data["proj_points"] > 0)]

# Prepare to model teams: integer code for each opponent, in sorted-id order
teams = sorted(data["opponent_id"].unique())
teams_encoder = LabelEncoder()
teams_encoder.fit(teams)
opp_idx = teams_encoder.transform(data["opponent_id"])

# Prepare to model home field advantage (True for rows built from the home side)
side = data["home"].values

# Prepare to model players: integer code for each player, in sorted-id order
players = sorted(data["player_id"].unique())
players_encoder = LabelEncoder()
players_encoder.fit(players)
player_idx = players_encoder.transform(data["player_id"])

# Prepare to sum across weeks: for every player code, the row positions
# (within `data`) that belong to that player
unique_player_idx = sorted(set(player_idx))
player_week_idx = {
    p: np.argwhere(player_idx == p).ravel()
    for p in unique_player_idx
}

# Prepare to model points: standardise the projected points across all rows
scaler = StandardScaler()
scaled_points = scaler.fit_transform(data[["proj_points"]]).ravel()
# Alternative kept for reference: scale a single value per player instead of
# one value per player-week row.
# points = data.groupby("player_id")["proj_points"].first()
# points.index = players_encoder.transform(points.index)
# points = points.sort_index()  # ensure sorted
# scaled_points = scaler.fit_transform(points.values.reshape(-1, 1)).ravel()

# Build coords: dimension labels handed to the PyMC model
coords = dict(team=teams, player_=players)

print(data.shape)
data.head()

In [None]:
# TODO: home field advantage

# Build model
# Mean structure: theta = exp(intercept + player effect + opponent effect),
# with a Normal likelihood around theta. Player and opponent effects are
# hierarchical (shared scale) and re-centred to sum to zero.
with pm.Model(coords=coords) as model:
    # Global parameters
    # home = pm.Normal("home", mu=0, sigma=1)  # TODO
    opp_std = pm.HalfNormal("opp_std", sigma=2)
    player_std = pm.HalfNormal("player_std", sigma=2)
    # Intercept lives on the log scale: the prior mean of 3 corresponds to
    # exp(3) ~= 20 on the scale of theta.
    intercept = pm.Normal("intercept", mu=3, sigma=1)
    points_std = pm.HalfNormal("points_std", sigma=2)

    # Team-specific parameters
    opp_star = pm.Normal("opp_star", mu=0, sigma=opp_std, dims="team")
    # Subtract the mean so the opponent effects are identified relative to the intercept.
    opp = pm.Deterministic("opp", opp_star - at.mean(opp_star), dims="team")

    # Player-specific parameters
    player_star = pm.Normal("player_star", mu=0, sigma=player_std, dims="player_")
    # Same sum-to-zero centring for the player effects.
    player = pm.Deterministic("player", player_star - at.mean(player_star), dims="player_")

    # Likelihoods
    theta = pm.Deterministic("theta", at.exp(intercept + player[player_idx] + opp[opp_idx]))
    # Observe the raw projected points, not `scaled_points`: theta = exp(...) is
    # strictly positive, whereas the standardised values have zero mean (about
    # half of them are negative), so a positive mean can never fit them. The raw
    # values are all > 0 after the subset filter.
    # NOTE(review): this assumes the priors above were chosen for the raw points
    # scale (exp(3) ~= 20) -- confirm; the alternative is an identity link with
    # an intercept prior centred at 0 on the standardised values.
    points = pm.Normal("points", mu=theta, sigma=points_std, observed=data["proj_points"].values)

display(pm.model_to_graphviz(model))
model.point_logps()

In [None]:
# Sample model
# 1000 posterior draws after 2000 tuning steps, on 4 cores. The fixed
# random_seed makes the draws (and therefore the diagnostics below)
# reproducible when the notebook is re-run from a fresh kernel.
with model:
    trace = pm.sample(1000, tune=2000, cores=4, random_seed=42)

In [None]:
# Check global parameters: diagnostics table and trace plots for the scale
# parameters and the intercept
trace_helper(
    trace,
    var_names=["opp_std", "player_std", "intercept", "points_std"],
)

In [None]:
# Check team-specific parameters: raw opponent effects and their mean-centred version
trace_helper(
    trace,
    var_names=["opp_star", "opp"],
)

In [None]:
# Check player-specific parameters: raw player effects and their mean-centred version
trace_helper(
    trace,
    var_names=["player_star", "player"],
)

In [None]:
# Pair plot of the global parameters, with divergences=True passed to ArviZ
az.plot_pair(
    trace,
    var_names=["opp_std", "player_std", "intercept", "points_std"],
    coords=coords,
    divergences=True,
);