In [None]:
import os

import aesara.tensor as at
import arviz as az
import numpy as np
import pandas as pd
import pymc as pm
from sklearn.preprocessing import LabelEncoder

from draft_optimizer.src.utils import DATA_DIR

In [None]:
def plot_player(weekly_points, players_map, name):
    """Show one player's weekly points and plot their running total.

    Args:
        weekly_points: pandas object with a MultiIndex; level 1 is matched
            against the player's id.
        players_map: lookup from player name to player id.
        name: player name to look up; also used as the plot title.
    """
    player_id = players_map[name]
    per_week = weekly_points.xs(player_id, level=1)
    display(per_week)

    # pandas' .plot() hands back a matplotlib Axes, which carries the title
    ax = per_week.cumsum().plot()
    ax.set_title(name)


def trace_helper(trace, var_names):
    """Display sampler diagnostics and trace plots for the given variables.

    Args:
        trace: sampling result passed straight through to ArviZ.
        var_names: names of the variables to summarise and plot.
    """
    diagnostics = az.summary(trace, var_names=var_names, kind="diagnostics")
    display(diagnostics)
    az.plot_trace(trace, var_names=var_names, compact=False)

In [None]:
# Load the raw league tables for one ESPN league/season
league_id, year = 88497130, 2022
league_dir = os.path.join(DATA_DIR, f"espn_{league_id}_{year}")
teams_raw, schedule_raw, players_raw = (
    pd.read_csv(os.path.join(league_dir, filename))
    for filename in ("pro_teams.csv", "pro_schedule.csv", "pro_players.csv")
)

In [None]:
# Lookup tables: team abbreviation -> team id, player name -> player id
teams_map = teams_raw.set_index("abbrev")["id"]
players_map = players_raw.set_index("name")["id"]

# One row per player, tagged with the id of their pro team
players_df = (
    players_raw.assign(team_id=players_raw["pro_team"].map(teams_map))
    .loc[:, ["id", "position", "proj_points", "team_id"]]
    .rename(columns={"id": "player_id"})
)

# Attach every scheduled game to the players on each side of it
games = schedule_raw[["home_id", "away_id", "week"]]
home_data = games.merge(players_df, left_on="home_id", right_on="team_id").assign(
    home=True, opponent_id=lambda df: df["away_id"]
)
away_data = games.merge(players_df, left_on="away_id", right_on="team_id").assign(
    home=False, opponent_id=lambda df: df["home_id"]
)

# Stack both sides into one long table: one row per (week, player)
all_data = (
    pd.concat([home_data, away_data], axis=0)
    .drop(columns=["team_id", "home_id", "away_id"])
    .dropna(how="any")
    .sort_values(["week", "player_id"])
)

print(all_data.shape)
all_data.head()

# All Weeks, One Position

In [None]:
# Subset data: a single position, keeping only players with a positive season projection
pos = "QB"
data = all_data.loc[all_data["position"] == pos]
data = data.loc[data["proj_points"] > 0]

# Make wide dataframe: one row per week, one column per (field, player_id).
# A (week, player) pair with no game has no row in `data`, so `unstack` fills it with NaN;
# those NaNs are what `bye_mask` picks up.
wide_data = data.set_index(["week", "player_id"]).unstack(level=1)
home_vals = wide_data["home"].fillna(False).astype(float).values
bye_mask = wide_data["opponent_id"].isnull().values

# Build coords
# "weeks" has to be registered here: the model declares variables with
# dims=["weeks", "players"], and PyMC rejects dims that are not known to the model.
# Using the wide frame's index also labels the posterior by real week numbers.
teams = sorted(np.unique(wide_data["opponent_id"].values))  # includes NaN for bye weeks
weeks = wide_data.index.tolist()
players = sorted(data["player_id"].unique())
coords = {"teams": teams, "weeks": weeks, "players": players}

# Encode opponents as integer positions into `teams` (applied column by column)
teams_encoder = LabelEncoder().fit(teams)
opp_idx = wide_data["opponent_id"].apply(teams_encoder.transform).values

# Get season-long projected points (one value per player, ordered by player_id)
proj_points = data.groupby("player_id")["proj_points"].first()

print(wide_data.shape)
wide_data.head()

In [None]:
# Hierarchical model of weekly points per player. Only the season total (the last row of
# the cumulative sum) is tied to observed data, via the `points` likelihood at the bottom.
with pm.Model(coords=coords) as model:
    # Opponent-specific parameters (non-centered format for better sampling)
    # NOTE(review): the offset has sigma=7 rather than unit scale, so conditional on
    # `opp_std` the spread of `opp` is 7 * opp_std -- confirm this is intended.
    opp_std = pm.HalfNormal("opp_std", sigma=7)
    opp_mu = pm.Normal("opp_mu", mu=0, sigma=7)
    opp_offset = pm.Normal("opp_offset", mu=0, sigma=7, dims="teams")
    opp = pm.Deterministic("opp", opp_mu + opp_std * opp_offset, dims="teams")

    # Broadcast `opp` to be weeks x players
    # (`opp_idx` holds integer positions into the "teams" coordinate)
    wide_opp = opp[opp_idx]

    # Player-specific parameters (non-centered format for better sampling)
    # NOTE(review): same sigma=7 offset scale as the opponent block -- confirm.
    player_std = pm.HalfNormal("player_std", sigma=7)
    player_mu = pm.Normal("player_mu", mu=0, sigma=7)
    player_offset = pm.Normal("player_offset", mu=0, sigma=7, dims=["players"])
    player = pm.Deterministic("player", player_mu + player_std * player_offset, dims=["players"])

    # Home advantage: a single coefficient (named "home" in the trace) scaled by the 0/1 home indicator
    beta_home = pm.Normal("home", mu=0, sigma=7)
    wide_home = beta_home * home_vals  # weeks x players

    # Points scored each week is an RV centered on player traits, the opponent, and home-field advantage
    # When a team has a bye, replace the estimated weekly score with 0.0
    weekly_mu = (
        player + wide_opp + wide_home
    )  # broadcasting is OK: player is 1 x players; other two are weeks x players
    # NOTE(review): the "weeks" dim used below must be present in `coords` -- confirm.
    weekly_points = pm.Normal("weekly_points", mu=weekly_mu, sigma=1, dims=["weeks", "players"])
    # This rebinds the Python name only: the RV registered as "weekly_points" stays unmasked,
    # while the zeroed tensor is what flows into `points_mu` below.
    weekly_points = at.set_subtensor(weekly_points[bye_mask], 0.0)

    # The evolution of a player's cumulative score over the season is a gaussian random walk (GRW) with drift
    points_sigma = pm.HalfNormal("points_std", sigma=7)
    points_mu = pm.Deterministic("points_mu", weekly_points.cumsum(axis=0), dims=["weeks", "players"])

    # Evaluate the model likelihood based on where the GRW lands the player at the end of the season
    points = pm.Normal("points", mu=points_mu[-1, :], sigma=points_sigma, observed=proj_points, dims="players")

# Sanity checks before sampling: render the model graph and list per-variable log-probabilities
display(pm.model_to_graphviz(model))
model.point_logps()

In [None]:
# Sample model
# Seed the sampler so that a Restart & Run All gives repeatable posterior draws.
RANDOM_SEED = 42
with model:
    trace = pm.sample(
        draws=1000,
        tune=2000,
        init="jitter+adapt_diag_grad",
        target_accept=0.9,
        random_seed=RANDOM_SEED,
    )

In [None]:
# Diagnostics and trace plots for the opponent hierarchy
trace_helper(
    trace,
    var_names=["opp_std", "opp_mu", "opp_offset", "opp"],
)

In [None]:
# Diagnostics and trace plots for the player hierarchy
trace_helper(
    trace,
    var_names=["player_std", "player_mu", "player_offset", "player"],
)

In [None]:
# Diagnostics and trace plot for the remaining parameter: the home-field coefficient
trace_helper(trace, var_names=["home"])

In [None]:
# Pairwise posterior of the hierarchy-level means and scales, with divergences marked
az.plot_pair(
    trace,
    var_names=["opp_std", "opp_mu", "player_std", "player_mu"],
    coords=coords,
    divergences=True,
);

In [None]:
# Get posterior predicted weekly points, averaged over chains and draws
posterior_weekly_points_vals = trace.posterior["weekly_points"].mean(dim=["chain", "draw"])

# The trace stores the raw `weekly_points` RV. The model zeroes bye weeks only on the derived
# tensor that feeds `points_mu`, so re-apply the same mask here; otherwise bye weeks would
# contribute spurious points to any per-player cumulative sum.
# `to_dataframe()` flattens the (weeks, players) grid in row-major order, which is the order
# `bye_mask.ravel()` produces.
posterior_weekly_points = (
    posterior_weekly_points_vals.to_dataframe()["weekly_points"].mask(bye_mask.ravel(), 0.0)
)

In [None]:
# Weekly posterior means and cumulative season curve for a single player
plot_player(
    weekly_points=posterior_weekly_points,
    players_map=players_map,
    name="Matthew Stafford",
)