In [None]:
import os
from typing import Tuple

import aesara.tensor as at
import arviz as az
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import pymc as pm
import pymc.sampling_jax
from sklearn.preprocessing import LabelEncoder, StandardScaler

from draft_optimizer.src.utils import DATA_DIR

# Load Data
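- TODO: For `opponent`, consider merging OAK and LV into a single code (the Raiders moved from Oakland to Las Vegas in 2020, so both codes refer to the same franchise).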

In [None]:
# Load the player table; the first CSV column is used as the frame's index
data_dir = os.path.join(DATA_DIR, "sleeper")
players_path = os.path.join(data_dir, "players.csv")
players = pd.read_csv(players_path, index_col=0)

print(players.shape)
players.head()

In [None]:
# Read the per-season stats and projections, tagging every frame with its season
years = [2018, 2019, 2020, 2021, 2022]
stats_dfs, projs_dfs = [], []
for year in years:
    season_dir = os.path.join(DATA_DIR, "sleeper", str(year))
    for filename, frames in (("stats.csv", stats_dfs), ("projections.csv", projs_dfs)):
        csv_path = os.path.join(season_dir, filename)
        if not os.path.isfile(csv_path):
            continue  # a season without this file is silently skipped
        frames.append(pd.read_csv(csv_path).assign(year=year))

# Stack the seasons and key every row by (player, season, week, opponent)
index_cols = ["sleeper_id", "year", "week", "opponent"]
stats = pd.concat(stats_dfs, axis=0).set_index(index_cols)
projs = pd.concat(projs_dfs, axis=0).set_index(index_cols)

# Re-index both frames on the union of their keys so they align row for row;
# keys present in only one source become all-NaN rows in the other
idx = stats.index.union(projs.index)
stats, projs = stats.reindex(idx), projs.reindex(idx)

# Helpers

In [None]:
def filter_pos(players, stats, projs, pos) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Restrict `stats` and `projs` to the players whose position equals `pos`.

    Args:
        players: Frame indexed by player id with a "position" column.
        stats: Frame whose index has a "sleeper_id" level.
        projs: Frame whose index has a "sleeper_id" level.
        pos: Position label to keep (compared with `==`).

    Returns:
        The filtered `(stats, projs)` pair; the inputs are not modified.
    """
    at_position = (players["position"] == pos).to_numpy()
    wanted_ids = players.index[at_position]

    def _keep_wanted(frame):
        # Select rows whose "sleeper_id" index level is one of the wanted ids
        row_mask = frame.index.get_level_values("sleeper_id").isin(wanted_ids)
        return frame.loc[row_mask]

    return _keep_wanted(stats), _keep_wanted(projs)


def calc_deltas(stats, projs, metric) -> Tuple[pd.DataFrame, StandardScaler]:
    """Compute actual-minus-projected differences for `metric` and standardise them.

    Args:
        stats: Frame holding the `metric` column of observed values.
        projs: Frame holding the `metric` column of projected values.
        metric: Name of the column to compare.

    Returns:
        A tuple `(deltas, scaler)`. `deltas` has the former index levels as
        columns, the raw difference under `metric`, and the standardised
        difference under "scaled". `scaler` is the fitted `StandardScaler`.
    """
    # Rows where the difference is NaN are dropped before the index levels
    # are turned back into ordinary columns
    residuals = (stats[metric] - projs[metric]).dropna(how="any").reset_index()

    scaler = StandardScaler()
    residuals["scaled"] = scaler.fit_transform(residuals[[metric]])
    return residuals, scaler


def plot_hist(data, metric, color=None, histnorm="probability"):
    """Show an overlaid plotly histogram of `data[metric]`.

    Args:
        data: Frame passed to `px.histogram`.
        metric: Column plotted on the x axis.
        color: Optional column used to split the histogram into coloured
            groups; the groups are ordered by their sorted unique values.
        histnorm: Normalisation mode forwarded to `px.histogram`.
    """
    category_orders = None if color is None else {color: sorted(data[color].unique())}
    fig = px.histogram(
        data,
        x=metric,
        color=color,
        opacity=0.25,
        barmode="overlay",
        histnorm=histnorm,
        category_orders=category_orders,
    )
    fig.update_layout(width=720, height=480)
    fig.show()


def trace_summary(trace, var_names, plot=True):
    """Display the ArviZ diagnostics table for `var_names`, optionally with trace plots.

    Args:
        trace: Sampling result passed to `az.summary` / `az.plot_trace`.
        var_names: Variable names to summarise.
        plot: When true, also draw non-compact trace plots.
    """
    diagnostics = az.summary(trace, var_names=var_names, kind="diagnostics")
    display(diagnostics)
    if not plot:
        return
    az.plot_trace(trace, var_names=var_names, compact=False)
    plt.show()

# Model

## Prepare

In [None]:
# Keep only the rows belonging to running backs
pos_stats, pos_projs = filter_pos(players=players, stats=stats, projs=projs, pos="RB")

In [None]:
# Calculate and scale deltas
metric = "rush_yd"
deltas, scaler = calc_deltas(pos_stats, pos_projs, metric)
plot_hist(deltas, "scaled", color="year")

# Per-season mean/std of the raw and scaled deltas. A notebook only renders a
# cell's LAST expression automatically, and this is not the last statement, so
# it has to be passed to display() explicitly or the table is silently dropped.
display(deltas.groupby("year")[[metric, "scaled"]].agg(["mean", "std"]))

# NOTE(review): this narrows the modelling data to a single player while the
# scaler above was fitted on every player at the position — looks like a
# debugging subset; confirm it is intended before trusting downstream results.
# Assumes `sleeper_id` values are strings — TODO confirm against the CSV dtypes.
deltas = deltas.loc[deltas["sleeper_id"] == "3198"]

## Define

In [None]:
# Build coords: the sorted unique labels of each model dimension
players_, opponents, years = (
    sorted(deltas[column].unique()) for column in ("sleeper_id", "opponent", "year")
)
coords = dict(player=players_, year=years, opponent=opponents)

# Fit one label encoder per dimension
players_encoder = LabelEncoder().fit(players_)
opponents_encoder = LabelEncoder().fit(opponents)
years_encoder = LabelEncoder().fit(years)

# Prepare data: integer codes per observation plus the scaled target values
player_idx = players_encoder.transform(deltas["sleeper_id"])
opponent_idx = opponents_encoder.transform(deltas["opponent"])
year_idx = years_encoder.transform(deltas["year"])
vals = deltas["scaled"].to_numpy()

In [None]:
# Earlier draft of the model, kept only as an unassigned string literal: nothing
# inside it is executed. Unlike the live model in the next cell, it subtracts a
# shifted cumulative sum across years from the player/opponent effects and uses
# a Student-T likelihood. The trailing `;` suppresses the echo of the string.
"""
# Build model
with pm.Model(coords=coords) as model:
    # Opponent-specific parameters (non-centered format for better sampling)
    opp_std = pm.HalfNormal("opp_std", sigma=10)
    opp_offset = pm.Normal("opp_offset", mu=0, sigma=1, dims=["year", "opponent"])
    year_opp = pm.Deterministic("year_opp", opp_std * opp_offset, dims=["year", "opponent"])
    opp_zeros = at.shape_padleft(at.zeros_like(year_opp[0, :]))
    opp_cumsum = year_opp.cumsum(axis=0)[:-1]
    opp = pm.Deterministic("opp", year_opp - at.concatenate([opp_zeros, opp_cumsum], axis=0), dims=["year", "opponent"])

    # Player-specific parameters (non-centered format for better sampling)
    player_std = pm.HalfNormal("player_std", sigma=10)
    player_mu = pm.Normal("player_mu", mu=0, sigma=10)
    player_offset = pm.Normal("player_offset", mu=0, sigma=1, dims=["year", "player"])
    year_player = pm.Deterministic("year_player", player_mu + player_std * player_offset, dims=["year", "player"])
    players_zeros = at.shape_padleft(at.zeros_like(year_player[0, :]))
    players_cumsum = year_player.cumsum(axis=0)[:-1]
    player = pm.Deterministic(  # underscore to not conflict with dimension name
        "player_",
        year_player - at.concatenate([players_zeros, players_cumsum], axis=0),
        dims=["year", "player"],
    )

    # Metric: RV centered on player traits and the opponent
    metric_mu = player[year_idx, player_idx] + opp[year_idx, opponent_idx]
    metric_std = pm.HalfNormal("metric_std", sigma=10)
    metric_nu = pm.TruncatedNormal("metric_nu", mu=6, sigma=0.5, lower=1)
    metric_ = pm.StudentT(  # underscore to not overwrite variable from pre-processing
        "metric",
        mu=metric_mu,
        sigma=metric_std,
        nu=metric_nu,
        observed=vals,
    )

display(pm.model_to_graphviz(model))
model.point_logps()
""";

In [None]:
# Build model
with pm.Model(coords=coords) as model:
    # Opponent-specific effects in non-centered form: opp = opp_std * opp_offset.
    # The offset must have unit scale so that `opp_std` alone is the standard
    # deviation of the effect; a wider offset prior silently multiplies that
    # scale (sigma=10 made the effective scale 10 * opp_std).
    opp_std = pm.HalfNormal("opp_std", sigma=10)
    opp_offset = pm.Normal("opp_offset", mu=0, sigma=1, dims=["year", "opponent"])
    opp = pm.Deterministic("opp", opp_std * opp_offset, dims=["year", "opponent"])

    # Player-specific effects in non-centered form:
    # player = player_mu + player_std * player_offset, again with a unit-scale offset.
    player_std = pm.HalfNormal("player_std", sigma=10)
    player_mu = pm.Normal("player_mu", mu=0, sigma=10)
    player_offset = pm.Normal("player_offset", mu=0, sigma=1, dims=["year", "player"])
    player = pm.Deterministic(
        "player_", player_mu + player_std * player_offset, dims=["year", "player"]
    )  # underscore to not conflict with dimension name

    # Metric: each observation is centered on its (year, player) effect plus
    # its (year, opponent) effect, looked up through the integer index arrays.
    metric_mu = player[year_idx, player_idx] + opp[year_idx, opponent_idx]
    metric_std = pm.HalfNormal("metric_std", sigma=10)
    metric_ = pm.Normal(  # underscore to not overwrite variable from pre-processing
        "metric",
        mu=metric_mu,
        sigma=metric_std,
        observed=vals,
    )

# Render the model graph, then show the log-probability of each variable at
# the initial point as a quick sanity check
display(pm.model_to_graphviz(model))
model.point_logps()

## Prior-Predictive Checks

In [None]:
# Sample prior
with model:
    # Fixed seed so that Restart & Run All reproduces the same draws and figure
    prior = pm.sample_prior_predictive(samples=1000, random_seed=42)

# Average over the first two axes (presumably chain, then draw — confirm) to get
# one prior-predictive mean per observation
prior_scaled = prior["prior_predictive"]["metric"].mean(axis=0).mean(axis=0).values

# View prior
fig = go.Figure()
fig.add_trace(go.Histogram(x=vals, opacity=0.25, histnorm="probability", name="observed"))
fig.add_trace(go.Histogram(x=prior_scaled, opacity=0.25, histnorm="probability", name="prior"))
fig.update_layout(
    title="Prior predictive mean vs. observed",
    xaxis_title="scaled delta",
    yaxis_title="probability",
    width=720,
    height=480,
    barmode="overlay",
)
fig.show()

## Sample

In [None]:
# Sample model
with model:
    # Fixed seed so the posterior, and every check built on it, is reproducible
    # after a kernel restart
    trace = pm.sample(draws=1000, tune=2000, init="jitter+adapt_diag_grad", random_seed=42)
    # Alternative JAX/NumPyro sampler:
    # trace = pm.sampling_jax.sample_numpyro_nuts(draws=1000, tune=2000)

## Posterior-Predictive Checks

In [None]:
# Sampling diagnostics for the opponent-specific parameters (table only)
trace_summary(trace, var_names=["opp_std", "opp_offset", "opp"], plot=False)

In [None]:
# Sampling diagnostics for the player-specific parameters (table only)
trace_summary(
    trace,
    var_names=["player_std", "player_mu", "player_offset", "player_"],
    plot=False,
)

In [None]:
# Sampling diagnostics and trace plots for the remaining parameters
# (not included here: "metric_nu", "home")
trace_summary(trace, var_names=["metric_std"], plot=True)

## Posterior Samples

In [None]:
# Sample posterior
with model:
    # Fixed seed so that Restart & Run All reproduces the same draws and figure
    posterior = pm.sample_posterior_predictive(trace, random_seed=42)

# Average over the first two axes (presumably chain, then draw — confirm) to get
# one posterior-predictive mean per observation
posterior_scaled = posterior["posterior_predictive"]["metric"].mean(axis=0).mean(axis=0).values

# View posterior
fig = go.Figure()
fig.add_trace(go.Histogram(x=vals, opacity=0.25, histnorm="probability", name="observed"))
fig.add_trace(go.Histogram(x=posterior_scaled, opacity=0.25, histnorm="probability", name="posterior"))
fig.update_layout(
    title="Posterior predictive mean vs. observed",
    xaxis_title="scaled delta",
    yaxis_title="probability",
    width=720,
    height=480,
    barmode="overlay",
)
fig.show()

In [None]:
# Compare observed and posterior-predictive values for one player
player_id = "3198"
player_code = players_encoder.transform([player_id])[0]
is_player = player_idx == player_code  # boolean mask over the observations

# View posterior: overlay the player's observed and posterior histograms
fig = go.Figure()
for label, series in (("observed", vals), ("posterior", posterior_scaled)):
    fig.add_trace(go.Histogram(x=series[is_player], opacity=0.25, histnorm="probability", name=label))
fig.update_layout(title=player_id, width=720, height=480, barmode="overlay")
fig.show()

In [None]:
# Full ArviZ summary table for the player and opponent effects
az.summary(data=trace, var_names=["player_", "opp"])