In [None]:
import os

import aesara.tensor as at
import arviz as az
import pandas as pd
import pymc as pm
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

from draft_optimizer.src.utils import DATA_DIR

In [None]:
def trace_helper(trace, var_names):
    """Show sampler diagnostics and trace plots for selected variables.

    Displays the ArviZ summary table restricted to diagnostics
    (``kind="diagnostics"``) for ``var_names``, then draws non-compact
    trace plots for the same variables. Nothing is returned.

    Args:
        trace: Sampling result to inspect; passed straight through to ArviZ.
        var_names: Names of the variables to summarize and plot.
    """
    diagnostics = az.summary(trace, kind="diagnostics", var_names=var_names)
    display(diagnostics)
    az.plot_trace(trace, compact=False, var_names=var_names)

In [None]:
# Load data: the three raw CSVs stored under the `espn_<league_id>_<year>` directory
league_id = 88497130
year = 2022
league_dir = os.path.join(DATA_DIR, f"espn_{league_id}_{year}")
# Each raw table gets exactly one name. `players_df` is built from `players_raw` in the
# preparation step, so it is no longer aliased to the teams table here.
teams_raw = pd.read_csv(os.path.join(league_dir, "pro_teams.csv"))
schedule_raw = pd.read_csv(os.path.join(league_dir, "pro_schedule.csv"))
players_raw = pd.read_csv(os.path.join(league_dir, "pro_players.csv"))

In [None]:
# Lookup from pro-team abbreviation to team id
team_map = teams_raw.set_index("abbrev")["id"]

# Player table: attach each player's team id and keep only the modelling columns
players_df = (
    players_raw
    .assign(team_id=lambda df: df["pro_team"].map(team_map))
    [["id", "position", "proj_points", "team_id"]]
    .rename(columns={"id": "player_id"})
)

# One row per (game, player): join the schedule to the roster of each side in turn
games = schedule_raw[["home_id", "away_id", "week"]]

# Players on the home team: their own team is the offense, the visitors are the defense
home_data = games.merge(players_df, left_on="home_id", right_on="team_id").assign(
    home=True,
    offense_id=lambda df: df["home_id"],
    defense_id=lambda df: df["away_id"],
)

# Players on the away team: roles are mirrored
away_data = games.merge(players_df, left_on="away_id", right_on="team_id").assign(
    home=False,
    offense_id=lambda df: df["away_id"],
    defense_id=lambda df: df["home_id"],
)

# Stack both sides, drop the join keys, and discard rows with any missing value
all_data = (
    pd.concat([home_data, away_data], axis=0)
    .drop(columns=["team_id", "home_id", "away_id"])
    .dropna(how="any")
)

print(all_data.shape)
all_data.head()

# One Week, One Position
* Will probably diverge a lot, because there is only a single data point for each player

In [None]:
# Restrict the data to a single week and a single position
week = 1
pos = "QB"
data = all_data[all_data["week"].eq(week) & all_data["position"].eq(pos)]

# Team index: every team that shows up on offense or on defense, in sorted order
teams = sorted({*data["offense_id"].unique(), *data["defense_id"].unique()})
teams_encoder = LabelEncoder()
teams_encoder.fit(teams)
off_idx, def_idx = (teams_encoder.transform(data[col]) for col in ("offense_id", "defense_id"))

# Home-field indicator (not used by the model yet)
side = data["home"].to_numpy()

# Player index
players = sorted(data["player_id"].unique())
players_encoder = LabelEncoder()
player_idx = players_encoder.fit(players).transform(data["player_id"])

# Observed response: projected points, min-max scaled and flattened to 1-D
scaler = MinMaxScaler()
scaled_points = scaler.fit_transform(data[["proj_points"]]).reshape(-1)

# Coordinates for the model dims
coords = dict(team=teams, player_=players)

print(data.shape)
data.head()

In [None]:
# TODO: home field advantage

# Build model: log-mean = intercept + player effect + offense effect + defense effect
with pm.Model(coords=coords) as model:
    # Global parameters: one scale per effect family plus a shared intercept
    # home = pm.Normal("home", mu=0, sigma=1)  # TODO: use `side`
    off_std = pm.HalfNormal("off_std", sigma=2)
    def_std = pm.HalfNormal("def_std", sigma=2)
    player_std = pm.HalfNormal("player_std", sigma=2)
    intercept = pm.Normal("intercept", mu=3, sigma=1)

    # Team-specific parameters: raw draws (`*_star`), then centred on their own mean
    off_star = pm.Normal("off_star", mu=0, sigma=off_std, dims="team")
    def_star = pm.Normal("def_star", mu=0, sigma=def_std, dims="team")
    off = pm.Deterministic("off", off_star - off_star.mean(), dims="team")
    def_ = pm.Deterministic("def", def_star - def_star.mean(), dims="team")

    # Player-specific parameters, centred the same way
    player_star = pm.Normal("player_star", mu=0, sigma=player_std, dims="player_")
    player = pm.Deterministic("player", player_star - player_star.mean(), dims="player_")

    # Likelihood: exponentiate the additive predictor to get the mean of each observation
    # NOTE(review): `scaled_points` is min-max scaled, while exp(intercept) under the
    # N(3, 1) prior is centred near 20 and sigma is fixed at 2 — confirm the intended scale.
    log_theta = intercept + player[player_idx] + off[off_idx] + def_[def_idx]
    theta = at.exp(log_theta)
    points = pm.TruncatedNormal("points", mu=theta, sigma=2, lower=0, observed=scaled_points)  # TODO: parameterize sigma?

In [None]:
# Sample model
with model:
    # Fixed seed so the draws (and the diagnostics in the following cells) are repeatable on rerun
    trace = pm.sample(1000, tune=1500, cores=4, random_seed=42)

In [None]:
# Check global parameters: diagnostics table and trace plots for the
# effect scales (`*_std`) and the intercept
trace_helper(trace, ["off_std", "def_std", "player_std", "intercept"])

In [None]:
# Check team-specific parameters: raw draws (`*_star`) and the mean-centred
# offense / defense effects
trace_helper(trace, ["off_star", "def_star", "off", "def"])

In [None]:
# Check player-specific parameters: raw draws and the mean-centred player effects
trace_helper(trace, ["player_star", "player"])

# All Weeks, One Position
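* Will probably diverge a lot. NOTE: the original rationale ("single data point for each player") was copied from the one-week section; here each player has a row for every scheduled week, so it may no longer apply — TODO confirm.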

In [None]:
# Restrict the data to a single position, keeping every week
pos = "QB"
data = all_data[all_data["position"].eq(pos)]

# Team index: every team that shows up on offense or on defense, in sorted order
teams = sorted({*data["offense_id"].unique(), *data["defense_id"].unique()})
teams_encoder = LabelEncoder()
teams_encoder.fit(teams)
off_idx, def_idx = (teams_encoder.transform(data[col]) for col in ("offense_id", "defense_id"))

# Home-field indicator (not used by the model yet)
side = data["home"].to_numpy()

# Player index
players = sorted(data["player_id"].unique())
players_encoder = LabelEncoder()
player_idx = players_encoder.fit(players).transform(data["player_id"])

# Observed response: projected points, min-max scaled and flattened to 1-D
scaler = MinMaxScaler()
scaled_points = scaler.fit_transform(data[["proj_points"]]).reshape(-1)

# Coordinates for the model dims
coords = dict(team=teams, player_=players)

print(data.shape)
data.head()

In [None]:
# TODO: home field advantage

# Build model: log-mean = intercept + player effect + offense effect + defense effect
with pm.Model(coords=coords) as model:
    # Global parameters: one scale per effect family plus a shared intercept
    # home = pm.Normal("home", mu=0, sigma=1)  # TODO: use `side`
    off_std = pm.HalfNormal("off_std", sigma=2)
    def_std = pm.HalfNormal("def_std", sigma=2)
    player_std = pm.HalfNormal("player_std", sigma=2)
    intercept = pm.Normal("intercept", mu=3, sigma=1)

    # Team-specific parameters: raw draws (`*_star`), then centred on their own mean
    off_star = pm.Normal("off_star", mu=0, sigma=off_std, dims="team")
    def_star = pm.Normal("def_star", mu=0, sigma=def_std, dims="team")
    off = pm.Deterministic("off", off_star - off_star.mean(), dims="team")
    def_ = pm.Deterministic("def", def_star - def_star.mean(), dims="team")

    # Player-specific parameters, centred the same way
    player_star = pm.Normal("player_star", mu=0, sigma=player_std, dims="player_")
    player = pm.Deterministic("player", player_star - player_star.mean(), dims="player_")

    # Likelihood: one observation per (player, week) row
    # TODO: sum each week's theta (filtering idx I guess?); then make observed per player dim
    # NOTE(review): `scaled_points` is min-max scaled, while exp(intercept) under the
    # N(3, 1) prior is centred near 20 and sigma is fixed at 2 — confirm the intended scale.
    log_theta = intercept + player[player_idx] + off[off_idx] + def_[def_idx]
    theta = at.exp(log_theta)
    points = pm.TruncatedNormal("points", mu=theta, sigma=2, lower=0, observed=scaled_points)  # TODO: parameterize sigma?

In [None]:
# Sample model
with model:
    # Fixed seed so the draws (and the diagnostics in the following cells) are repeatable on rerun
    trace = pm.sample(1000, tune=1500, cores=4, random_seed=42)

In [None]:
# Check global parameters: diagnostics table and trace plots for the
# effect scales (`*_std`) and the intercept
trace_helper(trace, ["off_std", "def_std", "player_std", "intercept"])

In [None]:
# Check team-specific parameters: raw draws (`*_star`) and the mean-centred
# offense / defense effects
trace_helper(trace, ["off_star", "def_star", "off", "def"])

In [None]:
# Check player-specific parameters: raw draws and the mean-centred player effects
trace_helper(trace, ["player_star", "player"])

# Old

In [None]:
# Work on a copy of the full table (all weeks, all positions)
data = all_data.copy()

# Team index: every team that shows up on offense or on defense, in sorted order
teams = sorted({*data["offense_id"].unique(), *data["defense_id"].unique()})
teams_encoder = LabelEncoder()
teams_encoder.fit(teams)
off_idx, def_idx = (teams_encoder.transform(data[col]) for col in ("offense_id", "defense_id"))

# Home-field indicator (not used by the model yet)
side = data["home"].to_numpy()

# Position index
positions = sorted(data["position"].unique())
positions_encoder = LabelEncoder()
position_idx = positions_encoder.fit(positions).transform(data["position"])

# Player index
players = sorted(data["player_id"].unique())
players_encoder = LabelEncoder()
player_idx = players_encoder.fit(players).transform(data["player_id"])

# Observed response: projected points, min-max scaled and flattened to 1-D
scaler = MinMaxScaler()
scaled_points = scaler.fit_transform(data[["proj_points"]]).reshape(-1)

# Coordinates for the model dims
coords = dict(team=teams, position=positions, player_=players)

In [None]:
# TODO: home field advantage, player-specific, sum of multiple weeks (i.e. can we sum along week coord wrt the other dims or something?)
# Note: bye week is implicitly skipped in this setup since the sum is automatically handled (pending sum along coord)

# Build model: log-mean = intercept + position effect + offense effect + defense effect
with pm.Model(coords=coords) as model:
    # Global parameters
    # home = pm.Normal("home", mu=0, sigma=1)  # TODO: use `side`
    off_std = pm.HalfNormal("off_std", sigma=2)
    def_std = pm.HalfNormal("def_std", sigma=2)
    pos_std = pm.HalfNormal("pos_std", sigma=2)
    # Kept although the player effects below are disabled: the global-parameter
    # diagnostics cell still lists "player_std", so it must exist in the trace.
    player_std = pm.HalfNormal("player_std", sigma=2)
    intercept = pm.Normal("intercept", mu=3, sigma=1)

    # Team-specific parameters: raw draws (`*_star`), then centred on their own mean
    off_star = pm.Normal("off_star", mu=0, sigma=off_std, dims="team")
    def_star = pm.Normal("def_star", mu=0, sigma=def_std, dims="team")
    off = pm.Deterministic("off", off_star - at.mean(off_star), dims="team")
    def_ = pm.Deterministic("def", def_star - at.mean(def_star), dims="team")

    # Position-specific parameters
    # Fix: scale the position effects with their own `pos_std`. This previously used
    # `off_std`, which tied the position spread to the offense spread and left
    # `pos_std` unused.
    pos_star = pm.Normal("pos_star", mu=0, sigma=pos_std, dims="position")
    # The Python name is `pos_effect` so the position string `pos` set by earlier cells
    # is not overwritten; the model variable is still named "pos".
    pos_effect = pm.Deterministic("pos", pos_star - at.mean(pos_star), dims="position")

    # Player-specific parameters
    # player_star = pm.Normal("player_star", mu=0, sigma=player_std, dims="player_")
    # player = pm.Deterministic("player", player_star - at.mean(player_star), dims="player_")

    # Likelihoods
    # NOTE(review): `scaled_points` is min-max scaled, while exp(intercept) under the
    # N(3, 1) prior is centred near 20 and sigma is fixed at 2 — confirm the intended scale.
    theta = at.exp(intercept + pos_effect[position_idx] + off[off_idx] + def_[def_idx])  # player[player_idx] +
    points = pm.TruncatedNormal("points", mu=theta, sigma=2, lower=0, observed=scaled_points)  # TODO: parameterize sigma?

In [None]:
# Sample model
with model:
    # Fixed seed so the draws (and the diagnostics in the following cells) are repeatable on rerun
    trace = pm.sample(500, tune=1000, cores=4, random_seed=42)

In [None]:
# Check global parameters: diagnostics table and trace plots for the
# effect scales (`*_std`) and the intercept.
# Note: the player effects are commented out in this model, so `player_std` does not
# enter the likelihood here.
trace_helper(trace, ["off_std", "def_std", "pos_std", "player_std", "intercept"])

In [None]:
# Check team-specific parameters: raw draws (`*_star`) and the mean-centred
# offense / defense effects.
# TODO(review): the position effects ("pos_star", "pos") are not inspected in any visible cell.
trace_helper(trace, ["off_star", "def_star", "off", "def"])

In [None]:
# Check player-specific parameters
# Disabled: the player effects ("player_star", "player") are commented out in this
# section's model, so they are not part of `trace`.
# trace_helper(trace, ["player_star", "player"])