# Prototype: QB–Receiver Stability Index vs YoY Offensive Efficiency (Simulated Data)

**Purpose:** Demonstrate an offseason framework using synthetic data:
- Stability Index = number of a team's top-2 targeted pass-catchers (WR/TE only) who were also in that team's top 2 the prior season (0, 1, or 2 returning primary targets)
- Outcome = Year-over-year change in offensive EPA/play

> This notebook uses **simulated data** to prototype the logic and visuals.


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# One seed shared by every source of randomness in the notebook.
RNG_SEED = 42

# Legacy global NumPy state, seeded for deterministic jitter in the plot.
np.random.seed(RNG_SEED)

# Generator that drives the synthetic-data simulation.
rng = np.random.default_rng(seed=RNG_SEED)

# Colab-friendly paths for the two generated CSV files.
PLAYER_CSV_PATH, TEAM_CSV_PATH = (
    "/content/player_targets.csv",
    "/content/team_offense.csv",
)


In [None]:
# -----------------------------
# Generate synthetic NFL-like data: lookup tables
# -----------------------------
# Team abbreviations; iteration order determines the order of RNG draws later.
teams = [
    "ARI", "ATL", "BAL", "BUF", "CAR", "CHI", "CIN", "CLE",
    "DAL", "DEN", "DET", "GB", "HOU", "IND", "JAX", "KC",
    "LA", "LAC", "LV", "MIA", "MIN", "NE", "NO", "NYG",
    "NYJ", "PHI", "PIT", "SEA", "SF", "TB", "TEN", "WAS",
]

# Three consecutive seasons, which yields two year-over-year comparisons.
seasons = list(range(2022, 2025))

# Name pools used to build fake player names.
first_names = [
    "Aiden", "Jordan", "Miles", "Darius", "Chris", "Devin",
    "Tyler", "Zach", "Jalen", "Marcus", "Cameron", "Noah",
    "Evan", "Bryce", "Trevor", "Isaiah", "Khalil", "Andre",
    "Nate", "Cole", "Rashad", "Dion", "Malik", "Trey",
]
last_names = [
    "Johnson", "Williams", "Brown", "Davis", "Miller", "Wilson",
    "Moore", "Taylor", "Anderson", "Thomas", "Jackson", "White",
    "Harris", "Martin", "Thompson", "Garcia", "Martinez", "Robinson",
    "Clark", "Lewis", "Walker", "Hall", "Young", "Allen",
]

def make_player_name() -> str:
    """Return a random "First Last" name built from the module-level name pools.

    Draws from the shared ``rng`` generator: the first name is sampled
    before the last name, so each call consumes two draws in that order.
    """
    first = rng.choice(first_names)
    last = rng.choice(last_names)
    return f"{first} {last}"

# Per-team baseline offensive EPA/play, drawn once from N(0, 0.08) and reused
# for every season of that team.
team_strength = {t: float(rng.normal(0.0, 0.08)) for t in teams}

# Row accumulators; converted to DataFrames after the simulation loop.
player_rows = []
team_rows = []

# NOTE: every statement below consumes draws from the shared `rng`, so the
# generated data depends on the exact order of these calls.
for team in teams:
    # Roster and target shares carried over from the previous simulated season
    # (None until the team's first season has been generated).
    prev_players = None
    prev_shares = None
    base_epa = team_strength[team]

    for season in seasons:
        # Team pass-target volume: N(575, 55) clipped to [450, 720], truncated to int.
        total_targets = int(np.clip(rng.normal(575, 55), 450, 720))

        if prev_players is None:
            # First season for this team: build a roster from scratch.
            # `continuity` here only feeds the EPA formula further down.
            continuity = float(np.clip(rng.beta(4, 2), 0.35, 0.92))
            n_receivers = int(rng.integers(6, 10))  # 6–9
            # Start with 4–6 WRs and 1–2 TEs, then pad with random WR/TE up to n_receivers.
            positions = (["WR"] * int(rng.integers(4, 7))) + (["TE"] * int(rng.integers(1, 3)))
            while len(positions) < n_receivers:
                positions.append(rng.choice(["WR","TE"]))
            # Shuffle in place; if the list is longer than n_receivers, only the
            # first n_receivers entries are used by the loop below.
            rng.shuffle(positions)

            players = []
            for i in range(n_receivers):
                # ID = team, season, slot index and a random suffix in 1000–9998
                # (the upper bound of rng.integers is exclusive).
                pid = f"{team}-{season}-{i}-{int(rng.integers(1000, 9999))}"
                players.append((pid, make_player_name(), positions[i]))

            # Target shares: Dirichlet draw (sums to 1) with per-player
            # concentration parameters sampled uniformly from [0.8, 2.2).
            alpha = rng.uniform(0.8, 2.2, size=n_receivers)
            shares = rng.dirichlet(alpha)

        else:
            # Later seasons: keep part of the previous roster and add newcomers.
            # `continuity` is the total target share assigned to returning players.
            continuity = float(np.clip(rng.beta(5, 2), 0.25, 0.93))
            n_prev = len(prev_players)
            # Between max(3, n_prev - 3) and n_prev players return (inclusive).
            n_return = int(np.clip(rng.integers(max(3, n_prev - 3), n_prev + 1), 3, n_prev))
            return_idx = rng.choice(np.arange(n_prev), size=n_return, replace=False)

            # prev_players includes any RB entry appended last season, so an RB
            # can be among the returning players.
            returning_players = [prev_players[i] for i in return_idx]
            # Renormalise the returning players' previous shares to sum to 1.
            returning_prev = prev_shares[return_idx]
            returning_prev = returning_prev / returning_prev.sum()

            # Perturb with N(0, 0.06) noise, floor at 0.001 to keep shares
            # positive, then rescale so the returning group sums to `continuity`.
            churn = rng.normal(0, 0.06, size=n_return)
            ret_shares = np.clip(returning_prev + churn, 0.001, None)
            ret_shares = (ret_shares / ret_shares.sum()) * continuity

            # The rest of the share is split among newly added players.
            remaining_share = 1.0 - continuity
            n_new = int(rng.integers(1, 4))  # 1–3 new
            new_players = []
            for j in range(n_new):
                pid = f"{team}-{season}-N{j}-{int(rng.integers(1000, 9999))}"
                # New players are WR with probability 0.75, TE with probability 0.25.
                pos = rng.choice(["WR","TE"], p=[0.75, 0.25])
                new_players.append((pid, make_player_name(), pos))

            new_alpha = rng.uniform(0.8, 2.0, size=n_new)
            new_shares = rng.dirichlet(new_alpha) * remaining_share

            players = returning_players + new_players
            shares = np.concatenate([ret_shares, new_shares])

            # Apply one random permutation to players and shares together so
            # returning and new players are interleaved.
            order = rng.permutation(len(players))
            players = [players[i] for i in order]
            shares = shares[order]

        # Optional RB bucket
        # With probability 0.45, add one RB whose share is N(0.09, 0.03) clipped
        # to [0.03, 0.16]; the other shares are scaled down so the total stays 1.
        if rng.random() < 0.45:
            rb_share = float(np.clip(rng.normal(0.09, 0.03), 0.03, 0.16))
            rb_id = f"{team}-{season}-RB-{int(rng.integers(1000, 9999))}"
            players.append((rb_id, make_player_name(), "RB"))
            shares = shares * (1 - rb_share)
            shares = np.append(shares, rb_share)

        # Convert shares to integer target counts: floor each product, then hand
        # out the leftover targets one at a time to randomly chosen players
        # (with replacement) so the counts sum to total_targets.
        targets = np.floor(shares * total_targets).astype(int)
        remainder = total_targets - targets.sum()
        if remainder > 0:
            add_idx = rng.choice(np.arange(len(targets)), size=remainder, replace=True)
            for i in add_idx:
                targets[i] += 1

        # One output row per player for this team-season.
        for (pid, pname, pos), t in zip(players, targets):
            player_rows.append({
                "season": season,
                "team": team,
                "player_id": pid,
                "player": pname,
                "position": pos,
                "targets": int(t),
            })

        # Team EPA/play = team baseline + 0.10 * (continuity - 0.55) + N(0, 0.045) noise.
        epa = base_epa + 0.10 * (continuity - 0.55) + float(rng.normal(0, 0.045))
        team_rows.append({
            "season": season,
            "team": team,
            "offensive_epa_per_play": float(epa),
        })

        # Carry this season's roster and shares (including any RB entry) forward.
        prev_players = players
        prev_shares = shares

# Turn the accumulated row dicts into the two output tables.
player_targets = pd.DataFrame(player_rows)
team_offense = pd.DataFrame(team_rows)

# Add target_share: each player's targets divided by the team-season total.
totals = (
    player_targets.groupby(["season", "team"], as_index=False)
    .agg(team_targets=("targets", "sum"))
)
player_targets = (
    player_targets.merge(totals, on=["season", "team"])
    .assign(target_share=lambda df: df["targets"] / df["team_targets"])
    .drop(columns=["team_targets"])
)

# Save both tables first, then report the paths that were written.
outputs = ((player_targets, PLAYER_CSV_PATH), (team_offense, TEAM_CSV_PATH))
for frame, path in outputs:
    frame.to_csv(path, index=False)
for _, path in outputs:
    print("Saved:", path)

player_targets.head(8)


In [None]:
# Show the first eight rows of the team-level offense table.
team_offense.head(n=8)


In [None]:
# -----------------------------
# QB–Receiver Stability Index
# Definition:
#   For each team-season, find the Top 2 pass-catchers by targets (WR/TE only).
#   Stability Index = count of those Top-2 players who were also Top-2 for the same team in prior season.
#   Values: 0, 1, 2
#   The index is only defined when the team has a Top 2 in the prior season,
#   so a team's first observed season gets no row rather than a misleading 0.
# -----------------------------
players = pd.read_csv(PLAYER_CSV_PATH)
team = pd.read_csv(TEAM_CSV_PATH)

# Pass catchers only
players_pt = players[players["position"].isin(["WR", "TE"])].copy()

# Top 2 by targets per team-season (rows stay ordered by season, team, targets desc)
top2 = (
    players_pt.sort_values(["season", "team", "targets"], ascending=[True, True, False])
    .groupby(["season", "team"])
    .head(2)
    .copy()
)

# Prior-year top2 shifted forward: relabel season s as s + 1 so it lines up
# with the season it should be compared against.
prev_top2 = top2[["season", "team", "player_id"]].copy()
prev_top2["season"] += 1

# Overlap: current top2 players who were also top2 last season (same team)
overlap = top2.merge(prev_top2, on=["season", "team", "player_id"], how="inner")

stability = (
    overlap.groupby(["season", "team"], as_index=False)
    .agg(qb_receiver_stability_index=("player_id", "nunique"))
)

# Team-seasons with no overlapping player are absent from `overlap` and must
# become 0 — but only where a prior-season top 2 exists. Without that filter
# the first season would be scored 0 ("both new") although nothing can be
# compared, which is indistinguishable from a real zero overlap.
all_team_seasons = top2.groupby(["season", "team"], as_index=False).size()[["season","team"]]
has_prior_top2 = prev_top2[["season", "team"]].drop_duplicates()
comparable_team_seasons = all_team_seasons.merge(has_prior_top2, on=["season", "team"], how="inner")

stability = comparable_team_seasons.merge(stability, on=["season","team"], how="left")
stability["qb_receiver_stability_index"] = stability["qb_receiver_stability_index"].fillna(0).astype(int)

stability.head(10)


In [None]:
# -----------------------------
# YoY EPA change
# -----------------------------
# Relabel each season's EPA as the *following* season's "previous" value so a
# plain join on (season, team) pairs every season with the one before it.
prev_epa = (
    team[["season", "team", "offensive_epa_per_play"]]
    .rename(columns={"offensive_epa_per_play": "epa_prev"})
    .assign(season=lambda df: df["season"] + 1)
)

# Inner join: seasons without a prior-season row are dropped.
epa = pd.merge(team, prev_epa, how="inner", on=["season", "team"])
epa = epa.assign(epa_change=epa["offensive_epa_per_play"] - epa["epa_prev"])

epa.head(10)


In [None]:
# Final dataset: one row per team-season that has both a stability index and
# a year-over-year EPA change; rows with missing values are discarded.
final2 = pd.merge(
    stability,
    epa[["season", "team", "epa_change"]],
    how="inner",
    on=["season", "team"],
).dropna()

print("Rows in final2:", len(final2))
final2.head(10)


In [None]:
# Plot: distribution of YoY EPA change by stability level (0/1/2)
plt.figure(figsize=(8,6))

# Cell-local generator: the horizontal jitter is identical every time this cell
# runs, regardless of what has consumed NumPy's global random state in between.
jitter_rng = np.random.default_rng(RNG_SEED)
plotted_any = False

for k in [0, 1, 2]:
    subset = final2[final2["qb_receiver_stability_index"] == k].copy()
    n = len(subset)
    if n == 0:
        # No team-seasons at this stability level; nothing to draw.
        continue

    # Spread points horizontally around the integer level so they do not overlap.
    jitter = jitter_rng.uniform(-0.08, 0.08, size=n)
    x = subset["qb_receiver_stability_index"].to_numpy() + jitter
    y = subset["epa_change"].to_numpy()

    plt.scatter(x, y, label=f"Stability={k}", alpha=0.8)
    plotted_any = True

# Reference line at "no change in EPA/play".
plt.axhline(0, linestyle="--", linewidth=1)
plt.xticks([0,1,2], ["0 (both new)", "1 (one returned)", "2 (both returned)"])
plt.xlabel("QB–Receiver Stability Index (Top 2 targets)")
plt.ylabel("Change in Offensive EPA/play (YoY)")
plt.title("Prototype: QB–Receiver Stability vs YoY Offensive Efficiency (Simulated Data)")

# The scatter labels only appear if a legend is drawn; skip it when nothing
# was plotted to avoid an empty-legend warning.
if plotted_any:
    plt.legend()

plt.tight_layout()
plt.show()


In [None]:
# Summary table: number of team-seasons plus mean and median YoY EPA change
# at each stability level.
epa_change_by_level = final2.groupby("qb_receiver_stability_index")["epa_change"]
summary = epa_change_by_level.agg(["count", "mean", "median"]).reset_index()
summary


In [None]:
# Optional export of the merged stability / EPA-change table to CSV.
OUT_PATH = "/content/qb_receiver_stability_vs_epa_change.csv"
final2.to_csv(path_or_buf=OUT_PATH, index=False)
print("Saved:", OUT_PATH)
