# Team Possession Analysis
Compares **actual tracked possessions** (from `possessions_enriched_*.csv`) against the
**KenPom estimate** (`FGA − ORB + TOV + 0.475 × FTA`, stored in `four_factors_*.csv`).

Set `TEAM` in the config cell below to any college basketball team name as it appears in the data files (the match is case-insensitive).

In [None]:
import glob
import os
import re

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

# ── CONFIG ────────────────────────────────────────────────────────────────────
# TEAM is compared case-insensitively against the `team` column of the
# four_factors files and the `possession_team` column of the possessions files.
TEAM     = "Duke"          # change to any team name
# DATA_DIR is the directory that load_csvs() joins with each glob pattern.
DATA_DIR = "cbbd_data"     # folder where daily_fetch.py saves CSVs
# ──────────────────────────────────────────────────────────────────────────────

In [None]:
def load_csvs(pattern, data_dir=None):
    """Load every CSV matching a glob pattern and tag each row with its file date.

    Args:
        pattern: Glob pattern for the file names, e.g. ``"four_factors_*.csv"``.
        data_dir: Directory to search. When omitted, the module-level
            ``DATA_DIR`` is used, so existing one-argument calls are unchanged.

    Returns:
        A single DataFrame holding the rows of all matching files, concatenated
        in sorted path order with a fresh integer index. A ``game_date`` column
        is added to every row: the ``datetime.date`` parsed from the
        ``_YYYYMMDD_`` part of the file name, or ``None`` when the file name
        contains no such part. When nothing matches, an empty DataFrame (with
        no columns) is returned.
    """
    if data_dir is None:
        data_dir = DATA_DIR

    # Escape the directory so glob metacharacters in the folder name
    # (e.g. "data[2024]") are matched literally; only `pattern` is a glob.
    search_path = os.path.join(glob.escape(data_dir), pattern)

    # filename format: <name>_YYYYMMDD_SEASON.csv
    date_in_name = re.compile(r"_(\d{8})_")

    frames = []
    for path in sorted(glob.glob(search_path)):
        match = date_in_name.search(os.path.basename(path))
        if match:
            file_date = pd.to_datetime(match.group(1), format="%Y%m%d").date()
        else:
            file_date = None
        frame = pd.read_csv(path)
        frame["game_date"] = file_date
        frames.append(frame)

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

# Load every batch file of each kind from DATA_DIR.
ff_df   = load_csvs("four_factors_*.csv")
poss_df = load_csvs("possessions_enriched_*.csv")

# load_csvs() returns a DataFrame with no columns when nothing matches, and
# drop_duplicates(subset=...) would then fail with an opaque KeyError.
# Stop here with a message that names the missing files instead.
if ff_df.columns.empty:
    raise FileNotFoundError(
        f"No files matching 'four_factors_*.csv' found in {DATA_DIR!r}"
    )
if poss_df.columns.empty:
    raise FileNotFoundError(
        f"No files matching 'possessions_enriched_*.csv' found in {DATA_DIR!r}"
    )

# Dedup across overlapping batch files (e.g. Nov-Dec and Dec batches share games).
# The first occurrence of each key is the one that is kept.
ff_df   = ff_df.drop_duplicates(subset=["game_id", "team"])
poss_df = poss_df.drop_duplicates(subset=["gameId", "possession_id", "possession_team"])

print(f"four_factors rows  : {len(ff_df):,}")
print(f"possessions rows   : {len(poss_df):,}")

In [None]:
# ── KP estimate from four_factors ─────────────────────────────────────────────
# Select TEAM's rows (case-insensitive name match) and only the columns used
# below; the file's 'Possessions' column is carried forward as 'kp_poss_est'.
# NOTE(review): 'Possessions' is presumably precomputed upstream as
# FGA - ORB + TOV + 0.475*FTA — confirm against whatever writes four_factors_*.csv.
is_team_row = ff_df["team"].str.lower() == TEAM.lower()
ff_columns = [
    "game_id", "game_date", "team", "opponent",
    "FGA", "ORB", "TOV", "FTA", "Possessions",
]
team_ff = ff_df.loc[is_team_row, ff_columns]
team_ff = team_ff.rename(columns={"Possessions": "kp_poss_est"}).copy()

# Recompute the estimate from its components so it can be eyeballed next to
# the stored value.
manual_estimate = (
    team_ff["FGA"] - team_ff["ORB"] + team_ff["TOV"] + 0.475 * team_ff["FTA"]
)
team_ff["kp_poss_manual"] = manual_estimate.round(1)

print(f"Games found for {TEAM}: {len(team_ff)}")
team_ff.head()

In [None]:
# ── Actual tracked possession count from possessions_enriched ─────────────────
# One row per game: how many possession rows belong to TEAM
# (case-insensitive match on 'possession_team').
is_team_possession = poss_df["possession_team"].str.lower() == TEAM.lower()
possessions_per_game = poss_df[is_team_possession].groupby("gameId").size()
team_poss = (
    possessions_per_game
    .rename_axis("game_id")          # align the key name with four_factors
    .reset_index(name="tracked_poss")
)

# Cast both join keys to str so the merge compares like with like.
for keyed_frame in (team_ff, team_poss):
    keyed_frame["game_id"] = keyed_frame["game_id"].astype(str)

team_poss.head()

In [None]:
# ── Combine ───────────────────────────────────────────────────────────────────
# Left-join so every four_factors game is kept; games without tracked
# possessions end up with NaN in 'tracked_poss' (and therefore in 'diff').
tracked_by_game = team_poss[["game_id", "tracked_poss"]]
combined = pd.merge(team_ff, tracked_by_game, how="left", on="game_id")
combined = combined.sort_values("game_date").reset_index(drop=True)

# Positive diff: more possessions tracked than the estimate predicts.
combined["diff"] = combined["tracked_poss"].sub(combined["kp_poss_est"]).round(1)

display_cols = [
    "game_date", "opponent",
    "FGA", "ORB", "TOV", "FTA",
    "kp_poss_est", "tracked_poss", "diff",
]
combined[display_cols]

In [None]:
# ── Summary stats ─────────────────────────────────────────────────────────────
# Print one line per statistic, each evaluated on the estimate, the tracked
# count and their difference.
header = f"{'':30s}  {'KP Est':>8}  {'Tracked':>8}  {'Diff':>8}"
print(header)
print("-" * 58)

# (row label, name of the pandas Series method that computes it)
stat_rows = (
    ("Mean", "mean"),
    ("Median", "median"),
    ("Std Dev", "std"),
    ("Min", "min"),
    ("Max", "max"),
)
for label, method_name in stat_rows:
    kp_value, tracked_value, diff_value = (
        getattr(combined[column], method_name)()
        for column in ("kp_poss_est", "tracked_poss", "diff")
    )
    print(f"{label:30s}  {kp_value:8.1f}  {tracked_value:8.1f}  {diff_value:+8.1f}")

In [None]:
# ── Chart ─────────────────────────────────────────────────────────────────────
# Two stacked panels sharing the x axis: both possession series on top and
# their per-game difference below. Games are placed at integer positions in
# date order and labelled with "date / opponent".
fig, axes = plt.subplots(2, 1, figsize=(12, 7), sharex=True)
series_ax, diff_ax = axes

positions = list(range(len(combined)))
tick_labels = [
    f"{game_date}\nvs {opponent}"
    for game_date, opponent in zip(combined["game_date"], combined["opponent"])
]

# top: both series
series_ax.plot(positions, combined["kp_poss_est"],
               marker="o", label="KP Estimate", color="steelblue")
series_ax.plot(positions, combined["tracked_poss"],
               marker="s", label="Tracked (actual)", color="darkorange")
series_ax.set_title(f"{TEAM} — Possessions per Game")
series_ax.set_ylabel("Possessions")
series_ax.legend()
series_ax.yaxis.set_minor_locator(ticker.AutoMinorLocator())
series_ax.grid(axis="y", alpha=0.3)

# bottom: difference — red only when strictly negative, green otherwise
bar_colors = [
    "tomato" if delta < 0 else "mediumseagreen" for delta in combined["diff"]
]
diff_ax.bar(positions, combined["diff"], color=bar_colors)
diff_ax.axhline(0, color="black", linewidth=0.8)
diff_ax.set_title("Difference (Tracked minus KP Estimate)")
diff_ax.set_ylabel("Tracked − KP Est")
diff_ax.grid(axis="y", alpha=0.3)

# The x axis is shared, so the game labels go on the bottom panel only.
diff_ax.set_xticks(positions)
diff_ax.set_xticklabels(tick_labels, fontsize=7, rotation=45, ha="right")

plt.tight_layout()
plt.show()