# In [None]:
# enrich_draft.py
"""
Reads draft_data.csv and (optionally) lineup_data.csv to enrich each draft pick with:
- round, pick, overall
- inferred position (from lineup slots)
- player's total fantasy points over the season (from lineup points)
- autodraft status

Why use lineup_data for points?
It already contains the *league's scoring* fantasy points. That's typically better than
recomputing from NFL stats (which would require scoring rules + a player ID mapping).

Usage:
  python enrich_draft.py --draft draft_data.csv --lineups lineup_data.csv --out draft_enriched.csv
  python enrich_draft.py --draft draft_data.csv --out draft_enriched.csv   (points/position will be NaN)
"""

from __future__ import annotations

import argparse
import re
import sys
from pathlib import Path

import pandas as pd


# Slot labels that count as a real position when inferring a player's position
# from lineup slots; any other slot label is used only as a fallback.
# The defense spellings "D/ST", "DST" and "DEF" are all listed.
CORE_POS = {"QB", "RB", "WR", "TE", "K", "D/ST", "DST", "DEF"}


def normalize_player_name(name: str) -> str:
    """Return a lightly cleaned player name suitable for use as a join key.

    Missing values (anything ``pd.isna`` reports as missing) are handed back
    unchanged. Any other value is converted to ``str``, trimmed, has runs of
    whitespace collapsed to a single space, and has every occurrence of
    " Jr." and " Sr." removed.
    """
    if pd.isna(name):
        return name

    cleaned = re.sub(r"\s+", " ", str(name).strip())
    # Drop generational suffixes so "X Jr." and "X" produce the same key.
    for suffix in (" Jr.", " Sr."):
        cleaned = cleaned.replace(suffix, "")
    return cleaned


def load_draft(path: Path) -> pd.DataFrame:
    """Read the draft CSV and prepare it for joining.

    Args:
        path: Location of the draft CSV (passed straight to ``pd.read_csv``).

    Returns:
        A new DataFrame holding every column from the file, with
        ``League_ID`` and ``Year`` cast to int and a ``Player_norm`` column
        (the normalized ``Player`` name) appended.

    Raises:
        ValueError: If any required column is absent from the file.
    """
    raw = pd.read_csv(path)

    expected_cols = (
        "League_ID", "Year", "Player", "Team", "Round", "Pick", "Overall",
        "Is_Autodrafted", "Auto_Draft_Type_ID",
    )
    absent = [col for col in expected_cols if col not in raw.columns]
    if absent:
        raise ValueError(f"draft_data is missing columns: {absent}")

    # assign() returns a copy: existing columns are replaced in place and the
    # new helper column is appended at the end.
    return raw.assign(
        Player_norm=raw["Player"].map(normalize_player_name),
        League_ID=raw["League_ID"].astype(int),
        Year=raw["Year"].astype(int),
    )


def load_lineups(path: Path) -> pd.DataFrame:
    """Read the lineup CSV and prepare it for joining onto draft picks.

    Args:
        path: Location of the lineup CSV (passed straight to ``pd.read_csv``).

    Returns:
        A new DataFrame holding every column from the file plus
        ``Player_norm`` (the normalized ``Player`` name). ``League_ID`` is
        cast to int, ``Points`` is coerced to numeric (unparseable values
        become NaN), and ``Slot`` is converted to text with surrounding
        whitespace stripped. Missing ``Slot`` values stay missing.

    Raises:
        ValueError: If any required column is absent from the file.
    """
    df = pd.read_csv(path)
    required = ["League_ID", "Week", "Team", "Player", "Slot", "Points"]
    missing = [c for c in required if c not in df.columns]
    if missing:
        raise ValueError(f"lineup_data is missing columns: {missing}")

    df = df.copy()
    df["Player_norm"] = df["Player"].map(normalize_player_name)
    df["League_ID"] = df["League_ID"].astype(int)
    # Points may come in as string; coerce safely
    df["Points"] = pd.to_numeric(df["Points"], errors="coerce")

    # astype(str) alone would turn a missing slot into the literal text "nan",
    # which could later be reported as a position. Restore the gaps afterwards.
    slot = df["Slot"]
    df["Slot"] = slot.astype(str).str.strip().where(slot.notna())
    return df


def infer_position_from_slots(lineups: pd.DataFrame) -> pd.DataFrame:
    """Pick one position label per (League_ID, Player_norm) from lineup slots.

    Slot labels are first normalized so the defense variants all read "D/ST".
    For each player in each league, the most frequent slot that is a core
    position wins; a player who never appears in a core slot gets their most
    frequent slot of any kind.

    Returns:
        A DataFrame with columns ``League_ID``, ``Player_norm``, ``Position``
        and one row per (League_ID, Player_norm) pair.
    """
    keys = ["League_ID", "Player_norm"]
    dst_aliases = {
        "DST": "D/ST",
        "DEF": "D/ST",
        "D/ST": "D/ST",
    }

    slot_norm = lineups["Slot"].replace(dst_aliases)
    tagged = lineups.assign(
        Slot_norm=slot_norm,
        Is_core_pos=slot_norm.isin(CORE_POS),
    )

    # How often each player occupied each (normalized) slot in each league.
    tally = (
        tagged.groupby(keys + ["Slot_norm", "Is_core_pos"], dropna=False)
        .size()
        .reset_index(name="n")
    )

    # Within each player: core slots before non-core, then most frequent first,
    # so the first row per player is the one to keep.
    ranked = tally.sort_values(
        by=keys + ["Is_core_pos", "n"],
        ascending=[True, True, False, False],
    )
    best = ranked.drop_duplicates(subset=keys)

    return best.rename(columns={"Slot_norm": "Position"})[keys + ["Position"]]


def season_points_from_lineups(draft: pd.DataFrame, lineups: pd.DataFrame) -> pd.DataFrame:
    """Total each player's lineup points per league.

    Args:
        draft: Not used by the computation; kept in the signature for callers.
        lineups: Lineup rows with ``League_ID``, ``Player_norm`` and ``Points``.

    Returns:
        A DataFrame with columns ``League_ID``, ``Player_norm`` and
        ``Season_Total_Points``. A player whose points are all missing gets
        NaN rather than 0.
    """
    group_keys = ["League_ID", "Player_norm"]
    # min_count=1 keeps an all-NaN group as NaN instead of summing it to 0.
    totals = lineups.groupby(group_keys, dropna=False)["Points"].sum(min_count=1)
    return totals.rename("Season_Total_Points").reset_index()


def main():
    """Command-line entry point: enrich draft picks and write them to CSV.

    Parses ``--draft``, ``--lineups`` (optional) and ``--out``, then loads the
    draft picks. When lineup data is given, the inferred position and the
    season point total are left-joined onto each pick by
    (League_ID, normalized player name). Without ``--lineups`` the
    ``Position`` and ``Season_Total_Points`` columns are present but empty.
    The selected columns are written to ``--out`` and a row count is printed.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--draft", required=True, help="Path to draft_data.csv")
    ap.add_argument("--lineups", required=False, default=None, help="Optional path to lineup_data.csv")
    ap.add_argument("--out", required=True, help="Output CSV path")
    args = ap.parse_args()

    draft_path = Path(args.draft)
    out_path = Path(args.out)

    draft = load_draft(draft_path)

    # Base output columns (always present)
    out = draft[[
        "League_ID", "Year", "Team", "Player",
        "Round", "Pick", "Overall",
        "Is_Autodrafted", "Auto_Draft_Type_ID",
        "Player_norm",
    ]].copy()

    if args.lineups:
        lineups = load_lineups(Path(args.lineups))
        join_keys = ["League_ID", "Player_norm"]

        # NOTE(review): lineup_data is not required to carry a Year column, so
        # the join is per league only. A file spanning several seasons would
        # mix them together; confirm inputs are single-season.

        # The enrichment columns are created by the merges themselves rather
        # than pre-filled. A pre-existing "Season_Total_Points" column would
        # collide with the merged one, pandas would rename both to *_x / *_y,
        # and the final column selection would raise KeyError.
        # validate= guards against a helper returning duplicate keys, which
        # would silently duplicate draft rows.
        pos = infer_position_from_slots(lineups)
        out = out.merge(pos, on=join_keys, how="left", validate="many_to_one")

        pts = season_points_from_lineups(draft, lineups)
        out = out.merge(pts, on=join_keys, how="left", validate="many_to_one")
    else:
        # No lineup data: keep the output schema stable with empty columns.
        out["Position"] = pd.NA
        out["Season_Total_Points"] = pd.NA

    # Final select (drops the Player_norm join helper)
    final = out[[
        "League_ID", "Year", "Team", "Player",
        "Round", "Pick", "Overall",
        "Position",
        "Season_Total_Points",
        "Is_Autodrafted", "Auto_Draft_Type_ID",
    ]].copy()

    final.to_csv(out_path, index=False)
    print(f"Wrote {len(final):,} rows -> {out_path}")


if __name__ == "__main__":
    # Top-level boundary: report any failure as a one-line message instead of
    # a traceback.
    try:
        main()
    except Exception as exc:
        # sys.exit() with a string writes it to stderr and exits with status 1.
        sys.exit(f"ERROR: {exc}")
