In [None]:
import os
from pathlib import Path

import numpy as np
import pandas as pd

from draft_optimizer.src.utils import DATA_DIR

# Notebook configuration: which ESPN league / season to process and where the
# input (ESPN, NFL) and output (production) files live under DATA_DIR.
league_id = 88497130
year = 2022
ESPN_DIR = os.path.join(DATA_DIR, "espn_" + str(league_id), str(year))
NFL_DIR, PROD_DIR = (
    os.path.join(DATA_DIR, source, str(year)) for source in ("nfl", "production")
)

In [None]:
# Load data
espn = pd.read_csv(os.path.join(ESPN_DIR, "pro_players.csv"))
nfl = pd.read_csv(os.path.join(NFL_DIR, "weekly_proj.csv"))

# Match ESPN's naming convention: ESPN identifies a team defense by nickname
# plus " D/ST", so full team names in the NFL file are reduced to the nickname.
name_map = {
    "Arizona Cardinals": "Cardinals",
    "Atlanta Falcons": "Falcons",
    "Baltimore Ravens": "Ravens",
    "Buffalo Bills": "Bills",
    "Carolina Panthers": "Panthers",
    "Chicago Bears": "Bears",
    "Cincinnati Bengals": "Bengals",
    "Cleveland Browns": "Browns",
    "Dallas Cowboys": "Cowboys",
    "Denver Broncos": "Broncos",
    "Detroit Lions": "Lions",
    "Green Bay Packers": "Packers",
    "Houston Texans": "Texans",
    "Indianapolis Colts": "Colts",
    "Jacksonville Jaguars": "Jaguars",
    "Kansas City Chiefs": "Chiefs",
    "Las Vegas Raiders": "Raiders",
    "Los Angeles Chargers": "Chargers",
    "Los Angeles Rams": "Rams",
    "Miami Dolphins": "Dolphins",
    "Minnesota Vikings": "Vikings",
    "New England Patriots": "Patriots",
    "New Orleans Saints": "Saints",
    "New York Giants": "Giants",
    "New York Jets": "Jets",
    "Philadelphia Eagles": "Eagles",
    "Pittsburgh Steelers": "Steelers",
    "San Francisco 49ers": "49ers",
    "Seattle Seahawks": "Seahawks",
    "Tampa Bay Buccaneers": "Buccaneers",
    "Tennessee Titans": "Titans",
    "Washington Football Team": "Commanders",
    # Also accept the club's current name in case the projections file uses it;
    # without this entry such rows would never match ESPN's "Commanders D/ST".
    "Washington Commanders": "Commanders",
}
to_rename = nfl["Player"].isin(name_map.keys())
nfl.loc[to_rename, "Player"] = nfl.loc[to_rename, "Player"].map(name_map)

# The NFL file labels defenses "DEF"; relabel the position and suffix the
# player name so both match ESPN's "<Nickname> D/ST" form.
is_def = nfl["Position"] == "DEF"
nfl.loc[is_def, "Position"] = "D/ST"
nfl.loc[is_def, "Player"] += " D/ST"

# Remove common patterns
def _strip_name_noise(names: pd.Series) -> pd.Series:
    """Normalise player names so the ESPN and NFL spellings can be compared.

    Removes "Jr."/"Sr.", periods and apostrophes anywhere in the name, and a
    trailing generational numeral (II, III, IV, V).

    The numerals are only removed at the END of the name. A plain substring
    replacement of " V" would also eat the start of any surname beginning with
    "V" (e.g. "Marquez Valdes-Scantling" -> "Marquezaldes-Scantling").
    """
    cleaned = names.str.strip()
    # "Jr."/"Sr." must be handled before the bare "." so the letters go too.
    for chars in ["Jr.", "Sr.", ".", "'"]:
        cleaned = cleaned.str.replace(chars, "", regex=False)
    cleaned = cleaned.str.strip()
    # Longer numerals are listed first so "III" is not partially matched as "II".
    cleaned = cleaned.str.replace(r" (?:III|II|IV|V)$", "", regex=True)
    return cleaned.str.strip()


espn["name"] = _strip_name_noise(espn["name"])
nfl["Player"] = _strip_name_noise(nfl["Player"])

# Fix position discrepancies: for these (already cleaned) NFL player names the
# NFL position is overwritten with the value given here so that the
# "<name>_<position>" UID agrees with ESPN's.
nfl_to_espn_pos_map = {
    **dict.fromkeys(
        [
            "Andrew Beck",
            "Ben Mason",
            "Giovanni Ricci",
            "John Lovett",
            "JP Holtz",
            "Reggie Gilliam",
            "Tory Carter",
            "Trevon Wesco",
        ],
        "RB",
    ),
    **dict.fromkeys(
        [
            "Antonio Gandy-Golden",
            "Cethan Carter",
            "Dylan Cantrell",
            "Jeff Driskel",
            "JJ Arcega-Whiteside",
            "Jody Fortson",
            "Jordan Matthews",
            "Juwan Johnson",
            "Lawrence Cager",
        ],
        "TE",
    ),
    "Malcolm Perry": "WR",
}
to_repos = nfl["Player"].isin(list(nfl_to_espn_pos_map))
nfl.loc[to_repos, "Position"] = nfl.loc[to_repos, "Player"].map(nfl_to_espn_pos_map)

# Fix name discrepancies: NFL spelling -> ESPN spelling. Applied after the
# position fixes above, which are keyed on the NFL spelling.
nfl_to_espn_name_map = dict(
    [
        ("Ced Wilson", "Cedrick Wilson"),
        ("DWayne Eskridge", "Dee Eskridge"),
        ("Josh Palmer", "Joshua Palmer"),
        ("Kenneth Walker", "Ken Walker"),
        ("Mike Woods", "Michael Woods"),
        ("Mitchell Trubisky", "Mitch Trubisky"),
        ("Scott Miller", "Scotty Miller"),
        ("Will Fuller", "William Fuller"),
    ]
)
to_rename2 = nfl["Player"].isin(list(nfl_to_espn_name_map))
nfl.loc[to_rename2, "Player"] = nfl.loc[to_rename2, "Player"].map(nfl_to_espn_name_map)

# Build the join key shared by both sources: "<cleaned name>_<position>"
espn["uid"] = espn["name"] + ("_" + espn["position"])
nfl["uid"] = nfl["Player"] + ("_" + nfl["Position"])

# Distinct week labels present in the NFL projections, in order of appearance
NFL_WEEKS = pd.unique(nfl["Week"])

In [None]:
# Manually validate UIDs
espn_uids = set(espn["uid"].unique())
nfl_uids = set(nfl["uid"].unique())

# Set to True to print every UID that exists in only one source, together with
# the UIDs from the other source that contain the same last name. Off by
# default so a normal run produces no output.
VALIDATE_UIDS = False


def _print_uid_candidates(unmatched: set, pool: set) -> None:
    """Print each unmatched UID next to the UIDs in ``pool`` sharing its last name.

    The last name is taken as the final space-separated token of the name part
    of the UID (the text before the first "_").
    """
    for uid in sorted(unmatched):
        last = uid.split("_")[0].split(" ")[-1]
        print(uid, sorted(v for v in pool if last in v))


if VALIDATE_UIDS:
    # Validate missing ESPN UIDs
    _print_uid_candidates(espn_uids - nfl_uids, nfl_uids)

    # Validate missing NFL UIDs
    _print_uid_candidates(nfl_uids - espn_uids, espn_uids)

In [None]:
def compute_points(nfl: pd.DataFrame, points_mode: str, espn_players=None) -> pd.DataFrame:
    """Compute projected fantasy points per player from weekly NFL projections.

    Args:
        nfl: Weekly projections, one row per player and week. Must contain the
            columns "uid", "Week", "Opp" and every stat column listed in the
            scoring table below. Missing values are treated as 0. The frame is
            not modified.
        points_mode: Scoring mode that sets the points per reception:
            "PPR" (1), "Half PPR" (0.5) or "Standard" (0).
        espn_players: ESPN player table with a "uid" column. Defaults to the
            notebook-level ``espn`` frame.

    Returns:
        A copy of the ESPN player table without the "uid" column and with
        "proj_points" (season total) and "proj_weekly_points" (dict of
        week -> points) filled in. Players with no NFL projection get NaN in
        both columns.

    Raises:
        ValueError: If ``points_mode`` is not a supported mode.
        KeyError: If a required column is missing from ``nfl``.
    """
    # Get reception multiplier (fail loudly instead of leaving it unbound)
    rec_mult_by_mode = {"PPR": 1, "Half PPR": 0.5, "Standard": 0}
    if points_mode not in rec_mult_by_mode:
        raise ValueError(
            f"Unsupported points_mode {points_mode!r}; expected one of {sorted(rec_mult_by_mode)}"
        )
    rec_mult = rec_mult_by_mode[points_mode]

    if espn_players is None:
        espn_players = espn  # notebook-level ESPN player table

    # fillna returns a new frame, so the caller's data is left untouched
    nfl = nfl.fillna(0)

    # Compute projections based on league rules
    weights = {
        # Offense
        "Passing Yds": 0.04,
        "Passing TD": 4,
        "Passing Int": -2,
        "Rushing Yds": 0.1,
        "Rushing TD": 6,
        "Receiving Rec": rec_mult,
        "Receiving Yds": 0.1,
        "Receiving TD": 6,
        "Ret TD": 6,
        "Misc FumTD": 6,
        "Misc 2PT": 2,
        "Fum Lost": -2,
        # Kicker ("PAT Missed" and "FG Missed" are intentionally not scored)
        "PAT Made": 1,
        "FG Made 0-19": 3,
        "FG Made 20-29": 3,
        "FG Made 30-39": 3,
        "FG Made 40-49": 4,
        "FG Made 50+": 5,
        # Defense (points against and yards against are not scored)
        "Tackles Sack": 1,
        "Turnover Int": 2,
        "Turnover Fum Rec": 2,
        "Score Saf": 2,
        "Score TD": 6,
        "Score Def 2pt Ret": 1,
    }
    nfl["proj_points"] = sum(nfl[col] * weight for col, weight in weights.items()).round(3)

    # A player scores nothing in a bye week, whatever the stat columns say
    is_bye = nfl["Opp"] == "Bye"
    nfl.loc[is_bye, "proj_points"] = 0

    # Prepare to map: season totals and {uid: {week: points}}.
    # NOTE(review): unstack needs each (uid, Week) pair to be unique - confirm
    # the projections file guarantees that.
    proj = nfl.set_index(["uid", "Week"])["proj_points"]
    proj_total = proj.groupby(level="uid").sum()
    proj_weekly = proj.unstack(level="Week").to_dict(orient="index")

    # Finalize data
    players = espn_players.copy()
    players["proj_points"] = players["uid"].map(proj_total)
    players["proj_weekly_points"] = players["uid"].map(proj_weekly)
    players = players.drop("uid", axis=1)

    return players

In [None]:
def scale_points(players: pd.DataFrame, points_scalar: pd.Series, weeks=None) -> pd.DataFrame:
    """Rescale each player's weekly projections by a per-player multiplier.

    Args:
        players: Player table with an "id" column and a "proj_weekly_points"
            column holding a dict of week -> points (or NaN when the player has
            no projection). The frame is not modified.
        points_scalar: Multiplier per player, indexed by player id.
        weeks: Week labels to keep in the output. Defaults to the
            notebook-level ``NFL_WEEKS``.

    Returns:
        A copy of ``players`` where "proj_weekly_points" holds the scaled
        weekly values rounded to 2 decimals (0 where a value or scalar is
        missing) and "proj_points" holds their sum over ``weeks``.
    """
    if weeks is None:
        weeks = NFL_WEEKS  # notebook-level array of weeks in the NFL projections
    weeks = list(weeks)

    # Copy df
    players = players.copy()

    # Expand the per-player dicts into an id x week frame. This reads the
    # `players` argument, not a notebook global, so each scoring mode is scaled
    # from its own projections. Non-dict entries (NaN) become all-NaN rows.
    weekly_raw = players.set_index("id")["proj_weekly_points"]
    weekly = pd.DataFrame.from_dict(
        {pid: pts for pid, pts in weekly_raw.items() if isinstance(pts, dict)},
        orient="index",
    ).reindex(index=weekly_raw.index, columns=weeks)

    # Scale points (rows are aligned with points_scalar on player id)
    scaled_proj_weekly_points = weekly.multiply(points_scalar, axis=0).fillna(0).round(2)
    scaled_proj_weekly_points = scaled_proj_weekly_points[weeks]
    scaled_proj_points = scaled_proj_weekly_points.sum(axis=1)

    # Update df
    players["proj_weekly_points"] = players["id"].map(scaled_proj_weekly_points.to_dict(orient="index"))
    players["proj_points"] = players["id"].map(scaled_proj_points)

    return players

In [None]:
def output_players(players: pd.DataFrame, points_mode: str):
    """Write ``players`` to ``PROD_DIR`` as ``players_<points_mode>.csv``.

    The scoring mode is lower-cased and its spaces become underscores to form
    the file name. The output directory is created if it does not exist, and
    the frame's index is not written.
    """
    out_dir = Path(PROD_DIR)
    if not out_dir.is_dir():
        out_dir.mkdir(parents=True, exist_ok=True)

    points_mode_slug = "_".join(points_mode.lower().split(" "))
    target = os.path.join(PROD_DIR, f"players_{points_mode_slug}.csv")
    players.to_csv(target, index=False)

In [None]:
# Raw projections for each scoring mode
players_ppr, players_half_ppr = (
    compute_points(nfl, mode) for mode in ("PPR", "Half PPR")
)

# Per-player scalar = ESPN's PPR season projection / our PPR season projection.
# Infinite ratios (our projection is 0) and undefined ones become 0.
espn_proj_points_ppr = espn.set_index("id")["proj_points"]
points_scalar = (
    espn_proj_points_ppr.div(players_ppr.set_index("id")["proj_points"])
    .replace({np.inf: 0})
    .fillna(0)
)

# Apply the scalar to both modes
players_ppr_scaled = scale_points(players_ppr, points_scalar)
# Note: this isn't perfect as the scalar is derived from PPR
players_half_ppr_scaled = scale_points(players_half_ppr, points_scalar)

In [None]:
# Write one production CSV per scoring mode
output_players(points_mode="PPR", players=players_ppr_scaled)
output_players(points_mode="Half PPR", players=players_half_ppr_scaled)