In [55]:
import numpy as np
import pandas as pd
import nfl_data_py as nfl
from typing import Dict, List

In [None]:
STANDARD = {
    "passing_yards": 1/25,                 
    "passing_tds": 4.0,                   
    "passing_interceptions": -2.0,         
    "passing_2pt_conversions": 2.0,        
    "sack_yards_lost": 0.0,               
    "sack_fumbles_lost": -2.0,           

    "rushing_yards": 1/10,                 
    "rushing_tds": 6.0,
    "rushing_fumbles_lost": -2.0,
    "rushing_2pt_conversions": 2.0,

    "receiving_yards": 1/10,              
    "receiving_tds": 6.0,
    "receiving_fumbles_lost": -2.0,
    "receiving_2pt_conversions": 2.0,

    "misc_yards": 0.0,                   
}


In [None]:
seasons = [2020, 2021, 2022, 2023, 2024]
weekly = nfl.import_weekly_data(seasons)
ids = nfl.import_ids()

def pick(cols, options):
    return next((c for c in options if c in cols), None)

id_name = pick(ids.columns, ["display_name", "player_name", "full_name", "player_display_name", "name"])
id_pos  = pick(ids.columns, ["position", "pos"])
id_team = pick(ids.columns, ["team", "recent_team"])

ids_small = ids[["gsis_id"] + [c for c in [id_name, id_pos, id_team] if c]].copy()

weekly_named = weekly.merge(
    ids_small,
    how="left",
    left_on="player_id",
    right_on="gsis_id",
    suffixes=("", "_ids")
).drop(columns=["gsis_id"])

wk_name = pick(weekly_named.columns, ["player_display_name", "player_name", "player"])
wk_pos  = pick(weekly_named.columns, ["position", "pos", "position_group"])
wk_team = pick(weekly_named.columns, ["recent_team", "team"])

if id_name:
    weekly_named["player_name"] = weekly_named[id_name].combine_first(
        weekly_named[wk_name] if wk_name else pd.Series([pd.NA]*len(weekly_named))
    )
else:
    weekly_named["player_name"] = weekly_named[wk_name] if wk_name else pd.NA

if wk_pos and id_pos:
    weekly_named["position"] = weekly_named[wk_pos].combine_first(weekly_named[id_pos])
elif wk_pos:
    weekly_named["position"] = weekly_named[wk_pos]
elif id_pos:
    weekly_named["position"] = weekly_named[id_pos]
else:
    weekly_named["position"] = pd.NA

if wk_team and id_team:
    weekly_named["team"] = weekly_named[wk_team].combine_first(weekly_named[id_team])
elif wk_team:
    weekly_named["team"] = weekly_named[wk_team]
elif id_team:
    weekly_named["team"] = weekly_named[id_team]
else:
    weekly_named["team"] = pd.NA

weekly_named["team"] = weekly_named["team"].replace({"FA": pd.NA})

weekly_named = weekly_named[weekly_named['week'] == 1]
cols_to_show = ["season", "player_id", "player_name", "position", "team"]
print(weekly_named[cols_to_show].head(10))


Downcasting floats.
     season   player_id         player_name position team
0      2020  00-0019596           Tom Brady       QB   TB
20     2020  00-0020531          Drew Brees       QB   NO
34     2020  00-0022127        Jason Witten       TE   LV
46     2020  00-0022921    Larry Fitzgerald       WR  ARI
59     2020  00-0022924  Ben Roethlisberger       QB  PIT
75     2020  00-0022942       Philip Rivers       QB  IND
100    2020  00-0023459       Aaron Rodgers       QB   GB
118    2020  00-0023500          Frank Gore       RB  NYJ
133    2020  00-0023682    Ryan Fitzpatrick       QB  MIA
154    2020  00-0025394     Adrian Peterson       RB  DET


In [58]:
assert {"season", "week", "player_id", "position"}.issubset(weekly_named.columns)

# Drop rows with missing position and sort

wk_pos = weekly_named.dropna(subset=["position"]).sort_values(["season", "player_id", "week"])


# Get the index of the first week for each player in each season
first_idx = wk_pos.groupby(["season", "player_id"])['week'].idxmin()


# Select those rows and rename position column
pos_start = wk_pos.loc[first_idx, ["season", "player_id", "position"]].rename(columns={"position": "position_season_start"})


# Merge back to weekly_named
weekly_named = weekly_named.merge(pos_start, on=["season", "player_id"], how="left")


# If position_ids exists, fill missing position_season_start with it
if "position_ids" in weekly_named.columns:
    weekly_named["position_season_start"] = weekly_named["position_season_start"].combine_first(weekly_named["position_ids"])


weekly_named["position_used"] = weekly_named["position_season_start"]


print(weekly_named[["season","week","player_id","player_name","position","position_used","team"]].head(12))


    season  week   player_id         player_name position position_used team
0     2020     1  00-0019596           Tom Brady       QB            QB   TB
1     2020     1  00-0020531          Drew Brees       QB            QB   NO
2     2020     1  00-0022127        Jason Witten       TE            TE   LV
3     2020     1  00-0022921    Larry Fitzgerald       WR            WR  ARI
4     2020     1  00-0022924  Ben Roethlisberger       QB            QB  PIT
5     2020     1  00-0022942       Philip Rivers       QB            QB  IND
6     2020     1  00-0023459       Aaron Rodgers       QB            QB   GB
7     2020     1  00-0023500          Frank Gore       RB            RB  NYJ
8     2020     1  00-0023682    Ryan Fitzpatrick       QB            QB  MIA
9     2020     1  00-0025394     Adrian Peterson       RB            RB  DET
10    2020     1  00-0025396        Ted Ginn Jr.       WR            WR  CHI
11    2020     1  00-0025418          Greg Olsen       TE            TE  SEA

In [59]:
seasons = [2020,2021,2022,2023,2024]
player_stats = nfl.import_seasonal_data(seasons)

print("Player Stats:")
print(player_stats.head())

weekly_stats = nfl.import_weekly_data(seasons)

print("\nWeekly Stats:")
print(weekly_stats.head())

schedules = nfl.import_schedules(seasons)

print("\nSchedules:")
print(schedules.head())

pbp = nfl.import_pbp_data([2023])

print("\nPlay by Play Data:")
print(pbp.head())

player_ids = nfl.import_ids()

print("\nPlayer IDs:")
print(player_ids.head())


Player Stats:
    player_id  season season_type  completions  attempts  passing_yards  \
0  00-0019596    2020         REG          401       610         4633.0   
1  00-0019596    2021         REG          485       719         5316.0   
2  00-0019596    2022         REG          490       733         4694.0   
3  00-0020531    2020         REG          275       390         2942.0   
4  00-0022127    2020         REG            0         0            0.0   

   passing_tds  interceptions  sacks  sack_yards  ...    yac_sh    wopr_y  \
0           40           12.0   21.0       143.0  ...  0.000000  0.000000   
1           43           12.0   22.0       144.0  ...  0.000000  0.000000   
2           25            9.0   22.0       160.0  ...  0.000000  0.004477   
3           24            6.0   13.0        89.0  ...  0.000000  0.000000   
4            0            0.0    0.0         0.0  ...  0.014804  0.101830   

      ry_sh    rtd_sh    rfd_sh  rtdfd_sh       dom     w8dom    yptmpa 

In [60]:
def compute_fantasy_points(df: pd.DataFrame, scoring: Dict) -> pd.Series:
    """Vectorized fantasy point calc that gracefully handles missing columns."""
    def g(col: str, default=0.0):
        return df[col] if col in df.columns else 0.0

    # Base components
    pts = (
        g("passing_yards") / scoring["pass_yds_per_pt"]
        + g("passing_tds") * scoring["pass_td"]
        + g("interceptions") * scoring["int"]
        + g("rushing_yards") / scoring["rush_yds_per_pt"]
        + g("rushing_tds") * scoring["rush_td"]
        + g("receiving_yards") / scoring["rec_yds_per_pt"]
        + g("receiving_tds") * scoring["rec_td"]
        + g("receptions") * scoring["rec"]
        + g("fumbles_lost") * scoring["fumbles_lost"]
    )

    return pts

In [64]:
weekly = nfl.import_weekly_data(seasons)

# Merge player names/positions/teams
ids = nfl.import_ids()
weekly = weekly.merge(
    ids[["gsis_id", "name", "position", "team"]],
    how="left",
    left_on="player_id",
    right_on="gsis_id",
).drop(columns=["gsis_id"])

# Choose your scoring
SCORING = STANDARD  # or STANDARD / HALF_PPR / SIX_PT_PASS_TD / your custom dict

# Compute points per row (i.e., per player-week)
weekly["fantasy_points"] = compute_fantasy_points(weekly, STANDARD).round(2)

print(weekly[["season","week","name","team","fantasy_points"]].head(10))
print("\nSeason totals (top 10 per season):")
season_totals = weekly.groupby(["season", "name", "position", "team"])["fantasy_points"].sum().reset_index()
print(season_totals.groupby("season").head(10).to_string(index=False))


Downcasting floats.
   season  week       name team  fantasy_points
0    2020     1  Tom Brady   FA           20.46
1    2020     2  Tom Brady   FA           10.68
2    2020     3  Tom Brady   FA           23.88
3    2020     4  Tom Brady   FA           32.46
4    2020     5  Tom Brady   FA           14.12
5    2020     6  Tom Brady   FA           14.64
6    2020     7  Tom Brady   FA           36.86
7    2020     8  Tom Brady   FA           19.06
8    2020     9  Tom Brady   FA            2.36
9    2020    10  Tom Brady   FA           31.84

Season totals (top 10 per season):
   season  week       name team  fantasy_points
0    2020     1  Tom Brady   FA           20.46
1    2020     2  Tom Brady   FA           10.68
2    2020     3  Tom Brady   FA           23.88
3    2020     4  Tom Brady   FA           32.46
4    2020     5  Tom Brady   FA           14.12
5    2020     6  Tom Brady   FA           14.64
6    2020     7  Tom Brady   FA           36.86
7    2020     8  Tom Brady   FA 

KeyError: 'position'

In [None]:
ids = nfl.import_ids()

name_col = ids.columns
name_col

Index(['draft_pick', 'pff_id', 'yahoo_id', 'stats_global_id', 'mfl_id', 'age',
       'ktc_id', 'espn_id', 'draft_round', 'fantasypros_id', 'nfl_id',
       'pfr_id', 'rotoworld_id', 'name', 'rotowire_id', 'draft_ovr',
       'birthdate', 'sportradar_id', 'draft_year', 'twitter_username',
       'weight', 'merge_name', 'position', 'fantasy_data_id', 'team',
       'cfbref_id', 'db_season', 'height', 'stats_id', 'gsis_id', 'swish_id',
       'college', 'sleeper_id', 'cbs_id', 'fleaflicker_id'],
      dtype='object')