In [1]:
#!pip install nfl_data_py
import nfl_data_py as nfl
import pandas as pd
from datetime import datetime
from dateutil.relativedelta import relativedelta
# Player master table, loaded once for the whole notebook; the helper
# functions below look players up in it by their 'gsis_id' column.
py_players = nfl.import_players()

In [3]:
# Inclusive range of seasons whose ADP csv files are processed below.
first_year, last_year = 2015, 2025
years_csvs = [season for season in range(first_year, last_year + 1)]

## Functions

In [None]:
# Player position
def get_pos(py_players, player_id):
    """Return the position of the player identified by ``player_id``.

    The lookup matches ``player_id`` against the 'gsis_id' column of
    ``py_players``.

    Returns:
        The 'position' value when exactly one row matches, "D/ST" when no
        row matches, and "--" when several rows share the id.
    """
    matches = py_players[py_players['gsis_id'] == player_id]
    n_matches = len(matches)
    if n_matches == 0:
        return "D/ST"
    if n_matches > 1:
        return "--"
    return matches['position'].values[0]

# Availability
def get_availability(rost, injs, players, player_id, max_year):
    """Fraction of regular-season games a player was available for.

    Availability is computed over the seasons from the player's rookie
    season (never earlier than 2009) to ``min(last_season, max_year)`` as

        (games active + games missed for non-injury reasons)
        / (games listed as active or inactive)

    Args:
        rost: Weekly roster rows with 'season', 'player_id', 'week' and
            'status' columns.
        injs: Injury report rows with 'season', 'gsis_id', 'report_status'
            and 'report_primary_injury' columns.
        players: Player table with 'gsis_id', 'rookie_season' and
            'last_season' columns.
        player_id: GSIS id of the player.
        max_year: Last season to include; must be at least 2009 and earlier
            than the current calendar year.

    Returns:
        The availability ratio as a float, or "--" when ``max_year`` is out
        of range, the player is unknown, the career window does not overlap
        2009..max_year, or no rostered game is found.
    """
    if max_year >= datetime.now().year or max_year < 2009:
        return "--"

    player_info = players[players['gsis_id'] == player_id]
    if player_info.empty:
        return "--"

    # The season columns may hold floats or missing values, neither of which
    # range() accepts. A missing bound falls back to the edge of the
    # 2009..max_year window so the roster rows themselves decide the result.
    rookie_season = player_info['rookie_season'].values[0]
    first_year = 2009 if pd.isna(rookie_season) else max(int(rookie_season), 2009)
    if first_year > max_year:
        return "--"

    final_season = player_info['last_season'].values[0]
    last_year = max_year if pd.isna(final_season) else int(final_season)
    if last_year < 2009:
        return "--"
    last_year = min(last_year, max_year)
    years = list(range(first_year, last_year + 1))

    # Regular-season roster rows only: weeks 1-17 through 2020, 1-18 afterwards.
    weekly_rosters = rost[rost['season'].isin(years)]
    player_weekly = weekly_rosters[weekly_rosters['player_id'] == player_id]
    player_weekly = player_weekly[player_weekly['week'] <= 18]
    player_weekly = player_weekly[
        ((player_weekly['season'] <= 2020) & (player_weekly['week'] <= 17))
        | (player_weekly['season'] > 2020)
    ]

    # Statuses that count as "on a team": inactive ones are missed games,
    # 'ACT' is a game played. Any other status is ignored entirely.
    status_inactive = ['EXE', 'INA', 'PUP', 'RES', 'RSN', 'RSR', 'SUS']
    status_active = ['ACT']
    n_games_available = len(
        player_weekly[player_weekly['status'].isin(status_inactive + status_active)]
    )
    n_games_played = len(player_weekly[player_weekly['status'].isin(status_active)])

    # Games missed for non-injury reasons (coach's decision, rest, personal
    # matter) are credited back as "available".
    injuries = injs[injs['season'].isin(years)]
    not_injured = ['Coaching', "Coach's decision", 'Coaching Decision ', "Coach's Decision", "Not injury related - coach's decision",
                   'Not injury related - personal matter', 'personal matter', 'Ankle [Not Injury Related - Personal, Thursday Only]',
                   'Not injury related - resting player']
    player_injuries = injuries[injuries['gsis_id'] == player_id]
    player_rest = player_injuries[
        (player_injuries['report_status'].isin(['Out', 'Doubtful'])) &
        (player_injuries['report_primary_injury'].isin(not_injured))
    ]
    n_rest_games = len(player_rest)

    if n_games_available == 0:
        return "--"
    return (n_games_played + n_rest_games) / n_games_available

# Age
def get_age(py_players, player_id, year=None):
    """Player's age in whole years on September 1st of ``year - 1``.

    Args:
        py_players: Player table with 'gsis_id' and 'birth_date' columns;
            'birth_date' is parsed as a "%Y-%m-%d" string.
        player_id: GSIS id of the player.
        year: Season the age is computed for. When omitted, the function
            falls back to a notebook-level variable named ``year`` (the
            behaviour existing call sites rely on).

    Returns:
        The age as an int, "D/ST" when no row matches ``player_id``, and
        "--" when several rows match or the birth date is missing.

    Raises:
        NameError: if ``year`` is neither passed nor defined globally.
    """
    player = py_players[py_players['gsis_id'] == player_id]
    if player.empty:
        return "D/ST"
    if len(player) != 1:
        return "--"

    birth_date = player['birth_date'].values[0]
    if pd.isna(birth_date):
        # strptime() cannot parse a missing value; report "no data" instead.
        return "--"

    if year is None:
        # Legacy path: older callers depend on the loop variable `year`
        # living in the notebook namespace.
        year = globals().get('year')
        if year is None:
            raise NameError("get_age needs a season: pass `year` or define a global `year`")

    season_start = datetime(year - 1, 9, 1)
    born = datetime.strptime(birth_date, "%Y-%m-%d")
    # The delta is negative (birth precedes the reference date), hence abs().
    return abs(relativedelta(born, season_start).years)

# Rookie season
def get_rookie_season(py_players, player_id):
    """Return the rookie season of the player identified by ``player_id``.

    The lookup matches ``player_id`` against the 'gsis_id' column of
    ``py_players``.

    Returns:
        The 'rookie_season' value when exactly one row matches, "D/ST" when
        no row matches, and "--" when several rows share the id.
    """
    candidates = py_players[py_players['gsis_id'] == player_id]
    if candidates.empty:
        return "D/ST"
    if len(candidates) != 1:
        return "--"
    return candidates['rookie_season'].values[0]

# Previous year snap share
def snap_share(players, ids, snap_counts, player_id, year):
    """Mean regular-season snap percentage of a player in ``year``.

    Args:
        players: Player table with a 'gsis_id' column.
        ids: Id crosswalk with 'gsis_id' and 'pfr_id' columns.
        snap_counts: Snap count rows with 'season', 'week', 'pfr_player_id',
            'position', 'offense_pct' and 'st_pct' columns.
        player_id: GSIS id of the player.
        year: Season to summarise; seasons before 2012 are rejected.

    Returns:
        The mean of 'st_pct' when the player's first row lists position 'K',
        otherwise the mean of 'offense_pct'. Returns -1 when the season is
        before 2012, the player or their PFR id is unknown, or the player
        has no regular-season snap rows for that season.
    """
    if year < 2012:
        return -1
    if players[players['gsis_id'] == player_id].empty:
        return -1
    player_ids = ids[ids['gsis_id'] == player_id]
    if player_ids.empty:
        return -1
    pfr_id = player_ids['pfr_id'].values[0]

    season_snaps = snap_counts[snap_counts['season'] == year]
    player_snaps = season_snaps[season_snaps['pfr_player_id'] == pfr_id]
    # Regular season only: weeks 1-17 through 2020, 1-18 afterwards.
    last_week = 17 if year <= 2020 else 18
    player_snaps = player_snaps[player_snaps['week'] <= last_week]
    # Check emptiness AFTER the week filter: a player with only postseason
    # rows would otherwise fail on the positional lookup below.
    if player_snaps.empty:
        return -1

    if player_snaps['position'].values[0] == 'K':
        return player_snaps['st_pct'].mean()
    return player_snaps['offense_pct'].mean()

# Fantasy points variance for previous season
def fan_pts_var(weekly, id, year, ppr=True):
    """Variance of a player's weekly fantasy points in one regular season.

    Args:
        weekly: Weekly stat rows with 'season', 'week', 'player_id',
            'fantasy_points' and 'fantasy_points_ppr' columns.
        id: Player id matched against the 'player_id' column.
        year: Season to summarise; seasons before 1999 are rejected.
        ppr: When True (default) use 'fantasy_points_ppr', otherwise use
            'fantasy_points'.

    Returns:
        The sample variance (pandas ``Series.var``) of the selected column,
        or "--" when the season is before 1999 or the player has no
        regular-season rows in that season.
    """
    if year < 1999:
        return "--"
    # Regular season only: weeks 1-17 through 2020, 1-18 afterwards.
    last_week = 17 if year <= 2020 else 18
    in_season = weekly[(weekly['season'] == year) & weekly['week'].between(1, last_week)]
    player = in_season[in_season['player_id'] == id]
    if player.empty:
        return "--"
    # Choose the column from the flag itself; the flag must not be rebound
    # to a computed variance, or a value of 0.0 would flip the selection.
    points_column = 'fantasy_points_ppr' if ppr else 'fantasy_points'
    return player[points_column].var()

# Player's cap percentage for that season
def get_player_apy_cap_pct(name, contracts, player_id, year):
    """Cap percentage ('apy_cap_pct') of the contract in force for ``year``.

    Args:
        name: Player name; not used in the lookup itself.
        contracts: Contract rows with 'gsis_id', 'year_signed', 'years' and
            'apy_cap_pct' columns.
        player_id: GSIS id of the player.
        year: Season of interest.

    Returns:
        The 'apy_cap_pct' of the most recently signed contract with
        ``year_signed <= year``, or "--" when the player has no contracts,
        ``year`` precedes the first signing, or that contract is treated as
        expired.
    """
    history = contracts[contracts['gsis_id'] == player_id]
    if history.empty:
        return "--"
    if year < history['year_signed'].min():
        return "--"

    signed_by_then = history[history['year_signed'] <= year]
    latest = signed_by_then.sort_values(by='year_signed', ascending=False).iloc[0]

    # NOTE(review): a contract counts as active through year_signed + years
    # inclusive - confirm that the extra season is intended.
    contract_end = latest['year_signed'] + latest['years']
    if contract_end < year:
        return "--"
    return latest['apy_cap_pct']

# Draft pick
def player_pick(players, player_id):
    """Return the 'draft_pick' value of the player with ``player_id``.

    The first row of ``players`` whose 'gsis_id' equals ``player_id`` is
    used; ``None`` is returned when no row matches.
    """
    matches = players[players['gsis_id'] == player_id]
    if not matches.empty:
        return matches['draft_pick'].values[0]
    return None

# Pick value
def pick_value(picks, pick):
    """Look up the 'stuart' draft value of a pick number.

    Args:
        picks: Draft value table with 'pick' and 'stuart' columns. When
            ``None``, the table is fetched with ``nfl.import_draft_values()``.
        pick: Overall pick number; may be missing (None/NaN).

    Returns:
        The 'stuart' value for ``pick``, or 0 when the pick is missing,
        outside 1..262, or absent from the table.
    """
    if pd.isna(pick) or pick < 1 or pick > 262:
        return 0
    # Use the table supplied by the caller; re-downloading it on every call
    # (once per row under DataFrame.apply) is needlessly slow.
    if picks is None:
        picks = nfl.import_draft_values()
    matches = picks.loc[picks['pick'] == pick, 'stuart']
    if matches.empty:
        return 0
    return matches.values[0]

## Executing

In [None]:
# Reference tables that do not depend on the season: fetched once, up front.
values = nfl.import_draft_values()
ids_players = nfl.import_ids()
conts = nfl.import_contracts()

# For every season file, add each derived column that is not there yet and
# write the file back in place. Season-specific data is only downloaded when
# the column that needs it is missing.
# NOTE: the loop variable must stay named `year`; get_age() is called without
# a season argument and picks it up from the notebook namespace.
for year in years_csvs:
    # The 2024 file is semicolon-delimited; every other season uses commas.
    separator = ";" if year == 2024 else ","
    csv_path = f"adp_full_gsispos_{year}.csv"
    adp_full = pd.read_csv(csv_path, sep=separator)
    # Drop index columns leaked by earlier exports that wrote the DataFrame
    # index (pandas reads a header-less column back as "Unnamed: N").
    adp_full = adp_full.loc[:, ~adp_full.columns.str.startswith("Unnamed:")]
    print(f"\nPlayers on csv from {year}: {len(adp_full)}")

    # Player position
    if 'position' in adp_full.columns:
        print("+Position already in file")
    else:
        adp_full['position'] = adp_full.apply(
            lambda row: get_pos(py_players, row['gsis_id']),
            axis=1
        )
        print("-Added position to file")

    # Availability (needs rosters and injury reports for 2009..year-1)
    if 'availability' in adp_full.columns:
        print("+Availability already in file")
    else:
        yrs = list(range(2009, year))
        rostrs = nfl.import_weekly_rosters(yrs)
        injrs = nfl.import_injuries(yrs)
        adp_full['availability'] = adp_full.apply(
            lambda row: get_availability(rostrs, injrs, py_players, row['gsis_id'], year-1),
            axis=1
        )
        print("-Added availability to file")

    # Age
    if 'age' in adp_full.columns:
        print("+Age already in file")
    else:
        adp_full['age'] = adp_full.apply(
            lambda row: get_age(py_players, row['gsis_id']),
            axis=1
        )
        print("-Added age to file")

    # Rookie season
    if 'rookie_season' in adp_full.columns:
        print("+Rookie season already in file")
    else:
        adp_full['rookie_season'] = adp_full.apply(
            lambda row: get_rookie_season(py_players, row['gsis_id']),
            axis=1
        )
        print("-Added rookie season to file")

    # Previous year snap share
    if 'snap_share' in adp_full.columns:
        print("+Snap share already in file")
    else:
        snaps = nfl.import_snap_counts([year-1])
        adp_full['snap_share'] = adp_full.apply(
            lambda row: snap_share(py_players, ids_players, snaps, row['gsis_id'], year-1),
            axis=1
        )
        print("-Added snap share to file")

    # Fantasy points variance for previous season
    if 'fan_pts_var' in adp_full.columns:
        print("+Fantasy pts variance already in file")
    else:
        wd = nfl.import_weekly_data([year-1])
        adp_full['fan_pts_var'] = adp_full.apply(
            lambda row: fan_pts_var(wd, row['gsis_id'], year-1),
            axis=1
        )
        print("-Added fantasy variance to file")

    # Player's cap percentage. Older exports used the misspelled column name
    # 'apt_cap_pct'; migrate it, and skip the work when the correctly named
    # column already exists (same skip-if-present rule as every other column).
    if 'apy_cap_pct' in adp_full.columns:
        print("+Fantasy cap percentage already in file")
    elif 'apt_cap_pct' in adp_full.columns:
        print("+Fantasy cap percentage already in file")
        adp_full = adp_full.rename(columns={'apt_cap_pct':'apy_cap_pct'})
    else:
        adp_full['apy_cap_pct'] = adp_full.apply(
            lambda row: get_player_apy_cap_pct(row['Player'], conts, row['gsis_id'], year),
            axis=1
        )
        print("-Added cap percentage to file")

    # Draft pick
    if 'draft_pick' in adp_full.columns:
        print("+Draft pick already in file")
    else:
        adp_full['draft_pick'] = adp_full.apply(
            lambda row: player_pick(py_players, row['gsis_id']),
            axis=1
        )
        print("-Added draft pick to file")

    # Pick value
    if 'pick_value' in adp_full.columns:
        print("+Pick value already in file")
    else:
        adp_full['pick_value'] = adp_full.apply(
            lambda row: pick_value(values, row['draft_pick']),
            axis=1
        )
        print("-Added pick value to file")

    # Export csv with the separator it was read with and without the index,
    # so the next run can parse the file and no extra column accumulates.
    adp_full.to_csv(csv_path, sep=separator, index=False)
    print(f"Finished for year {year}.")


Players on csv from 2018: 487

Players on csv from 2019: 1034

Players on csv from 2020: 391
-Added rookie season to file
-Added snap share to file
Downcasting floats.
-Added fantasy variance to file
-Added cap percentage to file

Players on csv from 2021: 499
-Added rookie season to file
-Added snap share to file
Downcasting floats.
-Added fantasy variance to file
-Added cap percentage to file

Players on csv from 2022: 294
-Added rookie season to file
-Added snap share to file
Downcasting floats.
-Added fantasy variance to file
-Added cap percentage to file
