# Obtaining general stats

### Libraries

In [1]:
import nfl_data_py as nfl
import pandas as pd
from datetime import datetime
from dateutil.relativedelta import relativedelta
# Master player table from nfl_data_py; later cells filter it by 'display_name' and 'gsis_id'.
py_players = nfl.import_players()

### Preparing data

Defining parameters for functions

In [2]:
# ADP year to process; it is interpolated into the input and output file names.
year = 2019
# Path of the FantasyPros overall ADP export for that year.
filename = f"fp_original_files/FantasyPros_{year}_Overall_ADP_Rankings.csv"
# Field delimiter handed to pd.read_csv.
separator = ","

Reading csv with Fantasy Pros' ADP

In [4]:
# Load the FantasyPros ADP export, then report its row count and first rows as a sanity check.
adp = pd.read_csv(filename, sep=separator)
total_rows = adp.shape[0]
print(f"Total players on csv file: {total_rows}.")
print(adp.head())

Total players on csv file: 1046.
   Rank               Player Team  Bye  POS  ESPN  Sleeper  NFL  RTSports  \
0     1       Saquon Barkley  PHI   10  RB1   1.0      NaN  1.0       1.0   
1     2  Christian McCaffrey   SF    4  RB2   2.0      NaN  3.0       2.0   
2     3         Alvin Kamara   NO    9  RB3   3.0      NaN  2.0       3.0   
3     4      Ezekiel Elliott  NaN  NaN  RB4   4.0      NaN  4.0       4.0   
4     5      DeAndre Hopkins  BAL    8  WR1   5.0      NaN  5.0       6.0   

   FFC  Fantrax  AVG  
0  NaN      1.0    1  
1  NaN      2.0  2.5  
2  NaN      3.0  2.5  
3  NaN      4.0    4  
4  NaN      6.0  5.3  


Adding gsis_id to the players whose name matches exactly one entry in the players table (all others get the placeholder "--")

In [5]:
def get_gsis_id(py_players, player_name):
    """Return the gsis_id of the player whose display name equals `player_name`.

    The placeholder "--" is returned when the name matches no row, or more
    than one row, of `py_players`.
    """
    matching_ids = py_players.loc[py_players['display_name'] == player_name, 'gsis_id']
    # Only an unambiguous (single) match yields an id.
    if len(matching_ids) != 1:
        return "--"
    return matching_ids.values[0]
    
# Look up the gsis_id for every ADP row; names with zero or several matches keep the "--" placeholder.
adp['gsis_id'] = adp.apply(
    lambda row: get_gsis_id(py_players, row['Player']),
    axis=1
)
# index=False keeps the positional index out of the file, so reading it back
# with pd.read_csv does not create a stray "Unnamed: 0" column.
# NOTE: running this cell again overwrites any manual edits already made to the file.
adp.to_csv(f"adp_{year}_pre_manual.csv", index=False)
empties = adp[adp['gsis_id']=='--']
print(f"Players with no gsis_id found:{len(empties)}. First ten players: ")
print(empties.head(10))

Players with no gsis_id found:187. First ten players: 
    Rank              Player Team  Bye   POS  ESPN  Sleeper   NFL  RTSports  \
5      6       David Johnson  NaN  NaN   RB5   6.0      NaN   8.0       7.0   
10    11      Michael Thomas  NaN  NaN   WR4   8.0      NaN  11.0      14.0   
19    20  Patrick Mahomes II   KC   12   QB1  18.0      NaN  14.0      22.0   
20    21       Antonio Brown  NaN  NaN   WR8  21.0      NaN  19.0      21.0   
21    22          Mike Evans   TB    7   WR9  22.0      NaN  21.0      20.0   
31    32     Aaron Jones Sr.  MIN   12  RB15  36.0      NaN  36.0      31.0   
35    36       Chris Carson   NaN  NaN  RB18  34.0      NaN  47.0      32.0   
39    40        Robert Woods  PIT    7  WR17  39.0      NaN  37.0      50.0   
44    45         Mark Ingram   II  NaN  RB22  42.0      NaN  50.0      43.0   
51    52         James White  NaN  NaN  RB25  54.0      NaN  63.0      49.0   

    FFC  Fantrax   AVG gsis_id  
5   NaN      5.0   6.7      --  
10  NaN  

#### Start filling missing players manually

Printing every player in the players table that shares a name with an unmatched ADP entry, to pick which gsis_id to use. After choosing, edit the file named adp_{year}_pre_manual.csv by hand!

In [6]:
# For every unresolved ADP name, list all py_players rows that share the display name
# so the right gsis_id can be picked by hand.
empties_exp = empties[['Player', 'gsis_id']]
detail_columns = ['display_name', 'position', 'rookie_season', 'draft_pick', 'gsis_id']
for unresolved_name in list(empties_exp['Player']):
    candidates = py_players[py_players['display_name'] == unresolved_name]
    # Names without any candidate simply produce no output.
    candidate_fields = zip(*(candidates[column].values for column in detail_columns))
    for name, position, rookie_year, pick, gsis in candidate_fields:
        print(f"{name}: position ({position}), rookie season ({rookie_year}) and pick ({pick}). gsis_id: {gsis}")

David Johnson: position (TE), rookie season (2009) and pick (241.0). gsis_id: 00-0026957
David Johnson: position (RB), rookie season (2015) and pick (86.0). gsis_id: 00-0032187
Michael Thomas: position (DB), rookie season (2012) and pick (nan). gsis_id: 00-0028908
Michael Thomas: position (WR), rookie season (2016) and pick (47.0). gsis_id: 00-0032765
Antonio Brown: position (WR), rookie season (2003) and pick (nan). gsis_id: 00-0021425
Antonio Brown: position (WR), rookie season (2010) and pick (195.0). gsis_id: 00-0027793
Mike Evans: position (DT), rookie season (1992) and pick (101.0). gsis_id: 00-0004999
Mike Evans: position (WR), rookie season (2014) and pick (7.0). gsis_id: 00-0031408
Robert Woods: position (WR), rookie season (2013) and pick (41.0). gsis_id: 00-0030431
Robert Woods: position (WR), rookie season (1978) and pick (134.0). gsis_id: WOO568269
Robert Woods: position (OT), rookie season (1974) and pick (nan). gsis_id: WOO582840
Mark Ingram: position (WR), rookie season

Looking for players one by one

In [131]:
# Ad-hoc lookup: regex search on the lower-cased display name to find one player by hand.
name_pattern = '.*gary.*jennings.*'
lowercase_names = py_players['display_name'].str.lower()
specific_player = py_players[lowercase_names.str.contains(name_pattern)]
shown_columns = ['display_name', 'gsis_id','position','rookie_season','birth_date']
print(specific_player[shown_columns])


        display_name     gsis_id position  rookie_season  birth_date
11026  Gary Jennings  00-0035589       WR           2019  1997-03-07


### Adding stats to the filled file

Position and season

In [3]:
# Reload the ADP table from disk (the notebook asks for this file to be edited by hand first),
# so whatever gsis_id values the file now holds are the ones used from here on.
adp_full = pd.read_csv(f"adp_{year}_pre_manual.csv")
def get_pos(py_players, player_id):
    """Look up the position stored in `py_players` for `player_id`.

    Returns "D/ST" when the id is absent from the table and "--" when the
    id appears on more than one row.
    """
    positions = py_players.loc[py_players['gsis_id'] == player_id, 'position']
    n_matches = len(positions)
    if n_matches == 0:
        return "D/ST"
    if n_matches > 1:
        return "--"
    return positions.values[0]
    
# Attach each player's position; ids missing from py_players are labelled "D/ST" by get_pos.
adp_full['position'] = adp_full['gsis_id'].map(
    lambda player_id: get_pos(py_players, player_id)
)

# The season column holds the year before the ADP year.
previous_season = year - 1
adp_full['season'] = previous_season

Availability

In [4]:
def get_availability(rost, injs, players, player_id, max_year):
    """Return the fraction of a player's rostered weeks in which he was available.

    The ratio is (weeks with roster status 'ACT' + injury-report rows marked
    'Out'/'Doubtful' for a non-injury reason) divided by (weeks with status
    'ACT' or one of the inactive statuses). Seasons run from 2009 (or the
    player's rookie season, if later) through min(last season, max_year);
    only weeks up to 17 (seasons <= 2020) or up to 18 (later seasons) count.

    Args:
        rost: weekly roster table with 'season', 'week', 'player_id' and
            'status' columns.
        injs: injury report table with 'season', 'gsis_id', 'report_status'
            and 'report_primary_injury' columns.
        players: player table with 'gsis_id', 'display_name',
            'rookie_season' and 'last_season' columns.
        player_id: gsis_id of the player to evaluate.
        max_year: last season to include; must be 2009 or later and earlier
            than the current calendar year.

    Returns:
        The availability ratio as a float, or the string "--" (after
        printing an ERROR line) when it cannot be computed.
    """
    current_year = datetime.now().year
    if max_year >= current_year or max_year < 2009:
        print(f"ERROR: max_year ({max_year}) has to be 2009 or later and earlier than the current year ({current_year}).")
        return "--"

    # Get years played
    player_info = players[players['gsis_id'] == player_id]
    if player_info.empty:
        print(f"ERROR: Player {player_id} not found.")
        return "--"
    player_name = player_info['display_name'].values[0]
    rookie_year = player_info['rookie_season'].values[0]
    final_year = player_info['last_season'].values[0]
    # A missing season cannot be turned into a range of years.
    if pd.isna(rookie_year) or pd.isna(final_year):
        print(f"ERROR: Player {player_id} has no rookie season or last season recorded.")
        return "--"
    # int() lets float-typed season columns be used with range() below.
    first_year = max(int(rookie_year), 2009)
    if first_year > max_year:
        print(f"ERROR: Player {player_id} first year ({first_year}) is after max_year ({max_year}).")
        return "--"
    last_year = int(final_year)
    if last_year < 2009:
        print(f"ERROR: Player {player_id} last year ({last_year}) is before 2009 so there is no data available.")
        return "--"
    last_year = min(last_year, max_year)
    years = list(range(first_year, last_year + 1))

    # Get games rostered and games active
    weekly_rosters = rost[rost['season'].isin(years)]
    player_weekly = weekly_rosters[weekly_rosters['player_id'] == player_id]
    player_weekly = player_weekly[player_weekly['week'] <= 18]
    # Through 2020 only weeks up to 17 are kept; later seasons keep week 18 as well.
    player_weekly = player_weekly[((player_weekly['season'] <= 2020) & (player_weekly['week'] <= 17)) | (player_weekly['season'] > 2020)]
    status_inactive = ['EXE', 'INA', 'PUP', 'RES', 'RSN', 'RSR', 'SUS']
    status_active = ['ACT']
    # Any other status (e.g. 'DEV', 'CUT', 'NWT', 'RET', 'RFA', 'TRC', 'TRD',
    # 'TRL', 'TRT', 'UFA', 'E14') is excluded from both numerator and denominator.
    games_available = player_weekly[player_weekly['status'].isin(status_inactive + status_active)]
    n_games_available = len(games_available)
    games_played = player_weekly[player_weekly['status'].isin(status_active)]
    n_games_played = len(games_played)

    # Missed due to coach decision
    injuries = injs[injs['season'].isin(years)]
    not_injured = ['Coaching', "Coach's decision", 'Coaching Decision ', "Coach's Decision", "Not injury related - coach's decision",
                   'Not injury related - personal matter', 'personal matter', 'Ankle [Not Injury Related - Personal, Thursday Only]',
                   'Not injury related - resting player']
    player_injuries = injuries[injuries['gsis_id'] == player_id]
    player_rest = player_injuries[
        (player_injuries['report_status'].isin(['Out','Doubtful'])) &
        (player_injuries['report_primary_injury'].isin(not_injured))
    ]
    # These reports are added back to the numerator as if the player had been available.
    n_rest_games = len(player_rest)

    if n_games_available == 0:
        print(f"ERROR: Player {player_id} {player_name} hasn't played a game.")
        return "--"
    return (n_games_played + n_rest_games) / n_games_available

# Weekly rosters and injury reports for every season from 2009 up to the one before the ADP year.
yrs = [*range(2009, year)]
rostrs = nfl.import_weekly_rosters(yrs)
injrs = nfl.import_injuries(yrs)
# Sanity check: latest season present in each download.
print(rostrs['season'].max())
print(injrs['season'].max())
adp_full['availability'] = adp_full['gsis_id'].map(
    lambda player_id: get_availability(rostrs, injrs, py_players, player_id, year - 1)
)

2016
2016.0
ERROR: Player 00-0033856 first year (2017) is before max_year (2016).
ERROR: Player 00-0033280 first year (2017) is before max_year (2016).
ERROR: Player 00-0033893 first year (2017) is before max_year (2016).
ERROR: Player 00-0033923 first year (2017) is before max_year (2016).
ERROR: Player 00-0033897 first year (2017) is before max_year (2016).
ERROR: Player 00-0033908 first year (2017) is before max_year (2016).
ERROR: Player -- not found.
ERROR: Player 00-0033871 first year (2017) is before max_year (2016).
ERROR: Player 00-0033891 first year (2017) is before max_year (2016).
ERROR: Player 00-0033526 first year (2017) is before max_year (2016).
ERROR: Player 00-0033948 first year (2017) is before max_year (2016).
ERROR: Player 00-0033906 first year (2017) is before max_year (2016).
ERROR: Player 00-0033879 first year (2017) is before max_year (2016).
ERROR: Player -- not found.
ERROR: Player -- not found.
ERROR: Player 00-0033881 first year (2017) is before max_year (2

Age and rookie year

In [5]:
def get_age(py_players, player_id):
    """Return the player's age in whole years on September 1st of the year before `year`.

    Uses the notebook-level `year`. Returns "D/ST" when `player_id` is not in
    `py_players` and "--" when it appears on more than one row.
    """
    matches = py_players[py_players['gsis_id'] == player_id]
    n_matches = len(matches)
    if n_matches == 0:
        return "D/ST"
    if n_matches != 1:
        return "--"
    date_format = "%Y-%m-%d"
    reference_day = datetime.strptime(f"{year-1}-09-01", date_format)
    birth_day = datetime.strptime(matches['birth_date'].values[0], date_format)
    # The delta runs from the reference day back to the birth day, hence the abs().
    return abs(relativedelta(birth_day, reference_day).years)
    
# Age of every player, computed by get_age from the birth date in py_players.
adp_full['age'] = adp_full['gsis_id'].map(
    lambda player_id: get_age(py_players, player_id)
)

def get_rookie_season(py_players, player_id):
    """Return the rookie season stored in `py_players` for `player_id`.

    Returns "D/ST" when the id is absent from the table and "--" when the
    id appears on more than one row.
    """
    seasons = py_players.loc[py_players['gsis_id'] == player_id, 'rookie_season']
    n_matches = len(seasons)
    if n_matches == 0:
        return "D/ST"
    if n_matches > 1:
        return "--"
    return seasons.values[0]

# Rookie season of every player, looked up in py_players by gsis_id.
adp_full['rookie_season'] = adp_full['gsis_id'].map(
    lambda player_id: get_rookie_season(py_players, player_id)
)


Previous year snap share

In [6]:
def snap_share(players, ids, snap_counts, player_id, year):
    """Return a player's mean snap percentage over the given season.

    The mean covers weeks up to 17 (seasons <= 2020) or up to 18 (later
    seasons). It uses 'st_pct' when the first remaining row lists the
    position 'K', and 'offense_pct' otherwise.

    Args:
        players: player table with 'gsis_id' and 'display_name' columns.
        ids: id crosswalk with 'gsis_id' and 'pfr_id' columns.
        snap_counts: snap count table with 'season', 'week', 'pfr_player_id',
            'position', 'offense_pct' and 'st_pct' columns.
        player_id: gsis_id of the player.
        year: season to evaluate; must be 2012 or later.

    Returns:
        The mean snap percentage, or -1 (after printing an ERROR line) when
        the year is too early, the player is unknown, or no rows remain for
        that season.
    """
    if year < 2012:
        print(f"ERROR: year ({year}) is before the first year of snap count available (2012).")
        return -1
    player = players[players['gsis_id']==player_id]
    if player.empty:
        print(f"ERROR: Player {player_id} not found.")
        return -1
    name = player['display_name'].values[0]
    player_ids = ids[ids['gsis_id']==player_id]
    if player_ids.empty:
        print(f"ERROR: Player {player_id} ({name}) not found.")
        return -1
    pfr_id = player_ids['pfr_id'].values[0]
    season_snaps = snap_counts[snap_counts['season']==year]
    player_snaps = season_snaps[season_snaps['pfr_player_id']==pfr_id]
    # Apply the week cut-off BEFORE the emptiness check, so a player whose only
    # rows are later weeks is reported as "no data" instead of failing on .values[0].
    last_week = 17 if year <= 2020 else 18
    player_snaps = player_snaps[player_snaps['week'] <= last_week]
    if player_snaps.empty:
        print(f"ERROR: There was no data for player {player_id} ({name}) on that year ({year}).")
        return -1
    if player_snaps['position'].values[0]=='K':
        return player_snaps['st_pct'].mean()
    return player_snaps['offense_pct'].mean()

# ID crosswalk (used by snap_share to go from gsis_id to pfr_id) and the previous year's snap counts.
ids_players = nfl.import_ids()
snaps = nfl.import_snap_counts([year-1])
adp_full['snap_share'] = adp_full['gsis_id'].map(
    lambda player_id: snap_share(py_players, ids_players, snaps, player_id, year - 1)
)


ERROR: There was no data for player 00-0028434 (Doug Baldwin) on that year (2016).
ERROR: There was no data for player 00-0033856 (Leonard Fournette) on that year (2016).
ERROR: There was no data for player 00-0033280 (Christian McCaffrey) on that year (2016).
ERROR: There was no data for player 00-0028825 (Terrelle Pryor) on that year (2016).
ERROR: There was no data for player 00-0030656 (Isaiah Crowell) on that year (2016).
ERROR: There was no data for player 00-0033893 (Dalvin Cook) on that year (2016).
ERROR: There was no data for player 00-0025399 (Marshawn Lynch) on that year (2016).
ERROR: There was no data for player 00-0033923 (Kareem Hunt) on that year (2016).
ERROR: There was no data for player 00-0033897 (Joe Mixon) on that year (2016).
ERROR: There was no data for player 00-0031373 (Martavis Bryant) on that year (2016).
ERROR: There was no data for player 00-0029854 (C.J. Anderson) on that year (2016).
ERROR: There was no data for player 00-0026019 (Danny Woodhead) on tha

Fantasy points variance

In [7]:
def fan_pts_var(weekly, id, year, ppr=True):
    """Return the variance of a player's weekly fantasy points in one season.

    Only weeks 1-17 (seasons <= 2020) or 1-18 (later seasons) are used.

    Args:
        weekly: weekly stats table with 'season', 'week', 'player_id',
            'fantasy_points' and 'fantasy_points_ppr' columns.
        id: player id matched against the 'player_id' column.
        year: season to evaluate; must be 1999 or later.
        ppr: when True (default) use 'fantasy_points_ppr', otherwise
            'fantasy_points'.

    Returns:
        The variance of the selected column (NaN for a single game), or the
        string "--" (after printing an ERROR line) when the year is too
        early or the player has no rows in that season.
    """
    if year < 1999:
        print(f"ERROR: Year ({year}) must be 1999 or later.")
        return "--"
    last_week = 17 if year <= 2020 else 18
    in_season = (weekly['season'] == year) & weekly['week'].between(1, last_week)
    player = weekly[in_season & (weekly['player_id'] == id)]
    if player.empty:
        print(f"ERROR: Player with ID {id} not found in year {year}.")
        return "--"
    # Branch on the flag itself; the computed variance must not overwrite it,
    # otherwise ppr=False is ignored and a 0.0 variance flips the scoring format.
    points_column = 'fantasy_points_ppr' if ppr else 'fantasy_points'
    return player[points_column].var()

# Weekly player stats for the previous year, then the fantasy point variance per player.
wd = nfl.import_weekly_data([year-1])
adp_full['fan_pts_var'] = adp_full['gsis_id'].map(
    lambda player_id: fan_pts_var(wd, player_id, year - 1)
)

Downcasting floats.
ERROR: Player with ID 00-0033856 not found in year 2016.
ERROR: Player with ID 00-0033280 not found in year 2016.
ERROR: Player with ID 00-0033893 not found in year 2016.
ERROR: Player with ID 00-0025399 not found in year 2016.
ERROR: Player with ID 00-0033923 not found in year 2016.
ERROR: Player with ID 00-0033897 not found in year 2016.
ERROR: Player with ID 00-0031373 not found in year 2016.
ERROR: Player with ID 00-0024333 not found in year 2016.
ERROR: Player with ID 00-0033908 not found in year 2016.
ERROR: Player with ID -- not found in year 2016.
ERROR: Player with ID 00-0033871 not found in year 2016.
ERROR: Player with ID 00-0033891 not found in year 2016.
ERROR: Player with ID 00-0033526 not found in year 2016.
ERROR: Player with ID 00-0033948 not found in year 2016.
ERROR: Player with ID 00-0033906 not found in year 2016.
ERROR: Player with ID 00-0020578 not found in year 2016.
ERROR: Player with ID 00-0033879 not found in year 2016.
ERROR: Player with 

Cap percentage

In [8]:
def get_player_apy_cap_pct(name, contracts, player_id, year):
    """Return 'apy_cap_pct' of the most recent contract signed in or before `year`.

    `name` is only used in the error message. The string "--" is returned
    (after printing an ERROR line) when the player has no contracts, when
    `year` precedes the first signing, or when year_signed + years of the
    selected contract is lower than `year`.
    """
    player_contracts = contracts[contracts['gsis_id'] == player_id]
    if player_contracts.empty:
        print(f"ERROR: did not find contracts for player {player_id} ({name})")
        return "--"

    first_signing = player_contracts['year_signed'].min()
    if year < first_signing:
        print(f"ERROR Player ({player_id}) first contract was on {first_signing}, you tried {year}.")
        return "--"

    # Most recently signed contract that already existed in `year`.
    signed_by_year = player_contracts[player_contracts['year_signed'] <= year]
    latest = signed_by_year.sort_values(by='year_signed', ascending=False).iloc[0]

    # Check that the contract had not run out before `year`.
    contract_end = latest['year_signed'] + latest['years']
    if contract_end < year:
        print(f"ERROR: Player ({player_id}) last contract ended before {year}.")
        return "--"
    return latest['apy_cap_pct']

# Contract table, then the cap percentage of each player's contract for the ADP year.
conts = nfl.import_contracts()
adp_full['apt_cap_pct'] = [
    get_player_apy_cap_pct(player_name, conts, player_id, year)
    for player_name, player_id in zip(adp_full['Player'], adp_full['gsis_id'])
]

ERROR: did not find contracts for player -- (Denver Broncos)
ERROR: did not find contracts for player -- (Seattle Seahawks)
ERROR: did not find contracts for player -- (New York Giants)
ERROR: did not find contracts for player -- (Kansas City Chiefs)
ERROR: did not find contracts for player -- (Houston Texans)
ERROR: did not find contracts for player -- (Minnesota Vikings)
ERROR: did not find contracts for player -- (Arizona Cardinals)
ERROR: did not find contracts for player -- (New England Patriots)
ERROR: did not find contracts for player -- (Carolina Panthers)
ERROR: did not find contracts for player -- (Pittsburgh Steelers)
ERROR: did not find contracts for player -- (Jacksonville Jaguars)
ERROR: did not find contracts for player -- (Los Angeles Rams)
ERROR: did not find contracts for player -- (Baltimore Ravens)
ERROR: did not find contracts for player -- (Philadelphia Eagles)
ERROR: did not find contracts for player -- (Atlanta Falcons)
ERROR: did not find contracts for player -

Draft pick and Draft Pick value (Stuart)

In [9]:
def player_pick(players, player_id):
    """Return the 'draft_pick' value of the first `players` row matching `player_id`.

    Prints an ERROR line and returns None when the id is not in the table.
    """
    picks = players.loc[players['gsis_id'] == player_id, 'draft_pick']
    if picks.empty:
        print(f"ERROR: player {player_id} not found.")
        return None
    return picks.values[0]

def pick_value(picks, pick):
    """Return the 'stuart' draft value of `pick` from the `picks` table.

    Args:
        picks: draft value table with 'pick' and 'stuart' columns. When None,
            the table is loaded with nfl.import_draft_values().
        pick: overall draft pick number; may be missing (None/NaN).

    Returns:
        The 'stuart' value of the pick, or 0 when the pick is missing,
        outside 1..262, or not present in the table.
    """
    if pd.isna(pick) or pick < 1 or pick > 262:
        return 0
    if picks is None:
        # Fallback only: normally the caller passes the table once, so it is
        # not loaded again for every row.
        picks = nfl.import_draft_values()
    matching_values = picks.loc[picks['pick'] == pick, 'stuart']
    if matching_values.empty:
        return 0
    return matching_values.values[0]

# Draft value table, then each player's draft pick and the value assigned to that pick.
values = nfl.import_draft_values()
adp_full['draft_pick'] = adp_full['gsis_id'].map(
    lambda player_id: player_pick(py_players, player_id)
)
adp_full['pick_value'] = adp_full['draft_pick'].map(
    lambda pick: pick_value(values, pick)
)

ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.
ERROR: player -- not found.


In [10]:
# Persist the enriched ADP table for this year; with the default to_csv settings
# the DataFrame index is written as the first column.
adp_full.to_csv(f"adp_full_gsispos_{year}.csv")