In [2]:
import nfl_data_py as nfl
import pandas as pd
import numpy as np
years = [2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023]

In [3]:
def add_games_played_column(weekly_data):
    # Filter the data to include only regular season games
    weekly_data = weekly_data[weekly_data['season_type'] == 'REG']
    
    # Calculate games played per player per season
    weekly_data['GP'] = weekly_data.groupby(['player_display_name', 'season'])['week'].transform('count')
    
    return weekly_data

def add_epa_averages(weekly_data):
    # Calculate the averages of rushing and receiving EPA per player per season
    epa_averages = weekly_data.groupby(['player_display_name', 'season']).agg({
        'rushing_epa': 'mean',
        'receiving_epa': 'mean'
    }).reset_index()
    
    # Rename the columns to indicate they are averages
    epa_averages.rename(columns={
        'rushing_epa': 'avg_rushing_epa',
        'receiving_epa': 'avg_receiving_epa'
    }, inplace=True)
    
    # Merge the averages back into the original weekly_data DataFrame
    weekly_data = pd.merge(weekly_data, epa_averages, on=['player_display_name', 'season'], how='left')
    
    return weekly_data


data = nfl.import_weekly_data(years)
weekly_data = data.sort_values(by=['player_display_name', 'week'], ascending=[True, True])
weekly_data = add_games_played_column(weekly_data)
weekly_data = add_epa_averages(weekly_data)

# Display the columns of the 2018 weekly data to verify the addition of the "GP" column



Downcasting floats.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  weekly_data['GP'] = weekly_data.groupby(['player_display_name', 'season'])['week'].transform('count')


In [4]:
import pandas as pd

def merge_recent_team(seasonal_data, weekly_data):
    # Group weekly data to get the most recent team and other first-occurring attributes
    grouped_weekly = weekly_data.groupby(['player_id', 'season']).agg({
        'player_display_name': 'first',  # Assuming display name doesn't change
        'recent_team': 'last',  # Get the most recent team per season
        'GP': 'first',  # Summing up games played if weekly data has entries per game
        'position': 'first'  # Assuming position doesn't change
    }).reset_index()
    
    # Merge grouped weekly data with seasonal data
    merged_data = pd.merge(grouped_weekly, seasonal_data, on=['player_id', 'season'], how='left')
    return merged_data

season_data = nfl.import_seasonal_data(years)

# Merge and calculate additional columns
season_data = merge_recent_team(season_data, weekly_data)

season_rosters = nfl.import_seasonal_rosters(years)
season_rosters = season_rosters[['season','team','position','player_name','player_id','age','status']]
season_data = pd.merge(season_data, season_rosters[['season','player_id','age','status']], on=['season','player_id'], how='left')

season_data


Unnamed: 0,player_id,season,player_display_name,recent_team,GP,position,season_type,completions,attempts,passing_yards,...,ry_sh,rtd_sh,rfd_sh,rtdfd_sh,dom,w8dom,yptmpa,ppr_sh,age,status
0,00-0006101,2013,Tony Gonzalez,ATL,16,TE,REG,0,0,0.0,...,0.188833,0.307692,0.246696,0.252964,0.248263,0.212605,1.305471,0.147550,37.0,ACT
1,00-0007091,2013,Matt Hasselbeck,IND,3,QB,REG,7,12,130.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.009948,37.0,ACT
2,00-0007091,2014,Matt Hasselbeck,IND,4,QB,REG,30,44,301.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.037559,38.0,ACT
3,00-0007091,2015,Matt Hasselbeck,IND,8,QB,REG,156,256,1690.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.159657,39.0,ACT
4,00-0010346,2013,Peyton Manning,DEN,16,QB,REG,450,659,5477.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.199953,37.0,ACT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6744,00-0039150,2023,Bryce Young,CAR,16,QB,REG,315,527,2877.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.154486,22.0,ACT
6745,00-0039152,2023,Will Levis,TEN,9,QB,REG,149,255,1808.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.159821,24.0,INA
6746,00-0039163,2023,C.J. Stroud,HOU,15,QB,REG,319,499,4108.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.206779,21.0,ACT
6747,00-0039164,2023,Anthony Richardson,IND,4,QB,REG,50,84,577.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.213689,21.0,RES


In [5]:
#QB_NGS
passing_ngs = nfl.import_ngs_data('passing', years)
passing_ngs = passing_ngs.drop(['player_gsis_id','player_first_name','player_last_name','player_jersey_number','player_short_name','week'], axis=1)
passing_ngs = passing_ngs[passing_ngs['season_type'] == 'REG']
passing_ngs = passing_ngs.groupby(['season', 'season_type','player_display_name', 'player_position','team_abbr']).mean()
passing_ngs = passing_ngs.reset_index()
keep_columns = ['season', 'season_type', 'player_display_name', 'player_position', 'team_abbr']
new_columns = {col: col + " / game" if col not in keep_columns else col for col in passing_ngs.columns}
passing_ngs.rename(columns=new_columns, inplace=True)
qb_data = season_data[season_data['position'] == "QB"]
# qb_data = pd.merge(passing_ngs, qb_data, how='right', on=['player_display_name', 'season', 'season_type'])

qbr_data = nfl.import_qbr(years)
qbr_data = qbr_data[['season','season_type','name_display','qbr_total']]
qbr_data.rename(columns={'name_display': 'player_display_name'}, inplace=True)
qbr_data['season_type'] = qbr_data['season_type'].replace('Regular', 'REG')
# qb_data = pd.merge(qbr_data, qb_data, how='right', on=['player_display_name', 'season', 'season_type'])
qb_data['comp %'] = qb_data['completions'] / qb_data['attempts']
qb_data['td:int'] = qb_data['passing_tds'] / qb_data['interceptions']
qb_data['yards/attempts'] = qb_data['passing_yards'] / qb_data['attempts']
qb_data['yards/comp'] = qb_data['passing_yards'] / qb_data['completions']
qb_data['yards/carry'] = qb_data['rushing_yards'] / qb_data['carries']
qb_data["passer rating"] = ((((qb_data['comp %'] - 0.3) * 5) + (((qb_data['passing_yards']/qb_data['attempts']) - 3) * 0.25) + ((qb_data['passing_tds'] / qb_data['attempts']) * 20) + 2.375 - ((qb_data['interceptions'] / qb_data['attempts']) * 25)) / 6) * 100
# qb_data = qb_data.drop()
qb_data.to_csv("qb_data.csv")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  qb_data['comp %'] = qb_data['completions'] / qb_data['attempts']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  qb_data['td:int'] = qb_data['passing_tds'] / qb_data['interceptions']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  qb_data['yards/attempts'] = qb_data['passing_yards'] / qb_data['attemp

In [6]:
#FLEX_NGS
rushing_ngs = nfl.import_ngs_data('rushing', years)
rushing_ngs = rushing_ngs.drop(['player_gsis_id','player_first_name','player_last_name','player_jersey_number','player_short_name','week'], axis=1)
rushing_ngs = rushing_ngs[rushing_ngs['season_type'] == 'REG']
rushing_ngs = rushing_ngs.groupby(['season', 'season_type','player_display_name', 'player_position','team_abbr']).mean()
rushing_ngs = rushing_ngs.reset_index()
keep_columns = ['season', 'season_type', 'player_display_name', 'player_position', 'team_abbr']
new_columns = {col: col + " / game" if col not in keep_columns else col for col in rushing_ngs.columns}
rushing_ngs.rename(columns=new_columns, inplace=True)

rb_data = season_data[season_data['position'].isin(['RB','HB','FB'])]
rb_data['position'] = rb_data['position'].replace({'HB': 'RB', 'FB': 'RB'})
# rb_data = pd.merge(rushing_ngs, rb_data, how='right', on=['player_display_name', 'season', 'season_type'])
rb_data['y/c'] = rb_data['rushing_yards'] / rb_data['carries']
rb_data['y/g'] = rb_data['rushing_yards'] / rb_data['GP']
rb_data['c/g'] = rb_data['carries'] / rb_data['GP']
rb_data['y/rec'] = rb_data['receiving_yards'] / rb_data['receptions']
rb_data['rec/g'] = rb_data['receptions'] / rb_data['GP']
rb_data['y/tgt'] = rb_data['receiving_yards'] / rb_data['targets']
rb_data['catch %'] = 100 * (rb_data['receptions'] / rb_data['targets'])
rb_data['touches'] = rb_data['carries'] + rb_data['receptions']
rb_data['y/touch'] = (rb_data['rushing_yards'] + rb_data['receiving_yards']) / rb_data['touches']
rb_data['rrtd'] = rb_data['rushing_tds'] + rb_data['receiving_tds']
rb_data.to_csv("rb_data.csv")
rb_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rb_data['position'] = rb_data['position'].replace({'HB': 'RB', 'FB': 'RB'})
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rb_data['y/c'] = rb_data['rushing_yards'] / rb_data['carries']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rb_data['y/g'] = rb_data['rushing_yards'] / rb_data['GP']
A value 

Unnamed: 0,player_id,season,player_display_name,recent_team,GP,position,season_type,completions,attempts,passing_yards,...,y/c,y/g,c/g,y/rec,rec/g,y/tgt,catch %,touches,y/touch,rrtd
90,00-0022178,2013,Willis McGahee,CLE,11,RB,REG,0,0,0.0,...,2.731884,34.272727,12.545455,2.500000,0.727273,1.818182,72.727273,146,2.719178,2
91,00-0022397,2013,Vonta Leach,BAL,13,RB,REG,0,0,0.0,...,2.090909,1.769231,0.846154,4.272727,0.846154,2.473684,57.894737,22,3.181818,1
99,00-0022736,2013,Steven Jackson,ATL,12,RB,REG,0,0,0.0,...,3.458599,45.250000,13.083333,5.787879,2.750000,3.897959,67.346939,190,3.863158,7
100,00-0022736,2014,Steven Jackson,ATL,15,RB,REG,0,0,0.0,...,3.721053,47.133333,12.666667,7.400000,1.333333,5.481481,74.074074,210,4.071429,6
101,00-0022736,2015,Steven Jackson,NE,2,RB,REG,0,0,0.0,...,2.380952,25.000000,10.500000,20.000000,0.500000,20.000000,100.000000,22,3.181818,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6728,00-0039032,2023,Tyjae Spears,TEN,17,RB,REG,0,0,0.0,...,4.530000,26.647059,5.882353,7.403846,3.058824,5.500000,74.285714,152,5.513158,3
6729,00-0039040,2023,Devon Achane,MIA,11,RB,REG,0,0,0.0,...,7.766990,72.727273,9.363636,7.296296,2.454545,5.324324,72.972973,130,7.669231,11
6740,00-0039136,2023,Zach Evans,LA,4,RB,REG,0,0,0.0,...,2.111111,4.750000,2.250000,,0.000000,,,9,2.111111,0
6741,00-0039139,2023,Jahmyr Gibbs,DET,15,RB,REG,0,0,0.0,...,5.192308,63.000000,12.133333,6.076923,3.466667,4.450704,73.239437,234,5.388889,11


In [7]:
receiving_ngs = nfl.import_ngs_data('receiving', years)
receiving_ngs = receiving_ngs.drop(['player_gsis_id','player_first_name','player_last_name','player_jersey_number','player_short_name','week'], axis=1)
receiving_ngs = receiving_ngs[receiving_ngs['season_type'] == 'REG']
receiving_ngs = receiving_ngs.groupby(['season', 'season_type','player_display_name', 'player_position','team_abbr']).mean()
receiving_ngs = receiving_ngs.reset_index()
keep_columns = ['season', 'season_type', 'player_display_name', 'player_position', 'team_abbr']
new_columns = {col: col + " / game" if col not in keep_columns else col for col in receiving_ngs.columns}
receiving_ngs.rename(columns=new_columns, inplace=True)

wrte_data = season_data[season_data['position'].isin(['WR','TE'])]
# wrte_data = pd.merge(receiving_ngs, wrte_data, how='right', on=['player_display_name', 'season', 'season_type'])
wrte_data['y/c'] = wrte_data['rushing_yards'] / wrte_data['carries']
wrte_data['y/g'] = wrte_data['rushing_yards'] / wrte_data['GP']
wrte_data['c/g'] = wrte_data['carries'] / wrte_data['GP']
wrte_data['y/rec'] = wrte_data['receiving_yards'] / wrte_data['receptions']
wrte_data['rec/g'] = wrte_data['receptions'] / wrte_data['GP']
wrte_data['y/tgt'] = wrte_data['receiving_yards'] / wrte_data['targets']
wrte_data['catch %'] = 100 * (wrte_data['receptions'] / wrte_data['targets'])
wrte_data['touches'] = wrte_data['carries'] + wrte_data['receptions']
wrte_data['y/touch'] = (wrte_data['rushing_yards'] + wrte_data['receiving_yards']) / wrte_data['touches']
wrte_data['rrtd'] = wrte_data['rushing_tds'] + wrte_data['receiving_tds']
wrte_data.to_csv("wrte_data.csv")
wrte_data


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wrte_data['y/c'] = wrte_data['rushing_yards'] / wrte_data['carries']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wrte_data['y/g'] = wrte_data['rushing_yards'] / wrte_data['GP']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wrte_data['c/g'] = wrte_data['carries'] / wrte_data['GP']
A value is try

Unnamed: 0,player_id,season,player_display_name,recent_team,GP,position,season_type,completions,attempts,passing_yards,...,y/c,y/g,c/g,y/rec,rec/g,y/tgt,catch %,touches,y/touch,rrtd
0,00-0006101,2013,Tony Gonzalez,ATL,16,TE,REG,0,0,0.0,...,,0.000000,0.000000,10.349398,5.187500,7.099174,68.595041,83,10.349398,8
7,00-0015754,2013,Brandon Stokley,BAL,6,WR,REG,0,0,0.0,...,,0.000000,0.000000,8.846154,2.166667,5.476190,61.904762,13,8.846154,0
24,00-0020337,2013,Steve Smith,CAR,15,WR,REG,0,0,0.0,...,,0.000000,0.000000,11.640625,4.266667,6.772727,58.181818,64,11.640625,4
25,00-0020337,2014,Steve Smith,BAL,16,WR,REG,0,0,0.0,...,,0.000000,0.000000,13.481013,4.937500,7.947761,58.955224,79,13.481013,6
26,00-0020337,2015,Steve Smith,BAL,7,WR,REG,0,0,0.0,...,,0.000000,0.000000,14.565217,6.571429,9.178082,63.013699,46,14.565217,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6737,00-0039067,2023,Rashee Rice,KC,16,WR,REG,0,0,0.0,...,-3.000000,-0.187500,0.062500,11.873418,4.937500,9.196078,77.450980,80,11.687500,7
6738,00-0039074,2023,Davis Allen,LA,4,TE,REG,0,0,0.0,...,,0.000000,0.000000,9.500000,2.500000,8.636364,90.909091,10,9.500000,1
6739,00-0039075,2023,Puka Nacua,LA,17,WR,REG,0,0,0.0,...,7.416667,5.235294,0.705882,14.152381,6.176471,9.287500,65.625000,117,13.461538,6
6742,00-0039144,2023,Luke Musgrave,GB,11,TE,REG,0,0,0.0,...,,0.000000,0.000000,10.352941,3.090909,7.652174,73.913043,34,10.352941,1
