# Imports

In [1]:
import nfl_data_py as nfl
import pandas as pd
import numpy as np

pd.set_option('display.max_columns', None)

In [2]:
# Import weekly injuries

injuries = nfl.import_injuries(range(2009,2025))

In [3]:
# Import seasonal rosters

rosters_seas = nfl.import_seasonal_rosters(range(2009, 2025))

In [4]:
# Import weekly rosters

rosters_weekly = nfl.import_weekly_rosters(range(2009,2025))

In [5]:
# Import seasonal stats

stats_seas = nfl.import_seasonal_data(range(2009,2025), 'ALL')

**Datasets contain information for player positions that are not fantasy football positions. Fantasy positions are QB, RB, WR, TE and K. Need to condense down to relevant positions**

In [6]:
rosters_weekly_ff = rosters_weekly[rosters_weekly['position'].isin(['RB', 'QB', 'TE', 'WR', 'K'])]
rosters_seas_ff = rosters_seas[rosters_seas['position'].isin(['RB', 'QB', 'TE', 'WR', 'K'])]
injuries_ff = injuries[injuries['position'].isin(['RB', 'QB', 'TE', 'WR', 'K'])]


# Info checks

Looking to explore what unique IDs are available for eventual merge of datasets

## Injuries

Available unique ID - Number of nulls:
1. gsis_id - 0

In [7]:
# Injuries

injuries_ff.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 27052 entries, 0 to 6213
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   season                     27052 non-null  float64       
 1   game_type                  27052 non-null  object        
 2   team                       27052 non-null  object        
 3   week                       27052 non-null  float64       
 4   gsis_id                    27052 non-null  object        
 5   position                   27052 non-null  object        
 6   full_name                  27052 non-null  object        
 7   first_name                 27052 non-null  object        
 8   last_name                  27052 non-null  object        
 9   report_primary_injury      18597 non-null  object        
 10  report_secondary_injury    1096 non-null   object        
 11  report_status              18600 non-null  object        
 12  pract

## Seasonal Roster

Available unique ID - number of nulls:
1. player_id - 0
2. espn_id - 3358
3. sportradar_id - 2999
4. yahoo_id - 3976
5. rotowire_id - 2961
6. pff_id - 4096
7. pfr_id - 5830
8. fantasy_data_id - 4751
9. sleeper_id - 3483
10. esb_id - 14
11. gsis_it_id - 2272
12. smart_id - 44

In [8]:
# Seasonal roster

rosters_seas_ff.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 13934 entries, 1 to 42818
Data columns (total 37 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   season                   13934 non-null  int32         
 1   team                     13934 non-null  object        
 2   position                 13934 non-null  object        
 3   depth_chart_position     9315 non-null   object        
 4   jersey_number            13852 non-null  object        
 5   status                   13931 non-null  object        
 6   player_name              13934 non-null  object        
 7   first_name               13934 non-null  object        
 8   last_name                13934 non-null  object        
 9   birth_date               13042 non-null  datetime64[ns]
 10  height                   13932 non-null  float64       
 11  weight                   13932 non-null  float64       
 12  college                  11573 n

In [9]:
# See how many each ID type is missing for reference

ids = rosters_seas_ff.columns[rosters_seas_ff.columns.str.contains('id', case=False)].tolist()

for i, id in enumerate(ids, 1):
  print(f'{i}. {id} - {rosters_seas_ff[id].isna().sum()}')

1. player_id - 0
2. espn_id - 3358
3. sportradar_id - 2999
4. yahoo_id - 3976
5. rotowire_id - 2961
6. pff_id - 4096
7. pfr_id - 5830
8. fantasy_data_id - 4751
9. sleeper_id - 3483
10. esb_id - 14
11. gsis_it_id - 2272
12. smart_id - 44


## Weekly Roster

Available unique ID - number of nulls:
1. player_id - 86
2. espn_id - 40216
3. sportradar_id - 34927
4. yahoo_id - 49752
5. rotowire_id - 34358
6. pff_id - 51743
7. pfr_id - 75772
8. fantasy_data_id - 62375
9. sleeper_id - 42208
10. esb_id - 33
11. gsis_it_id - 30137
12. smart_id - 227

In [10]:
# Weekly rosters

rosters_weekly_ff.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 206429 entries, 19 to 641697
Data columns (total 37 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   season                   206429 non-null  int32         
 1   team                     206429 non-null  object        
 2   position                 206429 non-null  object        
 3   depth_chart_position     138519 non-null  object        
 4   jersey_number            206131 non-null  object        
 5   status                   206425 non-null  object        
 6   player_name              206429 non-null  object        
 7   first_name               206429 non-null  object        
 8   last_name                206429 non-null  object        
 9   birth_date               197386 non-null  datetime64[ns]
 10  height                   206341 non-null  float64       
 11  weight                   206427 non-null  float64       
 12  college        

In [11]:
ids = rosters_weekly_ff.columns[rosters_weekly_ff.columns.str.contains('id', case=False)].tolist()

for i, id in enumerate(ids, 1):
  print(f'{i}. {id} - {rosters_weekly_ff[id].isna().sum()}')

1. player_id - 86
2. espn_id - 40216
3. sportradar_id - 34927
4. yahoo_id - 49752
5. rotowire_id - 34358
6. pff_id - 51743
7. pfr_id - 75772
8. fantasy_data_id - 62375
9. sleeper_id - 42208
10. esb_id - 33
11. gsis_it_id - 30137
12. smart_id - 227


## Seasonal Stats

Available unique ID - number of nulls:
1. player_id - 0

In [12]:
# Season stats

stats_seas.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11744 entries, 0 to 11743
Data columns (total 58 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   player_id                    11744 non-null  object 
 1   season                       11744 non-null  int64  
 2   season_type                  11744 non-null  object 
 3   completions                  11744 non-null  int32  
 4   attempts                     11744 non-null  int32  
 5   passing_yards                11744 non-null  float64
 6   passing_tds                  11744 non-null  int32  
 7   interceptions                11744 non-null  float64
 8   sacks                        11744 non-null  float64
 9   sack_yards                   11744 non-null  float64
 10  sack_fumbles                 11744 non-null  int32  
 11  sack_fumbles_lost            11744 non-null  int32  
 12  passing_air_yards            11744 non-null  float64
 13  passing_yards_af

In [13]:
ids = stats_seas.columns[stats_seas.columns.str.contains('id', case=False)].tolist()

for i, id in enumerate(ids, 1):
  print(f'{i}. {id} - {stats_seas[id].isna().sum()}')

1. player_id - 0


**Across all four datasets, there are a variety of unique IDs, not all of them overlap. Additionally, the seasonal statistics do not have any player names, only player IDs.**

My plan is to use player ID from one of the datasets along with name so I can add my own unique ID to all datasets for a merge.

In [14]:
# Check how many unique player IDs exists in each dataset

dfs = [stats_seas, rosters_weekly_ff, rosters_seas_ff]

df_names = ["stats_seas", "rosters_weekly", "rosters_seas"]

for i, (df, name) in enumerate(zip(dfs, df_names), 1):
  print(f'{i}. {name} - {df["player_id"].nunique()} unique player IDs')


1. stats_seas - 2847 unique player IDs
2. rosters_weekly - 3821 unique player IDs
3. rosters_seas - 3817 unique player IDs


In [15]:
# Get unique player IDs from each dataframe
weekly_ids = rosters_weekly_ff['player_id'].unique()
season_ids = rosters_seas_ff['player_id'].unique()

# Convert to sets for set difference operation
weekly_ids_set = set(weekly_ids)
season_ids_set = set(season_ids)

# Find IDs in weekly but not in season
ids_in_weekly_not_in_season = weekly_ids_set - season_ids_set

# Convert the result to a list
ids_list = list(ids_in_weekly_not_in_season)

print(f"Number of player IDs in rosters_weekly but not in rosters_seas: {len(ids_list)}")

# Create a dataframe with these IDs and their details from rosters_weekly
missing_players_df = rosters_weekly[rosters_weekly['player_id'].isin(ids_list)]

# To get a unique list of these players (without duplicates)
unique_missing_players_df = missing_players_df.drop_duplicates(subset=['player_id'])

# Display the resulting dataframe
print(unique_missing_players_df[['player_id', 'player_name', 'position', 'status']])

Number of player IDs in rosters_weekly but not in rosters_seas: 5
         player_id     player_name position status
63556         None   John Babinecz       LB   None
247679  00-0031876       Matt Wile        P    ACT
252267  00-0032466     George Fant       OL    ACT
348658  00-0034128    Nate Wozniak       OL    DEV
448828  00-0035833  Brandon Wright        K    CUT


In [16]:
ids_in_weekly_not_in_season = list(ids_in_weekly_not_in_season)

In [17]:
ids_in_weekly_not_in_season

['00-0034128', '00-0035833', None, '00-0032466', '00-0031876']

In [18]:


for id in ids_in_weekly_not_in_season:
  display(rosters_weekly_ff[rosters_weekly_ff['player_id'] == id].head())

Unnamed: 0,season,team,position,depth_chart_position,jersey_number,status,player_name,first_name,last_name,birth_date,height,weight,college,player_id,espn_id,sportradar_id,yahoo_id,rotowire_id,pff_id,pfr_id,fantasy_data_id,sleeper_id,years_exp,headshot_url,ngs_position,week,game_type,status_description_abbr,football_name,esb_id,gsis_it_id,smart_id,entry_year,rookie_year,draft_club,draft_number,age
391417,2019,MIN,TE,TE,79.0,CUT,Nate Wozniak,Nate,Wozniak,1994-08-30,82.0,302.0,Minnesota,00-0034128,,,,,,,,,1.0,https://static.www.nfl.com/image/private/f_aut...,,1,REG,,Nate,WOZ425791,46390,3200574f-5a42-5791-5d77-20b7540ca069,2018.0,2018.0,,,25.024
391420,2019,ATL,TE,TE,79.0,CUT,Nate Wozniak,Nate,Wozniak,1994-08-30,82.0,280.0,Minnesota,00-0034128,,,,,,,,,1.0,https://static.www.nfl.com/image/private/f_aut...,,4,REG,,Nate,WOZ425791,46390,3200574f-5a42-5791-5d77-20b7540ca069,2018.0,2018.0,,,25.081
391422,2019,ATL,TE,TE,79.0,DEV,Nate Wozniak,Nate,Wozniak,1994-08-30,82.0,280.0,Minnesota,00-0034128,,,,,,,,,1.0,https://static.www.nfl.com/image/private/f_aut...,,3,REG,,Nate,WOZ425791,46390,3200574f-5a42-5791-5d77-20b7540ca069,2018.0,2018.0,,,25.062
391429,2019,MIN,TE,TE,79.0,CUT,Nate Wozniak,Nate,Wozniak,1994-08-30,82.0,302.0,Minnesota,00-0034128,,,,,,,,,1.0,https://static.www.nfl.com/image/private/f_aut...,,2,REG,,Nate,WOZ425791,46390,3200574f-5a42-5791-5d77-20b7540ca069,2018.0,2018.0,,,25.043


Unnamed: 0,season,team,position,depth_chart_position,jersey_number,status,player_name,first_name,last_name,birth_date,height,weight,college,player_id,espn_id,sportradar_id,yahoo_id,rotowire_id,pff_id,pfr_id,fantasy_data_id,sleeper_id,years_exp,headshot_url,ngs_position,week,game_type,status_description_abbr,football_name,esb_id,gsis_it_id,smart_id,entry_year,rookie_year,draft_club,draft_number,age
448828,2020,JAX,K,K,1.0,CUT,Brandon Wright,Brandon,Wright,1997-02-08,70.0,182.0,Georgia State,00-0035833,,61980614-1609-4b68-b11f-05aa30fefb89,33149,14954,,,22292,7296,0.0,,,4,REG,,Brandon,WRI116406,52735,32005752-4911-6406-49fa-f1b22d3d5c4c,2020.0,2020.0,,,23.652
448829,2020,JAX,K,K,1.0,CUT,Brandon Wright,Brandon,Wright,1997-02-08,70.0,182.0,Georgia State,00-0035833,,61980614-1609-4b68-b11f-05aa30fefb89,33149,14954,,,22292,7296,0.0,,,7,REG,,Brandon,WRI116406,52735,32005752-4911-6406-49fa-f1b22d3d5c4c,2020.0,2020.0,,,23.71
448830,2020,JAX,K,K,1.0,CUT,Brandon Wright,Brandon,Wright,1997-02-08,70.0,182.0,Georgia State,00-0035833,,61980614-1609-4b68-b11f-05aa30fefb89,33149,14954,,,22292,7296,0.0,,,5,REG,,Brandon,WRI116406,52735,32005752-4911-6406-49fa-f1b22d3d5c4c,2020.0,2020.0,,,23.671
448832,2020,JAX,K,K,1.0,ACT,Brandon Wright,Brandon,Wright,1997-02-08,70.0,182.0,Georgia State,00-0035833,,61980614-1609-4b68-b11f-05aa30fefb89,33149,14954,,,22292,7296,0.0,,,3,REG,A01,Brandon,WRI116406,52735,32005752-4911-6406-49fa-f1b22d3d5c4c,2020.0,2020.0,,,23.625
448834,2020,JAX,K,K,1.0,CUT,Brandon Wright,Brandon,Wright,1997-02-08,70.0,182.0,Georgia State,00-0035833,,61980614-1609-4b68-b11f-05aa30fefb89,33149,14954,,,22292,7296,0.0,,,6,REG,,Brandon,WRI116406,52735,32005752-4911-6406-49fa-f1b22d3d5c4c,2020.0,2020.0,,,23.691


Unnamed: 0,season,team,position,depth_chart_position,jersey_number,status,player_name,first_name,last_name,birth_date,height,weight,college,player_id,espn_id,sportradar_id,yahoo_id,rotowire_id,pff_id,pfr_id,fantasy_data_id,sleeper_id,years_exp,headshot_url,ngs_position,week,game_type,status_description_abbr,football_name,esb_id,gsis_it_id,smart_id,entry_year,rookie_year,draft_club,draft_number,age


Unnamed: 0,season,team,position,depth_chart_position,jersey_number,status,player_name,first_name,last_name,birth_date,height,weight,college,player_id,espn_id,sportradar_id,yahoo_id,rotowire_id,pff_id,pfr_id,fantasy_data_id,sleeper_id,years_exp,headshot_url,ngs_position,week,game_type,status_description_abbr,football_name,esb_id,gsis_it_id,smart_id,entry_year,rookie_year,draft_club,draft_number,age
252272,2016,SEA,TE,T,74.0,ACT,George Fant,George,Fant,1992-07-19,77.0,270.0,Western Kentucky,00-0032466,,,,,,,,,0.0,https://static.www.nfl.com/image/private/f_aut...,,1,REG,A01,George,FAN401790,43586,32004641-4e40-1790-4ceb-d01018fe0d26,2016.0,2016.0,,,24.148


Unnamed: 0,season,team,position,depth_chart_position,jersey_number,status,player_name,first_name,last_name,birth_date,height,weight,college,player_id,espn_id,sportradar_id,yahoo_id,rotowire_id,pff_id,pfr_id,fantasy_data_id,sleeper_id,years_exp,headshot_url,ngs_position,week,game_type,status_description_abbr,football_name,esb_id,gsis_it_id,smart_id,entry_year,rookie_year,draft_club,draft_number,age
247683,2016,DAL,K,K,2.0,ACT,Matt Wile,Matt,Wile,1992-06-20,74.0,215.0,Michigan,00-0031876,2576240,4278baf5-f774-4031-ab0f-12a9c7e43c45,28738,10532,,,17550,3008,1.0,https://static.www.nfl.com/image/private/f_aut...,,1,REG,A01,Matt,WIL048868,42907,32005749-4c04-8868-f487-a75d2479caa3,2015.0,2015.0,,,24.227
327670,2018,MIN,K,K,6.0,ACT,Matt Wile,Matt,Wile,1992-06-20,74.0,219.0,Michigan,00-0031876,2576240,4278baf5-f774-4031-ab0f-12a9c7e43c45,28738,10532,,,17550,3008,3.0,https://static.www.nfl.com/image/private/f_aut...,,4,REG,A01,Matt,WIL048868,42907,32005749-4c04-8868-f487-a75d2479caa3,2015.0,2015.0,,,26.27
327673,2018,MIN,K,K,6.0,ACT,Matt Wile,Matt,Wile,1992-06-20,74.0,219.0,Michigan,00-0031876,2576240,4278baf5-f774-4031-ab0f-12a9c7e43c45,28738,10532,,,17550,3008,3.0,https://static.www.nfl.com/image/private/f_aut...,,5,REG,A01,Matt,WIL048868,42907,32005749-4c04-8868-f487-a75d2479caa3,2015.0,2015.0,,,26.297
327677,2018,MIN,K,K,6.0,ACT,Matt Wile,Matt,Wile,1992-06-20,74.0,219.0,Michigan,00-0031876,2576240,4278baf5-f774-4031-ab0f-12a9c7e43c45,28738,10532,,,17550,3008,3.0,https://static.www.nfl.com/image/private/f_aut...,,1,REG,A01,Matt,WIL048868,42907,32005749-4c04-8868-f487-a75d2479caa3,2015.0,2015.0,,,26.22
327678,2018,MIN,K,K,6.0,ACT,Matt Wile,Matt,Wile,1992-06-20,74.0,219.0,Michigan,00-0031876,2576240,4278baf5-f774-4031-ab0f-12a9c7e43c45,28738,10532,,,17550,3008,3.0,https://static.www.nfl.com/image/private/f_aut...,,2,REG,A01,Matt,WIL048868,42907,32005749-4c04-8868-f487-a75d2479caa3,2015.0,2015.0,,,26.24


**Seasonal roster less unique player IDs than weekly. In checking weekly, the 5 extra IDs are players who haven't played a single down in the NFL. When I merge with weekly I will do an inner join so these IDs get thrown out.**

In [74]:
rosters_weekly_ff['active_games'] = rosters_weekly_ff.groupby(['player_id', 'season'])['status'].transform(lambda x: (x == 'ACT').sum())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rosters_weekly_ff['active_games'] = rosters_weekly_ff.groupby(['player_id', 'season'])['status'].transform(lambda x: (x == 'ACT').sum())


In [75]:
rosters_weekly_ff['active_games'].value_counts()

17.0    44144
0.0     39711
16.0    22523
18.0    15215
19.0    12502
15.0     6288
20.0     5890
14.0     5487
1.0      5264
13.0     4740
21.0     4536
11.0     4297
12.0     4124
2.0      4041
8.0      3833
3.0      3650
5.0      3434
10.0     3421
9.0      3365
6.0      3273
7.0      3244
4.0      3109
34.0      102
36.0       72
40.0       40
38.0       38
Name: active_games, dtype: int64

In [81]:
rosters_weekly_ff[rosters_weekly_ff['active_games'] == 21].head()

Unnamed: 0,season,team,position,depth_chart_position,jersey_number,status,player_name,first_name,last_name,birth_date,height,weight,college,player_id,espn_id,sportradar_id,yahoo_id,rotowire_id,pff_id,pfr_id,fantasy_data_id,sleeper_id,years_exp,headshot_url,ngs_position,week,game_type,status_description_abbr,football_name,esb_id,gsis_it_id,smart_id,entry_year,rookie_year,draft_club,draft_number,age,active_games
289,2009,NO,QB,,11,ACT,Mark Brunell,Mark,Brunell,1970-09-17,73.0,217.0,,00-0002110,,,,,,,,,16.0,https://static.www.nfl.com/image/private/f_aut...,,18,WC,A01,Mark,BRU575022,,32004252-5557-5022-ece0-cf2096eec69e,1993.0,1993.0,GB,118,,21.0
290,2009,NO,QB,,11,ACT,Mark Brunell,Mark,Brunell,1970-09-17,73.0,217.0,,00-0002110,,,,,,,,,16.0,https://static.www.nfl.com/image/private/f_aut...,,15,REG,A01,Mark,BRU575022,,32004252-5557-5022-ece0-cf2096eec69e,1993.0,1993.0,GB,118,39.255,21.0
291,2009,NO,QB,,11,ACT,Mark Brunell,Mark,Brunell,1970-09-17,73.0,217.0,,00-0002110,,,,,,,,,16.0,https://static.www.nfl.com/image/private/f_aut...,,6,REG,A01,Mark,BRU575022,,32004252-5557-5022-ece0-cf2096eec69e,1993.0,1993.0,GB,118,39.086,21.0
292,2009,NO,QB,,11,ACT,Mark Brunell,Mark,Brunell,1970-09-17,73.0,217.0,,00-0002110,,,,,,,,,16.0,https://static.www.nfl.com/image/private/f_aut...,,17,REG,A01,Mark,BRU575022,,32004252-5557-5022-ece0-cf2096eec69e,1993.0,1993.0,GB,118,39.296,21.0
293,2009,NO,QB,,11,ACT,Mark Brunell,Mark,Brunell,1970-09-17,73.0,217.0,,00-0002110,,,,,,,,,16.0,https://static.www.nfl.com/image/private/f_aut...,,9,REG,A01,Mark,BRU575022,,32004252-5557-5022-ece0-cf2096eec69e,1993.0,1993.0,GB,118,39.143,21.0


In [89]:
df_grouped.set_index('player_id')

Unnamed: 0_level_0,cleaned_name,position,team
player_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
00-0000108,David Akers,K,DET
00-0000865,Charlie Batch,QB,PIT
00-0001477,Marty Booker,WR,ATL
00-0001545,Todd Bouman,QB,JAX
00-0001980,Kris Brown,K,HST
...,...,...,...
00-0039940,Kairee Robinson,RB,SEA
00-0039941,Geor'Quarius Spivey,TE,KC
00-0039942,Mason Fairchild,TE,NO
00-0039945,Alex Hale,K,GB


In [90]:
df_dict = df_grouped.to_dict()['cleaned_name']
df_dict

{0: 'David Akers',
 1: 'Charlie Batch',
 2: 'Marty Booker',
 3: 'Todd Bouman',
 4: 'Kris Brown',
 5: 'Isaac Bruce',
 6: 'Mark Brunell',
 7: 'John Carney',
 8: 'Desmond Clark',
 9: 'Kerry Collins',
 10: 'Todd Collins',
 11: 'Daunte Culpepper',
 12: 'Phil Dawson',
 13: 'Jake Delhomme',
 14: 'Donald Driver',
 15: 'Jason Elam',
 16: 'Bobby Engram',
 17: 'Kevin Faulk',
 18: 'Brett Favre',
 19: 'Brian Finneran',
 20: 'Joey Galloway',
 21: 'Jeff Garcia',
 22: 'Tony Gonzalez',
 23: 'Ahman Green',
 24: 'Chris Greisen',
 25: 'Jason Hanson',
 26: 'Matt Hasselbeck',
 27: 'Steve Heiden',
 28: 'Torry Holt',
 29: 'Edgerrin James',
 30: 'John Kasay',
 31: 'Reggie Kelly',
 32: 'Jon Kitna',
 33: 'Jim Kleinsasser',
 34: 'Ryan Longwell',
 35: 'Peyton Manning',
 36: 'Olindo Mare',
 37: 'Derrick Mason',
 38: 'Donovan McNabb',
 39: 'Sean Morey',
 40: 'Randy Moss',
 41: 'Muhsin Muhammad',
 42: 'Joe Nedney',
 43: 'Terrell Owens',
 44: 'Aaron Stecker',
 45: 'Brandon Stokley',
 46: 'Matt Stover',
 47: 'Fred Tayl

In [93]:
df_grouped.reset_index().drop(columns=['index', 'position', 'team'])

Unnamed: 0,player_id,cleaned_name
0,00-0000108,David Akers
1,00-0000865,Charlie Batch
2,00-0001477,Marty Booker
3,00-0001545,Todd Bouman
4,00-0001980,Kris Brown
...,...,...
3812,00-0039940,Kairee Robinson
3813,00-0039941,Geor'Quarius Spivey
3814,00-0039942,Mason Fairchild
3815,00-0039945,Alex Hale


In [95]:
df_grouped = df_grouped.set_index('player_id')

In [96]:
df_dict = df_grouped.to_dict()['cleaned_name']
df_dict

{'00-0000108': 'David Akers',
 '00-0000865': 'Charlie Batch',
 '00-0001477': 'Marty Booker',
 '00-0001545': 'Todd Bouman',
 '00-0001980': 'Kris Brown',
 '00-0002099': 'Isaac Bruce',
 '00-0002110': 'Mark Brunell',
 '00-0002655': 'John Carney',
 '00-0003035': 'Desmond Clark',
 '00-0003292': 'Kerry Collins',
 '00-0003315': 'Todd Collins',
 '00-0003739': 'Daunte Culpepper',
 '00-0004091': 'Phil Dawson',
 '00-0004161': 'Jake Delhomme',
 '00-0004541': 'Donald Driver',
 '00-0004811': 'Jason Elam',
 '00-0004915': 'Bobby Engram',
 '00-0005091': 'Kevin Faulk',
 '00-0005106': 'Brett Favre',
 '00-0005231': 'Brian Finneran',
 '00-0005720': 'Joey Galloway',
 '00-0005755': 'Jeff Garcia',
 '00-0006101': 'Tony Gonzalez',
 '00-0006305': 'Ahman Green',
 '00-0006410': 'Chris Greisen',
 '00-0006800': 'Jason Hanson',
 '00-0007091': 'Matt Hasselbeck',
 '00-0007213': 'Steve Heiden',
 '00-0007681': 'Torry Holt',
 '00-0008241': 'Edgerrin James',
 '00-0009028': 'John Kasay',
 '00-0009119': 'Reggie Kelly',
 '00-0

In [None]:
rw_clean

In [83]:
rw_clean[(rw_clean['season'] == 2009) & (rw_clean['player_id'] == '00-0002110')].sort_values(by='week')

Unnamed: 0,season,team,position,depth_chart_position,jersey_number,status,player_name,first_name,last_name,birth_date,height,weight,college,player_id,espn_id,sportradar_id,yahoo_id,rotowire_id,pff_id,pfr_id,fantasy_data_id,sleeper_id,years_exp,headshot_url,ngs_position,week,game_type,status_description_abbr,football_name,esb_id,gsis_it_id,smart_id,entry_year,rookie_year,draft_club,draft_number,age,cleaned_name,unique_id
5,2009,NO,QB,,11,ACT,Mark Brunell,Mark,Brunell,1970-09-17,73.0,217.0,,00-0002110,,,,,,,,,16.0,https://static.www.nfl.com/image/private/f_aut...,,21,SB,A01,Mark,BRU575022,,32004252-5557-5022-ece0-cf2096eec69e,1993.0,1993.0,GB,118,38.0,Mark Brunell,00-0002110_2009_Mark_Brunell_QB


In [19]:
# Create dataframe with only player names and their corresponding player ID for cleaning

id_df = rosters_seas_ff[['player_name', 'player_id', 'position', 'team']]
id_df.head()

Unnamed: 0,player_name,player_id,position,team
1,David Akers,00-0000108,K,PHI
7,Charlie Batch,00-0000865,QB,PIT
13,Marty Booker,00-0001477,WR,ATL
16,Kris Brown,00-0001980,K,HST
17,Isaac Bruce,00-0002099,WR,SF


In [20]:
# Make sure unique IDS and player IDs have same number of unique values

print(f'Player name nunique - {id_df["player_name"].nunique()}')
print(f'Player ID nunique - {id_df["player_id"].nunique()}')

Player name nunique - 3854
Player ID nunique - 3817


In [21]:
# Check for missing values

id_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 13934 entries, 1 to 42818
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   player_name  13934 non-null  object
 1   player_id    13934 non-null  object
 2   position     13934 non-null  object
 3   team         13934 non-null  object
dtypes: object(4)
memory usage: 544.3+ KB


In [22]:
# Group by player_id and count unique player_names
ids_with_multiple_names = (id_df.groupby('player_id')['player_name']
                           .nunique()
                           .reset_index())

# Filter to only those with more than one name
ids_with_multiple_names = ids_with_multiple_names[ids_with_multiple_names['player_name'] > 1]
ids_with_multiple_names[ids_with_multiple_names['player_name'] > 1]


Unnamed: 0,player_id,player_name
73,00-0020245,2
80,00-0020397,2
424,00-0025944,2
499,00-0026293,2
673,00-0027253,2
...,...,...
3337,00-0038134,2
3360,00-0038409,2
3420,00-0038611,2
3518,00-0038920,2


In [23]:
# Take a look at one of the IDs to see what the issue is

display(id_df[id_df['player_id'] == '00-0020245'])
display(id_df[id_df['player_id'] == '00-0020397'])
display(id_df[id_df['player_id'] == '00-0038920'])

Unnamed: 0,player_name,player_id,position,team
237,Michael Vick,00-0020245,QB,PHI
2257,Michael Vick,00-0020245,QB,PHI
4351,Michael Vick,00-0020245,QB,PHI
6404,Michael Vick,00-0020245,QB,PHI
8500,Michael Vick,00-0020245,QB,PHI
10624,Michael Vick,00-0020245,QB,NYJ
12775,Michael Vick,00-0020245,QB,PIT
14962,Mike Vick,00-0020245,QB,PIT


Unnamed: 0,player_name,player_id,position,team
259,Chad Ochocinco,00-0020397,WR,CIN
2275,Chad Johnson,00-0020397,WR,CIN
4359,Chad Johnson,00-0020397,WR,NE


Unnamed: 0,player_name,player_id,position,team
39428,John Samuel Shenker,00-0038920,TE,LV
41973,John Shenker,00-0038920,TE,LV


**The user IDs with multiple names appear to be from names not being standardized. Will take the first instance in each repeat and apply to that ID**

In [24]:
# Group by ID and take the first instance sorted player name

df_grouped = id_df.sort_values(by='player_name').groupby('player_id')[['player_name', 'position', 'team']].first()

#Turn back into dataframe
df_grouped = df_grouped.reset_index()


In [25]:
# Check shape to make sure same # of unique IDs exist

df_grouped.shape

(3817, 4)

In [26]:
# Second work check

display(df_grouped[df_grouped['player_id'] == '00-0020245'])
display(df_grouped[df_grouped['player_id'] == '00-0020397'])
display(df_grouped[df_grouped['player_id'] == '00-0038920'])

Unnamed: 0,player_id,player_name,position,team
73,00-0020245,Michael Vick,QB,PIT


Unnamed: 0,player_id,player_name,position,team
80,00-0020397,Chad Johnson,WR,CIN


Unnamed: 0,player_id,player_name,position,team
3518,00-0038920,John Samuel Shenker,TE,LV


In [27]:
# Third work check

# Group by player_id and count unique player_names
ids_with_multiple_names = (df_grouped.groupby('player_id')['player_name']
                           .nunique()
                           .reset_index())

# Filter to only those with more than one name
ids_with_multiple_names = ids_with_multiple_names[ids_with_multiple_names['player_name'] > 1]
ids_with_multiple_names[ids_with_multiple_names['player_name'] > 1]

Unnamed: 0,player_id,player_name


**Now to check for names that repeat with different IDs**

In [28]:
# Group by player_name and count unique player_ids
names_with_multiple_ids = (id_df.groupby('player_name')['player_id']
                           .nunique()
                           .reset_index())

# Filter to only those with more than one ID
names_with_multiple_ids = names_with_multiple_ids[names_with_multiple_ids['player_id'] > 1]
names_with_multiple_ids


Unnamed: 0,player_name,player_id
53,Adrian Peterson,2
91,Alex Smith,2
127,Andre Davis,2
219,Austin Allen,2
648,Chris Brooks,2
649,Chris Brown,2
662,Chris Harper,2
663,Chris Henry,2
692,Chris Thompson,2
817,D.J. Williams,2


In [29]:
# Check first repeat name on list

rosters_seas_ff[rosters_seas_ff['player_name'] == 'Adrian Peterson'].head()

Unnamed: 0,season,team,position,depth_chart_position,jersey_number,status,player_name,first_name,last_name,birth_date,height,weight,college,player_id,espn_id,sportradar_id,yahoo_id,rotowire_id,pff_id,pfr_id,fantasy_data_id,sleeper_id,years_exp,headshot_url,ngs_position,week,game_type,status_description_abbr,football_name,esb_id,gsis_it_id,smart_id,entry_year,rookie_year,draft_club,draft_number,age
409,2009,CHI,RB,,29,ACT,Adrian Peterson,Adrian,Peterson,1979-07-01,70.0,212.0,,00-0021306,,,,,,,,,7.0,https://static.www.nfl.com/image/private/f_aut...,,17,REG,A01,Adrian,PET263012,,32005045-5426-3012-fd67-09573cdb8335,2002.0,2002.0,CHI,199,30.0
1226,2009,MIN,RB,,28,ACT,Adrian Peterson,Adrian,Peterson,1985-03-21,73.0,217.0,Oklahoma,00-0025394,10452.0,ab58c0ac-a747-47e6-9b3c-505e41d2bd3d,8261.0,5215.0,3623.0,PeteAd01,4807.0,184.0,2.0,https://static.www.nfl.com/image/private/f_aut...,,20,CON,A01,Adrian,PET260705,32200.0,32005045-5426-0705-afaf-b095105b31fb,2007.0,2007.0,MIN,7,24.0
3072,2010,MIN,RB,,28,ACT,Adrian Peterson,Adrian,Peterson,1985-03-21,73.0,217.0,Oklahoma,00-0025394,10452.0,ab58c0ac-a747-47e6-9b3c-505e41d2bd3d,8261.0,5215.0,3623.0,PeteAd01,4807.0,184.0,3.0,https://static.www.nfl.com/image/private/f_aut...,,17,REG,A01,Adrian,PET260705,32200.0,32005045-5426-0705-afaf-b095105b31fb,2007.0,2007.0,MIN,7,25.0
5001,2011,MIN,RB,,28,RES,Adrian Peterson,Adrian,Peterson,1985-03-21,73.0,217.0,Oklahoma,00-0025394,10452.0,ab58c0ac-a747-47e6-9b3c-505e41d2bd3d,8261.0,5215.0,3623.0,PeteAd01,4807.0,184.0,4.0,https://static.www.nfl.com/image/private/f_aut...,,16,REG,A01,Adrian,PET260705,32200.0,32005045-5426-0705-afaf-b095105b31fb,2007.0,2007.0,MIN,7,26.0
6880,2012,MIN,RB,,28,ACT,Adrian Peterson,Adrian,Peterson,1985-03-21,73.0,217.0,Oklahoma,00-0025394,10452.0,ab58c0ac-a747-47e6-9b3c-505e41d2bd3d,8261.0,5215.0,3623.0,PeteAd01,4807.0,184.0,5.0,https://static.www.nfl.com/image/private/f_aut...,,18,WC,A01,Adrian,PET260705,32200.0,32005045-5426-0705-afaf-b095105b31fb,2007.0,2007.0,MIN,7,27.0


**Above shows two different players with same name and two different IDs/stat sets**

In [30]:
# Check second repeat name on list

rosters_seas_ff[rosters_seas_ff['player_name'] == 'Alex Smith']

Unnamed: 0,season,team,position,depth_chart_position,jersey_number,status,player_name,first_name,last_name,birth_date,height,weight,college,player_id,espn_id,sportradar_id,yahoo_id,rotowire_id,pff_id,pfr_id,fantasy_data_id,sleeper_id,years_exp,headshot_url,ngs_position,week,game_type,status_description_abbr,football_name,esb_id,gsis_it_id,smart_id,entry_year,rookie_year,draft_club,draft_number,age
779,2009,SF,QB,,11.0,ACT,Alex Smith,Alexander,Smith,1984-05-07,76.0,217.0,Utah,00-0023436,8416.0,2fda010a-8c62-4c07-b601-4ba03f57e6af,7177.0,4306.0,2218.0,SmitAl03,6739.0,268.0,4.0,https://static.www.nfl.com/image/private/f_aut...,,17,REG,A01,Alex,SMI031126,29819,3200534d-4903-1126-6414-eeba5603cc44,2005.0,2005.0,SF,1.0,25.0
841,2009,PHI,TE,,82.0,ACT,Alex Smith,Edwin,Smith,1982-05-22,76.0,258.0,Stanford,00-0023506,,,,,,,,,4.0,https://static.www.nfl.com/image/private/f_aut...,,18,WC,A01,Alex,SMI029398,29898,3200534d-4902-9398-d7ad-d21cbbe6a53d,2005.0,2005.0,TB,71.0,27.0
2692,2010,SF,QB,,11.0,ACT,Alex Smith,Alexander,Smith,1984-05-07,76.0,217.0,Utah,00-0023436,8416.0,2fda010a-8c62-4c07-b601-4ba03f57e6af,7177.0,4306.0,2218.0,SmitAl03,6739.0,268.0,5.0,https://static.www.nfl.com/image/private/f_aut...,,17,REG,A01,Alex,SMI031126,29819,3200534d-4903-1126-6414-eeba5603cc44,2005.0,2005.0,SF,1.0,26.0
2746,2010,CLV,TE,,81.0,ACT,Alex Smith,Edwin,Smith,1982-05-22,76.0,258.0,Stanford,00-0023506,,,,,,,,,5.0,https://static.www.nfl.com/image/private/f_aut...,,17,REG,A01,Alex,SMI029398,29898,3200534d-4902-9398-d7ad-d21cbbe6a53d,2005.0,2005.0,TB,71.0,28.0
4689,2011,SF,QB,,11.0,ACT,Alex Smith,Alexander,Smith,1984-05-07,76.0,217.0,Utah,00-0023436,8416.0,2fda010a-8c62-4c07-b601-4ba03f57e6af,7177.0,4306.0,2218.0,SmitAl03,6739.0,268.0,6.0,https://static.www.nfl.com/image/private/f_aut...,,20,CON,A01,Alex,SMI031126,29819,3200534d-4903-1126-6414-eeba5603cc44,2005.0,2005.0,SF,1.0,27.0
4735,2011,CLV,TE,,81.0,RES,Alex Smith,Edwin,Smith,1982-05-22,76.0,258.0,Stanford,00-0023506,,,,,,,,,6.0,https://static.www.nfl.com/image/private/f_aut...,,15,REG,A01,Alex,SMI029398,29898,3200534d-4902-9398-d7ad-d21cbbe6a53d,2005.0,2005.0,TB,71.0,29.0
6639,2012,SF,QB,,11.0,ACT,Alex Smith,Alexander,Smith,1984-05-07,76.0,217.0,Utah,00-0023436,8416.0,2fda010a-8c62-4c07-b601-4ba03f57e6af,7177.0,4306.0,2218.0,SmitAl03,6739.0,268.0,7.0,https://static.www.nfl.com/image/private/f_aut...,,21,SB,A01,Alex,SMI031126,29819,3200534d-4903-1126-6414-eeba5603cc44,2005.0,2005.0,SF,1.0,28.0
6679,2012,CLV,TE,,81.0,ACT,Alex Smith,Edwin,Smith,1982-05-22,76.0,258.0,Stanford,00-0023506,,,,,,,,,7.0,https://static.www.nfl.com/image/private/f_aut...,,17,REG,A01,Alex,SMI029398,29898,3200534d-4902-9398-d7ad-d21cbbe6a53d,2005.0,2005.0,TB,71.0,30.0
8656,2013,KC,QB,,11.0,ACT,Alex Smith,Alexander,Smith,1984-05-07,76.0,217.0,Utah,00-0023436,8416.0,2fda010a-8c62-4c07-b601-4ba03f57e6af,7177.0,4306.0,2218.0,SmitAl03,6739.0,268.0,8.0,https://static.www.nfl.com/image/private/f_aut...,,18,WC,A01,Alex,SMI031126,29819,3200534d-4903-1126-6414-eeba5603cc44,2005.0,2005.0,SF,1.0,29.0
8681,2013,CIN,TE,,81.0,RES,Alex Smith,Edwin,Smith,1982-05-22,76.0,250.0,Stanford,00-0023506,,,,,,,,,8.0,https://static.www.nfl.com/image/private/f_aut...,,17,REG,A01,Alex,SMI029398,29898,3200534d-4902-9398-d7ad-d21cbbe6a53d,2005.0,2005.0,TB,71.0,31.0


**This name is also a repeat name but two separate players. Will assume that this is the case for all and that ID/Name DF is ready for merge**

In [31]:
df_grouped = df_grouped.rename(columns={'player_name': 'cleaned_name'})

In [32]:
rs_clean = pd.merge(rosters_seas_ff, df_grouped, how='left', on = 'player_id')

In [33]:
rs_clean = rs_clean.drop(columns = ['position_y', 'team_y'])

In [34]:
rs_clean = rs_clean.rename(columns={'team_x' : 'team', 'position_x' : 'position'})

In [35]:
rs_clean

Unnamed: 0,season,team,position,depth_chart_position,jersey_number,status,player_name,first_name,last_name,birth_date,height,weight,college,player_id,espn_id,sportradar_id,yahoo_id,rotowire_id,pff_id,pfr_id,fantasy_data_id,sleeper_id,years_exp,headshot_url,ngs_position,week,game_type,status_description_abbr,football_name,esb_id,gsis_it_id,smart_id,entry_year,rookie_year,draft_club,draft_number,age,cleaned_name
0,2009,PHI,K,,2,ACT,David Akers,David,Akers,1974-12-09,70.0,200.0,,00-0000108,,,,,,,,,12.0,https://static.www.nfl.com/image/private/f_aut...,,18,WC,A01,David,AKE551610,,3200414b-4555-1610-e0e6-a72c82e419e7,1997.0,1997.0,,,34.0,David Akers
1,2009,PIT,QB,,16,ACT,Charlie Batch,Charles,Batch,1974-12-05,74.0,216.0,,00-0000865,1490,a725e7c5-86df-4b5b-abe0-71b809be988d,,945,367,BatcCh00,,,11.0,https://static.www.nfl.com/image/private/f_aut...,,17,REG,I02,Charlie,BAT039161,,32004241-5403-9161-d21e-19cbefc1c6b0,1998.0,1998.0,DET,60,34.0,Charlie Batch
2,2009,ATL,WR,,80,ACT,Marty Booker,Marty,Booker,1976-07-31,72.0,205.0,,00-0001477,,,,,,,,,10.0,https://static.www.nfl.com/image/private/f_aut...,,17,REG,A01,Marty,BOO033965,,3200424f-4f03-3965-c6e4-7a66e7da27ec,1999.0,1999.0,CHI,78,33.0,Marty Booker
3,2009,HST,K,,3,ACT,Kris Brown,Kristopher,Brown,1976-12-23,71.0,212.0,,00-0001980,,,,,,,,,10.0,https://static.www.nfl.com/image/private/f_aut...,,17,REG,A01,Kris,BRO521985,,32004252-4f52-1985-bb20-5663ab81e524,1999.0,1999.0,PIT,228,32.0,Kris Brown
4,2009,SF,WR,,88,ACT,Isaac Bruce,Isaac,Bruce,1972-11-10,72.0,188.0,,00-0002099,,,,,,,,,15.0,https://static.www.nfl.com/image/private/f_aut...,,17,REG,A01,Isaac,BRU085284,,32004252-5508-5284-f394-5e776b7d5d81,1994.0,1994.0,LA,33,36.0,Isaac Bruce
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13929,2024,SEA,RB,RB,35.0,CUT,Kairee Robinson,Kairee,Robinson,2000-02-23,67.0,195.0,San Jose State,00-0039940,,,,,,,,,0.0,,,1,REG,W03,Kairee,ROB591051,58030,3200524f-4259-1051-9b5d-a2fec8e616e8,2024.0,2024.0,,,24.0,Kairee Robinson
13930,2024,KC,TE,TE,85.0,CUT,Geor'Quarius Spivey,Geor'quarius,Spivey,1999-10-06,77.0,240.0,Mississippi State,00-0039941,,,,,,,,,0.0,,,1,REG,W03,Geor'quarius,SPI709879,58035,32005350-4970-9879-b7b8-8108e0dbca38,2024.0,2024.0,,,24.0,Geor'Quarius Spivey
13931,2024,NO,TE,TE,82.0,CUT,Mason Fairchild,Mason,Fairchild,2001-08-30,76.0,260.0,Kansas,00-0039942,,,,,,,,,0.0,,,1,REG,P01,Mason,FAI339781,58004,32004641-4933-9781-9d27-8d73b06581dc,2024.0,2024.0,,,23.0,Mason Fairchild
13932,2024,GB,K,K,16.0,DEV,Alex Hale,Alex,Hale,1998-03-07,72.0,205.0,Oklahoma State,00-0039945,4361831,c0f5daf0-5505-11ef-b7e3-6370a6e73976,,18429,,,,12438,0.0,,,19,WC,P03,Alex,HAL031251,58041,32004841-4c03-1251-97e3-c8a0c955a05c,2024.0,2024.0,,,26.0,Alex Hale


In [36]:
# Check that all IDs exist still

rs_clean['player_id'].nunique()

3817

In [37]:
# Merge weekly

rw_clean = pd.merge(rosters_weekly_ff, df_grouped, how = 'inner', on = 'player_id')

In [38]:
rw_clean = rw_clean.drop(columns=['position_y', 'team_y'])

In [39]:
# Check that all IDs exist still

rw_clean['player_id'].nunique()

3817

In [40]:
rw_clean = rs_clean.rename(columns={'team_x' : 'team', 'position_x' : 'position'})

In [41]:
rw_clean

Unnamed: 0,season,team,position,depth_chart_position,jersey_number,status,player_name,first_name,last_name,birth_date,height,weight,college,player_id,espn_id,sportradar_id,yahoo_id,rotowire_id,pff_id,pfr_id,fantasy_data_id,sleeper_id,years_exp,headshot_url,ngs_position,week,game_type,status_description_abbr,football_name,esb_id,gsis_it_id,smart_id,entry_year,rookie_year,draft_club,draft_number,age,cleaned_name
0,2009,PHI,K,,2,ACT,David Akers,David,Akers,1974-12-09,70.0,200.0,,00-0000108,,,,,,,,,12.0,https://static.www.nfl.com/image/private/f_aut...,,18,WC,A01,David,AKE551610,,3200414b-4555-1610-e0e6-a72c82e419e7,1997.0,1997.0,,,34.0,David Akers
1,2009,PIT,QB,,16,ACT,Charlie Batch,Charles,Batch,1974-12-05,74.0,216.0,,00-0000865,1490,a725e7c5-86df-4b5b-abe0-71b809be988d,,945,367,BatcCh00,,,11.0,https://static.www.nfl.com/image/private/f_aut...,,17,REG,I02,Charlie,BAT039161,,32004241-5403-9161-d21e-19cbefc1c6b0,1998.0,1998.0,DET,60,34.0,Charlie Batch
2,2009,ATL,WR,,80,ACT,Marty Booker,Marty,Booker,1976-07-31,72.0,205.0,,00-0001477,,,,,,,,,10.0,https://static.www.nfl.com/image/private/f_aut...,,17,REG,A01,Marty,BOO033965,,3200424f-4f03-3965-c6e4-7a66e7da27ec,1999.0,1999.0,CHI,78,33.0,Marty Booker
3,2009,HST,K,,3,ACT,Kris Brown,Kristopher,Brown,1976-12-23,71.0,212.0,,00-0001980,,,,,,,,,10.0,https://static.www.nfl.com/image/private/f_aut...,,17,REG,A01,Kris,BRO521985,,32004252-4f52-1985-bb20-5663ab81e524,1999.0,1999.0,PIT,228,32.0,Kris Brown
4,2009,SF,WR,,88,ACT,Isaac Bruce,Isaac,Bruce,1972-11-10,72.0,188.0,,00-0002099,,,,,,,,,15.0,https://static.www.nfl.com/image/private/f_aut...,,17,REG,A01,Isaac,BRU085284,,32004252-5508-5284-f394-5e776b7d5d81,1994.0,1994.0,LA,33,36.0,Isaac Bruce
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13929,2024,SEA,RB,RB,35.0,CUT,Kairee Robinson,Kairee,Robinson,2000-02-23,67.0,195.0,San Jose State,00-0039940,,,,,,,,,0.0,,,1,REG,W03,Kairee,ROB591051,58030,3200524f-4259-1051-9b5d-a2fec8e616e8,2024.0,2024.0,,,24.0,Kairee Robinson
13930,2024,KC,TE,TE,85.0,CUT,Geor'Quarius Spivey,Geor'quarius,Spivey,1999-10-06,77.0,240.0,Mississippi State,00-0039941,,,,,,,,,0.0,,,1,REG,W03,Geor'quarius,SPI709879,58035,32005350-4970-9879-b7b8-8108e0dbca38,2024.0,2024.0,,,24.0,Geor'Quarius Spivey
13931,2024,NO,TE,TE,82.0,CUT,Mason Fairchild,Mason,Fairchild,2001-08-30,76.0,260.0,Kansas,00-0039942,,,,,,,,,0.0,,,1,REG,P01,Mason,FAI339781,58004,32004641-4933-9781-9d27-8d73b06581dc,2024.0,2024.0,,,23.0,Mason Fairchild
13932,2024,GB,K,K,16.0,DEV,Alex Hale,Alex,Hale,1998-03-07,72.0,205.0,Oklahoma State,00-0039945,4361831,c0f5daf0-5505-11ef-b7e3-6370a6e73976,,18429,,,,12438,0.0,,,19,WC,P03,Alex,HAL031251,58041,32004841-4c03-1251-97e3-c8a0c955a05c,2024.0,2024.0,,,26.0,Alex Hale


In [42]:
# Merge Season Stats

ss_clean = pd.merge(stats_seas, df_grouped, how = 'left', on = 'player_id')

In [43]:
# Check how many naans are left

na_names = ss_clean[ss_clean['cleaned_name'].isna()]

In [44]:
na_names.shape

(765, 61)

In [45]:
na_names.describe()

Unnamed: 0,season,completions,attempts,passing_yards,passing_tds,interceptions,sacks,sack_yards,sack_fumbles,sack_fumbles_lost,passing_air_yards,passing_yards_after_catch,passing_first_downs,passing_epa,passing_2pt_conversions,pacr,dakota,carries,rushing_yards,rushing_tds,rushing_fumbles,rushing_fumbles_lost,rushing_first_downs,rushing_epa,rushing_2pt_conversions,receptions,targets,receiving_yards,receiving_tds,receiving_fumbles,receiving_fumbles_lost,receiving_air_yards,receiving_yards_after_catch,receiving_first_downs,receiving_epa,receiving_2pt_conversions,racr,target_share,air_yards_share,wopr_x,special_teams_tds,fantasy_points,fantasy_points_ppr,games,tgt_sh,ay_sh,yac_sh,wopr_y,ry_sh,rtd_sh,rfd_sh,rtdfd_sh,dom,w8dom,yptmpa,ppr_sh
count,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,668.0,765.0,765.0,668.0,668.0,765.0,765.0
mean,2015.324183,0.118954,0.189542,2.039216,0.014379,0.007843,0.007843,0.058824,0.006536,0.0,1.720261,1.100654,0.103268,0.148071,0.003922,0.228989,0.0,1.401307,4.992157,0.049673,0.094118,0.028758,0.509804,-0.19113,0.003922,1.330719,1.952941,8.911111,0.12549,0.023529,0.013072,3.320261,7.447059,0.512418,-0.052116,0.007843,1.056183,0.059703,0.012051,0.09799,0.164706,3.500654,4.831373,2.541176,0.014344,0.003938,0.017082,0.024667,0.009793,0.045007,0.014578,0.01783,0.027729,0.017362,0.070058,0.02641
std,4.724283,0.369263,0.51358,7.371661,0.119125,0.088271,0.088271,0.854885,0.080633,0.0,6.268443,4.60924,0.344823,1.381601,0.06254,1.018887,0.0,5.128429,21.003115,0.266135,0.305327,0.167235,1.845887,2.167823,0.06254,3.327524,4.732408,24.435457,0.386203,0.160074,0.113657,11.824332,20.988313,1.253859,2.030062,0.088271,5.840572,0.145179,0.04522,0.237029,0.411305,6.047568,8.659981,3.396555,0.017739,0.014044,0.040114,0.03185,0.025466,0.155118,0.030182,0.041145,0.081663,0.041024,0.185808,0.033074
min,2009.0,0.0,0.0,-4.0,0.0,0.0,0.0,0.0,0.0,0.0,-8.0,0.0,0.0,-5.712035,0.0,0.0,0.0,0.0,-28.0,0.0,0.0,0.0,0.0,-9.848605,0.0,0.0,0.0,-11.0,0.0,0.0,0.0,-36.0,-9.0,0.0,-16.30222,0.0,-41.333333,0.0,-0.112271,0.0,0.0,-3.8,-3.8,1.0,0.0,-0.078947,-0.021978,-0.002632,-0.058201,0.0,0.0,0.0,-0.029101,-0.046561,-0.289474,-0.037322
25%,2011.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00084
50%,2014.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.2,1.7,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015141
75%,2019.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,4.0,0.0,0.0,0.0,2.0,2.0,1.0,0.0,0.0,0.0,0.032258,0.007496,0.061425,0.0,6.0,6.0,2.0,0.028571,0.004121,0.016,0.049518,0.010381,0.0,0.012821,0.011905,0.011961,0.019064,0.072072,0.04471
max,2024.0,3.0,4.0,66.0,1.0,1.0,1.0,19.0,1.0,0.0,64.0,49.0,3.0,9.425087,1.0,12.166667,0.0,70.0,323.0,3.0,2.0,1.0,22.0,12.674711,1.0,22.0,40.0,224.0,4.0,2.0,1.0,171.0,181.0,9.0,13.765192,1.0,64.833333,1.104273,0.599466,2.076036,4.0,70.3,85.3,18.0,0.072727,0.139344,0.4,0.179657,0.34555,1.0,0.2,0.333333,0.569061,0.37644,3.0,0.189573


**Missing might be non-fantasy players, check**

In [46]:
# There are no names or positions in season stats set, look at IDs with missing names but through the rosters_seasonal set

miss_names = rosters_seas[rosters_seas['player_id'].isin(na_names['player_id'])]

In [47]:
# Look at positions in the missing names set

miss_names['position'].value_counts()

DB     670
P      502
OL     441
LB     193
T      163
FS     154
CB     147
FB     142
SS     120
DL      96
G       81
OLB     59
DE      51
C       44
DT      25
LS      23
NT      16
MLB     10
ILB      9
S        1
Name: position, dtype: int64

**All missing names from the stats set are non-fantasy relevent positions. Will switch merge to inner to keep only overlaps**

In [48]:
# Merge Season Stats

ss_clean = pd.merge(stats_seas, df_grouped, how = 'inner', on = 'player_id')

In [49]:
ss_clean['player_id'].nunique()

2385

In [50]:
ss_clean.head()

Unnamed: 0,player_id,season,season_type,completions,attempts,passing_yards,passing_tds,interceptions,sacks,sack_yards,sack_fumbles,sack_fumbles_lost,passing_air_yards,passing_yards_after_catch,passing_first_downs,passing_epa,passing_2pt_conversions,pacr,dakota,carries,rushing_yards,rushing_tds,rushing_fumbles,rushing_fumbles_lost,rushing_first_downs,rushing_epa,rushing_2pt_conversions,receptions,targets,receiving_yards,receiving_tds,receiving_fumbles,receiving_fumbles_lost,receiving_air_yards,receiving_yards_after_catch,receiving_first_downs,receiving_epa,receiving_2pt_conversions,racr,target_share,air_yards_share,wopr_x,special_teams_tds,fantasy_points,fantasy_points_ppr,games,tgt_sh,ay_sh,yac_sh,wopr_y,ry_sh,rtd_sh,rfd_sh,rtdfd_sh,dom,w8dom,yptmpa,ppr_sh,cleaned_name,position,team
0,00-0000108,2011,REG,1,1,14.0,1,0.0,0.0,0.0,0,0,9.0,5.0,1.0,4.223134,0,1.555556,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,4.56,4.56,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047313,David Akers,K,DET
1,00-0000865,2009,REG,1,2,17.0,0,0.0,0.0,0.0,0,0,18.0,2.0,1.0,1.449849,0,0.944444,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.68,0.68,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.005375,Charlie Batch,QB,PIT
2,00-0000865,2010,REG,29,49,352.0,3,3.0,4.0,21.0,0,0,538.0,125.0,13.0,-2.543351,0,1.956525,0.381428,7,30.0,0,1.0,0.0,2.0,1.690736,0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,23.08,23.08,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.135845,Charlie Batch,QB,PIT
3,00-0000865,2011,REG,15,24,208.0,0,1.0,2.0,10.0,0,0,189.0,92.0,9.0,-0.096867,0,1.149171,0.154437,3,-2.0,0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,6.12,6.12,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02427,Charlie Batch,QB,PIT
4,00-0000865,2012,REG,45,70,475.0,1,4.0,3.0,12.0,1,0,663.0,257.0,22.0,-1.646032,0,1.429991,0.160617,0,0.0,0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,15.0,15.0,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.119048,Charlie Batch,QB,PIT


In [51]:
injuries.head()

Unnamed: 0,season,game_type,team,week,gsis_id,position,full_name,first_name,last_name,report_primary_injury,report_secondary_injury,report_status,practice_primary_injury,practice_secondary_injury,practice_status,date_modified
0,2009.0,REG,ARI,1.0,00-0022084,WR,Anquan Boldin,Anquan,Boldin,Hamstring,,Questionable,Hamstring,,Limited Participation in Practice,NaT
1,2009.0,REG,ARI,1.0,00-0026221,WR,Early Doucet,Early,Doucet,Ribs,,Questionable,Ribs,,Limited Participation in Practice,NaT
2,2009.0,REG,ARI,1.0,00-0022101,QB,Brian St. Pierre,Brian,St. Pierre,Back,,Questionable,Back,,Limited Participation in Practice,NaT
3,2009.0,REG,ARI,1.0,00-0025529,WR,Steve Breaston,Steve,Breaston,Knee,,Probable,Knee,,Full Participation in Practice,NaT
4,2009.0,REG,ARI,1.0,00-0022786,S,Matt Ware,Matt,Ware,Shoulder,,Probable,Shoulder,,Full Participation in Practice,NaT


In [52]:
# Get unique player IDs from each dataframe
injuries_ids = injuries_ff['gsis_id'].unique()
season_ids = rs_clean['player_id'].unique()

injuries_ids_set = set(injuries_ids)
season_ids_set = set(season_ids)

# Find IDs in weekly but not in season
ids_in_season_no_injuries = season_ids_set - injuries_ids_set

# Convert the result to a list
ids_list = list(ids_in_season_no_injuries)

print(f"Number of player IDs in rs_clean but not in injuries: {len(ids_list)}")

# Create a dataframe with these IDs and their details from rosters_weekly
missing_players_df = rs_clean[rs_clean['player_id'].isin(ids_list)]

# To get a unique list of these players (without duplicates)
unique_missing_players_df = missing_players_df.drop_duplicates(subset=['player_id'])

# Display the resulting dataframe
print(unique_missing_players_df[['player_id', 'player_name', 'position', 'status']])

Number of player IDs in rs_clean but not in injuries: 1754
        player_id          player_name position status
10     00-0003739     Daunte Culpepper       QB    ACT
15     00-0004915         Bobby Engram       WR    CUT
19     00-0005720        Joey Galloway       WR    ACT
21     00-0005755          Jeff Garcia       QB    CUT
28     00-0008241       Edgerrin James       RB    CUT
...           ...                  ...      ...    ...
13929  00-0039940      Kairee Robinson       RB    CUT
13930  00-0039941  Geor'Quarius Spivey       TE    CUT
13931  00-0039942      Mason Fairchild       TE    CUT
13932  00-0039945            Alex Hale        K    DEV
13933  00-0039952       Devon Garrison       TE    CUT

[1754 rows x 4 columns]


In [53]:
injuries_ff[injuries_ff['full_name'] == 'Daunte Culpepper']

Unnamed: 0,season,game_type,team,week,gsis_id,position,full_name,first_name,last_name,report_primary_injury,report_secondary_injury,report_status,practice_primary_injury,practice_secondary_injury,practice_status,date_modified


**From a quick check, the missing names from the injury list are players who didn't have any injuries in the period. Will do a left join to keep all pertinent values**

There is not a shared unique ID across both datasets. Explore to see options to join with DF grouped

In [54]:
injuries_ff.head()

Unnamed: 0,season,game_type,team,week,gsis_id,position,full_name,first_name,last_name,report_primary_injury,report_secondary_injury,report_status,practice_primary_injury,practice_secondary_injury,practice_status,date_modified
0,2009.0,REG,ARI,1.0,00-0022084,WR,Anquan Boldin,Anquan,Boldin,Hamstring,,Questionable,Hamstring,,Limited Participation in Practice,NaT
1,2009.0,REG,ARI,1.0,00-0026221,WR,Early Doucet,Early,Doucet,Ribs,,Questionable,Ribs,,Limited Participation in Practice,NaT
2,2009.0,REG,ARI,1.0,00-0022101,QB,Brian St. Pierre,Brian,St. Pierre,Back,,Questionable,Back,,Limited Participation in Practice,NaT
3,2009.0,REG,ARI,1.0,00-0025529,WR,Steve Breaston,Steve,Breaston,Knee,,Probable,Knee,,Full Participation in Practice,NaT
5,2009.0,REG,ARI,1.0,00-0011641,WR,Sean Morey,Sean,Morey,Ribs,,Probable,Ribs,,Limited Participation in Practice,NaT


In [55]:
name_check = injuries_ff['full_name'].head().tolist()

In [56]:
df_grouped[df_grouped['cleaned_name'].isin(name_check)]

Unnamed: 0,player_id,cleaned_name,position,team
39,00-0011641,Sean Morey,WR,ARZ
153,00-0022084,Anquan Boldin,WR,SF
157,00-0022101,Brian St. Pierre,QB,ARZ
390,00-0025529,Steve Breaston,WR,KC
479,00-0026221,Early Doucet,WR,ARZ


**Player ID from df grouped and gsis_id from injuries are identical, will change name and join here**

In [57]:
injuries_ff = injuries_ff.rename(columns={'gsis_id' : 'player_id'})

In [58]:
inj_clean = pd.merge(injuries_ff, df_grouped, how = 'left', on = 'player_id')

In [59]:
inj_clean

Unnamed: 0,season,game_type,team_x,week,player_id,position_x,full_name,first_name,last_name,report_primary_injury,report_secondary_injury,report_status,practice_primary_injury,practice_secondary_injury,practice_status,date_modified,cleaned_name,position_y,team_y
0,2009.0,REG,ARI,1.0,00-0022084,WR,Anquan Boldin,Anquan,Boldin,Hamstring,,Questionable,Hamstring,,Limited Participation in Practice,NaT,Anquan Boldin,WR,SF
1,2009.0,REG,ARI,1.0,00-0026221,WR,Early Doucet,Early,Doucet,Ribs,,Questionable,Ribs,,Limited Participation in Practice,NaT,Early Doucet,WR,ARZ
2,2009.0,REG,ARI,1.0,00-0022101,QB,Brian St. Pierre,Brian,St. Pierre,Back,,Questionable,Back,,Limited Participation in Practice,NaT,Brian St. Pierre,QB,ARZ
3,2009.0,REG,ARI,1.0,00-0025529,WR,Steve Breaston,Steve,Breaston,Knee,,Probable,Knee,,Full Participation in Practice,NaT,Steve Breaston,WR,KC
4,2009.0,REG,ARI,1.0,00-0011641,WR,Sean Morey,Sean,Morey,Ribs,,Probable,Ribs,,Limited Participation in Practice,NaT,Sean Morey,WR,ARZ
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27047,2024.0,SB,PHI,22.0,00-0036919,RB,Kenneth Gainwell,Kenneth,Gainwell,,,,Concussion,Knee,Full Participation in Practice,2025-02-07 20:29:05,Kenneth Gainwell,RB,PHI
27048,2024.0,SB,PHI,22.0,00-0034351,TE,Dallas Goedert,Dallas,Goedert,,,,Ankle,,Full Participation in Practice,2025-02-07 20:29:05,Dallas Goedert,TE,PHI
27049,2024.0,SB,PHI,22.0,00-0036912,WR,DeVonta Smith,DeVonta,Smith,,,,Hamstring,,Limited Participation in Practice,2025-02-07 20:29:05,DeVonta Smith,WR,PHI
27050,2024.0,SB,PHI,22.0,00-0037132,WR,Britain Covey,Britain,Covey,Neck,,Out,Neck,,Full Participation in Practice,2025-02-07 20:32:07,Britain Covey,WR,PHI


In [60]:
inj_clean = inj_clean.drop(columns=['position_y', 'team_y'])

In [61]:
inj_clean = inj_clean.rename(columns={'position_x' : 'position', 'team_x' : 'team'})

In [62]:
inj_clean.head()

Unnamed: 0,season,game_type,team,week,player_id,position,full_name,first_name,last_name,report_primary_injury,report_secondary_injury,report_status,practice_primary_injury,practice_secondary_injury,practice_status,date_modified,cleaned_name
0,2009.0,REG,ARI,1.0,00-0022084,WR,Anquan Boldin,Anquan,Boldin,Hamstring,,Questionable,Hamstring,,Limited Participation in Practice,NaT,Anquan Boldin
1,2009.0,REG,ARI,1.0,00-0026221,WR,Early Doucet,Early,Doucet,Ribs,,Questionable,Ribs,,Limited Participation in Practice,NaT,Early Doucet
2,2009.0,REG,ARI,1.0,00-0022101,QB,Brian St. Pierre,Brian,St. Pierre,Back,,Questionable,Back,,Limited Participation in Practice,NaT,Brian St. Pierre
3,2009.0,REG,ARI,1.0,00-0025529,WR,Steve Breaston,Steve,Breaston,Knee,,Probable,Knee,,Full Participation in Practice,NaT,Steve Breaston
4,2009.0,REG,ARI,1.0,00-0011641,WR,Sean Morey,Sean,Morey,Ribs,,Probable,Ribs,,Limited Participation in Practice,NaT,Sean Morey


In [63]:
inj_clean[['season', 'week']] = inj_clean[['season', 'week']].astype(int)

In [64]:
inj_clean.head()

Unnamed: 0,season,game_type,team,week,player_id,position,full_name,first_name,last_name,report_primary_injury,report_secondary_injury,report_status,practice_primary_injury,practice_secondary_injury,practice_status,date_modified,cleaned_name
0,2009,REG,ARI,1,00-0022084,WR,Anquan Boldin,Anquan,Boldin,Hamstring,,Questionable,Hamstring,,Limited Participation in Practice,NaT,Anquan Boldin
1,2009,REG,ARI,1,00-0026221,WR,Early Doucet,Early,Doucet,Ribs,,Questionable,Ribs,,Limited Participation in Practice,NaT,Early Doucet
2,2009,REG,ARI,1,00-0022101,QB,Brian St. Pierre,Brian,St. Pierre,Back,,Questionable,Back,,Limited Participation in Practice,NaT,Brian St. Pierre
3,2009,REG,ARI,1,00-0025529,WR,Steve Breaston,Steve,Breaston,Knee,,Probable,Knee,,Full Participation in Practice,NaT,Steve Breaston
4,2009,REG,ARI,1,00-0011641,WR,Sean Morey,Sean,Morey,Ribs,,Probable,Ribs,,Limited Participation in Practice,NaT,Sean Morey


In [65]:
cleaned_dfs = [rw_clean, rs_clean, ss_clean, inj_clean]

for df in cleaned_dfs:
  df['unique_id'] = (df['player_id'] + '_' + df['season'].astype(str) + '_' + df['cleaned_name'] + '_' + df['position']).str.replace(' ', '_')

In [66]:
inj_clean.head()

Unnamed: 0,season,game_type,team,week,player_id,position,full_name,first_name,last_name,report_primary_injury,report_secondary_injury,report_status,practice_primary_injury,practice_secondary_injury,practice_status,date_modified,cleaned_name,unique_id
0,2009,REG,ARI,1,00-0022084,WR,Anquan Boldin,Anquan,Boldin,Hamstring,,Questionable,Hamstring,,Limited Participation in Practice,NaT,Anquan Boldin,00-0022084_2009_Anquan_Boldin_WR
1,2009,REG,ARI,1,00-0026221,WR,Early Doucet,Early,Doucet,Ribs,,Questionable,Ribs,,Limited Participation in Practice,NaT,Early Doucet,00-0026221_2009_Early_Doucet_WR
2,2009,REG,ARI,1,00-0022101,QB,Brian St. Pierre,Brian,St. Pierre,Back,,Questionable,Back,,Limited Participation in Practice,NaT,Brian St. Pierre,00-0022101_2009_Brian_St._Pierre_QB
3,2009,REG,ARI,1,00-0025529,WR,Steve Breaston,Steve,Breaston,Knee,,Probable,Knee,,Full Participation in Practice,NaT,Steve Breaston,00-0025529_2009_Steve_Breaston_WR
4,2009,REG,ARI,1,00-0011641,WR,Sean Morey,Sean,Morey,Ribs,,Probable,Ribs,,Limited Participation in Practice,NaT,Sean Morey,00-0011641_2009_Sean_Morey_WR


## Export Datasets for more specific cleaning

In [67]:
cleaned_dfs = [(rw_clean, 'rw_clean'), (rs_clean, 'rs_clean'), (ss_clean, 'ss_clean'), (inj_clean, 'inj_clean')]

for df, name in cleaned_dfs:
  df.to_csv(f'{name}.csv', index=False)

In [68]:
HHHHHHHHHH

NameError: name 'HHHHHHHHHH' is not defined

In [None]:
# # Take a look at NaNs in primary injury -- they look to be rest days

# injuries[injuries['report_primary_injury'].isna()]

Unnamed: 0,season,game_type,team,week,gsis_id,position,full_name,first_name,last_name,report_primary_injury,report_secondary_injury,report_status,practice_primary_injury,practice_secondary_injury,practice_status,date_modified
51,2009.0,REG,DET,1.0,00-0021525,TE,Casey Fitzsimmons,Casey,Fitzsimmons,,,,Knee,,Full Participation in Practice,NaT
52,2009.0,REG,DET,1.0,00-0020329,CB,Anthony Henry,Anthony,Henry,,,,Shoulder,,Full Participation in Practice,NaT
71,2009.0,REG,IND,1.0,00-0023464,CB,Marlin Jackson,Marlin,Jackson,,,,Not Injury Related,,Did Not Participate In Practice,NaT
72,2009.0,REG,IND,1.0,00-0024277,CB,Tim Jennings,Tim,Jennings,,,,Not Injury Related,,Full Participation in Practice,NaT
80,2009.0,REG,KC,1.0,00-0025701,RB,Jackie Battle,Jackie,Battle,,,,Illness,,Full Participation in Practice,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6206,2024.0,SB,PHI,22.0,00-0037002,G,Landon Dickerson,Landon,Dickerson,,,,Knee,,Full Participation in Practice,2025-02-07 20:29:05
6207,2024.0,SB,PHI,22.0,00-0036919,RB,Kenneth Gainwell,Kenneth,Gainwell,,,,Concussion,Knee,Full Participation in Practice,2025-02-07 20:29:05
6208,2024.0,SB,PHI,22.0,00-0034351,TE,Dallas Goedert,Dallas,Goedert,,,,Ankle,,Full Participation in Practice,2025-02-07 20:29:05
6209,2024.0,SB,PHI,22.0,00-0038112,G,Cam Jurgens,Cam,Jurgens,,,,Back,,Full Participation in Practice,2025-02-07 20:29:05


NameError: name 'injuries' is not defined

In [None]:
# Take a look at players who were out but participated in practice

injuries[(injuries['report_status'] == 'Out') & (injuries['practice_status'] != 'Did Not Participate In Practice')]

Unnamed: 0,season,game_type,team,week,gsis_id,position,full_name,first_name,last_name,report_primary_injury,report_secondary_injury,report_status,practice_primary_injury,practice_secondary_injury,practice_status,date_modified
25,2009.0,REG,CIN,1.0,00-0025532,CB,David Jones,David,Jones,Foot,,Out,Foot,,Out (Definitely Will Not Play),NaT
26,2009.0,REG,CIN,1.0,00-0026982,T,Andre Smith,Andre,Smith,Foot,,Out,Foot,,Out (Definitely Will Not Play),NaT
31,2009.0,REG,CLE,1.0,00-0022905,G,Rex Hadnot,Rex,Hadnot,Knee,,Out,Knee,,Out (Definitely Will Not Play),NaT
67,2009.0,REG,HOU,1.0,00-0022767,CB,Jacques Reeves,Jacques,Reeves,Fibula,,Out,Fibula,,Full Participation in Practice,NaT
115,2009.0,REG,OAK,1.0,00-0026366,WR,Chaz Schilens,Chaz,Schilens,Foot,,Out,Foot,,Out (Definitely Will Not Play),NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6125,2024.0,DIV,LA,20.0,00-0035617,LB,Troy Reeder,Troy,Reeder,Hamstring,,Out,Hamstring,,Full Participation in Practice,2025-01-17 20:56:08
6137,2024.0,DIV,PHI,20.0,00-0038978,DT,Byron Young,Byron,Young,Hamstring,,Out,Hamstring,,Full Participation in Practice,2025-01-17 20:10:19
6179,2024.0,CON,PHI,21.0,00-0038978,DT,Byron Young,Byron,Young,Hamstring,,Out,Hamstring,,Full Participation in Practice,2025-01-24 19:56:53
6180,2024.0,CON,PHI,21.0,00-0037132,WR,Britain Covey,Britain,Covey,Neck,,Out,Neck,,Limited Participation in Practice,2025-01-24 19:56:14


In [None]:
# rosters head check

# rosters_seas.head()

Unnamed: 0,season,team,position,depth_chart_position,jersey_number,status,player_name,first_name,last_name,birth_date,...,status_description_abbr,football_name,esb_id,gsis_it_id,smart_id,entry_year,rookie_year,draft_club,draft_number,age
0,2009,DAL,T,,76,ACT,Flozell Adams,Flozell,Adams,1975-05-18,...,A01,Flozell,ADA280969,,32004144-4128-0969-9390-de3ae33872eb,1998.0,1998.0,DAL,38.0,34.0
1,2009,PHI,K,,2,ACT,David Akers,David,Akers,1974-12-09,...,A01,David,AKE551610,,3200414b-4555-1610-e0e6-a72c82e419e7,1997.0,1997.0,,,34.0
2,2009,WAS,LS,,67,ACT,Ethan Albright,Lawrence,Albright,1971-05-01,...,A01,Ethan,ALB637766,,3200414c-4263-7766-3ff2-577dbeb62e1c,1994.0,1994.0,,,38.0
3,2009,BLT,ILB,,51,RES,Brendon Ayanbadejo,Brendon,Ayanbadejo,1976-09-06,...,A01,Brendon,AYA436705,,32004159-4143-6705-e019-7daf37d2f1af,1999.0,1999.0,,,32.0
4,2009,DEN,CB,,24,ACT,Champ Bailey,Roland,Bailey,1978-06-22,...,A01,Champ,BAI582194,,32004241-4958-2194-25da-8624608fa14d,1999.0,1999.0,WAS,7.0,31.0


In [None]:
# Rosters info check

# rosters_seas.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 42809 entries, 0 to 42821
Data columns (total 37 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   season                   42809 non-null  int32         
 1   team                     42809 non-null  object        
 2   position                 42793 non-null  object        
 3   depth_chart_position     27846 non-null  object        
 4   jersey_number            42575 non-null  object        
 5   status                   42791 non-null  object        
 6   player_name              42808 non-null  object        
 7   first_name               42809 non-null  object        
 8   last_name                42809 non-null  object        
 9   birth_date               40232 non-null  datetime64[ns]
 10  height                   42788 non-null  float64       
 11  weight                   42792 non-null  float64       
 12  college                  35033 n

In [None]:
# # Take a deeper look at missing bdays

# missing = rosters_seas[rosters_seas['birth_date'].isna()]

In [None]:
# How many missing bdays

# missing.shape[0]

2577

In [None]:
# Look at roster status types

# rosters_seas['status'].value_counts()

ACT    27180
RES     5413
CUT     4641
DEV     3768
INA     1276
UFA      124
RET       99
RSN       80
NWT       62
SUS       54
PUP       35
TRC       20
TRD       12
RSR       10
EXE       10
RFA        3
TRT        3
E14        1
Name: status, dtype: int64

**Found data dictionary outlining status type definitions**

In [None]:
# Take a look at players on IR

# rosters_seas[rosters_seas['status'] == 'RES']

Unnamed: 0,season,team,position,depth_chart_position,jersey_number,status,player_name,first_name,last_name,birth_date,...,status_description_abbr,football_name,esb_id,gsis_it_id,smart_id,entry_year,rookie_year,draft_club,draft_number,age
3,2009,BLT,ILB,,51,RES,Brendon Ayanbadejo,Brendon,Ayanbadejo,1976-09-06,...,A01,Brendon,AYA436705,,32004159-4143-6705-e019-7daf37d2f1af,1999.0,1999.0,,,32.0
6,2009,CLV,ILB,,50,RES,Eric Barton,Eric,Barton,1977-09-29,...,A01,Eric,BAR766216,,32004241-5276-6216-1360-96908316eed6,1999.0,1999.0,OAK,146,31.0
29,2009,CAR,QB,,17,RES,Jake Delhomme,Jake,Delhomme,1975-01-10,...,I01,Jake,DEL367367,,32004445-4c36-7367-f75e-2e1555e1a440,1997.0,1997.0,,,34.0
41,2009,MIA,NT,,95,RES,Jason Ferguson,Jason,Ferguson,1974-11-28,...,A01,Jason,FER162060,,32004645-5216-2060-ec42-a45bc9746a28,1997.0,1997.0,NYJ,229,34.0
43,2009,ATL,WR,,86,RES,Brian Finneran,Brian,Finneran,1976-01-31,...,A01,Brian,FIN583520,,32004649-4e58-3520-402c-d27b0be70001,1998.0,1998.0,,,33.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42776,2024,NE,WR,WR,1.0,RES,Ja'Lynn Polk,Ja'Lynn,Polk,2001-04-11,...,R01,Ja'Lynn,POL136646,57158,3200504f-4c13-6646-64f9-3598ba01ba77,2024.0,2024.0,NE,37.0,23.0
42790,2024,ARI,RB,RB,33.0,RES,Trey Benson,Trey,Benson,2002-07-23,...,R01,Trey,BEN649191,57187,32004245-4e64-9191-567b-28b29177bf93,2024.0,2024.0,ARI,66.0,22.0
42791,2024,MIN,QB,QB,9.0,RES,J.J. McCarthy,Jonathan,McCarthy,2003-01-20,...,R01,J.J.,MCC189531,57131,32004d43-4318-9531-b3f5-120a14383239,2024.0,2024.0,MIN,10.0,21.0
42796,2024,BUF,OL,T,67.0,RES,Travis Clayton,Travis,Clayton,2001-02-17,...,R01,Travis,CLA826931,57341,3200434c-4182-6931-55e8-0b9b4cf80da4,2024.0,2024.0,BUF,221.0,23.0


In [None]:
# Will look at weekly roster info for one year just to explore this particular dataset

# roster_weekly = nfl.import_weekly_rosters(range(2009,2025))

In [None]:
# Weekly info check

# roster_weekly.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 641724 entries, 0 to 641723
Data columns (total 37 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   season                   641724 non-null  int32         
 1   team                     641724 non-null  object        
 2   position                 641691 non-null  object        
 3   depth_chart_position     419417 non-null  object        
 4   jersey_number            640779 non-null  object        
 5   status                   641695 non-null  object        
 6   player_name              641708 non-null  object        
 7   first_name               641724 non-null  object        
 8   last_name                641724 non-null  object        
 9   birth_date               615721 non-null  datetime64[ns]
 10  height                   641552 non-null  float64       
 11  weight                   641698 non-null  float64       
 12  college         

In [None]:
# roster_weekly['season'].value_counts(ascending=True)

2011    31338
2012    31431
2009    31632
2013    31901
2010    31926
2014    31964
2015    32098
2016    35020
2020    44130
2023    45655
2022    46163
2024    46579
2021    46696
2017    51321
2019    51632
2018    52238
Name: season, dtype: int64

**For purposes of fantasy football, only the following positions are on a roster**

1. QB
2. WR
3. RB
4. TE
5. K

Will condense roster data down to only those positions

In [None]:
# Condense weekly roster info down to fantasy footballrelevent postions

# ff_weekly = roster_weekly[roster_weekly['position'].isin(['RB', 'QB', 'TE', 'WR', 'K'])]

In [None]:
# Info check now that data has been condensed to fantasy relevent positions

# ff_weekly.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 206429 entries, 19 to 641697
Data columns (total 37 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   season                   206429 non-null  int32         
 1   team                     206429 non-null  object        
 2   position                 206429 non-null  object        
 3   depth_chart_position     138519 non-null  object        
 4   jersey_number            206131 non-null  object        
 5   status                   206425 non-null  object        
 6   player_name              206429 non-null  object        
 7   first_name               206429 non-null  object        
 8   last_name                206429 non-null  object        
 9   birth_date               197386 non-null  datetime64[ns]
 10  height                   206341 non-null  float64       
 11  weight                   206427 non-null  float64       
 12  college        

In [None]:
# ff_weekly

Unnamed: 0,season,team,position,depth_chart_position,jersey_number,status,player_name,first_name,last_name,birth_date,...,status_description_abbr,football_name,esb_id,gsis_it_id,smart_id,entry_year,rookie_year,draft_club,draft_number,age
19,2009,PHI,K,,2,ACT,David Akers,David,Akers,1974-12-09,...,A01,David,AKE551610,,3200414b-4555-1610-e0e6-a72c82e419e7,1997.0,1997.0,,,34.880
20,2009,PHI,K,,2,ACT,David Akers,David,Akers,1974-12-09,...,A01,David,AKE551610,,3200414b-4555-1610-e0e6-a72c82e419e7,1997.0,1997.0,,,34.782
21,2009,PHI,K,,2,ACT,David Akers,David,Akers,1974-12-09,...,A01,David,AKE551610,,3200414b-4555-1610-e0e6-a72c82e419e7,1997.0,1997.0,,,
22,2009,PHI,K,,2,ACT,David Akers,David,Akers,1974-12-09,...,A01,David,AKE551610,,3200414b-4555-1610-e0e6-a72c82e419e7,1997.0,1997.0,,,34.839
23,2009,PHI,K,,2,ACT,David Akers,David,Akers,1974-12-09,...,A01,David,AKE551610,,3200414b-4555-1610-e0e6-a72c82e419e7,1997.0,1997.0,,,34.858
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
641678,2024,GB,K,K,16.0,DEV,Alex Hale,Alex,Hale,1998-03-07,...,P03,Alex,HAL031251,58041,32004841-4c03-1251-97e3-c8a0c955a05c,2024.0,2024.0,,,26.642
641679,2024,GB,K,K,16.0,DEV,Alex Hale,Alex,Hale,1998-03-07,...,P03,Alex,HAL031251,58041,32004841-4c03-1251-97e3-c8a0c955a05c,2024.0,2024.0,,,26.776
641680,2024,GB,K,K,16.0,DEV,Alex Hale,Alex,Hale,1998-03-07,...,P03,Alex,HAL031251,58041,32004841-4c03-1251-97e3-c8a0c955a05c,2024.0,2024.0,,,26.749
641681,2024,GB,K,K,16.0,DEV,Alex Hale,Alex,Hale,1998-03-07,...,P03,Alex,HAL031251,58041,32004841-4c03-1251-97e3-c8a0c955a05c,2024.0,2024.0,,,26.853


In [None]:
# ESB ID Is a unique ID that is missing the least amount of values. Check how many unique IDs there
# are as there are repeat players in weekly data

# ff_weekly['esb_id'].nunique()

3802

**Birth date/age is most likely correlated to missed games. There is enough missing data to warrant a further look as it is not missing at random, birthday data is available**

In [None]:
# Check how many unique IDs are missing birthdays

# ff_weekly[ff_weekly['birth_date'].isna()]['esb_id'].nunique()

811

**10% of players are missing birthdays**

In [None]:
# Make dataframe with just missing bdays

# no_bday = ff_weekly[ff_weekly['birth_date'].isna()]

In [None]:
# no_bday

Unnamed: 0,season,team,position,depth_chart_position,jersey_number,status,player_name,first_name,last_name,birth_date,...,status_description_abbr,football_name,esb_id,gsis_it_id,smart_id,entry_year,rookie_year,draft_club,draft_number,age
166278,2014,NO,TE,,84,,Tom Crabtree,Tom,Crabtree,NaT,...,A01,Tom,CRA116294,,32004352-4111-6294-35de-6139ff2a2c55,2009.0,2009.0,,,
177529,2014,SEA,WR,,88,,Phil Bates,Phillip,Bates,NaT,...,A01,Phil,BAT137358,,32004241-5413-7358-a2cc-8d3ad6f94d03,2012.0,2012.0,,,
177530,2014,SEA,WR,,88,,Phil Bates,Phillip,Bates,NaT,...,I01,Phil,BAT137358,,32004241-5413-7358-a2cc-8d3ad6f94d03,2012.0,2012.0,,,
177531,2014,CLV,WR,,89,,Phil Bates,Phillip,Bates,NaT,...,I01,Phil,BAT137358,,32004241-5413-7358-a2cc-8d3ad6f94d03,2012.0,2012.0,,,
222324,2016,ATL,K,K,6.0,CUT,Shayne Graham,Shayne,Graham,NaT,...,,Shayne,GRA217171,,32004752-4121-7171-4b31-b84625809e98,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
637414,2024,HOU,WR,WR,,CUT,Jaxon Janke,Jaxon,Janke,NaT,...,A01,Jaxon,JAN439601,57733,,2024.0,2024.0,,,
638364,2024,MIN,WR,WR,,CUT,Devron Harper,Devron,Harper,NaT,...,A01,Devron,,57871,,2024.0,2024.0,,,
639281,2024,NYJ,WR,WR,,CUT,Hamze El-Zayat,Hamze,El-Zayat,NaT,...,A01,Hamze,ELZ207645,57925,,2024.0,2024.0,,,
639282,2024,NYJ,RB,RB,,CUT,Markese Stepp,Markese,Stepp,NaT,...,W03,Markese,,57926,,2024.0,2024.0,,,


In [None]:
# no_bday['esb_id'].nunique()

811

In [None]:
# no_bday.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9043 entries, 166278 to 641488
Data columns (total 37 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   season                   9043 non-null   int32         
 1   team                     9043 non-null   object        
 2   position                 9043 non-null   object        
 3   depth_chart_position     9039 non-null   object        
 4   jersey_number            8930 non-null   object        
 5   status                   9039 non-null   object        
 6   player_name              9043 non-null   object        
 7   first_name               9043 non-null   object        
 8   last_name                9043 non-null   object        
 9   birth_date               0 non-null      datetime64[ns]
 10  height                   8955 non-null   float64       
 11  weight                   9041 non-null   float64       
 12  college                  87

**I have a feeling that players that were cut may have data in some weeks. My reasoning is that cut players were actually on the roster at some point and their birthdays should be available.**

I know that Shayne Graham from above played for many years in the league, so I assume his birthday should be available. Looking at his information further in the full roster set to see if his bday is out there

In [None]:
# ff_weekly[ff_weekly['player_name'] == 'Shayne Graham']

Unnamed: 0,season,team,position,depth_chart_position,jersey_number,status,player_name,first_name,last_name,birth_date,...,status_description_abbr,football_name,esb_id,gsis_it_id,smart_id,entry_year,rookie_year,draft_club,draft_number,age
2696,2009,CIN,K,,17,ACT,Shayne Graham,Shayne,Graham,1977-12-09,...,A01,Shayne,GRA217171,,32004752-4121-7171-4b31-b84625809e98,2000.0,2000.0,,,31.819
2697,2009,CIN,K,,17,ACT,Shayne Graham,Shayne,Graham,1977-12-09,...,A01,Shayne,GRA217171,,32004752-4121-7171-4b31-b84625809e98,2000.0,2000.0,,,31.858
2698,2009,CIN,K,,17,ACT,Shayne Graham,Shayne,Graham,1977-12-09,...,A01,Shayne,GRA217171,,32004752-4121-7171-4b31-b84625809e98,2000.0,2000.0,,,31.762
2699,2009,CIN,K,,17,ACT,Shayne Graham,Shayne,Graham,1977-12-09,...,A01,Shayne,GRA217171,,32004752-4121-7171-4b31-b84625809e98,2000.0,2000.0,,,32.085
2700,2009,CIN,K,,17,ACT,Shayne Graham,Shayne,Graham,1977-12-09,...,A01,Shayne,GRA217171,,32004752-4121-7171-4b31-b84625809e98,2000.0,2000.0,,,31.992
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
190306,2015,ATL,K,,6,ACT,Shayne Graham,Shayne,Graham,1977-12-09,...,A01,Shayne,GRA217171,,32004752-4121-7171-4b31-b84625809e98,2000.0,2000.0,,,38.048
222324,2016,ATL,K,K,6.0,CUT,Shayne Graham,Shayne,Graham,NaT,...,,Shayne,GRA217171,,32004752-4121-7171-4b31-b84625809e98,,,,,
222325,2016,ATL,K,K,6.0,CUT,Shayne Graham,Shayne,Graham,NaT,...,,Shayne,GRA217171,,32004752-4121-7171-4b31-b84625809e98,,,,,
222326,2016,ATL,K,K,6.0,ACT,Shayne Graham,Shayne,Graham,NaT,...,A01,Shayne,GRA217171,26127,32004752-4121-7171-4b31-b84625809e98,2000.0,2000.0,,,


In [None]:
# check to see if there are different Shayne Graham's for this player ID

# ff_weekly[ff_weekly['player_name'] == 'Shayne Graham']['esb_id'].nunique()

1

**Per above, there is only one unique ID attributed to the name Shayne Graham. That means this is the same player. Some of the information is missing for certain weeks and some isn't. I will take the IDs with missing birthdays and fill in their birthdays if there is a row with a birthday that shares that same ID**

In [None]:
# Fill in birthdays in fashion outlined above

# ff_weekly['birth_date'] = ff_weekly.groupby('esb_id')['birth_date'].transform(lambda x: x.fillna(x.mode()[0] if not x.mode().empty else pd.NaT))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ff_weekly['birth_date'] = ff_weekly.groupby('esb_id')['birth_date'].transform(lambda x: x.fillna(x.mode()[0] if not x.mode().empty else pd.NaT))


In [None]:
# check how many IDs have missing birthdays now that values have added

# ff_weekly[ff_weekly['birth_date'].isna()]['esb_id'].nunique()

399

In [None]:
# Check again how many unique IDs there are

# ff_weekly['esb_id'].nunique()

3802

**Still missing 10% of players bdays. Take a further look at those with missing now that we've eliminated half**

In [None]:
# no_bday = ff_weekly[ff_weekly['birth_date'].isna()]

In [None]:
# no_bday

Unnamed: 0,season,team,position,depth_chart_position,jersey_number,status,player_name,first_name,last_name,birth_date,...,status_description_abbr,football_name,esb_id,gsis_it_id,smart_id,entry_year,rookie_year,draft_club,draft_number,age
177529,2014,SEA,WR,,88,,Phil Bates,Phillip,Bates,NaT,...,A01,Phil,BAT137358,,32004241-5413-7358-a2cc-8d3ad6f94d03,2012.0,2012.0,,,
177530,2014,SEA,WR,,88,,Phil Bates,Phillip,Bates,NaT,...,I01,Phil,BAT137358,,32004241-5413-7358-a2cc-8d3ad6f94d03,2012.0,2012.0,,,
177531,2014,CLV,WR,,89,,Phil Bates,Phillip,Bates,NaT,...,I01,Phil,BAT137358,,32004241-5413-7358-a2cc-8d3ad6f94d03,2012.0,2012.0,,,
226640,2016,CHI,RB,FB,36.0,ACT,Darrel Young,Darrel,Young,NaT,...,A01,Darrel,YOU170298,35089,3200594f-5517-0298-f836-c6523658d362,2009.0,2009.0,,,
230036,2016,DAL,RB,FB,44.0,ACT,Tyler Clutts,Tyler,Clutts,NaT,...,A01,Tyler,CLU606964,33950,3200434c-5560-6964-784b-17a55d4bb080,2008.0,2011.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
637414,2024,HOU,WR,WR,,CUT,Jaxon Janke,Jaxon,Janke,NaT,...,A01,Jaxon,JAN439601,57733,,2024.0,2024.0,,,
638364,2024,MIN,WR,WR,,CUT,Devron Harper,Devron,Harper,NaT,...,A01,Devron,,57871,,2024.0,2024.0,,,
639281,2024,NYJ,WR,WR,,CUT,Hamze El-Zayat,Hamze,El-Zayat,NaT,...,A01,Hamze,ELZ207645,57925,,2024.0,2024.0,,,
639282,2024,NYJ,RB,RB,,CUT,Markese Stepp,Markese,Stepp,NaT,...,W03,Markese,,57926,,2024.0,2024.0,,,


**On first glance there are a handful of CUT player statuses with those missing birthdays.**

There is a good chance these players never saw an NFL field, therefore their data isn't relevant for this study. I will make a note to look at players later who played next to no snaps and will remove them.

**Taking a look now at players with missing ESB IDs to see if they are relevant**

In [None]:
# no_bday[no_bday['esb_id'].isna()]

Unnamed: 0,season,team,position,depth_chart_position,jersey_number,status,player_name,first_name,last_name,birth_date,...,status_description_abbr,football_name,esb_id,gsis_it_id,smart_id,entry_year,rookie_year,draft_club,draft_number,age
503307,2021,SEA,TE,TE,,DEV,Mark Vital,Mark,Vital,NaT,...,P01,Mark,,54098,,2021.0,,,,
503308,2021,SEA,TE,TE,,DEV,DeShon Williams,DeShon,Williams,NaT,...,P01,DeShon,,54349,,2021.0,2021.0,,,
540269,2022,ATL,TE,TE,81.0,CUT,Brayden Lenius,Brayden,Lenius,NaT,...,A01,Brayden,,49941,,2019.0,2022.0,,,
540471,2022,JAX,K,K,,CUT,Andrew Mevis,Andrew,Mevis,NaT,...,A01,Andrew,,54762,,2022.0,2022.0,,,
541579,2022,BAL,WR,WR,,CUT,Trevon Clark,Trevon,Clark,NaT,...,A01,Trevon,,54854,,2022.0,2022.0,,,
544871,2022,DET,WR,WR,,RET,Corey Sutton,Corey,Sutton,NaT,...,R02,Corey,,55029,,2022.0,2022.0,,,
544872,2022,DET,WR,WR,,RET,Corey Sutton,Corey,Sutton,NaT,...,R02,Corey,,55029,,2022.0,2022.0,,,
544873,2022,DET,WR,WR,,RET,Corey Sutton,Corey,Sutton,NaT,...,R02,Corey,,55029,,2022.0,2022.0,,,
544874,2022,DET,WR,WR,,RET,Corey Sutton,Corey,Sutton,NaT,...,R02,Corey,,55029,,2022.0,2022.0,,,
544875,2022,DET,WR,WR,,RET,Corey Sutton,Corey,Sutton,NaT,...,R02,Corey,,55029,,2022.0,2022.0,,,


In [None]:
# ff_weekly[ff_weekly['player_name'] == 'Corey Sutton']

Unnamed: 0,season,team,position,depth_chart_position,jersey_number,status,player_name,first_name,last_name,birth_date,...,status_description_abbr,football_name,esb_id,gsis_it_id,smart_id,entry_year,rookie_year,draft_club,draft_number,age
544871,2022,DET,WR,WR,,RET,Corey Sutton,Corey,Sutton,NaT,...,R02,Corey,,55029,,2022.0,2022.0,,,
544872,2022,DET,WR,WR,,RET,Corey Sutton,Corey,Sutton,NaT,...,R02,Corey,,55029,,2022.0,2022.0,,,
544873,2022,DET,WR,WR,,RET,Corey Sutton,Corey,Sutton,NaT,...,R02,Corey,,55029,,2022.0,2022.0,,,
544874,2022,DET,WR,WR,,RET,Corey Sutton,Corey,Sutton,NaT,...,R02,Corey,,55029,,2022.0,2022.0,,,
544875,2022,DET,WR,WR,,RET,Corey Sutton,Corey,Sutton,NaT,...,R02,Corey,,55029,,2022.0,2022.0,,,
544876,2022,DET,WR,WR,,RET,Corey Sutton,Corey,Sutton,NaT,...,R02,Corey,,55029,,2022.0,2022.0,,,
544877,2022,DET,WR,WR,,RET,Corey Sutton,Corey,Sutton,NaT,...,R02,Corey,,55029,,2022.0,2022.0,,,
544878,2022,DET,WR,WR,,RET,Corey Sutton,Corey,Sutton,NaT,...,R02,Corey,,55029,,2022.0,2022.0,,,
544879,2022,DET,WR,WR,,RET,Corey Sutton,Corey,Sutton,NaT,...,R02,Corey,,55029,,2022.0,2022.0,,,
544880,2022,DET,WR,WR,,RET,Corey Sutton,Corey,Sutton,NaT,...,R02,Corey,,55029,,2022.0,2022.0,,,


In [None]:
# ff_weekly = ff_weekly[ff_weekly['esb_id'].notna()]

In [None]:
# ff_weekly['esb_id'].isna().sum()

0

In [None]:
# ff_weekly[(ff_weekly['birth_date'].isna()) & (ff_weekly['status'] == 'CUT')]

Unnamed: 0,season,team,position,depth_chart_position,jersey_number,status,player_name,first_name,last_name,birth_date,...,status_description_abbr,football_name,esb_id,gsis_it_id,smart_id,entry_year,rookie_year,draft_club,draft_number,age
232127,2016,DAL,QB,QB,,CUT,Jerrod Johnson,Jerrod,Johnson,NaT,...,,Jerrod,JOH344163,,32004a4f-4834-4163-eb38-208cbed4c4ad,,,,,
232771,2016,KC,WR,WR,89.0,CUT,Kashif Moore,Kashif,Moore,NaT,...,,Kashif,MOO445899,,32004d4f-4f44-5899-322d-4b4f567300f7,,,,,
238073,2016,SEA,WR,WR,17.0,CUT,Uzoma Nwachukwu,Uzoma,Nwachukwu,NaT,...,,Uzoma,NWA118653,40700,32004e57-4111-8653-1061-f355d798ab90,2013.0,2013.0,,,
238074,2016,SEA,WR,WR,17.0,CUT,Uzoma Nwachukwu,Uzoma,Nwachukwu,NaT,...,,Uzoma,NWA118653,40700,32004e57-4111-8653-1061-f355d798ab90,2013.0,2013.0,,,
238075,2016,SEA,WR,WR,17.0,CUT,Uzoma Nwachukwu,Uzoma,Nwachukwu,NaT,...,,Uzoma,NWA118653,40700,32004e57-4111-8653-1061-f355d798ab90,2013.0,2013.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
595138,2023,MIN,RB,RB,46.0,CUT,Aaron Dykes,Aaron,Dykes,NaT,...,W03,Aaron,DYK077058,56716,32004459-4b07-7058-648c-4cd00a392f36,2023.0,,,,
635985,2024,LAC,TE,TE,85.0,CUT,Isaac Rex,Isaac,Rex,NaT,...,W03,Isaac,REX207645,57566,32005245-5820-7645-ebd2-898988316238,2024.0,2024.0,,,
637414,2024,HOU,WR,WR,,CUT,Jaxon Janke,Jaxon,Janke,NaT,...,A01,Jaxon,JAN439601,57733,,2024.0,2024.0,,,
639281,2024,NYJ,WR,WR,,CUT,Hamze El-Zayat,Hamze,El-Zayat,NaT,...,A01,Hamze,ELZ207645,57925,,2024.0,2024.0,,,


In [None]:
# no_bday = ff_weekly[ff_weekly['birth_date'].isna()]

399

In [None]:
# Look at one teams IR for one week and see what it looks like in news/game logs

# car_wk1_2024 = roster_weekly[(roster_weekly['team'] == 'CAR') & (roster_weekly['week'] == 1) & (roster_weekly['season'] == 2024)]

In [None]:
# car_wk1_2024[car_wk1_2024['status'] == 'RES']

In [None]:
# ids = nfl.import_ids()

In [None]:
# ids.head()

In [None]:
# ids.info()

In [None]:
# seas_data = nfl.import_seasonal_data(range(2009,2025), 'ALL')

In [None]:
# seas_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11744 entries, 0 to 11743
Data columns (total 58 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   player_id                    11744 non-null  object 
 1   season                       11744 non-null  int64  
 2   season_type                  11744 non-null  object 
 3   completions                  11744 non-null  int32  
 4   attempts                     11744 non-null  int32  
 5   passing_yards                11744 non-null  float64
 6   passing_tds                  11744 non-null  int32  
 7   interceptions                11744 non-null  float64
 8   sacks                        11744 non-null  float64
 9   sack_yards                   11744 non-null  float64
 10  sack_fumbles                 11744 non-null  int32  
 11  sack_fumbles_lost            11744 non-null  int32  
 12  passing_air_yards            11744 non-null  float64
 13  passing_yards_af

In [None]:
# seas_data['player_id'].nunique()

2847