Datasets 

https://www.kaggle.com/code/dataranch/offensive-stats-eda-model-comparison/input

https://www.kaggle.com/datasets/philiphyde1/nfl-stats-1999-2022/data

Combined Data 

In [108]:
import pandas as pd
import kagglehub
import os

# Load data from GitHub
github_url = "https://raw.githubusercontent.com/salomerivas/app-development/refs/heads/main/nfl_offensive_stats.csv"

try:
    offensive_data = pd.read_csv(github_url)
    print("Data loaded successfully from GitHub.")
except Exception as e:
    print("Failed to load data from GitHub:", e)

# Fetch data from Kaggle API
try:
    path = kagglehub.dataset_download("philiphyde1/nfl-stats-1999-2022")  # Downloads the dataset
    print("Data downloaded from Kaggle to:", path)
except Exception as e:
    print(f"Failed to download data from Kaggle: {e}")

# Get the list of files in the downloaded directory dynamically
downloaded_files = os.listdir(path)

# Read each file dynamically (instead of hardcoding paths)
dataframes = []

for file_name in downloaded_files:
    # Skip non-CSV files like .DS_Store
    if not file_name.endswith('.csv'):
        print(f"Skipping non-CSV file: {file_name}")
        continue
    
    file_path = os.path.join(path, file_name)
    try:
        df = pd.read_csv(file_path)
        dataframes.append(df)
        print(f"Data loaded successfully from: {file_path}")
    except Exception as e:
        print(f"Failed to load data from {file_path}: {e}")

# Combine all dataframes from Kaggle dataset into one
if dataframes:
    try:
        kaggle_combined_data = pd.concat(dataframes, ignore_index=True)
        print("All data from Kaggle combined successfully.")
    except Exception as e:
        print(f"Failed to combine Kaggle data: {e}")
else:
    print("No data from Kaggle available to combine.")

# Combine the GitHub data with the Kaggle data (if both are available)
if 'offensive_data' in locals() and 'kaggle_combined_data' in locals():
    try:
        # Combine the data from both sources
        df = pd.concat([offensive_data, kaggle_combined_data], ignore_index=True)
        print("All data combined successfully from both GitHub and Kaggle.")
        print(df.head())  # Display the first few rows of the combined data
    except Exception as e:
        print(f"Failed to combine data from GitHub and Kaggle: {e}")
else:
    print("No data available for combining from one or both sources.")


Data loaded successfully from GitHub.
Data downloaded from Kaggle to: /Users/salomerivas/.cache/kagglehub/datasets/philiphyde1/nfl-stats-1999-2022/versions/10
Data loaded successfully from: /Users/salomerivas/.cache/kagglehub/datasets/philiphyde1/nfl-stats-1999-2022/versions/10/yearly_team_data.csv
Data loaded successfully from: /Users/salomerivas/.cache/kagglehub/datasets/philiphyde1/nfl-stats-1999-2022/versions/10/weekly_player_data.csv
Skipping non-CSV file: .DS_Store
Data loaded successfully from: /Users/salomerivas/.cache/kagglehub/datasets/philiphyde1/nfl-stats-1999-2022/versions/10/2024_player_predictions.csv
Data loaded successfully from: /Users/salomerivas/.cache/kagglehub/datasets/philiphyde1/nfl-stats-1999-2022/versions/10/yearly_player_data.csv
Data loaded successfully from: /Users/salomerivas/.cache/kagglehub/datasets/philiphyde1/nfl-stats-1999-2022/versions/10/weekly_team_data.csv
All data from Kaggle combined successfully.
All data combined successfully from both GitHub 

GitHub Data Columns

In [109]:
print(offensive_data.columns)

Index(['game_id', 'player_id', 'position ', 'player', 'team', 'pass_cmp',
       'pass_att', 'pass_yds', 'pass_td', 'pass_int', 'pass_sacked',
       'pass_sacked_yds', 'pass_long', 'pass_rating', 'rush_att', 'rush_yds',
       'rush_td', 'rush_long', 'targets', 'rec', 'rec_yds', 'rec_td',
       'rec_long', 'fumbles_lost', 'rush_scrambles', 'designed_rush_att',
       'comb_pass_rush_play', 'comb_pass_play', 'comb_rush_play',
       'Team_abbrev', 'Opponent_abbrev', 'two_point_conv', 'total_ret_td',
       'offensive_fumble_recovery_td', 'pass_yds_bonus', 'rush_yds_bonus',
       'rec_yds_bonus', 'Total_DKP', 'Off_DKP', 'Total_FDP', 'Off_FDP',
       'Total_SDP', 'Off_SDP', 'pass_target_yds', 'pass_poor_throws',
       'pass_blitzed', 'pass_hurried', 'rush_yds_before_contact', 'rush_yac',
       'rush_broken_tackles', 'rec_air_yds', 'rec_yac', 'rec_drops', 'offense',
       'off_pct', 'vis_team', 'home_team', 'vis_score', 'home_score', 'OT',
       'Roof', 'Surface', 'Temperature', 'H

In [113]:
# Check if a specific column exists
column_name = 'pass_td'  # replace with the column you're searching for

if column_name in offensive_data.columns:
    print(f"'{column_name}' column exists in the DataFrame.")
else:
    print(f"'{column_name}' column does not exist in the DataFrame.")

'pass_td' column exists in the DataFrame.


In [112]:
# Check if a specific column exists
column_name = 'player'  # replace with the column you're searching for

if column_name in offensive_data.columns:
    print(f"'{column_name}' column exists in the DataFrame.")
else:
    print(f"'{column_name}' column does not exist in the DataFrame.")

'player' column exists in the DataFrame.


Kaggle Data Columns

In [71]:
print(kaggle_combined_data.columns)

Index(['team', 'season', 'total_snaps', 'yards_gained', 'touchdown',
       'extra_point_attempt', 'field_goal_attempt', 'total_points',
       'td_points', 'xp_points',
       ...
       'vacated_receptions', 'vacated_receiving_yards',
       'vacated_receiving_air_yards', 'vacated_yards_after_catch',
       'vacated_reception_td', 'vacated_rush_attempts',
       'vacated_rushing_yards', 'vacated_run_td', 'vacated_touches',
       'vacated_total_yards'],
      dtype='object', length=236)


In [92]:
# Check if a specific column exists
column_name = 'player_name'  # replace with the column you're searching for

if column_name in kaggle_combined_data.columns:
    print(f"'{column_name}' column exists in the DataFrame.")
else:
    print(f"'{column_name}' column does not exist in the DataFrame.")


'player_name' column exists in the DataFrame.


In [93]:
column_name = 'touchdown'  # replace with the column you're searching for

if column_name in kaggle_combined_data.columns:
    print(f"'{column_name}' column exists in the DataFrame.")
else:
    print(f"'{column_name}' column does not exist in the DataFrame.")


'touchdown' column exists in the DataFrame.


In [114]:
df

Unnamed: 0,game_id,player_id,position,player,team,pass_cmp,pass_att,pass_yds,pass_td,pass_int,...,vacated_receptions,vacated_receiving_yards,vacated_receiving_air_yards,vacated_yards_after_catch,vacated_reception_td,vacated_rush_attempts,vacated_rushing_yards,vacated_run_td,vacated_touches,vacated_total_yards
0,201909050chi,RodgAa00,QB,Aaron Rodgers,GNB,18.0,30.0,203.0,1.0,0.0,...,,,,,,,,,,
1,201909050chi,JoneAa00,RB,Aaron Jones,GNB,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
2,201909050chi,ValdMa00,WR,Marquez Valdes-Scantling,GNB,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
3,201909050chi,AdamDa01,WR,Davante Adams,GNB,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
4,201909050chi,GrahJi00,TE,Jimmy Graham,GNB,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83238,,,,,WAS,,,,1.0,,...,,,,,,,,,,
83239,,,,,WAS,,,,3.0,,...,,,,,,,,,,
83240,,,,,WAS,,,,1.0,,...,,,,,,,,,,
83241,,,,,WAS,,,,1.0,,...,,,,,,,,,,


Overview of the Dataset 

In [115]:
import pandas as pd

# Check the shape
print("Dataset Shape:", df.shape)

print(df.head())

print(df.tail())

# List the column names
print(df.columns)

# Check data types for each column
print(df.dtypes)


Dataset Shape: (83243, 301)
        game_id player_id position                     player team  pass_cmp  \
0  201909050chi  RodgAa00        QB             Aaron Rodgers  GNB      18.0   
1  201909050chi  JoneAa00        RB               Aaron Jones  GNB       0.0   
2  201909050chi  ValdMa00        WR  Marquez Valdes-Scantling  GNB       0.0   
3  201909050chi  AdamDa01        WR             Davante Adams  GNB       0.0   
4  201909050chi  GrahJi00        TE              Jimmy Graham  GNB       0.0   

   pass_att  pass_yds  pass_td  pass_int  ...  vacated_receptions  \
0      30.0     203.0      1.0       0.0  ...                 NaN   
1       0.0       0.0      0.0       0.0  ...                 NaN   
2       0.0       0.0      0.0       0.0  ...                 NaN   
3       0.0       0.0      0.0       0.0  ...                 NaN   
4       0.0       0.0      0.0       0.0  ...                 NaN   

   vacated_receiving_yards  vacated_receiving_air_yards  \
0                

Statistics 

In [116]:
# Summary statistics for numerical columns
print(df.describe())

# Summary statistics for categorical columns
print(df.describe(include=['object']))


  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)


           pass_cmp      pass_att      pass_yds       pass_td      pass_int  \
count  19973.000000  19973.000000  19973.000000  83243.000000  19973.000000   
mean       1.844240      2.860011     20.565513      0.519539      0.064988   
std        6.294815      9.659786     70.841227      2.649633      0.336662   
min        0.000000      0.000000     -2.000000      0.000000      0.000000   
25%        0.000000      0.000000      0.000000      0.000000      0.000000   
50%        0.000000      0.000000      0.000000      0.000000      0.000000   
75%        0.000000      0.000000      0.000000      0.000000      0.000000   
max       47.000000     68.000000    525.000000     57.000000      5.000000   

        pass_sacked  pass_sacked_yds     pass_long   pass_rating  \
count  19973.000000     19973.000000  19973.000000  19973.000000   
mean       0.192059         1.299354      3.401742      9.026606   
std        0.789580         5.613713     11.543382     28.645285   
min        0.000

In [117]:
# Check if a specific column exists
column_name1 = 'player_name'  # replace with the column you're searching for
column_name2 = 'player'  # replace with the column you're searching for

if column_name1 in df.columns:
    print(f"'{column_name1}' column exists in the DataFrame.")
else:
    print(f"'{column_name1}' column does not exist in the DataFrame.")

if column_name2 in df.columns:
    print(f"'{column_name2}' column exists in the DataFrame.")
else:
    print(f"'{column_name2}' column does not exist in the DataFrame.")

'player_name' column exists in the DataFrame.
'player' column exists in the DataFrame.


In [118]:
# Check if a specific column exists
column_name3 = 'pass_td'  # replace with the column you're searching for
column_name4 = 'touchdown'  # replace with the column you're searching for

if column_name3 in df.columns:
    print(f"'{column_name3}' column exists in the DataFrame.")
else:
    print(f"'{column_name3}' column does not exist in the DataFrame.")

if column_name4 in df.columns:
    print(f"'{column_name4}' column exists in the DataFrame.")
else:
    print(f"'{column_name4}' column does not exist in the DataFrame.")

'pass_td' column exists in the DataFrame.
'touchdown' column exists in the DataFrame.


Combine the data for players into just one player column and just one row of data per player

In [119]:
# Combine the columns into a new 'players' column
df['players'] = pd.concat([df['player_name'], df['player']], axis=0).reset_index(drop=True)

# Fill missing values in 'players' with the values from 'player'
df['players'] = df['players'].fillna(df['player'])

# Optionally, drop the old columns if no longer needed
df = df.drop(columns=['player_name', 'player'])

print(df)

            game_id player_id position  team  pass_cmp  pass_att  pass_yds  \
0      201909050chi  RodgAa00        QB  GNB      18.0      30.0     203.0   
1      201909050chi  JoneAa00        RB  GNB       0.0       0.0       0.0   
2      201909050chi  ValdMa00        WR  GNB       0.0       0.0       0.0   
3      201909050chi  AdamDa01        WR  GNB       0.0       0.0       0.0   
4      201909050chi  GrahJi00        TE  GNB       0.0       0.0       0.0   
...             ...       ...       ...  ...       ...       ...       ...   
83238           NaN       NaN       NaN  WAS       NaN       NaN       NaN   
83239           NaN       NaN       NaN  WAS       NaN       NaN       NaN   
83240           NaN       NaN       NaN  WAS       NaN       NaN       NaN   
83241           NaN       NaN       NaN  WAS       NaN       NaN       NaN   
83242           NaN       NaN       NaN  WAS       NaN       NaN       NaN   

       pass_td  pass_int  pass_sacked  ...  vacated_receiving_y

  df['players'] = pd.concat([df['player_name'], df['player']], axis=0).reset_index(drop=True)


In [121]:
# Check if a specific column exists
column_name = 'players'  # replace with the column you're searching for

if column_name in df.columns:
    print(f"'{column_name}' column exists in the DataFrame.")
else:
    print(f"'{column_name}' column does not exist in the DataFrame.")


'players' column exists in the DataFrame.


Check for missing values 

In [123]:
if 'players' in df.columns:
    print("The 'players' column exists!")
else:
    print("The 'players' column does not exist.")
print(df.players)

The 'players' column exists!
0                   Aaron Rodgers
1                     Aaron Jones
2        Marquez Valdes-Scantling
3                   Davante Adams
4                    Jimmy Graham
                   ...           
83238                         NaN
83239                         NaN
83240                         NaN
83241                         NaN
83242                         NaN
Name: players, Length: 83243, dtype: object


In [125]:
# Drop rows with NaN values in 'players' column, modifying the original dataframe
df.dropna(subset=['players'], inplace=True)

# Print the first few rows of the cleaned dataframe
print(df.players)


0                   Aaron Rodgers
1                     Aaron Jones
2        Marquez Valdes-Scantling
3                   Davante Adams
4                    Jimmy Graham
                   ...           
77054                   Zay Jones
77055                   Zay Jones
77056                   Zay Jones
77057              Zonovan Knight
77058              Zonovan Knight
Name: players, Length: 76675, dtype: object


In [126]:
df['players'] = df['players'].str.strip().str.lower()
print(df)

            game_id   player_id position  team  pass_cmp  pass_att  pass_yds  \
0      201909050chi    RodgAa00        QB  GNB      18.0      30.0     203.0   
1      201909050chi    JoneAa00        RB  GNB       0.0       0.0       0.0   
2      201909050chi    ValdMa00        WR  GNB       0.0       0.0       0.0   
3      201909050chi    AdamDa01        WR  GNB       0.0       0.0       0.0   
4      201909050chi    GrahJi00        TE  GNB       0.0       0.0       0.0   
...             ...         ...       ...  ...       ...       ...       ...   
77054           NaN  00-0033891       NaN   LV       NaN       NaN       NaN   
77055           NaN  00-0033891       NaN  JAX       NaN       NaN       NaN   
77056           NaN  00-0033891       NaN  JAX       NaN       NaN       NaN   
77057           NaN  00-0037157       NaN  NYJ       NaN       NaN       NaN   
77058           NaN  00-0037157       NaN  DET       NaN       NaN       NaN   

       pass_td  pass_int  pass_sacked  

In [127]:
# Check if a specific column exists
column_name = 'players'  # replace with the column you're searching for

if column_name in df.columns:
    print(f"'{column_name}' column exists in the DataFrame.")
else:
    print(f"'{column_name}' column does not exist in the DataFrame.")

'players' column exists in the DataFrame.


In [None]:
# Define a custom aggregation function
aggregation_dict = {}

# For numeric columns, apply 'sum'
numeric_cols = df.select_dtypes(include=['number']).columns
for col in numeric_cols:
    aggregation_dict[col] = 'sum'

# For non-numeric columns, apply 'first'
non_numeric_cols = df.select_dtypes(exclude=['number']).columns
for col in non_numeric_cols:
    aggregation_dict[col] = 'first'  # You can choose other methods like 'last' or 'mode'

# Perform the aggregation without resetting the index (player remains the index)
df_player_aggregated = df.groupby('player').agg(aggregation_dict)

# Print the first few rows of the aggregated data
print(df_player_aggregated.head())


               season  week  pass_attempts  complete_pass  incomplete_pass  \
player                                                                       
a.j. brown        0.0   0.0            0.0            0.0              0.0   
a.j. green        0.0   0.0            0.0            0.0              0.0   
a.j. mccarron     0.0   0.0            0.0            0.0              0.0   
aaron brewer      0.0   0.0            0.0            0.0              0.0   
aaron fuller      0.0   0.0            0.0            0.0              0.0   

               passing_yards  passing_air_yards  touchdown  interception  \
player                                                                     
a.j. brown               0.0                0.0        0.0           0.0   
a.j. green               0.0                0.0        0.0           0.0   
a.j. mccarron            0.0                0.0        0.0           0.0   
aaron brewer             0.0                0.0        0.0           0.0 

In [85]:
# Search for a player (e.g., 'Aaron Rodgers') in the 'player' column
player_name = "Justin Jefferson"
search_results = df_player_aggregated[df_player_aggregated['player'].str.contains(player_name, case=False, na=False)]

# Print the rows that contain the player's name
print(search_results)


                  season  week  pass_attempts  complete_pass  incomplete_pass  \
player                                                                          
justin jefferson     0.0   0.0            0.0            0.0              0.0   

                  passing_yards  passing_air_yards  touchdown  interception  \
player                                                                        
justin jefferson            0.0                0.0        0.0           0.0   

                  pass_fumble_lost  ...            player  Team_abbrev  \
player                              ...                                  
justin jefferson               0.0  ...  justin jefferson          MIN   

                  Opponent_abbrev  vis_team  home_team     OT  Roof  \
player                                                                
justin jefferson              GNB       GNB        MIN  False  dome   

                    Surface  Vegas_Favorite  game_date  
player                  

In [86]:
# Ensure the 'player' column exists in combined_df for filtering
if 'player' in df_player_aggregated.columns:
    # Filter rows where 'player' is 'Justin Jefferson'
    justin_jefferson_data = df_player_aggregated[df_player_aggregated['player'] == 'Justin Jefferson']
    
    # Display the 'touchdown' and 'pass_td' columns for Justin Jefferson
    justin_jefferson_touchdowns = justin_jefferson_data[['touchdown', 'pass_td']]
    print(justin_jefferson_touchdowns)
else:
    print("The 'player' column is not present in the combined dataset.")


KeyError: "['pass_td'] not in index"

Combine the data of touchdowns from both datasets

In [None]:
# Load datasets (update paths as necessary)
df1 = pd.read_csv('/Users/salomerivas/.cache/kagglehub/datasets/philiphyde1/nfl-stats-1999-2022/versions/10/weekly_player_data.csv')
df2 = pd.read_csv('https://raw.githubusercontent.com/salomerivas/app-development/refs/heads/main/nfl_offensive_stats.csv')

# Function to rename touchdown-related columns to a consistent 'touchdown' column
def rename_touchdown_column(df):
    # Rename touchdown-related columns to 'touchdown'
    if 'pass_td' in df.columns:
        df = df.rename(columns={'pass_td': 'touchdown'})
    elif 'touchdown' in df.columns:
        # Already has 'touchdown', no renaming needed
        pass
    else:
        print("Warning: No touchdown-related column found in this dataframe.")
    
    return df

# Apply the function to both datasets
df1 = rename_touchdown_column(df1)
df2 = rename_touchdown_column(df2)

# Combine both datasets into one (concatenate them vertically)
touchdown_combined_df = pd.concat([df1, df2], ignore_index=True)

# Now, you have a single dataset with all touchdown data in the 'touchdown' column
print(touchdown_combined_df.head())

  team   player_id player_name position  season  week game_type  \
0  TEN  00-0035676  A.J. Brown       WR  2019.0   1.0       REG   
1  TEN  00-0035676  A.J. Brown       WR  2019.0   2.0       REG   
2  TEN  00-0035676  A.J. Brown       WR  2019.0   3.0       REG   
3  TEN  00-0035676  A.J. Brown       WR  2019.0   4.0       REG   
4  TEN  00-0035676  A.J. Brown       WR  2019.0   5.0       REG   

   pass_attempts  complete_pass  incomplete_pass  ...   OT  Roof  Surface  \
0            0.0            0.0              0.0  ...  NaN   NaN      NaN   
1            0.0            0.0              0.0  ...  NaN   NaN      NaN   
2            0.0            0.0              0.0  ...  NaN   NaN      NaN   
3            0.0            0.0              0.0  ...  NaN   NaN      NaN   
4            0.0            0.0              0.0  ...  NaN   NaN      NaN   

   Temperature  Humidity  Wind_Speed  Vegas_Line  Vegas_Favorite  Over_Under  \
0          NaN       NaN         NaN         NaN      

In [None]:
# Find the index of the player with the highest number of touchdowns
top_touchdown_index = df_player_aggregated['touchdown'].idxmax()

# Get the player with the highest number of touchdowns using that index
top_touchdown_player = df_player_aggregated.loc[top_touchdown_index]

# Print the player's name and the number of touchdowns
print("Player with the highest touchdowns:")
print(top_touchdown_player[['player', 'touchdown']])


KeyError: 'touchdown'

In [None]:
# Converting to datetime.date values
df_player_aggregated['birthDate'] = pd.to_datetime(df_player_aggregated['birthDate']).dt.date

# Extracting the year
df_player_aggregated['birthYear'] = pd.to_datetime(df_player_aggregated['birthDate']).dt.year

# Looking at the first five rows
df_player_aggregated.head()

Unnamed: 0_level_0,pass_cmp,pass_att,pass_yds,pass_td,pass_int,pass_sacked,pass_sacked_yds,pass_long,pass_rating,rush_att,...,displayName,playDirection,route,birthDate,collegeName,gameDate,gameTimeEastern,homeTeamAbbr,visitorTeamAbbr,birthYear
player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
a.j. brown,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,79.2,6.0,...,,,,NaT,,,,,,
a.j. green,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,NaT,,,,,,
a.j. mccarron,22.0,38.0,245.0,0.0,1.0,6.0,55.0,41.0,223.5,5.0,...,,,,NaT,,,,,,
aaron brewer,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,NaT,,,,,,
aaron fuller,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,NaT,,,,,,


In [None]:
if 'player' in df_player_aggregated.columns and 'birthDate' in df_player_aggregated.columns: 
    desmond_trufant = df_player_aggregated[df_player_aggregated['player'].str.contains('Saquon Barkley', case=False, na=False)]

    # Check if any records were found
    if not desmond_trufant.empty:
        print("Saquon Barkley birth date(s):")
        print(desmond_trufant[['player', 'birthDate']])
    else:
            print("Saquon Barkley not found in the dataset.")
else:
    print("The dataset does not have the required columns: 'player' and 'birthDate'.")

Saquon Barkley birth date(s):
                        player birthDate
player                                  
saquon barkley  saquon barkley       NaT


In [None]:
df_player_aggregated.shape

(1019, 350)