# Datasets 

Reference Kaggle notebook (input data page): <https://www.kaggle.com/code/dataranch/offensive-stats-eda-model-comparison/input>

Dataset source (`philiphyde1/nfl-stats-1999-2022`, downloaded below with `kagglehub`): <https://www.kaggle.com/datasets/philiphyde1/nfl-stats-1999-2022/data>

Kaggle Data

In [52]:
import pandas as pd
import kagglehub
import os

# Fetch data from Kaggle API.
# On failure the error is reported and then re-raised: every later step needs
# `path`, so carrying on would only produce a confusing NameError (or silently
# reuse a stale `path` left over in the kernel from an earlier run).
try:
    path = kagglehub.dataset_download("philiphyde1/nfl-stats-1999-2022")  # Downloads the dataset
    print("Data downloaded from Kaggle to:", path)
except Exception as e:
    print(f"Failed to download data from Kaggle: {e}")
    raise

# Get the list of files in the downloaded directory dynamically
downloaded_files = os.listdir(path)

# Container for the "load every CSV" cell further down the notebook
dataframes = []

# Filename fragments identifying the three player-level CSVs used in this
# notebook. Every slot starts as None so that a missing or unreadable file is
# reported by the checks at the end of this cell instead of raising NameError.
player_file_keys = (
    '2024_player_predictions',
    'weekly_player_data',
    'yearly_player_data',
)
loaded_frames = dict.fromkeys(player_file_keys)

for file_name in downloaded_files:
    # Skip non-CSV files like .DS_Store
    if not file_name.endswith('.csv'):
        print(f"Skipping non-CSV file: {file_name}")
        continue

    file_path = os.path.join(path, file_name)

    # The first fragment contained in the file name wins (same precedence as
    # an if/elif chain); CSVs matching none of the fragments are ignored here.
    matched_key = next((key for key in player_file_keys if key in file_name), None)
    if matched_key is None:
        continue

    try:
        loaded_frames[matched_key] = pd.read_csv(file_path)
        print(f"Data loaded successfully from: {file_path} ({matched_key}.csv)")
    except Exception as e:
        # Best effort: report the failure and keep going; the slot stays None.
        print(f"Failed to load data from {file_path}: {e}")

# Expose the three tables under the names the rest of the notebook uses
pred_df = loaded_frames['2024_player_predictions']
week_df = loaded_frames['weekly_player_data']
year_df = loaded_frames['yearly_player_data']

# Check if the specific dataframes are loaded correctly
for title, frame, csv_name in (
    ("2024 Player Predictions", pred_df, "2024_player_predictions.csv"),
    ("Weekly Player Data", week_df, "weekly_player_data.csv"),
    ("Yearly Player Data", year_df, "yearly_player_data.csv"),
):
    if frame is not None:
        print(f"{title} DataFrame:")
        print(frame.head())
    else:
        print(f"Failed to load {csv_name}")




Data downloaded from Kaggle to: /Users/salomerivas/.cache/kagglehub/datasets/philiphyde1/nfl-stats-1999-2022/versions/10
Data loaded successfully from: /Users/salomerivas/.cache/kagglehub/datasets/philiphyde1/nfl-stats-1999-2022/versions/10/weekly_player_data.csv (weekly_player_data.csv)
Skipping non-CSV file: .DS_Store
Data loaded successfully from: /Users/salomerivas/.cache/kagglehub/datasets/philiphyde1/nfl-stats-1999-2022/versions/10/2024_player_predictions.csv (2024_player_predictions.csv)
Data loaded successfully from: /Users/salomerivas/.cache/kagglehub/datasets/philiphyde1/nfl-stats-1999-2022/versions/10/yearly_player_data.csv (yearly_player_data.csv)
2024 Player Predictions DataFrame:
  team   player_id   player_name position  depth  draft_year  draft_round  \
0  BUF  00-0034857    Josh Allen       QB      1        2018            1   
1  DAL  00-0036358   CeeDee Lamb       WR      1        2020            1   
2  DAL  00-0033077  Dak Prescott       QB      1        2016      

In [53]:
# Show the column index of the predictions, weekly and yearly tables,
# one index per line of output
print(pred_df.columns, week_df.columns, year_df.columns, sep="\n")

Index(['team', 'player_id', 'player_name', 'position', 'depth', 'draft_year',
       'draft_round', 'draft_pick', 'draft_ovr', 'height', 'weight', 'college',
       'age', 'seasons_played', 'rush_attempts', 'rushing_yards', 'touches',
       'receptions', 'targets', 'receiving_yards', 'yards_after_catch',
       'total_yards', 'total_tds', 'run_td', 'reception_td',
       'fantasy_points_ppr', 'position_fantasy_rank', 'total_fantasy_rank',
       'pass_attempts', 'complete_pass', 'incomplete_pass', 'passing_yards',
       'passing_air_yards', 'interception', 'pass_td'],
      dtype='object')
Index(['team', 'player_id', 'player_name', 'position', 'season', 'week',
       'game_type', 'pass_attempts', 'complete_pass', 'incomplete_pass',
       'passing_yards', 'passing_air_yards', 'pass_td', 'interception',
       'pass_fumble_lost', 'targets', 'receptions', 'receiving_yards',
       'receiving_air_yards', 'yards_after_catch', 'reception_td',
       'reception_fumble_lost', 'rush_attempt

In [54]:
# Load every CSV found in the download directory into `dataframes`.
# The list is rebuilt from scratch in this cell so that re-running it does not
# append a second copy of each frame to whatever an earlier run left behind.
dataframes = []

for file_name in downloaded_files:
    # Skip non-CSV files like .DS_Store
    if not file_name.endswith('.csv'):
        print(f"Skipping non-CSV file: {file_name}")
        continue

    file_path = os.path.join(path, file_name)
    try:
        print(f"Loading CSV file: {file_name}")
        df = pd.read_csv(file_path)
        dataframes.append(df)
        print(f"Data loaded successfully from: {file_path}")
    except Exception as e:
        # Best effort: report the unreadable file and continue with the rest
        print(f"Failed to load data from {file_path}: {e}")


Loading CSV file: yearly_team_data.csv
Data loaded successfully from: /Users/salomerivas/.cache/kagglehub/datasets/philiphyde1/nfl-stats-1999-2022/versions/10/yearly_team_data.csv
Loading CSV file: weekly_player_data.csv
Data loaded successfully from: /Users/salomerivas/.cache/kagglehub/datasets/philiphyde1/nfl-stats-1999-2022/versions/10/weekly_player_data.csv
Skipping non-CSV file: .DS_Store
Loading CSV file: 2024_player_predictions.csv
Data loaded successfully from: /Users/salomerivas/.cache/kagglehub/datasets/philiphyde1/nfl-stats-1999-2022/versions/10/2024_player_predictions.csv
Loading CSV file: yearly_player_data.csv
Data loaded successfully from: /Users/salomerivas/.cache/kagglehub/datasets/philiphyde1/nfl-stats-1999-2022/versions/10/yearly_player_data.csv
Loading CSV file: weekly_team_data.csv
Data loaded successfully from: /Users/salomerivas/.cache/kagglehub/datasets/philiphyde1/nfl-stats-1999-2022/versions/10/weekly_team_data.csv


# Weekly Data 

Overview of the Dataset 

In [56]:
import pandas as pd

# Dimensions of the weekly table (rows, columns)
print("Dataset Shape:", week_df.shape)

# In order: first rows, last rows, column labels, per-column data types
print(
    week_df.head(),
    week_df.tail(),
    week_df.columns,
    week_df.dtypes,
    sep="\n",
)

Dataset Shape: (50863, 66)
  team   player_id player_name position  season  week game_type  \
0  TEN  00-0035676  A.J. Brown       WR    2019     1       REG   
1  TEN  00-0035676  A.J. Brown       WR    2019     2       REG   
2  TEN  00-0035676  A.J. Brown       WR    2019     3       REG   
3  TEN  00-0035676  A.J. Brown       WR    2019     4       REG   
4  TEN  00-0035676  A.J. Brown       WR    2019     5       REG   

   pass_attempts  complete_pass  incomplete_pass  ...      college  age  wins  \
0            0.0            0.0              0.0  ...  Mississippi   22   1.0   
1            0.0            0.0              0.0  ...  Mississippi   22   1.0   
2            0.0            0.0              0.0  ...  Mississippi   22   1.0   
3            0.0            0.0              0.0  ...  Mississippi   22   2.0   
4            0.0            0.0              0.0  ...  Mississippi   22   2.0   

   losses  ties  win_pct  opp_wins  opp_losses  opp_ties  opp_win_pct  
0     0.0  

In [61]:
# Report whether the weekly table has a 'player_name' column
print(
    "The 'player_name' column exists!"
    if 'player_name' in week_df.columns
    else "The 'player_name' column does not exist."
)
# Then display that column
print(week_df.player_name)

The 'player_name' column exists!
0            A.J. Brown
1            A.J. Brown
2            A.J. Brown
3            A.J. Brown
4            A.J. Brown
              ...      
50858    Zonovan Knight
50859    Zonovan Knight
50860    Zonovan Knight
50861    Zonovan Knight
50862    Zonovan Knight
Name: player_name, Length: 50863, dtype: object


In [72]:
# Rows of the weekly table whose player name is missing (NaN)
nan_player_name1 = week_df.loc[
    week_df['player_name'].isna()
]
print(nan_player_name1)

Empty DataFrame
Columns: [team, player_id, player_name, position, season, week, game_type, pass_attempts, complete_pass, incomplete_pass, passing_yards, passing_air_yards, pass_td, interception, pass_fumble_lost, targets, receptions, receiving_yards, receiving_air_yards, yards_after_catch, reception_td, reception_fumble_lost, rush_attempts, rushing_yards, run_td, run_fumble_lost, fantasy_points_ppr, air_yards_share, target_share, comp_pct, int_pct, pass_td_pct, ypa, rec_td_pct, yptarget, ypr, rush_td_pct, ypc, touches, total_tds, td_pct, total_yards, yptouch, passer_rating, opponent, offense_snaps, offense_pct, team_snaps, team_yards, yards_pct, draft_year, draft_round, draft_pick, draft_ovr, height, weight, college, age, wins, losses, ties, win_pct, opp_wins, opp_losses, opp_ties, opp_win_pct]
Index: []

[0 rows x 66 columns]


# Yearly Data 

Overview of the Dataset 

In [57]:
import pandas as pd

# Dimensions of the yearly table (rows, columns)
print("Dataset Shape:", year_df.shape)

# In order: first rows, last rows, column labels, per-column data types
print(
    year_df.head(),
    year_df.tail(),
    year_df.columns,
    year_df.dtypes,
    sep="\n",
)

Dataset Shape: (5663, 195)
  team   player_id player_name position  season  depth  pass_attempts  \
0  TEN  00-0035676  A.J. Brown       WR    2019    2.0            0.0   
1  TEN  00-0035676  A.J. Brown       WR    2020    1.0            0.0   
2  TEN  00-0035676  A.J. Brown       WR    2021    1.0            2.0   
3  PHI  00-0035676  A.J. Brown       WR    2022    1.0            0.0   
4  PHI  00-0035676  A.J. Brown       WR    2023    1.0            0.0   

   complete_pass  incomplete_pass  passing_yards  ...  vacated_receptions  \
0            0.0              0.0            0.0  ...               147.0   
1            0.0              0.0            0.0  ...                62.0   
2            0.0              2.0            0.0  ...                74.0   
3            0.0              0.0            0.0  ...               135.0   
4            0.0              0.0            0.0  ...                47.0   

   vacated_receiving_yards  vacated_receiving_air_yards  \
0           

In [64]:
# Report whether the yearly table has a 'player_name' column
print(
    "The 'player_name' column exists!"
    if 'player_name' in year_df.columns
    else "The 'player_name' column does not exist."
)
# Then display that column
print(year_df.player_name)

The 'player_name' column exists!
0           A.J. Brown
1           A.J. Brown
2           A.J. Brown
3           A.J. Brown
4           A.J. Brown
             ...      
5658         Zay Jones
5659         Zay Jones
5660         Zay Jones
5661    Zonovan Knight
5662    Zonovan Knight
Name: player_name, Length: 5663, dtype: object


In [73]:
# Rows of the yearly table whose player name is missing (NaN)
nan_player_name2 = year_df.loc[
    year_df['player_name'].isna()
]
print(nan_player_name2)

Empty DataFrame
Columns: [team, player_id, player_name, position, season, depth, pass_attempts, complete_pass, incomplete_pass, passing_yards, passing_air_yards, pass_td, interception, pass_fumble_lost, targets, receptions, receiving_yards, receiving_air_yards, yards_after_catch, reception_td, reception_fumble_lost, rush_attempts, rushing_yards, run_td, run_fumble_lost, fantasy_points_ppr, air_yards_share, target_share, comp_pct, int_pct, pass_td_pct, ypa, rec_td_pct, yptarget, ypr, rush_td_pct, ypc, touches, total_tds, td_pct, total_yards, yptouch, games, pass_ypg, rec_ypg, rush_ypg, ypg, ppg, passer_rating, game_type, offense_pct, delta_depth, delta_pass_attempts, delta_targets, delta_rush_attempts, delta_air_yards_share, delta_target_share, delta_comp_pct, delta_int_pct, delta_pass_td_pct, delta_ypa, delta_rec_td_pct, delta_yptarget, delta_rush_td_pct, delta_ypc, delta_touches, delta_td_pct, delta_yptouch, delta_games, delta_pass_ypg, delta_rec_ypg, delta_rush_ypg, delta_ypg, delta_

In [74]:
# Normalise yearly player names in place: trim surrounding whitespace,
# then lower-case
year_df['player_name'] = (
    year_df['player_name']
    .str.strip()
    .str.lower()
)
print(year_df.player_name)

0           a.j. brown
1           a.j. brown
2           a.j. brown
3           a.j. brown
4           a.j. brown
             ...      
5658         zay jones
5659         zay jones
5660         zay jones
5661    zonovan knight
5662    zonovan knight
Name: player_name, Length: 5663, dtype: object


# Predictions 2024 

Overview of the Dataset 

In [59]:
import pandas as pd

# Dimensions of the 2024 predictions table (rows, columns)
print("Dataset Shape:", pred_df.shape)

# In order: first rows, last rows, column labels, per-column data types
print(
    pred_df.head(),
    pred_df.tail(),
    pred_df.columns,
    pred_df.dtypes,
    sep="\n",
)


Dataset Shape: (176, 35)
  team   player_id   player_name position  depth  draft_year  draft_round  \
0  BUF  00-0034857    Josh Allen       QB      1        2018            1   
1  DAL  00-0036358   CeeDee Lamb       WR      1        2020            1   
2  DAL  00-0033077  Dak Prescott       QB      1        2016            4   
3  MIA  00-0033040   Tyreek Hill       WR      1        2016            5   
4  PHI  00-0036389   Jalen Hurts       QB      1        2020            2   

   draft_pick  draft_ovr  height  ...  fantasy_points_ppr  \
0           7          7      77  ...              374.48   
1          17         17      74  ...              373.00   
2          37        135      74  ...              368.36   
3          28        165      70  ...              365.90   
4          21         53      73  ...              357.16   

  position_fantasy_rank  total_fantasy_rank  pass_attempts  complete_pass  \
0                     1                   1            522          

Statistics 

Check for missing values 

In [66]:
# Report whether the predictions table has a 'player_name' column
print(
    "The 'player_name' column exists!"
    if 'player_name' in pred_df.columns
    else "The 'player_name' column does not exist."
)
# Then display that column
print(pred_df.player_name)

The 'player_name' column exists!
0        Josh Allen
1       CeeDee Lamb
2      Dak Prescott
3       Tyreek Hill
4       Jalen Hurts
           ...     
171     Mo Alie-Cox
172    Luke Farrell
173      Ian Thomas
174    J.K. Dobbins
175    Greg Dulcich
Name: player_name, Length: 176, dtype: object


In [75]:
# Rows of the predictions table whose player name is missing (NaN)
nan_player_name3 = pred_df.loc[
    pred_df['player_name'].isna()
]
print(nan_player_name3)

Empty DataFrame
Columns: [team, player_id, player_name, position, depth, draft_year, draft_round, draft_pick, draft_ovr, height, weight, college, age, seasons_played, rush_attempts, rushing_yards, touches, receptions, targets, receiving_yards, yards_after_catch, total_yards, total_tds, run_td, reception_td, fantasy_points_ppr, position_fantasy_rank, total_fantasy_rank, pass_attempts, complete_pass, incomplete_pass, passing_yards, passing_air_yards, interception, pass_td]
Index: []

[0 rows x 35 columns]


In [76]:
# Normalise predicted-player names in place: trim surrounding whitespace,
# then lower-case
pred_df['player_name'] = (
    pred_df['player_name']
    .str.strip()
    .str.lower()
)
print(pred_df.player_name)

0        josh allen
1       ceedee lamb
2      dak prescott
3       tyreek hill
4       jalen hurts
           ...     
171     mo alie-cox
172    luke farrell
173      ian thomas
174    j.k. dobbins
175    greg dulcich
Name: player_name, Length: 176, dtype: object


In [79]:
# Define the player name to search for
player_name = "justin jefferson"

# Select the rows whose 'player_name' contains the query as a literal,
# case-insensitive substring; missing names (NaN) count as non-matches.
# regex=False is deliberate: str.contains treats the pattern as a regular
# expression by default, so a query such as "j.k. dobbins" would have its dots
# act as wildcards, and a query containing "(" or "+" could raise or mis-match.
search_results = pred_df[
    pred_df['player_name'].str.contains(player_name, case=False, na=False, regex=False)
]

# Print the rows that contain the player's name
print(search_results)

   team   player_id       player_name position  depth  draft_year  \
59  MIN  00-0036322  justin jefferson       WR      1        2020   

    draft_round  draft_pick  draft_ovr  height  ...  fantasy_points_ppr  \
59            1          22         22      73  ...               215.5   

   position_fantasy_rank  total_fantasy_rank  pass_attempts  complete_pass  \
59                    25                  60              0              0   

    incomplete_pass  passing_yards  passing_air_yards  interception  pass_td  
59                0              0                  0             0        0  

[1 rows x 35 columns]
