In [9]:
# Import dependencies
import pandas as pd
import os
import csv
from pprint import pprint


In [10]:
# Load the 2024 player predictions CSV into a DataFrame, then preview the first rows
full_player_data = pd.read_csv(filepath_or_buffer="data/2024_player_predictions.csv")
full_player_data.head(5)


Unnamed: 0,team,player_id,player_name,position,depth,draft_year,draft_round,draft_pick,draft_ovr,height,...,fantasy_points_ppr,position_fantasy_rank,total_fantasy_rank,pass_attempts,complete_pass,incomplete_pass,passing_yards,passing_air_yards,interception,pass_td
0,BUF,00-0034857,Josh Allen,QB,1,2018,1,7,7,77,...,374.48,1,1,522,322,150,3537,3817,11,25
1,DAL,00-0036358,CeeDee Lamb,WR,1,2020,1,17,17,74,...,373.0,1,2,0,0,0,0,0,0,0
2,DAL,00-0033077,Dak Prescott,QB,1,2016,4,37,135,74,...,368.36,2,3,611,391,161,3949,4334,6,29
3,MIA,00-0033040,Tyreek Hill,WR,1,2016,5,28,165,70,...,365.9,2,4,0,0,0,0,0,0,0
4,PHI,00-0036389,Jalen Hurts,QB,1,2020,2,21,53,73,...,357.16,3,5,501,303,143,3174,3520,8,20


In [14]:
# Inspect data in our DataFrame.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that emits a stray "None" line after the report).
full_player_data.info()

# Summary statistics for the numeric columns; left as the cell's last expression
# so the notebook renders it as a table instead of wrapped plain text.
full_player_data.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 176 entries, 0 to 175
Data columns (total 35 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   team                   176 non-null    object 
 1   player_id              176 non-null    object 
 2   player_name            176 non-null    object 
 3   position               176 non-null    object 
 4   depth                  176 non-null    int64  
 5   draft_year             176 non-null    int64  
 6   draft_round            176 non-null    int64  
 7   draft_pick             176 non-null    int64  
 8   draft_ovr              176 non-null    int64  
 9   height                 176 non-null    int64  
 10  weight                 176 non-null    int64  
 11  college                176 non-null    object 
 12  age                    176 non-null    int64  
 13  seasons_played         176 non-null    int64  
 14  rush_attempts          176 non-null    int64  
 15  rushin

In [30]:
# Collect the distinct values that occur in the 'position' column
position_column = full_player_data['position']
unique_positions = position_column.unique()

# Show a heading line followed by the array of positions on the next line
print("Unique positions:", unique_positions, sep="\n")

Unique positions:
['QB' 'WR' 'RB' 'TE']


In [15]:
# Filter the DataFrame to only include rows where 'position' = 'QB'
qb_data = full_player_data[full_player_data['position'] == 'QB']

# Preview the QB rows. The bare last expression gives the notebook's rich table
# display, consistent with how the WR, RB and TE frames are previewed, instead of
# print()'s line-wrapped plain text.
qb_data.head()

  team   player_id     player_name position  depth  draft_year  draft_round  \
0  BUF  00-0034857      Josh Allen       QB      1        2018            1   
2  DAL  00-0033077    Dak Prescott       QB      1        2016            4   
4  PHI  00-0036389     Jalen Hurts       QB      1        2020            2   
6  MIA  00-0036212  Tua Tagovailoa       QB      1        2020            1   
8   SF  00-0037834     Brock Purdy       QB      1        2022            7   

   draft_pick  draft_ovr  height  ...  fantasy_points_ppr  \
0           7          7      77  ...              374.48   
2          37        135      74  ...              368.36   
4          21         53      73  ...              357.16   
6           5          5      73  ...              328.64   
8          41        262      73  ...              321.70   

  position_fantasy_rank  total_fantasy_rank  pass_attempts  complete_pass  \
0                     1                   1            522            322   
2   

In [16]:
# Analyze specific columns of interest to decide importance for position
question_qb_columns = [
    'rush_attempts', 'touches', 'receptions', 'targets', 'receiving_yards',
    'yards_after_catch', 'total_yards', 'reception_td', 'run_td'
]

# Keep only the columns of interest that are actually present in qb_data.
# DataFrame.filter(items=...) skips labels that are missing, whereas plain
# qb_data[question_qb_columns] raises KeyError if any listed column is absent.
columns_qb_question = qb_data.filter(items=question_qb_columns)

# Display the first 5 QBs and only the columns of interest
print(columns_qb_question.head())

   rush_attempts  touches  receptions  targets  receiving_yards  \
0             90      618           0        0                0   
2             42      652           0        0                0   
4            110      621           0        0                0   
6             27      569           0        0                0   
8             30      531           0        0                0   

   yards_after_catch  total_yards  reception_td  run_td  
0                  0         4172             0       9  
2                  0         4211             0       4  
4                  0         3722             0      11  
6                  0         3992             0       1  
8                  0         4038             0       2  


In [17]:
# Columns to discard for quarterbacks: player id, depth, draft detail columns
# and the receiving-stat columns
remove_qb_columns = [
    'player_id',
    'depth',
    'draft_year',
    'draft_round',
    'draft_pick',
    'receptions',
    'targets',
    'receiving_yards',
    'yards_after_catch',
    'reception_td',
]

# Build a new frame without those columns; labels not present in qb_data are skipped
qb_data_cleaned = qb_data.drop(labels=remove_qb_columns, axis=1, errors='ignore')

# Preview the first five rows of the cleaned frame
print(qb_data_cleaned.head(5))

  team     player_name position  draft_ovr  height  weight            college  \
0  BUF      Josh Allen       QB          7      77     237            Wyoming   
2  DAL    Dak Prescott       QB        135      74     238  Mississippi State   
4  PHI     Jalen Hurts       QB         53      73     223           Oklahoma   
6  MIA  Tua Tagovailoa       QB          5      73     225            Alabama   
8   SF     Brock Purdy       QB        262      73     220         Iowa State   

   age  seasons_played  rush_attempts  ...  fantasy_points_ppr  \
0   28               5             90  ...              374.48   
2   31               7             42  ...              368.36   
4   26               3            110  ...              357.16   
6   26               3             27  ...              328.64   
8   25               1             30  ...              321.70   

   position_fantasy_rank  total_fantasy_rank  pass_attempts  complete_pass  \
0                      1              

In [31]:
# Inspect the cleaned QB DataFrame.
# DataFrame.info() prints its report itself and returns None, so wrapping it in
# print() would add a stray "None" line after the report.
qb_data_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
Index: 29 entries, 0 to 153
Data columns (total 25 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   team                   29 non-null     object 
 1   player_name            29 non-null     object 
 2   position               29 non-null     object 
 3   draft_ovr              29 non-null     int64  
 4   height                 29 non-null     int64  
 5   weight                 29 non-null     int64  
 6   college                29 non-null     object 
 7   age                    29 non-null     int64  
 8   seasons_played         29 non-null     int64  
 9   rush_attempts          29 non-null     int64  
 10  rushing_yards          29 non-null     int64  
 11  touches                29 non-null     int64  
 12  total_yards            29 non-null     int64  
 13  total_tds              29 non-null     int64  
 14  run_td                 29 non-null     int64  
 15  fantasy_poin

In [19]:
# Write the cleaned QB frame to data/qb_data.csv, leaving the row index out of the file
qb_data_cleaned.to_csv(path_or_buf="data/qb_data.csv", index=False)

In [22]:
# Boolean mask marking the rows whose 'position' value is 'WR'
is_wr_row = full_player_data['position'].eq('WR')

# Keep only the wide receiver rows
wr_data = full_player_data.loc[is_wr_row]

# Preview the first rows of the WR DataFrame
wr_data.head(5)

Unnamed: 0,team,player_id,player_name,position,depth,draft_year,draft_round,draft_pick,draft_ovr,height,...,fantasy_points_ppr,position_fantasy_rank,total_fantasy_rank,pass_attempts,complete_pass,incomplete_pass,passing_yards,passing_air_yards,interception,pass_td
1,DAL,00-0036358,CeeDee Lamb,WR,1,2020,1,17,17,74,...,373.0,1,2,0,0,0,0,0,0,0
3,MIA,00-0033040,Tyreek Hill,WR,1,2016,5,28,165,70,...,365.9,2,4,0,0,0,0,0,0,0
7,DET,00-0036963,Amon-Ra St. Brown,WR,1,2021,4,7,112,72,...,324.2,3,8,0,0,0,0,0,0,0
11,PHI,00-0035676,A.J. Brown,WR,1,2019,2,19,51,73,...,301.2,4,12,0,0,0,0,0,0,0
13,LA,00-0033908,Cooper Kupp,WR,1,2017,3,5,69,74,...,289.8,5,14,0,0,0,0,0,0,0


In [21]:
# Columns to discard for wide receivers: player id, depth, draft detail columns
# and the passing-stat columns
remove_wr_columns = [
    'player_id',
    'depth',
    'draft_year',
    'draft_round',
    'draft_pick',
    'pass_attempts',
    'complete_pass',
    'incomplete_pass',
    'passing_yards',
    'passing_air_yards',
    'interception',
    'pass_td',
]

# Build a new frame without those columns; labels not present in wr_data are skipped
wr_data_cleaned = wr_data.drop(labels=remove_wr_columns, axis=1, errors='ignore')

# Preview the first five rows of the cleaned frame
print(wr_data_cleaned.head(5))

   team        player_name position  draft_ovr  height  weight  \
1   DAL        CeeDee Lamb       WR         17      74     200   
3   MIA        Tyreek Hill       WR        165      70     191   
7   DET  Amon-Ra St. Brown       WR        112      72     202   
11  PHI         A.J. Brown       WR         51      73     226   
13   LA        Cooper Kupp       WR         69      74     207   

               college  age  seasons_played  rush_attempts  ...  targets  \
1             Oklahoma   25               3             12  ...      177   
3         West Alabama   29               7              6  ...      163   
7                  USC   25               2              4  ...      163   
11         Mississippi   27               4              0  ...      154   
13  Eastern Washington   31               6              1  ...      159   

    receiving_yards  yards_after_catch  total_yards  total_tds  run_td  \
1              1639                577         1794         11       1  

In [23]:
# Write the cleaned WR frame to data/wr_data.csv, leaving the row index out of the file
wr_data_cleaned.to_csv(path_or_buf="data/wr_data.csv", index=False)

In [32]:
# Inspect the cleaned WR DataFrame.
# DataFrame.info() prints its report itself and returns None, so wrapping it in
# print() would add a stray "None" line after the report.
wr_data_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
Index: 71 entries, 1 to 167
Data columns (total 23 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   team                   71 non-null     object 
 1   player_name            71 non-null     object 
 2   position               71 non-null     object 
 3   draft_ovr              71 non-null     int64  
 4   height                 71 non-null     int64  
 5   weight                 71 non-null     int64  
 6   college                71 non-null     object 
 7   age                    71 non-null     int64  
 8   seasons_played         71 non-null     int64  
 9   rush_attempts          71 non-null     int64  
 10  rushing_yards          71 non-null     int64  
 11  touches                71 non-null     int64  
 12  receptions             71 non-null     int64  
 13  targets                71 non-null     int64  
 14  receiving_yards        71 non-null     int64  
 15  yards_after_

In [24]:
# Boolean mask marking the rows whose 'position' value is 'RB'
is_rb_row = full_player_data['position'].eq('RB')

# Keep only the running back rows
rb_data = full_player_data.loc[is_rb_row]

# Preview the first rows of the RB DataFrame
rb_data.head(5)

Unnamed: 0,team,player_id,player_name,position,depth,draft_year,draft_round,draft_pick,draft_ovr,height,...,fantasy_points_ppr,position_fantasy_rank,total_fantasy_rank,pass_attempts,complete_pass,incomplete_pass,passing_yards,passing_air_yards,interception,pass_td
5,SF,00-0033280,Christian McCaffrey,RB,1,2017,1,8,8,71,...,333.4,1,6,0,0,0,0,0,0,0
15,NO,00-0033906,Alvin Kamara,RB,1,2017,3,3,67,70,...,289.5,2,16,0,0,0,0,0,0,0
20,NYJ,00-0038120,Breece Hall,RB,1,2022,2,4,36,71,...,281.0,3,21,0,0,0,0,0,0,0
21,JAX,00-0036973,Travis Etienne,RB,1,2021,1,25,25,70,...,276.5,4,22,0,0,0,0,0,0,0
26,DET,00-0039139,Jahmyr Gibbs,RB,1,2023,1,12,12,69,...,258.9,5,27,0,0,0,0,0,0,0


In [25]:
# Columns to discard for running backs: player id, depth, draft detail columns
# and the passing-stat columns
remove_rb_columns = [
    'player_id',
    'depth',
    'draft_year',
    'draft_round',
    'draft_pick',
    'pass_attempts',
    'complete_pass',
    'incomplete_pass',
    'passing_yards',
    'passing_air_yards',
    'interception',
    'pass_td',
]

# Build a new frame without those columns; labels not present in rb_data are skipped
rb_data_cleaned = rb_data.drop(labels=remove_rb_columns, axis=1, errors='ignore')

# Preview the first five rows of the cleaned frame
print(rb_data_cleaned.head(5))

   team          player_name position  draft_ovr  height  weight     college  \
5    SF  Christian McCaffrey       RB          8      71     210    Stanford   
15   NO         Alvin Kamara       RB         67      70     215   Tennessee   
20  NYJ          Breece Hall       RB         36      71     217  Iowa State   
21  JAX       Travis Etienne       RB         25      70     215     Clemson   
26  DET         Jahmyr Gibbs       RB         12      69     200     Alabama   

    age  seasons_played  rush_attempts  ...  targets  receiving_yards  \
5    28               6            260  ...       77              457   
15   29               6            203  ...      104              630   
20   23               1            234  ...       88              530   
21   25               2            263  ...       73              472   
26   21               0            189  ...       64              289   

    yards_after_catch  total_yards  total_tds  run_td  reception_td  \
5        

In [26]:
# Write the cleaned RB frame to data/rb_data.csv, leaving the row index out of the file
rb_data_cleaned.to_csv(path_or_buf="data/rb_data.csv", index=False)

In [33]:
# Inspect the cleaned RB DataFrame.
# DataFrame.info() prints its report itself and returns None, so wrapping it in
# print() would add a stray "None" line after the report.
rb_data_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
Index: 40 entries, 5 to 174
Data columns (total 23 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   team                   40 non-null     object 
 1   player_name            40 non-null     object 
 2   position               40 non-null     object 
 3   draft_ovr              40 non-null     int64  
 4   height                 40 non-null     int64  
 5   weight                 40 non-null     int64  
 6   college                40 non-null     object 
 7   age                    40 non-null     int64  
 8   seasons_played         40 non-null     int64  
 9   rush_attempts          40 non-null     int64  
 10  rushing_yards          40 non-null     int64  
 11  touches                40 non-null     int64  
 12  receptions             40 non-null     int64  
 13  targets                40 non-null     int64  
 14  receiving_yards        40 non-null     int64  
 15  yards_after_

In [27]:
# Boolean mask marking the rows whose 'position' value is 'TE'
is_te_row = full_player_data['position'].eq('TE')

# Keep only the tight end rows
te_data = full_player_data.loc[is_te_row]

# Preview the first rows of the TE DataFrame
te_data.head(5)

Unnamed: 0,team,player_id,player_name,position,depth,draft_year,draft_round,draft_pick,draft_ovr,height,...,fantasy_points_ppr,position_fantasy_rank,total_fantasy_rank,pass_attempts,complete_pass,incomplete_pass,passing_yards,passing_air_yards,interception,pass_td
41,KC,00-0030506,Travis Kelce,TE,1,2013,3,1,63,77,...,230.2,1,42,0,0,0,0,0,0,0
50,SF,00-0033288,George Kittle,TE,1,2017,5,2,146,76,...,223.8,2,51,0,0,0,0,0,0,0
55,DET,00-0039065,Sam LaPorta,TE,1,2023,2,3,34,75,...,219.1,3,56,0,0,0,0,0,0,0
64,MIN,00-0035229,T.J. Hockenson,TE,1,2019,1,8,8,77,...,210.7,4,65,0,0,0,0,0,0,0
75,JAX,00-0033881,Evan Engram,TE,1,2017,1,23,23,75,...,193.7,5,76,0,0,0,0,0,0,0


In [28]:
# Columns to discard for tight ends: player id, depth, draft detail columns
# and the passing-stat columns
remove_te_columns = [
    'player_id',
    'depth',
    'draft_year',
    'draft_round',
    'draft_pick',
    'pass_attempts',
    'complete_pass',
    'incomplete_pass',
    'passing_yards',
    'passing_air_yards',
    'interception',
    'pass_td',
]

# Build a new frame without those columns; labels not present in te_data are skipped
te_data_cleaned = te_data.drop(labels=remove_te_columns, axis=1, errors='ignore')

# Preview the first five rows of the cleaned frame
print(te_data_cleaned.head(5))

   team     player_name position  draft_ovr  height  weight      college  age  \
41   KC    Travis Kelce       TE         63      77     250   Cincinnati   35   
50   SF   George Kittle       TE        146      76     250         Iowa   31   
55  DET     Sam LaPorta       TE         34      75     245         Iowa   23   
64  MIN  T.J. Hockenson       TE          8      77     248         Iowa   27   
75  JAX     Evan Engram       TE         23      75     240  Mississippi   30   

    seasons_played  rush_attempts  ...  targets  receiving_yards  \
41              10              0  ...      129             1022   
50               6              0  ...      107             1058   
55               0              0  ...      125              891   
64               4              0  ...      124              937   
75               6              0  ...      124              877   

    yards_after_catch  total_yards  total_tds  run_td  reception_td  \
41                477         111

In [29]:
# Write the cleaned TE frame to data/te_data.csv, leaving the row index out of the file
te_data_cleaned.to_csv(path_or_buf="data/te_data.csv", index=False)

In [34]:
# Inspect the cleaned TE DataFrame.
# DataFrame.info() prints its report itself and returns None, so wrapping it in
# print() would add a stray "None" line after the report.
te_data_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
Index: 36 entries, 41 to 175
Data columns (total 23 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   team                   36 non-null     object 
 1   player_name            36 non-null     object 
 2   position               36 non-null     object 
 3   draft_ovr              36 non-null     int64  
 4   height                 36 non-null     int64  
 5   weight                 36 non-null     int64  
 6   college                36 non-null     object 
 7   age                    36 non-null     int64  
 8   seasons_played         36 non-null     int64  
 9   rush_attempts          36 non-null     int64  
 10  rushing_yards          36 non-null     int64  
 11  touches                36 non-null     int64  
 12  receptions             36 non-null     int64  
 13  targets                36 non-null     int64  
 14  receiving_yards        36 non-null     int64  
 15  yards_after