# Assembling FLEX dataset
This notebook will wrangle the FanDuel and DraftKings lists of running backs, wide receivers and tight ends and create a CSV that is ready to run through the model for predictions.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import nfl_data_py as nfl
#import itertools
from functions import get_current_weekday, calculate_nfl_week, get_next_sunday, get_current_year

In [2]:
import sqlite3

In [3]:
day = get_current_weekday()

In [4]:
date_string = get_next_sunday(day)

In [5]:
week = calculate_nfl_week(date_string)

In [6]:
season = get_current_year()

# Pulling from database
Fetching the FD and DK lists from the database. These tables include names, positions, teams, opponents and salaries.

In [7]:
# Connect to the SQLite database
#These are the FD and DK player lists for the current week
conn = sqlite3.connect('nfl_dfs.db')

query_fd = "SELECT * FROM fd_table_" + str(week) + "_" + str(season)[2:]
query_dk = "SELECT * FROM dk_table_" + str(week) + "_" + str(season)[2:]

fanduel_df = pd.read_sql_query(query_fd, conn)
draftkings_df = pd.read_sql_query(query_dk, conn)

# Close the database connection
conn.close()

In [8]:
#The weekly and play-by-play data through the previous week
#We went into R Studio to get these and save them as CSVs that we can use here
weekly_df = pd.read_csv('weekly_data_' + str(season) + '_' + str(week) + '.csv')
pbp_df = pd.read_csv('pbp_data_' + str(season) + '_' + str(week) + '.csv', low_memory = False)

In [9]:
weekly_df.replace({'LA': 'LAR'}, inplace = True)
pbp_df.replace({'LA': 'LAR'}, inplace = True)

<!-- This is weekly data for each player. We can use this to calculate fantasy points for most players. -->

In [10]:
weekly_df.head()

Unnamed: 0,player_id,player_name,player_display_name,position,position_group,headshot_url,recent_team,season,week,season_type,...,receiving_first_downs,receiving_epa,receiving_2pt_conversions,racr,target_share,air_yards_share,wopr,special_teams_tds,fantasy_points,fantasy_points_ppr
0,00-0019596,T.Brady,Tom Brady,QB,QB,https://static.www.nfl.com/image/private/f_aut...,TB,2022,1,REG,...,0,,0,,,,,0,10.38,10.38
1,00-0019596,T.Brady,Tom Brady,QB,QB,https://static.www.nfl.com/image/private/f_aut...,TB,2022,2,REG,...,0,,0,,,,,0,9.4,9.4
2,00-0019596,T.Brady,Tom Brady,QB,QB,https://static.www.nfl.com/image/private/f_aut...,TB,2022,3,REG,...,0,,0,,,,,0,14.74,14.74
3,00-0019596,T.Brady,Tom Brady,QB,QB,https://static.www.nfl.com/image/private/f_aut...,TB,2022,4,REG,...,0,,0,,,,,0,25.4,25.4
4,00-0019596,T.Brady,Tom Brady,QB,QB,https://static.www.nfl.com/image/private/f_aut...,TB,2022,5,REG,...,0,,0,,,,,0,19.74,19.74


In [11]:
pbp_df.head()

Unnamed: 0,play_id,game_id,old_game_id,home_team,away_team,season_type,week,posteam,posteam_type,defteam,...,out_of_bounds,home_opening_kickoff,qb_epa,xyac_epa,xyac_mean_yardage,xyac_median_yardage,xyac_success,xyac_fd,xpass,pass_oe
0,1,2022_01_BAL_NYJ,2022091107,NYJ,BAL,REG,1,,,,...,0,1,0.0,,,,,,,
1,43,2022_01_BAL_NYJ,2022091107,NYJ,BAL,REG,1,NYJ,home,BAL,...,0,1,-0.443521,,,,,,,
2,68,2022_01_BAL_NYJ,2022091107,NYJ,BAL,REG,1,NYJ,home,BAL,...,0,1,1.468819,,,,,,0.440373,-44.037291
3,89,2022_01_BAL_NYJ,2022091107,NYJ,BAL,REG,1,NYJ,home,BAL,...,0,1,-0.492192,0.727261,6.988125,6.0,0.60693,0.227598,0.389904,61.009598
4,115,2022_01_BAL_NYJ,2022091107,NYJ,BAL,REG,1,NYJ,home,BAL,...,0,1,-0.325931,,,,,,0.443575,-44.357494


In [12]:
#rb_wr_te_df.head()

In [13]:
#rb_wr_te_df.info()

In [14]:
#nfl.see_weekly_cols()

In [15]:
weekly_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13057 entries, 0 to 13056
Data columns (total 53 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   player_id                    13057 non-null  object 
 1   player_name                  13057 non-null  object 
 2   player_display_name          13057 non-null  object 
 3   position                     13057 non-null  object 
 4   position_group               13057 non-null  object 
 5   headshot_url                 12981 non-null  object 
 6   recent_team                  13057 non-null  object 
 7   season                       13057 non-null  int64  
 8   week                         13057 non-null  int64  
 9   season_type                  13057 non-null  object 
 10  opponent_team                13057 non-null  object 
 11  completions                  13057 non-null  int64  
 12  attempts                     13057 non-null  int64  
 13  passing_yards   

In [16]:
weekly_df['position_group'].value_counts()

position_group
WR      5290
RB      3492
TE      2582
QB      1609
SPEC      30
DB        27
LB        14
OL        11
DL         2
Name: count, dtype: int64

In [17]:
weekly_df['position'].value_counts()

position
WR     5290
RB     3294
TE     2582
QB     1609
FB      198
P        30
CB       11
T        10
SS        9
ILB       7
FS        7
OLB       6
DT        2
G         1
MLB       1
Name: count, dtype: int64

Weekly data filtered for RBs, WRs and TEs

In [18]:
flex_df = weekly_df[weekly_df['position'].isin(['RB', 'HB', 'FB', 'WR', 'TE'])]

In [19]:
cols_to_group = ['season', 'week', 'recent_team', 'opponent_team', 'player_id', 'player_display_name', 'position']

So we do have quite a few duplicate names so player_id becomes more important than names.

In [20]:
# Group by 'player_display_name' and check the number of unique 'player_id's
duplicate_names = flex_df.groupby('player_display_name')['player_id'].nunique()

# Filter the results to find names associated with more than one unique ID
duplicate_names = duplicate_names[duplicate_names > 1]

In [21]:
duplicate_names

Series([], Name: player_id, dtype: int64)

In [22]:
scoring_cols = ['passing_yards', 'passing_tds', 'interceptions', 'passing_2pt_conversions', 'rushing_yards',\
                   'rushing_tds', 'rushing_fumbles_lost', 'rushing_2pt_conversions', 'receptions', 'receiving_yards',\
                   'receiving_tds', 'receiving_fumbles_lost', 'receiving_2pt_conversions', 'sack_fumbles_lost', 'special_teams_tds',\
               'targets', 'carries', 'receiving_yards_after_catch', 'receiving_air_yards', 'air_yards_share', 'target_share']

In [23]:
flex_df = flex_df.groupby(cols_to_group)[scoring_cols].sum()

In [24]:
flex_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 11364 entries, (2022, 1, 'ARI', 'KC', '00-0027942', 'A.J. Green', 'WR') to (2024, 6, 'WAS', 'BAL', '00-0039355', 'Luke McCaffrey', 'WR')
Data columns (total 21 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   passing_yards                11364 non-null  int64  
 1   passing_tds                  11364 non-null  int64  
 2   interceptions                11364 non-null  int64  
 3   passing_2pt_conversions      11364 non-null  int64  
 4   rushing_yards                11364 non-null  int64  
 5   rushing_tds                  11364 non-null  int64  
 6   rushing_fumbles_lost         11364 non-null  int64  
 7   rushing_2pt_conversions      11364 non-null  int64  
 8   receptions                   11364 non-null  int64  
 9   receiving_yards              11364 non-null  int64  
 10  receiving_tds                11364 non-null  int64  
 11  receiving_fumbles_los

In [25]:
flex_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,passing_yards,passing_tds,interceptions,passing_2pt_conversions,rushing_yards,rushing_tds,rushing_fumbles_lost,rushing_2pt_conversions,receptions,receiving_yards,...,receiving_fumbles_lost,receiving_2pt_conversions,sack_fumbles_lost,special_teams_tds,targets,carries,receiving_yards_after_catch,receiving_air_yards,air_yards_share,target_share
season,week,recent_team,opponent_team,player_id,player_display_name,position,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
2022,1,ARI,KC,00-0027942,A.J. Green,WR,0,0,0,0,0,0,0,0,2,13,...,0,0,0,0,4,0,0,42,0.157895,0.111111
2022,1,ARI,KC,00-0030061,Zach Ertz,TE,0,0,0,0,0,0,0,0,2,14,...,0,1,0,0,4,0,4,22,0.082707,0.111111
2022,1,ARI,KC,00-0033553,James Conner,RB,0,0,0,0,26,1,0,0,5,29,...,0,0,0,0,6,10,38,7,0.026316,0.166667
2022,1,ARI,KC,00-0035500,Greg Dortch,WR,0,0,0,0,0,0,0,0,7,63,...,0,0,0,0,9,0,31,62,0.233083,0.250000
2022,1,ARI,KC,00-0035527,Andy Isabella,WR,0,0,0,0,0,0,0,0,1,10,...,0,0,0,0,3,0,4,30,0.112782,0.083333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024,6,WAS,BAL,00-0033955,Jeremy McNichols,RB,0,0,0,0,9,0,0,0,0,0,...,0,0,0,0,0,2,0,0,0.000000,0.000000
2024,6,WAS,BAL,00-0035208,Olamide Zaccheaus,WR,0,0,0,0,0,0,0,0,4,27,...,0,0,0,0,6,0,14,33,0.125954,0.181818
2024,6,WAS,BAL,00-0035659,Terry McLaurin,WR,0,0,0,0,0,0,0,0,6,53,...,0,0,0,0,7,1,1,81,0.309160,0.212121
2024,6,WAS,BAL,00-0036626,Dyami Brown,WR,0,0,0,0,0,0,0,0,1,11,...,0,0,0,0,1,0,12,-1,-0.003817,0.030303


In [26]:
def replace(code, old, new):
    """
    Basically a find and replace
    """
    return code.replace(old, new)

In [27]:
flex_df.reset_index(inplace = True, drop = False)

In [28]:
#There are a few errant rows where the opponent matches the team
flex_df = flex_df[~(flex_df['recent_team'] == flex_df['opponent_team'])]

In [29]:
flex_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 11357 entries, 0 to 11363
Data columns (total 28 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   season                       11357 non-null  int64  
 1   week                         11357 non-null  int64  
 2   recent_team                  11357 non-null  object 
 3   opponent_team                11357 non-null  object 
 4   player_id                    11357 non-null  object 
 5   player_display_name          11357 non-null  object 
 6   position                     11357 non-null  object 
 7   passing_yards                11357 non-null  int64  
 8   passing_tds                  11357 non-null  int64  
 9   interceptions                11357 non-null  int64  
 10  passing_2pt_conversions      11357 non-null  int64  
 11  rushing_yards                11357 non-null  int64  
 12  rushing_tds                  11357 non-null  int64  
 13  rushing_fumbles_lost 

# What do we need from play-by-play?
Let's take a look at the data we'd like to have and see where we need to integrate the play-by-play data. We have yards_after_catch and air_yards. We have targets and target share. Even though the target share doesn't add up to 100 percent, it's still useful. We'll definitely want to spot-check that.<br>

We have carries but no carry share. Maybe we need to just take the total carries for each team in each game from the pbp and join that with the flex_df. That shouldn't be too hard. Then we can figure out every RBs workload even if they don't all add up to 100.<br>

We also want goal_to_go binary for each play to see how many carries or targets a player gets in goal-line situations and also the rate at which they score TDs in those situations.<br>

In [30]:
run_df = pbp_df[pbp_df['play_type'] == 'run']

In [31]:
running_play_counts = run_df.groupby(['season', 'week', 'posteam', 'defteam', 'game_id']).size().reset_index(name='total_carries')

In [32]:
running_play_counts.rename(columns = {'posteam' : 'recent_team', 'defteam': 'opponent_team'}, inplace = True)

In [33]:
flex_merge = pd.merge(flex_df, running_play_counts, on = ['season', 'week', 'recent_team', 'opponent_team'], how = 'left')

In [34]:
flex_merge.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11357 entries, 0 to 11356
Data columns (total 30 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   season                       11357 non-null  int64  
 1   week                         11357 non-null  int64  
 2   recent_team                  11357 non-null  object 
 3   opponent_team                11357 non-null  object 
 4   player_id                    11357 non-null  object 
 5   player_display_name          11357 non-null  object 
 6   position                     11357 non-null  object 
 7   passing_yards                11357 non-null  int64  
 8   passing_tds                  11357 non-null  int64  
 9   interceptions                11357 non-null  int64  
 10  passing_2pt_conversions      11357 non-null  int64  
 11  rushing_yards                11357 non-null  int64  
 12  rushing_tds                  11357 non-null  int64  
 13  rushing_fumbles_

In [35]:
flex_merge['carry_share'] = round(flex_merge['carries']/flex_merge['total_carries'], 3)

In [36]:
flex_merge.head()

Unnamed: 0,season,week,recent_team,opponent_team,player_id,player_display_name,position,passing_yards,passing_tds,interceptions,...,special_teams_tds,targets,carries,receiving_yards_after_catch,receiving_air_yards,air_yards_share,target_share,game_id,total_carries,carry_share
0,2022,1,ARI,KC,00-0027942,A.J. Green,WR,0,0,0,...,0,4,0,0,42,0.157895,0.111111,2022_01_KC_ARI,21,0.0
1,2022,1,ARI,KC,00-0030061,Zach Ertz,TE,0,0,0,...,0,4,0,4,22,0.082707,0.111111,2022_01_KC_ARI,21,0.0
2,2022,1,ARI,KC,00-0033553,James Conner,RB,0,0,0,...,0,6,10,38,7,0.026316,0.166667,2022_01_KC_ARI,21,0.476
3,2022,1,ARI,KC,00-0035500,Greg Dortch,WR,0,0,0,...,0,9,0,31,62,0.233083,0.25,2022_01_KC_ARI,21,0.0
4,2022,1,ARI,KC,00-0035527,Andy Isabella,WR,0,0,0,...,0,3,0,4,30,0.112782,0.083333,2022_01_KC_ARI,21,0.0


In [37]:
flex_merge.tail(30)

Unnamed: 0,season,week,recent_team,opponent_team,player_id,player_display_name,position,passing_yards,passing_tds,interceptions,...,special_teams_tds,targets,carries,receiving_yards_after_catch,receiving_air_yards,air_yards_share,target_share,game_id,total_carries,carry_share
11327,2024,6,SF,SEA,00-0036259,Jauan Jennings,WR,0,0,0,...,0,5,0,8,27,0.139896,0.185185,2024_06_SF_SEA,33,0.0
11328,2024,6,SF,SEA,00-0036261,Brandon Aiyuk,WR,0,0,0,...,0,4,0,6,63,0.326425,0.148148,2024_06_SF_SEA,33,0.0
11329,2024,6,SF,SEA,00-0037525,Jordan Mason,RB,0,0,0,...,0,2,9,7,6,0.031088,0.074074,2024_06_SF_SEA,33,0.273
11330,2024,6,SF,SEA,00-0039363,Isaac Guerendo,RB,0,0,0,...,0,0,10,0,0,0.0,0.0,2024_06_SF_SEA,33,0.303
11331,2024,6,TB,NO,00-0031408,Mike Evans,WR,0,0,0,...,0,6,0,14,87,0.399083,0.166667,2024_06_TB_NO,35,0.0
11332,2024,6,TB,NO,00-0032385,Sterling Shepard,WR,0,0,0,...,0,4,4,8,26,0.119266,0.111111,2024_06_TB_NO,35,0.114
11333,2024,6,TB,NO,00-0033921,Chris Godwin,WR,0,0,0,...,0,13,0,99,51,0.233945,0.361111,2024_06_TB_NO,35,0.0
11334,2024,6,TB,NO,00-0038129,Cade Otton,TE,0,0,0,...,0,6,0,2,45,0.206422,0.166667,2024_06_TB_NO,35,0.0
11335,2024,6,TB,NO,00-0038824,Ryan Miller,WR,0,0,0,...,0,1,0,38,1,0.004587,0.027778,2024_06_TB_NO,35,0.0
11336,2024,6,TB,NO,00-0038951,Sean Tucker,RB,0,0,0,...,0,3,14,52,4,0.018349,0.083333,2024_06_TB_NO,35,0.4


# Goal-to-go situations
We'll filter the pbp data by rows in which goal_to_go is True and the play is either a run or a pass. We find that on passing plays, some of the receiver_player_id values are null, but for running plays none of them are null. This implies that on plays in which the QB is sacked, no pass is thrown so there's no receiver.<br>

Those plays probably should be discarded as we are looking for a percentage of times a RB-WR-TE gets the ball in goal-to-go situations. When a QB is sacked, no one gets the ball.<br>

**Update:** goal_to_go leaves out situations where it's 3rd and 2 from the 4-yard line. Maybe we should see if filtering by yardline_100 <= 10 gives us more data points.

In [38]:
gtg_df_10 = pbp_df[(pbp_df['yardline_100'] <= 10) & ((pbp_df['play_type'] == 'run') | (pbp_df['play_type'] == 'pass'))]
#gtg_df = rb_wr_te_df[(rb_wr_te_df['goal_to_go'] == True) & ((rb_wr_te_df['play_type'] == 'run') | (rb_wr_te_df['play_type'] == 'pass'))]

In [39]:
gtg_df_10['play_type'].value_counts()

play_type
run     3107
pass    3009
Name: count, dtype: int64

In [40]:
gtg_df_10 = gtg_df_10[(gtg_df_10['receiver_player_id'].notnull()) | (gtg_df_10['rusher_player_id'].notnull())]
#gtg_df = gtg_df[(gtg_df['receiver_player_id'].notnull()) | (gtg_df['rusher_player_id'].notnull())]

In [41]:
gtg_df_10[(gtg_df_10['receiver_player_id'].isnull()) & (gtg_df_10['rusher_player_id'].isnull())]
#gtg_df[(gtg_df['receiver_player_id'].isnull()) & (gtg_df['rusher_player_id'].isnull())]

Unnamed: 0,play_id,game_id,old_game_id,home_team,away_team,season_type,week,posteam,posteam_type,defteam,...,out_of_bounds,home_opening_kickoff,qb_epa,xyac_epa,xyac_mean_yardage,xyac_median_yardage,xyac_success,xyac_fd,xpass,pass_oe


In [42]:
gtg_df_10[(gtg_df_10['receiver_player_id'].notnull()) & (gtg_df_10['rusher_player_id'].notnull())]
#gtg_df[(gtg_df['receiver_player_id'].notnull()) & (gtg_df['rusher_player_id'].notnull())]

Unnamed: 0,play_id,game_id,old_game_id,home_team,away_team,season_type,week,posteam,posteam_type,defteam,...,out_of_bounds,home_opening_kickoff,qb_epa,xyac_epa,xyac_mean_yardage,xyac_median_yardage,xyac_success,xyac_fd,xpass,pass_oe


In [43]:
gtg_df_10['player_id'] = gtg_df_10['rusher_player_id'].fillna(gtg_df_10['receiver_player_id'])
#gtg_df['player_id'] = gtg_df['rusher_player_id'].fillna(gtg_df['receiver_player_id'])

In [44]:
gtg_df_10.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5751 entries, 31 to 114976
Columns: 373 entries, play_id to player_id
dtypes: float64(182), int64(39), object(152)
memory usage: 16.4+ MB


In [45]:
#Total goal to go situations for each team in each game
grouped_gtg_10 = gtg_df_10.groupby(['season', 'week', 'posteam', 'defteam']).size().reset_index(name = 'plays_in_10')

#grouped_gtg = gtg_df.groupby(['season', 'week', 'posteam', 'defteam']).size().reset_index(name = 'gtg_plays')


In [46]:
gtg_df_10.columns

Index(['play_id', 'game_id', 'old_game_id', 'home_team', 'away_team',
       'season_type', 'week', 'posteam', 'posteam_type', 'defteam',
       ...
       'home_opening_kickoff', 'qb_epa', 'xyac_epa', 'xyac_mean_yardage',
       'xyac_median_yardage', 'xyac_success', 'xyac_fd', 'xpass', 'pass_oe',
       'player_id'],
      dtype='object', length=373)

In [47]:
gtg_cols = ['play_id','week', 'posteam', 'defteam', 'sp', 'desc', 'play_type', 'td_player_id',
      'incomplete_pass', 'rush_attempt', 'pass_attempt',
       'touchdown', 'pass_touchdown', 'rush_touchdown', 'complete_pass', 'receiver_player_id',
       'receiver_player_name', 'rusher_player_id',
       'rusher_player_name', 'season', 'weather', 'player_id']

In [48]:
gtg_df_10 = gtg_df_10[gtg_cols]

In [49]:
gtg_df_10 = pd.merge(gtg_df_10, grouped_gtg_10, on = ['season', 'week', 'posteam', 'defteam'], how = 'left')
#gtg_df = pd.merge(gtg_df, grouped_gtg, on = ['season', 'week', 'posteam', 'defteam'], how = 'left')

In [50]:
##Next will be to derive how many times per gtg play a player gets the ball, and then when they get it their TD percentage

In [51]:
gtg_10_player = gtg_df_10.groupby(['season', 'week', 'posteam', 'defteam', 'player_id']).size().reset_index(name = 'opps_in_10')
#gtg_player = gtg_df.groupby(['season', 'week', 'posteam', 'defteam', 'player_id']).size().reset_index(name = 'gtg_opps')

In [52]:
gtg_df_10 = pd.merge(gtg_df_10, gtg_10_player, on = ['season', 'week', 'posteam', 'defteam', 'player_id'], how = 'left')
#gtg_df = pd.merge(gtg_df, gtg_player, on = ['season', 'week', 'posteam', 'defteam', 'player_id'], how = 'left')

In [53]:
gtg_df_10['in_10_share'] = round(gtg_df_10['opps_in_10']/gtg_df_10['plays_in_10'], 3)

In [54]:
gtg_df_10.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5751 entries, 0 to 5750
Data columns (total 25 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   play_id               5751 non-null   int64  
 1   week                  5751 non-null   int64  
 2   posteam               5751 non-null   object 
 3   defteam               5751 non-null   object 
 4   sp                    5751 non-null   int64  
 5   desc                  5751 non-null   object 
 6   play_type             5751 non-null   object 
 7   td_player_id          1815 non-null   object 
 8   incomplete_pass       5751 non-null   float64
 9   rush_attempt          5751 non-null   float64
 10  pass_attempt          5751 non-null   float64
 11  touchdown             5751 non-null   float64
 12  pass_touchdown        5751 non-null   float64
 13  rush_touchdown        5751 non-null   float64
 14  complete_pass         5751 non-null   float64
 15  receiver_player_id   

In [55]:
gtg_df_10['player_id'].nunique()

591

In [56]:
flex_merge['player_id'].nunique()

690

In [57]:
##Maybe we need to pause here. There are more players in our weekly data than we have in our gtg data, which makes sense.
#Not every player will be used in gtg situations.
#Maybe somehow just check that every player in flex_merge but not gtg doesn't have any True values in goal_to_go
####Maybe we should just use common sense
#We've filtered every gtg play that was a run or a pass.
#If there's any irregularity it's likely to come out during spot-checking

In [58]:
gtg_players = list(gtg_df_10['player_id'].unique())

In [59]:
flex_players = list(flex_merge['player_id'].unique())

In [60]:
len(gtg_players)

591

In [61]:
len(flex_players)

690

In [62]:
#gtg_df = gtg_df.sort_values(by = ['season', 'week', 'posteam', 'defteam'])

In [63]:
gtg_df_10 = gtg_df_10.groupby(['season', 'week', 'posteam', 'defteam', 'player_id'], as_index=False).agg({'in_10_share': 'mean'})


In [64]:
#gtg_df = gtg_df[['season', 'week', 'posteam', 'defteam', 'player_id', 'gtg_share']]

In [65]:
gtg_df_10.tail(30)

Unnamed: 0,season,week,posteam,defteam,player_id,in_10_share
3665,2024,6,NYJ,BUF,00-0034521,0.167
3666,2024,6,NYJ,BUF,00-0037740,0.333
3667,2024,6,NYJ,BUF,00-0038120,0.167
3668,2024,6,NYJ,BUF,00-0039794,0.167
3669,2024,6,PIT,LV,00-0036893,0.222
3670,2024,6,PIT,LV,00-0036945,0.444
3671,2024,6,PIT,LV,00-0037247,0.111
3672,2024,6,PIT,LV,00-0037304,0.111
3673,2024,6,PIT,LV,00-0037609,0.111
3674,2024,6,SEA,SF,00-0030565,0.1


In [66]:
check_for_one = gtg_df_10.groupby(['season', 'week', 'posteam', 'defteam'])['in_10_share'].sum()

In [67]:
check_for_one.min()

0.9989999999999999

In [68]:
gtg_df_10.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3695 entries, 0 to 3694
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   season       3695 non-null   int64  
 1   week         3695 non-null   int64  
 2   posteam      3695 non-null   object 
 3   defteam      3695 non-null   object 
 4   player_id    3695 non-null   object 
 5   in_10_share  3695 non-null   float64
dtypes: float64(1), int64(2), object(3)
memory usage: 173.3+ KB


In [69]:
gtg_df_10.rename(columns = {'posteam': 'recent_team', 'defteam': 'opponent_team'}, inplace = True)

In [70]:
flex_merge = pd.merge(flex_merge, gtg_df_10, on = ['season', 'week', 'recent_team', 'opponent_team', 'player_id'], how = 'left')

In [71]:
flex_merge.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11357 entries, 0 to 11356
Data columns (total 32 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   season                       11357 non-null  int64  
 1   week                         11357 non-null  int64  
 2   recent_team                  11357 non-null  object 
 3   opponent_team                11357 non-null  object 
 4   player_id                    11357 non-null  object 
 5   player_display_name          11357 non-null  object 
 6   position                     11357 non-null  object 
 7   passing_yards                11357 non-null  int64  
 8   passing_tds                  11357 non-null  int64  
 9   interceptions                11357 non-null  int64  
 10  passing_2pt_conversions      11357 non-null  int64  
 11  rushing_yards                11357 non-null  int64  
 12  rushing_tds                  11357 non-null  int64  
 13  rushing_fumbles_

# A lot of missing in_10_share variables
At first glance it seems like there are too many nulls in the in_10_share column, which is the percentage of plays inside the 10-yard-line that each player gets the ball. But maybe it is plausible. After all, in those goal-line situations only the best players are called upon. We'll replace with zero and investigate further when we examine the data.

In [72]:
flex_merge.fillna(value={'in_10_share': 0}, inplace=True)

Let's rename flex_merge flex_df so we can run the following code

In [73]:
flex_df = flex_merge

# Fantasy points
This is where we calculate FanDuel and DraftKings points.

In [74]:
flex_df['FD_Pts'] = (flex_df['passing_yards'] * 0.04) + (flex_df['rushing_tds'] * 6) + (flex_df['rushing_yards'] * 0.1)\
+ (flex_df['passing_tds'] * 4) + (flex_df['receiving_yards'] * 0.1) + (flex_df['receiving_tds'] * 6) + (flex_df['receptions'] * 0.5)\
+ (flex_df['rushing_2pt_conversions'] * 2) + (flex_df['passing_2pt_conversions'] * 2) + (flex_df['receiving_2pt_conversions'] * 2)\
+ (flex_df['special_teams_tds'] * 6) - (flex_df['interceptions']) - (flex_df['sack_fumbles_lost'] * 2) - (flex_df['receiving_fumbles_lost'] * 2)\
- (flex_df['rushing_fumbles_lost'] * 2)

In [75]:
flex_df['DK_Pts'] = (flex_df['passing_yards'] * 0.04) + (flex_df['rushing_tds'] * 6) + (flex_df['rushing_yards'] * 0.1)\
+ (flex_df['passing_tds'] * 4) + (flex_df['receiving_yards'] * 0.1) + (flex_df['receiving_tds'] * 6) + (flex_df['receptions'] * 1)\
+ (flex_df['rushing_2pt_conversions'] * 2) + (flex_df['passing_2pt_conversions'] * 2) + (flex_df['receiving_2pt_conversions'] * 2)\
+ (flex_df['special_teams_tds'] * 6) - (flex_df['interceptions']) - (flex_df['sack_fumbles_lost']) - (flex_df['receiving_fumbles_lost'])\
- (flex_df['rushing_fumbles_lost'])

Adding DraftKings bonus points

In [76]:
flex_df['DK_Pts'] = np.where(flex_df['passing_yards'] >= 300, flex_df['DK_Pts'] + 3, flex_df['DK_Pts'])
flex_df['DK_Pts'] = np.where(flex_df['receiving_yards'] >= 100, flex_df['DK_Pts'] + 3, flex_df['DK_Pts'])
flex_df['DK_Pts'] = np.where(flex_df['rushing_yards'] >= 100, flex_df['DK_Pts'] + 3, flex_df['DK_Pts'])

In [77]:
flex_df.tail()

Unnamed: 0,season,week,recent_team,opponent_team,player_id,player_display_name,position,passing_yards,passing_tds,interceptions,...,receiving_yards_after_catch,receiving_air_yards,air_yards_share,target_share,game_id,total_carries,carry_share,in_10_share,FD_Pts,DK_Pts
11352,2024,6,WAS,BAL,00-0033955,Jeremy McNichols,RB,0,0,0,...,0,0,0.0,0.0,2024_06_WAS_BAL,18,0.111,0.0,0.9,0.9
11353,2024,6,WAS,BAL,00-0035208,Olamide Zaccheaus,WR,0,0,0,...,14,33,0.125954,0.181818,2024_06_WAS_BAL,18,0.0,0.0,4.7,6.7
11354,2024,6,WAS,BAL,00-0035659,Terry McLaurin,WR,0,0,0,...,1,81,0.30916,0.212121,2024_06_WAS_BAL,18,0.056,0.4,20.3,23.3
11355,2024,6,WAS,BAL,00-0036626,Dyami Brown,WR,0,0,0,...,12,-1,-0.003817,0.030303,2024_06_WAS_BAL,18,0.0,0.0,1.6,2.1
11356,2024,6,WAS,BAL,00-0039355,Luke McCaffrey,WR,0,0,0,...,0,5,0.019084,0.030303,2024_06_WAS_BAL,18,0.0,0.2,1.0,1.5


# Storing last week's points to evaluate model

In [78]:
flex_points_last_week = flex_df[(flex_df['season'] == season) & (flex_df['week'] == week - 1)]\
[['season', 'week', 'player_display_name', 'recent_team', 'opponent_team', 'position', 'FD_Pts', 'DK_Pts']]

In [79]:
flex_points_last_week.to_csv('flex_target_' + str(season) + '_' + str(week - 1) + '.csv')

In [80]:
flex_df['position'].value_counts()

position
WR    5283
RB    3294
TE    2582
FB     198
Name: count, dtype: int64

In [81]:
flex_df['position'] = np.where(flex_df['position'].isin(['HB', 'FB']), 'RB', flex_df['position'])

In [82]:
pos_mean_by_team = flex_df.groupby(['season', 'week', 'recent_team', 'opponent_team', 'position']).size().reset_index(name = 'num_players')

In [83]:
mean_by_pos = pos_mean_by_team.groupby('position')['num_players'].agg(['mean', 'max', 'min'])

In [84]:
mean_by_pos

Unnamed: 0_level_0,mean,max,min
position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
RB,2.641452,5,1
TE,1.966489,4,1
WR,3.996218,7,2


# Grouping points allowed by position
First step in deriving DvP variables.

In [85]:
grouped_pts = flex_df.groupby(['season', 'week', 'recent_team', 'opponent_team', 'position'])[['DK_Pts', 'FD_Pts']].sum().round(3)

In [86]:
grouped_pts

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,DK_Pts,FD_Pts
season,week,recent_team,opponent_team,position,Unnamed: 5_level_1,Unnamed: 6_level_1
2022,1,ARI,KC,RB,25.6,21.6
2022,1,ARI,KC,TE,11.4,10.4
2022,1,ARI,KC,WR,32.9,25.9
2022,1,ATL,NO,RB,28.3,23.3
2022,1,ATL,NO,TE,7.0,5.0
...,...,...,...,...,...,...
2024,6,TEN,IND,TE,4.8,2.8
2024,6,TEN,IND,WR,25.1,20.6
2024,6,WAS,BAL,RB,11.7,9.7
2024,6,WAS,BAL,TE,10.8,8.8


In [87]:
grouped_pts.reset_index(inplace = True)

In [88]:
grouped_pts = grouped_pts.sort_values(by = ['opponent_team', 'season', 'week'])
#grouped_pts['opp_game_num'] = grouped_pts.groupby(['opponent_team', 'season', 'week', 'recent_team']).cumcount() + 1
grouped_pts.reset_index(inplace = True)

In [89]:
grouped_pts_raw = grouped_pts.copy()

In [90]:
grouped_pts.drop(columns = ['index'], inplace = True)

In [91]:
grouped_pts

Unnamed: 0,season,week,recent_team,opponent_team,position,DK_Pts,FD_Pts
0,2022,1,KC,ARI,RB,42.5,39.5
1,2022,1,KC,ARI,TE,38.2,30.2
2,2022,1,KC,ARI,WR,35.6,27.6
3,2022,2,LV,ARI,RB,12.6,11.6
4,2022,2,LV,ARI,TE,23.0,18.5
...,...,...,...,...,...,...,...
3952,2024,5,CLE,WAS,TE,13.6,11.6
3953,2024,5,CLE,WAS,WR,16.7,12.7
3954,2024,6,BAL,WAS,RB,29.6,26.1
3955,2024,6,BAL,WAS,TE,20.3,17.8


In [92]:
# Create the pivot table
pivot_df = grouped_pts.pivot_table(
    index=['season', 'week', 'recent_team', 'opponent_team'],
    columns='position',
    values=['DK_Pts', 'FD_Pts'],
    aggfunc='sum',  # Assuming you want to sum the points if there are duplicates
    fill_value=0  # Fill missing values with 0
)

In [93]:
# Flatten the MultiIndex columns created by pivot_table
pivot_df.columns = ['_'.join(col).strip() for col in pivot_df.columns.values]

In [94]:
# Reset the index to turn the pivot table back into a DataFrame
pivot_df.reset_index(inplace=True)

In [95]:
pivot_df.head()

Unnamed: 0,season,week,recent_team,opponent_team,DK_Pts_RB,DK_Pts_TE,DK_Pts_WR,FD_Pts_RB,FD_Pts_TE,FD_Pts_WR
0,2022,1,ARI,KC,25.6,11.4,32.9,21.6,10.4,25.9
1,2022,1,ATL,NO,28.3,7.0,27.1,23.3,5.0,20.1
2,2022,1,BAL,NYJ,11.5,10.2,39.2,9.5,7.7,35.2
3,2022,1,BUF,LAR,16.0,1.5,63.7,10.0,1.0,52.2
4,2022,1,CAR,CLE,16.5,8.4,34.4,14.5,6.9,26.9


In [96]:
pivot_df = pivot_df.sort_values(by = ['opponent_team', 'season', 'week'])

# Bringing in current week's FanDuel and DraftKings rows here
We're basically cloning this notebook from model training. We need features that apply to the current NFL week. We're trying the **one extra row** concept. For now we can probably add one row for each team, with the season value being 2024 and the week value being 1.

In [97]:
# Connect to the SQLite database
conn = sqlite3.connect('nfl_dfs.db')

# query_flex = "SELECT * FROM flex_dataset"
# #query_weekly = "SELECT * FROM weekly_data"
# query_fd_spread = "SELECT * FROM fd_spreads"
# query_dk_spread = "SELECT * FROM dk_spreads"
query_qb_model = "SELECT * FROM fd_qb_model_ready"
# # query_pbp = "S#ELECT * FROM pbp_non_defense"
query_fd = "SELECT * FROM fd_table"
query_dk = "SELECT * FROM dk_table"
#query_rb_wr_te = "SELECT * FROM rb_wr_te_data WHERE season >= 2022"

# flex_dataset = pd.read_sql_query(query_flex, conn)
# #weekly_df = pd.read_sql_query(query_weekly, conn)
# fd_spreads = pd.read_sql_query(query_fd_spread, conn)
# dk_spreads = pd.read_sql_query(query_dk_spread, conn)
# #pbp_df = pd.read_sql_query(query_pbp, conn)
fd_table = pd.read_sql_query(query_fd, conn)
dk_table = pd.read_sql_query(query_dk, conn)
qb_model = pd.read_sql_query(query_qb_model, conn)
# rb_wr_te_df = pd.read_sql_query(query_rb_wr_te, conn)

# Close the database connection
conn.close()

In [98]:
fanduel_df.head()

Unnamed: 0,ID,name,position,salary,team,opponent,home_team,status,date,week
0,108151-85671,Justin Jefferson,WR,9400,MIN,DET,1,Active,10-20-2024,7
1,108151-85701,Ja'Marr Chase,WR,9300,CIN,CLE,0,Active,10-20-2024,7
2,108151-62239,Josh Allen,QB,9200,BUF,TEN,1,Active,10-20-2024,7
3,108151-102785,Jayden Daniels,QB,9100,WAS,CAR,1,Active,10-20-2024,7
4,108151-64401,Saquon Barkley,RB,9000,PHI,NYG,0,Active,10-20-2024,7


In [99]:
draftkings_df.head()

Unnamed: 0,ID,name,position,salary,team,opponent,home_team,status,date,week
0,36291011,Justin Jefferson,WR,8500,MIN,DET,1,Active,10-20-2024,7
1,36291013,Ja'Marr Chase,WR,8400,CIN,CLE,0,Active,10-20-2024,7
2,36291015,Amon-Ra St Brown,WR,8300,DET,MIN,0,Active,10-20-2024,7
3,36290791,Saquon Barkley,RB,8200,PHI,NYG,0,Active,10-20-2024,7
4,36290793,Kyren Williams,RB,8100,LAR,LV,1,Active,10-20-2024,7


In [100]:
# Initialize empty lists for teams and opponents
#We'll use FanDuel here because they tend to list more players than DK and we're less likely to miss anyone.
team_list = []
opponent_list = []

# Initialize a set to keep track of already added matchups
added_matchups = set()

# Loop through each row in the dataframe
for index, row in fanduel_df.iterrows():
    team = row['team']
    opponent = row['opponent']
    
    # Ensure each team and opponent are added only once in reverse order as well
    if (team, opponent) not in added_matchups and (opponent, team) not in added_matchups:
        # Add the matchup as-is
        team_list.append(team)
        opponent_list.append(opponent)
        
        # Add the reverse matchup
        team_list.append(opponent)
        opponent_list.append(team)
        
        # Track the added matchups
        added_matchups.add((team, opponent))
        added_matchups.add((opponent, team))

# Display the resulting lists
print("Team list:", team_list)
print("Opponent list:", opponent_list)


Team list: ['MIN', 'DET', 'CIN', 'CLE', 'BUF', 'TEN', 'WAS', 'CAR', 'PHI', 'NYG', 'SF', 'KC', 'GB', 'HOU', 'LAR', 'LV', 'IND', 'MIA', 'ATL', 'SEA', 'NYJ', 'PIT']
Opponent list: ['DET', 'MIN', 'CLE', 'CIN', 'TEN', 'BUF', 'CAR', 'WAS', 'NYG', 'PHI', 'KC', 'SF', 'HOU', 'GB', 'LV', 'LAR', 'MIA', 'IND', 'SEA', 'ATL', 'PIT', 'NYJ']


In [101]:
len(team_list), len(opponent_list)

(22, 22)

In [102]:
append_to_pivot = {'season': [season] * len(team_list), 'week': [week] * len(team_list), 'recent_team': team_list, 'opponent_team': opponent_list}

In [103]:
append_to_pivot = pd.DataFrame(append_to_pivot)

In [104]:
pivot_df = pd.concat([pivot_df, append_to_pivot], axis = 0)

In [105]:
pivot_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1344 entries, 15 to 21
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   season         1344 non-null   int64  
 1   week           1344 non-null   int64  
 2   recent_team    1344 non-null   object 
 3   opponent_team  1344 non-null   object 
 4   DK_Pts_RB      1322 non-null   float64
 5   DK_Pts_TE      1322 non-null   float64
 6   DK_Pts_WR      1322 non-null   float64
 7   FD_Pts_RB      1322 non-null   float64
 8   FD_Pts_TE      1322 non-null   float64
 9   FD_Pts_WR      1322 non-null   float64
dtypes: float64(6), int64(2), object(2)
memory usage: 115.5+ KB


In [106]:
pivot_df['opp_game_num'] = pivot_df.groupby(['opponent_team']).cumcount() + 1

In [107]:
cols_for_dvp = ['DK_Pts_RB', 'DK_Pts_TE', 'DK_Pts_WR', 'FD_Pts_RB', 'FD_Pts_TE', 'FD_Pts_WR']

In [108]:
pivot_df_by_game = pivot_df.copy()

In [109]:
pivot_df.head()

Unnamed: 0,season,week,recent_team,opponent_team,DK_Pts_RB,DK_Pts_TE,DK_Pts_WR,FD_Pts_RB,FD_Pts_TE,FD_Pts_WR,opp_game_num
15,2022,1,KC,ARI,42.5,38.2,35.6,39.5,30.2,27.6,1
50,2022,2,LV,ARI,12.6,23.0,32.7,11.6,18.5,24.7,2
81,2022,3,LAR,ARI,12.8,13.9,37.0,11.8,11.4,30.5,3
100,2022,4,CAR,ARI,26.0,8.0,17.7,21.5,6.0,13.2,4
153,2022,5,PHI,ARI,10.4,17.5,29.8,9.4,13.5,21.8,5


In [110]:
pivot_df.tail()

Unnamed: 0,season,week,recent_team,opponent_team,DK_Pts_RB,DK_Pts_TE,DK_Pts_WR,FD_Pts_RB,FD_Pts_TE,FD_Pts_WR,opp_game_num
17,2024,7,MIA,IND,,,,,,,41
18,2024,7,ATL,SEA,,,,,,,42
19,2024,7,SEA,ATL,,,,,,,41
20,2024,7,NYJ,PIT,,,,,,,42
21,2024,7,PIT,NYJ,,,,,,,41


In [111]:
pivot_df['opponent_team'].nunique()

32

# DvP variables
We'll take an 8-game rolling mean for fantasy points allowed to RBs, WRs and TEs for each team, even if the games go back to last season. DvP means defense vs. position.

In [112]:
def calculate_equal_rolling_mean(group, cols, suffix):
    """
    This function calculates a rolling mean for the last eight games, going back to previous season if necessary.
    It also calculates when there are less than eight games to use.
    """
    for col in cols:
        group[f'{col}{suffix}'] = (
            group[col].shift().rolling(window=8, min_periods=1).mean()
        )
    return group

In [113]:
pivot_df = pivot_df.groupby('opponent_team', as_index = False).apply(calculate_equal_rolling_mean, cols=cols_for_dvp, suffix = '_DvP')

In [114]:
pivot_df.drop(columns = cols_for_dvp + ['opp_game_num'], inplace = True)

In [115]:
pivot_df

Unnamed: 0,Unnamed: 1,season,week,recent_team,opponent_team,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,FD_Pts_RB_DvP,FD_Pts_TE_DvP,FD_Pts_WR_DvP
0,15,2022,1,KC,ARI,,,,,,
0,50,2022,2,LV,ARI,42.500000,38.200000,35.6000,39.500000,30.200000,27.6000
0,81,2022,3,LAR,ARI,27.550000,30.600000,34.1500,25.550000,24.350000,26.1500
0,100,2022,4,CAR,ARI,22.633333,25.033333,35.1000,20.966667,20.033333,27.6000
0,153,2022,5,PHI,ARI,23.475000,20.775000,30.7500,21.100000,16.525000,24.0000
...,...,...,...,...,...,...,...,...,...,...,...
31,1208,2024,3,CIN,WAS,31.725000,6.537500,50.0000,27.850000,5.037500,41.3125
31,1234,2024,4,ARI,WAS,31.437500,7.562500,49.8000,27.562500,5.687500,40.6750
31,1273,2024,5,CLE,WAS,30.250000,7.425000,47.3000,26.187500,5.550000,38.4250
31,1296,2024,6,BAL,WAS,28.250000,7.875000,42.7500,24.875000,6.062500,34.6875


In [116]:
pivot_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 1344 entries, (0, 15) to (31, 7)
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   season         1344 non-null   int64  
 1   week           1344 non-null   int64  
 2   recent_team    1344 non-null   object 
 3   opponent_team  1344 non-null   object 
 4   DK_Pts_RB_DvP  1312 non-null   float64
 5   DK_Pts_TE_DvP  1312 non-null   float64
 6   DK_Pts_WR_DvP  1312 non-null   float64
 7   FD_Pts_RB_DvP  1312 non-null   float64
 8   FD_Pts_TE_DvP  1312 non-null   float64
 9   FD_Pts_WR_DvP  1312 non-null   float64
dtypes: float64(6), int64(2), object(2)
memory usage: 152.9+ KB


In [117]:
flex_df.head()

Unnamed: 0,season,week,recent_team,opponent_team,player_id,player_display_name,position,passing_yards,passing_tds,interceptions,...,receiving_yards_after_catch,receiving_air_yards,air_yards_share,target_share,game_id,total_carries,carry_share,in_10_share,FD_Pts,DK_Pts
0,2022,1,ARI,KC,00-0027942,A.J. Green,WR,0,0,0,...,0,42,0.157895,0.111111,2022_01_KC_ARI,21,0.0,0.143,2.3,3.3
1,2022,1,ARI,KC,00-0030061,Zach Ertz,TE,0,0,0,...,4,22,0.082707,0.111111,2022_01_KC_ARI,21,0.0,0.429,10.4,11.4
2,2022,1,ARI,KC,00-0033553,James Conner,RB,0,0,0,...,38,7,0.026316,0.166667,2022_01_KC_ARI,21,0.476,0.286,14.0,16.5
3,2022,1,ARI,KC,00-0035500,Greg Dortch,WR,0,0,0,...,31,62,0.233083,0.25,2022_01_KC_ARI,21,0.0,0.0,9.8,13.3
4,2022,1,ARI,KC,00-0035527,Andy Isabella,WR,0,0,0,...,4,30,0.112782,0.083333,2022_01_KC_ARI,21,0.0,0.0,1.5,2.0


In [118]:
flex_df = flex_df.merge(
    pivot_df, 
    on=['season', 'week', 'recent_team', 'opponent_team'],  # common columns to merge on
    how='outer'  # outer join to keep all rows from both dataframes
)

In [119]:
flex_df.tail(30)

Unnamed: 0,season,week,recent_team,opponent_team,player_id,player_display_name,position,passing_yards,passing_tds,interceptions,...,carry_share,in_10_share,FD_Pts,DK_Pts,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,FD_Pts_RB_DvP,FD_Pts_TE_DvP,FD_Pts_WR_DvP
11349,2024,6,WAS,BAL,00-0030061,Zach Ertz,TE,0.0,0.0,0.0,...,0.0,0.0,8.8,10.8,20.2625,15.0625,35.75,17.0125,11.25,28.8125
11350,2024,6,WAS,BAL,00-0033591,Noah Brown,WR,0.0,0.0,0.0,...,0.0,0.2,7.8,9.8,20.2625,15.0625,35.75,17.0125,11.25,28.8125
11351,2024,6,WAS,BAL,00-0033699,Austin Ekeler,RB,0.0,0.0,0.0,...,0.5,0.2,8.8,10.8,20.2625,15.0625,35.75,17.0125,11.25,28.8125
11352,2024,6,WAS,BAL,00-0033955,Jeremy McNichols,RB,0.0,0.0,0.0,...,0.111,0.0,0.9,0.9,20.2625,15.0625,35.75,17.0125,11.25,28.8125
11353,2024,6,WAS,BAL,00-0035208,Olamide Zaccheaus,WR,0.0,0.0,0.0,...,0.0,0.0,4.7,6.7,20.2625,15.0625,35.75,17.0125,11.25,28.8125
11354,2024,6,WAS,BAL,00-0035659,Terry McLaurin,WR,0.0,0.0,0.0,...,0.056,0.4,20.3,23.3,20.2625,15.0625,35.75,17.0125,11.25,28.8125
11355,2024,6,WAS,BAL,00-0036626,Dyami Brown,WR,0.0,0.0,0.0,...,0.0,0.0,1.6,2.1,20.2625,15.0625,35.75,17.0125,11.25,28.8125
11356,2024,6,WAS,BAL,00-0039355,Luke McCaffrey,WR,0.0,0.0,0.0,...,0.0,0.2,1.0,1.5,20.2625,15.0625,35.75,17.0125,11.25,28.8125
11357,2024,7,SEA,ATL,,,,,,,...,,,,,22.0625,8.25,38.425,19.0625,6.1875,31.175
11358,2024,7,TEN,BUF,,,,,,,...,,,,,28.8625,12.3625,29.0125,25.175,9.675,22.7625


In [120]:
flex_df.reset_index(inplace = True)

In [121]:
flex_df.columns

Index(['index', 'season', 'week', 'recent_team', 'opponent_team', 'player_id',
       'player_display_name', 'position', 'passing_yards', 'passing_tds',
       'interceptions', 'passing_2pt_conversions', 'rushing_yards',
       'rushing_tds', 'rushing_fumbles_lost', 'rushing_2pt_conversions',
       'receptions', 'receiving_yards', 'receiving_tds',
       'receiving_fumbles_lost', 'receiving_2pt_conversions',
       'sack_fumbles_lost', 'special_teams_tds', 'targets', 'carries',
       'receiving_yards_after_catch', 'receiving_air_yards', 'air_yards_share',
       'target_share', 'game_id', 'total_carries', 'carry_share',
       'in_10_share', 'FD_Pts', 'DK_Pts', 'DK_Pts_RB_DvP', 'DK_Pts_TE_DvP',
       'DK_Pts_WR_DvP', 'FD_Pts_RB_DvP', 'FD_Pts_TE_DvP', 'FD_Pts_WR_DvP'],
      dtype='object')

In [122]:
flex_df['opponent_team'].nunique()

32

# Paring down some of the columns
We're drop columns that won't be needed for features.

In [123]:
cols_to_keep = ['season', 'week', 'recent_team', 'opponent_team', 'player_id',
       'player_display_name', 'position', 'rushing_yards', 'rushing_tds', 'rushing_fumbles_lost', 
       'receptions', 'receiving_yards', 'receiving_tds', 'receiving_fumbles_lost', 'targets', 'carries',
       'receiving_yards_after_catch', 'receiving_air_yards', 'air_yards_share',
       'target_share', 'carry_share','in_10_share', 'FD_Pts', 'DK_Pts', 'DK_Pts_RB_DvP', 'DK_Pts_TE_DvP',
       'DK_Pts_WR_DvP', 'FD_Pts_RB_DvP', 'FD_Pts_TE_DvP', 'FD_Pts_WR_DvP']

In [124]:
flex_df = flex_df[cols_to_keep]

In [125]:
#quarterback_df[quarterback_df['season'] >= 2006]['passing_air_yards']

In [126]:
flex_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11379 entries, 0 to 11378
Data columns (total 30 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   season                       11379 non-null  int64  
 1   week                         11379 non-null  int64  
 2   recent_team                  11379 non-null  object 
 3   opponent_team                11379 non-null  object 
 4   player_id                    11357 non-null  object 
 5   player_display_name          11357 non-null  object 
 6   position                     11357 non-null  object 
 7   rushing_yards                11357 non-null  float64
 8   rushing_tds                  11357 non-null  float64
 9   rushing_fumbles_lost         11357 non-null  float64
 10  receptions                   11357 non-null  float64
 11  receiving_yards              11357 non-null  float64
 12  receiving_tds                11357 non-null  float64
 13  receiving_fumble

# Filling missing values
We have found that dropping rows with missing values can affect calculations down the line. So we need to find ways to fill the missing values.<br>

The first row of every team grouping when we calculated DvP was NaN because there was no previous value. We'll fill those in with the mean.

In [127]:
flex_df['FD_Pts_RB_DvP'] = flex_df['FD_Pts_RB_DvP'].fillna(flex_df['FD_Pts_RB_DvP'].mean())
flex_df['DK_Pts_RB_DvP'] = flex_df['DK_Pts_RB_DvP'].fillna(flex_df['DK_Pts_RB_DvP'].mean())
flex_df['FD_Pts_TE_DvP'] = flex_df['FD_Pts_TE_DvP'].fillna(flex_df['FD_Pts_TE_DvP'].mean())
flex_df['DK_Pts_TE_DvP'] = flex_df['DK_Pts_TE_DvP'].fillna(flex_df['DK_Pts_TE_DvP'].mean())
flex_df['FD_Pts_WR_DvP'] = flex_df['FD_Pts_WR_DvP'].fillna(flex_df['FD_Pts_WR_DvP'].mean())
flex_df['DK_Pts_WR_DvP'] = flex_df['DK_Pts_WR_DvP'].fillna(flex_df['DK_Pts_WR_DvP'].mean())

In [128]:
flex_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11379 entries, 0 to 11378
Data columns (total 30 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   season                       11379 non-null  int64  
 1   week                         11379 non-null  int64  
 2   recent_team                  11379 non-null  object 
 3   opponent_team                11379 non-null  object 
 4   player_id                    11357 non-null  object 
 5   player_display_name          11357 non-null  object 
 6   position                     11357 non-null  object 
 7   rushing_yards                11357 non-null  float64
 8   rushing_tds                  11357 non-null  float64
 9   rushing_fumbles_lost         11357 non-null  float64
 10  receptions                   11357 non-null  float64
 11  receiving_yards              11357 non-null  float64
 12  receiving_tds                11357 non-null  float64
 13  receiving_fumble

In [129]:
flex_24 = flex_df[(flex_df['season'] == season) & (flex_df['week'] == week)]

In [130]:
flex_24.head()

Unnamed: 0,season,week,recent_team,opponent_team,player_id,player_display_name,position,rushing_yards,rushing_tds,rushing_fumbles_lost,...,carry_share,in_10_share,FD_Pts,DK_Pts,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,FD_Pts_RB_DvP,FD_Pts_TE_DvP,FD_Pts_WR_DvP
11357,2024,7,SEA,ATL,,,,,,,...,,,,,22.0625,8.25,38.425,19.0625,6.1875,31.175
11358,2024,7,TEN,BUF,,,,,,,...,,,,,28.8625,12.3625,29.0125,25.175,9.675,22.7625
11359,2024,7,WAS,CAR,,,,,,,...,,,,,32.2375,11.4625,31.6375,29.175,9.0875,25.6375
11360,2024,7,CLE,CIN,,,,,,,...,,,,,24.025,10.8375,31.95,20.4625,8.65,25.3875
11361,2024,7,CIN,CLE,,,,,,,...,,,,,21.375,9.5375,37.175,19.3125,7.9125,30.6125


In [131]:
fd_flex = fanduel_df[fanduel_df['position'].isin(['RB', 'WR', 'TE'])]
dk_flex = draftkings_df[draftkings_df['position'].isin(['RB', 'WR', 'TE'])]

In [132]:
fd_names = set(list(fd_flex['name'].unique()))
dk_names = set(list(dk_flex['name'].unique()))

In [133]:
# Initialize empty lists for names, positions, teams, and opponents
name_list = []
position_list = []
team_list = []
opponent_list = []

# Loop through the dataframe to extract names, positions, teams, and opponents
for index, row in fd_flex.iterrows():
    name_list.append(row['name'])          # Add player name to name_list
    position_list.append(row['position'])  # Add player position to position_list
    team_list.append(row['team'])          # Add player's team to team_list
    opponent_list.append(row['opponent'])  # Add opponent team to opponent_list

# dk_name_list = []
# dk_position_list = []
# dk_team_list = []
# dk_opponent_list = []

# for index, row in dk_flex.iterrows():
#     dk_name_list.append(row['name'])          # Add player name to name_list
#     dk_position_list.append(row['position'])  # Add player position to position_list
#     dk_team_list.append(row['team'])          # Add player's team to team_list
#     dk_opponent_list.append(row['opponent'])  # Add opponent team to opponent_list

# Display the resulting lists
# print("Name list:", name_list)
# print("Position list:", position_list)
# print("Team list:", team_list)
# print("Opponent list:", opponent_list)


In [134]:
len(name_list), len(position_list)

(606, 606)

In [135]:
to_concat = {'season': [season] * len(name_list), 'week': [week] * len(name_list), 'player_display_name': name_list, 'position': position_list,\
            'recent_team': team_list, 'opponent_team': opponent_list}

In [136]:
to_concat = pd.DataFrame(to_concat)

In [137]:
to_concat

Unnamed: 0,season,week,player_display_name,position,recent_team,opponent_team
0,2024,7,Justin Jefferson,WR,MIN,DET
1,2024,7,Ja'Marr Chase,WR,CIN,CLE
2,2024,7,Saquon Barkley,RB,PHI,NYG
3,2024,7,AJ Brown,WR,PHI,NYG
4,2024,7,Jordan Mason,RB,SF,KC
...,...,...,...,...,...,...
601,2024,7,Jesper Horsted,TE,CAR,WAS
602,2024,7,Dalton Keene,TE,HOU,GB
603,2024,7,Jody Fortson,TE,KC,SF
604,2024,7,Tre'Quan Smith,WR,DET,MIN


In [138]:
to_concat['recent_team'].value_counts()

recent_team
KC     32
CAR    32
GB     31
MIA    31
MIN    30
PIT    30
NYJ    29
PHI    28
IND    28
ATL    28
SEA    28
SF     27
NYG    27
CLE    26
BUF    26
CIN    26
HOU    26
TEN    25
WAS    25
LV     24
DET    24
LAR    23
Name: count, dtype: int64

In [139]:
flex_24.columns

Index(['season', 'week', 'recent_team', 'opponent_team', 'player_id',
       'player_display_name', 'position', 'rushing_yards', 'rushing_tds',
       'rushing_fumbles_lost', 'receptions', 'receiving_yards',
       'receiving_tds', 'receiving_fumbles_lost', 'targets', 'carries',
       'receiving_yards_after_catch', 'receiving_air_yards', 'air_yards_share',
       'target_share', 'carry_share', 'in_10_share', 'FD_Pts', 'DK_Pts',
       'DK_Pts_RB_DvP', 'DK_Pts_TE_DvP', 'DK_Pts_WR_DvP', 'FD_Pts_RB_DvP',
       'FD_Pts_TE_DvP', 'FD_Pts_WR_DvP'],
      dtype='object')

In [140]:
flex_24 = flex_24[['season', 'week', 'recent_team', 'opponent_team', 
       'DK_Pts_RB_DvP', 'DK_Pts_TE_DvP', 'DK_Pts_WR_DvP', 'FD_Pts_RB_DvP',
       'FD_Pts_TE_DvP', 'FD_Pts_WR_DvP']]

In [141]:
to_concat = pd.merge(to_concat, flex_24, on = ['season', 'week', 'recent_team', 'opponent_team'])

In [142]:
to_concat

Unnamed: 0,season,week,player_display_name,position,recent_team,opponent_team,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,FD_Pts_RB_DvP,FD_Pts_TE_DvP,FD_Pts_WR_DvP
0,2024,7,Justin Jefferson,WR,MIN,DET,20.550,7.9500,46.575,18.3625,6.3875,35.5125
1,2024,7,Aaron Jones,RB,MIN,DET,20.550,7.9500,46.575,18.3625,6.3875,35.5125
2,2024,7,Ty Chandler,RB,MIN,DET,20.550,7.9500,46.575,18.3625,6.3875,35.5125
3,2024,7,Jordan Addison,WR,MIN,DET,20.550,7.9500,46.575,18.3625,6.3875,35.5125
4,2024,7,TJ Hockenson,TE,MIN,DET,20.550,7.9500,46.575,18.3625,6.3875,35.5125
...,...,...,...,...,...,...,...,...,...,...,...,...
601,2024,7,James Proche,WR,CLE,CIN,24.025,10.8375,31.950,20.4625,8.6500,25.3875
602,2024,7,Aidan Robbins,RB,CLE,CIN,24.025,10.8375,31.950,20.4625,8.6500,25.3875
603,2024,7,Michael Woods,WR,CLE,CIN,24.025,10.8375,31.950,20.4625,8.6500,25.3875
604,2024,7,Jaelon Darden,WR,CLE,CIN,24.025,10.8375,31.950,20.4625,8.6500,25.3875


In [143]:
to_concat['opponent_team'].value_counts()

opponent_team
SF     32
WAS    32
HOU    31
IND    31
DET    30
NYJ    30
PIT    29
NYG    28
MIA    28
SEA    28
ATL    28
KC     27
PHI    27
CIN    26
TEN    26
CLE    26
GB     26
BUF    25
CAR    25
LAR    24
MIN    24
LV     23
Name: count, dtype: int64

In [144]:
flex_df = flex_df[~((flex_df['season'] == season) & (flex_df['week'] == week))]

In [145]:
flex_df = pd.concat([flex_df, to_concat], axis = 0)

In [146]:
flex_df

Unnamed: 0,season,week,recent_team,opponent_team,player_id,player_display_name,position,rushing_yards,rushing_tds,rushing_fumbles_lost,...,carry_share,in_10_share,FD_Pts,DK_Pts,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,FD_Pts_RB_DvP,FD_Pts_TE_DvP,FD_Pts_WR_DvP
0,2022,1,ARI,KC,00-0027942,A.J. Green,WR,0.0,0.0,0.0,...,0.000,0.143,2.3,3.3,22.721614,11.878494,34.380093,19.716492,9.384399,27.311969
1,2022,1,ARI,KC,00-0030061,Zach Ertz,TE,0.0,0.0,0.0,...,0.000,0.429,10.4,11.4,22.721614,11.878494,34.380093,19.716492,9.384399,27.311969
2,2022,1,ARI,KC,00-0033553,James Conner,RB,26.0,1.0,0.0,...,0.476,0.286,14.0,16.5,22.721614,11.878494,34.380093,19.716492,9.384399,27.311969
3,2022,1,ARI,KC,00-0035500,Greg Dortch,WR,0.0,0.0,0.0,...,0.000,0.000,9.8,13.3,22.721614,11.878494,34.380093,19.716492,9.384399,27.311969
4,2022,1,ARI,KC,00-0035527,Andy Isabella,WR,0.0,0.0,0.0,...,0.000,0.000,1.5,2.0,22.721614,11.878494,34.380093,19.716492,9.384399,27.311969
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
601,2024,7,CLE,CIN,,James Proche,WR,,,,...,,,,,24.025000,10.837500,31.950000,20.462500,8.650000,25.387500
602,2024,7,CLE,CIN,,Aidan Robbins,RB,,,,...,,,,,24.025000,10.837500,31.950000,20.462500,8.650000,25.387500
603,2024,7,CLE,CIN,,Michael Woods,WR,,,,...,,,,,24.025000,10.837500,31.950000,20.462500,8.650000,25.387500
604,2024,7,CLE,CIN,,Jaelon Darden,WR,,,,...,,,,,24.025000,10.837500,31.950000,20.462500,8.650000,25.387500


In [147]:
cols_L8 = ['rushing_yards', 'rushing_tds', 'rushing_fumbles_lost', 'receptions', 'receiving_yards', 'receiving_tds', 'receiving_fumbles_lost', 'targets',\
           'carries', 'receiving_yards_after_catch', 'receiving_air_yards', 'air_yards_share', 'target_share', 'carry_share', 'in_10_share']

In [148]:
flex_L8_features = flex_df.groupby(['player_display_name', 'season', 'week'])[cols_L8].sum()

# L8 variables
L8 variables are rolling means of features over the last eight games. Just like we did for the DvP variables, we'll calculate features over the previous eight games for individual QBs.

In [149]:
flex_L8_features = flex_L8_features.sort_values(by = ['player_display_name', 'season', 'week'])
flex_L8_features['game_num'] = flex_L8_features.groupby(['player_display_name', 'season']).cumcount() + 1
#quarterback_df.reset_index(drop = True, inplace = True)

In [150]:
flex_L8_features = flex_L8_features.groupby(['player_display_name'], as_index = False).apply(calculate_equal_rolling_mean, cols=cols_L8, suffix = '_L8')

In [151]:
flex_L8_features.reset_index(inplace = True)

In [152]:
flex_L8_features.columns

Index(['level_0', 'player_display_name', 'season', 'week', 'rushing_yards',
       'rushing_tds', 'rushing_fumbles_lost', 'receptions', 'receiving_yards',
       'receiving_tds', 'receiving_fumbles_lost', 'targets', 'carries',
       'receiving_yards_after_catch', 'receiving_air_yards', 'air_yards_share',
       'target_share', 'carry_share', 'in_10_share', 'game_num',
       'rushing_yards_L8', 'rushing_tds_L8', 'rushing_fumbles_lost_L8',
       'receptions_L8', 'receiving_yards_L8', 'receiving_tds_L8',
       'receiving_fumbles_lost_L8', 'targets_L8', 'carries_L8',
       'receiving_yards_after_catch_L8', 'receiving_air_yards_L8',
       'air_yards_share_L8', 'target_share_L8', 'carry_share_L8',
       'in_10_share_L8'],
      dtype='object')

In [153]:
flex_L8_features.drop(columns = ['level_0', 'rushing_yards', 'rushing_tds', 'rushing_fumbles_lost', 'receptions', 'receiving_yards',
       'receiving_tds', 'receiving_fumbles_lost', 'targets', 'carries', 'receiving_yards_after_catch', 'receiving_air_yards', 'air_yards_share',
       'target_share', 'carry_share', 'in_10_share'], inplace = True)

In [154]:
flex_L8_features

Unnamed: 0,player_display_name,season,week,game_num,rushing_yards_L8,rushing_tds_L8,rushing_fumbles_lost_L8,receptions_L8,receiving_yards_L8,receiving_tds_L8,receiving_fumbles_lost_L8,targets_L8,carries_L8,receiving_yards_after_catch_L8,receiving_air_yards_L8,air_yards_share_L8,target_share_L8,carry_share_L8,in_10_share_L8
0,A.J. Barner,2024,3,1,,,,,,,,,,,,,,,
1,A.J. Barner,2024,4,2,0.000000,0.000000,0.0,3.000000,13.000000,0.000000,0.0,3.000000,0.000000,11.000000,2.000000,0.010363,0.090909,0.000000,0.000000
2,A.J. Barner,2024,5,3,0.000000,0.000000,0.0,2.500000,20.000000,0.500000,0.0,2.500000,0.000000,17.000000,3.000000,0.011739,0.064322,0.000000,0.062500
3,A.J. Barner,2024,6,4,0.000000,0.000000,0.0,2.000000,17.666667,0.333333,0.0,2.000000,0.000000,13.333333,4.333333,0.019670,0.051429,0.000000,0.041667
4,A.J. Brown,2022,1,1,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11958,Zonovan Knight,2022,17,6,50.200000,0.200000,0.0,2.200000,16.600000,0.000000,0.0,2.200000,13.000000,22.400000,-5.800000,-0.014043,0.055368,0.551200,0.116600
11959,Zonovan Knight,2022,18,7,46.333333,0.166667,0.0,2.166667,16.666667,0.000000,0.0,2.333333,12.166667,22.000000,-5.000000,-0.012165,0.057251,0.537833,0.097167
11960,Zonovan Knight,2023,3,1,42.857143,0.142857,0.0,1.857143,14.285714,0.000000,0.0,2.000000,12.142857,18.857143,-4.285714,-0.010427,0.049072,0.546714,0.083286
11961,Zonovan Knight,2023,5,2,39.125000,0.125000,0.0,1.625000,12.500000,0.000000,0.0,1.750000,11.000000,16.500000,-3.750000,-0.009124,0.042938,0.491750,0.072875


In [155]:
flex_df = pd.merge(flex_df, flex_L8_features, on = ['player_display_name', 'season', 'week'], how = 'left')

In [156]:
flex_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11963 entries, 0 to 11962
Data columns (total 46 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   season                          11963 non-null  int64  
 1   week                            11963 non-null  int64  
 2   recent_team                     11963 non-null  object 
 3   opponent_team                   11963 non-null  object 
 4   player_id                       11357 non-null  object 
 5   player_display_name             11963 non-null  object 
 6   position                        11963 non-null  object 
 7   rushing_yards                   11357 non-null  float64
 8   rushing_tds                     11357 non-null  float64
 9   rushing_fumbles_lost            11357 non-null  float64
 10  receptions                      11357 non-null  float64
 11  receiving_yards                 11357 non-null  float64
 12  receiving_tds                   

# Filling missing values with mean
In our QB model we made the mistake of dropping missing values at this point. Rookies making their debut won't have L8 variables because they haven't played before. So let's fill those values with the means.

In [157]:
cols_to_fill = ['rushing_yards_L8', 'rushing_tds_L8', 'rushing_fumbles_lost_L8', 'receptions_L8',
       'receiving_yards_L8', 'receiving_tds_L8', 'receiving_fumbles_lost_L8',
       'targets_L8', 'carries_L8', 'receiving_yards_after_catch_L8',
       'receiving_air_yards_L8', 'air_yards_share_L8', 'target_share_L8',
       'carry_share_L8', 'in_10_share_L8']

In [158]:
for col in cols_to_fill:
    flex_df[col] = flex_df.groupby('position')[col].transform(lambda x: x.fillna(x.mean()))

In [159]:
flex_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11963 entries, 0 to 11962
Data columns (total 46 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   season                          11963 non-null  int64  
 1   week                            11963 non-null  int64  
 2   recent_team                     11963 non-null  object 
 3   opponent_team                   11963 non-null  object 
 4   player_id                       11357 non-null  object 
 5   player_display_name             11963 non-null  object 
 6   position                        11963 non-null  object 
 7   rushing_yards                   11357 non-null  float64
 8   rushing_tds                     11357 non-null  float64
 9   rushing_fumbles_lost            11357 non-null  float64
 10  receptions                      11357 non-null  float64
 11  receiving_yards                 11357 non-null  float64
 12  receiving_tds                   

# A few more features
yards_per_carry, yards_per_reception, yards_per_target

In [160]:
flex_df['yards_per_carry_L8'] = flex_df['rushing_yards_L8']/flex_df['carries_L8']
flex_df['yards_per_reception_L8'] = flex_df['receiving_yards_L8']/flex_df['receptions_L8']
flex_df['yards_per_target_L8'] = flex_df['receiving_yards_L8']/flex_df['targets_L8']

# More missing
Now this gives us some more missing values. In most cases, it's because we've tried to divide by zero when a player has averaged 0 carries over the last eight games. So here we should fill the missing values with 0

In [161]:
flex_df.fillna(0, inplace = True)

In [162]:
flex_df_full = flex_df.copy()

In [163]:
flex_df.drop(columns = cols_L8, inplace = True)

In [164]:
flex_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11963 entries, 0 to 11962
Data columns (total 34 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   season                          11963 non-null  int64  
 1   week                            11963 non-null  int64  
 2   recent_team                     11963 non-null  object 
 3   opponent_team                   11963 non-null  object 
 4   player_id                       11963 non-null  object 
 5   player_display_name             11963 non-null  object 
 6   position                        11963 non-null  object 
 7   FD_Pts                          11963 non-null  float64
 8   DK_Pts                          11963 non-null  float64
 9   DK_Pts_RB_DvP                   11963 non-null  float64
 10  DK_Pts_TE_DvP                   11963 non-null  float64
 11  DK_Pts_WR_DvP                   11963 non-null  float64
 12  FD_Pts_RB_DvP                   

In [165]:
flex_df = flex_df[(flex_df['season'] == season) & (flex_df['week'] == week)]

In [166]:
flex_df['opponent_team'].nunique()

22

In [167]:
flex_df.replace([np.inf, -np.inf], np.nan, inplace=True)

# Odds, grass, outdoors, wind
We'll bring in CSVs with FanDuel and DraftKings odds.<br>
This will be where we need to separate FanDuel from DraftKings dataframes since the odds and totals might be different.

In [168]:
fd_spreads = pd.read_csv('fd_spreads_' + str(season) + '_' + str(week) + '.csv')
dk_spreads = pd.read_csv('dk_spreads_' + str(season) + '_' + str(week) + '.csv')

In [169]:
fd_spreads.head()

Unnamed: 0.1,Unnamed: 0,opponent,team,total_line,outdoors,grass,home_team,spread_line,pred_total,opp_total,wind,season,week
0,0,JAX,NE,42.5,1,1,0,-5.5,18.5,24.0,14,2024,7
1,1,ATL,SEA,51.5,0,0,0,-3.0,24.25,27.25,0,2024,7
2,2,BUF,TEN,41.5,1,0,0,-8.5,16.5,25.0,8,2024,7
3,3,CLE,CIN,41.5,1,1,0,5.5,23.5,18.0,6,2024,7
4,4,MIN,DET,50.5,0,1,0,-1.5,24.5,26.0,0,2024,7


In [170]:
dk_spreads.head()

Unnamed: 0.1,Unnamed: 0,opponent,team,total_line,outdoors,grass,home_team,spread_line,pred_total,opp_total,wind,season,week
0,0,JAX,NE,42.5,1,1,0,-5.5,18.5,24.0,14,2024,7
1,1,ATL,SEA,51.0,0,0,0,-3.0,24.0,27.0,0,2024,7
2,2,BUF,TEN,41.0,1,0,0,-9.0,16.0,25.0,8,2024,7
3,3,CLE,CIN,41.5,1,1,0,6.0,23.75,17.75,6,2024,7
4,4,MIN,DET,51.0,0,1,0,-1.5,24.75,26.25,0,2024,7


In [171]:
fd_spreads.drop(columns = ['Unnamed: 0'], inplace = True)
dk_spreads.drop(columns = ['Unnamed: 0'], inplace = True)

In [172]:
flex_df.rename(columns = {'recent_team': 'team', 'opponent_team': 'opponent'}, inplace = True)

In [173]:
flex_df_fd = pd.merge(flex_df, fd_spreads, on = ['team', 'opponent', 'season', 'week'])
flex_df_dk = pd.merge(flex_df, dk_spreads, on = ['team', 'opponent', 'season', 'week'])

In [174]:
len(flex_df_fd), len(flex_df_dk)

(606, 606)

In [175]:
# weekly_df[weekly_df['player_display_name'] == 'Jonathan Taylor'].tail()

<!-- # Bringing in point spreads and totals
Pulling this dataframe from our database. It also includes binary variables for indoors and grass. -->

In [176]:
# # Connect to the SQLite database
# conn = sqlite3.connect('nfl_dfs.db')

# # Query specific columns from the table
# query1 = "SELECT * FROM spreads_totals"
# spread_df = pd.read_sql_query(query1, conn)

# # Close the connection
# conn.close()

In [177]:
flex_df.columns

Index(['season', 'week', 'team', 'opponent', 'player_id',
       'player_display_name', 'position', 'FD_Pts', 'DK_Pts', 'DK_Pts_RB_DvP',
       'DK_Pts_TE_DvP', 'DK_Pts_WR_DvP', 'FD_Pts_RB_DvP', 'FD_Pts_TE_DvP',
       'FD_Pts_WR_DvP', 'game_num', 'rushing_yards_L8', 'rushing_tds_L8',
       'rushing_fumbles_lost_L8', 'receptions_L8', 'receiving_yards_L8',
       'receiving_tds_L8', 'receiving_fumbles_lost_L8', 'targets_L8',
       'carries_L8', 'receiving_yards_after_catch_L8',
       'receiving_air_yards_L8', 'air_yards_share_L8', 'target_share_L8',
       'carry_share_L8', 'in_10_share_L8', 'yards_per_carry_L8',
       'yards_per_reception_L8', 'yards_per_target_L8'],
      dtype='object')

In [178]:
#quarterback_df.drop(columns = ['defteam', 'passing_air_yards'], inplace = True)

In [179]:
flex_df.head()

Unnamed: 0,season,week,team,opponent,player_id,player_display_name,position,FD_Pts,DK_Pts,DK_Pts_RB_DvP,...,carries_L8,receiving_yards_after_catch_L8,receiving_air_yards_L8,air_yards_share_L8,target_share_L8,carry_share_L8,in_10_share_L8,yards_per_carry_L8,yards_per_reception_L8,yards_per_target_L8
11357,2024,7,MIN,DET,0,Justin Jefferson,WR,0.0,0.0,20.55,...,0.125,25.375,137.875,0.463374,0.325339,0.005625,0.172375,-12.0,17.183673,10.935065
11358,2024,7,MIN,DET,0,Aaron Jones,RB,0.0,0.0,20.55,...,16.5,27.375,2.375,0.001016,0.147943,0.6045,0.43175,5.204545,8.384615,6.8125
11359,2024,7,MIN,DET,0,Ty Chandler,RB,0.0,0.0,20.55,...,9.0,13.625,-3.625,-0.012668,0.05068,0.44625,0.1545,3.791667,7.818182,7.166667
11360,2024,7,MIN,DET,0,Jordan Addison,WR,0.0,0.0,20.55,...,0.125,10.25,81.375,0.258862,0.168476,0.004,0.121,7.0,14.72,8.761905
11361,2024,7,MIN,DET,0,TJ Hockenson,TE,0.0,0.0,20.55,...,0.025185,12.110995,23.048785,0.095233,0.109175,0.000888,0.061252,2.086663,10.365028,7.278637


In [180]:
divisions = {
    'CLE': 'AFC North',
    'LAR': 'NFC West',
    'LV': 'AFC West',
    'KC': 'AFC West',
    'CAR': 'NFC South',
    'NYG': 'NFC East',
    'HOU': 'AFC South',
    'DEN': 'AFC West',
    'MIN': 'NFC North',
    'TEN': 'AFC South',
    'JAX': 'AFC South',
    'SEA': 'NFC West',
    'DET': 'NFC North',
    'NO': 'NFC South',
    'CIN': 'AFC North',
    'ATL': 'NFC South',
    'NYJ': 'AFC East',
    'PHI': 'NFC East',
    'DAL': 'NFC East',
    'WAS': 'NFC East',
    'PIT': 'AFC North',
    'ARI': 'NFC West',
    'CHI': 'NFC North',
    'MIA': 'AFC East',
    'BUF': 'AFC East',
    'BAL': 'AFC North',
    'TB': 'NFC South',
    'SF': 'NFC West',
    'LAC': 'AFC West',
    'IND': 'AFC South',
    'GB': 'NFC North',
    'NE': 'AFC East'
}

# You now have a dictionary `divisions` where each team is mapped to its division.flex_df.info()

# Adding div_game binary column

In [181]:
# Map the team and opponent columns to their respective divisions
flex_df_fd['team_division'] = flex_df_fd['team'].map(divisions)
flex_df_fd['opponent_division'] = flex_df_fd['opponent'].map(divisions)

# Create the div_game column (1 if they are in the same division, 0 otherwise)
flex_df_fd['div_game'] = np.where(flex_df_fd['team_division'] == flex_df_fd['opponent_division'], 1, 0)

# Optionally, drop the temporary division columns if you don't need them
flex_df_fd.drop(['team_division', 'opponent_division'], axis=1, inplace=True)

In [182]:
# Map the team and opponent columns to their respective divisions
flex_df_dk['team_division'] = flex_df_dk['team'].map(divisions)
flex_df_dk['opponent_division'] = flex_df_dk['opponent'].map(divisions)

# Create the div_game column (1 if they are in the same division, 0 otherwise)
flex_df_dk['div_game'] = np.where(flex_df_dk['team_division'] == flex_df_dk['opponent_division'], 1, 0)

# Optionally, drop the temporary division columns if you don't need them
flex_df_dk.drop(['team_division', 'opponent_division'], axis=1, inplace=True)

In [183]:
# Check for positive or negative infinity in the entire DataFrame
# infinity_mask_fd = np.isinf(flex_df_fd)
# infinity_mask_dk = np.isinf(flex_df_dk)

# # Display rows with infinity values
# infinity_rows_fd = flex_df_fd[infinity_mask_fd.any(axis=1)]
# infinity_rows_dk = flex_df_dk[infinity_mask_dk.any(axis=1)]
# print(infinity_rows_fd)
# print(infinity_rows_dk)

# One-hot encoding for position

In [184]:
position_dummies_fd = pd.get_dummies(flex_df_fd['position'], prefix='pos')
position_dummies_dk = pd.get_dummies(flex_df_dk['position'], prefix='pos')

In [185]:
flex_df_fd = pd.concat([flex_df_fd, position_dummies_fd], axis = 1)

In [186]:
flex_df_dk = pd.concat([flex_df_dk, position_dummies_dk], axis = 1)

In [187]:
flex_df_fd['pos_RB'] = flex_df_fd['pos_RB'].astype(int)
flex_df_fd['pos_TE'] = flex_df_fd['pos_TE'].astype(int)
flex_df_fd['pos_WR'] = flex_df_fd['pos_WR'].astype(int)
flex_df_dk['pos_RB'] = flex_df_dk['pos_RB'].astype(int)
flex_df_dk['pos_TE'] = flex_df_dk['pos_TE'].astype(int)
flex_df_dk['pos_WR'] = flex_df_dk['pos_WR'].astype(int)

In [188]:
flex_df_fd.drop(columns = ['FD_Pts', 'DK_Pts'], inplace = True)
flex_df_dk.drop(columns = ['FD_Pts', 'DK_Pts'], inplace = True)

In [189]:
fanduel_df = fanduel_df[['name', 'position', 'salary', 'team', 'opponent', 'status', 'week']]
draftkings_df = draftkings_df[['name', 'position', 'salary', 'team', 'opponent', 'status', 'week']]

In [190]:
fanduel_df = fanduel_df[fanduel_df['position'].isin(['RB', 'WR', 'TE'])]
draftkings_df = draftkings_df[draftkings_df['position'].isin(['RB', 'WR', 'TE'])]

In [191]:
len(fanduel_df), len(draftkings_df)

(606, 410)

In [192]:
#################### Sept. 16, 2024 #####################
#Just filtered for Flex positions.
#Next will be to rename player_display_name to name and try to merge again with the flex data.
#Don't forget QB strength variables!!!!!

In [193]:
len(flex_df_fd), len(flex_df_dk)

(606, 606)

In [194]:
flex_df_fd.rename(columns = {'player_display_name': 'name'}, inplace = True)
flex_df_dk.rename(columns = {'player_display_name': 'name'}, inplace = True)

# Name matching here

In [195]:
def clean_name(name):
    # Remove periods between initials like C.J., D.J. (case-sensitive)
    name = re.sub(r'\b([A-Z])\.\s*([A-Z])\.\b', r'\1\2', name)
    
    # Remove common suffixes like Jr., Sr., III, II, IV (case-sensitive)
    cleaned_name = re.sub(r'(\,|\.|Sr|Jr|III|II|IV)', '', name).strip()
    
    return cleaned_name

In [196]:
import re
from rapidfuzz import process, fuzz

In [197]:
flex_df_fd['name'] = flex_df_fd['name'].apply(clean_name)
flex_df_dk['name'] = flex_df_dk['name'].apply(clean_name)

In [198]:
SIMILARITY_THRESHOLD = 80

In [199]:
def fuzzy_match(name, dk_names):
    match, score, _ = process.extractOne(name, dk_names, scorer=fuzz.token_sort_ratio)
    return match if score >= SIMILARITY_THRESHOLD else None

In [200]:
flex_names = flex_df_fd['name']

In [201]:
flex_df_fd['matched_name'] = flex_df_fd['name'].apply(lambda x: fuzzy_match(x, flex_names))
flex_df_dk['matched_name'] = flex_df_dk['name'].apply(lambda x: fuzzy_match(x, flex_names))

In [202]:
unmatched_in_fd = flex_df_fd[flex_df_fd['matched_name'].isna()]

In [203]:
unmatched_in_fd.reset_index(inplace = True, drop = True)

In [204]:
unmatched_in_fd

Unnamed: 0,season,week,team,opponent,player_id,name,position,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,...,home_team,spread_line,pred_total,opp_total,wind,div_game,pos_RB,pos_TE,pos_WR,matched_name


In [205]:
flex_df_fd[(flex_df_fd['name'] != flex_df_fd['matched_name']) & (flex_df_fd['matched_name'].notna())]

Unnamed: 0,season,week,team,opponent,player_id,name,position,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,...,home_team,spread_line,pred_total,opp_total,wind,div_game,pos_RB,pos_TE,pos_WR,matched_name


In [206]:
unmatched_in_dk = flex_df_dk[flex_df_dk['matched_name'].isna()]

In [207]:
unmatched_in_dk.reset_index(inplace = True, drop = True)

In [208]:
unmatched_in_dk

Unnamed: 0,season,week,team,opponent,player_id,name,position,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,...,home_team,spread_line,pred_total,opp_total,wind,div_game,pos_RB,pos_TE,pos_WR,matched_name


In [209]:
flex_df_dk[(flex_df_dk['name'] != flex_df_dk['matched_name']) & (flex_df_dk['matched_name'].notna())]

Unnamed: 0,season,week,team,opponent,player_id,name,position,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,...,home_team,spread_line,pred_total,opp_total,wind,div_game,pos_RB,pos_TE,pos_WR,matched_name


In [210]:
flex_df_fd = pd.merge(fanduel_df, flex_df_fd, on = ['name', 'team', 'opponent', 'week'], how = 'left')
flex_df_dk = pd.merge(draftkings_df, flex_df_dk, on = ['name', 'team', 'opponent', 'week'], how = 'left')

In [211]:
flex_df_fd['position'] = flex_df_fd['position_x']
flex_df_fd.drop(columns = ['position_x', 'position_y'], inplace = True)
flex_df_dk['position'] = flex_df_dk['position_x']
flex_df_dk.drop(columns = ['position_x', 'position_y'], inplace = True)

In [212]:
flex_df_fd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 606 entries, 0 to 605
Data columns (total 47 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   name                            606 non-null    object 
 1   salary                          606 non-null    int64  
 2   team                            606 non-null    object 
 3   opponent                        606 non-null    object 
 4   status                          606 non-null    object 
 5   week                            606 non-null    int64  
 6   season                          606 non-null    int64  
 7   player_id                       606 non-null    int64  
 8   DK_Pts_RB_DvP                   606 non-null    float64
 9   DK_Pts_TE_DvP                   606 non-null    float64
 10  DK_Pts_WR_DvP                   606 non-null    float64
 11  FD_Pts_RB_DvP                   606 non-null    float64
 12  FD_Pts_TE_DvP                   606 

In [213]:
flex_df_dk.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 410 entries, 0 to 409
Data columns (total 47 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   name                            410 non-null    object 
 1   salary                          410 non-null    int64  
 2   team                            410 non-null    object 
 3   opponent                        410 non-null    object 
 4   status                          410 non-null    object 
 5   week                            410 non-null    int64  
 6   season                          410 non-null    int64  
 7   player_id                       410 non-null    int64  
 8   DK_Pts_RB_DvP                   410 non-null    float64
 9   DK_Pts_TE_DvP                   410 non-null    float64
 10  DK_Pts_WR_DvP                   410 non-null    float64
 11  FD_Pts_RB_DvP                   410 non-null    float64
 12  FD_Pts_TE_DvP                   410 

In [214]:
###FANDUEL SCORING
#Rushing yards made = 0.1pts	
#Rushing touchdowns = 6pts	
#Passing yards = 0.04pts	
#Passing touchdowns = 4pts	
#Interceptions = -1pt	
#Receiving yards = 0.1pts	
#Receiving touchdowns = 6pts	
#Receptions = 0.5pts	
#Kickoff return touchdowns = 6pts	
#Punt return touchdowns = 6pts	
#Fumbles lost = -2pts	
#Own fumbles recovered touchdowns = 6pts	
#Two-point conversions scored = 2pts	
#Two-point conversion passes = 2pts	
#Field-goals from 0-39 yards = 3pts	
#Field-goals from 40-49 yards = 4pts	
#Field-goals from 50+ yards = 5pts	
#Extra-point conversions = 1pt

###DRAFTKINGS SCORING
#PAssing TD = 4 pts
#passing yards = .04 pts
#300 passing yards = 3 pts (bonus)
#Interception = -1 pts
#Rushing TD = 6 pts
#Rushing yds = 0.1 pts
#100 yd rushing game = 3 pts (bonus)
#Receiving TD = 6 pts
#Receiving yds = 0.1 pts
#100 receiving yards game = 3 pts (bonus)
#Receptions = 1 pt
#Punt/kickoff/FG return for TD = 6 pts
#Fumble lost = -1 pt
#2 pt conversion (pass, run or catch) = 2 pts
#Offensive fumble recovery TD = 6

In [215]:
features = ['DK_Pts_RB_DvP', 'DK_Pts_TE_DvP', 'DK_Pts_WR_DvP', 'FD_Pts_RB_DvP',
       'FD_Pts_TE_DvP', 'FD_Pts_WR_DvP', 'rushing_yards_L8', 'rushing_tds_L8',
       'rushing_fumbles_lost_L8', 'receptions_L8', 'receiving_yards_L8',
       'receiving_tds_L8', 'receiving_fumbles_lost_L8', 'targets_L8',
       'carries_L8', 'receiving_yards_after_catch_L8',
       'receiving_air_yards_L8', 'target_share_L8', 'carry_share_L8',
       'in_10_share_L8', 'yards_per_carry_L8', 'yards_per_reception_L8',
       'yards_per_target_L8', 'wind', 'div_game', 'spread_line', 'total_line',
       'outdoors', 'grass', 'home_team', 'pred_total', 'opp_total', 'qb_comp',
       'qb_att', 'qb_yds', 'qb_pass_td', 'qb_int', 'qb_comp_pct',
       'qb_yds_per_att', 'qb_td_pct', 'qb_int_pct', 'pos_RB', 'pos_TE',
       'pos_WR']

# QB strength
One last variable we want to try to add is a variable or variables that look at quarterback strength. If a team's backup QB is playing, the entire offense is downgraded and it could affect the fantasy performance of RBs, WRs and TEs.<br>

We'll take the dataset we used for the QB model, filter for the primary QBs in each game and use some of the variables that look at L8 means.

In [216]:
# # Connect to the SQLite database
# conn = sqlite3.connect('nfl_dfs.db')

# query_qb = "SELECT * FROM qb_dataset WHERE season >= 2006"

# qb_df = pd.read_sql_query(query_qb, conn)

# # Close the database connection
# conn.close()

In [217]:
qb_data_fd = pd.read_csv('FD_QB_for_model_' + str(season) + '_' + str(week) + '.csv')
qb_data_dk = pd.read_csv('DK_QB_for_model_' + str(season) + '_' + str(week) + '.csv')

In [218]:
qb_data_fd = qb_data_fd[qb_data_fd['QB_role'] == 1]
qb_data_dk = qb_data_dk[qb_data_dk['QB_role'] == 1]

In [219]:
qb_data_fd.rename(columns = {'completions_L8': 'qb_comp', 'attempts_L8': 'qb_att', 'passing_yards_L8': 'qb_yds',\
                       'passing_tds_L8': 'qb_pass_td', 'interceptions_L8': 'qb_int', 'comp_pct': 'qb_comp_pct',\
                       'yds_per_attempt': 'qb_yds_per_att', 'td_pct': 'qb_td_pct', 'int_pct': 'qb_int_pct'}, inplace = True)

In [220]:
qb_data_dk.rename(columns = {'completions_L8': 'qb_comp', 'attempts_L8': 'qb_att', 'passing_yards_L8': 'qb_yds',\
                       'passing_tds_L8': 'qb_pass_td', 'interceptions_L8': 'qb_int', 'comp_pct': 'qb_comp_pct',\
                       'yds_per_attempt': 'qb_yds_per_att', 'td_pct': 'qb_td_pct', 'int_pct': 'qb_int_pct'}, inplace = True)

In [221]:
qb_data_fd = qb_data_fd[['season', 'week', 'team', 'opponent', 'qb_comp', 'qb_att', 'qb_yds', 'qb_pass_td',\
               'qb_int', 'qb_comp_pct', 'qb_yds_per_att', 'qb_td_pct', 'qb_int_pct']]

In [222]:
qb_data_dk = qb_data_dk[['season', 'week', 'team', 'opponent', 'qb_comp', 'qb_att', 'qb_yds', 'qb_pass_td',\
               'qb_int', 'qb_comp_pct', 'qb_yds_per_att', 'qb_td_pct', 'qb_int_pct']]

In [223]:
flex_df_fd = pd.merge(flex_df_fd, qb_data_fd, on = ['season', 'week', 'team', 'opponent'], how = 'left')

In [224]:
flex_df_dk = pd.merge(flex_df_dk, qb_data_fd, on = ['season', 'week', 'team', 'opponent'], how = 'left')

In [225]:
flex_df_fd = flex_df_fd.set_index(['name', 'position', 'team', 'opponent', 'salary', 'status', 'week', 'season'], drop = True)
flex_df_dk = flex_df_dk.set_index(['name', 'position', 'team', 'opponent', 'salary', 'status', 'week', 'season'], drop = True)

In [226]:
flex_df_fd = flex_df_fd[features]
flex_df_dk = flex_df_dk[features]

In [227]:
flex_df_fd.replace([np.inf, -np.inf], np.nan, inplace=True)
flex_df_dk.replace([np.inf, -np.inf], np.nan, inplace=True)

In [228]:
# flex_df_dk[flex_df_dk.isna().any(axis = 1)]

In [229]:
# flex_df_dk = flex_df_dk.dropna()

In [230]:
# Fill missing values with the mean of each column
flex_df_fd = flex_df_fd.groupby('position').transform(lambda x: x.fillna(x.mean()))
flex_df_dk = flex_df_dk.groupby('position').transform(lambda x: x.fillna(x.mean()))

In [231]:
flex_df_fd.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 606 entries, ('Justin Jefferson', 'WR', 'MIN', 'DET', 9400, 'Active', 7, 2024) to ('Nick Muse', 'TE', 'MIN', 'DET', 4000, 'IR', 7, 2024)
Data columns (total 44 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   DK_Pts_RB_DvP                   606 non-null    float64
 1   DK_Pts_TE_DvP                   606 non-null    float64
 2   DK_Pts_WR_DvP                   606 non-null    float64
 3   FD_Pts_RB_DvP                   606 non-null    float64
 4   FD_Pts_TE_DvP                   606 non-null    float64
 5   FD_Pts_WR_DvP                   606 non-null    float64
 6   rushing_yards_L8                606 non-null    float64
 7   rushing_tds_L8                  606 non-null    float64
 8   rushing_fumbles_lost_L8         606 non-null    float64
 9   receptions_L8                   606 non-null    float64
 10  receiving_yards_L8              606 non-nul

In [232]:
flex_df_dk.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 410 entries, ('Justin Jefferson', 'WR', 'MIN', 'DET', 8500, 'Active', 7, 2024) to ('James Winchester', 'TE', 'KC', 'SF', 2500, 'Active', 7, 2024)
Data columns (total 44 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   DK_Pts_RB_DvP                   410 non-null    float64
 1   DK_Pts_TE_DvP                   410 non-null    float64
 2   DK_Pts_WR_DvP                   410 non-null    float64
 3   FD_Pts_RB_DvP                   410 non-null    float64
 4   FD_Pts_TE_DvP                   410 non-null    float64
 5   FD_Pts_WR_DvP                   410 non-null    float64
 6   rushing_yards_L8                410 non-null    float64
 7   rushing_tds_L8                  410 non-null    float64
 8   rushing_fumbles_lost_L8         410 non-null    float64
 9   receptions_L8                   410 non-null    float64
 10  receiving_yards_L8              41

In [233]:
flex_df_fd.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,FD_Pts_RB_DvP,FD_Pts_TE_DvP,FD_Pts_WR_DvP,rushing_yards_L8,rushing_tds_L8,rushing_fumbles_lost_L8,receptions_L8,...,qb_yds,qb_pass_td,qb_int,qb_comp_pct,qb_yds_per_att,qb_td_pct,qb_int_pct,pos_RB,pos_TE,pos_WR
name,position,team,opponent,salary,status,week,season,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
Justin Jefferson,WR,MIN,DET,9400,Active,7,2024,20.55,7.95,46.575,18.3625,6.3875,35.5125,-1.5,0.0,0.0,6.125,...,172.625,1.625,0.625,0.627119,7.80226,0.073446,0.028249,0,0,1
Ja'Marr Chase,WR,CIN,CLE,9300,Active,7,2024,21.375,9.5375,37.175,19.3125,7.9125,30.6125,0.0,0.0,0.0,5.125,...,253.25,1.875,0.5,0.705426,7.852713,0.05814,0.015504,0,0,1
Saquon Barkley,RB,PHI,NYG,9000,Active,7,2024,20.5625,8.825,35.3625,17.8125,6.5125,28.05,80.875,0.875,0.0,2.75,...,208.25,1.25,0.75,0.675325,7.212121,0.04329,0.025974,1,0,0
AJ Brown,WR,PHI,NYG,8900,Active,7,2024,20.5625,8.825,35.3625,17.8125,6.5125,28.05,1.385669,0.01143,0.002039,3.085124,...,208.25,1.25,0.75,0.675325,7.212121,0.04329,0.025974,0,0,1
Jordan Mason,RB,SF,KC,8800,Q,7,2024,18.1625,14.1625,29.9925,15.4125,10.725,24.305,81.0,0.375,0.125,1.125,...,268.875,1.375,0.625,0.645669,8.468504,0.043307,0.019685,1,0,0


In [234]:
flex_df_dk.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,FD_Pts_RB_DvP,FD_Pts_TE_DvP,FD_Pts_WR_DvP,rushing_yards_L8,rushing_tds_L8,rushing_fumbles_lost_L8,receptions_L8,...,qb_yds,qb_pass_td,qb_int,qb_comp_pct,qb_yds_per_att,qb_td_pct,qb_int_pct,pos_RB,pos_TE,pos_WR
name,position,team,opponent,salary,status,week,season,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
Justin Jefferson,WR,MIN,DET,8500,Active,7,2024,20.55,7.95,46.575,18.3625,6.3875,35.5125,-1.5,0.0,0.0,6.125,...,172.625,1.625,0.625,0.627119,7.80226,0.073446,0.028249,0,0,1
Ja'Marr Chase,WR,CIN,CLE,8400,Active,7,2024,21.375,9.5375,37.175,19.3125,7.9125,30.6125,0.0,0.0,0.0,5.125,...,253.25,1.875,0.5,0.705426,7.852713,0.05814,0.015504,0,0,1
Amon-Ra St Brown,WR,DET,MIN,8300,Active,7,2024,20.3125,12.625,49.5275,17.5625,9.75,38.4025,1.385669,0.01143,0.002039,3.085124,...,270.875,1.5,0.5,0.703846,8.334615,0.046154,0.015385,0,0,1
Saquon Barkley,RB,PHI,NYG,8200,Active,7,2024,20.5625,8.825,35.3625,17.8125,6.5125,28.05,80.875,0.875,0.0,2.75,...,208.25,1.25,0.75,0.675325,7.212121,0.04329,0.025974,1,0,0
Kyren Williams,RB,LAR,LV,8100,Active,7,2024,27.4625,8.2625,29.8125,23.275,6.45,24.5625,76.5,1.25,0.125,2.125,...,281.25,1.0,0.625,0.684588,8.064516,0.028674,0.017921,1,0,0


In [235]:
flex_df_fd.to_csv('FD_FLEX_for_model_' + str(season) + '_' + str(week) + '.csv')
flex_df_dk.to_csv('DK_FLEX_for_model_' + str(season) + '_' + str(week) + '.csv')