# Assembling FLEX dataset
This notebook will wrangle the FanDuel and DraftKings lists of running backs, wide receivers and tight ends and create a CSV that is ready to run through the model for predictions.

In [655]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import nfl_data_py as nfl
#import itertools
from functions import get_current_weekday, calculate_nfl_week, get_next_sunday, get_current_year

In [656]:
import sqlite3

In [657]:
day = get_current_weekday()

In [658]:
date_string = get_next_sunday(day)

In [659]:
week = calculate_nfl_week(date_string)

In [660]:
season = get_current_year()

# Pulling from database
Fetching the FD and DK lists from the database. These tables include names, positions, teams, opponents and salaries.

In [661]:
# Connect to the SQLite database
#These are the FD and DK player lists for the current week
conn = sqlite3.connect('nfl_dfs.db')

query_fd = "SELECT * FROM fd_table_" + str(week) + "_" + str(season)[2:]
query_dk = "SELECT * FROM dk_table_" + str(week) + "_" + str(season)[2:]

fanduel_df = pd.read_sql_query(query_fd, conn)
draftkings_df = pd.read_sql_query(query_dk, conn)

# Close the database connection
conn.close()

In [662]:
#The weekly and play-by-play data through the previous week
#We went into R Studio to get these and save them as CSVs that we can use here
weekly_df = pd.read_csv('weekly_data_' + str(season) + '_' + str(week) + '.csv')
pbp_df = pd.read_csv('pbp_data_' + str(season) + '_' + str(week) + '.csv', low_memory = False)

In [663]:
weekly_df.replace({'LA': 'LAR'}, inplace = True)
pbp_df.replace({'LA': 'LAR'}, inplace = True)

This is weekly data for each player. We can use this to calculate fantasy points for most players.

In [664]:
weekly_df.head()

Unnamed: 0,player_id,player_name,player_display_name,position,position_group,headshot_url,recent_team,season,week,season_type,...,receiving_first_downs,receiving_epa,receiving_2pt_conversions,racr,target_share,air_yards_share,wopr,special_teams_tds,fantasy_points,fantasy_points_ppr
0,00-0019596,T.Brady,Tom Brady,QB,QB,https://static.www.nfl.com/image/private/f_aut...,TB,2022,1,REG,...,0,,0,,,,,0,10.38,10.38
1,00-0019596,T.Brady,Tom Brady,QB,QB,https://static.www.nfl.com/image/private/f_aut...,TB,2022,2,REG,...,0,,0,,,,,0,9.4,9.4
2,00-0019596,T.Brady,Tom Brady,QB,QB,https://static.www.nfl.com/image/private/f_aut...,TB,2022,3,REG,...,0,,0,,,,,0,14.74,14.74
3,00-0019596,T.Brady,Tom Brady,QB,QB,https://static.www.nfl.com/image/private/f_aut...,TB,2022,4,REG,...,0,,0,,,,,0,25.4,25.4
4,00-0019596,T.Brady,Tom Brady,QB,QB,https://static.www.nfl.com/image/private/f_aut...,TB,2022,5,REG,...,0,,0,,,,,0,19.74,19.74


In [665]:
pbp_df.head()

Unnamed: 0,play_id,game_id,old_game_id,home_team,away_team,season_type,week,posteam,posteam_type,defteam,...,out_of_bounds,home_opening_kickoff,qb_epa,xyac_epa,xyac_mean_yardage,xyac_median_yardage,xyac_success,xyac_fd,xpass,pass_oe
0,1,2022_01_BAL_NYJ,2022091107,NYJ,BAL,REG,1,,,,...,0,1,0.0,,,,,,,
1,43,2022_01_BAL_NYJ,2022091107,NYJ,BAL,REG,1,NYJ,home,BAL,...,0,1,-0.443521,,,,,,,
2,68,2022_01_BAL_NYJ,2022091107,NYJ,BAL,REG,1,NYJ,home,BAL,...,0,1,1.468819,,,,,,0.440373,-44.037291
3,89,2022_01_BAL_NYJ,2022091107,NYJ,BAL,REG,1,NYJ,home,BAL,...,0,1,-0.492192,0.727261,6.988125,6.0,0.60693,0.227598,0.389904,61.009598
4,115,2022_01_BAL_NYJ,2022091107,NYJ,BAL,REG,1,NYJ,home,BAL,...,0,1,-0.325931,,,,,,0.443575,-44.357494


In [666]:
#rb_wr_te_df.head()

In [667]:
#rb_wr_te_df.info()

In [668]:
#nfl.see_weekly_cols()

In [669]:
weekly_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11903 entries, 0 to 11902
Data columns (total 53 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   player_id                    11903 non-null  object 
 1   player_name                  11903 non-null  object 
 2   player_display_name          11903 non-null  object 
 3   position                     11903 non-null  object 
 4   position_group               11903 non-null  object 
 5   headshot_url                 11853 non-null  object 
 6   recent_team                  11903 non-null  object 
 7   season                       11903 non-null  int64  
 8   week                         11903 non-null  int64  
 9   season_type                  11903 non-null  object 
 10  opponent_team                11903 non-null  object 
 11  completions                  11903 non-null  int64  
 12  attempts                     11903 non-null  int64  
 13  passing_yards   

In [670]:
weekly_df['position_group'].value_counts()

position_group
WR      4820
RB      3186
TE      2358
QB      1462
SPEC      28
DB        25
LB        14
OL         8
DL         2
Name: count, dtype: int64

In [671]:
weekly_df['position'].value_counts()

position
WR     4820
RB     2998
TE     2358
QB     1462
FB      188
P        28
CB       10
SS        9
ILB       7
T         7
FS        6
OLB       6
DT        2
G         1
MLB       1
Name: count, dtype: int64

Weekly data filtered for RBs, WRs and TEs

In [672]:
flex_df = weekly_df[weekly_df['position'].isin(['RB', 'HB', 'FB', 'WR', 'TE'])]

In [673]:
cols_to_group = ['season', 'week', 'recent_team', 'opponent_team', 'player_id', 'player_display_name', 'position']

So we do have quite a few duplicate names so player_id becomes more important than names.

In [674]:
# Group by 'player_display_name' and check the number of unique 'player_id's
duplicate_names = flex_df.groupby('player_display_name')['player_id'].nunique()

# Filter the results to find names associated with more than one unique ID
duplicate_names = duplicate_names[duplicate_names > 1]

In [675]:
duplicate_names

Series([], Name: player_id, dtype: int64)

In [676]:
scoring_cols = ['passing_yards', 'passing_tds', 'interceptions', 'passing_2pt_conversions', 'rushing_yards',\
                   'rushing_tds', 'rushing_fumbles_lost', 'rushing_2pt_conversions', 'receptions', 'receiving_yards',\
                   'receiving_tds', 'receiving_fumbles_lost', 'receiving_2pt_conversions', 'sack_fumbles_lost', 'special_teams_tds',\
               'targets', 'carries', 'receiving_yards_after_catch', 'receiving_air_yards', 'air_yards_share', 'target_share']

In [677]:
flex_df = flex_df.groupby(cols_to_group)[scoring_cols].sum()

In [678]:
flex_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 10364 entries, (2022, 1, 'ARI', 'KC', '00-0027942', 'A.J. Green', 'WR') to (2024, 2, 'WAS', 'NYG', '00-0037746', 'Brian Robinson', 'RB')
Data columns (total 21 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   passing_yards                10364 non-null  int64  
 1   passing_tds                  10364 non-null  int64  
 2   interceptions                10364 non-null  int64  
 3   passing_2pt_conversions      10364 non-null  int64  
 4   rushing_yards                10364 non-null  int64  
 5   rushing_tds                  10364 non-null  int64  
 6   rushing_fumbles_lost         10364 non-null  int64  
 7   rushing_2pt_conversions      10364 non-null  int64  
 8   receptions                   10364 non-null  int64  
 9   receiving_yards              10364 non-null  int64  
 10  receiving_tds                10364 non-null  int64  
 11  receiving_fumbles_los

In [679]:
flex_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,passing_yards,passing_tds,interceptions,passing_2pt_conversions,rushing_yards,rushing_tds,rushing_fumbles_lost,rushing_2pt_conversions,receptions,receiving_yards,...,receiving_fumbles_lost,receiving_2pt_conversions,sack_fumbles_lost,special_teams_tds,targets,carries,receiving_yards_after_catch,receiving_air_yards,air_yards_share,target_share
season,week,recent_team,opponent_team,player_id,player_display_name,position,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
2022,1,ARI,KC,00-0027942,A.J. Green,WR,0,0,0,0,0,0,0,0,2,13,...,0,0,0,0,4,0,0,42,0.157895,0.111111
2022,1,ARI,KC,00-0030061,Zach Ertz,TE,0,0,0,0,0,0,0,0,2,14,...,0,1,0,0,4,0,4,22,0.082707,0.111111
2022,1,ARI,KC,00-0033553,James Conner,RB,0,0,0,0,26,1,0,0,5,29,...,0,0,0,0,6,10,38,7,0.026316,0.166667
2022,1,ARI,KC,00-0035500,Greg Dortch,WR,0,0,0,0,0,0,0,0,7,63,...,0,0,0,0,9,0,31,62,0.233083,0.250000
2022,1,ARI,KC,00-0035527,Andy Isabella,WR,0,0,0,0,0,0,0,0,1,10,...,0,0,0,0,3,0,4,30,0.112782,0.083333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024,2,WAS,NYG,00-0035208,Olamide Zaccheaus,WR,0,0,0,0,0,0,0,0,3,14,...,0,0,0,0,3,0,19,-5,-0.040984,0.103448
2024,2,WAS,NYG,00-0035659,Terry McLaurin,WR,0,0,0,0,0,0,0,0,6,22,...,0,0,0,0,8,0,11,31,0.254098,0.275862
2024,2,WAS,NYG,00-0036626,Dyami Brown,WR,0,0,0,0,0,0,0,0,2,17,...,0,0,0,0,4,0,20,17,0.139344,0.137931
2024,2,WAS,NYG,00-0036628,John Bates,TE,0,0,0,0,0,0,0,0,1,5,...,0,0,0,0,1,0,0,5,0.040984,0.034483


In [680]:
def replace(code, old, new):
    """
    Basically a find and replace
    """
    return code.replace(old, new)

In [681]:
flex_df.reset_index(inplace = True, drop = False)

In [682]:
#There are a few errant rows where the opponent matches the team
flex_df = flex_df[~(flex_df['recent_team'] == flex_df['opponent_team'])]

In [683]:
flex_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10358 entries, 0 to 10363
Data columns (total 28 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   season                       10358 non-null  int64  
 1   week                         10358 non-null  int64  
 2   recent_team                  10358 non-null  object 
 3   opponent_team                10358 non-null  object 
 4   player_id                    10358 non-null  object 
 5   player_display_name          10358 non-null  object 
 6   position                     10358 non-null  object 
 7   passing_yards                10358 non-null  int64  
 8   passing_tds                  10358 non-null  int64  
 9   interceptions                10358 non-null  int64  
 10  passing_2pt_conversions      10358 non-null  int64  
 11  rushing_yards                10358 non-null  int64  
 12  rushing_tds                  10358 non-null  int64  
 13  rushing_fumbles_lost 

# What do we need from play-by-play?
Let's take a look at the data we'd like to have and see where we need to integrate the play-by-play data. We have yards_after_catch and air_yards. We have targets and target share. Even though the target share doesn't add up to 100 percent, it's still useful. We'll definitely want to spot-check that.<br>

We have carries but no carry share. Maybe we need to just take the total carries for each team in each game from the pbp and join that with the flex_df. That shouldn't be too hard. Then we can figure out every RBs workload even if they don't all add up to 100.<br>

We also want goal_to_go binary for each play to see how many carries or targets a player gets in goal-line situations and also the rate at which they score TDs in those situations.<br>

In [684]:
run_df = pbp_df[pbp_df['play_type'] == 'run']

In [685]:
running_play_counts = run_df.groupby(['season', 'week', 'posteam', 'defteam', 'game_id']).size().reset_index(name='total_carries')

In [686]:
running_play_counts.rename(columns = {'posteam' : 'recent_team', 'defteam': 'opponent_team'}, inplace = True)

In [687]:
flex_merge = pd.merge(flex_df, running_play_counts, on = ['season', 'week', 'recent_team', 'opponent_team'], how = 'left')

In [688]:
flex_merge.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10358 entries, 0 to 10357
Data columns (total 30 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   season                       10358 non-null  int64  
 1   week                         10358 non-null  int64  
 2   recent_team                  10358 non-null  object 
 3   opponent_team                10358 non-null  object 
 4   player_id                    10358 non-null  object 
 5   player_display_name          10358 non-null  object 
 6   position                     10358 non-null  object 
 7   passing_yards                10358 non-null  int64  
 8   passing_tds                  10358 non-null  int64  
 9   interceptions                10358 non-null  int64  
 10  passing_2pt_conversions      10358 non-null  int64  
 11  rushing_yards                10358 non-null  int64  
 12  rushing_tds                  10358 non-null  int64  
 13  rushing_fumbles_

In [689]:
flex_merge['carry_share'] = round(flex_merge['carries']/flex_merge['total_carries'], 3)

In [690]:
flex_merge.head()

Unnamed: 0,season,week,recent_team,opponent_team,player_id,player_display_name,position,passing_yards,passing_tds,interceptions,...,special_teams_tds,targets,carries,receiving_yards_after_catch,receiving_air_yards,air_yards_share,target_share,game_id,total_carries,carry_share
0,2022,1,ARI,KC,00-0027942,A.J. Green,WR,0,0,0,...,0,4,0,0,42,0.157895,0.111111,2022_01_KC_ARI,21,0.0
1,2022,1,ARI,KC,00-0030061,Zach Ertz,TE,0,0,0,...,0,4,0,4,22,0.082707,0.111111,2022_01_KC_ARI,21,0.0
2,2022,1,ARI,KC,00-0033553,James Conner,RB,0,0,0,...,0,6,10,38,7,0.026316,0.166667,2022_01_KC_ARI,21,0.476
3,2022,1,ARI,KC,00-0035500,Greg Dortch,WR,0,0,0,...,0,9,0,31,62,0.233083,0.25,2022_01_KC_ARI,21,0.0
4,2022,1,ARI,KC,00-0035527,Andy Isabella,WR,0,0,0,...,0,3,0,4,30,0.112782,0.083333,2022_01_KC_ARI,21,0.0


In [691]:
flex_merge.tail(30)

Unnamed: 0,season,week,recent_team,opponent_team,player_id,player_display_name,position,passing_yards,passing_tds,interceptions,...,special_teams_tds,targets,carries,receiving_yards_after_catch,receiving_air_yards,air_yards_share,target_share,game_id,total_carries,carry_share
10328,2024,2,SF,MIN,00-0032128,Chris Conley,WR,0,0,0,...,0,1,0,0,4,0.013029,0.027778,2024_02_SF_MIN,25,0.0
10329,2024,2,SF,MIN,00-0033288,George Kittle,TE,0,0,0,...,0,8,0,45,58,0.188925,0.222222,2024_02_SF_MIN,25,0.0
10330,2024,2,SF,MIN,00-0033576,Eric Saubert,TE,0,0,0,...,0,2,0,11,15,0.04886,0.055556,2024_02_SF_MIN,25,0.0
10331,2024,2,SF,MIN,00-0035719,Deebo Samuel,WR,0,0,0,...,0,10,2,19,104,0.338762,0.277778,2024_02_SF_MIN,25,0.08
10332,2024,2,SF,MIN,00-0036259,Jauan Jennings,WR,0,0,0,...,0,4,0,6,55,0.179153,0.111111,2024_02_SF_MIN,25,0.0
10333,2024,2,SF,MIN,00-0036261,Brandon Aiyuk,WR,0,0,0,...,0,5,0,5,42,0.136808,0.138889,2024_02_SF_MIN,25,0.0
10334,2024,2,SF,MIN,00-0037525,Jordan Mason,RB,0,0,0,...,0,1,20,1,3,0.009772,0.027778,2024_02_SF_MIN,25,0.8
10335,2024,2,SF,MIN,00-0039363,Isaac Guerendo,RB,0,0,0,...,0,0,1,0,0,0.0,0.0,2024_02_SF_MIN,25,0.04
10336,2024,2,TB,DET,00-0031408,Mike Evans,WR,0,0,0,...,0,6,0,18,43,0.282895,0.315789,2024_02_TB_DET,22,0.0
10337,2024,2,TB,DET,00-0033921,Chris Godwin,WR,0,0,0,...,0,8,0,61,76,0.5,0.421053,2024_02_TB_DET,22,0.0


# Goal-to-go situations
We'll filter the pbp data by rows in which goal_to_go is True and the play is either a run or a pass. We find that on passing plays, some of the receiver_player_id values are null, but for running plays none of them are null. This implies that on plays in which the QB is sacked, no pass is thrown so there's no receiver.<br>

Those plays probably should be discarded as we are looking for a percentage of times a RB-WR-TE gets the ball in goal-to-go situations. When a QB is sacked, no one gets the ball.<br>

**Update:** goal_to_go leaves out situations where it's 3rd and 2 from the 4-yard line. Maybe we should see if filtering by yardline_100 <= 10 gives us more data points.

In [692]:
gtg_df_10 = pbp_df[(pbp_df['yardline_100'] <= 10) & ((pbp_df['play_type'] == 'run') | (pbp_df['play_type'] == 'pass'))]
#gtg_df = rb_wr_te_df[(rb_wr_te_df['goal_to_go'] == True) & ((rb_wr_te_df['play_type'] == 'run') | (rb_wr_te_df['play_type'] == 'pass'))]

In [693]:
gtg_df_10['play_type'].value_counts()

play_type
run     2805
pass    2720
Name: count, dtype: int64

In [694]:
gtg_df_10 = gtg_df_10[(gtg_df_10['receiver_player_id'].notnull()) | (gtg_df_10['rusher_player_id'].notnull())]
#gtg_df = gtg_df[(gtg_df['receiver_player_id'].notnull()) | (gtg_df['rusher_player_id'].notnull())]

In [695]:
gtg_df_10[(gtg_df_10['receiver_player_id'].isnull()) & (gtg_df_10['rusher_player_id'].isnull())]
#gtg_df[(gtg_df['receiver_player_id'].isnull()) & (gtg_df['rusher_player_id'].isnull())]

Unnamed: 0,play_id,game_id,old_game_id,home_team,away_team,season_type,week,posteam,posteam_type,defteam,...,out_of_bounds,home_opening_kickoff,qb_epa,xyac_epa,xyac_mean_yardage,xyac_median_yardage,xyac_success,xyac_fd,xpass,pass_oe


In [696]:
gtg_df_10[(gtg_df_10['receiver_player_id'].notnull()) & (gtg_df_10['rusher_player_id'].notnull())]
#gtg_df[(gtg_df['receiver_player_id'].notnull()) & (gtg_df['rusher_player_id'].notnull())]

Unnamed: 0,play_id,game_id,old_game_id,home_team,away_team,season_type,week,posteam,posteam_type,defteam,...,out_of_bounds,home_opening_kickoff,qb_epa,xyac_epa,xyac_mean_yardage,xyac_median_yardage,xyac_success,xyac_fd,xpass,pass_oe


In [697]:
gtg_df_10['player_id'] = gtg_df_10['rusher_player_id'].fillna(gtg_df_10['receiver_player_id'])
#gtg_df['player_id'] = gtg_df['rusher_player_id'].fillna(gtg_df['receiver_player_id'])

In [698]:
gtg_df_10.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5197 entries, 31 to 104495
Columns: 373 entries, play_id to player_id
dtypes: float64(182), int64(39), object(152)
memory usage: 14.8+ MB


In [699]:
#Total goal to go situations for each team in each game
grouped_gtg_10 = gtg_df_10.groupby(['season', 'week', 'posteam', 'defteam']).size().reset_index(name = 'plays_in_10')

#grouped_gtg = gtg_df.groupby(['season', 'week', 'posteam', 'defteam']).size().reset_index(name = 'gtg_plays')


In [700]:
gtg_df_10.columns

Index(['play_id', 'game_id', 'old_game_id', 'home_team', 'away_team',
       'season_type', 'week', 'posteam', 'posteam_type', 'defteam',
       ...
       'home_opening_kickoff', 'qb_epa', 'xyac_epa', 'xyac_mean_yardage',
       'xyac_median_yardage', 'xyac_success', 'xyac_fd', 'xpass', 'pass_oe',
       'player_id'],
      dtype='object', length=373)

In [701]:
gtg_cols = ['play_id','week', 'posteam', 'defteam', 'sp', 'desc', 'play_type', 'td_player_id',
      'incomplete_pass', 'rush_attempt', 'pass_attempt',
       'touchdown', 'pass_touchdown', 'rush_touchdown', 'complete_pass', 'receiver_player_id',
       'receiver_player_name', 'rusher_player_id',
       'rusher_player_name', 'season', 'weather', 'player_id']

In [702]:
gtg_df_10 = gtg_df_10[gtg_cols]

In [703]:
gtg_df_10 = pd.merge(gtg_df_10, grouped_gtg_10, on = ['season', 'week', 'posteam', 'defteam'], how = 'left')
#gtg_df = pd.merge(gtg_df, grouped_gtg, on = ['season', 'week', 'posteam', 'defteam'], how = 'left')

In [704]:
##Next will be to derive how many times per gtg play a player gets the ball, and then when they get it their TD percentage

In [705]:
gtg_10_player = gtg_df_10.groupby(['season', 'week', 'posteam', 'defteam', 'player_id']).size().reset_index(name = 'opps_in_10')
#gtg_player = gtg_df.groupby(['season', 'week', 'posteam', 'defteam', 'player_id']).size().reset_index(name = 'gtg_opps')

In [706]:
gtg_df_10 = pd.merge(gtg_df_10, gtg_10_player, on = ['season', 'week', 'posteam', 'defteam', 'player_id'], how = 'left')
#gtg_df = pd.merge(gtg_df, gtg_player, on = ['season', 'week', 'posteam', 'defteam', 'player_id'], how = 'left')

In [707]:
gtg_df_10['in_10_share'] = round(gtg_df_10['opps_in_10']/gtg_df_10['plays_in_10'], 3)

In [708]:
gtg_df_10.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5197 entries, 0 to 5196
Data columns (total 25 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   play_id               5197 non-null   int64  
 1   week                  5197 non-null   int64  
 2   posteam               5197 non-null   object 
 3   defteam               5197 non-null   object 
 4   sp                    5197 non-null   int64  
 5   desc                  5197 non-null   object 
 6   play_type             5197 non-null   object 
 7   td_player_id          1622 non-null   object 
 8   incomplete_pass       5197 non-null   float64
 9   rush_attempt          5197 non-null   float64
 10  pass_attempt          5197 non-null   float64
 11  touchdown             5197 non-null   float64
 12  pass_touchdown        5197 non-null   float64
 13  rush_touchdown        5197 non-null   float64
 14  complete_pass         5197 non-null   float64
 15  receiver_player_id   

In [709]:
gtg_df_10['player_id'].nunique()

561

In [710]:
flex_merge['player_id'].nunique()

668

In [711]:
##Maybe we need to pause here. There are more players in our weekly data than we have in our gtg data, which makes sense.
#Not every player will be used in gtg situations.
#Maybe somehow just check that every player in flex_merge but not gtg doesn't have any True values in goal_to_go
####Maybe we should just use common sense
#We've filtered every gtg play that was a run or a pass.
#If there's any irregularity it's likely to come out during spot-checking

In [712]:
gtg_players = list(gtg_df_10['player_id'].unique())

In [713]:
flex_players = list(flex_merge['player_id'].unique())

In [714]:
len(gtg_players)

561

In [715]:
len(flex_players)

668

In [716]:
#gtg_df = gtg_df.sort_values(by = ['season', 'week', 'posteam', 'defteam'])

In [717]:
gtg_df_10 = gtg_df_10.groupby(['season', 'week', 'posteam', 'defteam', 'player_id'], as_index=False).agg({'in_10_share': 'mean'})


In [718]:
#gtg_df = gtg_df[['season', 'week', 'posteam', 'defteam', 'player_id', 'gtg_share']]

In [719]:
gtg_df_10.tail(30)

Unnamed: 0,season,week,posteam,defteam,player_id,in_10_share
3310,2024,2,NO,DAL,00-0033906,0.5
3311,2024,2,NO,DAL,00-0033948,0.25
3312,2024,2,NYG,WAS,00-0035250,0.25
3313,2024,2,NYG,WAS,00-0035535,0.125
3314,2024,2,NYG,WAS,00-0038117,0.125
3315,2024,2,NYG,WAS,00-0039337,0.375
3316,2024,2,NYG,WAS,00-0039384,0.125
3317,2024,2,PHI,ATL,00-0034351,0.091
3318,2024,2,PHI,ATL,00-0034844,0.364
3319,2024,2,PHI,ATL,00-0036389,0.091


In [720]:
check_for_one = gtg_df_10.groupby(['season', 'week', 'posteam', 'defteam'])['in_10_share'].sum()

In [721]:
check_for_one.min()

0.9989999999999999

In [722]:
gtg_df_10.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3340 entries, 0 to 3339
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   season       3340 non-null   int64  
 1   week         3340 non-null   int64  
 2   posteam      3340 non-null   object 
 3   defteam      3340 non-null   object 
 4   player_id    3340 non-null   object 
 5   in_10_share  3340 non-null   float64
dtypes: float64(1), int64(2), object(3)
memory usage: 156.7+ KB


In [723]:
gtg_df_10.rename(columns = {'posteam': 'recent_team', 'defteam': 'opponent_team'}, inplace = True)

In [724]:
flex_merge = pd.merge(flex_merge, gtg_df_10, on = ['season', 'week', 'recent_team', 'opponent_team', 'player_id'], how = 'left')

In [725]:
flex_merge.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10358 entries, 0 to 10357
Data columns (total 32 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   season                       10358 non-null  int64  
 1   week                         10358 non-null  int64  
 2   recent_team                  10358 non-null  object 
 3   opponent_team                10358 non-null  object 
 4   player_id                    10358 non-null  object 
 5   player_display_name          10358 non-null  object 
 6   position                     10358 non-null  object 
 7   passing_yards                10358 non-null  int64  
 8   passing_tds                  10358 non-null  int64  
 9   interceptions                10358 non-null  int64  
 10  passing_2pt_conversions      10358 non-null  int64  
 11  rushing_yards                10358 non-null  int64  
 12  rushing_tds                  10358 non-null  int64  
 13  rushing_fumbles_

# A lot of missing in_10_share variables
At first glance it seems like there are too many nulls in the in_10_share column, which is the percentage of plays inside the 10-yard-line that each player gets the ball. But maybe it is plausible. After all, in those goal-line situations only the best players are called upon. We'll replace with zero and investigate further when we examine the data.

In [726]:
flex_merge.fillna(value={'in_10_share': 0}, inplace=True)

Let's rename flex_merge flex_df so we can run the following code

In [727]:
flex_df = flex_merge

# Fantasy points
This is where we calculate FanDuel and DraftKings points.

In [728]:
flex_df['FD_Pts'] = (flex_df['passing_yards'] * 0.04) + (flex_df['rushing_tds'] * 6) + (flex_df['rushing_yards'] * 0.1)\
+ (flex_df['passing_tds'] * 4) + (flex_df['receiving_yards'] * 0.1) + (flex_df['receiving_tds'] * 6) + (flex_df['receptions'] * 0.5)\
+ (flex_df['rushing_2pt_conversions'] * 2) + (flex_df['passing_2pt_conversions'] * 2) + (flex_df['receiving_2pt_conversions'] * 2)\
+ (flex_df['special_teams_tds'] * 6) - (flex_df['interceptions']) - (flex_df['sack_fumbles_lost'] * 2) - (flex_df['receiving_fumbles_lost'] * 2)\
- (flex_df['rushing_fumbles_lost'] * 2)

In [729]:
flex_df['DK_Pts'] = (flex_df['passing_yards'] * 0.04) + (flex_df['rushing_tds'] * 6) + (flex_df['rushing_yards'] * 0.1)\
+ (flex_df['passing_tds'] * 4) + (flex_df['receiving_yards'] * 0.1) + (flex_df['receiving_tds'] * 6) + (flex_df['receptions'] * 1)\
+ (flex_df['rushing_2pt_conversions'] * 2) + (flex_df['passing_2pt_conversions'] * 2) + (flex_df['receiving_2pt_conversions'] * 2)\
+ (flex_df['special_teams_tds'] * 6) - (flex_df['interceptions']) - (flex_df['sack_fumbles_lost']) - (flex_df['receiving_fumbles_lost'])\
- (flex_df['rushing_fumbles_lost'])

Adding DraftKings bonus points

In [730]:
flex_df['DK_Pts'] = np.where(flex_df['passing_yards'] >= 300, flex_df['DK_Pts'] + 3, flex_df['DK_Pts'])
flex_df['DK_Pts'] = np.where(flex_df['receiving_yards'] >= 100, flex_df['DK_Pts'] + 3, flex_df['DK_Pts'])
flex_df['DK_Pts'] = np.where(flex_df['rushing_yards'] >= 100, flex_df['DK_Pts'] + 3, flex_df['DK_Pts'])

In [731]:
flex_df.tail()

Unnamed: 0,season,week,recent_team,opponent_team,player_id,player_display_name,position,passing_yards,passing_tds,interceptions,...,receiving_yards_after_catch,receiving_air_yards,air_yards_share,target_share,game_id,total_carries,carry_share,in_10_share,FD_Pts,DK_Pts
10353,2024,2,WAS,NYG,00-0035208,Olamide Zaccheaus,WR,0,0,0,...,19,-5,-0.040984,0.103448,2024_02_NYG_WAS,33,0.0,0.0,2.9,4.4
10354,2024,2,WAS,NYG,00-0035659,Terry McLaurin,WR,0,0,0,...,11,31,0.254098,0.275862,2024_02_NYG_WAS,33,0.0,0.0,5.2,8.2
10355,2024,2,WAS,NYG,00-0036626,Dyami Brown,WR,0,0,0,...,20,17,0.139344,0.137931,2024_02_NYG_WAS,33,0.0,0.167,2.7,3.7
10356,2024,2,WAS,NYG,00-0036628,John Bates,TE,0,0,0,...,0,5,0.040984,0.034483,2024_02_NYG_WAS,33,0.0,0.0,1.0,1.5
10357,2024,2,WAS,NYG,00-0037746,Brian Robinson,RB,0,0,0,...,10,-9,-0.07377,0.103448,2024_02_NYG_WAS,33,0.515,0.5,14.1,17.6


# Storing last week's points to evaluate model

In [732]:
flex_points_last_week = flex_df[(flex_df['season'] == season) & (flex_df['week'] == week - 1)]\
[['season', 'week', 'player_display_name', 'recent_team', 'opponent_team', 'position', 'FD_Pts', 'DK_Pts']]

In [733]:
flex_points_last_week.to_csv('flex_points_' + str(season) + '_' + str(week - 1) + '.csv')

In [734]:
flex_df['position'].value_counts()

position
WR    4814
RB    2998
TE    2358
FB     188
Name: count, dtype: int64

In [735]:
flex_df['position'] = np.where(flex_df['position'].isin(['HB', 'FB']), 'RB', flex_df['position'])

In [736]:
pos_mean_by_team = flex_df.groupby(['season', 'week', 'recent_team', 'opponent_team', 'position']).size().reset_index(name = 'num_players')

In [737]:
mean_by_pos = pos_mean_by_team.groupby('position')['num_players'].agg(['mean', 'max', 'min'])

In [738]:
mean_by_pos

Unnamed: 0_level_0,mean,max,min
position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
RB,2.650582,5,1
TE,1.97653,4,1
WR,4.004992,7,2


# Grouping points allowed by position
First step in deriving DvP variables.

In [739]:
grouped_pts = flex_df.groupby(['season', 'week', 'recent_team', 'opponent_team', 'position'])[['DK_Pts', 'FD_Pts']].sum().round(3)

In [740]:
grouped_pts

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,DK_Pts,FD_Pts
season,week,recent_team,opponent_team,position,Unnamed: 5_level_1,Unnamed: 6_level_1
2022,1,ARI,KC,RB,25.6,21.6
2022,1,ARI,KC,TE,11.4,10.4
2022,1,ARI,KC,WR,32.9,25.9
2022,1,ATL,NO,RB,28.3,23.3
2022,1,ATL,NO,TE,7.0,5.0
...,...,...,...,...,...,...
2024,2,TEN,NYJ,TE,7.9,5.4
2024,2,TEN,NYJ,WR,31.2,27.7
2024,2,WAS,NYG,RB,29.1,24.1
2024,2,WAS,NYG,TE,11.7,9.2


In [741]:
grouped_pts.reset_index(inplace = True)

In [742]:
grouped_pts = grouped_pts.sort_values(by = ['opponent_team', 'season', 'week'])
#grouped_pts['opp_game_num'] = grouped_pts.groupby(['opponent_team', 'season', 'week', 'recent_team']).cumcount() + 1
grouped_pts.reset_index(inplace = True)

In [743]:
grouped_pts_raw = grouped_pts.copy()

In [744]:
grouped_pts.drop(columns = ['index'], inplace = True)

In [745]:
grouped_pts

Unnamed: 0,season,week,recent_team,opponent_team,position,DK_Pts,FD_Pts
0,2022,1,KC,ARI,RB,42.5,39.5
1,2022,1,KC,ARI,TE,38.2,30.2
2,2022,1,KC,ARI,WR,35.6,27.6
3,2022,2,LV,ARI,RB,12.6,11.6
4,2022,2,LV,ARI,TE,23.0,18.5
...,...,...,...,...,...,...,...
3592,2024,1,TB,WAS,RB,26.2,22.2
3593,2024,1,TB,WAS,TE,1.5,1.0
3594,2024,1,TB,WAS,WR,58.5,51.0
3595,2024,2,NYG,WAS,RB,15.7,14.2


In [746]:
# Create the pivot table
pivot_df = grouped_pts.pivot_table(
    index=['season', 'week', 'recent_team', 'opponent_team'],
    columns='position',
    values=['DK_Pts', 'FD_Pts'],
    aggfunc='sum',  # Assuming you want to sum the points if there are duplicates
    fill_value=0  # Fill missing values with 0
)

In [747]:
# Flatten the MultiIndex columns created by pivot_table
pivot_df.columns = ['_'.join(col).strip() for col in pivot_df.columns.values]

In [748]:
# Reset the index to turn the pivot table back into a DataFrame
pivot_df.reset_index(inplace=True)

In [749]:
pivot_df.head()

Unnamed: 0,season,week,recent_team,opponent_team,DK_Pts_RB,DK_Pts_TE,DK_Pts_WR,FD_Pts_RB,FD_Pts_TE,FD_Pts_WR
0,2022,1,ARI,KC,25.6,11.4,32.9,21.6,10.4,25.9
1,2022,1,ATL,NO,28.3,7.0,27.1,23.3,5.0,20.1
2,2022,1,BAL,NYJ,11.5,10.2,39.2,9.5,7.7,35.2
3,2022,1,BUF,LAR,16.0,1.5,63.7,10.0,1.0,52.2
4,2022,1,CAR,CLE,16.5,8.4,34.4,14.5,6.9,26.9


In [750]:
pivot_df = pivot_df.sort_values(by = ['opponent_team', 'season', 'week'])

# Bringing in current week's FanDuel and DraftKings rows here
We're basically cloning this notebook from model training. We need features that apply to the current NFL week. We're trying the **one extra row** concept. For now we can probably add one row for each team, with the season value being 2024 and the week value being 1.

In [751]:
# Connect to the SQLite database
conn = sqlite3.connect('nfl_dfs.db')

# query_flex = "SELECT * FROM flex_dataset"
# #query_weekly = "SELECT * FROM weekly_data"
# query_fd_spread = "SELECT * FROM fd_spreads"
# query_dk_spread = "SELECT * FROM dk_spreads"
query_qb_model = "SELECT * FROM fd_qb_model_ready"
# # query_pbp = "S#ELECT * FROM pbp_non_defense"
query_fd = "SELECT * FROM fd_table"
query_dk = "SELECT * FROM dk_table"
#query_rb_wr_te = "SELECT * FROM rb_wr_te_data WHERE season >= 2022"

# flex_dataset = pd.read_sql_query(query_flex, conn)
# #weekly_df = pd.read_sql_query(query_weekly, conn)
# fd_spreads = pd.read_sql_query(query_fd_spread, conn)
# dk_spreads = pd.read_sql_query(query_dk_spread, conn)
# #pbp_df = pd.read_sql_query(query_pbp, conn)
fd_table = pd.read_sql_query(query_fd, conn)
dk_table = pd.read_sql_query(query_dk, conn)
qb_model = pd.read_sql_query(query_qb_model, conn)
# rb_wr_te_df = pd.read_sql_query(query_rb_wr_te, conn)

# Close the database connection
conn.close()

In [752]:
fanduel_df.head()

Unnamed: 0,ID,name,position,salary,team,opponent,home_team,status,date,week
0,107027-86631,CeeDee Lamb,WR,9300,DAL,BAL,1,Active,09-22-2024,3
1,107027-85671,Justin Jefferson,WR,9200,MIN,HOU,1,Active,09-22-2024,3
2,107027-86997,Amon-Ra St. Brown,WR,9100,DET,ARI,0,Active,09-22-2024,3
3,107027-53681,Tyreek Hill,WR,9000,MIA,SEA,0,Active,09-22-2024,3
4,107027-63115,Lamar Jackson,QB,8800,BAL,DAL,0,Active,09-22-2024,3


In [753]:
draftkings_df.head()

Unnamed: 0,ID,name,position,salary,team,opponent,home_team,status,date,week
0,35940249,CeeDee Lamb,WR,8800,DAL,BAL,1,Active,09-22-2024,3
1,35940251,Justin Jefferson,WR,8600,MIN,HOU,1,Active,09-22-2024,3
2,35939981,Christian McCaffrey,RB,8500,SF,LAR,0,IR,09-22-2024,3
3,35940253,Tyreek Hill,WR,8400,MIA,SEA,0,Active,09-22-2024,3
4,35940255,Amon-Ra St. Brown,WR,8200,DET,ARI,0,Active,09-22-2024,3


In [754]:
# Initialize empty lists for teams and opponents
#We'll use FanDuel here because they tend to list more players than DK and we're less likely to miss anyone.
team_list = []
opponent_list = []

# Initialize a set to keep track of already added matchups
added_matchups = set()

# Loop through each row in the dataframe
for index, row in fanduel_df.iterrows():
    team = row['team']
    opponent = row['opponent']
    
    # Ensure each team and opponent are added only once in reverse order as well
    if (team, opponent) not in added_matchups and (opponent, team) not in added_matchups:
        # Add the matchup as-is
        team_list.append(team)
        opponent_list.append(opponent)
        
        # Add the reverse matchup
        team_list.append(opponent)
        opponent_list.append(team)
        
        # Track the added matchups
        added_matchups.add((team, opponent))
        added_matchups.add((opponent, team))

# Display the resulting lists
print("Team list:", team_list)
print("Opponent list:", opponent_list)


Team list: ['DAL', 'BAL', 'MIN', 'HOU', 'DET', 'ARI', 'MIA', 'SEA', 'KC', 'ATL', 'PHI', 'NO', 'SF', 'LAR', 'TB', 'DEN', 'IND', 'CHI', 'LV', 'CAR', 'GB', 'TEN', 'LAC', 'PIT', 'CLE', 'NYG']
Opponent list: ['BAL', 'DAL', 'HOU', 'MIN', 'ARI', 'DET', 'SEA', 'MIA', 'ATL', 'KC', 'NO', 'PHI', 'LAR', 'SF', 'DEN', 'TB', 'CHI', 'IND', 'CAR', 'LV', 'TEN', 'GB', 'PIT', 'LAC', 'NYG', 'CLE']


In [755]:
len(team_list), len(opponent_list)

(26, 26)

In [756]:
append_to_pivot = {'season': [season] * len(team_list), 'week': [week] * len(team_list), 'recent_team': team_list, 'opponent_team': opponent_list}

In [757]:
append_to_pivot = pd.DataFrame(append_to_pivot)

In [758]:
pivot_df = pd.concat([pivot_df, append_to_pivot], axis = 0)

In [759]:
pivot_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1228 entries, 15 to 25
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   season         1228 non-null   int64  
 1   week           1228 non-null   int64  
 2   recent_team    1228 non-null   object 
 3   opponent_team  1228 non-null   object 
 4   DK_Pts_RB      1202 non-null   float64
 5   DK_Pts_TE      1202 non-null   float64
 6   DK_Pts_WR      1202 non-null   float64
 7   FD_Pts_RB      1202 non-null   float64
 8   FD_Pts_TE      1202 non-null   float64
 9   FD_Pts_WR      1202 non-null   float64
dtypes: float64(6), int64(2), object(2)
memory usage: 105.5+ KB


In [760]:
pivot_df['opp_game_num'] = pivot_df.groupby(['opponent_team']).cumcount() + 1

In [761]:
cols_for_dvp = ['DK_Pts_RB', 'DK_Pts_TE', 'DK_Pts_WR', 'FD_Pts_RB', 'FD_Pts_TE', 'FD_Pts_WR']

In [762]:
pivot_df_by_game = pivot_df.copy()

In [763]:
pivot_df.head()

Unnamed: 0,season,week,recent_team,opponent_team,DK_Pts_RB,DK_Pts_TE,DK_Pts_WR,FD_Pts_RB,FD_Pts_TE,FD_Pts_WR,opp_game_num
15,2022,1,KC,ARI,42.5,38.2,35.6,39.5,30.2,27.6,1
50,2022,2,LV,ARI,12.6,23.0,32.7,11.6,18.5,24.7,2
81,2022,3,LAR,ARI,12.8,13.9,37.0,11.8,11.4,30.5,3
100,2022,4,CAR,ARI,26.0,8.0,17.7,21.5,6.0,13.2,4
153,2022,5,PHI,ARI,10.4,17.5,29.8,9.4,13.5,21.8,5


In [764]:
pivot_df.tail()

Unnamed: 0,season,week,recent_team,opponent_team,DK_Pts_RB,DK_Pts_TE,DK_Pts_WR,FD_Pts_RB,FD_Pts_TE,FD_Pts_WR,opp_game_num
21,2024,3,TEN,GB,,,,,,,39
22,2024,3,LAC,PIT,,,,,,,38
23,2024,3,PIT,LAC,,,,,,,38
24,2024,3,CLE,NYG,,,,,,,39
25,2024,3,NYG,CLE,,,,,,,38


In [765]:
pivot_df['opponent_team'].nunique()

32

# DvP variables
We'll take an 8-game rolling mean for fantasy points allowed to RBs, WRs and TEs for each team, even if the games go back to last season. DvP means defense vs. position.

In [766]:
def calculate_equal_rolling_mean(group, cols, suffix):
    """
    This function calculates a rolling mean for the last eight games, going back to previous season if necessary.
    It also calculates when there are less than eight games to use.
    """
    for col in cols:
        group[f'{col}{suffix}'] = (
            group[col].shift().rolling(window=8, min_periods=1).mean()
        )
    return group

In [767]:
pivot_df = pivot_df.groupby('opponent_team', as_index = False).apply(calculate_equal_rolling_mean, cols=cols_for_dvp, suffix = '_DvP')

In [768]:
pivot_df.drop(columns = cols_for_dvp + ['opp_game_num'], inplace = True)

In [769]:
pivot_df

Unnamed: 0,Unnamed: 1,season,week,recent_team,opponent_team,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,FD_Pts_RB_DvP,FD_Pts_TE_DvP,FD_Pts_WR_DvP
0,15,2022,1,KC,ARI,,,,,,
0,50,2022,2,LV,ARI,42.500000,38.200000,35.6000,39.500000,30.200000,27.6000
0,81,2022,3,LAR,ARI,27.550000,30.600000,34.1500,25.550000,24.350000,26.1500
0,100,2022,4,CAR,ARI,22.633333,25.033333,35.1000,20.966667,20.033333,27.6000
0,153,2022,5,PHI,ARI,23.475000,20.775000,30.7500,21.100000,16.525000,24.0000
...,...,...,...,...,...,...,...,...,...,...,...
31,1040,2023,16,NYJ,WAS,29.400000,10.025000,44.8375,25.712500,8.025000,36.9000
31,1076,2023,17,SF,WAS,32.612500,8.125000,45.5625,28.550000,6.312500,37.3750
31,1088,2023,18,DAL,WAS,33.900000,7.912500,42.7375,30.025000,6.162500,34.8625
31,1167,2024,1,TB,WAS,33.962500,8.212500,47.3250,30.212500,6.275000,38.8875


In [770]:
pivot_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 1228 entries, (0, 15) to (31, 1193)
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   season         1228 non-null   int64  
 1   week           1228 non-null   int64  
 2   recent_team    1228 non-null   object 
 3   opponent_team  1228 non-null   object 
 4   DK_Pts_RB_DvP  1196 non-null   float64
 5   DK_Pts_TE_DvP  1196 non-null   float64
 6   DK_Pts_WR_DvP  1196 non-null   float64
 7   FD_Pts_RB_DvP  1196 non-null   float64
 8   FD_Pts_TE_DvP  1196 non-null   float64
 9   FD_Pts_WR_DvP  1196 non-null   float64
dtypes: float64(6), int64(2), object(2)
memory usage: 142.5+ KB


In [771]:
flex_df.head()

Unnamed: 0,season,week,recent_team,opponent_team,player_id,player_display_name,position,passing_yards,passing_tds,interceptions,...,receiving_yards_after_catch,receiving_air_yards,air_yards_share,target_share,game_id,total_carries,carry_share,in_10_share,FD_Pts,DK_Pts
0,2022,1,ARI,KC,00-0027942,A.J. Green,WR,0,0,0,...,0,42,0.157895,0.111111,2022_01_KC_ARI,21,0.0,0.143,2.3,3.3
1,2022,1,ARI,KC,00-0030061,Zach Ertz,TE,0,0,0,...,4,22,0.082707,0.111111,2022_01_KC_ARI,21,0.0,0.429,10.4,11.4
2,2022,1,ARI,KC,00-0033553,James Conner,RB,0,0,0,...,38,7,0.026316,0.166667,2022_01_KC_ARI,21,0.476,0.286,14.0,16.5
3,2022,1,ARI,KC,00-0035500,Greg Dortch,WR,0,0,0,...,31,62,0.233083,0.25,2022_01_KC_ARI,21,0.0,0.0,9.8,13.3
4,2022,1,ARI,KC,00-0035527,Andy Isabella,WR,0,0,0,...,4,30,0.112782,0.083333,2022_01_KC_ARI,21,0.0,0.0,1.5,2.0


In [772]:
flex_df = flex_df.merge(
    pivot_df, 
    on=['season', 'week', 'recent_team', 'opponent_team'],  # common columns to merge on
    how='outer'  # outer join to keep all rows from both dataframes
)

In [773]:
flex_df.tail(30)

Unnamed: 0,season,week,recent_team,opponent_team,player_id,player_display_name,position,passing_yards,passing_tds,interceptions,...,carry_share,in_10_share,FD_Pts,DK_Pts,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,FD_Pts_RB_DvP,FD_Pts_TE_DvP,FD_Pts_WR_DvP
10354,2024,2,WAS,NYG,00-0035659,Terry McLaurin,WR,0.0,0.0,0.0,...,0.0,0.0,5.2,8.2,23.3625,12.0375,35.225,20.7375,9.2875,27.975
10355,2024,2,WAS,NYG,00-0036626,Dyami Brown,WR,0.0,0.0,0.0,...,0.0,0.167,2.7,3.7,23.3625,12.0375,35.225,20.7375,9.2875,27.975
10356,2024,2,WAS,NYG,00-0036628,John Bates,TE,0.0,0.0,0.0,...,0.0,0.0,1.0,1.5,23.3625,12.0375,35.225,20.7375,9.2875,27.975
10357,2024,2,WAS,NYG,00-0037746,Brian Robinson,RB,0.0,0.0,0.0,...,0.515,0.5,14.1,17.6,23.3625,12.0375,35.225,20.7375,9.2875,27.975
10358,2024,3,DET,ARI,,,,,,,...,,,,,27.71,12.8375,28.4875,24.2725,10.525,23.4875
10359,2024,3,KC,ATL,,,,,,,...,,,,,22.0125,10.2375,27.025,19.1375,7.9875,21.5875
10360,2024,3,DAL,BAL,,,,,,,...,,,,,22.75,16.025,30.85,18.4375,11.9,24.35
10361,2024,3,LV,CAR,,,,,,,...,,,,,24.225,11.775,25.9,21.35,9.525,21.275
10362,2024,3,IND,CHI,,,,,,,...,,,,,22.4625,12.5375,30.175,18.7125,9.5375,23.8625
10363,2024,3,NYG,CLE,,,,,,,...,,,,,23.2,15.9125,29.6125,20.075,12.975,24.175


In [774]:
flex_df.reset_index(inplace = True)

In [775]:
flex_df.columns

Index(['index', 'season', 'week', 'recent_team', 'opponent_team', 'player_id',
       'player_display_name', 'position', 'passing_yards', 'passing_tds',
       'interceptions', 'passing_2pt_conversions', 'rushing_yards',
       'rushing_tds', 'rushing_fumbles_lost', 'rushing_2pt_conversions',
       'receptions', 'receiving_yards', 'receiving_tds',
       'receiving_fumbles_lost', 'receiving_2pt_conversions',
       'sack_fumbles_lost', 'special_teams_tds', 'targets', 'carries',
       'receiving_yards_after_catch', 'receiving_air_yards', 'air_yards_share',
       'target_share', 'game_id', 'total_carries', 'carry_share',
       'in_10_share', 'FD_Pts', 'DK_Pts', 'DK_Pts_RB_DvP', 'DK_Pts_TE_DvP',
       'DK_Pts_WR_DvP', 'FD_Pts_RB_DvP', 'FD_Pts_TE_DvP', 'FD_Pts_WR_DvP'],
      dtype='object')

In [776]:
flex_df['opponent_team'].nunique()

32

# Paring down some of the columns
We're drop columns that won't be needed for features.

In [777]:
cols_to_keep = ['season', 'week', 'recent_team', 'opponent_team', 'player_id',
       'player_display_name', 'position', 'rushing_yards', 'rushing_tds', 'rushing_fumbles_lost', 
       'receptions', 'receiving_yards', 'receiving_tds', 'receiving_fumbles_lost', 'targets', 'carries',
       'receiving_yards_after_catch', 'receiving_air_yards', 'air_yards_share',
       'target_share', 'carry_share','in_10_share', 'FD_Pts', 'DK_Pts', 'DK_Pts_RB_DvP', 'DK_Pts_TE_DvP',
       'DK_Pts_WR_DvP', 'FD_Pts_RB_DvP', 'FD_Pts_TE_DvP', 'FD_Pts_WR_DvP']

In [778]:
flex_df = flex_df[cols_to_keep]

In [779]:
#quarterback_df[quarterback_df['season'] >= 2006]['passing_air_yards']

In [780]:
flex_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10384 entries, 0 to 10383
Data columns (total 30 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   season                       10384 non-null  int64  
 1   week                         10384 non-null  int64  
 2   recent_team                  10384 non-null  object 
 3   opponent_team                10384 non-null  object 
 4   player_id                    10358 non-null  object 
 5   player_display_name          10358 non-null  object 
 6   position                     10358 non-null  object 
 7   rushing_yards                10358 non-null  float64
 8   rushing_tds                  10358 non-null  float64
 9   rushing_fumbles_lost         10358 non-null  float64
 10  receptions                   10358 non-null  float64
 11  receiving_yards              10358 non-null  float64
 12  receiving_tds                10358 non-null  float64
 13  receiving_fumble

# Filling missing values
We have found that dropping rows with missing values can affect calculations down the line. So we need to find ways to fill the missing values.<br>

The first row of every team grouping when we calculated DvP was NaN because there was no previous value. We'll fill those in with the mean.

In [781]:
flex_df['FD_Pts_RB_DvP'] = flex_df['FD_Pts_RB_DvP'].fillna(flex_df['FD_Pts_RB_DvP'].mean())
flex_df['DK_Pts_RB_DvP'] = flex_df['DK_Pts_RB_DvP'].fillna(flex_df['DK_Pts_RB_DvP'].mean())
flex_df['FD_Pts_TE_DvP'] = flex_df['FD_Pts_TE_DvP'].fillna(flex_df['FD_Pts_TE_DvP'].mean())
flex_df['DK_Pts_TE_DvP'] = flex_df['DK_Pts_TE_DvP'].fillna(flex_df['DK_Pts_TE_DvP'].mean())
flex_df['FD_Pts_WR_DvP'] = flex_df['FD_Pts_WR_DvP'].fillna(flex_df['FD_Pts_WR_DvP'].mean())
flex_df['DK_Pts_WR_DvP'] = flex_df['DK_Pts_WR_DvP'].fillna(flex_df['DK_Pts_WR_DvP'].mean())

In [782]:
flex_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10384 entries, 0 to 10383
Data columns (total 30 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   season                       10384 non-null  int64  
 1   week                         10384 non-null  int64  
 2   recent_team                  10384 non-null  object 
 3   opponent_team                10384 non-null  object 
 4   player_id                    10358 non-null  object 
 5   player_display_name          10358 non-null  object 
 6   position                     10358 non-null  object 
 7   rushing_yards                10358 non-null  float64
 8   rushing_tds                  10358 non-null  float64
 9   rushing_fumbles_lost         10358 non-null  float64
 10  receptions                   10358 non-null  float64
 11  receiving_yards              10358 non-null  float64
 12  receiving_tds                10358 non-null  float64
 13  receiving_fumble

In [783]:
flex_24 = flex_df[(flex_df['season'] == season) & (flex_df['week'] == week)]

In [784]:
flex_24.head()

Unnamed: 0,season,week,recent_team,opponent_team,player_id,player_display_name,position,rushing_yards,rushing_tds,rushing_fumbles_lost,...,carry_share,in_10_share,FD_Pts,DK_Pts,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,FD_Pts_RB_DvP,FD_Pts_TE_DvP,FD_Pts_WR_DvP
10358,2024,3,DET,ARI,,,,,,,...,,,,,27.71,12.8375,28.4875,24.2725,10.525,23.4875
10359,2024,3,KC,ATL,,,,,,,...,,,,,22.0125,10.2375,27.025,19.1375,7.9875,21.5875
10360,2024,3,DAL,BAL,,,,,,,...,,,,,22.75,16.025,30.85,18.4375,11.9,24.35
10361,2024,3,LV,CAR,,,,,,,...,,,,,24.225,11.775,25.9,21.35,9.525,21.275
10362,2024,3,IND,CHI,,,,,,,...,,,,,22.4625,12.5375,30.175,18.7125,9.5375,23.8625


In [785]:
fd_flex = fanduel_df[fanduel_df['position'].isin(['RB', 'WR', 'TE'])]
dk_flex = draftkings_df[draftkings_df['position'].isin(['RB', 'WR', 'TE'])]

In [786]:
fd_names = set(list(fd_flex['name'].unique()))
dk_names = set(list(dk_flex['name'].unique()))

In [787]:
# Initialize empty lists for names, positions, teams, and opponents
name_list = []
position_list = []
team_list = []
opponent_list = []

# Loop through the dataframe to extract names, positions, teams, and opponents
for index, row in fd_flex.iterrows():
    name_list.append(row['name'])          # Add player name to name_list
    position_list.append(row['position'])  # Add player position to position_list
    team_list.append(row['team'])          # Add player's team to team_list
    opponent_list.append(row['opponent'])  # Add opponent team to opponent_list

# dk_name_list = []
# dk_position_list = []
# dk_team_list = []
# dk_opponent_list = []

# for index, row in dk_flex.iterrows():
#     dk_name_list.append(row['name'])          # Add player name to name_list
#     dk_position_list.append(row['position'])  # Add player position to position_list
#     dk_team_list.append(row['team'])          # Add player's team to team_list
#     dk_opponent_list.append(row['opponent'])  # Add opponent team to opponent_list

# Display the resulting lists
# print("Name list:", name_list)
# print("Position list:", position_list)
# print("Team list:", team_list)
# print("Opponent list:", opponent_list)


In [788]:
len(name_list), len(position_list)

(747, 747)

In [789]:
to_concat = {'season': [season] * len(name_list), 'week': [week] * len(name_list), 'player_display_name': name_list, 'position': position_list,\
            'recent_team': team_list, 'opponent_team': opponent_list}

In [790]:
to_concat = pd.DataFrame(to_concat)

In [791]:
to_concat

Unnamed: 0,season,week,player_display_name,position,recent_team,opponent_team
0,2024,3,CeeDee Lamb,WR,DAL,BAL
1,2024,3,Justin Jefferson,WR,MIN,HOU
2,2024,3,Amon-Ra St. Brown,WR,DET,ARI
3,2024,3,Tyreek Hill,WR,MIA,SEA
4,2024,3,Saquon Barkley,RB,PHI,NO
...,...,...,...,...,...,...
742,2024,3,Dalton Keene,TE,HOU,MIN
743,2024,3,John Kelly Jr.,RB,BAL,DAL
744,2024,3,Jody Fortson Jr.,TE,MIA,SEA
745,2024,3,Tre'Quan Smith,WR,DET,ARI


In [792]:
to_concat['recent_team'].value_counts()

recent_team
MIA    34
KC     34
CAR    32
PHI    31
NO     31
MIN    30
CLE    30
GB     30
SEA    30
BAL    30
PIT    29
LAC    29
ARI    29
DAL    29
ATL    29
IND    28
HOU    28
SF     27
NYG    27
CHI    27
DEN    27
TB     26
TEN    26
LV     26
LAR    24
DET    24
Name: count, dtype: int64

In [793]:
flex_24.columns

Index(['season', 'week', 'recent_team', 'opponent_team', 'player_id',
       'player_display_name', 'position', 'rushing_yards', 'rushing_tds',
       'rushing_fumbles_lost', 'receptions', 'receiving_yards',
       'receiving_tds', 'receiving_fumbles_lost', 'targets', 'carries',
       'receiving_yards_after_catch', 'receiving_air_yards', 'air_yards_share',
       'target_share', 'carry_share', 'in_10_share', 'FD_Pts', 'DK_Pts',
       'DK_Pts_RB_DvP', 'DK_Pts_TE_DvP', 'DK_Pts_WR_DvP', 'FD_Pts_RB_DvP',
       'FD_Pts_TE_DvP', 'FD_Pts_WR_DvP'],
      dtype='object')

In [794]:
flex_24 = flex_24[['season', 'week', 'recent_team', 'opponent_team', 
       'DK_Pts_RB_DvP', 'DK_Pts_TE_DvP', 'DK_Pts_WR_DvP', 'FD_Pts_RB_DvP',
       'FD_Pts_TE_DvP', 'FD_Pts_WR_DvP']]

In [795]:
to_concat = pd.merge(to_concat, flex_24, on = ['season', 'week', 'recent_team', 'opponent_team'])

In [796]:
to_concat

Unnamed: 0,season,week,player_display_name,position,recent_team,opponent_team,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,FD_Pts_RB_DvP,FD_Pts_TE_DvP,FD_Pts_WR_DvP
0,2024,3,CeeDee Lamb,WR,DAL,BAL,22.750,16.0250,30.85,18.4375,11.900,24.3500
1,2024,3,Ezekiel Elliott,RB,DAL,BAL,22.750,16.0250,30.85,18.4375,11.900,24.3500
2,2024,3,Brandin Cooks,WR,DAL,BAL,22.750,16.0250,30.85,18.4375,11.900,24.3500
3,2024,3,Rico Dowdle,RB,DAL,BAL,22.750,16.0250,30.85,18.4375,11.900,24.3500
4,2024,3,Jake Ferguson,TE,DAL,BAL,22.750,16.0250,30.85,18.4375,11.900,24.3500
...,...,...,...,...,...,...,...,...,...,...,...,...
742,2024,3,Dillon Johnson,RB,CAR,LV,22.325,11.6875,33.00,19.2000,8.875,26.9375
743,2024,3,Feleipe Franks,TE,CAR,LV,22.325,11.6875,33.00,19.2000,8.875,26.9375
744,2024,3,Devin Carter,WR,CAR,LV,22.325,11.6875,33.00,19.2000,8.875,26.9375
745,2024,3,Curtis Hodges,TE,CAR,LV,22.325,11.6875,33.00,19.2000,8.875,26.9375


In [797]:
to_concat['opponent_team'].value_counts()

opponent_team
SEA    34
ATL    34
LV     32
NO     31
PHI    31
HOU    30
NYG    30
TEN    30
MIA    30
DAL    30
LAC    29
PIT    29
DET    29
BAL    29
KC     29
CHI    28
MIN    28
LAR    27
CLE    27
IND    27
TB     27
DEN    26
GB     26
CAR    26
SF     24
ARI    24
Name: count, dtype: int64

In [798]:
flex_df = flex_df[~((flex_df['season'] == season) & (flex_df['week'] == week))]

In [799]:
flex_df = pd.concat([flex_df, to_concat], axis = 0)

In [800]:
flex_df

Unnamed: 0,season,week,recent_team,opponent_team,player_id,player_display_name,position,rushing_yards,rushing_tds,rushing_fumbles_lost,...,carry_share,in_10_share,FD_Pts,DK_Pts,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,FD_Pts_RB_DvP,FD_Pts_TE_DvP,FD_Pts_WR_DvP
0,2022,1,ARI,KC,00-0027942,A.J. Green,WR,0.0,0.0,0.0,...,0.000,0.143,2.3,3.3,22.658752,11.91623,34.486949,19.662571,9.422346,27.380205
1,2022,1,ARI,KC,00-0030061,Zach Ertz,TE,0.0,0.0,0.0,...,0.000,0.429,10.4,11.4,22.658752,11.91623,34.486949,19.662571,9.422346,27.380205
2,2022,1,ARI,KC,00-0033553,James Conner,RB,26.0,1.0,0.0,...,0.476,0.286,14.0,16.5,22.658752,11.91623,34.486949,19.662571,9.422346,27.380205
3,2022,1,ARI,KC,00-0035500,Greg Dortch,WR,0.0,0.0,0.0,...,0.000,0.000,9.8,13.3,22.658752,11.91623,34.486949,19.662571,9.422346,27.380205
4,2022,1,ARI,KC,00-0035527,Andy Isabella,WR,0.0,0.0,0.0,...,0.000,0.000,1.5,2.0,22.658752,11.91623,34.486949,19.662571,9.422346,27.380205
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
742,2024,3,CAR,LV,,Dillon Johnson,RB,,,,...,,,,,22.325000,11.68750,33.000000,19.200000,8.875000,26.937500
743,2024,3,CAR,LV,,Feleipe Franks,TE,,,,...,,,,,22.325000,11.68750,33.000000,19.200000,8.875000,26.937500
744,2024,3,CAR,LV,,Devin Carter,WR,,,,...,,,,,22.325000,11.68750,33.000000,19.200000,8.875000,26.937500
745,2024,3,CAR,LV,,Curtis Hodges,TE,,,,...,,,,,22.325000,11.68750,33.000000,19.200000,8.875000,26.937500


In [801]:
cols_L8 = ['rushing_yards', 'rushing_tds', 'rushing_fumbles_lost', 'receptions', 'receiving_yards', 'receiving_tds', 'receiving_fumbles_lost', 'targets',\
           'carries', 'receiving_yards_after_catch', 'receiving_air_yards', 'air_yards_share', 'target_share', 'carry_share', 'in_10_share']

In [802]:
flex_L8_features = flex_df.groupby(['player_display_name', 'season', 'week'])[cols_L8].sum()

# L8 variables
L8 variables are rolling means of features over the last eight games. Just like we did for the DvP variables, we'll calculate features over the previous eight games for individual QBs.

In [803]:
flex_L8_features = flex_L8_features.sort_values(by = ['player_display_name', 'season', 'week'])
flex_L8_features['game_num'] = flex_L8_features.groupby(['player_display_name', 'season']).cumcount() + 1
#quarterback_df.reset_index(drop = True, inplace = True)

In [804]:
flex_L8_features = flex_L8_features.groupby(['player_display_name'], as_index = False).apply(calculate_equal_rolling_mean, cols=cols_L8, suffix = '_L8')

In [805]:
flex_L8_features.reset_index(inplace = True)

In [806]:
flex_L8_features.columns

Index(['level_0', 'player_display_name', 'season', 'week', 'rushing_yards',
       'rushing_tds', 'rushing_fumbles_lost', 'receptions', 'receiving_yards',
       'receiving_tds', 'receiving_fumbles_lost', 'targets', 'carries',
       'receiving_yards_after_catch', 'receiving_air_yards', 'air_yards_share',
       'target_share', 'carry_share', 'in_10_share', 'game_num',
       'rushing_yards_L8', 'rushing_tds_L8', 'rushing_fumbles_lost_L8',
       'receptions_L8', 'receiving_yards_L8', 'receiving_tds_L8',
       'receiving_fumbles_lost_L8', 'targets_L8', 'carries_L8',
       'receiving_yards_after_catch_L8', 'receiving_air_yards_L8',
       'air_yards_share_L8', 'target_share_L8', 'carry_share_L8',
       'in_10_share_L8'],
      dtype='object')

In [807]:
flex_L8_features.drop(columns = ['level_0', 'rushing_yards', 'rushing_tds', 'rushing_fumbles_lost', 'receptions', 'receiving_yards',
       'receiving_tds', 'receiving_fumbles_lost', 'targets', 'carries', 'receiving_yards_after_catch', 'receiving_air_yards', 'air_yards_share',
       'target_share', 'carry_share', 'in_10_share'], inplace = True)

In [808]:
flex_L8_features

Unnamed: 0,player_display_name,season,week,game_num,rushing_yards_L8,rushing_tds_L8,rushing_fumbles_lost_L8,receptions_L8,receiving_yards_L8,receiving_tds_L8,receiving_fumbles_lost_L8,targets_L8,carries_L8,receiving_yards_after_catch_L8,receiving_air_yards_L8,air_yards_share_L8,target_share_L8,carry_share_L8,in_10_share_L8
0,A.J. Brown,2022,1,1,,,,,,,,,,,,,,,
1,A.J. Brown,2022,2,2,0.000000,0.000000,0.0,10.000000,155.000000,0.000000,0.0,13.000000,0.000000,63.000000,94.000000,0.728682,0.448276,0.000000,0.167000
2,A.J. Brown,2022,3,3,0.000000,0.000000,0.0,7.500000,112.000000,0.000000,0.0,10.500000,0.000000,48.500000,102.500000,0.557721,0.353170,0.000000,0.083500
3,A.J. Brown,2022,4,4,0.000000,0.000000,0.0,6.666667,103.000000,0.333333,0.0,10.333333,0.000000,42.333333,110.333333,0.481474,0.333486,0.000000,0.198667
4,A.J. Brown,2022,5,5,0.000000,0.000000,0.0,6.250000,101.000000,0.250000,0.0,9.500000,0.000000,40.250000,105.250000,0.499995,0.323031,0.000000,0.149000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11100,Zonovan Knight,2022,17,6,50.200000,0.200000,0.0,2.200000,16.600000,0.000000,0.0,2.200000,13.000000,22.400000,-5.800000,-0.014043,0.055368,0.551200,0.116600
11101,Zonovan Knight,2022,18,7,46.333333,0.166667,0.0,2.166667,16.666667,0.000000,0.0,2.333333,12.166667,22.000000,-5.000000,-0.012165,0.057251,0.537833,0.097167
11102,Zonovan Knight,2023,3,1,42.857143,0.142857,0.0,1.857143,14.285714,0.000000,0.0,2.000000,12.142857,18.857143,-4.285714,-0.010427,0.049072,0.546714,0.083286
11103,Zonovan Knight,2023,5,2,39.125000,0.125000,0.0,1.625000,12.500000,0.000000,0.0,1.750000,11.000000,16.500000,-3.750000,-0.009124,0.042938,0.491750,0.072875


In [809]:
flex_df = pd.merge(flex_df, flex_L8_features, on = ['player_display_name', 'season', 'week'], how = 'left')

In [810]:
flex_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11105 entries, 0 to 11104
Data columns (total 46 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   season                          11105 non-null  int64  
 1   week                            11105 non-null  int64  
 2   recent_team                     11105 non-null  object 
 3   opponent_team                   11105 non-null  object 
 4   player_id                       10358 non-null  object 
 5   player_display_name             11105 non-null  object 
 6   position                        11105 non-null  object 
 7   rushing_yards                   10358 non-null  float64
 8   rushing_tds                     10358 non-null  float64
 9   rushing_fumbles_lost            10358 non-null  float64
 10  receptions                      10358 non-null  float64
 11  receiving_yards                 10358 non-null  float64
 12  receiving_tds                   

# Filling missing values with mean
In our QB model we made the mistake of dropping missing values at this point. Rookies making their debut won't have L8 variables because they haven't played before. So let's fill those values with the means.

In [811]:
cols_to_fill = ['rushing_yards_L8', 'rushing_tds_L8', 'rushing_fumbles_lost_L8', 'receptions_L8',
       'receiving_yards_L8', 'receiving_tds_L8', 'receiving_fumbles_lost_L8',
       'targets_L8', 'carries_L8', 'receiving_yards_after_catch_L8',
       'receiving_air_yards_L8', 'air_yards_share_L8', 'target_share_L8',
       'carry_share_L8', 'in_10_share_L8']

In [812]:
for col in cols_to_fill:
    flex_df[col] = flex_df.groupby('position')[col].transform(lambda x: x.fillna(x.mean()))

In [813]:
flex_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11105 entries, 0 to 11104
Data columns (total 46 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   season                          11105 non-null  int64  
 1   week                            11105 non-null  int64  
 2   recent_team                     11105 non-null  object 
 3   opponent_team                   11105 non-null  object 
 4   player_id                       10358 non-null  object 
 5   player_display_name             11105 non-null  object 
 6   position                        11105 non-null  object 
 7   rushing_yards                   10358 non-null  float64
 8   rushing_tds                     10358 non-null  float64
 9   rushing_fumbles_lost            10358 non-null  float64
 10  receptions                      10358 non-null  float64
 11  receiving_yards                 10358 non-null  float64
 12  receiving_tds                   

# A few more features
yards_per_carry, yards_per_reception, yards_per_target

In [814]:
flex_df['yards_per_carry_L8'] = flex_df['rushing_yards_L8']/flex_df['carries_L8']
flex_df['yards_per_reception_L8'] = flex_df['receiving_yards_L8']/flex_df['receptions_L8']
flex_df['yards_per_target_L8'] = flex_df['receiving_yards_L8']/flex_df['targets_L8']

# More missing
Now this gives us some more missing values. In most cases, it's because we've tried to divide by zero when a player has averaged 0 carries over the last eight games. So here we should fill the missing values with 0

In [815]:
flex_df.fillna(0, inplace = True)

In [816]:
flex_df_full = flex_df.copy()

In [817]:
flex_df.drop(columns = cols_L8, inplace = True)

In [818]:
flex_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11105 entries, 0 to 11104
Data columns (total 34 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   season                          11105 non-null  int64  
 1   week                            11105 non-null  int64  
 2   recent_team                     11105 non-null  object 
 3   opponent_team                   11105 non-null  object 
 4   player_id                       11105 non-null  object 
 5   player_display_name             11105 non-null  object 
 6   position                        11105 non-null  object 
 7   FD_Pts                          11105 non-null  float64
 8   DK_Pts                          11105 non-null  float64
 9   DK_Pts_RB_DvP                   11105 non-null  float64
 10  DK_Pts_TE_DvP                   11105 non-null  float64
 11  DK_Pts_WR_DvP                   11105 non-null  float64
 12  FD_Pts_RB_DvP                   

In [819]:
flex_df = flex_df[(flex_df['season'] == season) & (flex_df['week'] == week)]

In [820]:
flex_df['opponent_team'].nunique()

26

In [821]:
flex_df.replace([np.inf, -np.inf], np.nan, inplace=True)

# Odds, grass, outdoors, wind
We'll bring in CSVs with FanDuel and DraftKings odds.<br>
This will be where we need to separate FanDuel from DraftKings dataframes since the odds and totals might be different.

In [822]:
fd_spreads = pd.read_csv('fd_spreads_' + str(season) + '_' + str(week) + '.csv')
dk_spreads = pd.read_csv('dk_spreads_' + str(season) + '_' + str(week) + '.csv')

In [823]:
fd_spreads.head()

Unnamed: 0.1,Unnamed: 0,opponent,team,total_line,outdoors,grass,home_team,spread_line,pred_total,opp_total,wind,season,week
0,0,IND,CHI,43.5,0,0,0,-1.5,21.0,22.5,0,2024,3
1,1,CLE,NYG,38.5,1,1,0,-6.5,16.0,22.5,5,2024,3
2,2,TB,DEN,40.5,1,1,0,-6.5,17.0,23.5,5,2024,3
3,3,TEN,GB,38.5,1,0,0,-2.5,18.0,20.5,4,2024,3
4,4,MIN,HOU,45.5,0,1,0,1.5,23.5,22.0,0,2024,3


In [824]:
dk_spreads.head()

Unnamed: 0.1,Unnamed: 0,opponent,team,total_line,outdoors,grass,home_team,spread_line,pred_total,opp_total,wind,season,week
0,0,IND,CHI,43.5,0,0,0,-1.0,21.25,22.25,0,2024,3
1,1,CLE,NYG,38.0,1,1,0,-6.5,15.75,22.25,5,2024,3
2,2,TB,DEN,41.0,1,1,0,-6.0,17.5,23.5,5,2024,3
3,3,TEN,GB,38.0,1,0,0,-3.0,17.5,20.5,4,2024,3
4,4,MIN,HOU,46.0,0,1,0,1.5,23.75,22.25,0,2024,3


In [825]:
fd_spreads.drop(columns = ['Unnamed: 0'], inplace = True)
dk_spreads.drop(columns = ['Unnamed: 0'], inplace = True)

In [826]:
flex_df.rename(columns = {'recent_team': 'team', 'opponent_team': 'opponent'}, inplace = True)

In [827]:
flex_df_fd = pd.merge(flex_df, fd_spreads, on = ['team', 'opponent', 'season', 'week'])
flex_df_dk = pd.merge(flex_df, dk_spreads, on = ['team', 'opponent', 'season', 'week'])

In [828]:
len(flex_df_fd), len(flex_df_dk)

(747, 747)

In [829]:
# weekly_df[weekly_df['player_display_name'] == 'Jonathan Taylor'].tail()

<!-- # Bringing in point spreads and totals
Pulling this dataframe from our database. It also includes binary variables for indoors and grass. -->

In [830]:
# # Connect to the SQLite database
# conn = sqlite3.connect('nfl_dfs.db')

# # Query specific columns from the table
# query1 = "SELECT * FROM spreads_totals"
# spread_df = pd.read_sql_query(query1, conn)

# # Close the connection
# conn.close()

In [831]:
flex_df.columns

Index(['season', 'week', 'team', 'opponent', 'player_id',
       'player_display_name', 'position', 'FD_Pts', 'DK_Pts', 'DK_Pts_RB_DvP',
       'DK_Pts_TE_DvP', 'DK_Pts_WR_DvP', 'FD_Pts_RB_DvP', 'FD_Pts_TE_DvP',
       'FD_Pts_WR_DvP', 'game_num', 'rushing_yards_L8', 'rushing_tds_L8',
       'rushing_fumbles_lost_L8', 'receptions_L8', 'receiving_yards_L8',
       'receiving_tds_L8', 'receiving_fumbles_lost_L8', 'targets_L8',
       'carries_L8', 'receiving_yards_after_catch_L8',
       'receiving_air_yards_L8', 'air_yards_share_L8', 'target_share_L8',
       'carry_share_L8', 'in_10_share_L8', 'yards_per_carry_L8',
       'yards_per_reception_L8', 'yards_per_target_L8'],
      dtype='object')

In [832]:
#quarterback_df.drop(columns = ['defteam', 'passing_air_yards'], inplace = True)

In [833]:
flex_df.head()

Unnamed: 0,season,week,team,opponent,player_id,player_display_name,position,FD_Pts,DK_Pts,DK_Pts_RB_DvP,...,carries_L8,receiving_yards_after_catch_L8,receiving_air_yards_L8,air_yards_share_L8,target_share_L8,carry_share_L8,in_10_share_L8,yards_per_carry_L8,yards_per_reception_L8,yards_per_target_L8
10358,2024,3,DAL,BAL,0,CeeDee Lamb,WR,0.0,0.0,22.75,...,1.125,49.75,107.25,0.36436,0.306536,0.047375,0.238875,6.555556,13.142857,8.808511
10359,2024,3,DAL,BAL,0,Ezekiel Elliott,RB,0.0,0.0,22.75,...,13.125,34.375,-3.5,-0.018676,0.190852,0.575125,0.375,3.057143,6.4,5.090909
10360,2024,3,DAL,BAL,0,Brandin Cooks,WR,0.0,0.0,22.75,...,0.625,3.5,77.25,0.265803,0.150867,0.025375,0.119125,4.0,9.172414,5.782609
10361,2024,3,DAL,BAL,0,Rico Dowdle,RB,0.0,0.0,22.75,...,6.0,13.75,7.25,0.024891,0.064103,0.22675,0.0335,3.791667,8.857143,6.2
10362,2024,3,DAL,BAL,0,Jake Ferguson,TE,0.0,0.0,22.75,...,0.0,33.5,35.5,0.119437,0.198185,0.0,0.063,0.0,10.181818,7.344262


In [834]:
divisions = {
    'CLE': 'AFC North',
    'LAR': 'NFC West',
    'LV': 'AFC West',
    'KC': 'AFC West',
    'CAR': 'NFC South',
    'NYG': 'NFC East',
    'HOU': 'AFC South',
    'DEN': 'AFC West',
    'MIN': 'NFC North',
    'TEN': 'AFC South',
    'JAX': 'AFC South',
    'SEA': 'NFC West',
    'DET': 'NFC North',
    'NO': 'NFC South',
    'CIN': 'AFC North',
    'ATL': 'NFC South',
    'NYJ': 'AFC East',
    'PHI': 'NFC East',
    'DAL': 'NFC East',
    'WAS': 'NFC East',
    'PIT': 'AFC North',
    'ARI': 'NFC West',
    'CHI': 'NFC North',
    'MIA': 'AFC East',
    'BUF': 'AFC East',
    'BAL': 'AFC North',
    'TB': 'NFC South',
    'SF': 'NFC West',
    'LAC': 'AFC West',
    'IND': 'AFC South',
    'GB': 'NFC North',
    'NE': 'AFC East'
}

# You now have a dictionary `divisions` where each team is mapped to its division.flex_df.info()

# Adding div_game binary column

In [835]:
# Map the team and opponent columns to their respective divisions
flex_df_fd['team_division'] = flex_df_fd['team'].map(divisions)
flex_df_fd['opponent_division'] = flex_df_fd['opponent'].map(divisions)

# Create the div_game column (1 if they are in the same division, 0 otherwise)
flex_df_fd['div_game'] = np.where(flex_df_fd['team_division'] == flex_df_fd['opponent_division'], 1, 0)

# Optionally, drop the temporary division columns if you don't need them
flex_df_fd.drop(['team_division', 'opponent_division'], axis=1, inplace=True)

In [836]:
# Map the team and opponent columns to their respective divisions
flex_df_dk['team_division'] = flex_df_dk['team'].map(divisions)
flex_df_dk['opponent_division'] = flex_df_dk['opponent'].map(divisions)

# Create the div_game column (1 if they are in the same division, 0 otherwise)
flex_df_dk['div_game'] = np.where(flex_df_dk['team_division'] == flex_df_dk['opponent_division'], 1, 0)

# Optionally, drop the temporary division columns if you don't need them
flex_df_dk.drop(['team_division', 'opponent_division'], axis=1, inplace=True)

In [837]:
# Check for positive or negative infinity in the entire DataFrame
# infinity_mask_fd = np.isinf(flex_df_fd)
# infinity_mask_dk = np.isinf(flex_df_dk)

# # Display rows with infinity values
# infinity_rows_fd = flex_df_fd[infinity_mask_fd.any(axis=1)]
# infinity_rows_dk = flex_df_dk[infinity_mask_dk.any(axis=1)]
# print(infinity_rows_fd)
# print(infinity_rows_dk)

# One-hot encoding for position

In [838]:
position_dummies_fd = pd.get_dummies(flex_df_fd['position'], prefix='pos')
position_dummies_dk = pd.get_dummies(flex_df_dk['position'], prefix='pos')

In [839]:
flex_df_fd = pd.concat([flex_df_fd, position_dummies_fd], axis = 1)

In [840]:
flex_df_dk = pd.concat([flex_df_dk, position_dummies_dk], axis = 1)

In [841]:
flex_df_fd['pos_RB'] = flex_df_fd['pos_RB'].astype(int)
flex_df_fd['pos_TE'] = flex_df_fd['pos_TE'].astype(int)
flex_df_fd['pos_WR'] = flex_df_fd['pos_WR'].astype(int)
flex_df_dk['pos_RB'] = flex_df_dk['pos_RB'].astype(int)
flex_df_dk['pos_TE'] = flex_df_dk['pos_TE'].astype(int)
flex_df_dk['pos_WR'] = flex_df_dk['pos_WR'].astype(int)

In [842]:
flex_df_fd.drop(columns = ['FD_Pts', 'DK_Pts'], inplace = True)
flex_df_dk.drop(columns = ['FD_Pts', 'DK_Pts'], inplace = True)

In [843]:
fanduel_df = fanduel_df[['name', 'position', 'salary', 'team', 'opponent', 'status', 'week']]
draftkings_df = draftkings_df[['name', 'position', 'salary', 'team', 'opponent', 'status', 'week']]

In [844]:
fanduel_df = fanduel_df[fanduel_df['position'].isin(['RB', 'WR', 'TE'])]
draftkings_df = draftkings_df[draftkings_df['position'].isin(['RB', 'WR', 'TE'])]

In [845]:
len(fanduel_df), len(draftkings_df)

(747, 486)

In [846]:
#################### Sept. 16, 2024 #####################
#Just filtered for Flex positions.
#Next will be to rename player_display_name to name and try to merge again with the flex data.
#Don't forget QB strength variables!!!!!

In [847]:
len(flex_df_fd), len(flex_df_dk)

(747, 747)

In [848]:
flex_df_fd.rename(columns = {'player_display_name': 'name'}, inplace = True)
flex_df_dk.rename(columns = {'player_display_name': 'name'}, inplace = True)

In [849]:
flex_df_fd = pd.merge(fanduel_df, flex_df_fd, on = ['name', 'team', 'opponent', 'week'], how = 'left')
flex_df_dk = pd.merge(draftkings_df, flex_df_dk, on = ['name', 'team', 'opponent', 'week'], how = 'left')

In [850]:
flex_df_fd['position'] = flex_df_fd['position_x']
flex_df_fd.drop(columns = ['position_x', 'position_y'], inplace = True)
flex_df_dk['position'] = flex_df_dk['position_x']
flex_df_dk.drop(columns = ['position_x', 'position_y'], inplace = True)

In [851]:
flex_df_fd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 747 entries, 0 to 746
Data columns (total 46 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   name                            747 non-null    object 
 1   salary                          747 non-null    int64  
 2   team                            747 non-null    object 
 3   opponent                        747 non-null    object 
 4   status                          747 non-null    object 
 5   week                            747 non-null    int64  
 6   season                          747 non-null    int64  
 7   player_id                       747 non-null    int64  
 8   DK_Pts_RB_DvP                   747 non-null    float64
 9   DK_Pts_TE_DvP                   747 non-null    float64
 10  DK_Pts_WR_DvP                   747 non-null    float64
 11  FD_Pts_RB_DvP                   747 non-null    float64
 12  FD_Pts_TE_DvP                   747 

In [852]:
flex_df_dk.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 486 entries, 0 to 485
Data columns (total 46 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   name                            486 non-null    object 
 1   salary                          486 non-null    int64  
 2   team                            486 non-null    object 
 3   opponent                        486 non-null    object 
 4   status                          486 non-null    object 
 5   week                            486 non-null    int64  
 6   season                          482 non-null    float64
 7   player_id                       482 non-null    float64
 8   DK_Pts_RB_DvP                   482 non-null    float64
 9   DK_Pts_TE_DvP                   482 non-null    float64
 10  DK_Pts_WR_DvP                   482 non-null    float64
 11  FD_Pts_RB_DvP                   482 non-null    float64
 12  FD_Pts_TE_DvP                   482 

In [853]:
###FANDUEL SCORING
#Rushing yards made = 0.1pts	
#Rushing touchdowns = 6pts	
#Passing yards = 0.04pts	
#Passing touchdowns = 4pts	
#Interceptions = -1pt	
#Receiving yards = 0.1pts	
#Receiving touchdowns = 6pts	
#Receptions = 0.5pts	
#Kickoff return touchdowns = 6pts	
#Punt return touchdowns = 6pts	
#Fumbles lost = -2pts	
#Own fumbles recovered touchdowns = 6pts	
#Two-point conversions scored = 2pts	
#Two-point conversion passes = 2pts	
#Field-goals from 0-39 yards = 3pts	
#Field-goals from 40-49 yards = 4pts	
#Field-goals from 50+ yards = 5pts	
#Extra-point conversions = 1pt

###DRAFTKINGS SCORING
#PAssing TD = 4 pts
#passing yards = .04 pts
#300 passing yards = 3 pts (bonus)
#Interception = -1 pts
#Rushing TD = 6 pts
#Rushing yds = 0.1 pts
#100 yd rushing game = 3 pts (bonus)
#Receiving TD = 6 pts
#Receiving yds = 0.1 pts
#100 receiving yards game = 3 pts (bonus)
#Receptions = 1 pt
#Punt/kickoff/FG return for TD = 6 pts
#Fumble lost = -1 pt
#2 pt conversion (pass, run or catch) = 2 pts
#Offensive fumble recovery TD = 6

In [854]:
features = ['DK_Pts_RB_DvP', 'DK_Pts_TE_DvP', 'DK_Pts_WR_DvP', 'FD_Pts_RB_DvP',
       'FD_Pts_TE_DvP', 'FD_Pts_WR_DvP', 'rushing_yards_L8', 'rushing_tds_L8',
       'rushing_fumbles_lost_L8', 'receptions_L8', 'receiving_yards_L8',
       'receiving_tds_L8', 'receiving_fumbles_lost_L8', 'targets_L8',
       'carries_L8', 'receiving_yards_after_catch_L8',
       'receiving_air_yards_L8', 'target_share_L8', 'carry_share_L8',
       'in_10_share_L8', 'yards_per_carry_L8', 'yards_per_reception_L8',
       'yards_per_target_L8', 'wind', 'div_game', 'spread_line', 'total_line',
       'outdoors', 'grass', 'home_team', 'pred_total', 'opp_total', 'qb_comp',
       'qb_att', 'qb_yds', 'qb_pass_td', 'qb_int', 'qb_comp_pct',
       'qb_yds_per_att', 'qb_td_pct', 'qb_int_pct', 'pos_RB', 'pos_TE',
       'pos_WR']

# QB strength
One last variable we want to try to add is a variable or variables that look at quarterback strength. If a team's backup QB is playing, the entire offense is downgraded and it could affect the fantasy performance of RBs, WRs and TEs.<br>

We'll take the dataset we used for the QB model, filter for the primary QBs in each game and use some of the variables that look at L8 means.

In [855]:
# # Connect to the SQLite database
# conn = sqlite3.connect('nfl_dfs.db')

# query_qb = "SELECT * FROM qb_dataset WHERE season >= 2006"

# qb_df = pd.read_sql_query(query_qb, conn)

# # Close the database connection
# conn.close()

In [856]:
qb_data_fd = pd.read_csv('FD_QB_for_model_' + str(season) + '_' + str(week) + '.csv')
qb_data_dk = pd.read_csv('DK_QB_for_model_' + str(season) + '_' + str(week) + '.csv')

In [857]:
qb_data_fd = qb_data_fd[qb_data_fd['QB_role'] == 1]
qb_data_dk = qb_data_dk[qb_data_dk['QB_role'] == 1]

In [858]:
qb_data_fd.rename(columns = {'completions_L8': 'qb_comp', 'attempts_L8': 'qb_att', 'passing_yards_L8': 'qb_yds',\
                       'passing_tds_L8': 'qb_pass_td', 'interceptions_L8': 'qb_int', 'comp_pct': 'qb_comp_pct',\
                       'yds_per_attempt': 'qb_yds_per_att', 'td_pct': 'qb_td_pct', 'int_pct': 'qb_int_pct'}, inplace = True)

In [859]:
qb_data_dk.rename(columns = {'completions_L8': 'qb_comp', 'attempts_L8': 'qb_att', 'passing_yards_L8': 'qb_yds',\
                       'passing_tds_L8': 'qb_pass_td', 'interceptions_L8': 'qb_int', 'comp_pct': 'qb_comp_pct',\
                       'yds_per_attempt': 'qb_yds_per_att', 'td_pct': 'qb_td_pct', 'int_pct': 'qb_int_pct'}, inplace = True)

In [860]:
qb_data_fd = qb_data_fd[['season', 'week', 'team', 'opponent', 'qb_comp', 'qb_att', 'qb_yds', 'qb_pass_td',\
               'qb_int', 'qb_comp_pct', 'qb_yds_per_att', 'qb_td_pct', 'qb_int_pct']]

In [861]:
qb_data_dk = qb_data_dk[['season', 'week', 'team', 'opponent', 'qb_comp', 'qb_att', 'qb_yds', 'qb_pass_td',\
               'qb_int', 'qb_comp_pct', 'qb_yds_per_att', 'qb_td_pct', 'qb_int_pct']]

In [862]:
flex_df_fd = pd.merge(flex_df_fd, qb_data_fd, on = ['season', 'week', 'team', 'opponent'], how = 'left')

In [863]:
flex_df_dk = pd.merge(flex_df_dk, qb_data_fd, on = ['season', 'week', 'team', 'opponent'], how = 'left')

In [864]:
flex_df_fd = flex_df_fd.set_index(['name', 'position', 'team', 'opponent', 'salary', 'status', 'week', 'season'], drop = True)
flex_df_dk = flex_df_dk.set_index(['name', 'position', 'team', 'opponent', 'salary', 'status', 'week', 'season'], drop = True)

In [865]:
flex_df_fd = flex_df_fd[features]
flex_df_dk = flex_df_dk[features]

In [866]:
flex_df_fd.replace([np.inf, -np.inf], np.nan, inplace=True)
flex_df_dk.replace([np.inf, -np.inf], np.nan, inplace=True)

In [867]:
# Fill missing values with the mean of each column
flex_df_fd = flex_df_fd.groupby('position').transform(lambda x: x.fillna(x.mean()))
flex_df_dk = flex_df_dk.groupby('position').transform(lambda x: x.fillna(x.mean()))

In [868]:
flex_df_fd.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 747 entries, ('CeeDee Lamb', 'WR', 'DAL', 'BAL', 9300, 'Active', 3, 2024) to ('Nick Muse', 'TE', 'MIN', 'HOU', 4000, 'IR', 3, 2024)
Data columns (total 44 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   DK_Pts_RB_DvP                   747 non-null    float64
 1   DK_Pts_TE_DvP                   747 non-null    float64
 2   DK_Pts_WR_DvP                   747 non-null    float64
 3   FD_Pts_RB_DvP                   747 non-null    float64
 4   FD_Pts_TE_DvP                   747 non-null    float64
 5   FD_Pts_WR_DvP                   747 non-null    float64
 6   rushing_yards_L8                747 non-null    float64
 7   rushing_tds_L8                  747 non-null    float64
 8   rushing_fumbles_lost_L8         747 non-null    float64
 9   receptions_L8                   747 non-null    float64
 10  receiving_yards_L8              747 non-null    

In [869]:
flex_df_dk.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 486 entries, ('CeeDee Lamb', 'WR', 'DAL', 'BAL', 8800, 'Active', 3, 2024.0) to ('Miller Forristall', 'TE', 'LAR', 'SF', 2500, 'Active', 3, 2024.0)
Data columns (total 44 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   DK_Pts_RB_DvP                   486 non-null    float64
 1   DK_Pts_TE_DvP                   486 non-null    float64
 2   DK_Pts_WR_DvP                   486 non-null    float64
 3   FD_Pts_RB_DvP                   486 non-null    float64
 4   FD_Pts_TE_DvP                   486 non-null    float64
 5   FD_Pts_WR_DvP                   486 non-null    float64
 6   rushing_yards_L8                486 non-null    float64
 7   rushing_tds_L8                  486 non-null    float64
 8   rushing_fumbles_lost_L8         486 non-null    float64
 9   receptions_L8                   486 non-null    float64
 10  receiving_yards_L8              4

In [870]:
flex_df_fd.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,FD_Pts_RB_DvP,FD_Pts_TE_DvP,FD_Pts_WR_DvP,rushing_yards_L8,rushing_tds_L8,rushing_fumbles_lost_L8,receptions_L8,...,qb_yds,qb_pass_td,qb_int,qb_comp_pct,qb_yds_per_att,qb_td_pct,qb_int_pct,pos_RB,pos_TE,pos_WR
name,position,team,opponent,salary,status,week,season,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
CeeDee Lamb,WR,DAL,BAL,9300,Active,3,2024,22.75,16.025,30.85,18.4375,11.9,24.35,7.375,0.125,0.0,7.875,...,269.625,1.875,0.875,0.674194,6.958065,0.048387,0.022581,0,0,1
Justin Jefferson,WR,MIN,HOU,9200,Active,3,2024,18.6875,11.7375,30.95,16.3125,9.3,24.7625,-1.5,0.0,0.0,5.375,...,96.5,0.75,0.375,0.663158,8.126316,0.063158,0.031579,0,0,1
Amon-Ra St. Brown,WR,DET,ARI,9100,Active,3,2024,27.71,12.8375,28.4875,24.2725,10.525,23.4875,1.625,0.0,0.0,7.625,...,276.125,1.125,0.625,0.67,7.363333,0.03,0.016667,0,0,1
Tyreek Hill,WR,MIA,SEA,9000,Active,3,2024,27.335,14.6,31.3625,24.0225,11.725,23.9875,1.0,0.0,0.0,5.75,...,83.5,0.25,0.5,0.51145,5.099237,0.015267,0.030534,0,0,1
Saquon Barkley,RB,PHI,NO,8700,Active,3,2024,19.2875,11.3,33.25,16.0375,8.6125,27.125,64.375,0.875,0.125,2.5,...,196.75,1.0,1.0,0.649573,6.726496,0.034188,0.034188,1,0,0


In [871]:
flex_df_dk.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,FD_Pts_RB_DvP,FD_Pts_TE_DvP,FD_Pts_WR_DvP,rushing_yards_L8,rushing_tds_L8,rushing_fumbles_lost_L8,receptions_L8,...,qb_yds,qb_pass_td,qb_int,qb_comp_pct,qb_yds_per_att,qb_td_pct,qb_int_pct,pos_RB,pos_TE,pos_WR
name,position,team,opponent,salary,status,week,season,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
CeeDee Lamb,WR,DAL,BAL,8800,Active,3,2024.0,22.75,16.025,30.85,18.4375,11.9,24.35,7.375,0.125,0.0,7.875,...,269.625,1.875,0.875,0.674194,6.958065,0.048387,0.022581,0.0,0.0,1.0
Justin Jefferson,WR,MIN,HOU,8600,Active,3,2024.0,18.6875,11.7375,30.95,16.3125,9.3,24.7625,-1.5,0.0,0.0,5.375,...,96.5,0.75,0.375,0.663158,8.126316,0.063158,0.031579,0.0,0.0,1.0
Christian McCaffrey,RB,SF,LAR,8500,IR,3,2024.0,19.825,12.5875,45.0125,17.45,10.3375,36.3875,98.5,0.875,0.125,4.75,...,256.375,1.25,0.75,0.655039,7.949612,0.03876,0.023256,1.0,0.0,0.0
Tyreek Hill,WR,MIA,SEA,8400,Active,3,2024.0,27.335,14.6,31.3625,24.0225,11.725,23.9875,1.0,0.0,0.0,5.75,...,83.5,0.25,0.5,0.51145,5.099237,0.015267,0.030534,0.0,0.0,1.0
Amon-Ra St. Brown,WR,DET,ARI,8200,Active,3,2024.0,27.71,12.8375,28.4875,24.2725,10.525,23.4875,1.625,0.0,0.0,7.625,...,276.125,1.125,0.625,0.67,7.363333,0.03,0.016667,0.0,0.0,1.0


In [872]:
flex_df_fd.to_csv('FD_FLEX_for_model_' + str(season) + '_' + str(week) + '.csv')
flex_df_dk.to_csv('DK_FLEX_for_model_' + str(season) + '_' + str(week) + '.csv')