# Assembling FLEX dataset
This notebook will wrangle the FanDuel and DraftKings lists of running backs, wide receivers and tight ends and create a CSV that is ready to run through the model for predictions.

In [472]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import nfl_data_py as nfl
#import itertools
from functions import get_current_weekday, calculate_nfl_week, get_next_sunday, get_current_year

In [473]:
import sqlite3

In [474]:
day = get_current_weekday()

In [475]:
date_string = get_next_sunday(day)

In [476]:
week = calculate_nfl_week(date_string)

In [477]:
season = get_current_year()

# Pulling from database
Fetching the FD and DK lists from the database. These tables include names, positions, teams, opponents and salaries.

In [478]:
# Connect to the SQLite database.
# These are the FD and DK player lists for the current week.
conn = sqlite3.connect('nfl_dfs.db')

try:
    # Table names carry the NFL week and the season with its first two characters dropped
    # (for example fd_table_5_24 when week is 5 and season is 2024).
    table_suffix = f"{week}_{str(season)[2:]}"
    query_fd = f"SELECT * FROM fd_table_{table_suffix}"
    query_dk = f"SELECT * FROM dk_table_{table_suffix}"

    fanduel_df = pd.read_sql_query(query_fd, conn)
    draftkings_df = pd.read_sql_query(query_dk, conn)
finally:
    # Close the database connection even if a table is missing or a query fails
    conn.close()

In [479]:
# Weekly and play-by-play data through the previous week.
# Both files were exported from R Studio as CSVs named with the season and week.
file_suffix = str(season) + '_' + str(week) + '.csv'
weekly_df = pd.read_csv('weekly_data_' + file_suffix)
pbp_df = pd.read_csv('pbp_data_' + file_suffix, low_memory=False)

In [480]:
# Recode 'LA' as 'LAR' wherever a whole cell equals 'LA', in both frames
la_recode = {'LA': 'LAR'}
weekly_df = weekly_df.replace(la_recode)
pbp_df = pbp_df.replace(la_recode)

This is weekly data for each player. We can use this to calculate fantasy points for most players.

In [481]:
weekly_df.head()

Unnamed: 0,player_id,player_name,player_display_name,position,position_group,headshot_url,recent_team,season,week,season_type,...,receiving_first_downs,receiving_epa,receiving_2pt_conversions,racr,target_share,air_yards_share,wopr,special_teams_tds,fantasy_points,fantasy_points_ppr
0,00-0019596,T.Brady,Tom Brady,QB,QB,https://static.www.nfl.com/image/private/f_aut...,TB,2022,1,REG,...,0,,0,,,,,0,10.38,10.38
1,00-0019596,T.Brady,Tom Brady,QB,QB,https://static.www.nfl.com/image/private/f_aut...,TB,2022,2,REG,...,0,,0,,,,,0,9.4,9.4
2,00-0019596,T.Brady,Tom Brady,QB,QB,https://static.www.nfl.com/image/private/f_aut...,TB,2022,3,REG,...,0,,0,,,,,0,14.74,14.74
3,00-0019596,T.Brady,Tom Brady,QB,QB,https://static.www.nfl.com/image/private/f_aut...,TB,2022,4,REG,...,0,,0,,,,,0,25.4,25.4
4,00-0019596,T.Brady,Tom Brady,QB,QB,https://static.www.nfl.com/image/private/f_aut...,TB,2022,5,REG,...,0,,0,,,,,0,19.74,19.74


In [482]:
pbp_df.head()

Unnamed: 0,play_id,game_id,old_game_id,home_team,away_team,season_type,week,posteam,posteam_type,defteam,...,out_of_bounds,home_opening_kickoff,qb_epa,xyac_epa,xyac_mean_yardage,xyac_median_yardage,xyac_success,xyac_fd,xpass,pass_oe
0,1,2022_01_BAL_NYJ,2022091107,NYJ,BAL,REG,1,,,,...,0,1,0.0,,,,,,,
1,43,2022_01_BAL_NYJ,2022091107,NYJ,BAL,REG,1,NYJ,home,BAL,...,0,1,-0.443521,,,,,,,
2,68,2022_01_BAL_NYJ,2022091107,NYJ,BAL,REG,1,NYJ,home,BAL,...,0,1,1.468819,,,,,,0.440373,-44.037291
3,89,2022_01_BAL_NYJ,2022091107,NYJ,BAL,REG,1,NYJ,home,BAL,...,0,1,-0.492192,0.727261,6.988125,6.0,0.60693,0.227598,0.389904,61.009598
4,115,2022_01_BAL_NYJ,2022091107,NYJ,BAL,REG,1,NYJ,home,BAL,...,0,1,-0.325931,,,,,,0.443575,-44.357494


In [483]:
#rb_wr_te_df.head()

In [484]:
#rb_wr_te_df.info()

In [485]:
#nfl.see_weekly_cols()

In [486]:
weekly_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12518 entries, 0 to 12517
Data columns (total 53 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   player_id                    12518 non-null  object 
 1   player_name                  12518 non-null  object 
 2   player_display_name          12518 non-null  object 
 3   position                     12518 non-null  object 
 4   position_group               12518 non-null  object 
 5   headshot_url                 12456 non-null  object 
 6   recent_team                  12518 non-null  object 
 7   season                       12518 non-null  int64  
 8   week                         12518 non-null  int64  
 9   season_type                  12518 non-null  object 
 10  opponent_team                12518 non-null  object 
 11  completions                  12518 non-null  int64  
 12  attempts                     12518 non-null  int64  
 13  passing_yards   

In [487]:
weekly_df['position_group'].value_counts()

position_group
WR      5074
RB      3349
TE      2475
QB      1540
SPEC      29
DB        25
LB        14
OL        10
DL         2
Name: count, dtype: int64

In [488]:
weekly_df['position'].value_counts()

position
WR     5074
RB     3156
TE     2475
QB     1540
FB      193
P        29
CB       10
T         9
SS        9
ILB       7
FS        6
OLB       6
DT        2
G         1
MLB       1
Name: count, dtype: int64

Weekly data filtered for RBs, WRs and TEs

In [489]:
# Keep only the flex-eligible positions (running backs, fullbacks, receivers, tight ends)
is_flex_position = weekly_df['position'].isin(['RB', 'HB', 'FB', 'WR', 'TE'])
flex_df = weekly_df[is_flex_position]

In [490]:
# Keys that identify one player's row for one game
cols_to_group = [
    'season',
    'week',
    'recent_team',
    'opponent_team',
    'player_id',
    'player_display_name',
    'position',
]

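Different players can share the same display name, so player_id is a more reliable key than the name. The cells below check for display names that map to more than one player_id (the check currently finds none).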

In [491]:
# Count how many distinct player_ids each display name is attached to
ids_per_name = flex_df.groupby('player_display_name')['player_id'].nunique()

# Keep only the names shared by more than one player_id
duplicate_names = ids_per_name[ids_per_name > 1]

In [492]:
duplicate_names

Series([], Name: player_id, dtype: int64)

In [493]:
# Stat columns carried through the per-game aggregation
scoring_cols = [
    'passing_yards', 'passing_tds', 'interceptions', 'passing_2pt_conversions',
    'rushing_yards', 'rushing_tds', 'rushing_fumbles_lost', 'rushing_2pt_conversions',
    'receptions', 'receiving_yards', 'receiving_tds', 'receiving_fumbles_lost',
    'receiving_2pt_conversions', 'sack_fumbles_lost', 'special_teams_tds',
    'targets', 'carries', 'receiving_yards_after_catch', 'receiving_air_yards',
    'air_yards_share', 'target_share',
]

In [494]:
# Collapse to one row per combination of the keys in cols_to_group, summing every
# column listed in scoring_cols. The grouping keys become the (Multi)index of the result.
flex_df = flex_df.groupby(cols_to_group)[scoring_cols].sum()

In [495]:
flex_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 10898 entries, (2022, 1, 'ARI', 'KC', '00-0027942', 'A.J. Green', 'WR') to (2024, 4, 'WAS', 'ARI', '00-0039355', 'Luke McCaffrey', 'WR')
Data columns (total 21 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   passing_yards                10898 non-null  int64  
 1   passing_tds                  10898 non-null  int64  
 2   interceptions                10898 non-null  int64  
 3   passing_2pt_conversions      10898 non-null  int64  
 4   rushing_yards                10898 non-null  int64  
 5   rushing_tds                  10898 non-null  int64  
 6   rushing_fumbles_lost         10898 non-null  int64  
 7   rushing_2pt_conversions      10898 non-null  int64  
 8   receptions                   10898 non-null  int64  
 9   receiving_yards              10898 non-null  int64  
 10  receiving_tds                10898 non-null  int64  
 11  receiving_fumbles_los

In [496]:
flex_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,passing_yards,passing_tds,interceptions,passing_2pt_conversions,rushing_yards,rushing_tds,rushing_fumbles_lost,rushing_2pt_conversions,receptions,receiving_yards,...,receiving_fumbles_lost,receiving_2pt_conversions,sack_fumbles_lost,special_teams_tds,targets,carries,receiving_yards_after_catch,receiving_air_yards,air_yards_share,target_share
season,week,recent_team,opponent_team,player_id,player_display_name,position,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
2022,1,ARI,KC,00-0027942,A.J. Green,WR,0,0,0,0,0,0,0,0,2,13,...,0,0,0,0,4,0,0,42,0.157895,0.111111
2022,1,ARI,KC,00-0030061,Zach Ertz,TE,0,0,0,0,0,0,0,0,2,14,...,0,1,0,0,4,0,4,22,0.082707,0.111111
2022,1,ARI,KC,00-0033553,James Conner,RB,0,0,0,0,26,1,0,0,5,29,...,0,0,0,0,6,10,38,7,0.026316,0.166667
2022,1,ARI,KC,00-0035500,Greg Dortch,WR,0,0,0,0,0,0,0,0,7,63,...,0,0,0,0,9,0,31,62,0.233083,0.250000
2022,1,ARI,KC,00-0035527,Andy Isabella,WR,0,0,0,0,0,0,0,0,1,10,...,0,0,0,0,3,0,4,30,0.112782,0.083333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024,4,WAS,ARI,00-0035659,Terry McLaurin,WR,0,0,0,0,0,0,0,0,7,52,...,0,0,0,0,10,0,6,103,0.591954,0.333333
2024,4,WAS,ARI,00-0036626,Dyami Brown,WR,0,0,0,0,0,0,0,0,1,4,...,0,0,0,0,1,0,0,4,0.022989,0.033333
2024,4,WAS,ARI,00-0036628,John Bates,TE,0,0,0,0,0,0,0,0,1,9,...,0,0,0,0,1,0,6,3,0.017241,0.033333
2024,4,WAS,ARI,00-0037746,Brian Robinson,RB,0,0,0,0,101,1,0,0,3,12,...,0,0,0,0,3,21,18,-6,-0.034483,0.100000


In [497]:
def replace(code, old, new):
    """
    Find-and-replace helper.

    Calls ``code.replace(old, new)`` and returns whatever that call produces;
    for a string this swaps every occurrence of ``old`` for ``new``.
    """
    updated = code.replace(old, new)
    return updated

In [498]:
# Turn the grouping keys back into ordinary columns
flex_df = flex_df.reset_index(drop=False)

In [499]:
# Drop the few errant rows whose opponent is the same as the player's own team
teams_differ = flex_df['recent_team'] != flex_df['opponent_team']
flex_df = flex_df[teams_differ]

In [500]:
flex_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10892 entries, 0 to 10897
Data columns (total 28 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   season                       10892 non-null  int64  
 1   week                         10892 non-null  int64  
 2   recent_team                  10892 non-null  object 
 3   opponent_team                10892 non-null  object 
 4   player_id                    10892 non-null  object 
 5   player_display_name          10892 non-null  object 
 6   position                     10892 non-null  object 
 7   passing_yards                10892 non-null  int64  
 8   passing_tds                  10892 non-null  int64  
 9   interceptions                10892 non-null  int64  
 10  passing_2pt_conversions      10892 non-null  int64  
 11  rushing_yards                10892 non-null  int64  
 12  rushing_tds                  10892 non-null  int64  
 13  rushing_fumbles_lost 

# What do we need from play-by-play?
Let's take a look at the data we'd like to have and see where we need to integrate the play-by-play data. We have yards_after_catch and air_yards. We have targets and target share. Even though the target share doesn't add up to 100 percent, it's still useful. We'll definitely want to spot-check that.<br>

We have carries but no carry share. Maybe we need to just take the total carries for each team in each game from the pbp and join that with the flex_df. That shouldn't be too hard. Then we can figure out every RB's workload even if they don't all add up to 100.<br>

We also want goal_to_go binary for each play to see how many carries or targets a player gets in goal-line situations and also the rate at which they score TDs in those situations.<br>

In [501]:
# Play-by-play rows whose play_type is 'run'
is_run_play = pbp_df['play_type'] == 'run'
run_df = pbp_df.loc[is_run_play]

In [502]:
# Number of run plays for each offense in each game
team_game_keys = ['season', 'week', 'posteam', 'defteam', 'game_id']
running_play_counts = (
    run_df
    .groupby(team_game_keys)
    .size()
    .reset_index(name='total_carries')
)

In [503]:
# Use the weekly data's team column names so the two frames share merge keys
running_play_counts = running_play_counts.rename(
    columns={'posteam': 'recent_team', 'defteam': 'opponent_team'}
)

In [504]:
# Attach each team's game-level run-play total to every player row (left join keeps all players)
flex_merge = flex_df.merge(
    running_play_counts,
    how='left',
    on=['season', 'week', 'recent_team', 'opponent_team'],
)

In [505]:
flex_merge.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10892 entries, 0 to 10891
Data columns (total 30 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   season                       10892 non-null  int64  
 1   week                         10892 non-null  int64  
 2   recent_team                  10892 non-null  object 
 3   opponent_team                10892 non-null  object 
 4   player_id                    10892 non-null  object 
 5   player_display_name          10892 non-null  object 
 6   position                     10892 non-null  object 
 7   passing_yards                10892 non-null  int64  
 8   passing_tds                  10892 non-null  int64  
 9   interceptions                10892 non-null  int64  
 10  passing_2pt_conversions      10892 non-null  int64  
 11  rushing_yards                10892 non-null  int64  
 12  rushing_tds                  10892 non-null  int64  
 13  rushing_fumbles_

In [506]:
# Player carries as a fraction of the team's run plays in that game, to 3 decimals
carry_ratio = flex_merge['carries'] / flex_merge['total_carries']
flex_merge['carry_share'] = carry_ratio.round(3)

In [507]:
flex_merge.head()

Unnamed: 0,season,week,recent_team,opponent_team,player_id,player_display_name,position,passing_yards,passing_tds,interceptions,...,special_teams_tds,targets,carries,receiving_yards_after_catch,receiving_air_yards,air_yards_share,target_share,game_id,total_carries,carry_share
0,2022,1,ARI,KC,00-0027942,A.J. Green,WR,0,0,0,...,0,4,0,0,42,0.157895,0.111111,2022_01_KC_ARI,21,0.0
1,2022,1,ARI,KC,00-0030061,Zach Ertz,TE,0,0,0,...,0,4,0,4,22,0.082707,0.111111,2022_01_KC_ARI,21,0.0
2,2022,1,ARI,KC,00-0033553,James Conner,RB,0,0,0,...,0,6,10,38,7,0.026316,0.166667,2022_01_KC_ARI,21,0.476
3,2022,1,ARI,KC,00-0035500,Greg Dortch,WR,0,0,0,...,0,9,0,31,62,0.233083,0.25,2022_01_KC_ARI,21,0.0
4,2022,1,ARI,KC,00-0035527,Andy Isabella,WR,0,0,0,...,0,3,0,4,30,0.112782,0.083333,2022_01_KC_ARI,21,0.0


In [508]:
flex_merge.tail(30)

Unnamed: 0,season,week,recent_team,opponent_team,player_id,player_display_name,position,passing_yards,passing_tds,interceptions,...,special_teams_tds,targets,carries,receiving_yards_after_catch,receiving_air_yards,air_yards_share,target_share,game_id,total_carries,carry_share
10862,2024,4,SF,NE,00-0036259,Jauan Jennings,WR,0,0,0,...,0,6,0,20,84,0.227027,0.230769,2024_04_NE_SF,31,0.0
10863,2024,4,SF,NE,00-0036261,Brandon Aiyuk,WR,0,0,0,...,0,5,0,20,90,0.243243,0.192308,2024_04_NE_SF,31,0.0
10864,2024,4,SF,NE,00-0037525,Jordan Mason,RB,0,0,0,...,0,3,24,25,28,0.075676,0.115385,2024_04_NE_SF,31,0.774
10865,2024,4,SF,NE,00-0039363,Isaac Guerendo,RB,0,0,0,...,0,0,1,0,0,0.0,0.0,2024_04_NE_SF,31,0.032
10866,2024,4,TB,PHI,00-0031408,Mike Evans,WR,0,0,0,...,0,14,0,32,145,0.480132,0.297872,2024_04_PHI_TB,24,0.0
10867,2024,4,TB,PHI,00-0032385,Sterling Shepard,WR,0,0,0,...,0,5,0,8,78,0.258278,0.106383,2024_04_PHI_TB,24,0.0
10868,2024,4,TB,PHI,00-0033921,Chris Godwin,WR,0,0,0,...,0,9,0,50,39,0.129139,0.191489,2024_04_PHI_TB,24,0.0
10869,2024,4,TB,PHI,00-0037256,Rachaad White,RB,0,0,0,...,0,3,10,39,-6,-0.019868,0.06383,2024_04_PHI_TB,24,0.417
10870,2024,4,TB,PHI,00-0038129,Cade Otton,TE,0,0,0,...,0,9,0,27,40,0.13245,0.191489,2024_04_PHI_TB,24,0.0
10871,2024,4,TB,PHI,00-0038951,Sean Tucker,RB,0,0,0,...,0,1,1,14,0,0.0,0.021277,2024_04_PHI_TB,24,0.042


# Goal-to-go situations
We'll filter the pbp data by rows in which goal_to_go is True and the play is either a run or a pass. We find that on passing plays, some of the receiver_player_id values are null, but for running plays none of them are null. This implies that on plays in which the QB is sacked, no pass is thrown so there's no receiver.<br>

Those plays probably should be discarded as we are looking for a percentage of times a RB-WR-TE gets the ball in goal-to-go situations. When a QB is sacked, no one gets the ball.<br>

**Update:** goal_to_go leaves out situations where it's 3rd and 2 from the 4-yard line. Maybe we should see if filtering by yardline_100 <= 10 gives us more data points.

In [509]:
# Run and pass plays with yardline_100 of 10 or less
inside_10 = pbp_df['yardline_100'] <= 10
run_or_pass = pbp_df['play_type'].isin(['run', 'pass'])
gtg_df_10 = pbp_df[inside_10 & run_or_pass]
#gtg_df = rb_wr_te_df[(rb_wr_te_df['goal_to_go'] == True) & ((rb_wr_te_df['play_type'] == 'run') | (rb_wr_te_df['play_type'] == 'pass'))]

In [510]:
gtg_df_10['play_type'].value_counts()

play_type
run     2977
pass    2863
Name: count, dtype: int64

In [511]:
# Keep plays that name a receiver or a rusher; plays with neither are dropped
has_receiver = gtg_df_10['receiver_player_id'].notna()
has_rusher = gtg_df_10['rusher_player_id'].notna()
gtg_df_10 = gtg_df_10[has_receiver | has_rusher]
#gtg_df = gtg_df[(gtg_df['receiver_player_id'].notnull()) | (gtg_df['rusher_player_id'].notnull())]

In [512]:
gtg_df_10[(gtg_df_10['receiver_player_id'].isnull()) & (gtg_df_10['rusher_player_id'].isnull())]
#gtg_df[(gtg_df['receiver_player_id'].isnull()) & (gtg_df['rusher_player_id'].isnull())]

Unnamed: 0,play_id,game_id,old_game_id,home_team,away_team,season_type,week,posteam,posteam_type,defteam,...,out_of_bounds,home_opening_kickoff,qb_epa,xyac_epa,xyac_mean_yardage,xyac_median_yardage,xyac_success,xyac_fd,xpass,pass_oe


In [513]:
gtg_df_10[(gtg_df_10['receiver_player_id'].notnull()) & (gtg_df_10['rusher_player_id'].notnull())]
#gtg_df[(gtg_df['receiver_player_id'].notnull()) & (gtg_df['rusher_player_id'].notnull())]

Unnamed: 0,play_id,game_id,old_game_id,home_team,away_team,season_type,week,posteam,posteam_type,defteam,...,out_of_bounds,home_opening_kickoff,qb_epa,xyac_epa,xyac_mean_yardage,xyac_median_yardage,xyac_success,xyac_fd,xpass,pass_oe


In [514]:
# The ball handler on each play: the rusher when there is one, otherwise the receiver.
# gtg_df_10 was produced by boolean-filtering another frame, so the column is added with
# assign(), which returns a new DataFrame, rather than by item assignment on the slice
# (which triggers pandas' SettingWithCopyWarning).
gtg_df_10 = gtg_df_10.assign(
    player_id=gtg_df_10['rusher_player_id'].fillna(gtg_df_10['receiver_player_id'])
)
#gtg_df['player_id'] = gtg_df['rusher_player_id'].fillna(gtg_df['receiver_player_id'])

In [515]:
gtg_df_10.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5499 entries, 31 to 110028
Columns: 373 entries, play_id to player_id
dtypes: float64(182), int64(39), object(152)
memory usage: 15.7+ MB


In [516]:
# Number of retained inside-the-10 plays for each offense in each game
offense_game_keys = ['season', 'week', 'posteam', 'defteam']
grouped_gtg_10 = (
    gtg_df_10
    .groupby(offense_game_keys)
    .size()
    .reset_index(name='plays_in_10')
)

#grouped_gtg = gtg_df.groupby(['season', 'week', 'posteam', 'defteam']).size().reset_index(name = 'gtg_plays')


In [517]:
gtg_df_10.columns

Index(['play_id', 'game_id', 'old_game_id', 'home_team', 'away_team',
       'season_type', 'week', 'posteam', 'posteam_type', 'defteam',
       ...
       'home_opening_kickoff', 'qb_epa', 'xyac_epa', 'xyac_mean_yardage',
       'xyac_median_yardage', 'xyac_success', 'xyac_fd', 'xpass', 'pass_oe',
       'player_id'],
      dtype='object', length=373)

In [518]:
# Subset of play-by-play columns kept for the inside-the-10 analysis
gtg_cols = [
    'play_id', 'week', 'posteam', 'defteam',
    'sp', 'desc', 'play_type', 'td_player_id',
    'incomplete_pass', 'rush_attempt', 'pass_attempt',
    'touchdown', 'pass_touchdown', 'rush_touchdown', 'complete_pass',
    'receiver_player_id', 'receiver_player_name',
    'rusher_player_id', 'rusher_player_name',
    'season', 'weather', 'player_id',
]

In [519]:
gtg_df_10 = gtg_df_10[gtg_cols]

In [520]:
# Attach the team's inside-the-10 play total to every play row
gtg_df_10 = gtg_df_10.merge(
    grouped_gtg_10,
    how='left',
    on=['season', 'week', 'posteam', 'defteam'],
)
#gtg_df = pd.merge(gtg_df, grouped_gtg, on = ['season', 'week', 'posteam', 'defteam'], how = 'left')

In [521]:
##Next will be to derive how many times per gtg play a player gets the ball, and then when they get it their TD percentage

In [522]:
# Number of inside-the-10 plays on which each player was the rusher or receiver, per game
player_game_keys = ['season', 'week', 'posteam', 'defteam', 'player_id']
gtg_10_player = (
    gtg_df_10
    .groupby(player_game_keys)
    .size()
    .reset_index(name='opps_in_10')
)
#gtg_player = gtg_df.groupby(['season', 'week', 'posteam', 'defteam', 'player_id']).size().reset_index(name = 'gtg_opps')

In [523]:
# Attach each player's per-game opportunity count to the play rows
gtg_df_10 = gtg_df_10.merge(
    gtg_10_player,
    how='left',
    on=['season', 'week', 'posteam', 'defteam', 'player_id'],
)
#gtg_df = pd.merge(gtg_df, gtg_player, on = ['season', 'week', 'posteam', 'defteam', 'player_id'], how = 'left')

In [524]:
# Player opportunities as a fraction of the team's inside-the-10 plays, to 3 decimals
share_in_10 = gtg_df_10['opps_in_10'] / gtg_df_10['plays_in_10']
gtg_df_10['in_10_share'] = share_in_10.round(3)

In [525]:
gtg_df_10.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5499 entries, 0 to 5498
Data columns (total 25 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   play_id               5499 non-null   int64  
 1   week                  5499 non-null   int64  
 2   posteam               5499 non-null   object 
 3   defteam               5499 non-null   object 
 4   sp                    5499 non-null   int64  
 5   desc                  5499 non-null   object 
 6   play_type             5499 non-null   object 
 7   td_player_id          1728 non-null   object 
 8   incomplete_pass       5499 non-null   float64
 9   rush_attempt          5499 non-null   float64
 10  pass_attempt          5499 non-null   float64
 11  touchdown             5499 non-null   float64
 12  pass_touchdown        5499 non-null   float64
 13  rush_touchdown        5499 non-null   float64
 14  complete_pass         5499 non-null   float64
 15  receiver_player_id   

In [526]:
gtg_df_10['player_id'].nunique()

575

In [527]:
flex_merge['player_id'].nunique()

679

In [528]:
##Maybe we need to pause here. There are more players in our weekly data than we have in our gtg data, which makes sense.
#Not every player will be used in gtg situations.
#Maybe somehow just check that every player in flex_merge but not gtg doesn't have any True values in goal_to_go
####Maybe we should just use common sense
#We've filtered every gtg play that was a run or a pass.
#If there's any irregularity it's likely to come out during spot-checking

In [529]:
gtg_players = list(gtg_df_10['player_id'].unique())

In [530]:
flex_players = list(flex_merge['player_id'].unique())

In [531]:
len(gtg_players)

575

In [532]:
len(flex_players)

679

In [533]:
#gtg_df = gtg_df.sort_values(by = ['season', 'week', 'posteam', 'defteam'])

In [534]:
# Reduce the play-level rows to one row per player per game, averaging in_10_share
share_keys = ['season', 'week', 'posteam', 'defteam', 'player_id']
gtg_df_10 = gtg_df_10.groupby(share_keys, as_index=False)['in_10_share'].mean()


In [535]:
#gtg_df = gtg_df[['season', 'week', 'posteam', 'defteam', 'player_id', 'gtg_share']]

In [536]:
gtg_df_10.tail(30)

Unnamed: 0,season,week,posteam,defteam,player_id,in_10_share
3501,2024,4,PHI,TB,00-0034844,0.429
3502,2024,4,PHI,TB,00-0035639,0.143
3503,2024,4,PHI,TB,00-0036389,0.286
3504,2024,4,PHI,TB,00-0036919,0.143
3505,2024,4,PIT,IND,00-0036893,0.143
3506,2024,4,PIT,IND,00-0036894,0.143
3507,2024,4,PIT,IND,00-0036945,0.286
3508,2024,4,PIT,IND,00-0037247,0.429
3509,2024,4,SEA,DET,00-0032211,0.25
3510,2024,4,SEA,DET,00-0035640,0.125


In [537]:
check_for_one = gtg_df_10.groupby(['season', 'week', 'posteam', 'defteam'])['in_10_share'].sum()

In [538]:
check_for_one.min()

0.9989999999999999

In [539]:
gtg_df_10.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3531 entries, 0 to 3530
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   season       3531 non-null   int64  
 1   week         3531 non-null   int64  
 2   posteam      3531 non-null   object 
 3   defteam      3531 non-null   object 
 4   player_id    3531 non-null   object 
 5   in_10_share  3531 non-null   float64
dtypes: float64(1), int64(2), object(3)
memory usage: 165.6+ KB


In [540]:
# Use the weekly data's team column names so the share table can be merged onto flex_merge
gtg_df_10 = gtg_df_10.rename(
    columns={'posteam': 'recent_team', 'defteam': 'opponent_team'}
)

In [541]:
# Bring in_10_share onto the player-game rows; players with no match get NaN
flex_merge = flex_merge.merge(
    gtg_df_10,
    how='left',
    on=['season', 'week', 'recent_team', 'opponent_team', 'player_id'],
)

In [542]:
flex_merge.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10892 entries, 0 to 10891
Data columns (total 32 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   season                       10892 non-null  int64  
 1   week                         10892 non-null  int64  
 2   recent_team                  10892 non-null  object 
 3   opponent_team                10892 non-null  object 
 4   player_id                    10892 non-null  object 
 5   player_display_name          10892 non-null  object 
 6   position                     10892 non-null  object 
 7   passing_yards                10892 non-null  int64  
 8   passing_tds                  10892 non-null  int64  
 9   interceptions                10892 non-null  int64  
 10  passing_2pt_conversions      10892 non-null  int64  
 11  rushing_yards                10892 non-null  int64  
 12  rushing_tds                  10892 non-null  int64  
 13  rushing_fumbles_

# A lot of missing in_10_share values
At first glance it seems like there are too many nulls in the in_10_share column, which is the percentage of plays inside the 10-yard-line that each player gets the ball. But maybe it is plausible. After all, in those goal-line situations only the best players are called upon. We'll replace with zero and investigate further when we examine the data.

In [543]:
# A missing share means the player had no recorded opportunity inside the 10: treat it as zero
flex_merge = flex_merge.fillna(value={'in_10_share': 0})

Let's rename flex_merge to flex_df so we can run the following code

In [544]:
flex_df = flex_merge

# Fantasy points
This is where we calculate FanDuel and DraftKings points.

In [545]:
# FanDuel scoring: each stat column is multiplied by its point value and the products are
# accumulated in the order listed (negative weights are the turnover penalties).
fd_weights = [
    ('passing_yards', 0.04),
    ('rushing_tds', 6),
    ('rushing_yards', 0.1),
    ('passing_tds', 4),
    ('receiving_yards', 0.1),
    ('receiving_tds', 6),
    ('receptions', 0.5),
    ('rushing_2pt_conversions', 2),
    ('passing_2pt_conversions', 2),
    ('receiving_2pt_conversions', 2),
    ('special_teams_tds', 6),
    ('interceptions', -1),
    ('sack_fumbles_lost', -2),
    ('receiving_fumbles_lost', -2),
    ('rushing_fumbles_lost', -2),
]
fd_points = 0
for stat_col, weight in fd_weights:
    fd_points = fd_points + flex_df[stat_col] * weight
flex_df['FD_Pts'] = fd_points

In [546]:
# DraftKings base scoring (yardage bonuses are added separately): each stat column is
# multiplied by its point value and the products are accumulated in the order listed.
dk_weights = [
    ('passing_yards', 0.04),
    ('rushing_tds', 6),
    ('rushing_yards', 0.1),
    ('passing_tds', 4),
    ('receiving_yards', 0.1),
    ('receiving_tds', 6),
    ('receptions', 1),
    ('rushing_2pt_conversions', 2),
    ('passing_2pt_conversions', 2),
    ('receiving_2pt_conversions', 2),
    ('special_teams_tds', 6),
    ('interceptions', -1),
    ('sack_fumbles_lost', -1),
    ('receiving_fumbles_lost', -1),
    ('rushing_fumbles_lost', -1),
]
dk_points = 0
for stat_col, weight in dk_weights:
    dk_points = dk_points + flex_df[stat_col] * weight
flex_df['DK_Pts'] = dk_points

Adding DraftKings bonus points

In [547]:
# Add 3 bonus points for each yardage threshold a player reaches in the game
dk_bonus_thresholds = [
    ('passing_yards', 300),
    ('receiving_yards', 100),
    ('rushing_yards', 100),
]
for yards_col, threshold in dk_bonus_thresholds:
    reached = flex_df[yards_col] >= threshold
    flex_df['DK_Pts'] = flex_df['DK_Pts'].mask(reached, flex_df['DK_Pts'] + 3)

In [548]:
flex_df.tail()

Unnamed: 0,season,week,recent_team,opponent_team,player_id,player_display_name,position,passing_yards,passing_tds,interceptions,...,receiving_yards_after_catch,receiving_air_yards,air_yards_share,target_share,game_id,total_carries,carry_share,in_10_share,FD_Pts,DK_Pts
10887,2024,4,WAS,ARI,00-0035659,Terry McLaurin,WR,0,0,0,...,6,103,0.591954,0.333333,2024_04_WAS_ARI,37,0.0,0.143,14.7,18.2
10888,2024,4,WAS,ARI,00-0036626,Dyami Brown,WR,0,0,0,...,0,4,0.022989,0.033333,2024_04_WAS_ARI,37,0.0,0.0,0.9,1.4
10889,2024,4,WAS,ARI,00-0036628,John Bates,TE,0,0,0,...,6,3,0.017241,0.033333,2024_04_WAS_ARI,37,0.0,0.0,1.4,1.9
10890,2024,4,WAS,ARI,00-0037746,Brian Robinson,RB,0,0,0,...,18,-6,-0.034483,0.1,2024_04_WAS_ARI,37,0.568,0.286,18.8,23.3
10891,2024,4,WAS,ARI,00-0039355,Luke McCaffrey,WR,0,0,0,...,4,13,0.074713,0.033333,2024_04_WAS_ARI,37,0.0,0.0,2.2,2.7


# Storing last week's points to evaluate model

In [549]:
# Actual points from the week before the one being predicted, current season only
is_last_week = (flex_df['season'] == season) & (flex_df['week'] == week - 1)
target_cols = [
    'season', 'week', 'player_display_name', 'recent_team',
    'opponent_team', 'position', 'FD_Pts', 'DK_Pts',
]
flex_points_last_week = flex_df.loc[is_last_week, target_cols]

In [550]:
# Save last week's actual points, named with the season and that week's number
target_path = 'flex_target_' + str(season) + '_' + str(week - 1) + '.csv'
flex_points_last_week.to_csv(target_path)

In [551]:
flex_df['position'].value_counts()

position
WR    5068
RB    3156
TE    2475
FB     193
Name: count, dtype: int64

In [552]:
# Fold halfbacks and fullbacks into the RB position
is_hb_or_fb = flex_df['position'].isin(['HB', 'FB'])
flex_df['position'] = flex_df['position'].mask(is_hb_or_fb, 'RB')

In [553]:
# How many players at each position appear for each team in each game
pos_mean_by_team = (
    flex_df
    .groupby(['season', 'week', 'recent_team', 'opponent_team', 'position'])
    .size()
    .reset_index(name='num_players')
)

In [554]:
# Mean, max and min player count per team-game, by position
players_by_position = pos_mean_by_team.groupby('position')['num_players']
mean_by_pos = players_by_position.agg(['mean', 'max', 'min'])

In [555]:
mean_by_pos

Unnamed: 0_level_0,mean,max,min
position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
RB,2.64534,5,1
TE,1.968974,4,1
WR,4.00316,7,2


# Grouping points allowed by position
First step in deriving DvP variables.

In [556]:
# Total DraftKings and FanDuel points per position for each team in each game, to 3 decimals
matchup_position_keys = ['season', 'week', 'recent_team', 'opponent_team', 'position']
site_point_cols = ['DK_Pts', 'FD_Pts']
grouped_pts = flex_df.groupby(matchup_position_keys)[site_point_cols].sum().round(3)

In [557]:
grouped_pts

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,DK_Pts,FD_Pts
season,week,recent_team,opponent_team,position,Unnamed: 5_level_1,Unnamed: 6_level_1
2022,1,ARI,KC,RB,25.6,21.6
2022,1,ARI,KC,TE,11.4,10.4
2022,1,ARI,KC,WR,32.9,25.9
2022,1,ATL,NO,RB,28.3,23.3
2022,1,ATL,NO,TE,7.0,5.0
...,...,...,...,...,...,...
2024,4,TEN,MIA,TE,3.0,2.0
2024,4,TEN,MIA,WR,15.0,12.0
2024,4,WAS,ARI,RB,43.7,38.7
2024,4,WAS,ARI,TE,9.1,7.1


In [558]:
grouped_pts.reset_index(inplace = True)

In [559]:
# Order the rows by opponent_team, then chronologically by season and week
grouped_pts = grouped_pts.sort_values(by = ['opponent_team', 'season', 'week'])
#grouped_pts['opp_game_num'] = grouped_pts.groupby(['opponent_team', 'season', 'week', 'recent_team']).cumcount() + 1
# reset_index without drop=True keeps the pre-sort row labels as a new 'index' column
grouped_pts.reset_index(inplace = True)

In [560]:
grouped_pts_raw = grouped_pts.copy()

In [561]:
grouped_pts.drop(columns = ['index'], inplace = True)

In [562]:
grouped_pts

Unnamed: 0,season,week,recent_team,opponent_team,position,DK_Pts,FD_Pts
0,2022,1,KC,ARI,RB,42.5,39.5
1,2022,1,KC,ARI,TE,38.2,30.2
2,2022,1,KC,ARI,WR,35.6,27.6
3,2022,2,LV,ARI,RB,12.6,11.6
4,2022,2,LV,ARI,TE,23.0,18.5
...,...,...,...,...,...,...,...
3784,2024,3,CIN,WAS,TE,14.9,10.9
3785,2024,3,CIN,WAS,WR,55.9,45.9
3786,2024,4,ARI,WAS,RB,29.7,25.7
3787,2024,4,ARI,WAS,TE,3.2,2.2


In [563]:
# Pivot positions into columns: one row per team-game, one column per (points column, position)
pivot_df = pd.pivot_table(
    grouped_pts,
    values=['DK_Pts', 'FD_Pts'],
    index=['season', 'week', 'recent_team', 'opponent_team'],
    columns='position',
    aggfunc='sum',   # duplicates, if any, are summed
    fill_value=0,    # positions with no rows for a team-game become 0
)

In [564]:
# Collapse the two-level column labels into single names such as DK_Pts_RB
pivot_df.columns = pivot_df.columns.map('_'.join).str.strip()

In [565]:
# Move the index keys (season, week, teams) back into ordinary columns
pivot_df = pivot_df.reset_index()

In [566]:
pivot_df.head()

Unnamed: 0,season,week,recent_team,opponent_team,DK_Pts_RB,DK_Pts_TE,DK_Pts_WR,FD_Pts_RB,FD_Pts_TE,FD_Pts_WR
0,2022,1,ARI,KC,25.6,11.4,32.9,21.6,10.4,25.9
1,2022,1,ATL,NO,28.3,7.0,27.1,23.3,5.0,20.1
2,2022,1,BAL,NYJ,11.5,10.2,39.2,9.5,7.7,35.2
3,2022,1,BUF,LAR,16.0,1.5,63.7,10.0,1.0,52.2
4,2022,1,CAR,CLE,16.5,8.4,34.4,14.5,6.9,26.9


In [567]:
pivot_df = pivot_df.sort_values(by = ['opponent_team', 'season', 'week'])

# Bringing in current week's FanDuel and DraftKings rows here
We're basically cloning this notebook from model training. We need features that apply to the current NFL week. We're trying the **one extra row** concept. For now we can probably add one row for each team, with the season value being 2024 and the week value being 1.

In [568]:
# Connect to the SQLite database
conn = sqlite3.connect('nfl_dfs.db')

# query_flex = "SELECT * FROM flex_dataset"
# #query_weekly = "SELECT * FROM weekly_data"
# query_fd_spread = "SELECT * FROM fd_spreads"
# query_dk_spread = "SELECT * FROM dk_spreads"
query_qb_model = "SELECT * FROM fd_qb_model_ready"
# # query_pbp = "S#ELECT * FROM pbp_non_defense"
query_fd = "SELECT * FROM fd_table"
query_dk = "SELECT * FROM dk_table"
#query_rb_wr_te = "SELECT * FROM rb_wr_te_data WHERE season >= 2022"

try:
    # flex_dataset = pd.read_sql_query(query_flex, conn)
    # #weekly_df = pd.read_sql_query(query_weekly, conn)
    # fd_spreads = pd.read_sql_query(query_fd_spread, conn)
    # dk_spreads = pd.read_sql_query(query_dk_spread, conn)
    # #pbp_df = pd.read_sql_query(query_pbp, conn)
    fd_table = pd.read_sql_query(query_fd, conn)
    dk_table = pd.read_sql_query(query_dk, conn)
    qb_model = pd.read_sql_query(query_qb_model, conn)
    # rb_wr_te_df = pd.read_sql_query(query_rb_wr_te, conn)
finally:
    # Close the database connection even if one of the reads fails
    conn.close()

In [569]:
fanduel_df.head()

Unnamed: 0,ID,name,position,salary,team,opponent,home_team,status,date,week
0,107566-86631,CeeDee Lamb,WR,9400,DAL,PIT,0,Active,10-06-2024,5
1,107566-85701,Ja'Marr Chase,WR,9300,CIN,BAL,1,Active,10-06-2024,5
2,107566-62239,Josh Allen,QB,9300,BUF,HOU,0,Active,10-06-2024,5
3,107566-39280,Derrick Henry,RB,9200,BAL,CIN,0,Active,10-06-2024,5
4,107566-91419,Nico Collins,WR,8800,HOU,BUF,1,Active,10-06-2024,5


In [570]:
draftkings_df.head()

Unnamed: 0,ID,name,position,salary,team,opponent,home_team,status,date,week
0,36142122,Ja'Marr Chase,WR,8000,CIN,BAL,1,Active,10-06-2024,5
1,36142124,Cooper Kupp,WR,7900,LAR,GB,1,O,10-06-2024,5
2,36141894,Derrick Henry,RB,7800,BAL,CIN,0,Active,10-06-2024,5
3,36141829,Josh Allen,QB,7700,BUF,HOU,0,Active,10-06-2024,5
4,36142126,Nico Collins,WR,7700,HOU,BUF,1,Active,10-06-2024,5


In [571]:
# Build this week's matchups from the FanDuel slate (FanDuel tends to list more players
# than DraftKings, so we're less likely to miss a game). Every game contributes two
# entries, one from each side's point of view.
team_list = []
opponent_list = []

# Matchups already recorded, stored in both orientations
added_matchups = set()

for team, opponent in zip(fanduel_df['team'], fanduel_df['opponent']):
    # Skip games we have already seen from either side
    if (team, opponent) in added_matchups or (opponent, team) in added_matchups:
        continue

    team_list.extend([team, opponent])
    opponent_list.extend([opponent, team])
    added_matchups.update({(team, opponent), (opponent, team)})

# Display the resulting lists
print("Team list:", team_list)
print("Opponent list:", opponent_list)


Team list: ['DAL', 'PIT', 'CIN', 'BAL', 'BUF', 'HOU', 'WAS', 'CLE', 'SF', 'ARI', 'MIA', 'NE', 'IND', 'JAX', 'LAR', 'GB', 'SEA', 'NYG', 'CHI', 'CAR', 'LV', 'DEN']
Opponent list: ['PIT', 'DAL', 'BAL', 'CIN', 'HOU', 'BUF', 'CLE', 'WAS', 'ARI', 'SF', 'NE', 'MIA', 'JAX', 'IND', 'GB', 'LAR', 'NYG', 'SEA', 'CAR', 'CHI', 'DEN', 'LV']


In [572]:
len(team_list), len(opponent_list)

(22, 22)

In [573]:
# Placeholder rows for the current week: one per team on the slate, no points columns yet
n_matchup_rows = len(team_list)
append_to_pivot = {
    'season': [season] * n_matchup_rows,
    'week': [week] * n_matchup_rows,
    'recent_team': team_list,
    'opponent_team': opponent_list,
}

In [574]:
append_to_pivot = pd.DataFrame(append_to_pivot)

In [575]:
# Stack the placeholder rows under the historical games. The appended rows keep their own
# 0-based labels, so index labels are no longer unique after this step.
pivot_df = pd.concat([pivot_df, append_to_pivot])

In [576]:
pivot_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1288 entries, 15 to 21
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   season         1288 non-null   int64  
 1   week           1288 non-null   int64  
 2   recent_team    1288 non-null   object 
 3   opponent_team  1288 non-null   object 
 4   DK_Pts_RB      1266 non-null   float64
 5   DK_Pts_TE      1266 non-null   float64
 6   DK_Pts_WR      1266 non-null   float64
 7   FD_Pts_RB      1266 non-null   float64
 8   FD_Pts_TE      1266 non-null   float64
 9   FD_Pts_WR      1266 non-null   float64
dtypes: float64(6), int64(2), object(2)
memory usage: 110.7+ KB


In [577]:
# 1-based running count of games for each defense, in the frame's current row order
pivot_df['opp_game_num'] = pivot_df.groupby('opponent_team').cumcount().add(1)

In [578]:
# Points-allowed columns that feed the defense-vs-position (DvP) rolling means
cols_for_dvp = [
    f'{site}_Pts_{pos}'
    for site in ('DK', 'FD')
    for pos in ('RB', 'TE', 'WR')
]

In [579]:
pivot_df_by_game = pivot_df.copy()

In [580]:
pivot_df.head()

Unnamed: 0,season,week,recent_team,opponent_team,DK_Pts_RB,DK_Pts_TE,DK_Pts_WR,FD_Pts_RB,FD_Pts_TE,FD_Pts_WR,opp_game_num
15,2022,1,KC,ARI,42.5,38.2,35.6,39.5,30.2,27.6,1
50,2022,2,LV,ARI,12.6,23.0,32.7,11.6,18.5,24.7,2
81,2022,3,LAR,ARI,12.8,13.9,37.0,11.8,11.4,30.5,3
100,2022,4,CAR,ARI,26.0,8.0,17.7,21.5,6.0,13.2,4
153,2022,5,PHI,ARI,10.4,17.5,29.8,9.4,13.5,21.8,5


In [581]:
pivot_df.tail()

Unnamed: 0,season,week,recent_team,opponent_team,DK_Pts_RB,DK_Pts_TE,DK_Pts_WR,FD_Pts_RB,FD_Pts_TE,FD_Pts_WR,opp_game_num
17,2024,5,NYG,SEA,,,,,,,40
18,2024,5,CHI,CAR,,,,,,,39
19,2024,5,CAR,CHI,,,,,,,39
20,2024,5,LV,DEN,,,,,,,39
21,2024,5,DEN,LV,,,,,,,39


In [582]:
pivot_df['opponent_team'].nunique()

32

# DvP variables
We'll take an 8-game rolling mean for fantasy points allowed to RBs, WRs and TEs for each team, even if the games go back to last season. DvP means defense vs. position.

In [583]:
def calculate_equal_rolling_mean(group, cols, suffix, window=8, min_periods=1):
    """
    Add lagged rolling-mean columns to one group of games.

    For each column in ``cols`` a new column named ``f'{col}{suffix}'`` is added. Its value on a
    given row is the mean of up to ``window`` rows immediately *before* that row: the series is
    shifted by one first, so the current row never contributes to its own average. Rows are taken
    in their existing order, so the caller is responsible for sorting chronologically (games from
    a previous season are used when they precede the row). When fewer than ``window`` earlier rows
    exist, the mean of whatever is available is used, provided at least ``min_periods`` values are
    present; the first row of a group is therefore NaN.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows for a single entity (for example one team or one player), already in game order.
    cols : iterable of str
        Names of the columns to average.
    suffix : str
        Text appended to each source column name to form the new column name.
    window : int, default 8
        Maximum number of prior rows included in each mean.
    min_periods : int, default 1
        Minimum number of non-missing prior values required to produce a mean.

    Returns
    -------
    pandas.DataFrame
        A copy of ``group`` with the new columns added; the input frame is left unmodified.
    """
    # Work on a copy so a frame passed in directly is not mutated as a side effect
    result = group.copy()
    for col in cols:
        result[f'{col}{suffix}'] = (
            result[col].shift().rolling(window=window, min_periods=min_periods).mean()
        )
    return result

In [584]:
# Rolling points allowed per defense: each opponent_team group gets its own *_DvP columns
pivot_df = (
    pivot_df
    .groupby('opponent_team', as_index=False)
    .apply(calculate_equal_rolling_mean, cols=cols_for_dvp, suffix='_DvP')
)

In [585]:
# Keep only the lagged *_DvP features; the raw per-game points and the game counter are dropped
pivot_df = pivot_df.drop(columns=[*cols_for_dvp, 'opp_game_num'])

In [586]:
pivot_df

Unnamed: 0,Unnamed: 1,season,week,recent_team,opponent_team,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,FD_Pts_RB_DvP,FD_Pts_TE_DvP,FD_Pts_WR_DvP
0,15,2022,1,KC,ARI,,,,,,
0,50,2022,2,LV,ARI,42.500000,38.200000,35.6000,39.500000,30.200000,27.6000
0,81,2022,3,LAR,ARI,27.550000,30.600000,34.1500,25.550000,24.350000,26.1500
0,100,2022,4,CAR,ARI,22.633333,25.033333,35.1000,20.966667,20.033333,27.6000
0,153,2022,5,PHI,ARI,23.475000,20.775000,30.7500,21.100000,16.525000,24.0000
...,...,...,...,...,...,...,...,...,...,...,...
31,1167,2024,1,TB,WAS,33.962500,8.212500,47.3250,30.212500,6.275000,38.8875
31,1193,2024,2,NYG,WAS,33.500000,7.387500,48.0625,29.562500,5.700000,40.0000
31,1208,2024,3,CIN,WAS,31.725000,6.537500,50.0000,27.850000,5.037500,41.3125
31,1234,2024,4,ARI,WAS,31.437500,7.562500,49.8000,27.562500,5.687500,40.6750


In [587]:
pivot_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 1288 entries, (0, 15) to (31, 7)
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   season         1288 non-null   int64  
 1   week           1288 non-null   int64  
 2   recent_team    1288 non-null   object 
 3   opponent_team  1288 non-null   object 
 4   DK_Pts_RB_DvP  1256 non-null   float64
 5   DK_Pts_TE_DvP  1256 non-null   float64
 6   DK_Pts_WR_DvP  1256 non-null   float64
 7   FD_Pts_RB_DvP  1256 non-null   float64
 8   FD_Pts_TE_DvP  1256 non-null   float64
 9   FD_Pts_WR_DvP  1256 non-null   float64
dtypes: float64(6), int64(2), object(2)
memory usage: 147.9+ KB


In [588]:
flex_df.head()

Unnamed: 0,season,week,recent_team,opponent_team,player_id,player_display_name,position,passing_yards,passing_tds,interceptions,...,receiving_yards_after_catch,receiving_air_yards,air_yards_share,target_share,game_id,total_carries,carry_share,in_10_share,FD_Pts,DK_Pts
0,2022,1,ARI,KC,00-0027942,A.J. Green,WR,0,0,0,...,0,42,0.157895,0.111111,2022_01_KC_ARI,21,0.0,0.143,2.3,3.3
1,2022,1,ARI,KC,00-0030061,Zach Ertz,TE,0,0,0,...,4,22,0.082707,0.111111,2022_01_KC_ARI,21,0.0,0.429,10.4,11.4
2,2022,1,ARI,KC,00-0033553,James Conner,RB,0,0,0,...,38,7,0.026316,0.166667,2022_01_KC_ARI,21,0.476,0.286,14.0,16.5
3,2022,1,ARI,KC,00-0035500,Greg Dortch,WR,0,0,0,...,31,62,0.233083,0.25,2022_01_KC_ARI,21,0.0,0.0,9.8,13.3
4,2022,1,ARI,KC,00-0035527,Andy Isabella,WR,0,0,0,...,4,30,0.112782,0.083333,2022_01_KC_ARI,21,0.0,0.0,1.5,2.0


In [589]:
# Attach the DvP features to every player-game row. The outer join also keeps the
# current-week placeholder rows from pivot_df, which have no player rows yet.
flex_df = pd.merge(
    flex_df,
    pivot_df,
    how='outer',
    on=['season', 'week', 'recent_team', 'opponent_team'],
)

In [590]:
flex_df.tail(30)

Unnamed: 0,season,week,recent_team,opponent_team,player_id,player_display_name,position,passing_yards,passing_tds,interceptions,...,carry_share,in_10_share,FD_Pts,DK_Pts,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,FD_Pts_RB_DvP,FD_Pts_TE_DvP,FD_Pts_WR_DvP
10884,2024,4,WAS,ARI,00-0033591,Noah Brown,WR,0.0,0.0,0.0,...,0.0,0.0,4.1,5.6,24.81,11.775,28.1375,21.56,9.4625,23.3875
10885,2024,4,WAS,ARI,00-0033955,Jeremy McNichols,RB,0.0,0.0,0.0,...,0.216,0.286,19.9,20.4,24.81,11.775,28.1375,21.56,9.4625,23.3875
10886,2024,4,WAS,ARI,00-0035208,Olamide Zaccheaus,WR,0.0,0.0,0.0,...,0.0,0.0,11.5,14.5,24.81,11.775,28.1375,21.56,9.4625,23.3875
10887,2024,4,WAS,ARI,00-0035659,Terry McLaurin,WR,0.0,0.0,0.0,...,0.0,0.143,14.7,18.2,24.81,11.775,28.1375,21.56,9.4625,23.3875
10888,2024,4,WAS,ARI,00-0036626,Dyami Brown,WR,0.0,0.0,0.0,...,0.0,0.0,0.9,1.4,24.81,11.775,28.1375,21.56,9.4625,23.3875
10889,2024,4,WAS,ARI,00-0036628,John Bates,TE,0.0,0.0,0.0,...,0.0,0.0,1.4,1.9,24.81,11.775,28.1375,21.56,9.4625,23.3875
10890,2024,4,WAS,ARI,00-0037746,Brian Robinson,RB,0.0,0.0,0.0,...,0.568,0.286,18.8,23.3,24.81,11.775,28.1375,21.56,9.4625,23.3875
10891,2024,4,WAS,ARI,00-0039355,Luke McCaffrey,WR,0.0,0.0,0.0,...,0.0,0.0,2.2,2.7,24.81,11.775,28.1375,21.56,9.4625,23.3875
10892,2024,5,SF,ARI,,,,,,,...,,,,,28.2475,12.175,29.3375,24.56,9.8,24.2125
10893,2024,5,CIN,BAL,,,,,,,...,,,,,21.825,15.3375,29.35,18.075,11.525,23.4125


In [591]:
# Moves the current row labels into a new 'index' column and renumbers the rows from 0
flex_df = flex_df.reset_index()

In [592]:
flex_df.columns

Index(['index', 'season', 'week', 'recent_team', 'opponent_team', 'player_id',
       'player_display_name', 'position', 'passing_yards', 'passing_tds',
       'interceptions', 'passing_2pt_conversions', 'rushing_yards',
       'rushing_tds', 'rushing_fumbles_lost', 'rushing_2pt_conversions',
       'receptions', 'receiving_yards', 'receiving_tds',
       'receiving_fumbles_lost', 'receiving_2pt_conversions',
       'sack_fumbles_lost', 'special_teams_tds', 'targets', 'carries',
       'receiving_yards_after_catch', 'receiving_air_yards', 'air_yards_share',
       'target_share', 'game_id', 'total_carries', 'carry_share',
       'in_10_share', 'FD_Pts', 'DK_Pts', 'DK_Pts_RB_DvP', 'DK_Pts_TE_DvP',
       'DK_Pts_WR_DvP', 'FD_Pts_RB_DvP', 'FD_Pts_TE_DvP', 'FD_Pts_WR_DvP'],
      dtype='object')

In [593]:
flex_df['opponent_team'].nunique()

32

# Paring down some of the columns
We'll drop columns that won't be needed for features.

In [594]:
cols_to_keep = [
    # identifiers
    'season', 'week', 'recent_team', 'opponent_team',
    'player_id', 'player_display_name', 'position',
    # rushing
    'rushing_yards', 'rushing_tds', 'rushing_fumbles_lost',
    # receiving
    'receptions', 'receiving_yards', 'receiving_tds', 'receiving_fumbles_lost',
    # volume and usage
    'targets', 'carries',
    'receiving_yards_after_catch', 'receiving_air_yards', 'air_yards_share',
    'target_share', 'carry_share', 'in_10_share',
    # fantasy points scored
    'FD_Pts', 'DK_Pts',
    # defense-vs-position rolling means
    'DK_Pts_RB_DvP', 'DK_Pts_TE_DvP', 'DK_Pts_WR_DvP',
    'FD_Pts_RB_DvP', 'FD_Pts_TE_DvP', 'FD_Pts_WR_DvP',
]

In [595]:
# Take an explicit copy: columns of this frame are assigned to further down, and assigning
# into a bare column subset can trigger pandas' SettingWithCopyWarning.
flex_df = flex_df[cols_to_keep].copy()

In [596]:
#quarterback_df[quarterback_df['season'] >= 2006]['passing_air_yards']

In [597]:
flex_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10914 entries, 0 to 10913
Data columns (total 30 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   season                       10914 non-null  int64  
 1   week                         10914 non-null  int64  
 2   recent_team                  10914 non-null  object 
 3   opponent_team                10914 non-null  object 
 4   player_id                    10892 non-null  object 
 5   player_display_name          10892 non-null  object 
 6   position                     10892 non-null  object 
 7   rushing_yards                10892 non-null  float64
 8   rushing_tds                  10892 non-null  float64
 9   rushing_fumbles_lost         10892 non-null  float64
 10  receptions                   10892 non-null  float64
 11  receiving_yards              10892 non-null  float64
 12  receiving_tds                10892 non-null  float64
 13  receiving_fumble

# Filling missing values
We have found that dropping rows with missing values can affect calculations down the line. So we need to find ways to fill the missing values.<br>

The first row of every team grouping when we calculated DvP was NaN because there was no previous value. We'll fill those in with the mean.

In [598]:
# Replace missing DvP values with that column's overall mean (computed before filling)
for dvp_col in (
    'FD_Pts_RB_DvP', 'DK_Pts_RB_DvP',
    'FD_Pts_TE_DvP', 'DK_Pts_TE_DvP',
    'FD_Pts_WR_DvP', 'DK_Pts_WR_DvP',
):
    flex_df[dvp_col] = flex_df[dvp_col].fillna(flex_df[dvp_col].mean())

In [599]:
flex_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10914 entries, 0 to 10913
Data columns (total 30 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   season                       10914 non-null  int64  
 1   week                         10914 non-null  int64  
 2   recent_team                  10914 non-null  object 
 3   opponent_team                10914 non-null  object 
 4   player_id                    10892 non-null  object 
 5   player_display_name          10892 non-null  object 
 6   position                     10892 non-null  object 
 7   rushing_yards                10892 non-null  float64
 8   rushing_tds                  10892 non-null  float64
 9   rushing_fumbles_lost         10892 non-null  float64
 10  receptions                   10892 non-null  float64
 11  receiving_yards              10892 non-null  float64
 12  receiving_tds                10892 non-null  float64
 13  receiving_fumble

In [600]:
# Rows belonging to the week we are predicting
is_current_week = (flex_df['season'] == season) & (flex_df['week'] == week)
flex_24 = flex_df[is_current_week]

In [601]:
flex_24.head()

Unnamed: 0,season,week,recent_team,opponent_team,player_id,player_display_name,position,rushing_yards,rushing_tds,rushing_fumbles_lost,...,carry_share,in_10_share,FD_Pts,DK_Pts,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,FD_Pts_RB_DvP,FD_Pts_TE_DvP,FD_Pts_WR_DvP
10892,2024,5,SF,ARI,,,,,,,...,,,,,28.2475,12.175,29.3375,24.56,9.8,24.2125
10893,2024,5,CIN,BAL,,,,,,,...,,,,,21.825,15.3375,29.35,18.075,11.525,23.4125
10894,2024,5,HOU,BUF,,,,,,,...,,,,,26.975,12.3,24.1625,24.225,9.55,19.1625
10895,2024,5,CHI,CAR,,,,,,,...,,,,,26.95,11.9375,27.6375,23.825,9.4375,22.825
10896,2024,5,CAR,CHI,,,,,,,...,,,,,24.8,11.1625,29.95,21.05,8.4125,23.7625


In [602]:
# Restrict both site lists to the flex-eligible positions
flex_positions = ['RB', 'WR', 'TE']
fd_flex = fanduel_df[fanduel_df['position'].isin(flex_positions)]
dk_flex = draftkings_df[draftkings_df['position'].isin(flex_positions)]

In [603]:
# Distinct player names listed on each site
fd_names = set(fd_flex['name'].unique())
dk_names = set(dk_flex['name'].unique())

In [604]:
# Pull the FanDuel flex slate apart into parallel lists, one entry per listed player.
# Note: team_list and opponent_list are rebound here and no longer hold the per-matchup
# lists built earlier in the notebook.
name_list = fd_flex['name'].tolist()
position_list = fd_flex['position'].tolist()
team_list = fd_flex['team'].tolist()
opponent_list = fd_flex['opponent'].tolist()

# dk_name_list = []
# dk_position_list = []
# dk_team_list = []
# dk_opponent_list = []

# for index, row in dk_flex.iterrows():
#     dk_name_list.append(row['name'])          # Add player name to name_list
#     dk_position_list.append(row['position'])  # Add player position to position_list
#     dk_team_list.append(row['team'])          # Add player's team to team_list
#     dk_opponent_list.append(row['opponent'])  # Add opponent team to opponent_list

# Display the resulting lists
# print("Name list:", name_list)
# print("Position list:", position_list)
# print("Team list:", team_list)
# print("Opponent list:", opponent_list)


In [605]:
len(name_list), len(position_list)

(603, 603)

In [606]:
# Skeleton of the current-week player rows: identifiers only, no stats yet
n_players = len(name_list)
to_concat = {
    'season': [season] * n_players,
    'week': [week] * n_players,
    'player_display_name': name_list,
    'position': position_list,
    'recent_team': team_list,
    'opponent_team': opponent_list,
}

In [607]:
to_concat = pd.DataFrame(to_concat)

In [608]:
to_concat

Unnamed: 0,season,week,player_display_name,position,recent_team,opponent_team
0,2024,5,CeeDee Lamb,WR,DAL,PIT
1,2024,5,Ja'Marr Chase,WR,CIN,BAL
2,2024,5,Derrick Henry,RB,BAL,CIN
3,2024,5,Nico Collins,WR,HOU,BUF
4,2024,5,Jordan Mason,RB,SF,ARI
...,...,...,...,...,...,...
598,2024,5,Tre Mosley,WR,CIN,BAL
599,2024,5,Matt Sokol,TE,PIT,DAL
600,2024,5,Jesper Horsted,TE,CAR,CHI
601,2024,5,Dalton Keene,TE,HOU,BUF


In [609]:
to_concat['recent_team'].value_counts()

recent_team
CAR    32
MIA    31
BAL    30
DAL    29
PIT    29
GB     29
SEA    28
JAX    28
ARI    28
IND    28
NYG    27
SF     27
CHI    27
CLE    27
DEN    27
CIN    26
NE     26
HOU    26
BUF    25
WAS    25
LV     25
LAR    23
Name: count, dtype: int64

In [610]:
flex_24.columns

Index(['season', 'week', 'recent_team', 'opponent_team', 'player_id',
       'player_display_name', 'position', 'rushing_yards', 'rushing_tds',
       'rushing_fumbles_lost', 'receptions', 'receiving_yards',
       'receiving_tds', 'receiving_fumbles_lost', 'targets', 'carries',
       'receiving_yards_after_catch', 'receiving_air_yards', 'air_yards_share',
       'target_share', 'carry_share', 'in_10_share', 'FD_Pts', 'DK_Pts',
       'DK_Pts_RB_DvP', 'DK_Pts_TE_DvP', 'DK_Pts_WR_DvP', 'FD_Pts_RB_DvP',
       'FD_Pts_TE_DvP', 'FD_Pts_WR_DvP'],
      dtype='object')

In [611]:
# Only the matchup keys and the DvP features are needed from the placeholder rows
dvp_feature_cols = [
    'DK_Pts_RB_DvP', 'DK_Pts_TE_DvP', 'DK_Pts_WR_DvP',
    'FD_Pts_RB_DvP', 'FD_Pts_TE_DvP', 'FD_Pts_WR_DvP',
]
flex_24 = flex_24[['season', 'week', 'recent_team', 'opponent_team'] + dvp_feature_cols]

In [612]:
# Give every listed player the DvP features of their matchup (default inner join on the keys)
to_concat = to_concat.merge(
    flex_24,
    on=['season', 'week', 'recent_team', 'opponent_team'],
)

In [613]:
to_concat

Unnamed: 0,season,week,player_display_name,position,recent_team,opponent_team,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,FD_Pts_RB_DvP,FD_Pts_TE_DvP,FD_Pts_WR_DvP
0,2024,5,CeeDee Lamb,WR,DAL,PIT,16.4500,12.5375,29.925,13.7625,10.4125,23.6125
1,2024,5,Jake Ferguson,TE,DAL,PIT,16.4500,12.5375,29.925,13.7625,10.4125,23.6125
2,2024,5,Rico Dowdle,RB,DAL,PIT,16.4500,12.5375,29.925,13.7625,10.4125,23.6125
3,2024,5,Ezekiel Elliott,RB,DAL,PIT,16.4500,12.5375,29.925,13.7625,10.4125,23.6125
4,2024,5,Brandin Cooks,WR,DAL,PIT,16.4500,12.5375,29.925,13.7625,10.4125,23.6125
...,...,...,...,...,...,...,...,...,...,...,...,...
598,2024,5,Salvon Ahmed,RB,DEN,LV,24.5625,9.8625,35.125,20.6250,7.5500,28.7500
599,2024,5,Blake Watson,RB,DEN,LV,24.5625,9.8625,35.125,20.6250,7.5500,28.7500
600,2024,5,Mitchell Fraboni,TE,DEN,LV,24.5625,9.8625,35.125,20.6250,7.5500,28.7500
601,2024,5,Donald Parham,TE,DEN,LV,24.5625,9.8625,35.125,20.6250,7.5500,28.7500


In [614]:
to_concat['opponent_team'].value_counts()

opponent_team
CHI    32
NE     31
CIN    30
PIT    29
DAL    29
LAR    29
NYG    28
IND    28
SF     28
JAX    28
SEA    27
ARI    27
CAR    27
WAS    27
LV     27
BAL    26
MIA    26
BUF    26
HOU    25
CLE    25
DEN    25
GB     23
Name: count, dtype: int64

In [615]:
# Drop the team-level placeholder rows for the current week; the player-level rows replace them
flex_df = flex_df[(flex_df['season'] != season) | (flex_df['week'] != week)]

In [616]:
# Append the current-week player rows; columns missing from to_concat come through as NaN
flex_df = pd.concat([flex_df, to_concat])

In [617]:
flex_df

Unnamed: 0,season,week,recent_team,opponent_team,player_id,player_display_name,position,rushing_yards,rushing_tds,rushing_fumbles_lost,...,carry_share,in_10_share,FD_Pts,DK_Pts,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,FD_Pts_RB_DvP,FD_Pts_TE_DvP,FD_Pts_WR_DvP
0,2022,1,ARI,KC,00-0027942,A.J. Green,WR,0.0,0.0,0.0,...,0.000,0.143,2.3,3.3,22.681675,11.910102,34.423663,19.682594,9.413345,27.33893
1,2022,1,ARI,KC,00-0030061,Zach Ertz,TE,0.0,0.0,0.0,...,0.000,0.429,10.4,11.4,22.681675,11.910102,34.423663,19.682594,9.413345,27.33893
2,2022,1,ARI,KC,00-0033553,James Conner,RB,26.0,1.0,0.0,...,0.476,0.286,14.0,16.5,22.681675,11.910102,34.423663,19.682594,9.413345,27.33893
3,2022,1,ARI,KC,00-0035500,Greg Dortch,WR,0.0,0.0,0.0,...,0.000,0.000,9.8,13.3,22.681675,11.910102,34.423663,19.682594,9.413345,27.33893
4,2022,1,ARI,KC,00-0035527,Andy Isabella,WR,0.0,0.0,0.0,...,0.000,0.000,1.5,2.0,22.681675,11.910102,34.423663,19.682594,9.413345,27.33893
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
598,2024,5,DEN,LV,,Salvon Ahmed,RB,,,,...,,,,,24.562500,9.862500,35.125000,20.625000,7.550000,28.75000
599,2024,5,DEN,LV,,Blake Watson,RB,,,,...,,,,,24.562500,9.862500,35.125000,20.625000,7.550000,28.75000
600,2024,5,DEN,LV,,Mitchell Fraboni,TE,,,,...,,,,,24.562500,9.862500,35.125000,20.625000,7.550000,28.75000
601,2024,5,DEN,LV,,Donald Parham,TE,,,,...,,,,,24.562500,9.862500,35.125000,20.625000,7.550000,28.75000


In [618]:
# Per-game stat columns that get a rolling "_L8" counterpart
cols_L8 = [
    'rushing_yards', 'rushing_tds', 'rushing_fumbles_lost',
    'receptions', 'receiving_yards', 'receiving_tds', 'receiving_fumbles_lost',
    'targets', 'carries',
    'receiving_yards_after_catch', 'receiving_air_yards',
    'air_yards_share', 'target_share', 'carry_share', 'in_10_share',
]

In [619]:
# One row per (player name, season, week); stats from rows sharing that key are summed
l8_group_keys = ['player_display_name', 'season', 'week']
flex_L8_features = flex_df.groupby(l8_group_keys)[cols_L8].sum()

# L8 variables
L8 variables are rolling means of features over the last eight games. Just like we did for the DvP variables, we'll calculate features over the previous eight games for individual players (RBs, WRs and TEs).

In [620]:
# Order each player's games chronologically, then number them within each season (1-based)
flex_L8_features = (
    flex_L8_features
    .sort_values(by=['player_display_name', 'season', 'week'])
    .assign(
        game_num=lambda games: games.groupby(['player_display_name', 'season']).cumcount() + 1
    )
)

In [621]:
# Rolling means of each player's previous games, one *_L8 column per stat in cols_L8
flex_L8_features = (
    flex_L8_features
    .groupby(['player_display_name'], as_index=False)
    .apply(calculate_equal_rolling_mean, cols=cols_L8, suffix='_L8')
)

In [622]:
# Turn the index levels (player name, season, week and the group counter) back into columns
flex_L8_features = flex_L8_features.reset_index()

In [623]:
flex_L8_features.columns

Index(['level_0', 'player_display_name', 'season', 'week', 'rushing_yards',
       'rushing_tds', 'rushing_fumbles_lost', 'receptions', 'receiving_yards',
       'receiving_tds', 'receiving_fumbles_lost', 'targets', 'carries',
       'receiving_yards_after_catch', 'receiving_air_yards', 'air_yards_share',
       'target_share', 'carry_share', 'in_10_share', 'game_num',
       'rushing_yards_L8', 'rushing_tds_L8', 'rushing_fumbles_lost_L8',
       'receptions_L8', 'receiving_yards_L8', 'receiving_tds_L8',
       'receiving_fumbles_lost_L8', 'targets_L8', 'carries_L8',
       'receiving_yards_after_catch_L8', 'receiving_air_yards_L8',
       'air_yards_share_L8', 'target_share_L8', 'carry_share_L8',
       'in_10_share_L8'],
      dtype='object')

In [624]:
# Keep the keys, game_num and the *_L8 features; drop the group counter left by reset_index
# and the raw per-game stats (the same columns listed in cols_L8)
flex_L8_features = flex_L8_features.drop(columns=['level_0'] + cols_L8)

In [625]:
flex_L8_features

Unnamed: 0,player_display_name,season,week,game_num,rushing_yards_L8,rushing_tds_L8,rushing_fumbles_lost_L8,receptions_L8,receiving_yards_L8,receiving_tds_L8,receiving_fumbles_lost_L8,targets_L8,carries_L8,receiving_yards_after_catch_L8,receiving_air_yards_L8,air_yards_share_L8,target_share_L8,carry_share_L8,in_10_share_L8
0,A.J. Barner,2024,3,1,,,,,,,,,,,,,,,
1,A.J. Barner,2024,4,2,0.000000,0.000000,0.0,3.000000,13.000000,0.0,0.0,3.000000,0.000000,11.000000,2.000000,0.010363,0.090909,0.000000,0.000000
2,A.J. Brown,2022,1,1,,,,,,,,,,,,,,,
3,A.J. Brown,2022,2,2,0.000000,0.000000,0.0,10.000000,155.000000,0.0,0.0,13.000000,0.000000,63.000000,94.000000,0.728682,0.448276,0.000000,0.167000
4,A.J. Brown,2022,3,3,0.000000,0.000000,0.0,7.500000,112.000000,0.0,0.0,10.500000,0.000000,48.500000,102.500000,0.557721,0.353170,0.000000,0.083500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11490,Zonovan Knight,2022,16,5,63.250000,0.250000,0.0,2.500000,17.000000,0.0,0.0,2.500000,14.750000,25.000000,-8.000000,-0.020311,0.060877,0.614000,0.145750
11491,Zonovan Knight,2022,17,6,50.200000,0.200000,0.0,2.200000,16.600000,0.0,0.0,2.200000,13.000000,22.400000,-5.800000,-0.014043,0.055368,0.551200,0.116600
11492,Zonovan Knight,2022,18,7,46.333333,0.166667,0.0,2.166667,16.666667,0.0,0.0,2.333333,12.166667,22.000000,-5.000000,-0.012165,0.057251,0.537833,0.097167
11493,Zonovan Knight,2023,3,1,42.857143,0.142857,0.0,1.857143,14.285714,0.0,0.0,2.000000,12.142857,18.857143,-4.285714,-0.010427,0.049072,0.546714,0.083286


In [626]:
# Attach each player's L8 features to their game rows; a left join keeps every flex_df row
flex_df = flex_df.merge(
    flex_L8_features,
    how='left',
    on=['player_display_name', 'season', 'week'],
)

In [627]:
flex_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11495 entries, 0 to 11494
Data columns (total 46 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   season                          11495 non-null  int64  
 1   week                            11495 non-null  int64  
 2   recent_team                     11495 non-null  object 
 3   opponent_team                   11495 non-null  object 
 4   player_id                       10892 non-null  object 
 5   player_display_name             11495 non-null  object 
 6   position                        11495 non-null  object 
 7   rushing_yards                   10892 non-null  float64
 8   rushing_tds                     10892 non-null  float64
 9   rushing_fumbles_lost            10892 non-null  float64
 10  receptions                      10892 non-null  float64
 11  receiving_yards                 10892 non-null  float64
 12  receiving_tds                   

# Filling missing values with mean
In our QB model we made the mistake of dropping missing values at this point. Rookies making their debut won't have L8 variables because they haven't played before. So let's fill those values with the mean for the player's position.

In [628]:
# The rolling-mean counterpart of every column in cols_L8, in the same order
cols_to_fill = [f'{stat}_L8' for stat in cols_L8]

In [629]:
def _fill_with_group_mean(values):
    """Replace missing entries of ``values`` with the mean of its non-missing entries."""
    return values.fillna(values.mean())


# Missing L8 features are filled with the mean of that feature among players at the same position
for col in cols_to_fill:
    flex_df[col] = flex_df.groupby('position')[col].transform(_fill_with_group_mean)

In [630]:
flex_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11495 entries, 0 to 11494
Data columns (total 46 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   season                          11495 non-null  int64  
 1   week                            11495 non-null  int64  
 2   recent_team                     11495 non-null  object 
 3   opponent_team                   11495 non-null  object 
 4   player_id                       10892 non-null  object 
 5   player_display_name             11495 non-null  object 
 6   position                        11495 non-null  object 
 7   rushing_yards                   10892 non-null  float64
 8   rushing_tds                     10892 non-null  float64
 9   rushing_fumbles_lost            10892 non-null  float64
 10  receptions                      10892 non-null  float64
 11  receiving_yards                 10892 non-null  float64
 12  receiving_tds                   

# A few more features
yards_per_carry, yards_per_reception, yards_per_target

In [631]:
# Per-touch efficiency over the L8 window. A zero denominator yields NaN (0/0) or ±inf (x/0);
# both are normalised to NaN here so the zero-fill that follows treats every divide-by-zero
# case the same way and no inf values survive into the model-ready frame.
for ratio_col, numerator_col, denominator_col in (
    ('yards_per_carry_L8', 'rushing_yards_L8', 'carries_L8'),
    ('yards_per_reception_L8', 'receiving_yards_L8', 'receptions_L8'),
    ('yards_per_target_L8', 'receiving_yards_L8', 'targets_L8'),
):
    flex_df[ratio_col] = (
        (flex_df[numerator_col] / flex_df[denominator_col])
        .replace([np.inf, -np.inf], np.nan)
    )

# More missing
Now this gives us some more missing values. In most cases, it's because we've tried to divide by zero when a player has averaged 0 carries over the last eight games. So here we should fill the missing values with 0

In [632]:
# Zero-fill every remaining NaN in the frame. This applies to all columns, so for the
# current-week rows player_id, FD_Pts and DK_Pts also become 0.
flex_df = flex_df.fillna(0)

In [633]:
flex_df_full = flex_df.copy()

In [634]:
# The raw per-game stats are not features; only their *_L8 rolling means stay
flex_df = flex_df.drop(columns=cols_L8)

In [635]:
flex_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11495 entries, 0 to 11494
Data columns (total 34 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   season                          11495 non-null  int64  
 1   week                            11495 non-null  int64  
 2   recent_team                     11495 non-null  object 
 3   opponent_team                   11495 non-null  object 
 4   player_id                       11495 non-null  object 
 5   player_display_name             11495 non-null  object 
 6   position                        11495 non-null  object 
 7   FD_Pts                          11495 non-null  float64
 8   DK_Pts                          11495 non-null  float64
 9   DK_Pts_RB_DvP                   11495 non-null  float64
 10  DK_Pts_TE_DvP                   11495 non-null  float64
 11  DK_Pts_WR_DvP                   11495 non-null  float64
 12  FD_Pts_RB_DvP                   

In [636]:
# Keep only the rows for the week being predicted. Copy explicitly: this frame is modified
# in place afterwards (replace/rename with inplace=True), which on a bare boolean-mask
# slice can trigger pandas' SettingWithCopyWarning.
flex_df = flex_df[(flex_df['season'] == season) & (flex_df['week'] == week)].copy()

In [637]:
flex_df['opponent_team'].nunique()

22

In [638]:
# Any infinite values become NaN
flex_df = flex_df.replace([np.inf, -np.inf], np.nan)

# Odds, grass, outdoors, wind
We'll bring in CSVs with FanDuel and DraftKings odds.<br>
This will be where we need to separate FanDuel from DraftKings dataframes since the odds and totals might be different.

In [639]:
# Per-site spread/total files are named <site>_spreads_<season>_<week>.csv
fd_spreads = pd.read_csv(f'fd_spreads_{season}_{week}.csv')
dk_spreads = pd.read_csv(f'dk_spreads_{season}_{week}.csv')

In [640]:
fd_spreads.head()

Unnamed: 0.1,Unnamed: 0,opponent,team,total_line,outdoors,grass,home_team,spread_line,pred_total,opp_total,wind,season,week
0,0,MIN,NYJ,40.5,0,1,0,-2.5,19.0,21.5,0,2024,5
1,1,CIN,BAL,48.5,1,0,0,2.5,25.5,23.0,12,2024,5
2,2,HOU,BUF,47.5,0,0,0,-1.5,23.0,24.5,0,2024,5
3,3,CHI,CAR,40.5,1,1,0,-3.5,18.5,22.0,17,2024,5
4,4,WAS,CLE,43.5,1,1,0,-3.0,20.25,23.25,5,2024,5


In [641]:
dk_spreads.head()

Unnamed: 0.1,Unnamed: 0,opponent,team,total_line,outdoors,grass,home_team,spread_line,pred_total,opp_total,wind,season,week
0,0,MIN,NYJ,40.5,0,1,0,-2.0,19.25,21.25,0,2024,5
1,1,CIN,BAL,48.5,1,0,0,2.5,25.5,23.0,12,2024,5
2,2,HOU,BUF,47.0,0,0,0,-1.0,23.0,24.0,0,2024,5
3,3,CHI,CAR,41.0,1,1,0,-4.0,18.5,22.5,17,2024,5
4,4,WAS,CLE,43.5,1,1,0,-3.0,20.25,23.25,5,2024,5


In [642]:
# 'Unnamed: 0' is a leftover row-number column read from the CSV; it is not a feature
fd_spreads = fd_spreads.drop(columns=['Unnamed: 0'])
dk_spreads = dk_spreads.drop(columns=['Unnamed: 0'])

In [643]:
# Align the key names with the spreads files, which use 'team' and 'opponent'
flex_df = flex_df.rename(columns={'recent_team': 'team', 'opponent_team': 'opponent'})

In [644]:
# From here on FanDuel and DraftKings get separate frames, each joined to its own lines
# (default inner join on the matchup keys)
spread_keys = ['team', 'opponent', 'season', 'week']
flex_df_fd = flex_df.merge(fd_spreads, on=spread_keys)
flex_df_dk = flex_df.merge(dk_spreads, on=spread_keys)

In [645]:
len(flex_df_fd), len(flex_df_dk)

(603, 603)

In [646]:
# weekly_df[weekly_df['player_display_name'] == 'Jonathan Taylor'].tail()

<!-- # Bringing in point spreads and totals
Pulling this dataframe from our database. It also includes binary variables for indoors and grass. -->

In [647]:
# # Connect to the SQLite database
# conn = sqlite3.connect('nfl_dfs.db')

# # Query specific columns from the table
# query1 = "SELECT * FROM spreads_totals"
# spread_df = pd.read_sql_query(query1, conn)

# # Close the connection
# conn.close()

In [648]:
flex_df.columns

Index(['season', 'week', 'team', 'opponent', 'player_id',
       'player_display_name', 'position', 'FD_Pts', 'DK_Pts', 'DK_Pts_RB_DvP',
       'DK_Pts_TE_DvP', 'DK_Pts_WR_DvP', 'FD_Pts_RB_DvP', 'FD_Pts_TE_DvP',
       'FD_Pts_WR_DvP', 'game_num', 'rushing_yards_L8', 'rushing_tds_L8',
       'rushing_fumbles_lost_L8', 'receptions_L8', 'receiving_yards_L8',
       'receiving_tds_L8', 'receiving_fumbles_lost_L8', 'targets_L8',
       'carries_L8', 'receiving_yards_after_catch_L8',
       'receiving_air_yards_L8', 'air_yards_share_L8', 'target_share_L8',
       'carry_share_L8', 'in_10_share_L8', 'yards_per_carry_L8',
       'yards_per_reception_L8', 'yards_per_target_L8'],
      dtype='object')

In [649]:
#quarterback_df.drop(columns = ['defteam', 'passing_air_yards'], inplace = True)

In [650]:
flex_df.head()

Unnamed: 0,season,week,team,opponent,player_id,player_display_name,position,FD_Pts,DK_Pts,DK_Pts_RB_DvP,...,carries_L8,receiving_yards_after_catch_L8,receiving_air_yards_L8,air_yards_share_L8,target_share_L8,carry_share_L8,in_10_share_L8,yards_per_carry_L8,yards_per_reception_L8,yards_per_target_L8
10892,2024,5,DAL,PIT,0,CeeDee Lamb,WR,0.0,0.0,16.45,...,1.375,52.125,100.125,0.335701,0.291383,0.058125,0.218,5.818182,14.245902,9.764045
10893,2024,5,DAL,PIT,0,Jake Ferguson,TE,0.0,0.0,16.45,...,0.0,27.125,42.0,0.1345,0.208697,0.0,0.03125,0.0,9.630435,7.031746
10894,2024,5,DAL,PIT,0,Rico Dowdle,RB,0.0,0.0,16.45,...,6.25,18.125,6.75,0.020864,0.075081,0.285375,0.18225,3.98,9.375,6.25
10895,2024,5,DAL,PIT,0,Ezekiel Elliott,RB,0.0,0.0,16.45,...,9.25,20.5,-2.875,-0.011907,0.136156,0.450625,0.375,3.054054,4.730769,3.617647
10896,2024,5,DAL,PIT,0,Brandin Cooks,WR,0.0,0.0,16.45,...,0.625,2.5,75.125,0.243621,0.145635,0.025375,0.10125,4.0,8.964286,5.577778


In [651]:
# Team abbreviations grouped by division; inverted below into a team -> division lookup.
_teams_by_division = {
    'AFC East': ['NYJ', 'MIA', 'BUF', 'NE'],
    'AFC North': ['CLE', 'CIN', 'PIT', 'BAL'],
    'AFC South': ['HOU', 'TEN', 'JAX', 'IND'],
    'AFC West': ['LV', 'KC', 'DEN', 'LAC'],
    'NFC East': ['NYG', 'PHI', 'DAL', 'WAS'],
    'NFC North': ['MIN', 'DET', 'CHI', 'GB'],
    'NFC South': ['CAR', 'NO', 'ATL', 'TB'],
    'NFC West': ['LAR', 'SEA', 'ARI', 'SF'],
}

# `divisions` maps each team abbreviation to the name of its division.
divisions = {
    team: division
    for division, teams in _teams_by_division.items()
    for team in teams
}

# Adding div_game binary column

In [652]:
# div_game is 1 when team and opponent map to the same division, otherwise 0.
# The two division lookups are compared directly, so no temporary columns are needed.
flex_df_fd['div_game'] = np.where(
    flex_df_fd['team'].map(divisions).eq(flex_df_fd['opponent'].map(divisions)), 1, 0
)

In [653]:
# div_game is 1 when team and opponent map to the same division, otherwise 0.
# The two division lookups are compared directly, so no temporary columns are needed.
flex_df_dk['div_game'] = np.where(
    flex_df_dk['team'].map(divisions).eq(flex_df_dk['opponent'].map(divisions)), 1, 0
)

In [654]:
# Check for positive or negative infinity in the entire DataFrame
# infinity_mask_fd = np.isinf(flex_df_fd)
# infinity_mask_dk = np.isinf(flex_df_dk)

# # Display rows with infinity values
# infinity_rows_fd = flex_df_fd[infinity_mask_fd.any(axis=1)]
# infinity_rows_dk = flex_df_dk[infinity_mask_dk.any(axis=1)]
# print(infinity_rows_fd)
# print(infinity_rows_dk)

# One-hot encoding for position

In [655]:
# One indicator column per distinct position value, named pos_<position>
position_dummies_fd = pd.get_dummies(flex_df_fd['position'], prefix = 'pos', prefix_sep = '_')
position_dummies_dk = pd.get_dummies(flex_df_dk['position'], prefix = 'pos', prefix_sep = '_')

In [656]:
# Append the position indicator columns to the FanDuel frame
flex_df_fd = pd.concat((flex_df_fd, position_dummies_fd), axis = 'columns')

In [657]:
# Append the position indicator columns to the DraftKings frame
flex_df_dk = pd.concat((flex_df_dk, position_dummies_dk), axis = 'columns')

In [658]:
# Store the three position indicators as integers in both frames
flex_df_fd = flex_df_fd.astype({'pos_RB': int, 'pos_TE': int, 'pos_WR': int})
flex_df_dk = flex_df_dk.astype({'pos_RB': int, 'pos_TE': int, 'pos_WR': int})

In [659]:
# Remove the FD_Pts / DK_Pts columns from both frames
flex_df_fd.drop(['FD_Pts', 'DK_Pts'], axis = 'columns', inplace = True)
flex_df_dk.drop(['FD_Pts', 'DK_Pts'], axis = 'columns', inplace = True)

In [660]:
# Keep only the site-list columns that are used from here on
fanduel_df = fanduel_df.loc[:, ['name', 'position', 'salary', 'team', 'opponent', 'status', 'week']]
draftkings_df = draftkings_df.loc[:, ['name', 'position', 'salary', 'team', 'opponent', 'status', 'week']]

In [661]:
# Restrict both site lists to the FLEX-eligible positions
fanduel_df = fanduel_df.loc[fanduel_df['position'].isin(['RB', 'WR', 'TE'])]
draftkings_df = draftkings_df.loc[draftkings_df['position'].isin(['RB', 'WR', 'TE'])]

In [662]:
len(fanduel_df), len(draftkings_df)

(603, 410)

In [663]:
#################### Sept. 16, 2024 #####################
#Just filtered for Flex positions.
#Next will be to rename player_display_name to name and try to merge again with the flex data.
#Don't forget QB strength variables!!!!!

In [664]:
len(flex_df_fd), len(flex_df_dk)

(603, 603)

In [665]:
# Name the player column `name`, matching the column of the same name in the site lists
flex_df_fd.rename({'player_display_name': 'name'}, axis = 'columns', inplace = True)
flex_df_dk.rename({'player_display_name': 'name'}, axis = 'columns', inplace = True)

# Name matching here

In [666]:
def clean_name(name):
    """Normalize a player name so the same player is spelled the same way everywhere.

    Steps (all case-sensitive):
      1. Collapse dotted initials such as "C.J." or "C. J." into "CJ".
      2. Remove the stand-alone suffixes Sr, Jr, II, III and IV.
      3. Remove any remaining commas and periods, then strip outer whitespace.

    Parameters
    ----------
    name : str
        Raw player name.

    Returns
    -------
    str
        The cleaned name.
    """
    # Collapse initials. There is no trailing \b here: between "." and a following
    # space there is no word boundary, so requiring one would stop "C. J. Stroud"
    # from ever matching.
    name = re.sub(r'\b([A-Z])\.\s*([A-Z])\.', r'\1\2', name)

    # Remove suffixes only when they are whole words; without the boundaries the
    # pattern would also delete "Sr"/"Jr"/"II"/"IV" occurring inside a name.
    name = re.sub(r'\b(?:Sr|Jr|III|II|IV)\b', '', name)

    # Drop leftover punctuation (e.g. the "." after a suffix, or in "St.")
    cleaned_name = re.sub(r'[,.]', '', name).strip()

    return cleaned_name

In [667]:
import re
from rapidfuzz import process, fuzz

In [668]:
# Normalize every player name in both frames with clean_name
flex_df_fd['name'] = flex_df_fd['name'].map(clean_name)
flex_df_dk['name'] = flex_df_dk['name'].map(clean_name)

In [669]:
# Minimum similarity score a candidate must reach to count as a match;
# fuzzy_match accepts a candidate when its score is >= this value.
SIMILARITY_THRESHOLD = 80

In [670]:
def fuzzy_match(name, dk_names):
    """Return the entry of `dk_names` most similar to `name`, or None.

    Similarity is scored with rapidfuzz's `fuzz.token_sort_ratio`. The best
    candidate is returned only if its score is at least SIMILARITY_THRESHOLD.

    Parameters
    ----------
    name : str
        Name to look up.
    dk_names : collection of str
        Candidate names to match against.

    Returns
    -------
    str or None
        The best-matching candidate, or None when there is no candidate or
        the best score is below the threshold.
    """
    best = process.extractOne(name, dk_names, scorer=fuzz.token_sort_ratio)
    # extractOne returns None instead of a tuple when nothing can be matched
    # (e.g. an empty candidate list); unpacking that directly raises TypeError.
    if best is None:
        return None
    match, score, _ = best
    return match if score >= SIMILARITY_THRESHOLD else None

In [671]:
# Candidate names passed to fuzzy_match: the (cleaned) names in the FanDuel-side flex frame
flex_names = flex_df_fd['name']

In [672]:
# For every player, store the closest name found in `flex_names`
# (None when the best score is below SIMILARITY_THRESHOLD).
# NOTE(review): `flex_names` is flex_df_fd['name'] itself, so the FanDuel names are
# compared against their own list, and the DraftKings frame appears to hold the same
# players (both frames are built from flex_df). As written this looks like it can
# never report a mismatch — TODO confirm whether the candidates were meant to be
# the FanDuel / DraftKings site-list names instead.
flex_df_fd['matched_name'] = flex_df_fd['name'].apply(lambda x: fuzzy_match(x, flex_names))
flex_df_dk['matched_name'] = flex_df_dk['name'].apply(lambda x: fuzzy_match(x, flex_names))

In [673]:
# FanDuel-frame rows for which no fuzzy match was found
unmatched_in_fd = flex_df_fd.loc[flex_df_fd['matched_name'].isna()]

In [674]:
# Renumber the rows from 0 and discard the old index
unmatched_in_fd = unmatched_in_fd.reset_index(drop = True)

In [675]:
unmatched_in_fd

Unnamed: 0,season,week,team,opponent,player_id,name,position,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,...,home_team,spread_line,pred_total,opp_total,wind,div_game,pos_RB,pos_TE,pos_WR,matched_name


In [676]:
flex_df_fd[(flex_df_fd['name'] != flex_df_fd['matched_name']) & (flex_df_fd['matched_name'].notna())]

Unnamed: 0,season,week,team,opponent,player_id,name,position,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,...,home_team,spread_line,pred_total,opp_total,wind,div_game,pos_RB,pos_TE,pos_WR,matched_name


In [677]:
# DraftKings-frame rows for which no fuzzy match was found
unmatched_in_dk = flex_df_dk.loc[flex_df_dk['matched_name'].isna()]

In [678]:
# Renumber the rows from 0 and discard the old index
unmatched_in_dk = unmatched_in_dk.reset_index(drop = True)

In [679]:
unmatched_in_dk

Unnamed: 0,season,week,team,opponent,player_id,name,position,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,...,home_team,spread_line,pred_total,opp_total,wind,div_game,pos_RB,pos_TE,pos_WR,matched_name


In [680]:
flex_df_dk[(flex_df_dk['name'] != flex_df_dk['matched_name']) & (flex_df_dk['matched_name'].notna())]

Unnamed: 0,season,week,team,opponent,player_id,name,position,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,...,home_team,spread_line,pred_total,opp_total,wind,div_game,pos_RB,pos_TE,pos_WR,matched_name


In [681]:
# Left-join the flex features onto each site's player list, so every listed
# player keeps a row even when no feature row matches.
flex_df_fd = fanduel_df.merge(flex_df_fd, how = 'left', on = ['name', 'team', 'opponent', 'week'])
flex_df_dk = draftkings_df.merge(flex_df_dk, how = 'left', on = ['name', 'team', 'opponent', 'week'])

In [682]:
# Keep the left-hand (site list) position as the single `position` column,
# placed last, and discard the right-hand copy (position_y).
flex_df_fd['position'] = flex_df_fd.pop('position_x')
del flex_df_fd['position_y']
flex_df_dk['position'] = flex_df_dk.pop('position_x')
del flex_df_dk['position_y']

In [683]:
flex_df_fd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 603 entries, 0 to 602
Data columns (total 47 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   name                            603 non-null    object 
 1   salary                          603 non-null    int64  
 2   team                            603 non-null    object 
 3   opponent                        603 non-null    object 
 4   status                          603 non-null    object 
 5   week                            603 non-null    int64  
 6   season                          603 non-null    int64  
 7   player_id                       603 non-null    int64  
 8   DK_Pts_RB_DvP                   603 non-null    float64
 9   DK_Pts_TE_DvP                   603 non-null    float64
 10  DK_Pts_WR_DvP                   603 non-null    float64
 11  FD_Pts_RB_DvP                   603 non-null    float64
 12  FD_Pts_TE_DvP                   603 

In [684]:
flex_df_dk.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 410 entries, 0 to 409
Data columns (total 47 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   name                            410 non-null    object 
 1   salary                          410 non-null    int64  
 2   team                            410 non-null    object 
 3   opponent                        410 non-null    object 
 4   status                          410 non-null    object 
 5   week                            410 non-null    int64  
 6   season                          409 non-null    float64
 7   player_id                       409 non-null    float64
 8   DK_Pts_RB_DvP                   409 non-null    float64
 9   DK_Pts_TE_DvP                   409 non-null    float64
 10  DK_Pts_WR_DvP                   409 non-null    float64
 11  FD_Pts_RB_DvP                   409 non-null    float64
 12  FD_Pts_TE_DvP                   409 

In [685]:
###FANDUEL SCORING
#Rushing yards made = 0.1pts	
#Rushing touchdowns = 6pts	
#Passing yards = 0.04pts	
#Passing touchdowns = 4pts	
#Interceptions = -1pt	
#Receiving yards = 0.1pts	
#Receiving touchdowns = 6pts	
#Receptions = 0.5pts	
#Kickoff return touchdowns = 6pts	
#Punt return touchdowns = 6pts	
#Fumbles lost = -2pts	
#Own fumbles recovered touchdowns = 6pts	
#Two-point conversions scored = 2pts	
#Two-point conversion passes = 2pts	
#Field-goals from 0-39 yards = 3pts	
#Field-goals from 40-49 yards = 4pts	
#Field-goals from 50+ yards = 5pts	
#Extra-point conversions = 1pt

###DRAFTKINGS SCORING
#Passing TD = 4 pts
#passing yards = .04 pts
#300 passing yards = 3 pts (bonus)
#Interception = -1 pts
#Rushing TD = 6 pts
#Rushing yds = 0.1 pts
#100 yd rushing game = 3 pts (bonus)
#Receiving TD = 6 pts
#Receiving yds = 0.1 pts
#100 receiving yards game = 3 pts (bonus)
#Receptions = 1 pt
#Punt/kickoff/FG return for TD = 6 pts
#Fumble lost = -1 pt
#2 pt conversion (pass, run or catch) = 2 pts
#Offensive fumble recovery TD = 6

In [686]:
# Model input columns, in the order they are written to the output CSVs.
features = (
    # defense-vs-position points allowed
    ['DK_Pts_RB_DvP', 'DK_Pts_TE_DvP', 'DK_Pts_WR_DvP',
     'FD_Pts_RB_DvP', 'FD_Pts_TE_DvP', 'FD_Pts_WR_DvP']
    # player L8 production and usage
    + ['rushing_yards_L8', 'rushing_tds_L8', 'rushing_fumbles_lost_L8',
       'receptions_L8', 'receiving_yards_L8', 'receiving_tds_L8',
       'receiving_fumbles_lost_L8', 'targets_L8', 'carries_L8',
       'receiving_yards_after_catch_L8', 'receiving_air_yards_L8',
       'target_share_L8', 'carry_share_L8', 'in_10_share_L8',
       'yards_per_carry_L8', 'yards_per_reception_L8', 'yards_per_target_L8']
    # game environment
    + ['wind', 'div_game', 'spread_line', 'total_line', 'outdoors', 'grass',
       'home_team', 'pred_total', 'opp_total']
    # quarterback strength
    + ['qb_comp', 'qb_att', 'qb_yds', 'qb_pass_td', 'qb_int', 'qb_comp_pct',
       'qb_yds_per_att', 'qb_td_pct', 'qb_int_pct']
    # position indicators
    + ['pos_RB', 'pos_TE', 'pos_WR']
)

# QB strength
The last set of features we want to add measures quarterback strength. If a team's backup QB is playing, the entire offense is downgraded, which could hurt the fantasy performance of its RBs, WRs and TEs.<br>

We'll take the dataset we used for the QB model, filter for the primary QBs in each game and use some of the variables that look at L8 means.

In [687]:
# # Connect to the SQLite database
# conn = sqlite3.connect('nfl_dfs.db')

# query_qb = "SELECT * FROM qb_dataset WHERE season >= 2006"

# qb_df = pd.read_sql_query(query_qb, conn)

# # Close the database connection
# conn.close()

In [688]:
# Load the per-site QB model CSVs for the current season and week
qb_data_fd = pd.read_csv(f'FD_QB_for_model_{season}_{week}.csv')
qb_data_dk = pd.read_csv(f'DK_QB_for_model_{season}_{week}.csv')

In [689]:
# Keep only the rows whose QB_role equals 1
qb_data_fd = qb_data_fd.loc[qb_data_fd['QB_role'].eq(1)]
qb_data_dk = qb_data_dk.loc[qb_data_dk['QB_role'].eq(1)]

In [690]:
# Give the FanDuel QB stats the qb_* names that appear in `features`
qb_data_fd.rename(
    {
        'completions_L8': 'qb_comp',
        'attempts_L8': 'qb_att',
        'passing_yards_L8': 'qb_yds',
        'passing_tds_L8': 'qb_pass_td',
        'interceptions_L8': 'qb_int',
        'comp_pct': 'qb_comp_pct',
        'yds_per_attempt': 'qb_yds_per_att',
        'td_pct': 'qb_td_pct',
        'int_pct': 'qb_int_pct',
    },
    axis = 'columns',
    inplace = True,
)

In [691]:
# Give the DraftKings QB stats the qb_* names that appear in `features`
qb_data_dk.rename(
    {
        'completions_L8': 'qb_comp',
        'attempts_L8': 'qb_att',
        'passing_yards_L8': 'qb_yds',
        'passing_tds_L8': 'qb_pass_td',
        'interceptions_L8': 'qb_int',
        'comp_pct': 'qb_comp_pct',
        'yds_per_attempt': 'qb_yds_per_att',
        'td_pct': 'qb_td_pct',
        'int_pct': 'qb_int_pct',
    },
    axis = 'columns',
    inplace = True,
)

In [692]:
# Keep the merge keys plus the nine qb_* stats
qb_data_fd = qb_data_fd.loc[:, [
    'season', 'week', 'team', 'opponent',
    'qb_comp', 'qb_att', 'qb_yds', 'qb_pass_td', 'qb_int',
    'qb_comp_pct', 'qb_yds_per_att', 'qb_td_pct', 'qb_int_pct',
]]

In [693]:
# Keep the merge keys plus the nine qb_* stats
qb_data_dk = qb_data_dk.loc[:, [
    'season', 'week', 'team', 'opponent',
    'qb_comp', 'qb_att', 'qb_yds', 'qb_pass_td', 'qb_int',
    'qb_comp_pct', 'qb_yds_per_att', 'qb_td_pct', 'qb_int_pct',
]]

In [694]:
# Left-join the FanDuel QB stats onto the FanDuel flex rows by game keys
flex_df_fd = flex_df_fd.merge(qb_data_fd, how = 'left', on = ['season', 'week', 'team', 'opponent'])

In [695]:
# Left-join the DraftKings QB stats onto the DraftKings flex rows by game keys.
# The right-hand table must be qb_data_dk: the FanDuel merge uses qb_data_fd,
# and qb_data_dk is otherwise prepared but never used.
flex_df_dk = pd.merge(flex_df_dk, qb_data_dk, on = ['season', 'week', 'team', 'opponent'], how = 'left')

In [696]:
# Move the identifying columns into the index so only model inputs remain as columns
flex_df_fd = flex_df_fd.set_index(
    keys = ['name', 'position', 'team', 'opponent', 'salary', 'status', 'week', 'season'],
    drop = True,
)
flex_df_dk = flex_df_dk.set_index(
    keys = ['name', 'position', 'team', 'opponent', 'salary', 'status', 'week', 'season'],
    drop = True,
)

In [697]:
# Keep exactly the model input columns, in the order given by `features`
flex_df_fd = flex_df_fd.loc[:, features]
flex_df_dk = flex_df_dk.loc[:, features]

In [698]:
# Turn +/- infinity into NaN so it is handled together with the other missing values
flex_df_fd.replace({np.inf: np.nan, -np.inf: np.nan}, inplace = True)
flex_df_dk.replace({np.inf: np.nan, -np.inf: np.nan}, inplace = True)

In [699]:
# flex_df_dk[flex_df_dk.isna().any(axis = 1)]

In [700]:
# flex_df_dk = flex_df_dk.dropna()

In [701]:
# Impute each missing value with the mean of that column among players at the
# same position (grouping on the `position` index level).
flex_df_fd = flex_df_fd.groupby(level = 'position').transform(lambda col: col.fillna(col.mean()))
flex_df_dk = flex_df_dk.groupby(level = 'position').transform(lambda col: col.fillna(col.mean()))

In [702]:
flex_df_fd.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 603 entries, ('CeeDee Lamb', 'WR', 'DAL', 'PIT', 9400, 'Active', 5, 2024) to ('John Kelly', 'RB', 'BAL', 'CIN', 4000, 'Active', 5, 2024)
Data columns (total 44 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   DK_Pts_RB_DvP                   603 non-null    float64
 1   DK_Pts_TE_DvP                   603 non-null    float64
 2   DK_Pts_WR_DvP                   603 non-null    float64
 3   FD_Pts_RB_DvP                   603 non-null    float64
 4   FD_Pts_TE_DvP                   603 non-null    float64
 5   FD_Pts_WR_DvP                   603 non-null    float64
 6   rushing_yards_L8                603 non-null    float64
 7   rushing_tds_L8                  603 non-null    float64
 8   rushing_fumbles_lost_L8         603 non-null    float64
 9   receptions_L8                   603 non-null    float64
 10  receiving_yards_L8              603 non-nul

In [703]:
flex_df_dk.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 410 entries, ("Ja'Marr Chase", 'WR', 'CIN', 'BAL', 8000, 'Active', 5, 2024.0) to ('Joel Wilson', 'TE', 'NYG', 'SEA', 2500, 'Active', 5, 2024.0)
Data columns (total 44 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   DK_Pts_RB_DvP                   410 non-null    float64
 1   DK_Pts_TE_DvP                   410 non-null    float64
 2   DK_Pts_WR_DvP                   410 non-null    float64
 3   FD_Pts_RB_DvP                   410 non-null    float64
 4   FD_Pts_TE_DvP                   410 non-null    float64
 5   FD_Pts_WR_DvP                   410 non-null    float64
 6   rushing_yards_L8                410 non-null    float64
 7   rushing_tds_L8                  410 non-null    float64
 8   rushing_fumbles_lost_L8         410 non-null    float64
 9   receptions_L8                   410 non-null    float64
 10  receiving_yards_L8              410 

In [704]:
flex_df_fd.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,FD_Pts_RB_DvP,FD_Pts_TE_DvP,FD_Pts_WR_DvP,rushing_yards_L8,rushing_tds_L8,rushing_fumbles_lost_L8,receptions_L8,...,qb_yds,qb_pass_td,qb_int,qb_comp_pct,qb_yds_per_att,qb_td_pct,qb_int_pct,pos_RB,pos_TE,pos_WR
name,position,team,opponent,salary,status,week,season,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
CeeDee Lamb,WR,DAL,PIT,9400,Active,5,2024,16.45,12.5375,29.925,13.7625,10.4125,23.6125,8.0,0.0,0.0,7.625,...,294.0,2.125,0.75,0.679365,7.466667,0.053968,0.019048,0,0,1
Ja'Marr Chase,WR,CIN,BAL,9300,Active,5,2024,21.825,15.3375,29.35,18.075,11.525,23.4125,0.0,0.0,0.0,4.125,...,257.125,1.875,0.375,0.719101,7.70412,0.05618,0.011236,0,0,1
Derrick Henry,RB,BAL,CIN,9200,Active,5,2024,25.0125,7.2625,36.65,21.075,5.2625,29.8375,96.5,0.875,0.0,1.25,...,231.875,1.875,0.25,0.668161,8.318386,0.067265,0.008969,1,0,0
Nico Collins,WR,HOU,BUF,8800,Active,5,2024,26.975,12.3,24.1625,24.225,9.55,19.1625,1.375,0.0,0.0,7.125,...,196.322049,1.025608,0.563368,0.645006,6.825769,0.034883,0.02013,0,0,1
Jordan Mason,RB,SF,ARI,8700,Active,5,2024,28.2475,12.175,29.3375,24.56,9.8,24.2125,65.75,0.5,0.0,1.0,...,266.75,1.25,0.375,0.666667,8.271318,0.03876,0.011628,1,0,0


In [705]:
flex_df_dk.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,DK_Pts_RB_DvP,DK_Pts_TE_DvP,DK_Pts_WR_DvP,FD_Pts_RB_DvP,FD_Pts_TE_DvP,FD_Pts_WR_DvP,rushing_yards_L8,rushing_tds_L8,rushing_fumbles_lost_L8,receptions_L8,...,qb_yds,qb_pass_td,qb_int,qb_comp_pct,qb_yds_per_att,qb_td_pct,qb_int_pct,pos_RB,pos_TE,pos_WR
name,position,team,opponent,salary,status,week,season,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
Ja'Marr Chase,WR,CIN,BAL,8000,Active,5,2024.0,21.825,15.3375,29.35,18.075,11.525,23.4125,0.0,0.0,0.0,4.125,...,257.125,1.875,0.375,0.719101,7.70412,0.05618,0.011236,0.0,0.0,1.0
Cooper Kupp,WR,LAR,GB,7900,O,5,2024.0,22.2875,16.3125,32.525,18.9125,13.625,25.7125,0.875,0.0,0.0,6.875,...,281.0,1.125,0.5,0.700375,8.419476,0.033708,0.014981,0.0,0.0,1.0
Derrick Henry,RB,BAL,CIN,7800,Active,5,2024.0,25.0125,7.2625,36.65,21.075,5.2625,29.8375,96.5,0.875,0.0,1.25,...,231.875,1.875,0.25,0.668161,8.318386,0.067265,0.008969,1.0,0.0,0.0
Nico Collins,WR,HOU,BUF,7700,Active,5,2024.0,26.975,12.3,24.1625,24.225,9.55,19.1625,1.375,0.0,0.0,7.125,...,191.382682,0.971369,0.558659,0.646489,6.77952,0.033994,0.020194,0.0,0.0,1.0
Jonathan Taylor,RB,IND,JAX,7700,O,5,2024.0,27.475,7.8875,35.825,24.475,6.5125,27.95,95.875,1.125,0.0,1.125,...,153.875,0.75,0.875,0.552795,7.645963,0.037267,0.043478,1.0,0.0,0.0


In [706]:
# Write the model-ready frames to per-site CSVs named by season and week
flex_df_fd.to_csv(f'FD_FLEX_for_model_{season}_{week}.csv')
flex_df_dk.to_csv(f'DK_FLEX_for_model_{season}_{week}.csv')