# Prediction setup (defense)
This notebook will create a dataset the can be used for our model that predicts fantasy points for defense.

In [1]:
import numpy as np
import pandas as pd
#from datetime import datetime, timedelta
from functions import get_current_weekday, calculate_nfl_week, get_next_sunday, get_current_year
import sqlite3

In [2]:
day = get_current_weekday()

In [3]:
date_string = get_next_sunday(day)

In [4]:
week = calculate_nfl_week(date_string)

In [5]:
season = get_current_year()

# Defense
Pulling from the database 2023 data so we can get points scored by team defenses in each game and derive our L8 features. L8 stands for last eight games.<br>

For the first half of the season, we'll need to pull the data we used to train our model, as the last eight games every team has played will go back into last season until every team has played eight games this season.

In [6]:
#Dynamically creating strings for the fd and dk table names that we use in our query.
# fd_table_name = "fd_table_" + str(week) + "_24"
# dk_table_name = "dk_table_" + str(week) + "_24"

# Getting data from previous notebook
This is the fantasy points for every defense from the beginning of the previous season through last week.

In [7]:
def_model = pd.read_csv('def_points_update_' + str(season) + '_' + str(week) + '.csv')

In [8]:
def_model.head()

Unnamed: 0,season,week,game_id,defteam,FD_Pts,DK_Pts
0,2022,1,2022_01_BAL_NYJ,BAL,11.0,11.0
1,2022,1,2022_01_BAL_NYJ,NYJ,4.0,4.0
2,2022,1,2022_01_BUF_LA,BUF,17.0,17.0
3,2022,1,2022_01_BUF_LA,LA,9.0,9.0
4,2022,1,2022_01_CLE_CAR,CAR,1.0,1.0


In [9]:
##Defensive fantasy points for FanDuel and DraftKings
# Sacks = 1
# Opponent-fumbles recovered = 2
# Return touchdowns = 6
# Extra Point Return = 2
# Safeties = 2
# Blocked Punt/Kick = 2
# Interceptions made = 2
# 0 points allowed = 10
# 1-6 points allowed = 7
# 7-13 points allowed = 4
# 14-20 points allowed = 1
# 21-27 points allowed = 0
# 28-34 points allowed = -1
# 35+ points allowed = -4

In [10]:
#Almost no difference between FD and DK defense scoring, so we take the mean and have one target variable
def_model['fantasy_points'] = def_model[['FD_Pts', 'DK_Pts']].mean(axis=1)

In [11]:
def_model.drop(columns = ['FD_Pts', 'DK_Pts'], inplace = True)

In [12]:
def_model.head()

Unnamed: 0,season,week,game_id,defteam,fantasy_points
0,2022,1,2022_01_BAL_NYJ,BAL,11.0
1,2022,1,2022_01_BAL_NYJ,NYJ,4.0
2,2022,1,2022_01_BUF_LA,BUF,17.0
3,2022,1,2022_01_BUF_LA,LA,9.0
4,2022,1,2022_01_CLE_CAR,CAR,1.0


In [13]:
def_model['defteam'] = def_model['defteam'].replace({'LA' : 'LAR', 'JAC' : 'JAX'})

In [14]:
def_model.rename(columns = {'defteam' : 'team'}, inplace = True)

In [15]:
def_model.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1322 entries, 0 to 1321
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   season          1322 non-null   int64  
 1   week            1322 non-null   int64  
 2   game_id         1322 non-null   object 
 3   team            1322 non-null   object 
 4   fantasy_points  1322 non-null   float64
dtypes: float64(1), int64(2), object(2)
memory usage: 51.8+ KB


# Need opponents
In all our code refactoring we've discovered that we've somehow lost the opposing teams to the fantasy defenses. We can fix that right here rather than jumping back into that monstrous notebook that compiles defensive fantasy points.

In [16]:
#Weekly data last two seasons
weekly_df = pd.read_csv('weekly_data_' + str(season) + '_' + str(week) + '.csv')

In [17]:
weekly_opponents = weekly_df.groupby(['season', 'week', 'recent_team', 'opponent_team']).first().reset_index()

In [18]:
weekly_opponents = weekly_opponents[['season', 'week', 'recent_team', 'opponent_team']]

In [19]:
weekly_opponents.rename(columns = {'recent_team': 'team', 'opponent_team': 'opponent'}, inplace = True)

In [20]:
weekly_opponents['team'] = weekly_opponents['team'].replace({'LA' : 'LAR', 'JAC': 'JAX'})
weekly_opponents['opponent'] = weekly_opponents['opponent'].replace({'LA' : 'LAR', 'JAC': 'JAX'})

In [21]:
weekly_opponents = weekly_opponents[weekly_opponents['team'] != weekly_opponents['opponent']]

In [22]:
def_model = pd.merge(def_model, weekly_opponents, on = ['season', 'week', 'team'], how = 'left')

In [23]:
def_model

Unnamed: 0,season,week,game_id,team,fantasy_points,opponent
0,2022,1,2022_01_BAL_NYJ,BAL,11.0,NYJ
1,2022,1,2022_01_BAL_NYJ,NYJ,4.0,BAL
2,2022,1,2022_01_BUF_LA,BUF,17.0,LAR
3,2022,1,2022_01_BUF_LA,LAR,9.0,BUF
4,2022,1,2022_01_CLE_CAR,CAR,1.0,CLE
...,...,...,...,...,...,...
1317,2024,6,2024_06_SF_SEA,SF,7.0,SEA
1318,2024,6,2024_06_TB_NO,NO,9.0,TB
1319,2024,6,2024_06_TB_NO,TB,17.0,NO
1320,2024,6,2024_06_WAS_BAL,BAL,5.0,WAS


In [24]:
def_model[def_model.isna().any(axis = 1)]

Unnamed: 0,season,week,game_id,team,fantasy_points,opponent


# Adding live rows
We need to add a row for each team so we can calculate the respective variables entering the upcoming week of games. This will incorporate the last eight games of last season for each team.

In [25]:
# List of all 32 NFL teams
nfl_teams = list(def_model['opponent'].unique())

In [26]:
# Create a new DataFrame for the upcoming week, to be filled with predictive data
new_rows = pd.DataFrame({
    'season': [season] * 32,
    'week': [week] * 32,
    'game_id': [None] * 32,
    'fantasy_points': [None] * 32,
    'opponent': nfl_teams
})

In [27]:
#pd.set_option('display.max_rows', None)

In [28]:
# Concatenate the new rows to the existing DataFrame
def_model = pd.concat([def_model, new_rows], ignore_index=True)

In [29]:
def_model.head()

Unnamed: 0,season,week,game_id,team,fantasy_points,opponent
0,2022,1,2022_01_BAL_NYJ,BAL,11.0,NYJ
1,2022,1,2022_01_BAL_NYJ,NYJ,4.0,BAL
2,2022,1,2022_01_BUF_LA,BUF,17.0,LAR
3,2022,1,2022_01_BUF_LA,LAR,9.0,BUF
4,2022,1,2022_01_CLE_CAR,CAR,1.0,CLE


# Getting last week's points
We'll store this in a CSV and use it to check last week's predictions.

In [30]:
def_points_last_week = def_model[(def_model['season'] == 2024) & (def_model['week'] == week - 1)]

In [31]:
def_points_last_week.to_csv('def_target_' + str(season) + '_' + str(week - 1) + '.csv')

# Sequential game numbers
This gives each game a number, grouped by team, so that we can calculate means for features over the last eight games.

In [32]:
def_model['game_num'] = def_model.groupby(['opponent', 'season']).cumcount() + 1

# Rolling mean functions
These functions calculate the mean of points allowed to fantasy defenses over the previous eight games. If there are less than eight previous games in the current season, we go back to the previous season.<br>

We use both equal rolling means and weighted rolling means, with more weight being placed on more recent games.<br>

These functions will produce a dataframe that indicates the fantasy points scored by opposing defenses against the team in the opponent column.

In [33]:
# Function to calculate equally weighted rolling mean
def calculate_equal_rolling_mean(group):
    # Calculate the equally weighted rolling mean of the last 8 games, excluding the current game
    group['rolling_mean_8'] = (
        group['fantasy_points'].shift().rolling(window=8, min_periods=1).mean()
    )
    return group

# Apply the rolling mean calculation using groupby
def_model = def_model.groupby('opponent', group_keys=False).apply(calculate_equal_rolling_mean)

In [34]:
# Linear and Exponential weights
# linear_weights = np.array([0.25, 0.20, 0.15, 0.13, 0.10, 0.08, 0.05, 0.04])
# exponential_weights = np.array([0.27, 0.23, 0.19, 0.15, 0.11, 0.09, 0.07, 0.05])

# Corrected Linear and Exponential weights
linear_weights = np.array([0.01, 0.03, 0.06, 0.10, 0.14, 0.18, 0.24, 0.28])
exponential_weights = np.array([0.015, 0.025, 0.04, 0.07, 0.10, 0.15, 0.25, 0.35])

In [35]:
# Function to calculate rolling means with different weights
def calculate_weighted_means(group):
    # Calculate the linear weighted rolling mean
    group['linear_rolling_mean_8'] = (
        group['fantasy_points'].shift().rolling(window=8, min_periods=1)
        .apply(lambda x: np.dot(x, linear_weights[-len(x):]), raw=False)
    )
    
    # Calculate the exponential weighted rolling mean
    group['exponential_rolling_mean_8'] = (
        group['fantasy_points'].shift().rolling(window=8, min_periods=1)
        .apply(lambda x: np.dot(x, exponential_weights[-len(x):]), raw=False)
    )
    return group

In [36]:
# Apply the calculations using groupby
def_model = def_model.groupby('opponent', group_keys=False).apply(calculate_weighted_means)

In [37]:
def_model.rename(columns = {'rolling_mean_8': 'opp_vs_def_L8',\
                             'linear_rolling_mean_8': 'opp_vs_def_L8_lin',\
                            'exponential_rolling_mean_8': 'opp_vs_def_L8_exp'}, inplace = True)

In [38]:
def_model.drop(columns = ['game_num', 'game_id', 'fantasy_points'], inplace = True)

In [39]:
def_model.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1354 entries, 0 to 1353
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   season             1354 non-null   int64  
 1   week               1354 non-null   int64  
 2   team               1322 non-null   object 
 3   opponent           1354 non-null   object 
 4   opp_vs_def_L8      1322 non-null   float64
 5   opp_vs_def_L8_lin  1098 non-null   float64
 6   opp_vs_def_L8_exp  1098 non-null   float64
dtypes: float64(3), int64(2), object(2)
memory usage: 116.9+ KB


In [40]:
def_model.tail(10)

Unnamed: 0,season,week,team,opponent,opp_vs_def_L8,opp_vs_def_L8_lin,opp_vs_def_L8_exp
1344,2024,7,,TEN,10.75,9.32,7.81
1345,2024,7,,NYG,8.375,8.36,8.13
1346,2024,7,,PHI,8.0,7.93,7.405
1347,2024,7,,DET,2.625,2.2,1.62
1348,2024,7,,PIT,5.5,5.18,4.71
1349,2024,7,,CIN,5.375,4.47,4.365
1350,2024,7,,SF,5.5,5.7,5.21
1351,2024,7,,CHI,5.25,4.26,3.31
1352,2024,7,,TB,5.875,6.89,6.585
1353,2024,7,,DAL,7.375,10.05,10.815


In [41]:
def_model = def_model[(def_model['season'] == 2024) & (def_model['week'] == week)]

In [42]:
def_model = def_model[['opponent', 'opp_vs_def_L8', 'opp_vs_def_L8_lin', 'opp_vs_def_L8_exp']]

At this point we have rolling L8 averages of fantasy points scored by defense against teams in the opponent column heading into the upcoming week.

In [43]:
def_model

Unnamed: 0,opponent,opp_vs_def_L8,opp_vs_def_L8_lin,opp_vs_def_L8_exp
1322,NYJ,7.875,8.58,8.65
1323,BAL,4.0,1.9,1.96
1324,LAR,7.75,8.54,8.87
1325,BUF,2.875,2.95,2.755
1326,CLE,11.625,11.11,10.585
1327,CAR,9.875,7.47,7.375
1328,SEA,6.5,8.73,8.605
1329,DEN,6.375,6.15,5.815
1330,MIN,8.125,7.38,7.34
1331,GB,3.5,5.1,4.72


In [44]:
#spread_df.head()

# Bringing in FanDuel and DraftKings player lists
We'll filter this for the defenses.

In [45]:
#Getting FanDuel and DraftKings player lists
# Establish the connection to the database
conn = sqlite3.connect('nfl_dfs.db')

# Define the table names based on the week you want to retrieve
fd_table_name = 'fd_table_' + str(week) + '_24'
dk_table_name = 'dk_table_' + str(week) + '_24'

# Retrieve the FanDuel table
fd_df_retrieved = pd.read_sql_query(f"SELECT * FROM {fd_table_name}", conn)

# Retrieve the DraftKings table
dk_df_retrieved = pd.read_sql_query(f"SELECT * FROM {dk_table_name}", conn)

# Close the connection
conn.close()


In [46]:
fanduel_df = fd_df_retrieved
draftkings_df = dk_df_retrieved

In [47]:
#Filtering our DK and FD tables for defenses only
fanduel_df = fanduel_df[fanduel_df['position'] == 'D'].reset_index(drop = True)
draftkings_df = draftkings_df[draftkings_df['position'] == 'D'].reset_index(drop = True)

In [48]:
fanduel_df.head()

Unnamed: 0,ID,name,position,salary,team,opponent,home_team,status,date,week
0,108151-12526,BUF,D,5000,BUF,TEN,1,Active,10-20-2024,7
1,108151-12528,CIN,D,4900,CIN,CLE,0,Active,10-20-2024,7
2,108151-12544,NYJ,D,4800,NYJ,PIT,0,Active,10-20-2024,7
3,108151-12545,PHI,D,4700,PHI,NYG,0,Active,10-20-2024,7
4,108151-12552,WAS,D,4600,WAS,CAR,1,Active,10-20-2024,7


In [49]:
draftkings_df.head()

Unnamed: 0,ID,name,position,salary,team,opponent,home_team,status,date,week
0,36291613,PHI,D,3600,PHI,NYG,0,Active,10-20-2024,7
1,36291614,BUF,D,3500,BUF,TEN,1,Active,10-20-2024,7
2,36291615,WAS,D,3400,WAS,CAR,1,Active,10-20-2024,7
3,36291616,CIN,D,3300,CIN,CLE,0,Active,10-20-2024,7
4,36291617,IND,D,3200,IND,MIA,1,Active,10-20-2024,7


In [50]:
def_model

Unnamed: 0,opponent,opp_vs_def_L8,opp_vs_def_L8_lin,opp_vs_def_L8_exp
1322,NYJ,7.875,8.58,8.65
1323,BAL,4.0,1.9,1.96
1324,LAR,7.75,8.54,8.87
1325,BUF,2.875,2.95,2.755
1326,CLE,11.625,11.11,10.585
1327,CAR,9.875,7.47,7.375
1328,SEA,6.5,8.73,8.605
1329,DEN,6.375,6.15,5.815
1330,MIN,8.125,7.38,7.34
1331,GB,3.5,5.1,4.72


In [51]:
#Merging the L8 variables to the tables
fanduel_df = pd.merge(fanduel_df, def_model, on = ['opponent'], how = 'left')
draftkings_df = pd.merge(draftkings_df, def_model, on = ['opponent'], how = 'left')

In [52]:
fanduel_df.drop(columns = ['status'], inplace = True)
draftkings_df.drop(columns = ['status'], inplace = True)

In [53]:
fanduel_df.head()

Unnamed: 0,ID,name,position,salary,team,opponent,home_team,date,week,opp_vs_def_L8,opp_vs_def_L8_lin,opp_vs_def_L8_exp
0,108151-12526,BUF,D,5000,BUF,TEN,1,10-20-2024,7,10.75,9.32,7.81
1,108151-12528,CIN,D,4900,CIN,CLE,0,10-20-2024,7,11.625,11.11,10.585
2,108151-12544,NYJ,D,4800,NYJ,PIT,0,10-20-2024,7,5.5,5.18,4.71
3,108151-12545,PHI,D,4700,PHI,NYG,0,10-20-2024,7,8.375,8.36,8.13
4,108151-12552,WAS,D,4600,WAS,CAR,1,10-20-2024,7,9.875,7.47,7.375


In [54]:
draftkings_df.head()

Unnamed: 0,ID,name,position,salary,team,opponent,home_team,date,week,opp_vs_def_L8,opp_vs_def_L8_lin,opp_vs_def_L8_exp
0,36291613,PHI,D,3600,PHI,NYG,0,10-20-2024,7,8.375,8.36,8.13
1,36291614,BUF,D,3500,BUF,TEN,1,10-20-2024,7,10.75,9.32,7.81
2,36291615,WAS,D,3400,WAS,CAR,1,10-20-2024,7,9.875,7.47,7.375
3,36291616,CIN,D,3300,CIN,CLE,0,10-20-2024,7,11.625,11.11,10.585
4,36291617,IND,D,3200,IND,MIA,1,10-20-2024,7,10.375,10.84,10.1


# Odds, totals, outdoors and grass variables
Our odds CSVs will bring in the rest of the variables.

In [55]:
fd_spreads = pd.read_csv('fd_spreads_' + str(season) + '_' + str(week) + '.csv')
dk_spreads = pd.read_csv('dk_spreads_' + str(season) + '_' + str(week) + '.csv')

In [56]:
fd_spreads

Unnamed: 0.1,Unnamed: 0,opponent,team,total_line,outdoors,grass,home_team,spread_line,pred_total,opp_total,wind,season,week
0,0,JAX,NE,42.5,1,1,0,-5.5,18.5,24.0,14,2024,7
1,1,ATL,SEA,51.5,0,0,0,-3.0,24.25,27.25,0,2024,7
2,2,BUF,TEN,41.5,1,0,0,-8.5,16.5,25.0,8,2024,7
3,3,CLE,CIN,41.5,1,1,0,5.5,23.5,18.0,6,2024,7
4,4,MIN,DET,50.5,0,1,0,-1.5,24.5,26.0,0,2024,7
5,5,GB,HOU,47.5,1,1,0,-2.5,22.5,25.0,9,2024,7
6,6,IND,MIA,43.5,0,0,0,-3.0,20.25,23.25,0,2024,7
7,7,NYG,PHI,43.5,1,0,0,3.0,23.25,20.25,3,2024,7
8,8,WAS,CAR,51.5,1,1,0,-8.5,21.5,30.0,3,2024,7
9,9,LAR,LV,43.5,0,0,0,-6.5,18.5,25.0,0,2024,7


In [57]:
dk_spreads

Unnamed: 0.1,Unnamed: 0,opponent,team,total_line,outdoors,grass,home_team,spread_line,pred_total,opp_total,wind,season,week
0,0,JAX,NE,42.5,1,1,0,-5.5,18.5,24.0,14,2024,7
1,1,ATL,SEA,51.0,0,0,0,-3.0,24.0,27.0,0,2024,7
2,2,BUF,TEN,41.0,1,0,0,-9.0,16.0,25.0,8,2024,7
3,3,CLE,CIN,41.5,1,1,0,6.0,23.75,17.75,6,2024,7
4,4,MIN,DET,51.0,0,1,0,-1.5,24.75,26.25,0,2024,7
5,5,GB,HOU,47.5,1,1,0,-3.0,22.25,25.25,9,2024,7
6,6,IND,MIA,,0,0,0,-3.0,,,0,2024,7
7,7,NYG,PHI,43.0,1,0,0,3.0,23.0,20.0,3,2024,7
8,8,WAS,CAR,52.0,1,1,0,-8.0,22.0,30.0,3,2024,7
9,9,LAR,LV,43.5,0,0,0,-6.5,18.5,25.0,0,2024,7


In [58]:
fanduel_df = pd.merge(fanduel_df, fd_spreads, on = ['team', 'opponent', 'week', 'home_team'], how = 'left')
draftkings_df = pd.merge(draftkings_df, dk_spreads, on = ['team', 'opponent', 'week', 'home_team'], how = 'left')

In [59]:
fanduel_df.drop(columns = ['Unnamed: 0'], inplace = True)
draftkings_df.drop(columns = ['Unnamed: 0'], inplace = True)

In [60]:
fanduel_df.head()

Unnamed: 0,ID,name,position,salary,team,opponent,home_team,date,week,opp_vs_def_L8,opp_vs_def_L8_lin,opp_vs_def_L8_exp,total_line,outdoors,grass,spread_line,pred_total,opp_total,wind,season
0,108151-12526,BUF,D,5000,BUF,TEN,1,10-20-2024,7,10.75,9.32,7.81,41.5,1,0,8.5,25.0,16.5,8,2024
1,108151-12528,CIN,D,4900,CIN,CLE,0,10-20-2024,7,11.625,11.11,10.585,41.5,1,1,5.5,23.5,18.0,6,2024
2,108151-12544,NYJ,D,4800,NYJ,PIT,0,10-20-2024,7,5.5,5.18,4.71,38.5,1,1,1.5,20.0,18.5,2,2024
3,108151-12545,PHI,D,4700,PHI,NYG,0,10-20-2024,7,8.375,8.36,8.13,43.5,1,0,3.0,23.25,20.25,3,2024
4,108151-12552,WAS,D,4600,WAS,CAR,1,10-20-2024,7,9.875,7.47,7.375,51.5,1,1,8.5,30.0,21.5,3,2024


In [61]:
draftkings_df.head()

Unnamed: 0,ID,name,position,salary,team,opponent,home_team,date,week,opp_vs_def_L8,opp_vs_def_L8_lin,opp_vs_def_L8_exp,total_line,outdoors,grass,spread_line,pred_total,opp_total,wind,season
0,36291613,PHI,D,3600,PHI,NYG,0,10-20-2024,7,8.375,8.36,8.13,43.0,1,0,3.0,23.0,20.0,3,2024
1,36291614,BUF,D,3500,BUF,TEN,1,10-20-2024,7,10.75,9.32,7.81,41.0,1,0,9.0,25.0,16.0,8,2024
2,36291615,WAS,D,3400,WAS,CAR,1,10-20-2024,7,9.875,7.47,7.375,52.0,1,1,8.0,30.0,22.0,3,2024
3,36291616,CIN,D,3300,CIN,CLE,0,10-20-2024,7,11.625,11.11,10.585,41.5,1,1,6.0,23.75,17.75,6,2024
4,36291617,IND,D,3200,IND,MIA,1,10-20-2024,7,10.375,10.84,10.1,,0,0,3.0,,,0,2024


In [62]:
fanduel_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22 entries, 0 to 21
Data columns (total 20 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   ID                 22 non-null     object 
 1   name               22 non-null     object 
 2   position           22 non-null     object 
 3   salary             22 non-null     int64  
 4   team               22 non-null     object 
 5   opponent           22 non-null     object 
 6   home_team          22 non-null     int64  
 7   date               22 non-null     object 
 8   week               22 non-null     int64  
 9   opp_vs_def_L8      22 non-null     float64
 10  opp_vs_def_L8_lin  22 non-null     float64
 11  opp_vs_def_L8_exp  22 non-null     float64
 12  total_line         22 non-null     float64
 13  outdoors           22 non-null     int64  
 14  grass              22 non-null     int64  
 15  spread_line        22 non-null     float64
 16  pred_total         22 non-nu

In [63]:
draftkings_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 20 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   ID                 20 non-null     int64  
 1   name               20 non-null     object 
 2   position           20 non-null     object 
 3   salary             20 non-null     int64  
 4   team               20 non-null     object 
 5   opponent           20 non-null     object 
 6   home_team          20 non-null     int64  
 7   date               20 non-null     object 
 8   week               20 non-null     int64  
 9   opp_vs_def_L8      20 non-null     float64
 10  opp_vs_def_L8_lin  20 non-null     float64
 11  opp_vs_def_L8_exp  20 non-null     float64
 12  total_line         18 non-null     float64
 13  outdoors           20 non-null     int64  
 14  grass              20 non-null     int64  
 15  spread_line        20 non-null     float64
 16  pred_total         18 non-nu

In [64]:
fanduel_df.drop(columns = ['ID', 'name', 'pred_total', 'wind'], inplace = True)
draftkings_df.drop(columns = ['ID', 'name', 'pred_total', 'wind'], inplace = True)

In [65]:
#Setting indexes so that the only columns are the ones that correspond with variables for the model
fanduel_df.set_index(['position', 'salary', 'team', 'opponent', 'date', 'week', 'season'], inplace = True)
draftkings_df.set_index(['position', 'salary', 'team', 'opponent', 'date', 'week', 'season'], inplace = True)

In [66]:
fanduel_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 22 entries, ('D', 5000, 'BUF', 'TEN', '10-20-2024', 7, 2024) to ('D', 3000, 'CAR', 'WAS', '10-20-2024', 7, 2024)
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   home_team          22 non-null     int64  
 1   opp_vs_def_L8      22 non-null     float64
 2   opp_vs_def_L8_lin  22 non-null     float64
 3   opp_vs_def_L8_exp  22 non-null     float64
 4   total_line         22 non-null     float64
 5   outdoors           22 non-null     int64  
 6   grass              22 non-null     int64  
 7   spread_line        22 non-null     float64
 8   opp_total          22 non-null     float64
dtypes: float64(6), int64(3)
memory usage: 4.7+ KB


In [67]:
draftkings_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 20 entries, ('D', 3600, 'PHI', 'NYG', '10-20-2024', 7, 2024) to ('D', 2300, 'TEN', 'BUF', '10-20-2024', 7, 2024)
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   home_team          20 non-null     int64  
 1   opp_vs_def_L8      20 non-null     float64
 2   opp_vs_def_L8_lin  20 non-null     float64
 3   opp_vs_def_L8_exp  20 non-null     float64
 4   total_line         18 non-null     float64
 5   outdoors           20 non-null     int64  
 6   grass              20 non-null     int64  
 7   spread_line        20 non-null     float64
 8   opp_total          18 non-null     float64
dtypes: float64(6), int64(3)
memory usage: 4.4+ KB


In [68]:
#Now we need to set the variables in the order that they're trained in the model
fanduel_df = fanduel_df[['spread_line', 'home_team', 'outdoors', 'grass', 'total_line', 'opp_total', 'opp_vs_def_L8', 'opp_vs_def_L8_lin', 'opp_vs_def_L8_exp']]

In [69]:
draftkings_df = draftkings_df[['spread_line', 'home_team', 'outdoors', 'grass', 'total_line', 'opp_total', 'opp_vs_def_L8', 'opp_vs_def_L8_lin', 'opp_vs_def_L8_exp']]

In [70]:
fanduel_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 22 entries, ('D', 5000, 'BUF', 'TEN', '10-20-2024', 7, 2024) to ('D', 3000, 'CAR', 'WAS', '10-20-2024', 7, 2024)
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   spread_line        22 non-null     float64
 1   home_team          22 non-null     int64  
 2   outdoors           22 non-null     int64  
 3   grass              22 non-null     int64  
 4   total_line         22 non-null     float64
 5   opp_total          22 non-null     float64
 6   opp_vs_def_L8      22 non-null     float64
 7   opp_vs_def_L8_lin  22 non-null     float64
 8   opp_vs_def_L8_exp  22 non-null     float64
dtypes: float64(6), int64(3)
memory usage: 4.7+ KB


In [71]:
draftkings_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 20 entries, ('D', 3600, 'PHI', 'NYG', '10-20-2024', 7, 2024) to ('D', 2300, 'TEN', 'BUF', '10-20-2024', 7, 2024)
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   spread_line        20 non-null     float64
 1   home_team          20 non-null     int64  
 2   outdoors           20 non-null     int64  
 3   grass              20 non-null     int64  
 4   total_line         18 non-null     float64
 5   opp_total          18 non-null     float64
 6   opp_vs_def_L8      20 non-null     float64
 7   opp_vs_def_L8_lin  20 non-null     float64
 8   opp_vs_def_L8_exp  20 non-null     float64
dtypes: float64(6), int64(3)
memory usage: 4.4+ KB


In [72]:
draftkings_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,spread_line,home_team,outdoors,grass,total_line,opp_total,opp_vs_def_L8,opp_vs_def_L8_lin,opp_vs_def_L8_exp
position,salary,team,opponent,date,week,season,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
D,3600,PHI,NYG,10-20-2024,7,2024,3.0,0,1,0,43.0,20.0,8.375,8.36,8.13
D,3500,BUF,TEN,10-20-2024,7,2024,9.0,1,1,0,41.0,16.0,10.75,9.32,7.81
D,3400,WAS,CAR,10-20-2024,7,2024,8.0,1,1,1,52.0,22.0,9.875,7.47,7.375
D,3300,CIN,CLE,10-20-2024,7,2024,6.0,0,1,1,41.5,17.75,11.625,11.11,10.585
D,3200,IND,MIA,10-20-2024,7,2024,3.0,1,0,0,,,10.375,10.84,10.1
D,3100,ATL,SEA,10-20-2024,7,2024,3.0,1,0,0,51.0,24.0,6.5,8.73,8.605
D,3100,KC,SF,10-20-2024,7,2024,-1.5,0,1,1,47.0,24.25,5.5,5.7,5.21
D,3000,LAR,LV,10-20-2024,7,2024,6.5,1,0,0,43.5,18.5,9.0,11.82,11.87
D,3000,SF,KC,10-20-2024,7,2024,1.5,1,1,1,47.0,22.75,4.625,5.57,5.235
D,2900,GB,HOU,10-20-2024,7,2024,3.0,1,1,1,47.5,22.25,4.5,4.75,3.855


In [73]:
fanduel_df.to_csv('FD_def_for_model_' + str(season) + '_' + str(week) + '.csv')

In [74]:
draftkings_df.to_csv('DK_def_for_model_' + str(season) + '_' + str(week) + '.csv')