# Prediction setup (defense)
This notebook will create a dataset that can be used for our model that predicts fantasy points for defense.

In [1]:
import numpy as np
import pandas as pd
#from datetime import datetime, timedelta
from functions import get_current_weekday, calculate_nfl_week, get_next_sunday, get_current_year
import sqlite3

In [2]:
# Current weekday, obtained from the project helper imported from functions.
# NOTE(review): the return format (name vs. integer) is not visible here — confirm.
day = get_current_weekday()

In [3]:
# Derive the upcoming Sunday from the current weekday.
# NOTE(review): presumably a date string (per the variable name); the helper is not visible here — confirm.
date_string = get_next_sunday(day)

In [4]:
# NFL week number for that Sunday; used below in file names, table names and row filters.
# NOTE(review): later cells compute `week - 1`, so this is assumed to be an integer — confirm.
week = calculate_nfl_week(date_string)

In [5]:
# Season label used in the CSV file names below.
# NOTE(review): assumes get_current_year() returns the NFL season year (not the calendar
# year in January/February) — confirm against the helper.
season = get_current_year()

# Defense
Pulling from the database 2023 data so we can get points scored by team defenses in each game and derive our L8 features. L8 stands for last eight games.<br>

For the first half of the season, we'll need to pull the data we used to train our model, as the last eight games every team has played will go back into last season until every team has played eight games this season.

In [6]:
#Dynamically creating strings for the fd and dk table names that we use in our query.
# fd_table_name = "fd_table_" + str(week) + "_24"
# dk_table_name = "dk_table_" + str(week) + "_24"

# Getting data from previous notebook
This is the fantasy points for every defense from the beginning of the previous season through last week.

In [7]:
# Load the per-game defensive fantasy points written by the previous notebook
def_points_path = f'def_points_update_{season}_{week}.csv'
def_model = pd.read_csv(def_points_path)

In [8]:
def_model.head()

Unnamed: 0,season,week,game_id,defteam,FD_Pts,DK_Pts
0,2022,1,2022_01_BAL_NYJ,BAL,11.0,11.0
1,2022,1,2022_01_BAL_NYJ,NYJ,4.0,4.0
2,2022,1,2022_01_BUF_LA,BUF,17.0,17.0
3,2022,1,2022_01_BUF_LA,LA,9.0,9.0
4,2022,1,2022_01_CLE_CAR,CAR,1.0,1.0


In [9]:
##Defensive fantasy points for FanDuel and DraftKings
# Sacks = 1
# Opponent-fumbles recovered = 2
# Return touchdowns = 6
# Extra Point Return = 2
# Safeties = 2
# Blocked Punt/Kick = 2
# Interceptions made = 2
# 0 points allowed = 10
# 1-6 points allowed = 7
# 7-13 points allowed = 4
# 14-20 points allowed = 1
# 21-27 points allowed = 0
# 28-34 points allowed = -1
# 35+ points allowed = -4

In [10]:
# FanDuel and DraftKings score defenses almost identically, so the row-wise
# average of the two becomes the single target column.
site_points = def_model[['FD_Pts', 'DK_Pts']]
def_model['fantasy_points'] = site_points.mean(axis='columns')

In [11]:
# The per-site columns are no longer needed once the averaged target exists
def_model = def_model.drop(columns=['FD_Pts', 'DK_Pts'])

In [12]:
def_model.head()

Unnamed: 0,season,week,game_id,defteam,fantasy_points
0,2022,1,2022_01_BAL_NYJ,BAL,11.0
1,2022,1,2022_01_BAL_NYJ,NYJ,4.0
2,2022,1,2022_01_BUF_LA,BUF,17.0
3,2022,1,2022_01_BUF_LA,LA,9.0
4,2022,1,2022_01_CLE_CAR,CAR,1.0


In [13]:
# Map the alternate team abbreviations onto the ones used in the rest of the notebook
defteam_code_fixes = {'LA': 'LAR', 'JAC': 'JAX'}
def_model['defteam'] = def_model['defteam'].replace(defteam_code_fixes)

In [14]:
# 'team' is the join key used by the later merges
def_model = def_model.rename(columns={'defteam': 'team'})

In [15]:
def_model.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1202 entries, 0 to 1201
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   season          1202 non-null   int64  
 1   week            1202 non-null   int64  
 2   game_id         1202 non-null   object 
 3   team            1202 non-null   object 
 4   fantasy_points  1202 non-null   float64
dtypes: float64(1), int64(2), object(2)
memory usage: 47.1+ KB


# Need opponents
In all our code refactoring we've discovered that we've somehow lost the opposing teams to the fantasy defenses. We can fix that right here rather than jumping back into that monstrous notebook that compiles defensive fantasy points.

In [16]:
# Weekly data covering the last two seasons
weekly_data_path = f'weekly_data_{season}_{week}.csv'
weekly_df = pd.read_csv(weekly_data_path)

In [17]:
# Collapse the weekly data to one row per (season, week, team, opponent) combination
matchup_keys = ['season', 'week', 'recent_team', 'opponent_team']
first_row_per_matchup = weekly_df.groupby(matchup_keys).first()
weekly_opponents = first_row_per_matchup.reset_index()

In [18]:
# Keep only the matchup keys; every other column is irrelevant here
weekly_opponents = weekly_opponents.loc[:, ['season', 'week', 'recent_team', 'opponent_team']]

In [19]:
# Align column names with def_model so the two frames can be merged
weekly_opponents = weekly_opponents.rename(
    columns={'recent_team': 'team', 'opponent_team': 'opponent'}
)

In [20]:
# Apply the same abbreviation fixes to both sides of each matchup
matchup_code_fixes = {'LA' : 'LAR', 'JAC': 'JAX'}
for side in ('team', 'opponent'):
    weekly_opponents[side] = weekly_opponents[side].replace(matchup_code_fixes)

In [21]:
# Discard rows where a team is listed as its own opponent
is_real_matchup = weekly_opponents['team'].ne(weekly_opponents['opponent'])
weekly_opponents = weekly_opponents[is_real_matchup]

In [22]:
# Attach each defense's opponent; a left join keeps every def_model row
def_model = def_model.merge(
    weekly_opponents,
    how='left',
    on=['season', 'week', 'team'],
)

In [23]:
def_model

Unnamed: 0,season,week,game_id,team,fantasy_points,opponent
0,2022,1,2022_01_BAL_NYJ,BAL,11.0,NYJ
1,2022,1,2022_01_BAL_NYJ,NYJ,4.0,BAL
2,2022,1,2022_01_BUF_LA,BUF,17.0,LAR
3,2022,1,2022_01_BUF_LA,LAR,9.0,BUF
4,2022,1,2022_01_CLE_CAR,CAR,1.0,CLE
...,...,...,...,...,...,...
1197,2024,2,2024_02_SEA_NE,SEA,6.0,NE
1198,2024,2,2024_02_SF_MIN,MIN,13.0,SF
1199,2024,2,2024_02_SF_MIN,SF,8.0,MIN
1200,2024,2,2024_02_TB_DET,DET,8.0,TB


In [24]:
def_model[def_model.isna().any(axis = 1)]

Unnamed: 0,season,week,game_id,team,fantasy_points,opponent


# Adding live rows
We need to add a row for each team so we can calculate the respective variables entering the upcoming week of games. This will incorporate the last eight games of last season for each team.

In [25]:
# Every distinct opponent seen in the data (expected to be all 32 NFL teams)
nfl_teams = def_model['opponent'].unique().tolist()

In [26]:
# Create a new DataFrame for the upcoming week, to be filled with predictive data.
# One placeholder ("live") row is built per opponent. The row count and season are
# derived from nfl_teams / season instead of the literals 32 and 2024, so the cell
# keeps working in other seasons and when the opponent list is not exactly 32 long.
# NOTE(review): assumes get_current_year() returns the NFL season year as a number
# or numeric string — confirm.
live_row_count = len(nfl_teams)
new_rows = pd.DataFrame({
    'season': [int(season)] * live_row_count,
    'week': [week] * live_row_count,
    'game_id': [None] * live_row_count,
    # np.nan (not None) keeps fantasy_points numeric after the concat below
    'fantasy_points': [np.nan] * live_row_count,
    'opponent': nfl_teams
})

In [27]:
#pd.set_option('display.max_rows', None)

In [28]:
# Append the live rows after the historical games and renumber the index
def_model = pd.concat((def_model, new_rows), ignore_index=True)

In [29]:
def_model.head()

Unnamed: 0,season,week,game_id,team,fantasy_points,opponent
0,2022,1,2022_01_BAL_NYJ,BAL,11.0,NYJ
1,2022,1,2022_01_BAL_NYJ,NYJ,4.0,BAL
2,2022,1,2022_01_BUF_LA,BUF,17.0,LAR
3,2022,1,2022_01_BUF_LA,LAR,9.0,BUF
4,2022,1,2022_01_CLE_CAR,CAR,1.0,CLE


# Getting last week's points
We'll store this in a CSV and use it to check last week's predictions.

In [30]:
# Rows for the most recently completed week of the current season.
# The season comes from `season` (not a literal 2024) so the filter matches the
# file name used when these rows are written out.
# NOTE(review): when week == 1 this selects week 0 and is therefore empty — confirm
# that is acceptable at the start of a season.
is_last_week = (def_model['season'] == int(season)) & (def_model['week'] == week - 1)
def_points_last_week = def_model[is_last_week]

In [31]:
# Persist last week's actual points so the previous predictions can be checked
last_week_path = f'def_points_{season}_{week - 1}.csv'
def_points_last_week.to_csv(last_week_path)

# Sequential game numbers
This gives each game a number, grouped by team, so that we can calculate means for features over the last eight games.

In [32]:
# Number each opponent's games within a season, starting from 1
zero_based_game_index = def_model.groupby(['opponent', 'season']).cumcount()
def_model['game_num'] = zero_based_game_index + 1

# Rolling mean functions
These functions calculate the mean of points allowed to fantasy defenses over the previous eight games. If there are fewer than eight previous games in the current season, we go back to the previous season.<br>

We use both equal rolling means and weighted rolling means, with more weight being placed on more recent games.<br>

These functions will produce a dataframe that indicates the fantasy points scored by opposing defenses against the team in the opponent column.

In [33]:
# Function to calculate equally weighted rolling mean
def calculate_equal_rolling_mean(group):
    """Return a copy of ``group`` with an equally weighted ``rolling_mean_8`` column.

    For every row the value is the mean of ``fantasy_points`` over up to the eight
    preceding rows; the current row is excluded via ``shift()``. Rows with no
    preceding data get NaN. Rows are used in the order given.

    Parameters
    ----------
    group : pandas.DataFrame
        Frame with a ``fantasy_points`` column (one opponent's games when called
        through ``groupby(...).apply``).

    Returns
    -------
    pandas.DataFrame
        New frame with ``rolling_mean_8`` appended. The input is left unmodified,
        because mutating the object passed in by ``groupby.apply`` is not supported
        by pandas.
    """
    prior_points = group['fantasy_points'].shift()
    return group.assign(
        rolling_mean_8=prior_points.rolling(window=8, min_periods=1).mean()
    )

# Run the equal-weight rolling mean separately for each opponent
games_by_opponent = def_model.groupby('opponent', group_keys=False)
def_model = games_by_opponent.apply(calculate_equal_rolling_mean)

In [34]:
# Linear and Exponential weights
# Earlier ordering (largest weight first), kept for reference only:
# linear_weights = np.array([0.25, 0.20, 0.15, 0.13, 0.10, 0.08, 0.05, 0.04])
# exponential_weights = np.array([0.27, 0.23, 0.19, 0.15, 0.11, 0.09, 0.07, 0.05])

# Corrected Linear and Exponential weights
# Each array has eight entries, one per game in the rolling window. The weighted-mean
# function dots the window (in row order) with these arrays, so the last entry applies
# to the last row of the window — the most recent prior game when rows are chronological.
# NOTE(review): linear_weights sums to 1.04 while exponential_weights sums to 1.00, so the
# "linear" feature is scaled about 4% above a true weighted mean. The values are left
# unchanged here because the trained model presumably saw features built with these
# weights — confirm before normalising.
linear_weights = np.array([0.01, 0.03, 0.06, 0.10, 0.14, 0.18, 0.24, 0.28])
exponential_weights = np.array([0.015, 0.025, 0.04, 0.07, 0.10, 0.15, 0.25, 0.35])

In [35]:
# Function to calculate rolling means with different weights
def calculate_weighted_means(group, lin_weights=None, exp_weights=None):
    """Return a copy of ``group`` with two weighted rolling columns appended.

    ``linear_rolling_mean_8`` and ``exponential_rolling_mean_8`` are the dot product
    of the preceding ``fantasy_points`` values (current row excluded via ``shift()``)
    with the corresponding weight vector. The rolling window is as long as the weight
    vector (eight for the notebook's defaults).

    Parameters
    ----------
    group : pandas.DataFrame
        Frame with a ``fantasy_points`` column; rows are used in the order given.
    lin_weights, exp_weights : array-like of float, optional
        Weights applied in window order (last weight -> last row in the window).
        When omitted, the notebook-level ``linear_weights`` / ``exponential_weights``
        arrays are used, which is what ``groupby(...).apply`` relies on.

    Returns
    -------
    pandas.DataFrame
        New frame with both columns added. The input is left unmodified, because
        mutating the object passed in by ``groupby.apply`` is not supported by pandas.

    Notes
    -----
    Weights are not renormalised for short windows, and any NaN inside a window
    (including the one introduced by ``shift()``) makes that row's result NaN.
    """
    lin = np.asarray(linear_weights if lin_weights is None else lin_weights, dtype=float)
    exp = np.asarray(exponential_weights if exp_weights is None else exp_weights, dtype=float)

    # Shift once so both features look only at games before the current row
    prior_points = group['fantasy_points'].shift()

    def _weighted_sum(weights):
        # Windows at the start of the series are shorter, so slice the weights
        # from the right to line them up with the values actually present.
        return prior_points.rolling(window=len(weights), min_periods=1).apply(
            lambda x: np.dot(x, weights[-len(x):]), raw=True
        )

    return group.assign(
        linear_rolling_mean_8=_weighted_sum(lin),
        exponential_rolling_mean_8=_weighted_sum(exp),
    )

In [36]:
# Compute the weighted rolling features one opponent at a time
opponent_groups = def_model.groupby('opponent', group_keys=False)
def_model = opponent_groups.apply(calculate_weighted_means)

In [37]:
# Switch to the feature names the model expects
l8_feature_names = {
    'rolling_mean_8': 'opp_vs_def_L8',
    'linear_rolling_mean_8': 'opp_vs_def_L8_lin',
    'exponential_rolling_mean_8': 'opp_vs_def_L8_exp',
}
def_model = def_model.rename(columns=l8_feature_names)

In [38]:
# Helper and target columns are not needed once the L8 features exist
def_model = def_model.drop(columns=['game_num', 'game_id', 'fantasy_points'])

In [39]:
def_model.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1234 entries, 0 to 1233
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   season             1234 non-null   int64  
 1   week               1234 non-null   int64  
 2   team               1202 non-null   object 
 3   opponent           1234 non-null   object 
 4   opp_vs_def_L8      1202 non-null   float64
 5   opp_vs_def_L8_lin  978 non-null    float64
 6   opp_vs_def_L8_exp  978 non-null    float64
dtypes: float64(3), int64(2), object(2)
memory usage: 109.4+ KB


In [40]:
def_model.tail(10)

Unnamed: 0,season,week,team,opponent,opp_vs_def_L8,opp_vs_def_L8_lin,opp_vs_def_L8_exp
1224,2024,3,,TEN,12.125,12.84,12.39
1225,2024,3,,NYG,9.0,9.72,9.42
1226,2024,3,,PHI,8.375,7.61,6.725
1227,2024,3,,DET,3.25,3.56,3.72
1228,2024,3,,PIT,5.25,5.07,4.975
1229,2024,3,,CIN,7.875,9.38,9.31
1230,2024,3,,SF,6.25,6.64,7.1
1231,2024,3,,CHI,7.375,8.19,8.745
1232,2024,3,,TB,5.5,5.01,4.715
1233,2024,3,,DAL,6.875,6.45,6.325


In [41]:
# Keep only the placeholder rows for the upcoming week.
# The season comes from `season` rather than a literal 2024 so the notebook keeps
# working in later seasons.
# NOTE(review): assumes get_current_year() returns the NFL season year as a number or
# numeric string — confirm.
is_upcoming_week = (def_model['season'] == int(season)) & (def_model['week'] == week)
def_model = def_model[is_upcoming_week]

In [42]:
# Only the join key and the three L8 features are carried forward
def_model = def_model.loc[:, ['opponent', 'opp_vs_def_L8', 'opp_vs_def_L8_lin', 'opp_vs_def_L8_exp']]

At this point we have rolling L8 averages of fantasy points scored by defenses against the teams in the opponent column heading into the upcoming week.

In [43]:
def_model

Unnamed: 0,opponent,opp_vs_def_L8,opp_vs_def_L8_lin,opp_vs_def_L8_exp
1202,NYJ,9.625,6.94,5.85
1203,BAL,6.125,7.0,6.25
1204,LAR,6.375,7.05,7.175
1205,BUF,4.75,4.29,3.755
1206,CLE,11.375,12.01,10.885
1207,CAR,12.0,13.24,12.68
1208,SEA,3.875,4.5,4.46
1209,DEN,8.375,9.68,9.8
1210,MIN,9.875,8.86,8.23
1211,GB,2.0,2.23,2.395


In [44]:
#spread_df.head()

# Bringing in FanDuel and DraftKings player lists
We'll filter this for the defenses.

In [45]:
#Getting FanDuel and DraftKings player lists
# Table names follow the pattern <site>_table_<week>_<two-digit season>. The suffix is
# derived from `season` instead of the literal '_24' so the cell works in later seasons.
# NOTE(review): assumes `season` ends with the same two digits used when the tables
# were created — confirm against the notebook that writes them.
season_suffix = str(season)[-2:]
fd_table_name = 'fd_table_' + str(week) + '_' + season_suffix
dk_table_name = 'dk_table_' + str(week) + '_' + season_suffix

# Establish the connection to the database
conn = sqlite3.connect('nfl_dfs.db')
try:
    # Table names cannot be bound as SQL parameters; they are built only from the
    # week and season values computed above, not from user input.
    # Retrieve the FanDuel table
    fd_df_retrieved = pd.read_sql_query(f"SELECT * FROM {fd_table_name}", conn)

    # Retrieve the DraftKings table
    dk_df_retrieved = pd.read_sql_query(f"SELECT * FROM {dk_table_name}", conn)
finally:
    # Close the connection even if one of the queries fails (e.g. missing table)
    conn.close()


In [46]:
# Working names for the two site tables
fanduel_df, draftkings_df = fd_df_retrieved, dk_df_retrieved

In [47]:
# Keep only the team-defense entries (position 'D') from each site table
fanduel_df = fanduel_df.loc[fanduel_df['position'].eq('D')].reset_index(drop=True)
draftkings_df = draftkings_df.loc[draftkings_df['position'].eq('D')].reset_index(drop=True)

In [48]:
fanduel_df.head()

Unnamed: 0,ID,name,position,salary,team,opponent,home_team,status,date,week
0,107027-12537,LV,D,5000,LV,CAR,1,Active,09-22-2024,3
1,107027-12529,CLE,D,4900,CLE,NYG,1,Active,09-22-2024,3
2,107027-12547,PIT,D,4800,PIT,LAC,1,Active,09-22-2024,3
3,107027-12551,TB,D,4700,TB,DEN,1,Active,09-22-2024,3
4,107027-12550,SEA,D,4600,SEA,MIA,1,Active,09-22-2024,3


In [49]:
draftkings_df.head()

Unnamed: 0,ID,name,position,salary,team,opponent,home_team,status,date,week
0,35940953,CLE,D,3800,CLE,NYG,1,Active,09-22-2024,3
1,35940954,LV,D,3700,LV,CAR,1,Active,09-22-2024,3
2,35940955,LAC,D,3600,LAC,PIT,0,Active,09-22-2024,3
3,35940957,SEA,D,3500,SEA,MIA,1,Active,09-22-2024,3
4,35940956,SF,D,3500,SF,LAR,0,Active,09-22-2024,3


In [50]:
def_model

Unnamed: 0,opponent,opp_vs_def_L8,opp_vs_def_L8_lin,opp_vs_def_L8_exp
1202,NYJ,9.625,6.94,5.85
1203,BAL,6.125,7.0,6.25
1204,LAR,6.375,7.05,7.175
1205,BUF,4.75,4.29,3.755
1206,CLE,11.375,12.01,10.885
1207,CAR,12.0,13.24,12.68
1208,SEA,3.875,4.5,4.46
1209,DEN,8.375,9.68,9.8
1210,MIN,9.875,8.86,8.23
1211,GB,2.0,2.23,2.395


In [51]:
# Attach the opponent's L8 features to every defense on both sites
fanduel_df = fanduel_df.merge(def_model, how='left', on=['opponent'])
draftkings_df = draftkings_df.merge(def_model, how='left', on=['opponent'])

In [52]:
# 'status' is not a model input
fanduel_df = fanduel_df.drop(columns=['status'])
draftkings_df = draftkings_df.drop(columns=['status'])

In [53]:
fanduel_df.head()

Unnamed: 0,ID,name,position,salary,team,opponent,home_team,date,week,opp_vs_def_L8,opp_vs_def_L8_lin,opp_vs_def_L8_exp
0,107027-12537,LV,D,5000,LV,CAR,1,09-22-2024,3,12.0,13.24,12.68
1,107027-12529,CLE,D,4900,CLE,NYG,1,09-22-2024,3,9.0,9.72,9.42
2,107027-12547,PIT,D,4800,PIT,LAC,1,09-22-2024,3,10.125,8.09,6.955
3,107027-12551,TB,D,4700,TB,DEN,1,09-22-2024,3,8.375,9.68,9.8
4,107027-12550,SEA,D,4600,SEA,MIA,1,09-22-2024,3,8.125,10.34,10.64


In [54]:
draftkings_df.head()

Unnamed: 0,ID,name,position,salary,team,opponent,home_team,date,week,opp_vs_def_L8,opp_vs_def_L8_lin,opp_vs_def_L8_exp
0,35940953,CLE,D,3800,CLE,NYG,1,09-22-2024,3,9.0,9.72,9.42
1,35940954,LV,D,3700,LV,CAR,1,09-22-2024,3,12.0,13.24,12.68
2,35940955,LAC,D,3600,LAC,PIT,0,09-22-2024,3,5.25,5.07,4.975
3,35940957,SEA,D,3500,SEA,MIA,1,09-22-2024,3,8.125,10.34,10.64
4,35940956,SF,D,3500,SF,LAR,0,09-22-2024,3,6.375,7.05,7.175


# Odds, totals, outdoors and grass variables
Our odds CSVs will bring in the rest of the variables.

In [55]:
# Site-specific odds files for the upcoming week
fd_spreads = pd.read_csv(f'fd_spreads_{season}_{week}.csv')
dk_spreads = pd.read_csv(f'dk_spreads_{season}_{week}.csv')

In [56]:
fd_spreads.head()

Unnamed: 0.1,Unnamed: 0,opponent,team,total_line,outdoors,grass,home_team,spread_line,pred_total,opp_total,wind,season,week
0,0,IND,CHI,43.5,0,0,0,-1.5,21.0,22.5,0,2024,3
1,1,CLE,NYG,38.5,1,1,0,-6.5,16.0,22.5,5,2024,3
2,2,TB,DEN,40.5,1,1,0,-6.5,17.0,23.5,5,2024,3
3,3,TEN,GB,38.5,1,0,0,-2.5,18.0,20.5,4,2024,3
4,4,MIN,HOU,45.5,0,1,0,1.5,23.5,22.0,0,2024,3


In [57]:
dk_spreads.head()

Unnamed: 0.1,Unnamed: 0,opponent,team,total_line,outdoors,grass,home_team,spread_line,pred_total,opp_total,wind,season,week
0,0,IND,CHI,43.5,0,0,0,-1.0,21.25,22.25,0,2024,3
1,1,CLE,NYG,38.0,1,1,0,-6.5,15.75,22.25,5,2024,3
2,2,TB,DEN,41.0,1,1,0,-6.0,17.5,23.5,5,2024,3
3,3,TEN,GB,38.0,1,0,0,-3.0,17.5,20.5,4,2024,3
4,4,MIN,HOU,46.0,0,1,0,1.5,23.75,22.25,0,2024,3


In [58]:
# Join each site's odds onto its own defense table using the shared matchup keys
spread_join_keys = ['team', 'opponent', 'week', 'home_team']
fanduel_df = fanduel_df.merge(fd_spreads, how='left', on=spread_join_keys)
draftkings_df = draftkings_df.merge(dk_spreads, how='left', on=spread_join_keys)

In [59]:
# Remove the leftover index column that came in with the spreads CSVs
fanduel_df = fanduel_df.drop(columns=['Unnamed: 0'])
draftkings_df = draftkings_df.drop(columns=['Unnamed: 0'])

In [60]:
fanduel_df.head()

Unnamed: 0,ID,name,position,salary,team,opponent,home_team,date,week,opp_vs_def_L8,opp_vs_def_L8_lin,opp_vs_def_L8_exp,total_line,outdoors,grass,spread_line,pred_total,opp_total,wind,season
0,107027-12537,LV,D,5000,LV,CAR,1,09-22-2024,3,12.0,13.24,12.68,39.5,0,0,6.5,23.0,16.5,0,2024
1,107027-12529,CLE,D,4900,CLE,NYG,1,09-22-2024,3,9.0,9.72,9.42,38.5,1,1,6.5,22.5,16.0,5,2024
2,107027-12547,PIT,D,4800,PIT,LAC,1,09-22-2024,3,10.125,8.09,6.955,34.5,1,1,3.0,18.75,15.75,2,2024
3,107027-12551,TB,D,4700,TB,DEN,1,09-22-2024,3,8.375,9.68,9.8,40.5,1,1,6.5,23.5,17.0,5,2024
4,107027-12550,SEA,D,4600,SEA,MIA,1,09-22-2024,3,8.125,10.34,10.64,42.5,1,0,4.5,23.5,19.0,4,2024


In [61]:
draftkings_df.head()

Unnamed: 0,ID,name,position,salary,team,opponent,home_team,date,week,opp_vs_def_L8,opp_vs_def_L8_lin,opp_vs_def_L8_exp,total_line,outdoors,grass,spread_line,pred_total,opp_total,wind,season
0,35940953,CLE,D,3800,CLE,NYG,1,09-22-2024,3,9.0,9.72,9.42,38.0,1,1,6.5,22.25,15.75,5,2024
1,35940954,LV,D,3700,LV,CAR,1,09-22-2024,3,12.0,13.24,12.68,40.0,0,0,6.0,23.0,17.0,0,2024
2,35940955,LAC,D,3600,LAC,PIT,0,09-22-2024,3,5.25,5.07,4.975,34.5,1,1,-3.0,15.75,18.75,2,2024
3,35940957,SEA,D,3500,SEA,MIA,1,09-22-2024,3,8.125,10.34,10.64,42.0,1,0,4.5,23.25,18.75,4,2024
4,35940956,SF,D,3500,SF,LAR,0,09-22-2024,3,6.375,7.05,7.175,43.0,0,0,6.5,24.75,18.25,0,2024


In [62]:
fanduel_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26 entries, 0 to 25
Data columns (total 20 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   ID                 26 non-null     object 
 1   name               26 non-null     object 
 2   position           26 non-null     object 
 3   salary             26 non-null     int64  
 4   team               26 non-null     object 
 5   opponent           26 non-null     object 
 6   home_team          26 non-null     int64  
 7   date               26 non-null     object 
 8   week               26 non-null     int64  
 9   opp_vs_def_L8      26 non-null     float64
 10  opp_vs_def_L8_lin  26 non-null     float64
 11  opp_vs_def_L8_exp  26 non-null     float64
 12  total_line         26 non-null     float64
 13  outdoors           26 non-null     int64  
 14  grass              26 non-null     int64  
 15  spread_line        26 non-null     float64
 16  pred_total         26 non-nu

In [63]:
draftkings_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24 entries, 0 to 23
Data columns (total 20 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   ID                 24 non-null     int64  
 1   name               24 non-null     object 
 2   position           24 non-null     object 
 3   salary             24 non-null     int64  
 4   team               24 non-null     object 
 5   opponent           24 non-null     object 
 6   home_team          24 non-null     int64  
 7   date               24 non-null     object 
 8   week               24 non-null     int64  
 9   opp_vs_def_L8      24 non-null     float64
 10  opp_vs_def_L8_lin  24 non-null     float64
 11  opp_vs_def_L8_exp  24 non-null     float64
 12  total_line         24 non-null     float64
 13  outdoors           24 non-null     int64  
 14  grass              24 non-null     int64  
 15  spread_line        24 non-null     float64
 16  pred_total         24 non-nu

In [64]:
# Columns that are neither identifiers kept in the index nor model inputs
unused_columns = ['ID', 'name', 'pred_total', 'wind']
fanduel_df = fanduel_df.drop(columns=unused_columns)
draftkings_df = draftkings_df.drop(columns=unused_columns)

In [65]:
# Move the identifying fields into the index so the remaining columns are
# exactly the variables fed to the model
identifier_columns = ['position', 'salary', 'team', 'opponent', 'date', 'week', 'season']
fanduel_df = fanduel_df.set_index(identifier_columns)
draftkings_df = draftkings_df.set_index(identifier_columns)

In [66]:
fanduel_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 26 entries, ('D', 5000, 'LV', 'CAR', '09-22-2024', 3, 2024) to ('D', 3000, 'CAR', 'LV', '09-22-2024', 3, 2024)
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   home_team          26 non-null     int64  
 1   opp_vs_def_L8      26 non-null     float64
 2   opp_vs_def_L8_lin  26 non-null     float64
 3   opp_vs_def_L8_exp  26 non-null     float64
 4   total_line         26 non-null     float64
 5   outdoors           26 non-null     int64  
 6   grass              26 non-null     int64  
 7   spread_line        26 non-null     float64
 8   opp_total          26 non-null     float64
dtypes: float64(6), int64(3)
memory usage: 6.0+ KB


In [67]:
draftkings_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 24 entries, ('D', 3800, 'CLE', 'NYG', '09-22-2024', 3, 2024) to ('D', 2300, 'LAR', 'SF', '09-22-2024', 3, 2024)
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   home_team          24 non-null     int64  
 1   opp_vs_def_L8      24 non-null     float64
 2   opp_vs_def_L8_lin  24 non-null     float64
 3   opp_vs_def_L8_exp  24 non-null     float64
 4   total_line         24 non-null     float64
 5   outdoors           24 non-null     int64  
 6   grass              24 non-null     int64  
 7   spread_line        24 non-null     float64
 8   opp_total          24 non-null     float64
dtypes: float64(6), int64(3)
memory usage: 4.8+ KB


In [68]:
# Arrange the FanDuel columns in the same order the model was trained on
fd_feature_order = [
    'spread_line', 'home_team', 'outdoors', 'grass', 'total_line', 'opp_total',
    'opp_vs_def_L8', 'opp_vs_def_L8_lin', 'opp_vs_def_L8_exp',
]
fanduel_df = fanduel_df.loc[:, fd_feature_order]

In [69]:
# Arrange the DraftKings columns in the same order the model was trained on
dk_feature_order = [
    'spread_line', 'home_team', 'outdoors', 'grass', 'total_line', 'opp_total',
    'opp_vs_def_L8', 'opp_vs_def_L8_lin', 'opp_vs_def_L8_exp',
]
draftkings_df = draftkings_df.loc[:, dk_feature_order]

In [70]:
fanduel_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 26 entries, ('D', 5000, 'LV', 'CAR', '09-22-2024', 3, 2024) to ('D', 3000, 'CAR', 'LV', '09-22-2024', 3, 2024)
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   spread_line        26 non-null     float64
 1   home_team          26 non-null     int64  
 2   outdoors           26 non-null     int64  
 3   grass              26 non-null     int64  
 4   total_line         26 non-null     float64
 5   opp_total          26 non-null     float64
 6   opp_vs_def_L8      26 non-null     float64
 7   opp_vs_def_L8_lin  26 non-null     float64
 8   opp_vs_def_L8_exp  26 non-null     float64
dtypes: float64(6), int64(3)
memory usage: 6.0+ KB


In [71]:
draftkings_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 24 entries, ('D', 3800, 'CLE', 'NYG', '09-22-2024', 3, 2024) to ('D', 2300, 'LAR', 'SF', '09-22-2024', 3, 2024)
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   spread_line        24 non-null     float64
 1   home_team          24 non-null     int64  
 2   outdoors           24 non-null     int64  
 3   grass              24 non-null     int64  
 4   total_line         24 non-null     float64
 5   opp_total          24 non-null     float64
 6   opp_vs_def_L8      24 non-null     float64
 7   opp_vs_def_L8_lin  24 non-null     float64
 8   opp_vs_def_L8_exp  24 non-null     float64
dtypes: float64(6), int64(3)
memory usage: 4.8+ KB


In [72]:
# Write the FanDuel model input; the identifier index is written along with the features
fd_output_path = f'FD_def_for_model_{season}_{week}.csv'
fanduel_df.to_csv(fd_output_path)

In [73]:
# Write the DraftKings model input; the identifier index is written along with the features
dk_output_path = f'DK_def_for_model_{season}_{week}.csv'
draftkings_df.to_csv(dk_output_path)