In [1]:
# Basic Packages
import pandas as pd
import numpy as np
from functools import reduce
from datetime import datetime as dt

# Visualizations
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt

# Modeling
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [26]:
# Load the schedule export. index_col=0 makes the CSV's first column the
# DataFrame index instead of a regular data column.
schedule_csv = "../API Data Out/schedule_2003_to_2023.csv"
df = pd.read_csv(schedule_csv, index_col=0)

In [27]:
# Home-minus-away scoring margin, stored as "score_diff" at column position 6.
# DataFrame.insert raises ValueError when the column already exists, so
# re-running this cell on a live kernel used to fail. When the column is
# already present, refresh its values in place instead of inserting again.
home_margin = df.home_score - df.away_score
if "score_diff" in df.columns:
    df["score_diff"] = home_margin
else:
    df.insert(6, "score_diff", home_margin)

In [28]:
# Reorder the schedule in place by game_id, descending, then show the
# available column names as the cell output.
df.sort_values("game_id", ascending=False, inplace=True)
df.columns

Index(['game_id', 'season', 'game_type', 'week', 'gameday', 'weekday',
       'score_diff', 'gametime', 'away_team', 'away_score', 'home_team',
       'home_score', 'location', 'result', 'total', 'overtime', 'old_game_id',
       'gsis', 'nfl_detail_id', 'pfr', 'pff', 'espn', 'ftn', 'away_rest',
       'home_rest', 'away_moneyline', 'home_moneyline', 'spread_line',
       'away_spread_odds', 'home_spread_odds', 'total_line', 'under_odds',
       'over_odds', 'div_game', 'roof', 'surface', 'temp', 'wind',
       'away_qb_id', 'home_qb_id', 'away_qb_name', 'home_qb_name',
       'away_coach', 'home_coach', 'referee', 'stadium_id', 'stadium'],
      dtype='object')

In [29]:
# Narrow the schedule to the columns used by the spread analysis and drop
# every row that has a missing value in any of them.
# A list (rather than a tuple) is the documented way to select several labels
# with .loc, and chaining dropna() returns a fresh frame instead of mutating
# an indexing result in place, so later column assignments on `test` act on
# an independent object.
analysis_columns = ['game_id', 'game_type', 'weekday', 'score_diff', 'spread_line']
test = df.loc[:, analysis_columns].dropna()
test

Unnamed: 0,game_id,game_type,weekday,score_diff,spread_line
6449,2023_02_WAS_DEN,REG,Sunday,-2.0,4.0
6447,2023_02_SF_LA,REG,Sunday,-7.0,-7.5
6441,2023_02_SEA_DET,REG,Sunday,-6.0,4.5
6448,2023_02_NYJ_DAL,REG,Sunday,20.0,8.5
6446,2023_02_NYG_ARI,REG,Sunday,-3.0,-5.0
...,...,...,...,...,...
1047,2003_01_DEN_CIN,REG,Sunday,-20.0,-6.0
1057,2003_01_CHI_SF,REG,Sunday,42.0,6.5
1054,2003_01_BAL_PIT,REG,Sunday,19.0,4.0
1055,2003_01_ATL_DAL,REG,Sunday,-14.0,2.0


In [30]:
# Add 1/0 indicator columns derived from the line and the final margin:
#   home_favorite -> spread_line is strictly positive
#   home_cover    -> score_diff is strictly greater than spread_line
#   home_win      -> score_diff is strictly positive
margin = test['score_diff']
line = test['spread_line']

test['home_favorite'] = np.where(line.gt(0), 1, 0)
test['home_cover'] = np.where(margin.gt(line), 1, 0)
test['home_win'] = np.where(margin.gt(0), 1, 0)

In [31]:
# Size of the line (in points) to study; matched against |spread_line|.
spread_threshold = 6.5

# Keep only games whose line is exactly +/- spread_threshold, excluding
# zero-point lines and games whose final margin was zero.
# NOTE: this overwrites `test`, so after changing spread_threshold the cells
# that build `test` must be re-run before this one.
on_threshold = test.spread_line.abs() == spread_threshold
nonzero_line = test.spread_line != 0
nonzero_margin = test.score_diff != 0
test = test.loc[on_threshold & nonzero_line & nonzero_margin]
test

Unnamed: 0,game_id,game_type,weekday,score_diff,spread_line,home_favorite,home_cover,home_win
6396,2022_18_MIN_CHI,REG,Sunday,-16.0,-6.5,0,0,0
6390,2022_17_LA_LAC,REG,Sunday,21.0,6.5,1,1,1
6370,2022_16_WAS_SF,REG,Saturday,17.0,6.5,1,1,1
6362,2022_16_ATL_BAL,REG,Saturday,8.0,6.5,1,1,1
6332,2022_14_LV_LA,REG,Thursday,1.0,-6.5,0,1,1
...,...,...,...,...,...,...,...,...
1128,2003_06_PIT_DEN,REG,Sunday,3.0,6.5,1,0,1
1124,2003_06_CHI_NO,REG,Sunday,7.0,6.5,1,1,1
1127,2003_06_BAL_ARI,REG,Sunday,-8.0,-6.5,0,0,0
1076,2003_03_TB_ATL,REG,Sunday,-21.0,-6.5,0,0,0


In [32]:
# Sample sizes for the filtered games: total rows, games where the home team
# is not the favorite (home_favorite == 0), and games where it is (== 1).
rows = len(test)
home_dogs = int((test.home_favorite == 0).sum())
away_dogs = int((test.home_favorite == 1).sum())

In [33]:
# Count underdog successes. The home side is the underdog when
# home_favorite == 0; the away side is the underdog when home_favorite == 1.
home_is_dog = test.home_favorite == 0
away_is_dog = test.home_favorite == 1

# Outright wins: a home win for home underdogs, a home non-win for away underdogs.
home_dog_win = int((home_is_dog & (test.home_win == 1)).sum())
away_dog_win = int((away_is_dog & (test.home_win == 0)).sum())

# Covers: home_cover == 1 for home underdogs, home_cover == 0 for away underdogs.
home_dog_cover = int((home_is_dog & (test.home_cover == 1)).sum())
away_dog_cover = int((away_is_dog & (test.home_cover == 0)).sum())

In [34]:
# Report the sample sizes, one message per line.
print(
    f"There are {home_dogs} home underdogs in this sample",
    f"There are {away_dogs} away underdogs in this sample",
    f"There are {rows} total samples",
    sep="\n",
)

There are 75 home underdogs in this sample
There are 177 away underdogs in this sample
There are 252 total samples


In [35]:
# Outright win rates for underdogs at the selected line.
# A threshold with no qualifying games leaves a denominator at zero; report
# NaN for that rate instead of stopping the notebook with ZeroDivisionError.
home_dog_win_pct = home_dog_win / home_dogs * 100 if home_dogs else float("nan")
away_dog_win_pct = away_dog_win / away_dogs * 100 if away_dogs else float("nan")
overall_dog_win_pct = (away_dog_win + home_dog_win) / rows * 100 if rows else float("nan")

print("Outright")
print(f"The home team wins {home_dog_win_pct:.2f}% of the time when +{spread_threshold} ")
print(f"The away team wins {away_dog_win_pct:.2f}% of the time when +{spread_threshold} ")
print(f"Overall win % = {overall_dog_win_pct:.2f}% when +{spread_threshold} ")

Outright
The home team wins 26.67% of the time when +6.5 
The away team wins 32.20% of the time when +6.5 
Overal win % = 30.56% when +6.5 


In [36]:
# Against-the-spread cover rates for underdogs at the selected line.
# A threshold with no qualifying games leaves a denominator at zero; report
# NaN for that rate instead of stopping the notebook with ZeroDivisionError.
home_dog_cover_pct = home_dog_cover / home_dogs * 100 if home_dogs else float("nan")
away_dog_cover_pct = away_dog_cover / away_dogs * 100 if away_dogs else float("nan")
overall_dog_cover_pct = (home_dog_cover + away_dog_cover) / rows * 100 if rows else float("nan")

print("Cover")
print(f"The home team covers {home_dog_cover_pct:.2f}% of the time when +{spread_threshold} ")
print(f"The away team covers {away_dog_cover_pct:.2f}% of the time when +{spread_threshold} ")
print(f"Overall cover % = {overall_dog_cover_pct:.2f}% when +{spread_threshold} ")

Cover
The home team covers 50.67% of the time when +6.5 
The away team covers 54.80% of the time when +6.5 
Overal cover % = 53.57% when +6.5 


##### 