# Loading All Required Datasets, Performing Preliminary Exploratory Analysis, and Manipulating/Preparing the Various Datasets to Be Used Throughout the Project

In [1202]:
#Importing common packages
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

pd.set_option('display.max_columns', 100)

In [1203]:
#Loading in all required .csv files
basic_off = pd.read_csv('NFL Offense Data.csv')
basic_def = pd.read_csv('NFL Defense Data.csv')
adv_pass = pd.read_csv('NFL Advanced Passing Data.csv')
adv_rush = pd.read_csv('NFL Advanced Rushing Data.csv')
adv_def = pd.read_csv('NFL Advanced Defense Data.csv')
adv_rec = pd.read_csv('NFL Advanced Receiving Data.csv')
off_conv = pd.read_csv('NFL Conversions For.csv')
def_conv = pd.read_csv('NFL Conversions Against.csv')
pass_off = pd.read_csv('NFL Passing Offense.csv')
pass_def = pd.read_csv('NFL Passing Defense.csv')
spreadspoke_scores = pd.read_csv('spreadspoke_scores.csv')

In [1204]:
#Dropped all blank columns in the advanced passing DF. Also, dropped the 'Drops' and 'Drop%' columns to reduce redundancy with the advanced receiving data
adv_pass = adv_pass.drop('Unnamed: 31', axis=1)
adv_pass = adv_pass.drop('Unnamed: 32', axis=1)
adv_pass = adv_pass.drop('Unnamed: 33', axis=1)
adv_pass = adv_pass.drop('Unnamed: 34', axis=1)
adv_pass = adv_pass.drop('Unnamed: 35', axis=1)
adv_pass = adv_pass.drop('Unnamed: 36', axis=1)
adv_pass = adv_pass.drop('Unnamed: 37', axis=1)
adv_pass = adv_pass.drop('Unnamed: 38', axis=1)
adv_pass = adv_pass.drop('Drops', axis=1)
adv_pass = adv_pass.drop('Drop%', axis=1)

### Exploratory Analysis Side Note Regarding 2020 NFL Regular Season (COVID)

Because of the abnormal nature of the 2020 NFL season which was played in it's entirety, but had no fans in the stands for the majority of the regular season because of COVID, I performed a separate exploratory analysis of how the 2020 season data compared to the seasons around that year (2019-2022) using the describe() method and found that there was nothing major that would most likely be significant enough to skew predictive data throughout the process of this project. 

### Creating comprehensive advanced datasets for offense and defense

To create a full comprehensive advanced dataset for offense statistics, I merged the passing, rushing, and receiving statistics together into one DF labeled 'adv_off'.

In [1205]:
#Merged adv_pass and adv_rush together
adv_pass_rush = adv_pass.merge(adv_rush, on=['Tm', 'Season', 'G'])
adv_pass_rush.head()

Unnamed: 0,Tm,Season,G,Pass_Cmp,Pass_Att,Pass_Yds,IAY,IAY/PA,CAY,CAY/Cmp,CAY/PA,YAC,YAC/Cmp,Bats,ThAwy,Spikes,BadTh,Bad%,OnTgt,OnTgt%,Sk,PktTime,Bltz,Hrry,Hits,Prss,Prss%,Scrm,Yds/Scr,Rush_Att,Rush_Yds,Rush_TD,Rush_1D,Rush_YBC,Rush_YBC/Att,Rush_YAC,Rush_YAC/Att,Rush_BrkTkl,Att/Br
0,Atlanta Falcons,2022,17,257,415,2699,4011,9.7,1698,6.6,4.1,1229,4.8,4,12,4,85,21.30%,287,71.90%,37,2.6,142,14,41,92,18.80%,37,6.9,559,2718,17,152,1701,3.0,1017,1.8,10,55.9
1,Buffalo Bills,2022,16,361,574,4129,5262,9.2,2694,7.5,4.7,1597,4.4,14,18,1,93,16.80%,407,73.30%,33,2.5,150,41,40,114,17.20%,57,9.4,430,2232,15,130,1577,3.7,655,1.5,17,25.3
2,Carolina Panthers,2022,17,267,457,2996,3303,7.2,1465,5.5,3.2,1781,6.7,17,30,3,73,17.20%,298,70.30%,36,2.5,140,29,24,89,17.40%,19,8.2,483,2210,16,119,1336,2.8,874,1.8,19,25.4
3,Chicago Bears,2022,17,223,377,2219,3437,9.1,1449,6.5,3.8,1149,5.2,12,14,1,72,19.90%,254,70.20%,58,2.6,112,36,35,129,25.40%,73,9.2,558,3014,18,148,1893,3.4,1121,2.0,41,13.6
4,Cincinnati Bengals,2022,16,418,610,4240,4152,6.8,2350,5.6,3.9,2170,5.2,24,17,1,76,12.80%,469,79.20%,44,2.2,113,22,48,114,16.70%,27,7.5,399,1528,14,97,991,2.5,537,1.3,15,26.6


In [1206]:
#Merged adv_rec with adv_pass_rush to create full adv_off dataset
adv_off = adv_pass_rush.merge(adv_rec, on=['Tm', 'Season', 'G'])
adv_off

Unnamed: 0,Tm,Season,G,Pass_Cmp,Pass_Att,Pass_Yds,IAY,IAY/PA,CAY,CAY/Cmp,CAY/PA,YAC,YAC/Cmp,Bats,ThAwy,Spikes,BadTh,Bad%,OnTgt,OnTgt%,Sk,PktTime,Bltz,Hrry,Hits,Prss,Prss%,Scrm,Yds/Scr,Rush_Att,Rush_Yds,Rush_TD,Rush_1D,Rush_YBC,Rush_YBC/Att,Rush_YAC,Rush_YAC/Att,Rush_BrkTkl,Att/Br,Rec_Tgt,Rec,Rec_Yds,Rec_TD,Rec_1D,Rec_YBC,Rec_YBC/R,Rec_YAC,Rec_YAC/R,ADOT,Rec_BrkTkl,Rec/Br,Drop,Drop%
0,Atlanta Falcons,2022,17,257,415,2699,4011,9.7,1698,6.6,4.1,1229,4.8,4,12,4,85,21.30%,287,71.90%,37,2.6,142,14,41,92,18.80%,37,6.9,559,2718,17,152,1701,3.0,1017,1.8,10,55.9,415,257,2927,17,148,1698,6.6,1229,4.8,9.4,7,36.7,9,2.2
1,Buffalo Bills,2022,16,361,574,4129,5262,9.2,2694,7.5,4.7,1597,4.4,14,18,1,93,16.80%,407,73.30%,33,2.5,150,41,40,114,17.20%,57,9.4,430,2232,15,130,1577,3.7,655,1.5,17,25.3,574,361,4291,35,214,2694,7.5,1597,4.4,8.8,11,32.8,38,6.6
2,Carolina Panthers,2022,17,267,457,2996,3303,7.2,1465,5.5,3.2,1781,6.7,17,30,3,73,17.20%,298,70.30%,36,2.5,140,29,24,89,17.40%,19,8.2,483,2210,16,119,1336,2.8,874,1.8,19,25.4,457,267,3246,16,138,1486,5.6,1760,6.6,7.0,16,16.7,26,5.7
3,Chicago Bears,2022,17,223,377,2219,3437,9.1,1449,6.5,3.8,1149,5.2,12,14,1,72,19.90%,254,70.20%,58,2.6,112,36,35,129,25.40%,73,9.2,558,3014,18,148,1893,3.4,1121,2.0,41,13.6,377,223,2598,19,109,1476,6.6,1122,5.0,8.5,13,17.2,18,4.8
4,Cincinnati Bengals,2022,16,418,610,4240,4152,6.8,2350,5.6,3.9,2170,5.2,24,17,1,76,12.80%,469,79.20%,44,2.2,113,22,48,114,16.70%,27,7.5,399,1528,14,97,991,2.5,537,1.3,15,26.6,610,418,4520,35,224,2350,5.6,2170,5.2,6.7,32,13.1,34,5.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153,Los Angeles Chargers,2018,16,348,512,4089,3958,7.7,2189,6.3,4.3,2127,6.1,13,14,0,64,12.90%,444,76.80%,34,2.4,152,48,48,130,23.60%,6,4.3,399,1873,16,99,874,2.2,999,2.5,40,10.0,512,348,4316,32,213,2189,6.3,2127,6.1,7.7,31,11.2,23,4.5
154,Seattle Seahawks,2018,16,280,427,3093,3843,9.0,2063,7.4,4.8,1385,4.9,9,24,8,61,15.40%,376,76.10%,51,2.5,145,82,31,164,32.30%,30,9.6,534,2560,15,133,1528,2.9,1032,1.9,42,12.7,427,280,3448,35,155,2063,7.4,1385,4.9,8.6,15,18.7,18,4.2
155,San Francisco 49ers,2018,16,331,532,3867,3722,7.0,1931,5.8,3.6,2316,7.0,6,20,4,77,15.20%,379,80.80%,48,2.3,162,58,63,169,28.30%,17,5.6,423,1902,7,98,1033,2.4,869,2.1,30,14.1,532,331,4247,26,201,1931,5.8,2316,7.0,6.9,30,11.0,27,5.1
156,Tampa Bay Buccaneers,2018,16,408,625,5125,6635,10.6,3574,8.8,5.7,1784,4.4,11,11,1,96,15.70%,423,69.90%,41,2.4,160,67,56,164,22.60%,59,6.7,389,1523,11,80,849,2.2,674,1.7,24,16.2,625,408,5358,36,264,3581,8.8,1777,4.4,10.5,19,21.5,26,4.2


In [1207]:
#Removing blank columns from 'off_conv' 
off_conv = off_conv.drop('Unnamed: 11', axis=1)
off_conv

Unnamed: 0,Tm,Season,3DAtt,3DConv,3D%,4DAtt,4DConv,4D%,RZAtt,RZTD,RZPct
0,Los Angeles Chargers,2022,15.1,6.59,43.60%,1.71,0.88,51.70%,3.65,1.94,53.20%
1,Pittsburgh Steelers,2022,13.9,6.24,44.90%,0.88,0.65,73.30%,3.06,1.59,51.90%
2,Dallas Cowboys,2022,13.6,6.18,45.50%,1.12,0.59,52.60%,3.29,2.35,71.40%
3,Philadelphia Eagles,2022,13.1,6.00,45.90%,1.88,1.29,68.80%,3.47,2.35,67.80%
4,Buffalo Bills,2022,12.1,6.06,50.30%,0.81,0.44,53.80%,3.63,2.19,60.30%
...,...,...,...,...,...,...,...,...,...,...,...
155,Denver Broncos,2018,12.9,4.31,33.30%,1.56,0.94,60.00%,2.75,1.56,56.80%
156,Cincinnati Bengals,2018,11.7,4.31,36.90%,1.06,0.56,52.90%,2.81,2.00,71.10%
157,Buffalo Bills,2018,13.4,4.25,31.60%,1.13,0.44,38.90%,2.31,1.38,59.50%
158,Arizona Cardinals,2018,12.4,3.63,29.10%,1.00,0.38,37.50%,1.75,1.13,64.30%


In [1208]:
#Merging offense conversions to adv_off and reviewing full advanced offense dataset
adv_off = adv_off.merge(off_conv, on=['Tm', 'Season'])
adv_off

Unnamed: 0,Tm,Season,G,Pass_Cmp,Pass_Att,Pass_Yds,IAY,IAY/PA,CAY,CAY/Cmp,CAY/PA,YAC,YAC/Cmp,Bats,ThAwy,Spikes,BadTh,Bad%,OnTgt,OnTgt%,Sk,PktTime,Bltz,Hrry,Hits,Prss,Prss%,Scrm,Yds/Scr,Rush_Att,Rush_Yds,Rush_TD,Rush_1D,Rush_YBC,Rush_YBC/Att,Rush_YAC,Rush_YAC/Att,Rush_BrkTkl,Att/Br,Rec_Tgt,Rec,Rec_Yds,Rec_TD,Rec_1D,Rec_YBC,Rec_YBC/R,Rec_YAC,Rec_YAC/R,ADOT,Rec_BrkTkl,Rec/Br,Drop,Drop%,3DAtt,3DConv,3D%,4DAtt,4DConv,4D%,RZAtt,RZTD,RZPct
0,Atlanta Falcons,2022,17,257,415,2699,4011,9.7,1698,6.6,4.1,1229,4.8,4,12,4,85,21.30%,287,71.90%,37,2.6,142,14,41,92,18.80%,37,6.9,559,2718,17,152,1701,3.0,1017,1.8,10,55.9,415,257,2927,17,148,1698,6.6,1229,4.8,9.4,7,36.7,9,2.2,11.5,4.82,41.80%,1.06,0.53,50.00%,3.18,1.76,55.60%
1,Buffalo Bills,2022,16,361,574,4129,5262,9.2,2694,7.5,4.7,1597,4.4,14,18,1,93,16.80%,407,73.30%,33,2.5,150,41,40,114,17.20%,57,9.4,430,2232,15,130,1577,3.7,655,1.5,17,25.3,574,361,4291,35,214,2694,7.5,1597,4.4,8.8,11,32.8,38,6.6,12.1,6.06,50.30%,0.81,0.44,53.80%,3.63,2.19,60.30%
2,Carolina Panthers,2022,17,267,457,2996,3303,7.2,1465,5.5,3.2,1781,6.7,17,30,3,73,17.20%,298,70.30%,36,2.5,140,29,24,89,17.40%,19,8.2,483,2210,16,119,1336,2.8,874,1.8,19,25.4,457,267,3246,16,138,1486,5.6,1760,6.6,7.0,16,16.7,26,5.7,11.9,3.76,31.70%,1.24,0.71,57.10%,2.71,1.53,56.50%
3,Chicago Bears,2022,17,223,377,2219,3437,9.1,1449,6.5,3.8,1149,5.2,12,14,1,72,19.90%,254,70.20%,58,2.6,112,36,35,129,25.40%,73,9.2,558,3014,18,148,1893,3.4,1121,2.0,41,13.6,377,223,2598,19,109,1476,6.6,1122,5.0,8.5,13,17.2,18,4.8,12.9,5.29,40.90%,1.47,0.47,32.00%,2.94,1.65,56.00%
4,Cincinnati Bengals,2022,16,418,610,4240,4152,6.8,2350,5.6,3.9,2170,5.2,24,17,1,76,12.80%,469,79.20%,44,2.2,113,22,48,114,16.70%,27,7.5,399,1528,14,97,991,2.5,537,1.3,15,26.6,610,418,4520,35,224,2350,5.6,2170,5.2,6.7,32,13.1,34,5.6,12.9,5.94,46.10%,0.75,0.19,25.00%,3.56,2.31,64.90%
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153,Los Angeles Chargers,2018,16,348,512,4089,3958,7.7,2189,6.3,4.3,2127,6.1,13,14,0,64,12.90%,444,76.80%,34,2.4,152,48,48,130,23.60%,6,4.3,399,1873,16,99,874,2.2,999,2.5,40,10.0,512,348,4316,32,213,2189,6.3,2127,6.1,7.7,31,11.2,23,4.5,11.1,4.38,39.50%,0.50,0.44,87.50%,3.31,2.13,64.20%
154,Seattle Seahawks,2018,16,280,427,3093,3843,9.0,2063,7.4,4.8,1385,4.9,9,24,8,61,15.40%,376,76.10%,51,2.5,145,82,31,164,32.30%,30,9.6,534,2560,15,133,1528,2.9,1032,1.9,42,12.7,427,280,3448,35,155,2063,7.4,1385,4.9,8.6,15,18.7,18,4.2,13.0,5.06,38.90%,0.88,0.69,78.60%,3.44,2.25,65.50%
155,San Francisco 49ers,2018,16,331,532,3867,3722,7.0,1931,5.8,3.6,2316,7.0,6,20,4,77,15.20%,379,80.80%,48,2.3,162,58,63,169,28.30%,17,5.6,423,1902,7,98,1033,2.4,869,2.1,30,14.1,532,331,4247,26,201,1931,5.8,2316,7.0,6.9,30,11.0,27,5.1,12.1,4.56,37.80%,0.63,0.31,50.00%,3.19,1.31,41.20%
156,Tampa Bay Buccaneers,2018,16,408,625,5125,6635,10.6,3574,8.8,5.7,1784,4.4,11,11,1,96,15.70%,423,69.90%,41,2.4,160,67,56,164,22.60%,59,6.7,389,1523,11,80,849,2.2,674,1.7,24,16.2,625,408,5358,36,264,3581,8.8,1777,4.4,10.5,19,21.5,26,4.2,11.7,5.38,46.00%,0.81,0.56,69.20%,3.75,2.25,60.00%


In [1209]:
#Mergingin def_conv to adv_def and reviewing full advanced defense dataset
adv_def = adv_def.merge(def_conv, on=['Tm', 'Season'])
adv_def

Unnamed: 0,Tm,Season,G,Att,Cmp,Yds,TD,DADOT,Air,YAC,Bltz,Bltz%,Hrry,Hrry%,QBKD,QBKD%,Sk,Prss,Prss%,MTkl,3DAtt,3DConv,3D%,4DAtt,4DConv,4D%,RZAtt,RZTD,RZPct
0,Atlanta Falcons,2022,17,561,372,3942,26,8.3,2297,1793,98,16.30%,23,3.80%,44,7.80%,21,88,14.60%,54,12.9,5.94,45.90%,1.00,0.41,41.20%,3.53,1.94,55.00%
1,Buffalo Bills,2022,16,570,362,3433,21,7.5,2276,1901,124,19.40%,46,7.20%,56,9.80%,40,142,22.20%,77,13.0,4.88,37.50%,2.06,1.19,57.60%,3.06,1.38,44.90%
2,Carolina Panthers,2022,17,580,383,3868,25,7.9,2434,1664,186,29.20%,41,6.40%,50,8.60%,35,126,19.80%,72,13.6,5.59,41.10%,1.06,0.47,44.40%,3.35,1.88,56.10%
3,Chicago Bears,2022,17,481,323,3716,22,7.8,1973,1867,97,18.20%,45,8.40%,20,4.20%,20,85,15.90%,74,12.0,5.88,49.00%,0.88,0.35,40.00%,3.65,2.35,64.50%
4,Cincinnati Bengals,2022,16,559,329,3665,17,8.5,2575,2025,124,20.40%,38,6.30%,68,12.20%,30,136,22.40%,69,13.3,5.25,39.60%,1.44,0.56,39.10%,3.13,1.63,52.00%
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153,Los Angeles Chargers,2018,16,548,351,3646,23,7.2,2142,2125,104,17.00%,61,10.00%,34,6.20%,38,133,21.70%,112,12.6,4.94,39.10%,1.25,0.69,55.00%,3.13,1.56,50.00%
154,Seattle Seahawks,2018,16,546,355,3842,26,7.8,2091,2098,112,18.40%,73,12.00%,57,10.40%,43,173,28.50%,98,11.4,4.00,35.00%,0.81,0.38,46.20%,3.19,1.56,49.00%
155,San Francisco 49ers,2018,16,542,344,3732,35,8.4,1984,1714,123,20.40%,95,15.70%,44,8.10%,37,176,29.10%,114,13.5,5.44,40.30%,1.06,0.63,58.80%,3.63,2.38,65.50%
156,Tampa Bay Buccaneers,2018,16,534,387,4151,34,8.2,2496,1815,141,23.90%,39,6.60%,42,7.90%,38,119,20.20%,99,11.3,4.56,40.30%,0.88,0.44,50.00%,3.63,2.81,77.60%


## Explanation of 'full' offense and 'full' defense datasets
Because of the importance of passing offense and defense in today's NFL, I also decided to include further detailed statistics for both passing offense and defense and add them to the basic offense and defense data sets from the past 15 years. This combined 'basic' and 'passing' statistics I refer to as the 'full' offense and defense throughout the rest of the project.

In [1210]:
#Removing blank columns from pass_def
pass_def = pass_def.drop('Unnamed: 17', axis=1)
pass_def = pass_def.drop('Unnamed: 18', axis=1)
pass_def = pass_def.drop('Unnamed: 19', axis=1)
#Reviewing pass_def
pass_def

Unnamed: 0,Tm,Season,PD_Cmp%,PD_TD%,PD,PD_Int%,PD_Y/A,PD_AY/A,PD_Y/C,PD_Rate,PD_Sk,PD_Sk_Yds,PD_QBHits,PD_TFL,PD_Sk%,APDNY/A,PD_EXP
0,Philadelphia Eagles,2022,62.9,4.0,5.00,3.1,6.4,5.8,10.1,81.6,4.12,28.90,7.29,5.71,11.2,4.4,3.61
1,New Orleans Saints,2022,60.1,3.2,3.94,1.3,6.4,6.4,10.6,83.8,2.82,16.40,5.12,4.53,8.2,5.4,-1.62
2,New York Jets,2022,62.4,2.7,4.29,2.2,6.3,5.9,10.2,80.5,2.65,18.00,7.65,4.76,7.5,5.0,0.22
3,Washington Commanders,2022,59.9,5.1,4.00,1.8,6.9,7.1,11.5,90.2,2.53,16.40,6.29,5.18,7.7,6.0,-1.20
4,Cleveland Browns,2022,60.7,3.9,5.00,2.1,6.8,6.7,11.3,85.2,2.00,12.40,3.88,4.06,6.1,5.9,-2.14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
475,Kansas City Chiefs,2008,66.7,4.0,3.63,2.5,7.3,7.0,10.9,91.1,0.63,3.88,2.13,3.50,1.9,6.7,1.27
476,New York Jets,2008,64.3,4.0,4.88,2.4,7.0,6.7,10.9,88.1,2.56,15.80,4.13,4.31,6.7,5.8,3.23
477,Chicago Bears,2008,61.6,3.4,6.50,3.5,6.6,5.6,10.6,77.2,1.75,13.60,4.81,6.06,4.3,5.1,1.08
478,Los Angeles Chargers,2008,67.9,4.1,4.88,2.5,6.8,6.5,10.0,90.3,1.75,8.25,3.50,3.25,4.4,6.0,1.06


In [1211]:
#Merging pass_off and pass_def to the basic statistics datasets to create full offense 'full_off' and full defense 'full_def' DataFrames
full_off = basic_off.merge(pass_off, on=['Tm', 'Season'])
full_def = basic_def.merge(pass_def, on=['Tm', 'Season'])

In [1212]:
#Reviewing 'full_off'
full_off

Unnamed: 0,Tm,Season,PF,Yds,Ply,Y/P,TO,FL,1stD,P_Cmp,P_Att,P_Yds,P_TD,P_Int,NPY/A,P_1stD,R_Att,R_Yds,R_TD,RY/A,R_1stD,Pen,Pen_Yds,1stPy,Sc%,TO%,EXP,P_Cmp%,P_TD%,P_Int%,P_Lng,P_Y/A,P_AY/A,P_Y/C,P_Rate,P_Sk,Sk_Yds_Lost,P_Sk%,APNY/A,QB_4QC,QB_GWD,P_EXP
0,Kansas City Chiefs,2022,29.2,413.6,64.4,6.4,1.35,0.65,24.0,25.6,38.3,297.8,2.41,0.71,7.5,16.00,24.5,115.9,1.06,4.7,6.18,5.12,49.3,1.82,46.4,10.1,14.10,66.8,6.3,1.8,67,8.1,8.5,12.1,104.7,1.53,11.10,3.8,7.9,0.24,0.24,13.70
1,Philadelphia Eagles,2022,28.1,389.1,66.1,5.9,1.12,0.59,22.6,20.6,31.5,241.5,1.47,0.53,7.1,11.50,32.0,147.6,1.88,4.6,9.47,5.29,40.1,1.71,42.5,10.2,10.30,65.3,4.7,1.7,78,8.1,8.3,12.5,99.0,2.59,15.20,7.6,7.2,0.06,0.12,7.72
2,Dallas Cowboys,2022,27.5,354.9,65.5,5.4,1.35,0.29,20.4,20.9,32.7,219.8,1.65,1.06,6.4,11.40,31.2,135.2,1.41,4.3,7.59,6.12,49.5,1.41,41.5,10.8,5.48,63.8,5.0,3.2,68,7.0,6.6,11.0,87.9,1.59,10.30,4.6,6.0,0.12,0.24,6.27
3,Buffalo Bills,2022,28.4,397.6,64.8,6.1,1.69,0.81,22.9,22.6,35.9,258.1,2.19,0.88,6.8,13.30,26.9,139.5,0.94,5.2,8.00,5.75,44.3,1.69,45.0,15.2,9.77,62.9,6.1,2.4,98,7.5,7.6,11.9,95.8,2.06,10.10,5.4,6.9,0.19,0.25,8.07
4,Detroit Lions,2022,26.6,380.0,64.2,5.9,0.88,0.47,22.1,22.5,34.6,251.8,1.71,0.41,7.0,13.40,28.2,128.2,1.35,4.5,6.71,5.29,45.5,2.00,43.2,8.0,9.53,65.1,4.9,1.2,81,7.6,8.0,11.6,99.3,1.41,9.59,3.9,7.4,0.18,0.18,10.20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
475,Washington Commanders,2008,16.6,320.0,64.1,5.0,1.13,0.75,18.4,19.9,31.9,189.1,0.88,0.38,5.5,10.30,29.9,130.9,0.75,4.4,6.81,5.19,40.3,1.31,29.2,9.6,-10.85,62.4,2.7,1.2,67,6.5,6.5,10.3,85.2,2.38,16.60,6.9,5.5,0.06,0.19,-4.84
476,Las Vegas Raiders,2008,16.4,272.3,57.4,4.7,1.44,0.75,14.1,13.9,26.3,148.1,0.81,0.69,5.2,7.69,28.7,124.2,0.56,4.3,5.44,6.81,51.4,0.94,24.6,11.8,-14.47,52.7,3.1,2.6,84,6.3,5.7,11.9,71.6,2.44,16.90,8.5,4.6,0.06,0.13,-6.46
477,Los Angeles Rams,2008,14.5,287.3,61.4,4.7,1.94,0.75,15.6,18.3,32.5,184.2,0.69,1.19,5.2,8.75,26.1,103.1,0.50,4.0,5.94,6.06,44.9,0.88,26.3,16.3,-14.88,56.2,2.1,3.7,80,6.3,5.1,11.2,66.9,2.81,20.10,8.0,4.1,0.06,0.06,-10.27
478,Cleveland Browns,2008,14.5,249.1,57.6,4.3,1.63,0.38,14.6,14.9,30.5,148.8,0.69,1.25,4.6,7.94,25.6,100.3,0.38,3.9,5.25,6.25,41.8,1.38,27.3,14.5,-13.62,48.8,2.3,4.1,70,5.2,3.8,10.7,54.8,1.50,9.81,4.7,3.3,0.13,0.19,-10.08


In [1213]:
#Reviewing 'full_def'
full_def

Unnamed: 0,Tm,Season,PA,Yds,Ply,Y/P,TO,FL,1stD,P_Cmp,P_Att,P_Yds,P_TD,P_Int,NPY/A,P_1stD,R_Att,R_Yds,R_TD,RY/A,R_1stD,Pen,Pen_Yds,1stPy,Sc%,TO%,EXP,PD_Cmp%,PD_TD%,PD,PD_Int%,PD_Y/A,PD_AY/A,PD_Y/C,PD_Rate,PD_Sk,PD_Sk_Yds,PD_QBHits,PD_TFL,PD_Sk%,APDNY/A,PD_EXP
0,San Francisco 49ers,2022,16.3,300.6,60.4,5.0,1.76,0.59,17.1,22.9,34.9,222.9,1.18,1.18,5.9,11.10,22.9,77.7,0.65,3.4,4.53,5.65,48.9,1.53,25.7,15.3,4.01,65.6,3.4,4.59,3.4,6.9,6.1,10.5,82.7,2.59,18.10,7.12,4.53,6.9,5.2,1.81
1,Buffalo Bills,2022,17.9,319.1,62.3,5.1,1.69,0.63,19.6,22.6,35.6,214.6,1.31,1.06,5.6,11.30,24.2,104.6,0.63,4.3,6.44,5.56,47.1,1.81,31.0,14.3,0.68,63.5,3.7,5.63,3.0,6.5,5.9,10.3,82.1,2.50,18.00,6.13,5.81,6.6,5.1,-1.15
2,Baltimore Ravens,2022,18.5,324.3,61.6,5.3,1.47,0.65,19.0,23.2,34.9,232.2,1.18,0.82,6.2,12.10,23.9,92.1,0.65,3.9,5.35,4.65,38.2,1.53,35.8,13.9,-1.86,66.4,3.4,4.53,2.4,7.2,6.8,10.8,88.7,2.82,17.90,5.59,4.53,7.5,5.8,-3.30
3,New York Jets,2022,18.6,311.1,64.3,4.8,0.94,0.24,18.2,20.4,32.7,189.4,0.88,0.71,5.4,9.76,28.9,121.6,0.82,4.2,6.88,5.29,41.6,1.59,32.3,8.3,1.53,62.4,2.7,4.29,2.2,6.3,5.9,10.2,80.5,2.65,18.00,7.65,4.76,7.5,5.0,0.22
4,Cincinnati Bengals,2022,20.1,335.7,62.3,5.4,1.50,0.69,18.4,20.6,34.9,229.1,1.06,0.81,6.2,10.80,25.4,106.6,0.75,4.2,6.38,6.25,52.0,1.25,36.4,13.1,-0.59,58.9,3.0,4.69,2.3,6.8,6.4,11.6,80.1,1.88,10.20,6.38,3.56,5.1,5.8,-1.98
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
475,Arizona Cardinals,2008,26.6,331.5,62.1,5.3,1.88,1.06,19.5,20.2,32.3,221.3,2.25,0.81,6.5,10.80,27.8,110.3,0.81,4.0,7.31,6.13,51.0,1.44,38.7,15.5,4.19,62.5,7.0,4.69,2.5,7.2,7.5,11.6,96.9,1.94,11.90,4.88,4.25,5.7,6.7,-0.37
476,Kansas City Chiefs,2008,27.5,393.2,65.1,6.0,1.81,1.00,21.5,21.8,32.6,234.3,1.31,0.81,7.0,12.20,31.8,158.9,1.56,5.0,8.31,5.06,36.8,1.00,43.6,15.5,3.69,66.7,4.0,3.63,2.5,7.3,7.0,10.9,91.1,0.63,3.88,2.13,3.50,1.9,6.7,1.27
477,Denver Broncos,2008,28.0,374.6,61.9,6.1,0.81,0.44,20.4,20.8,30.9,228.5,1.25,0.38,7.0,11.60,29.3,146.1,1.63,5.0,7.63,5.19,46.1,1.19,46.4,7.7,-2.98,67.3,4.0,3.25,1.2,7.7,7.9,11.4,98.5,1.63,8.81,3.38,4.00,5.0,7.3,-6.05
478,Los Angeles Rams,2008,29.1,371.9,60.9,6.1,1.63,0.88,20.0,17.4,27.8,217.2,1.25,0.75,7.3,9.88,31.3,154.7,1.63,4.9,9.13,5.56,40.9,1.00,41.6,13.0,1.79,62.6,4.5,3.50,2.7,8.3,8.0,13.2,92.5,1.88,12.60,4.81,5.44,6.3,7.0,-0.54


In [1214]:
#Adding PF column from basic stats to merge together with advanced offense DF.
PF_off = basic_off[['Tm', 'Season', 'PF']]
adv_off = adv_off.merge(PF_off, on=['Tm', 'Season'])
adv_off

Unnamed: 0,Tm,Season,G,Pass_Cmp,Pass_Att,Pass_Yds,IAY,IAY/PA,CAY,CAY/Cmp,CAY/PA,YAC,YAC/Cmp,Bats,ThAwy,Spikes,BadTh,Bad%,OnTgt,OnTgt%,Sk,PktTime,Bltz,Hrry,Hits,Prss,Prss%,Scrm,Yds/Scr,Rush_Att,Rush_Yds,Rush_TD,Rush_1D,Rush_YBC,Rush_YBC/Att,Rush_YAC,Rush_YAC/Att,Rush_BrkTkl,Att/Br,Rec_Tgt,Rec,Rec_Yds,Rec_TD,Rec_1D,Rec_YBC,Rec_YBC/R,Rec_YAC,Rec_YAC/R,ADOT,Rec_BrkTkl,Rec/Br,Drop,Drop%,3DAtt,3DConv,3D%,4DAtt,4DConv,4D%,RZAtt,RZTD,RZPct,PF
0,Atlanta Falcons,2022,17,257,415,2699,4011,9.7,1698,6.6,4.1,1229,4.8,4,12,4,85,21.30%,287,71.90%,37,2.6,142,14,41,92,18.80%,37,6.9,559,2718,17,152,1701,3.0,1017,1.8,10,55.9,415,257,2927,17,148,1698,6.6,1229,4.8,9.4,7,36.7,9,2.2,11.5,4.82,41.80%,1.06,0.53,50.00%,3.18,1.76,55.60%,21.5
1,Buffalo Bills,2022,16,361,574,4129,5262,9.2,2694,7.5,4.7,1597,4.4,14,18,1,93,16.80%,407,73.30%,33,2.5,150,41,40,114,17.20%,57,9.4,430,2232,15,130,1577,3.7,655,1.5,17,25.3,574,361,4291,35,214,2694,7.5,1597,4.4,8.8,11,32.8,38,6.6,12.1,6.06,50.30%,0.81,0.44,53.80%,3.63,2.19,60.30%,28.4
2,Carolina Panthers,2022,17,267,457,2996,3303,7.2,1465,5.5,3.2,1781,6.7,17,30,3,73,17.20%,298,70.30%,36,2.5,140,29,24,89,17.40%,19,8.2,483,2210,16,119,1336,2.8,874,1.8,19,25.4,457,267,3246,16,138,1486,5.6,1760,6.6,7.0,16,16.7,26,5.7,11.9,3.76,31.70%,1.24,0.71,57.10%,2.71,1.53,56.50%,20.4
3,Chicago Bears,2022,17,223,377,2219,3437,9.1,1449,6.5,3.8,1149,5.2,12,14,1,72,19.90%,254,70.20%,58,2.6,112,36,35,129,25.40%,73,9.2,558,3014,18,148,1893,3.4,1121,2.0,41,13.6,377,223,2598,19,109,1476,6.6,1122,5.0,8.5,13,17.2,18,4.8,12.9,5.29,40.90%,1.47,0.47,32.00%,2.94,1.65,56.00%,19.2
4,Cincinnati Bengals,2022,16,418,610,4240,4152,6.8,2350,5.6,3.9,2170,5.2,24,17,1,76,12.80%,469,79.20%,44,2.2,113,22,48,114,16.70%,27,7.5,399,1528,14,97,991,2.5,537,1.3,15,26.6,610,418,4520,35,224,2350,5.6,2170,5.2,6.7,32,13.1,34,5.6,12.9,5.94,46.10%,0.75,0.19,25.00%,3.56,2.31,64.90%,26.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153,Los Angeles Chargers,2018,16,348,512,4089,3958,7.7,2189,6.3,4.3,2127,6.1,13,14,0,64,12.90%,444,76.80%,34,2.4,152,48,48,130,23.60%,6,4.3,399,1873,16,99,874,2.2,999,2.5,40,10.0,512,348,4316,32,213,2189,6.3,2127,6.1,7.7,31,11.2,23,4.5,11.1,4.38,39.50%,0.50,0.44,87.50%,3.31,2.13,64.20%,26.8
154,Seattle Seahawks,2018,16,280,427,3093,3843,9.0,2063,7.4,4.8,1385,4.9,9,24,8,61,15.40%,376,76.10%,51,2.5,145,82,31,164,32.30%,30,9.6,534,2560,15,133,1528,2.9,1032,1.9,42,12.7,427,280,3448,35,155,2063,7.4,1385,4.9,8.6,15,18.7,18,4.2,13.0,5.06,38.90%,0.88,0.69,78.60%,3.44,2.25,65.50%,26.8
155,San Francisco 49ers,2018,16,331,532,3867,3722,7.0,1931,5.8,3.6,2316,7.0,6,20,4,77,15.20%,379,80.80%,48,2.3,162,58,63,169,28.30%,17,5.6,423,1902,7,98,1033,2.4,869,2.1,30,14.1,532,331,4247,26,201,1931,5.8,2316,7.0,6.9,30,11.0,27,5.1,12.1,4.56,37.80%,0.63,0.31,50.00%,3.19,1.31,41.20%,21.4
156,Tampa Bay Buccaneers,2018,16,408,625,5125,6635,10.6,3574,8.8,5.7,1784,4.4,11,11,1,96,15.70%,423,69.90%,41,2.4,160,67,56,164,22.60%,59,6.7,389,1523,11,80,849,2.2,674,1.7,24,16.2,625,408,5358,36,264,3581,8.8,1777,4.4,10.5,19,21.5,26,4.2,11.7,5.38,46.00%,0.81,0.56,69.20%,3.75,2.25,60.00%,24.8


In [1215]:
#After trying to fit models to the adv_off and adv_def datasets, I realized the '%' character needed to be removed from all columns that contained it, so I went back and removed those on the front end here.
adv_off['Bad%'] = adv_off['Bad%'].str.replace('%', '')
adv_off['OnTgt%'] = adv_off['OnTgt%'].str.replace('%', '')
adv_off['Prss%'] = adv_off['Prss%'].str.replace('%', '')
adv_off['3D%'] = adv_off['3D%'].str.replace('%', '')
adv_off['4D%'] = adv_off['4D%'].str.replace('%', '')
adv_off['RZPct'] = adv_off['RZPct'].str.replace('%', '')
adv_off

Unnamed: 0,Tm,Season,G,Pass_Cmp,Pass_Att,Pass_Yds,IAY,IAY/PA,CAY,CAY/Cmp,CAY/PA,YAC,YAC/Cmp,Bats,ThAwy,Spikes,BadTh,Bad%,OnTgt,OnTgt%,Sk,PktTime,Bltz,Hrry,Hits,Prss,Prss%,Scrm,Yds/Scr,Rush_Att,Rush_Yds,Rush_TD,Rush_1D,Rush_YBC,Rush_YBC/Att,Rush_YAC,Rush_YAC/Att,Rush_BrkTkl,Att/Br,Rec_Tgt,Rec,Rec_Yds,Rec_TD,Rec_1D,Rec_YBC,Rec_YBC/R,Rec_YAC,Rec_YAC/R,ADOT,Rec_BrkTkl,Rec/Br,Drop,Drop%,3DAtt,3DConv,3D%,4DAtt,4DConv,4D%,RZAtt,RZTD,RZPct,PF
0,Atlanta Falcons,2022,17,257,415,2699,4011,9.7,1698,6.6,4.1,1229,4.8,4,12,4,85,21.30,287,71.90,37,2.6,142,14,41,92,18.80,37,6.9,559,2718,17,152,1701,3.0,1017,1.8,10,55.9,415,257,2927,17,148,1698,6.6,1229,4.8,9.4,7,36.7,9,2.2,11.5,4.82,41.80,1.06,0.53,50.00,3.18,1.76,55.60,21.5
1,Buffalo Bills,2022,16,361,574,4129,5262,9.2,2694,7.5,4.7,1597,4.4,14,18,1,93,16.80,407,73.30,33,2.5,150,41,40,114,17.20,57,9.4,430,2232,15,130,1577,3.7,655,1.5,17,25.3,574,361,4291,35,214,2694,7.5,1597,4.4,8.8,11,32.8,38,6.6,12.1,6.06,50.30,0.81,0.44,53.80,3.63,2.19,60.30,28.4
2,Carolina Panthers,2022,17,267,457,2996,3303,7.2,1465,5.5,3.2,1781,6.7,17,30,3,73,17.20,298,70.30,36,2.5,140,29,24,89,17.40,19,8.2,483,2210,16,119,1336,2.8,874,1.8,19,25.4,457,267,3246,16,138,1486,5.6,1760,6.6,7.0,16,16.7,26,5.7,11.9,3.76,31.70,1.24,0.71,57.10,2.71,1.53,56.50,20.4
3,Chicago Bears,2022,17,223,377,2219,3437,9.1,1449,6.5,3.8,1149,5.2,12,14,1,72,19.90,254,70.20,58,2.6,112,36,35,129,25.40,73,9.2,558,3014,18,148,1893,3.4,1121,2.0,41,13.6,377,223,2598,19,109,1476,6.6,1122,5.0,8.5,13,17.2,18,4.8,12.9,5.29,40.90,1.47,0.47,32.00,2.94,1.65,56.00,19.2
4,Cincinnati Bengals,2022,16,418,610,4240,4152,6.8,2350,5.6,3.9,2170,5.2,24,17,1,76,12.80,469,79.20,44,2.2,113,22,48,114,16.70,27,7.5,399,1528,14,97,991,2.5,537,1.3,15,26.6,610,418,4520,35,224,2350,5.6,2170,5.2,6.7,32,13.1,34,5.6,12.9,5.94,46.10,0.75,0.19,25.00,3.56,2.31,64.90,26.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153,Los Angeles Chargers,2018,16,348,512,4089,3958,7.7,2189,6.3,4.3,2127,6.1,13,14,0,64,12.90,444,76.80,34,2.4,152,48,48,130,23.60,6,4.3,399,1873,16,99,874,2.2,999,2.5,40,10.0,512,348,4316,32,213,2189,6.3,2127,6.1,7.7,31,11.2,23,4.5,11.1,4.38,39.50,0.50,0.44,87.50,3.31,2.13,64.20,26.8
154,Seattle Seahawks,2018,16,280,427,3093,3843,9.0,2063,7.4,4.8,1385,4.9,9,24,8,61,15.40,376,76.10,51,2.5,145,82,31,164,32.30,30,9.6,534,2560,15,133,1528,2.9,1032,1.9,42,12.7,427,280,3448,35,155,2063,7.4,1385,4.9,8.6,15,18.7,18,4.2,13.0,5.06,38.90,0.88,0.69,78.60,3.44,2.25,65.50,26.8
155,San Francisco 49ers,2018,16,331,532,3867,3722,7.0,1931,5.8,3.6,2316,7.0,6,20,4,77,15.20,379,80.80,48,2.3,162,58,63,169,28.30,17,5.6,423,1902,7,98,1033,2.4,869,2.1,30,14.1,532,331,4247,26,201,1931,5.8,2316,7.0,6.9,30,11.0,27,5.1,12.1,4.56,37.80,0.63,0.31,50.00,3.19,1.31,41.20,21.4
156,Tampa Bay Buccaneers,2018,16,408,625,5125,6635,10.6,3574,8.8,5.7,1784,4.4,11,11,1,96,15.70,423,69.90,41,2.4,160,67,56,164,22.60,59,6.7,389,1523,11,80,849,2.2,674,1.7,24,16.2,625,408,5358,36,264,3581,8.8,1777,4.4,10.5,19,21.5,26,4.2,11.7,5.38,46.00,0.81,0.56,69.20,3.75,2.25,60.00,24.8


In [1216]:
#Adding PA column from basic stats to merge together with advanced defense DF.
PA_def = basic_def[['Tm', 'Season', 'PA']]
adv_def = adv_def.merge(PA_def, on=['Tm', 'Season'])
adv_def

Unnamed: 0,Tm,Season,G,Att,Cmp,Yds,TD,DADOT,Air,YAC,Bltz,Bltz%,Hrry,Hrry%,QBKD,QBKD%,Sk,Prss,Prss%,MTkl,3DAtt,3DConv,3D%,4DAtt,4DConv,4D%,RZAtt,RZTD,RZPct,PA
0,Atlanta Falcons,2022,17,561,372,3942,26,8.3,2297,1793,98,16.30%,23,3.80%,44,7.80%,21,88,14.60%,54,12.9,5.94,45.90%,1.00,0.41,41.20%,3.53,1.94,55.00%,22.7
1,Buffalo Bills,2022,16,570,362,3433,21,7.5,2276,1901,124,19.40%,46,7.20%,56,9.80%,40,142,22.20%,77,13.0,4.88,37.50%,2.06,1.19,57.60%,3.06,1.38,44.90%,17.9
2,Carolina Panthers,2022,17,580,383,3868,25,7.9,2434,1664,186,29.20%,41,6.40%,50,8.60%,35,126,19.80%,72,13.6,5.59,41.10%,1.06,0.47,44.40%,3.35,1.88,56.10%,22.0
3,Chicago Bears,2022,17,481,323,3716,22,7.8,1973,1867,97,18.20%,45,8.40%,20,4.20%,20,85,15.90%,74,12.0,5.88,49.00%,0.88,0.35,40.00%,3.65,2.35,64.50%,27.2
4,Cincinnati Bengals,2022,16,559,329,3665,17,8.5,2575,2025,124,20.40%,38,6.30%,68,12.20%,30,136,22.40%,69,13.3,5.25,39.60%,1.44,0.56,39.10%,3.13,1.63,52.00%,20.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153,Los Angeles Chargers,2018,16,548,351,3646,23,7.2,2142,2125,104,17.00%,61,10.00%,34,6.20%,38,133,21.70%,112,12.6,4.94,39.10%,1.25,0.69,55.00%,3.13,1.56,50.00%,20.6
154,Seattle Seahawks,2018,16,546,355,3842,26,7.8,2091,2098,112,18.40%,73,12.00%,57,10.40%,43,173,28.50%,98,11.4,4.00,35.00%,0.81,0.38,46.20%,3.19,1.56,49.00%,21.7
155,San Francisco 49ers,2018,16,542,344,3732,35,8.4,1984,1714,123,20.40%,95,15.70%,44,8.10%,37,176,29.10%,114,13.5,5.44,40.30%,1.06,0.63,58.80%,3.63,2.38,65.50%,27.2
156,Tampa Bay Buccaneers,2018,16,534,387,4151,34,8.2,2496,1815,141,23.90%,39,6.60%,42,7.90%,38,119,20.20%,99,11.3,4.56,40.30%,0.88,0.44,50.00%,3.63,2.81,77.60%,29.0


In [1217]:
#Removing '%' from adv_def dataset
adv_def['Bltz%'] = adv_def['Bltz%'].str.replace('%', '')
adv_def['Hrry%'] = adv_def['Hrry%'].str.replace('%', '')
adv_def['QBKD%'] = adv_def['QBKD%'].str.replace('%', '')
adv_def['Prss%'] = adv_def['Prss%'].str.replace('%', '')
adv_def['3D%'] = adv_def['3D%'].str.replace('%', '')
adv_def['4D%'] = adv_def['4D%'].str.replace('%', '')
adv_def['RZPct'] = adv_def['RZPct'].str.replace('%', '')
adv_def

Unnamed: 0,Tm,Season,G,Att,Cmp,Yds,TD,DADOT,Air,YAC,Bltz,Bltz%,Hrry,Hrry%,QBKD,QBKD%,Sk,Prss,Prss%,MTkl,3DAtt,3DConv,3D%,4DAtt,4DConv,4D%,RZAtt,RZTD,RZPct,PA
0,Atlanta Falcons,2022,17,561,372,3942,26,8.3,2297,1793,98,16.30,23,3.80,44,7.80,21,88,14.60,54,12.9,5.94,45.90,1.00,0.41,41.20,3.53,1.94,55.00,22.7
1,Buffalo Bills,2022,16,570,362,3433,21,7.5,2276,1901,124,19.40,46,7.20,56,9.80,40,142,22.20,77,13.0,4.88,37.50,2.06,1.19,57.60,3.06,1.38,44.90,17.9
2,Carolina Panthers,2022,17,580,383,3868,25,7.9,2434,1664,186,29.20,41,6.40,50,8.60,35,126,19.80,72,13.6,5.59,41.10,1.06,0.47,44.40,3.35,1.88,56.10,22.0
3,Chicago Bears,2022,17,481,323,3716,22,7.8,1973,1867,97,18.20,45,8.40,20,4.20,20,85,15.90,74,12.0,5.88,49.00,0.88,0.35,40.00,3.65,2.35,64.50,27.2
4,Cincinnati Bengals,2022,16,559,329,3665,17,8.5,2575,2025,124,20.40,38,6.30,68,12.20,30,136,22.40,69,13.3,5.25,39.60,1.44,0.56,39.10,3.13,1.63,52.00,20.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153,Los Angeles Chargers,2018,16,548,351,3646,23,7.2,2142,2125,104,17.00,61,10.00,34,6.20,38,133,21.70,112,12.6,4.94,39.10,1.25,0.69,55.00,3.13,1.56,50.00,20.6
154,Seattle Seahawks,2018,16,546,355,3842,26,7.8,2091,2098,112,18.40,73,12.00,57,10.40,43,173,28.50,98,11.4,4.00,35.00,0.81,0.38,46.20,3.19,1.56,49.00,21.7
155,San Francisco 49ers,2018,16,542,344,3732,35,8.4,1984,1714,123,20.40,95,15.70,44,8.10,37,176,29.10,114,13.5,5.44,40.30,1.06,0.63,58.80,3.63,2.38,65.50,27.2
156,Tampa Bay Buccaneers,2018,16,534,387,4151,34,8.2,2496,1815,141,23.90,39,6.60,42,7.90,38,119,20.20,99,11.3,4.56,40.30,0.88,0.44,50.00,3.63,2.81,77.60,29.0


# Prepping basic and advanced statistics DataFrames for machine learning

1. For each DataFrame, I changed the categorical 'Tm' variable to numerical by a assigning unique numerical ID (from 0-31 based on alphabetical order) to each team name in new 'id' feature column.
2. Filtered each DataFrame to put the 'id' column next to the 'Tm' column for clarity and to double check that unique IDs were assigned
3. Dropped the 'Tm' column to get rid of the categorical variable (although it's still represented by the 'id' column).  
4. Reviewed resulting DataFrames with all numerical features and organized by team 'id' number

In [1218]:
#Creating dataset for machine learning for basic offense ('full_off_id')
full_off = full_off.assign(id = (full_off['Tm']).astype('category').cat.codes)
full_off = full_off.filter(['Tm', 'id', 'Season', 'PF', 'Yds', 'Ply', 'Y/P', 'TO', 'FL', '1stD', 'P_Cmp', 'P_Att', 'P_Yds', 'P_TD', 'P_Int', 'NPY/A', 'P_1stD', 
                  'R_Att', 'R_Yds', 'R_TD', 'RY/A', 'R_1stD', 'Pen', 'Pen_Yds', '1stPy', 'Sc%', 'TO%', 'EXP', 'P_Cmp%', 'P_TD%', 'P_Int%', 'P_Lng', 'P_Y/A', 'P_AY/A', 'P_Y/C',
                           'P_Rate', 'P_Sk', 'Sk_Yds_Lost', 'P_Sk%', 'APNY/A', 'QB_4QC', 'QB_GWD', 'P_EXP'])
full_off_id = full_off.drop('Tm', axis=1)
full_off_id

Unnamed: 0,id,Season,PF,Yds,Ply,Y/P,TO,FL,1stD,P_Cmp,P_Att,P_Yds,P_TD,P_Int,NPY/A,P_1stD,R_Att,R_Yds,R_TD,RY/A,R_1stD,Pen,Pen_Yds,1stPy,Sc%,TO%,EXP,P_Cmp%,P_TD%,P_Int%,P_Lng,P_Y/A,P_AY/A,P_Y/C,P_Rate,P_Sk,Sk_Yds_Lost,P_Sk%,APNY/A,QB_4QC,QB_GWD,P_EXP
0,15,2022,29.2,413.6,64.4,6.4,1.35,0.65,24.0,25.6,38.3,297.8,2.41,0.71,7.5,16.00,24.5,115.9,1.06,4.7,6.18,5.12,49.3,1.82,46.4,10.1,14.10,66.8,6.3,1.8,67,8.1,8.5,12.1,104.7,1.53,11.10,3.8,7.9,0.24,0.24,13.70
1,25,2022,28.1,389.1,66.1,5.9,1.12,0.59,22.6,20.6,31.5,241.5,1.47,0.53,7.1,11.50,32.0,147.6,1.88,4.6,9.47,5.29,40.1,1.71,42.5,10.2,10.30,65.3,4.7,1.7,78,8.1,8.3,12.5,99.0,2.59,15.20,7.6,7.2,0.06,0.12,7.72
2,8,2022,27.5,354.9,65.5,5.4,1.35,0.29,20.4,20.9,32.7,219.8,1.65,1.06,6.4,11.40,31.2,135.2,1.41,4.3,7.59,6.12,49.5,1.41,41.5,10.8,5.48,63.8,5.0,3.2,68,7.0,6.6,11.0,87.9,1.59,10.30,4.6,6.0,0.12,0.24,6.27
3,3,2022,28.4,397.6,64.8,6.1,1.69,0.81,22.9,22.6,35.9,258.1,2.19,0.88,6.8,13.30,26.9,139.5,0.94,5.2,8.00,5.75,44.3,1.69,45.0,15.2,9.77,62.9,6.1,2.4,98,7.5,7.6,11.9,95.8,2.06,10.10,5.4,6.9,0.19,0.25,8.07
4,10,2022,26.6,380.0,64.2,5.9,0.88,0.47,22.1,22.5,34.6,251.8,1.71,0.41,7.0,13.40,28.2,128.2,1.35,4.5,6.71,5.29,45.5,2.00,43.2,8.0,9.53,65.1,4.9,1.2,81,7.6,8.0,11.6,99.3,1.41,9.59,3.9,7.4,0.18,0.18,10.20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
475,31,2008,16.6,320.0,64.1,5.0,1.13,0.75,18.4,19.9,31.9,189.1,0.88,0.38,5.5,10.30,29.9,130.9,0.75,4.4,6.81,5.19,40.3,1.31,29.2,9.6,-10.85,62.4,2.7,1.2,67,6.5,6.5,10.3,85.2,2.38,16.60,6.9,5.5,0.06,0.19,-4.84
476,16,2008,16.4,272.3,57.4,4.7,1.44,0.75,14.1,13.9,26.3,148.1,0.81,0.69,5.2,7.69,28.7,124.2,0.56,4.3,5.44,6.81,51.4,0.94,24.6,11.8,-14.47,52.7,3.1,2.6,84,6.3,5.7,11.9,71.6,2.44,16.90,8.5,4.6,0.06,0.13,-6.46
477,18,2008,14.5,287.3,61.4,4.7,1.94,0.75,15.6,18.3,32.5,184.2,0.69,1.19,5.2,8.75,26.1,103.1,0.50,4.0,5.94,6.06,44.9,0.88,26.3,16.3,-14.88,56.2,2.1,3.7,80,6.3,5.1,11.2,66.9,2.81,20.10,8.0,4.1,0.06,0.06,-10.27
478,7,2008,14.5,249.1,57.6,4.3,1.63,0.38,14.6,14.9,30.5,148.8,0.69,1.25,4.6,7.94,25.6,100.3,0.38,3.9,5.25,6.25,41.8,1.38,27.3,14.5,-13.62,48.8,2.3,4.1,70,5.2,3.8,10.7,54.8,1.50,9.81,4.7,3.3,0.13,0.19,-10.08


In [1219]:
#Creating dataset for machine learning for basic defense ('full_def_id')
full_def = full_def.assign(id = (full_def['Tm']).astype('category').cat.codes)
full_def = full_def.filter(['Tm', 'id', 'Season', 'PA', 'Yds', 'Ply', 'Y/P', 'TO', 'FL', '1stD', 'P_Cmp', 'P_Att', 'P_Yds', 'P_TD', 'P_Int', 'NPY/A', 'P_1stD', 
                  'R_Att', 'R_Yds', 'R_TD', 'RY/A', 'R_1stD', 'Pen', 'Pen_Yds', '1stPy', 'Sc%', 'TO%', 'EXP', 'PD_Cmp%', 'PD_TD%', 'PD', 'PD_Int%', 'PD_Y/A', 'PD_AY/A', 'PD_Y/C',
                           'PD_Rate', 'PD_Sk', 'PD_Sk_Yds', 'PD_QBHits', 'PD_TFL', 'PD_Sk%', 'APDNY/A', 'PD_EXP'])
full_def_id = full_def.drop('Tm', axis=1)
full_def_id

Unnamed: 0,id,Season,PA,Yds,Ply,Y/P,TO,FL,1stD,P_Cmp,P_Att,P_Yds,P_TD,P_Int,NPY/A,P_1stD,R_Att,R_Yds,R_TD,RY/A,R_1stD,Pen,Pen_Yds,1stPy,Sc%,TO%,EXP,PD_Cmp%,PD_TD%,PD,PD_Int%,PD_Y/A,PD_AY/A,PD_Y/C,PD_Rate,PD_Sk,PD_Sk_Yds,PD_QBHits,PD_TFL,PD_Sk%,APDNY/A,PD_EXP
0,27,2022,16.3,300.6,60.4,5.0,1.76,0.59,17.1,22.9,34.9,222.9,1.18,1.18,5.9,11.10,22.9,77.7,0.65,3.4,4.53,5.65,48.9,1.53,25.7,15.3,4.01,65.6,3.4,4.59,3.4,6.9,6.1,10.5,82.7,2.59,18.10,7.12,4.53,6.9,5.2,1.81
1,3,2022,17.9,319.1,62.3,5.1,1.69,0.63,19.6,22.6,35.6,214.6,1.31,1.06,5.6,11.30,24.2,104.6,0.63,4.3,6.44,5.56,47.1,1.81,31.0,14.3,0.68,63.5,3.7,5.63,3.0,6.5,5.9,10.3,82.1,2.50,18.00,6.13,5.81,6.6,5.1,-1.15
2,2,2022,18.5,324.3,61.6,5.3,1.47,0.65,19.0,23.2,34.9,232.2,1.18,0.82,6.2,12.10,23.9,92.1,0.65,3.9,5.35,4.65,38.2,1.53,35.8,13.9,-1.86,66.4,3.4,4.53,2.4,7.2,6.8,10.8,88.7,2.82,17.90,5.59,4.53,7.5,5.8,-3.30
3,24,2022,18.6,311.1,64.3,4.8,0.94,0.24,18.2,20.4,32.7,189.4,0.88,0.71,5.4,9.76,28.9,121.6,0.82,4.2,6.88,5.29,41.6,1.59,32.3,8.3,1.53,62.4,2.7,4.29,2.2,6.3,5.9,10.2,80.5,2.65,18.00,7.65,4.76,7.5,5.0,0.22
4,6,2022,20.1,335.7,62.3,5.4,1.50,0.69,18.4,20.6,34.9,229.1,1.06,0.81,6.2,10.80,25.4,106.6,0.75,4.2,6.38,6.25,52.0,1.25,36.4,13.1,-0.59,58.9,3.0,4.69,2.3,6.8,6.4,11.6,80.1,1.88,10.20,6.38,3.56,5.1,5.8,-1.98
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
475,0,2008,26.6,331.5,62.1,5.3,1.88,1.06,19.5,20.2,32.3,221.3,2.25,0.81,6.5,10.80,27.8,110.3,0.81,4.0,7.31,6.13,51.0,1.44,38.7,15.5,4.19,62.5,7.0,4.69,2.5,7.2,7.5,11.6,96.9,1.94,11.90,4.88,4.25,5.7,6.7,-0.37
476,15,2008,27.5,393.2,65.1,6.0,1.81,1.00,21.5,21.8,32.6,234.3,1.31,0.81,7.0,12.20,31.8,158.9,1.56,5.0,8.31,5.06,36.8,1.00,43.6,15.5,3.69,66.7,4.0,3.63,2.5,7.3,7.0,10.9,91.1,0.63,3.88,2.13,3.50,1.9,6.7,1.27
477,9,2008,28.0,374.6,61.9,6.1,0.81,0.44,20.4,20.8,30.9,228.5,1.25,0.38,7.0,11.60,29.3,146.1,1.63,5.0,7.63,5.19,46.1,1.19,46.4,7.7,-2.98,67.3,4.0,3.25,1.2,7.7,7.9,11.4,98.5,1.63,8.81,3.38,4.00,5.0,7.3,-6.05
478,18,2008,29.1,371.9,60.9,6.1,1.63,0.88,20.0,17.4,27.8,217.2,1.25,0.75,7.3,9.88,31.3,154.7,1.63,4.9,9.13,5.56,40.9,1.00,41.6,13.0,1.79,62.6,4.5,3.50,2.7,8.3,8.0,13.2,92.5,1.88,12.60,4.81,5.44,6.3,7.0,-0.54


In [1220]:
#Creating dataset for machine learning for advanced offense ('adv_off_id')
adv_off = adv_off.assign(id = (adv_off['Tm']).astype('category').cat.codes)
adv_off = adv_off.filter(['Tm', 'id', 'Season', 'G', 'PF', 'Pass_Cmp', 'Pass_Att', 'Pass_Yds', 'IAY', 'IAY/PA', 'CAY', 'CAY/Cmp', 'CAY/PA', 'YAC', 'YAC/Cmp', 'Bats', 'ThAwy', 'Spikes',
                          'BadTh', 'Bad%', 'OnTgt', 'OnTgt%', 'Sk', 'PktTime', 'Bltz', 'Hrry', 'Hits', 'Prss', 'Prss%', 'Scrm', 'Yds/Scr', 'Rush_Att', 'Rush_Yds', 'Rush_TD', 'Rush_1D', 'Rush_YBC',
                          'Rush_YBC/Att', 'Rush_YAC', 'Rush_YAC/Att', 'Rush_BrkTkl', 'Att/Br', 'Rec_Tgt', 'Rec', 'Rec_Yds', 'Rec_TD', 'Rec_1D', 'Rec_YBC', 'Rec_YBC/R', 'Rec_YAC', 'Rec_YAC/R', 'ADOT', 
                          'Rec_BrkTkl', 'Rec/Br', 'Drop', 'Drop%', '3DAtt', '3DConv', '3D%', '4DAtt', '4DConv', '4D%', 'RZAtt', 'RZTD', 'RZPct'])
adv_off_id = adv_off.drop('Tm', axis=1)
adv_off_id

Unnamed: 0,id,Season,G,PF,Pass_Cmp,Pass_Att,Pass_Yds,IAY,IAY/PA,CAY,CAY/Cmp,CAY/PA,YAC,YAC/Cmp,Bats,ThAwy,Spikes,BadTh,Bad%,OnTgt,OnTgt%,Sk,PktTime,Bltz,Hrry,Hits,Prss,Prss%,Scrm,Yds/Scr,Rush_Att,Rush_Yds,Rush_TD,Rush_1D,Rush_YBC,Rush_YBC/Att,Rush_YAC,Rush_YAC/Att,Rush_BrkTkl,Att/Br,Rec_Tgt,Rec,Rec_Yds,Rec_TD,Rec_1D,Rec_YBC,Rec_YBC/R,Rec_YAC,Rec_YAC/R,ADOT,Rec_BrkTkl,Rec/Br,Drop,Drop%,3DAtt,3DConv,3D%,4DAtt,4DConv,4D%,RZAtt,RZTD,RZPct
0,1,2022,17,21.5,257,415,2699,4011,9.7,1698,6.6,4.1,1229,4.8,4,12,4,85,21.30,287,71.90,37,2.6,142,14,41,92,18.80,37,6.9,559,2718,17,152,1701,3.0,1017,1.8,10,55.9,415,257,2927,17,148,1698,6.6,1229,4.8,9.4,7,36.7,9,2.2,11.5,4.82,41.80,1.06,0.53,50.00,3.18,1.76,55.60
1,3,2022,16,28.4,361,574,4129,5262,9.2,2694,7.5,4.7,1597,4.4,14,18,1,93,16.80,407,73.30,33,2.5,150,41,40,114,17.20,57,9.4,430,2232,15,130,1577,3.7,655,1.5,17,25.3,574,361,4291,35,214,2694,7.5,1597,4.4,8.8,11,32.8,38,6.6,12.1,6.06,50.30,0.81,0.44,53.80,3.63,2.19,60.30
2,4,2022,17,20.4,267,457,2996,3303,7.2,1465,5.5,3.2,1781,6.7,17,30,3,73,17.20,298,70.30,36,2.5,140,29,24,89,17.40,19,8.2,483,2210,16,119,1336,2.8,874,1.8,19,25.4,457,267,3246,16,138,1486,5.6,1760,6.6,7.0,16,16.7,26,5.7,11.9,3.76,31.70,1.24,0.71,57.10,2.71,1.53,56.50
3,5,2022,17,19.2,223,377,2219,3437,9.1,1449,6.5,3.8,1149,5.2,12,14,1,72,19.90,254,70.20,58,2.6,112,36,35,129,25.40,73,9.2,558,3014,18,148,1893,3.4,1121,2.0,41,13.6,377,223,2598,19,109,1476,6.6,1122,5.0,8.5,13,17.2,18,4.8,12.9,5.29,40.90,1.47,0.47,32.00,2.94,1.65,56.00
4,6,2022,16,26.1,418,610,4240,4152,6.8,2350,5.6,3.9,2170,5.2,24,17,1,76,12.80,469,79.20,44,2.2,113,22,48,114,16.70,27,7.5,399,1528,14,97,991,2.5,537,1.3,15,26.6,610,418,4520,35,224,2350,5.6,2170,5.2,6.7,32,13.1,34,5.6,12.9,5.94,46.10,0.75,0.19,25.00,3.56,2.31,64.90
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153,17,2018,16,26.8,348,512,4089,3958,7.7,2189,6.3,4.3,2127,6.1,13,14,0,64,12.90,444,76.80,34,2.4,152,48,48,130,23.60,6,4.3,399,1873,16,99,874,2.2,999,2.5,40,10.0,512,348,4316,32,213,2189,6.3,2127,6.1,7.7,31,11.2,23,4.5,11.1,4.38,39.50,0.50,0.44,87.50,3.31,2.13,64.20
154,28,2018,16,26.8,280,427,3093,3843,9.0,2063,7.4,4.8,1385,4.9,9,24,8,61,15.40,376,76.10,51,2.5,145,82,31,164,32.30,30,9.6,534,2560,15,133,1528,2.9,1032,1.9,42,12.7,427,280,3448,35,155,2063,7.4,1385,4.9,8.6,15,18.7,18,4.2,13.0,5.06,38.90,0.88,0.69,78.60,3.44,2.25,65.50
155,27,2018,16,21.4,331,532,3867,3722,7.0,1931,5.8,3.6,2316,7.0,6,20,4,77,15.20,379,80.80,48,2.3,162,58,63,169,28.30,17,5.6,423,1902,7,98,1033,2.4,869,2.1,30,14.1,532,331,4247,26,201,1931,5.8,2316,7.0,6.9,30,11.0,27,5.1,12.1,4.56,37.80,0.63,0.31,50.00,3.19,1.31,41.20
156,29,2018,16,24.8,408,625,5125,6635,10.6,3574,8.8,5.7,1784,4.4,11,11,1,96,15.70,423,69.90,41,2.4,160,67,56,164,22.60,59,6.7,389,1523,11,80,849,2.2,674,1.7,24,16.2,625,408,5358,36,264,3581,8.8,1777,4.4,10.5,19,21.5,26,4.2,11.7,5.38,46.00,0.81,0.56,69.20,3.75,2.25,60.00


In [1221]:
#Creating dataset for machine learning for advanced defense ('adv_def_id')
adv_def = adv_def.assign(id = (adv_def['Tm']).astype('category').cat.codes)
adv_def = adv_def.filter(['Tm', 'id', 'Season', 'G', 'PA', 'Att', 'Cmp', 'Yds', 'TD', 'DADOT', 'Air', 'YAC', 'Bltz', 'Bltz%', 'Hrry', 'Hrry%', 'QBKD', 'QBKD%',
                         'Sk', 'Prss', 'Prss%', 'MTkl', '3DAtt', '3DConv', '3D%', '4DAtt', '4DConv', '4D%', 'RZAtt', 'RZTD', 'RZPct'])
adv_def_id = adv_def.drop('Tm', axis=1)
adv_def_id

Unnamed: 0,id,Season,G,PA,Att,Cmp,Yds,TD,DADOT,Air,YAC,Bltz,Bltz%,Hrry,Hrry%,QBKD,QBKD%,Sk,Prss,Prss%,MTkl,3DAtt,3DConv,3D%,4DAtt,4DConv,4D%,RZAtt,RZTD,RZPct
0,1,2022,17,22.7,561,372,3942,26,8.3,2297,1793,98,16.30,23,3.80,44,7.80,21,88,14.60,54,12.9,5.94,45.90,1.00,0.41,41.20,3.53,1.94,55.00
1,3,2022,16,17.9,570,362,3433,21,7.5,2276,1901,124,19.40,46,7.20,56,9.80,40,142,22.20,77,13.0,4.88,37.50,2.06,1.19,57.60,3.06,1.38,44.90
2,4,2022,17,22.0,580,383,3868,25,7.9,2434,1664,186,29.20,41,6.40,50,8.60,35,126,19.80,72,13.6,5.59,41.10,1.06,0.47,44.40,3.35,1.88,56.10
3,5,2022,17,27.2,481,323,3716,22,7.8,1973,1867,97,18.20,45,8.40,20,4.20,20,85,15.90,74,12.0,5.88,49.00,0.88,0.35,40.00,3.65,2.35,64.50
4,6,2022,16,20.1,559,329,3665,17,8.5,2575,2025,124,20.40,38,6.30,68,12.20,30,136,22.40,69,13.3,5.25,39.60,1.44,0.56,39.10,3.13,1.63,52.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153,17,2018,16,20.6,548,351,3646,23,7.2,2142,2125,104,17.00,61,10.00,34,6.20,38,133,21.70,112,12.6,4.94,39.10,1.25,0.69,55.00,3.13,1.56,50.00
154,28,2018,16,21.7,546,355,3842,26,7.8,2091,2098,112,18.40,73,12.00,57,10.40,43,173,28.50,98,11.4,4.00,35.00,0.81,0.38,46.20,3.19,1.56,49.00
155,27,2018,16,27.2,542,344,3732,35,8.4,1984,1714,123,20.40,95,15.70,44,8.10,37,176,29.10,114,13.5,5.44,40.30,1.06,0.63,58.80,3.63,2.38,65.50
156,29,2018,16,29.0,534,387,4151,34,8.2,2496,1815,141,23.90,39,6.60,42,7.90,38,119,20.20,99,11.3,4.56,40.30,0.88,0.44,50.00,3.63,2.81,77.60


# Splitting each dataset (basic and advanced) into training and test sets for model training and testing

In [1222]:
#Splitting basic offense dataset into training and test set and reviewing the training set
full_off_train, full_off_test = train_test_split(full_off_id, test_size= .2, random_state=42)
full_off_train

Unnamed: 0,id,Season,PF,Yds,Ply,Y/P,TO,FL,1stD,P_Cmp,P_Att,P_Yds,P_TD,P_Int,NPY/A,P_1stD,R_Att,R_Yds,R_TD,RY/A,R_1stD,Pen,Pen_Yds,1stPy,Sc%,TO%,EXP,P_Cmp%,P_TD%,P_Int%,P_Lng,P_Y/A,P_AY/A,P_Y/C,P_Rate,P_Sk,Sk_Yds_Lost,P_Sk%,APNY/A,QB_4QC,QB_GWD,P_EXP
132,13,2018,27.1,386.2,66.9,5.8,1.50,0.56,23.2,27.0,40.3,278.8,2.44,0.94,6.7,14.8,25.5,107.4,0.81,4.2,6.00,7.50,59.6,2.38,41.2,13.2,8.78,67.1,6.1,2.3,68,7.1,7.3,10.6,98.2,1.13,8.38,2.7,6.9,0.19,0.19,9.25
228,26,2015,26.4,395.4,63.2,6.3,1.75,0.44,20.7,24.4,36.9,287.7,1.63,1.31,7.4,12.9,24.3,107.8,1.00,4.4,5.69,5.88,54.3,2.06,40.5,13.7,1.44,66.3,4.4,3.6,88,8.2,7.5,12.3,91.2,2.06,13.70,5.3,6.7,0.13,0.19,6.38
473,15,2008,18.2,308.7,59.8,5.2,1.50,0.50,17.1,19.4,33.8,195.6,1.44,1.00,5.4,10.1,23.7,113.1,0.56,4.8,5.88,4.88,40.3,1.13,27.0,12.4,-7.74,57.3,4.3,3.0,75,6.2,5.7,10.8,77.5,2.31,14.30,6.4,5.0,0.00,0.06,-4.40
42,0,2021,26.4,373.6,66.2,5.6,0.88,0.24,21.6,24.4,34.8,251.5,1.59,0.65,6.8,12.6,29.2,122.1,1.35,4.2,7.47,6.71,58.2,1.53,44.7,7.8,7.06,70.2,4.6,1.9,77,7.8,7.9,11.1,100.6,2.29,20.20,6.2,6.9,0.06,0.06,7.50
360,23,2011,24.6,385.1,64.3,6.0,1.50,0.50,20.7,22.4,36.8,295.9,1.81,1.00,7.7,13.5,25.7,89.2,1.06,3.5,5.25,5.88,49.7,1.94,32.5,11.5,-2.86,61.0,4.9,2.7,99,8.4,8.1,13.7,92.9,1.75,12.40,4.5,7.4,0.31,0.38,6.07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106,18,2019,24.6,374.9,65.9,5.7,1.50,0.44,21.4,24.8,39.5,281.2,1.38,1.06,6.9,13.9,25.1,93.7,1.25,3.7,5.75,7.38,56.2,1.75,35.7,13.0,2.32,62.8,3.5,2.7,66,7.4,6.9,11.8,85.6,1.38,10.60,3.4,6.4,0.06,0.13,5.55
270,6,2014,22.8,348.0,63.6,5.5,1.63,0.56,19.6,20.2,31.4,213.8,1.25,1.06,6.5,10.3,30.8,134.2,1.19,4.4,7.63,6.38,50.8,1.69,34.0,13.1,0.86,64.2,4.0,3.4,81,7.1,6.3,11.0,84.2,1.44,8.13,4.4,5.8,0.25,0.19,1.06
348,25,2012,17.5,354.1,67.4,5.3,2.31,1.38,20.8,22.9,38.6,236.9,1.13,0.94,5.7,11.9,25.8,117.1,0.63,4.5,6.94,6.13,50.8,1.94,29.9,17.9,-10.90,59.4,2.9,2.4,77,6.6,6.1,11.1,78.6,3.00,17.80,7.2,5.2,0.25,0.25,-5.52
435,9,2009,20.4,341.4,64.5,5.3,1.44,0.63,19.1,21.3,34.9,226.7,1.31,0.81,6.1,11.6,27.5,114.8,0.56,4.2,5.94,5.81,50.0,1.56,31.7,11.6,-6.96,61.1,3.8,2.3,87,6.9,6.6,11.2,84.4,2.13,12.40,5.7,5.8,0.19,0.19,-1.86


In [1223]:
#Splitting basic defense dataset into training and test set and reviewing the training set
full_def_train, full_def_test = train_test_split(full_def_id, test_size= .2, random_state=42)
full_def_train

Unnamed: 0,id,Season,PA,Yds,Ply,Y/P,TO,FL,1stD,P_Cmp,P_Att,P_Yds,P_TD,P_Int,NPY/A,P_1stD,R_Att,R_Yds,R_TD,RY/A,R_1stD,Pen,Pen_Yds,1stPy,Sc%,TO%,EXP,PD_Cmp%,PD_TD%,PD,PD_Int%,PD_Y/A,PD_AY/A,PD_Y/C,PD_Rate,PD_Sk,PD_Sk_Yds,PD_QBHits,PD_TFL,PD_Sk%,APDNY/A,PD_EXP
132,14,2018,19.8,311.4,60.6,5.1,1.06,0.38,18.0,19.5,31.2,194.6,1.06,0.69,5.8,10.1,27.1,116.9,1.00,4.3,5.56,5.75,45.9,2.31,33.1,8.3,0.29,62.5,3.4,3.25,2.2,6.7,6.4,10.8,84.4,2.31,15.10,5.81,4.56,6.9,5.5,-1.03
228,20,2015,18.9,344.4,63.4,5.4,1.38,0.56,19.9,22.4,35.1,235.1,1.50,0.81,6.2,11.8,25.7,109.3,0.44,4.3,5.88,6.81,54.7,2.19,33.3,11.9,0.72,64.0,4.3,4.06,2.3,7.2,7.0,11.3,90.1,2.69,17.70,6.94,4.75,7.1,6.1,-2.24
473,22,2008,24.6,339.5,62.4,5.4,1.38,0.44,18.7,18.7,32.9,221.7,1.31,0.94,6.4,11.3,27.8,117.8,0.88,4.2,6.25,5.25,39.8,1.19,39.7,11.5,6.29,56.8,4.0,5.69,2.9,7.0,6.6,12.4,80.3,1.75,10.10,4.00,4.38,5.1,5.9,1.17
42,0,2021,21.5,329.2,60.5,5.4,1.59,0.82,19.8,21.6,33.0,214.4,1.76,0.76,6.1,11.3,25.1,114.8,0.59,4.6,6.41,5.88,52.7,2.06,35.6,15.3,-1.14,65.4,5.3,4.29,2.3,6.9,6.9,10.6,93.5,2.41,13.40,5.88,4.47,6.8,6.1,-2.92
360,6,2011,20.2,316.3,63.1,5.0,1.38,0.75,17.8,19.9,33.7,211.6,1.31,0.63,5.8,10.6,26.6,104.7,0.88,3.9,5.88,6.31,54.9,1.25,28.5,11.0,7.83,59.2,3.9,4.69,1.9,6.8,6.7,11.5,85.0,2.81,17.30,6.69,5.06,7.7,5.7,3.80
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106,8,2019,20.1,327.0,63.4,5.2,1.06,0.63,20.4,23.1,35.6,223.5,1.31,0.44,5.9,11.8,25.4,103.5,0.88,4.1,6.31,6.38,53.4,2.38,35.3,9.8,-1.99,65.0,3.7,4.44,1.2,6.8,7.0,10.5,91.9,2.44,18.90,5.63,4.75,6.4,6.1,-4.22
270,8,2014,22.0,355.1,61.1,5.8,1.94,0.81,19.7,23.2,34.9,251.9,1.38,1.13,6.9,12.2,24.5,103.1,1.13,4.2,5.38,6.50,53.8,2.13,31.3,16.8,-4.37,66.5,3.9,4.44,3.2,7.5,6.8,11.3,88.5,1.75,9.75,4.94,4.00,4.8,6.2,-6.57
348,25,2012,27.8,343.2,62.1,5.5,0.81,0.31,19.8,18.3,30.3,216.9,2.06,0.50,6.7,11.1,29.9,126.3,0.69,4.2,6.75,7.38,62.3,1.94,39.0,6.6,0.41,60.2,6.8,4.50,1.6,7.6,8.2,12.6,99.6,1.88,12.40,4.00,4.88,5.8,7.3,-4.98
435,22,2009,21.3,357.8,65.3,5.5,2.44,0.81,19.4,20.6,35.9,235.6,0.94,1.63,6.2,10.9,27.2,122.2,1.19,4.5,6.94,5.38,44.8,1.50,31.4,18.0,11.10,57.5,2.6,5.38,4.5,6.9,5.4,12.0,68.6,2.19,12.00,5.19,3.94,5.7,4.8,7.98


In [1224]:
#Splitting advanced offense dataset into training and test set and reviewing the training set
adv_off_train, adv_off_test = train_test_split(adv_off_id, test_size= .2, random_state=42)
adv_off_train

Unnamed: 0,id,Season,G,PF,Pass_Cmp,Pass_Att,Pass_Yds,IAY,IAY/PA,CAY,CAY/Cmp,CAY/PA,YAC,YAC/Cmp,Bats,ThAwy,Spikes,BadTh,Bad%,OnTgt,OnTgt%,Sk,PktTime,Bltz,Hrry,Hits,Prss,Prss%,Scrm,Yds/Scr,Rush_Att,Rush_Yds,Rush_TD,Rush_1D,Rush_YBC,Rush_YBC/Att,Rush_YAC,Rush_YAC/Att,Rush_BrkTkl,Att/Br,Rec_Tgt,Rec,Rec_Yds,Rec_TD,Rec_1D,Rec_YBC,Rec_YBC/R,Rec_YAC,Rec_YAC/R,ADOT,Rec_BrkTkl,Rec/Br,Drop,Drop%,3DAtt,3DConv,3D%,4DAtt,4DConv,4D%,RZAtt,RZTD,RZPct
137,10,2018,16,20.3,375,574,3576,3982,6.9,1797,4.8,3.1,2047,5.5,13,17,1,99,17.80,371,67.30,41,2.3,168,57,34,132,21.00,13,7.1,404,1660,11,91,883,2.2,777,1.9,27,15.0,574,375,3844,22,198,1797,4.8,2047,5.5,6.7,33,11.4,30,5.2,13.0,4.69,36.10,1.13,0.69,61.10,3.06,1.63,53.10
55,26,2021,17,20.2,425,664,3778,4422,6.7,1963,4.6,3.0,2054,4.8,13,7,3,120,18.30,495,75.70,38,2.1,161,44,39,121,17.10,6,12.0,411,1583,10,85,765,1.9,818,2.0,35,11.7,664,425,4017,23,204,1981,4.7,2036,4.8,6.6,43,9.9,33,5.0,14.1,5.47,38.90,1.24,0.59,47.60,2.94,1.59,54.00
126,31,2019,16,16.6,298,479,2812,3639,7.6,1751,5.9,3.7,1454,4.9,11,14,3,85,18.40,339,73.40,50,2.5,146,69,47,166,30.40,17,6.4,356,1583,9,74,680,1.9,903,2.5,15,23.7,479,298,3205,18,154,1751,5.9,1454,4.9,7.4,18,16.6,26,5.4,11.2,3.25,29.10,0.88,0.38,42.90,2.56,1.25,48.80
84,24,2020,16,15.2,292,499,2796,4243,8.5,1599,5.5,3.2,1516,5.2,8,31,3,91,19.60,333,71.60,43,2.4,201,59,58,160,27.90,31,7.5,406,1683,9,94,871,2.1,812,2.0,19,21.4,499,292,3115,16,146,1599,5.5,1516,5.2,8.1,16,18.3,19,3.8,12.9,4.38,34.00,1.13,0.44,38.90,2.38,1.00,42.10
154,28,2018,16,26.8,280,427,3093,3843,9.0,2063,7.4,4.8,1385,4.9,9,24,8,61,15.40,376,76.10,51,2.5,145,82,31,164,32.30,30,9.6,534,2560,15,133,1528,2.9,1032,1.9,42,12.7,427,280,3448,35,155,2063,7.4,1385,4.9,8.6,15,18.7,18,4.2,13.0,5.06,38.90,0.88,0.69,78.60,3.44,2.25,65.50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,0,2020,16,25.6,387,575,3916,4460,7.8,2234,5.8,3.9,1868,4.8,14,25,8,90,16.60,419,77.30,29,2.4,160,55,25,109,16.60,51,8.4,479,2237,22,136,1505,3.1,732,1.5,31,15.5,575,387,4102,27,211,2232,5.8,1870,4.8,7.5,27,14.3,13,2.3,12.6,5.00,39.60,1.56,1.06,68.00,3.63,2.38,65.50
106,10,2019,16,21.3,344,571,3900,5456,9.6,2683,7.8,4.7,1504,4.4,13,16,4,125,22.70,371,67.30,43,2.5,177,59,47,149,23.20,28,7.0,407,1649,7,82,855,2.1,794,2.0,25,16.3,571,344,4187,28,196,2683,7.8,1504,4.4,9.1,17,20.2,20,3.5,13.8,5.63,40.90,0.81,0.31,38.50,2.63,1.56,59.50
14,15,2022,17,29.2,435,651,5062,4683,7.2,2397,5.5,3.7,2853,6.6,9,33,0,96,15.50,474,76.70,26,2.6,161,58,56,140,19.40,46,7.5,417,1970,18,105,1334,3.2,636,1.5,17,24.5,651,435,5250,41,272,2400,5.5,2850,6.6,6.9,33,13.2,34,5.2,11.4,5.53,48.70,0.71,0.53,75.00,4.24,2.94,69.40
92,28,2020,16,28.7,388,563,3941,4834,8.6,2404,6.2,4.3,1841,4.7,4,24,1,85,15.80,428,79.60,48,2.6,210,65,68,181,27.30,53,8.7,411,1971,15,111,1275,3.1,696,1.7,20,20.6,563,388,4245,40,216,2404,6.2,1841,4.7,8.0,16,24.3,27,4.8,11.8,4.75,40.20,0.88,0.50,57.10,3.50,2.56,73.20


In [1225]:
#Splitting advanced defense dataset into training and test set and reviewing the training set
adv_def_train, adv_def_test = train_test_split(adv_def_id, test_size= .2, random_state=42)
adv_def_train

Unnamed: 0,id,Season,G,PA,Att,Cmp,Yds,TD,DADOT,Air,YAC,Bltz,Bltz%,Hrry,Hrry%,QBKD,QBKD%,Sk,Prss,Prss%,MTkl,3DAtt,3DConv,3D%,4DAtt,4DConv,4D%,RZAtt,RZTD,RZPct
137,10,2018,16,22.5,496,323,3599,29,10.0,2375,1426,94,16.90,54,9.70,28,5.60,43,125,22.50,83,11.6,4.31,37.10,1.06,0.56,52.90,2.94,1.69,57.40
55,26,2021,17,23.4,563,355,3656,24,7.9,2310,2050,176,27.00,71,10.90,44,7.80,55,170,26.10,125,13.6,5.06,37.10,1.41,0.76,54.20,3.47,1.76,50.80
126,31,2019,16,27.2,540,371,3823,35,7.9,2094,1740,146,23.90,83,13.60,45,8.30,46,174,28.50,116,14.2,6.94,48.90,1.19,0.75,63.20,3.69,2.25,61.00
84,24,2020,16,28.6,605,418,4409,34,7.6,2483,2051,249,38.40,57,8.80,53,8.80,31,141,21.70,132,12.6,5.63,44.60,0.75,0.31,41.70,3.75,2.25,60.00
154,28,2018,16,21.7,546,355,3842,26,7.8,2091,2098,112,18.40,73,12.00,57,10.40,43,173,28.50,98,11.4,4.00,35.00,0.81,0.38,46.20,3.19,1.56,49.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,0,2020,16,22.9,570,365,3623,26,7.7,1884,2039,257,39.40,65,10.00,56,9.80,48,169,25.90,110,12.6,5.00,39.60,1.31,0.88,66.70,3.50,1.81,51.80
106,10,2019,16,26.4,611,381,4551,33,10.4,2864,1722,119,18.00,53,8.00,43,7.00,28,124,18.80,127,13.4,5.88,43.90,0.94,0.44,46.70,3.56,2.06,57.90
14,15,2022,17,21.7,619,408,3756,33,7.2,2559,2376,173,24.20,59,8.30,64,10.30,55,178,24.90,82,13.1,5.00,38.30,2.06,1.18,57.10,3.24,2.18,67.30
92,28,2020,16,23.2,674,450,4560,23,8.0,3005,1943,253,33.50,70,9.30,66,9.80,46,182,24.10,88,13.9,6.56,47.10,1.44,0.63,43.50,3.50,2.19,62.50


# For each dataset, the next step is dropping the labels of the desired predicted feature for both basic and advanced DataFrames ('PF' for offense and 'PA' for defense)

In [1226]:
#Drop labels for basic offense training set
X_full_off = full_off_train.drop('PF', axis=1)
y_full_off = full_off_train['PF'].copy()
#Drop labels for basic defense training set
X_full_def = full_def_train.drop('PA', axis=1)
y_full_def = full_def_train['PA'].copy()
#Drop labels for advanced offense training set
X_adv_off = adv_off_train.drop('PF', axis=1)
y_adv_off = adv_off_train['PF'].copy()
#Drop labels for advanced defense training set
X_adv_def = adv_def_train.drop('PA', axis=1)
y_adv_def = adv_def_train['PA'].copy()

# Building optimal GradientBoostingRegressor models for each training set (offense/defense basic and advanced)

1. For each training set, I ran a GridSearchCV to find the optimal hyperparameters for that specific training set and fit the data using a GradientBoostingRegressor model. Each specific step in hyperparameter optimization isn't shown below to make the notebook more concise and clear, but each GridSearch was run 5-10 times and hyperparameters were narrowed down each time to find the most optimal parameters. 
2. After thorough testing, each specific training set is fit to the optimal estimator resulting from the grid searches. 

In [1227]:
#GradientBoost Regressor Full Offense
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor

param_grid_gbrfullo = [{'learning_rate': [.075], 
               'max_depth': [1],
              'n_estimators': [2600],
               'max_features' : [40]}]

grid_search_gbrfullo = GridSearchCV(GradientBoostingRegressor(random_state=42), 
                                      param_grid_gbrfullo, verbose=3, cv=5)

grid_search_gbrfullo.fit(X_full_off, y_full_off)

print("The best parameters are: ", grid_search_gbrfullo.best_params_)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END learning_rate=0.075, max_depth=1, max_features=40, n_estimators=2600;, score=0.919 total time=   3.5s
[CV 2/5] END learning_rate=0.075, max_depth=1, max_features=40, n_estimators=2600;, score=0.893 total time=   3.2s
[CV 3/5] END learning_rate=0.075, max_depth=1, max_features=40, n_estimators=2600;, score=0.916 total time=   3.2s
[CV 4/5] END learning_rate=0.075, max_depth=1, max_features=40, n_estimators=2600;, score=0.914 total time=   3.3s
[CV 5/5] END learning_rate=0.075, max_depth=1, max_features=40, n_estimators=2600;, score=0.908 total time=   3.2s
The best parameters are:  {'learning_rate': 0.075, 'max_depth': 1, 'max_features': 40, 'n_estimators': 2600}


In [1228]:
#GradientBoostingRegressor Full Offense Optimal Estimator
grid_search_gbrfullo.best_estimator_

In [1229]:
##GradientBoostRegressor Full Defense
param_grid_gbrfulld = [{'learning_rate': [.05], 
               'max_depth': [1],
              'n_estimators': [1475],
               'max_features' : [29]}]

grid_search_gbrfulld = GridSearchCV(GradientBoostingRegressor(random_state=42), 
                                      param_grid_gbrfulld, verbose=3, cv=5)

grid_search_gbrfulld.fit(X_full_def, y_full_def)

print("The best parameters are: ", grid_search_gbrfulld.best_params_)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END learning_rate=0.05, max_depth=1, max_features=29, n_estimators=1475;, score=0.894 total time=   1.8s
[CV 2/5] END learning_rate=0.05, max_depth=1, max_features=29, n_estimators=1475;, score=0.922 total time=   1.6s
[CV 3/5] END learning_rate=0.05, max_depth=1, max_features=29, n_estimators=1475;, score=0.916 total time=   1.6s
[CV 4/5] END learning_rate=0.05, max_depth=1, max_features=29, n_estimators=1475;, score=0.878 total time=   1.6s
[CV 5/5] END learning_rate=0.05, max_depth=1, max_features=29, n_estimators=1475;, score=0.905 total time=   1.6s
The best parameters are:  {'learning_rate': 0.05, 'max_depth': 1, 'max_features': 29, 'n_estimators': 1475}


In [1230]:
#GradientBoostingRegressor Full Defense Optimal Estimator
grid_search_gbrfulld.best_estimator_

In [1231]:
##GradientBoost Regressor Advanced Offense
param_grid_gbradvo = [{'learning_rate': [.04], 
               'max_depth': [3],
              'n_estimators': [1000],
               'max_features' : [20]}]

grid_search_gbradvo = GridSearchCV(GradientBoostingRegressor(random_state=42), 
                                      param_grid_gbradvo, verbose=3, cv=5)

grid_search_gbradvo.fit(X_adv_off, y_adv_off)

print("The best parameters are: ", grid_search_gbradvo.best_params_)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END learning_rate=0.04, max_depth=3, max_features=20, n_estimators=1000;, score=0.844 total time=   1.1s
[CV 2/5] END learning_rate=0.04, max_depth=3, max_features=20, n_estimators=1000;, score=0.803 total time=   1.2s
[CV 3/5] END learning_rate=0.04, max_depth=3, max_features=20, n_estimators=1000;, score=0.873 total time=   1.1s
[CV 4/5] END learning_rate=0.04, max_depth=3, max_features=20, n_estimators=1000;, score=0.846 total time=   0.9s
[CV 5/5] END learning_rate=0.04, max_depth=3, max_features=20, n_estimators=1000;, score=0.860 total time=   0.9s
The best parameters are:  {'learning_rate': 0.04, 'max_depth': 3, 'max_features': 20, 'n_estimators': 1000}


In [1232]:
#GradientBoostingRegressor Advanced Offense Optimal Estimator
grid_search_gbradvo.best_estimator_

In [1233]:
##GradientBoost Regressor Advanced Defense
param_grid_gbradvd = [{'learning_rate': [.01], 
               'max_depth': [2],
              'n_estimators': [523],
               'max_features' : [20]}]

grid_search_gbradvd = GridSearchCV(GradientBoostingRegressor(random_state=42), 
                                      param_grid_gbradvd, verbose=3, cv=5)

grid_search_gbradvd.fit(X_adv_def, y_adv_def)

print("The best parameters are: ", grid_search_gbradvd.best_params_)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END learning_rate=0.01, max_depth=2, max_features=20, n_estimators=523;, score=0.831 total time=   0.5s
[CV 2/5] END learning_rate=0.01, max_depth=2, max_features=20, n_estimators=523;, score=0.654 total time=   0.4s
[CV 3/5] END learning_rate=0.01, max_depth=2, max_features=20, n_estimators=523;, score=0.736 total time=   0.5s
[CV 4/5] END learning_rate=0.01, max_depth=2, max_features=20, n_estimators=523;, score=0.731 total time=   0.4s
[CV 5/5] END learning_rate=0.01, max_depth=2, max_features=20, n_estimators=523;, score=0.832 total time=   0.4s
The best parameters are:  {'learning_rate': 0.01, 'max_depth': 2, 'max_features': 20, 'n_estimators': 523}


In [1234]:
#GradientBoostingRegressor Advanced Defense Optimal Estimator
grid_search_gbradvd.best_estimator_

# Utilizing optimal models to make predictions on the test sets for both full and advanced datasets

In [1235]:
#GradientBoostingRegressor Full Offense Predictions on Test Set (full_off_test)
gbrfullo_model = grid_search_gbrfullo.best_estimator_

X_fullo_test = full_off_test.drop("PF", axis=1)
y_fullo_test = full_off_test["PF"].copy()

gbrfullo_pred = gbrfullo_model.predict(X_fullo_test)

print("Predictions:", gbrfullo_pred)
print("Labels:", list(y_fullo_test))

Predictions: [28.96058095 17.78112941 24.30019277 21.62285184 21.72082951 22.06260686
 25.78738984 19.73656179 25.39733728 17.60276132 23.27480342 20.3309616
 20.05084418 24.306967   15.62588128 19.89893128 20.42615902 25.84676847
 27.47932768 25.70277932 21.88042762 24.06437039 29.48189696 29.38981184
 24.40920231 18.22033796 26.77029994 29.17345465 23.7024673  29.63712125
 25.42083337 24.88800007 14.69445377 22.79120199 18.40872723 21.8954712
 24.68988103 23.4884506  18.96934234 21.35242789 24.62995079 18.0063345
 20.20002059 18.42907915 27.73358563 25.91518819 23.15530489 27.44323281
 18.75538591 16.48424262 24.50701696 21.3021382  12.13572152 20.29108787
 23.71737295 23.5076827  25.30045025 27.13935533 15.42276662 21.23567047
 24.46980071 18.01825938 24.25525029 25.81484212 26.48448286 23.35059763
 28.44851459 29.38847641 24.95345083 19.56068163 20.63021498 25.06259191
 22.26535194 21.3727153  20.32859843 21.95714271 21.57774111 17.17127205
 23.04820765 24.74311412 28.16251959 21.7

In [1236]:
#GradientBoostingRegressor Full Offense Predictions on Test Set (full_def_test)
gbrfulld_model = grid_search_gbrfulld.best_estimator_

X_fulld_test = full_def_test.drop("PA", axis=1)
y_fulld_test = full_def_test["PA"].copy()

gbrfulld_pred = gbrfulld_model.predict(X_fulld_test)

print("Predictions:", gbrfulld_pred)
print("Labels:", list(y_fulld_test))

Predictions: [21.12105629 24.39337028 19.78194219 24.88409575 21.45500857 20.27480875
 23.06251442 23.18254205 24.05675463 25.79826139 21.29545624 25.23413974
 21.04286778 22.13548657 25.6127078  29.10927047 25.25792394 20.57586909
 17.91622257 21.48520475 21.55318986 20.35574139 24.33970078 19.03249866
 19.52735788 23.70799226 19.74176592 16.64039812 23.10068448 16.19065822
 20.90652818 22.63346037 25.58518873 20.2527258  24.56721589 23.70595437
 21.94237219 20.88602803 23.5570022  22.84024015 23.40105481 26.25179024
 31.32430083 24.44547856 22.70110418 19.16667296 20.42897112 23.68781402
 27.74306012 29.0782725  19.54295766 24.55739414 29.25907894 21.505186
 20.84686766 22.34228966 27.16451485 19.60837624 27.95469261 22.43704896
 20.94608116 27.74177058 25.18650278 22.04231289 21.8982882  22.89040463
 16.02213235 20.78781655 23.011376   27.46142654 23.24670952 24.57731467
 24.5309727  23.41848546 27.10657367 23.42941597 26.67652599 26.54585515
 21.86401214 23.7102073  18.84301523 22.

In [1237]:
#GradientBoostingRegressor Advanced Offense Predictions on Test Set (adv_off_test)
gbradvo_model = grid_search_gbradvo.best_estimator_

X_advo_test = adv_off_test.drop("PF", axis=1)
y_advo_test = adv_off_test["PF"].copy()

gbradvo_pred = gbradvo_model.predict(X_advo_test)

print("Predictions:", gbradvo_pred)
print("Labels:", list(y_advo_test))

Predictions: [18.60123174 18.42958503 16.68809363 27.82256873 27.43442083 27.43357
 24.9726464  20.46184349 19.39679214 17.05586505 21.90128977 23.4149165
 23.7290981  27.06912056 21.91850809 24.67025013 24.88231804 19.84749926
 24.67769003 30.55920154 27.53193173 18.08794275 16.96369622 18.48043339
 19.22922269 29.66141773 16.31851946 24.94291688 26.470411   22.3589066
 25.16623573 29.7848702 ]
Labels: [16.8, 14.9, 14.1, 24.8, 29.3, 26.5, 23.8, 21.3, 20.9, 15.3, 21.5, 22.0, 23.4, 25.5, 23.2, 18.4, 21.1, 18.8, 23.5, 31.3, 26.8, 21.4, 17.0, 16.9, 18.9, 30.1, 15.2, 25.1, 28.6, 23.1, 27.3, 31.8]


In [1238]:
#GradientBoostingRegressor Advanced Defense Predictions on Test Set (adv_def_test)
gbradvd_model = grid_search_gbradvd.best_estimator_

X_advd_test = adv_def_test.drop("PA", axis=1)
y_advd_test = adv_def_test["PA"].copy()

gbradvd_pred = gbradvd_model.predict(X_advd_test)

print("Predictions:", gbradvd_pred)
print("Labels:", list(y_advd_test))

Predictions: [22.1522271  24.37554471 23.38929899 29.17590502 19.19836953 19.04442217
 23.53646423 26.63545126 18.34900249 20.7601272  23.86067543 22.28711567
 24.17728699 26.35311495 24.40719653 20.81840657 21.81960979 23.63535237
 20.41374489 25.17571928 22.67512863 21.44104647 21.93559538 20.39555714
 20.82159927 22.06158816 26.31094963 21.94895115 24.56992774 25.78823316
 22.02064789 21.89452501]
Labels: [23.4, 26.9, 26.6, 29.0, 18.9, 16.3, 24.9, 29.4, 20.6, 19.8, 21.8, 25.8, 23.5, 26.2, 24.6, 21.1, 21.6, 24.8, 24.4, 23.4, 22.5, 20.4, 24.7, 21.1, 20.2, 21.1, 24.5, 19.8, 28.1, 25.8, 20.3, 23.1]


# Exploring which features were weighted as most important for each model and then listing the features with the largest significance for each model. I decided to create a cutoff for significant features once there was a clear margin between levels of significance, with the lowest included feature being greater than .01. 

In [1239]:
gbrfullo_model.feature_importances_

array([3.52795316e-04, 7.29900988e-03, 1.63806365e-01, 5.73215171e-03,
       1.13473416e-03, 7.84968866e-04, 6.90833293e-04, 2.89877759e-03,
       4.34092303e-04, 1.33332414e-03, 3.05538155e-03, 1.19764865e-01,
       9.58031650e-04, 9.90287272e-03, 1.91217891e-03, 1.52078734e-03,
       7.66408827e-04, 6.00365890e-02, 9.50554586e-04, 1.30142347e-03,
       2.61947351e-03, 1.56188476e-03, 4.84588610e-04, 4.98659645e-01,
       1.54436294e-03, 9.09931283e-04, 5.63505804e-04, 1.00697540e-01,
       7.55022710e-04, 4.74041176e-04, 8.20330563e-04, 5.68578273e-04,
       7.23730318e-04, 5.00420998e-04, 6.25717793e-05, 1.48692121e-03,
       5.64933841e-04, 1.50466232e-03, 1.12124828e-05, 0.00000000e+00,
       8.50496505e-04])

In [1240]:
gbrfullo_feat = pd.Series(gbrfullo_model.feature_importances_, index=X_full_off.columns)
gbrfullo_feat.nlargest(5)

Sc%      0.498660
Yds      0.163806
P_TD     0.119765
P_TD%    0.100698
R_TD     0.060037
dtype: float64

In [1241]:
gbrfulld_model.feature_importances_

array([3.00247723e-04, 6.12679064e-03, 9.25172367e-02, 9.71604762e-03,
       1.17882806e-03, 3.48002669e-04, 6.18790347e-04, 6.34768509e-04,
       2.81495279e-05, 4.31082929e-03, 2.79402971e-05, 3.94302687e-02,
       1.14306044e-04, 4.81682018e-04, 7.42973558e-04, 3.23207536e-02,
       8.76633639e-04, 9.29952909e-02, 5.89477061e-04, 2.61348294e-03,
       7.76812800e-04, 6.30852657e-04, 0.00000000e+00, 4.65988223e-01,
       2.80302175e-03, 2.79617744e-05, 6.47257448e-04, 1.41493866e-01,
       5.67882581e-04, 1.65754572e-04, 8.23335481e-04, 0.00000000e+00,
       3.06857618e-03, 2.97617636e-03, 5.22883666e-05, 1.88858986e-04,
       5.32265979e-04, 5.75874058e-03, 3.46796198e-04, 8.49836272e-02,
       2.19520239e-03])

In [1242]:
gbrfulld_feat = pd.Series(gbrfulld_model.feature_importances_, index=X_full_def.columns)
gbrfulld_feat.nlargest(7)

Sc%        0.465988
PD_TD%     0.141494
R_TD       0.092995
Yds        0.092517
APDNY/A    0.084984
P_TD       0.039430
R_Att      0.032321
dtype: float64

In [1243]:
gbradvo_model.feature_importances_

array([1.10752718e-03, 2.05625354e-03, 2.30411585e-04, 8.27322484e-04,
       1.83005799e-03, 2.95089153e-02, 1.97273215e-03, 6.46467814e-03,
       3.99713450e-03, 1.55210580e-03, 9.81363866e-03, 6.05599529e-03,
       1.91098622e-03, 5.20056191e-04, 1.14951780e-03, 3.11721964e-04,
       9.06681295e-04, 1.53013863e-03, 4.69211963e-04, 3.62568820e-03,
       5.18062499e-02, 8.54215997e-04, 1.83164057e-03, 4.99270095e-04,
       1.37034666e-03, 8.52733058e-03, 3.49156193e-03, 1.22315211e-03,
       4.62570197e-03, 1.51584288e-03, 2.85428844e-03, 1.45085641e-02,
       7.09441666e-03, 2.33602932e-03, 2.23950263e-03, 7.98135812e-04,
       9.08693473e-04, 8.44186249e-04, 8.58284007e-04, 1.95140866e-03,
       4.29662152e-04, 2.49297666e-02, 1.81792599e-01, 1.41867687e-02,
       4.53221888e-03, 3.94729171e-04, 4.09443828e-03, 8.38306995e-04,
       3.07448924e-03, 2.15242069e-04, 1.18479065e-03, 1.69447770e-03,
       1.89494828e-03, 9.69663236e-03, 2.98647997e-03, 9.03867141e-02,
      

In [1244]:
gbradvo_feat = pd.Series(gbradvo_model.feature_importances_, index=X_adv_off.columns)
gbradvo_feat.nlargest(10)

RZTD        0.256326
Rec_TD      0.181793
RZAtt       0.181276
3D%         0.090387
Sk          0.051806
Pass_Yds    0.029509
Rec_Yds     0.024930
RZPct       0.020312
Rush_TD     0.014509
Rec_1D      0.014187
dtype: float64

In [1245]:
gbradvd_model.feature_importances_

array([8.37089853e-04, 5.11419932e-04, 0.00000000e+00, 3.07868559e-02,
       1.55688824e-03, 6.40582166e-02, 1.14570507e-01, 1.41841942e-02,
       2.90766043e-03, 1.41388617e-04, 7.59295538e-04, 4.19360884e-04,
       5.16320186e-04, 1.15724981e-04, 2.06500160e-03, 9.37754317e-05,
       2.94290614e-03, 1.89406040e-02, 7.16887423e-03, 5.64514733e-03,
       4.63381399e-04, 2.16393734e-03, 3.15285964e-02, 4.70433625e-02,
       2.56711677e-04, 2.33828572e-03, 3.10222651e-01, 3.30607661e-01,
       7.15418258e-03])

In [1246]:
gbradvd_feat = pd.Series(gbradvd_model.feature_importances_, index=X_adv_def.columns)
gbradvd_feat.nlargest(9)

RZTD     0.330608
RZAtt    0.310223
TD       0.114571
Yds      0.064058
4DAtt    0.047043
3D%      0.031529
Att      0.030787
Prss     0.018941
DADOT    0.014184
dtype: float64

# Creating new reduced datasets based on the most significant features for each specific dataset in an effort to develop improved predictive models by redoing each step above for GradientBoostingRegressor model development, just with a reduced dataset for both full and advanced statistics

In [1247]:
#Full Offense Reduced Dataset based on most significant features (full_off_red)
full_off_red = full_off[['Tm', 'Season', 'PF', 'Sc%', 'Yds', 'APNY/A', 'P_TD', 'P_TD%', 'R_TD']]
#Full Defense Reduced Dataset based on most significant features (full_def_red)
full_def_red = full_def[['Tm', 'Season', 'PA', 'Sc%', 'PD_TD%', 'R_TD', 'Yds', 'APDNY/A', 'P_TD', 'R_Att']]
#Advanced Offense Reduced Dataset based on most significant features (adv_off_red)
adv_off_red = adv_off[['Tm', 'Season', 'PF', 'RZTD', 'Rec_TD', 'RZAtt', '3D%', 'Sk', 'Pass_Yds', 'Rec_Yds', 'RZPct', 'Rush_TD', 'Rec_1D']]
#Advanced Defense Reduced Dataset based on most significant features (adv_def_red)
adv_def_red = adv_def[['Tm', 'Season', 'PA', 'RZTD', 'RZAtt', 'TD', 'Yds', '4DAtt', '3D%', 'Att', 'Prss', 'DADOT']]

In [1248]:
#Prepping each DataFrame for machine learning by changing each categorical value to a unique numerical id. Also filtering so 'Tm' and 'id' are next to each other for clarity. Then dropping 'Tm' feature.
full_off_red = full_off_red.assign(id = (full_off_red['Tm']).astype('category').cat.codes)
full_off_red = full_off_red.filter(['Tm', 'id', 'PF', 'Season', 'Sc%', 'Yds', 'APNY/A', 'P_TD%', 'P_TD', 'R_TD'])
full_off_red_id = full_off_red.drop('Tm', axis=1)

full_def_red = full_def_red.assign(id = (full_def_red['Tm']).astype('category').cat.codes)
full_def_red = full_def_red.filter(['Tm', 'id', 'PA', 'Season', 'Sc%', 'PD_TD%', 'R_TD', 'Yds', 'APDNY/A', 'P_TD', 'R_Att'])
full_def_red_id = full_def_red.drop('Tm', axis=1)

adv_off_red = adv_off_red.assign(id = (adv_off_red['Tm']).astype('category').cat.codes)
adv_off_red = adv_off_red.filter(['Tm', 'id', 'PF', 'Season', 'RZTD', 'Rec_TD', 'RZAtt', '3D%', 'Sk', 'Pass_Yds', 'Rec_Yds', 'RZPct', 'Rush_TD', 'Rec_1D'])
adv_off_red_id = adv_off_red.drop('Tm', axis=1)

adv_def_red = adv_def_red.assign(id = (adv_def_red['Tm']).astype('category').cat.codes)
adv_def_red = adv_def_red.filter(['Tm', 'id', 'PA', 'Season', 'RZTD', 'RZAtt', 'TD', 'Yds', '4DAtt', '3D%', 'Att', 'Prss', 'DADOT'])
adv_def_red_id = adv_def_red.drop('Tm', axis=1)

In [1249]:
#Splitting each reduced dataset into a train and test set
fullo_red_train, fullo_red_test = train_test_split(full_off_red_id, test_size= .2, random_state=42)
fulld_red_train, fulld_red_test = train_test_split(full_def_red_id, test_size= .2, random_state=42)
advo_red_train, advo_red_test = train_test_split(adv_off_red_id, test_size= .2, random_state=42)
advd_red_train, advd_red_test = train_test_split(adv_def_red_id, test_size= .2, random_state=42)

In [1250]:
#Dropping target feature labels from training sets (PF for off, PA for def)
X_fullo_red = fullo_red_train.drop('PF', axis=1)
y_fullo_red = fullo_red_train['PF'].copy()

X_fulld_red = fulld_red_train.drop('PA', axis=1)
y_fulld_red = fulld_red_train['PA'].copy()

X_advo_red = advo_red_train.drop('PF', axis=1)
y_advo_red = advo_red_train['PF'].copy()

X_advd_red = advd_red_train.drop('PA', axis=1)
y_advd_red = advd_red_train['PA'].copy()

In [1251]:
#GradientBoosting Regression and Grid Search for Full Offense Reduced Dataset (gbrfullo_red)

param_grid_gbrfullo_red = [{'learning_rate': [.08], 
               'max_depth': [1],
              'n_estimators': [1500],
               'max_features' : [7]}]

grid_search_gbrfullo_red = GridSearchCV(GradientBoostingRegressor(random_state=42), param_grid_gbrfullo_red, 
                           cv=5, verbose = 3)
grid_search_gbrfullo_red.fit(X_fullo_red, y_fullo_red)

print("The best parameters are: ", grid_search_gbrfullo_red.best_params_)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END learning_rate=0.08, max_depth=1, max_features=7, n_estimators=1500;, score=0.924 total time=   1.0s
[CV 2/5] END learning_rate=0.08, max_depth=1, max_features=7, n_estimators=1500;, score=0.897 total time=   1.0s
[CV 3/5] END learning_rate=0.08, max_depth=1, max_features=7, n_estimators=1500;, score=0.917 total time=   1.0s
[CV 4/5] END learning_rate=0.08, max_depth=1, max_features=7, n_estimators=1500;, score=0.920 total time=   1.0s
[CV 5/5] END learning_rate=0.08, max_depth=1, max_features=7, n_estimators=1500;, score=0.899 total time=   1.0s
The best parameters are:  {'learning_rate': 0.08, 'max_depth': 1, 'max_features': 7, 'n_estimators': 1500}


In [1252]:
#GradientBoostingRegressor Full Offense Reduced Optimal Estimator
grid_search_gbrfullo_red.best_estimator_

In [1253]:
#GradientBoosting Regression and Grid Search for Full Defense Reduced Dataset (gbrfulld_red)

param_grid_gbrfulld_red = [{'learning_rate': [.06], 
               'max_depth': [1],
              'n_estimators': [998],
               'max_features' : [5]}]

grid_search_gbrfulld_red = GridSearchCV(GradientBoostingRegressor(random_state=42), param_grid_gbrfulld_red, 
                           cv=5, verbose = 3)
grid_search_gbrfulld_red.fit(X_fulld_red, y_fulld_red)

print("The best parameters are: ", grid_search_gbrfulld_red.best_params_)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END learning_rate=0.06, max_depth=1, max_features=5, n_estimators=998;, score=0.873 total time=   0.7s
[CV 2/5] END learning_rate=0.06, max_depth=1, max_features=5, n_estimators=998;, score=0.916 total time=   0.7s
[CV 3/5] END learning_rate=0.06, max_depth=1, max_features=5, n_estimators=998;, score=0.940 total time=   0.6s
[CV 4/5] END learning_rate=0.06, max_depth=1, max_features=5, n_estimators=998;, score=0.865 total time=   0.7s
[CV 5/5] END learning_rate=0.06, max_depth=1, max_features=5, n_estimators=998;, score=0.897 total time=   0.6s
The best parameters are:  {'learning_rate': 0.06, 'max_depth': 1, 'max_features': 5, 'n_estimators': 998}


In [1254]:
#GradientBoostingRegressor Full Defense Reduced Optimal Estimator
grid_search_gbrfulld_red.best_estimator_

In [1255]:
#GradientBoosting Regression and Grid Search for Advanced Offense Reduced Dataset (gbradvo_red)

param_grid_gbradvo_red = [{'learning_rate': [.1], 
               'max_depth': [2],
              'n_estimators': [318],
               'max_features' : [2]}]

grid_search_gbradvo_red = GridSearchCV(GradientBoostingRegressor(random_state=42), param_grid_gbradvo_red, 
                           cv=5, verbose = 3)
grid_search_gbradvo_red.fit(X_advo_red, y_advo_red)

print("The best parameters are: ", grid_search_gbradvo_red.best_params_)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END learning_rate=0.1, max_depth=2, max_features=2, n_estimators=318;, score=0.847 total time=   0.3s
[CV 2/5] END learning_rate=0.1, max_depth=2, max_features=2, n_estimators=318;, score=0.804 total time=   0.2s
[CV 3/5] END learning_rate=0.1, max_depth=2, max_features=2, n_estimators=318;, score=0.855 total time=   0.2s
[CV 4/5] END learning_rate=0.1, max_depth=2, max_features=2, n_estimators=318;, score=0.865 total time=   0.2s
[CV 5/5] END learning_rate=0.1, max_depth=2, max_features=2, n_estimators=318;, score=0.878 total time=   0.2s
The best parameters are:  {'learning_rate': 0.1, 'max_depth': 2, 'max_features': 2, 'n_estimators': 318}


In [1256]:
#GradientBoostingRegressor Advanced Offense Reduced Optimal Estimator
grid_search_gbradvo_red.best_estimator_

In [1257]:
#GradientBoosting Regression and Grid Search for Advanced Defense Reduced Dataset (gbradvd_red)

param_grid_gbradvd_red = [{'learning_rate': [.2], 
               'max_depth': [1],
              'n_estimators': [274],
               'max_features' : [9]}]

grid_search_gbradvd_red = GridSearchCV(GradientBoostingRegressor(random_state=42), param_grid_gbradvd_red, 
                           cv=5, verbose = 3)
grid_search_gbradvd_red.fit(X_advd_red, y_advd_red)

print("The best parameters are: ", grid_search_gbradvd_red.best_params_)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END learning_rate=0.2, max_depth=1, max_features=9, n_estimators=274;, score=0.885 total time=   0.2s
[CV 2/5] END learning_rate=0.2, max_depth=1, max_features=9, n_estimators=274;, score=0.657 total time=   0.2s
[CV 3/5] END learning_rate=0.2, max_depth=1, max_features=9, n_estimators=274;, score=0.807 total time=   0.2s
[CV 4/5] END learning_rate=0.2, max_depth=1, max_features=9, n_estimators=274;, score=0.692 total time=   0.2s
[CV 5/5] END learning_rate=0.2, max_depth=1, max_features=9, n_estimators=274;, score=0.900 total time=   0.2s
The best parameters are:  {'learning_rate': 0.2, 'max_depth': 1, 'max_features': 9, 'n_estimators': 274}


In [1258]:
#GradientBoostingRegressor Advanced Defense Reduced Optimal Estimator
grid_search_gbradvd_red.best_estimator_

In [1259]:
#GradientBoostingRegressor Full Offense Reduced Predictions on Test Set (fullo_red_test)
gbrfullo_red_model = grid_search_gbrfullo_red.best_estimator_

X_fullo_red_test = fullo_red_test.drop("PF", axis=1)
y_fullo_red_test = fullo_red_test["PF"].copy()

gbrfullo_red_pred = gbrfullo_red_model.predict(X_fullo_red_test)

print("Predictions:", gbrfullo_red_pred)
print("Labels:", list(y_fullo_red_test))

Predictions: [28.69475763 18.11576351 25.15959045 21.6167385  22.36944502 22.51412749
 24.28271902 19.81603505 25.78575638 15.67628144 23.28013978 20.40510017
 19.9650404  23.69235331 15.8938271  19.73526026 20.15518016 26.01084788
 28.50950008 25.70139821 22.03914095 23.93697878 30.14531286 28.30081432
 24.89714881 18.35699476 25.44701781 28.94988578 24.02644602 29.94306224
 25.81889919 25.19653012 14.95098862 21.76240862 18.96359756 21.44438572
 24.18946474 23.7980918  19.03727582 21.64924478 24.44677559 17.94169241
 21.22030154 19.10939387 28.00025289 26.04283608 23.60097912 27.01896548
 19.02766692 15.89690159 23.73086118 21.03533296 13.38394232 20.60524042
 24.61383835 24.76519824 24.31984067 27.05544469 15.32956749 21.19010741
 25.36376772 17.6650083  24.43108476 26.49238133 25.16490127 23.82020413
 28.62444408 29.97045562 25.12300789 19.97177728 20.04016947 24.38084715
 23.09060916 22.44110212 21.04560467 21.47268506 21.34801451 17.98183113
 23.8369475  24.55153139 27.80863028 2

In [1260]:
#GradientBoostingRegressor Full Defense Reduced Predictions on Test Set (fulld_red_test)
gbrfulld_red_model = grid_search_gbrfulld_red.best_estimator_

X_fulld_red_test = fulld_red_test.drop("PA", axis=1)
y_fulld_red_test = fulld_red_test["PA"].copy()

gbrfulld_red_pred = gbrfulld_red_model.predict(X_fulld_red_test)

print("Predictions:", gbrfulld_red_pred)
print("Labels:", list(y_fulld_red_test))

Predictions: [21.40580471 24.74649876 19.6067155  24.55589331 21.41467309 20.38817229
 23.20839557 22.50517795 24.02774532 25.41488342 21.43563327 24.48883295
 20.86765904 22.33646374 25.94828706 29.46680757 24.83370344 20.75923784
 18.47851096 22.53465963 21.24335629 20.56038641 24.98030388 18.74889995
 20.00850634 25.09382223 19.84678244 17.29068458 22.6794745  16.72542275
 21.32702775 22.29161447 25.03499154 19.9053522  23.82240082 24.9033976
 22.53773184 20.695718   24.48558552 22.59312255 23.60862732 25.92177661
 30.29678905 24.15250169 23.01768088 19.11023452 20.22646501 23.52641384
 26.98670865 29.19987084 19.19359716 24.10074421 29.35132026 20.95044551
 20.72567177 22.74385743 27.12550834 19.16882116 28.45859126 22.66135365
 20.51712576 27.5350093  22.9317299  21.00729698 21.87620171 22.3396375
 16.15665029 21.71444446 22.52383847 26.41687893 23.79163971 24.54396745
 25.38645412 23.09816026 27.45895615 22.66447252 26.68868881 26.98397732
 21.71185392 23.92517519 19.16538626 21.

In [1261]:
#GradientBoostingRegressor Advanced Offense Reduced Predictions on Test Set (advo_red_test)
gbradvo_red_model = grid_search_gbradvo_red.best_estimator_

X_advo_red_test = advo_red_test.drop("PF", axis=1)
y_advo_red_test = advo_red_test["PF"].copy()

gbradvo_red_pred = gbradvo_red_model.predict(X_advo_red_test)

print("Predictions:", gbradvo_red_pred)
print("Labels:", list(y_advo_red_test))

Predictions: [18.90761152 17.71529971 18.55447123 27.65109951 29.90938975 27.41503637
 24.32681758 21.77673099 19.8558017  16.97698413 21.17541273 22.69839604
 23.56464763 28.20175344 21.78709467 23.90823618 23.76802256 18.51114938
 24.7808326  30.06156561 28.47305775 18.34743096 17.45676561 17.77609928
 18.00601812 30.12579976 16.16165005 24.2021149  27.51868208 23.35514104
 26.53678647 32.28856538]
Labels: [16.8, 14.9, 14.1, 24.8, 29.3, 26.5, 23.8, 21.3, 20.9, 15.3, 21.5, 22.0, 23.4, 25.5, 23.2, 18.4, 21.1, 18.8, 23.5, 31.3, 26.8, 21.4, 17.0, 16.9, 18.9, 30.1, 15.2, 25.1, 28.6, 23.1, 27.3, 31.8]


In [1262]:
#GradientBoostingRegressor Advanced Defense Reduced Predictions on Test Set (advd_red_test)
gbradvd_red_model = grid_search_gbradvd_red.best_estimator_

X_advd_red_test = advd_red_test.drop("PA", axis=1)
y_advd_red_test = advd_red_test["PA"].copy()

gbradvd_red_pred = gbradvd_red_model.predict(X_advd_red_test)

print("Predictions:", gbradvd_red_pred)
print("Labels:", list(y_advd_red_test))

Predictions: [21.86605522 23.72660628 23.39819889 30.74337223 18.40561052 18.47836736
 23.21001226 27.63837203 18.34194041 20.48122267 24.33475715 23.07526393
 24.25012295 25.73071299 24.8638068  20.71343981 20.49875877 24.29885354
 21.03419438 24.90305808 22.57648573 21.62340709 22.12093898 20.16828091
 21.0486377  22.50330356 27.47441153 22.03200443 24.30412227 26.57041025
 21.68145464 22.2764909 ]
Labels: [23.4, 26.9, 26.6, 29.0, 18.9, 16.3, 24.9, 29.4, 20.6, 19.8, 21.8, 25.8, 23.5, 26.2, 24.6, 21.1, 21.6, 24.8, 24.4, 23.4, 22.5, 20.4, 24.7, 21.1, 20.2, 21.1, 24.5, 19.8, 28.1, 25.8, 20.3, 23.1]


# Creating new reduced datasets that are a combination of the most significant features for both basic and advanced statistics from the 2018-2022 season. Then redoing each step above for GradientBoostingRegressor model development, just with a the datasets built on a combination of both basic and advanced statistics

In [1263]:
#Selecting seasons from 2018-2022 from the basic statistics to match the 5 seasons of available data for the advanced statistics. Then merging the full and advanced datasets together. 
full_off_five = full_off.loc[full_off['Season'].isin([2018, 2019, 2020, 2021, 2022])]
full_def_five = full_def.loc[full_def['Season'].isin([2018, 2019, 2020, 2021, 2022])]

off_comb = full_off_five.merge(adv_off, on=['Tm', 'id', 'Season', 'PF'])
def_comb = full_def_five.merge(adv_def, on=['Tm', 'id', 'Season', 'PA'])

def_comb = def_comb.rename(columns = {'Yds_x': 'Yds/A'})

#Combined Offense Reduced Dataset based on most significant features (off_comb_red)
off_comb_red = off_comb[['Tm', 'PF', 'Sc%', 'Season', 'Yds', 'APNY/A', 'P_TD%', 'P_TD', 'RZTD', 'Rec_TD', 'RZAtt', 'Pass_Yds', '3D%']]
#Combined Defense Reduced Dataset based on most significant features (def_comb_red)
def_comb_red = def_comb[['Tm', 'PA', 'Sc%', 'Season', 'Yds/A', 'PD_TD%', 'RZTD', 'RZAtt', 'TD', '3D%', '4DAtt', 'RZPct', 'Sk']]

#Prepping each DataFrame for machine learning by changing each categorical value to a unique numerical id. Also filtering so 'Tm' and 'id' are next to each other for clarity. Then dropping 'Tm' feature.
off_comb_red = off_comb_red.assign(id = (off_comb_red['Tm']).astype('category').cat.codes)
off_comb_red = off_comb_red.filter(['Tm', 'id', 'PF', 'Season', 'Sc%', 'Yds', 'APNY/A', 'P_TD%', 'P_TD', 'RZTD', 'Rec_TD', 'RZAtt', 'Pass_Yds', '3D%'])
off_comb_red_id = off_comb_red.drop('Tm', axis=1)

def_comb_red = def_comb_red.assign(id = (def_comb_red['Tm']).astype('category').cat.codes)
def_comb_red = def_comb_red.filter(['Tm', 'id', 'PA', 'Season', 'Sc%', 'Yds/A', 'PD_TD%', 'RZTD', 'RZAtt', 'TD', '3D%', '4DAtt', 'RZPct', 'Sk'])
def_comb_red_id = def_comb_red.drop('Tm', axis=1)

#Splitting each combined dataset into a train and test set
combo_red_train, combo_red_test = train_test_split(off_comb_red_id, test_size= .2, random_state=42)
combd_red_train, combd_red_test = train_test_split(def_comb_red_id, test_size= .2, random_state=42)

#Dropping target feature labels from training sets (PF for off, PA for def)
X_combo_red = combo_red_train.drop('PF', axis=1)
y_combo_red = combo_red_train['PF'].copy()

X_combd_red = combd_red_train.drop('PA', axis=1)
y_combd_red = combd_red_train['PA'].copy()

In [1264]:
#GradientBoosting Regression and Grid Search for Combined Offense Reduced Dataset (gbrcombo_red)

param_grid_gbrcombo_red = [{'learning_rate': [.098], 
               'max_depth': [1],
              'n_estimators': [108],
               'max_features' : [11]}]

grid_search_gbrcombo_red = GridSearchCV(GradientBoostingRegressor(random_state=42), param_grid_gbrcombo_red, 
                           cv=5, verbose = 3)
grid_search_gbrcombo_red.fit(X_combo_red, y_combo_red)

print("The best parameters are: ", grid_search_gbrcombo_red.best_params_)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END learning_rate=0.098, max_depth=1, max_features=11, n_estimators=108;, score=0.901 total time=   0.1s
[CV 2/5] END learning_rate=0.098, max_depth=1, max_features=11, n_estimators=108;, score=0.887 total time=   0.1s
[CV 3/5] END learning_rate=0.098, max_depth=1, max_features=11, n_estimators=108;, score=0.916 total time=   0.1s
[CV 4/5] END learning_rate=0.098, max_depth=1, max_features=11, n_estimators=108;, score=0.871 total time=   0.1s
[CV 5/5] END learning_rate=0.098, max_depth=1, max_features=11, n_estimators=108;, score=0.911 total time=   0.1s
The best parameters are:  {'learning_rate': 0.098, 'max_depth': 1, 'max_features': 11, 'n_estimators': 108}


In [1265]:
#GradientBoostingRegressor Combined Offense Reduced Optimal Estimator
grid_search_gbrcombo_red.best_estimator_

In [1266]:
#GradientBoosting Regression and Grid Search for Combined Defense Reduced Dataset (gbrcombd_red)

param_grid_gbrcombd_red = [{'learning_rate': [.1], 
               'max_depth': [1],
              'n_estimators': [110],
               'max_features' : [11]}]

grid_search_gbrcombd_red = GridSearchCV(GradientBoostingRegressor(random_state=42), param_grid_gbrcombd_red, 
                           cv=5, verbose = 3)
grid_search_gbrcombd_red.fit(X_combd_red, y_combd_red)

print("The best parameters are: ", grid_search_gbrcombd_red.best_params_)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END learning_rate=0.1, max_depth=1, max_features=11, n_estimators=110;, score=0.672 total time=   0.1s
[CV 2/5] END learning_rate=0.1, max_depth=1, max_features=11, n_estimators=110;, score=0.879 total time=   0.1s
[CV 3/5] END learning_rate=0.1, max_depth=1, max_features=11, n_estimators=110;, score=0.876 total time=   0.1s
[CV 4/5] END learning_rate=0.1, max_depth=1, max_features=11, n_estimators=110;, score=0.767 total time=   0.1s
[CV 5/5] END learning_rate=0.1, max_depth=1, max_features=11, n_estimators=110;, score=0.876 total time=   0.1s
The best parameters are:  {'learning_rate': 0.1, 'max_depth': 1, 'max_features': 11, 'n_estimators': 110}


In [1267]:
#GradientBoostingRegressor Combined Defense Reduced Optimal Estimator
grid_search_gbrcombd_red.best_estimator_

In [1268]:
#GradientBoostingRegressor Combined Offense Reduced Predictions on Test Set (combo_red_test)
gbrcombo_red_model = grid_search_gbrcombo_red.best_estimator_

X_combo_red_test = combo_red_test.drop("PF", axis=1)
y_combo_red_test = combo_red_test["PF"].copy()

gbrcombo_red_pred = gbrcombo_red_model.predict(X_combo_red_test)

print("Predictions:", gbrcombo_red_pred)
print("Labels:", list(y_combo_red_test))

Predictions: [29.32713592 24.66992408 25.33959014 15.9210437  19.53163576 18.2415786
 29.94225585 25.55700769 16.85401963 22.46687993 18.49979499 19.64403448
 22.17183997 29.09450868 21.52870248 18.86114505 18.68527732 24.60686368
 20.35729188 29.98864581 20.91705428 20.90951036 23.59074347 23.95317863
 17.99613492 25.41468914 22.00538351 24.294088   16.74925316 23.49064881
 22.08095602 23.82872377]
Labels: [32.9, 25.0, 26.8, 15.3, 20.4, 17.0, 33.2, 28.6, 15.2, 23.5, 20.4, 19.1, 21.5, 29.6, 18.4, 17.0, 17.6, 23.6, 19.1, 31.3, 20.6, 20.6, 23.0, 23.8, 16.9, 24.0, 20.5, 24.3, 17.3, 22.4, 22.5, 26.0]


In [1269]:
#GradientBoostingRegressor Combined Defense Reduced Predictions on Test Set (combd_red_test)
gbrcombd_red_model = grid_search_gbrcombd_red.best_estimator_

X_combd_red_test = combd_red_test.drop("PA", axis=1)
y_combd_red_test = combd_red_test["PA"].copy()

gbrcombd_red_pred = gbrcombd_red_model.predict(X_combd_red_test)

print("Predictions:", gbrcombd_red_pred)
print("Labels:", list(y_combd_red_test))

Predictions: [19.58049745 23.41944372 20.61644242 28.25732156 28.17275756 22.05594626
 17.3758327  19.25995696 29.92463127 22.27621338 22.72236451 24.64066187
 22.27136631 20.5727989  22.35201052 26.27686892 28.65763075 20.92380348
 28.37474453 19.13415328 26.22202272 21.67159579 20.86806733 21.91085917
 26.1647955  27.22184287 23.0276261  22.42648069 26.7577396  24.49491588
 24.25774216 21.92104489]
Labels: [17.9, 21.8, 20.6, 28.4, 29.0, 25.1, 14.1, 17.6, 32.4, 22.1, 22.4, 25.5, 21.7, 21.1, 23.6, 26.4, 27.6, 21.6, 29.9, 18.9, 26.3, 22.0, 21.1, 20.4, 27.2, 25.1, 23.4, 21.8, 29.4, 24.0, 23.9, 22.9]


# Evaluating and comparing models by reviewing mean_squared_error and adjusted r2 score metrics  from each of the models' test set predictions when compared to the actual labels of the test sets

In [1270]:
from sklearn.metrics import mean_squared_error

gbrfullo_mse = round(mean_squared_error(y_fullo_test, gbrfullo_pred), 4)
gbrfulld_mse = round(mean_squared_error(y_fulld_test, gbrfulld_pred), 4)
gbradvo_mse = round(mean_squared_error(y_advo_test, gbradvo_pred), 4)
gbradvd_mse = round(mean_squared_error(y_advd_test, gbradvd_pred), 4)

gbrfullo_red_mse = round(mean_squared_error(y_fullo_red_test, gbrfullo_red_pred), 4)
gbrfulld_red_mse = round(mean_squared_error(y_fulld_red_test, gbrfulld_red_pred), 4)
gbradvo_red_mse = round(mean_squared_error(y_advo_red_test, gbradvo_red_pred), 4)
gbradvd_red_mse = round(mean_squared_error(y_advd_red_test, gbradvd_red_pred), 4)

gbrcombo_red_mse = round(mean_squared_error(y_combo_red_test, gbrcombo_red_pred), 4)
gbrcombd_red_mse = round(mean_squared_error(y_combd_red_test, gbrcombd_red_pred), 4)

print("The mse for gbrfullo_mse is", gbrfullo_mse, "The mse for gbrfullo_red_mse is", gbrfullo_red_mse, "The mse for gbrcombo_red_mse is", gbrcombo_red_mse)
print("The mse for gbrfulld_mse is", gbrfulld_mse, "The mse for gbrfulld_red_mse is", gbrfulld_red_mse, "The mse for gbrcombd_red_mse is", gbrcombd_red_mse)
print("The mse for gbradvo_mse is", gbradvo_mse, "The mse for gbradvo_red_mse is", gbradvo_red_mse)
print("The mse for gbradvd_mse is", gbradvd_mse, "The mse for gbradvd_red_mse is", gbradvd_red_mse)

The mse for gbrfullo_mse is 1.1407 The mse for gbrfullo_red_mse is 1.1132 The mse for gbrcombo_red_mse is 2.4107
The mse for gbrfulld_mse is 1.3361 The mse for gbrfulld_red_mse is 1.4026 The mse for gbrcombd_red_mse is 1.8409
The mse for gbradvo_mse is 4.2381 The mse for gbradvo_red_mse is 3.5709
The mse for gbradvd_mse is 3.5735 The mse for gbradvd_red_mse is 3.6018


In [1271]:
from sklearn.metrics import r2_score

gbrfullo_r2 = round(r2_score(y_fullo_test, gbrfullo_pred), 4)
gbrfulld_r2 = round(r2_score(y_fulld_test, gbrfulld_pred), 4)
gbradvo_r2 = round(r2_score(y_advo_test, gbradvo_pred), 4)
gbradvd_r2 = round(r2_score(y_advd_test, gbradvd_pred), 4)

gbrfullo_red_r2 = round(r2_score(y_fullo_red_test, gbrfullo_red_pred), 4)
gbrfulld_red_r2 = round(r2_score(y_fulld_red_test, gbrfulld_red_pred), 4)
gbradvo_red_r2 = round(r2_score(y_advo_red_test, gbradvo_red_pred), 4)
gbradvd_red_r2 = round(r2_score(y_advd_red_test, gbradvd_red_pred), 4)

gbrcombo_red_r2 = round(r2_score(y_combo_red_test, gbrcombo_red_pred), 4)
gbrcombd_red_r2 = round(r2_score(y_combd_red_test, gbrcombd_red_pred), 4)

print("The r2_score for gbrfullo_r2 is", gbrfullo_r2, "The r2_score for gbrfullo_red_r2 is", gbrfullo_red_r2, "The r2_score for gbrcombo_red_r2 is", gbrcombo_red_r2)
print("The r2_score for gbrfulld_r2 is", gbrfulld_r2, "The r2_score for gbrfulld_red_r2 is", gbrfulld_red_r2, "The r2_score for gbrcombd_red_r2 is", gbrcombd_red_r2)
print("The r2_score for gbradvo_r2 is", gbradvo_r2, "The r2_score for gbradvo_red_r2 is", gbradvo_red_r2)
print("The r2_score for gbradvd_r2 is", gbradvd_r2, "The r2_score for gbradvd_red_r2 is", gbradvd_red_r2)

The r2_score for gbrfullo_r2 is 0.932 The r2_score for gbrfullo_red_r2 is 0.9336 The r2_score for gbrcombo_red_r2 is 0.8962
The r2_score for gbrfulld_r2 is 0.8663 The r2_score for gbrfulld_red_r2 is 0.8596 The r2_score for gbrcombd_red_r2 is 0.8795
The r2_score for gbradvo_r2 is 0.8233 The r2_score for gbradvo_red_r2 is 0.8511
The r2_score for gbradvd_r2 is 0.6139 The r2_score for gbradvd_red_r2 is 0.6108


# Utilizing optimal trained models to make predictions on the full training set on then merge all data together to create a full statistical dataset including all PF and PA predictions

### Full Offense and Defense GBR Predictions

In [1272]:
#Preparing full dataset for predictions utilizing the best gb model for PF
X_gbr_off = full_off.drop('Tm', axis=1)
X_gbr_off = X_gbr_off.drop('PF', axis=1)
#Using the optimal model to make predictions and then adding PF predictions to full dataset. Also filtering for the 2022 season.
PF_gbrpred = gbrfullo_model.predict(X_gbr_off)
PF_gbrpred_df = pd.DataFrame(PF_gbrpred, columns=['PF_gbrpred'])
full_off['PF_gbr_Pred'] = PF_gbrpred
full_gbroff_2022 = full_off.loc[full_off['Season'] == 2022]
full_gbroff_2022

Unnamed: 0,Tm,id,Season,PF,Yds,Ply,Y/P,TO,FL,1stD,P_Cmp,P_Att,P_Yds,P_TD,P_Int,NPY/A,P_1stD,R_Att,R_Yds,R_TD,RY/A,R_1stD,Pen,Pen_Yds,1stPy,Sc%,TO%,EXP,P_Cmp%,P_TD%,P_Int%,P_Lng,P_Y/A,P_AY/A,P_Y/C,P_Rate,P_Sk,Sk_Yds_Lost,P_Sk%,APNY/A,QB_4QC,QB_GWD,P_EXP,PF_gbr_Pred
0,Kansas City Chiefs,15,2022,29.2,413.6,64.4,6.4,1.35,0.65,24.0,25.6,38.3,297.8,2.41,0.71,7.5,16.0,24.5,115.9,1.06,4.7,6.18,5.12,49.3,1.82,46.4,10.1,14.1,66.8,6.3,1.8,67,8.1,8.5,12.1,104.7,1.53,11.1,3.8,7.9,0.24,0.24,13.7,29.637121
1,Philadelphia Eagles,25,2022,28.1,389.1,66.1,5.9,1.12,0.59,22.6,20.6,31.5,241.5,1.47,0.53,7.1,11.5,32.0,147.6,1.88,4.6,9.47,5.29,40.1,1.71,42.5,10.2,10.3,65.3,4.7,1.7,78,8.1,8.3,12.5,99.0,2.59,15.2,7.6,7.2,0.06,0.12,7.72,27.605864
2,Dallas Cowboys,8,2022,27.5,354.9,65.5,5.4,1.35,0.29,20.4,20.9,32.7,219.8,1.65,1.06,6.4,11.4,31.2,135.2,1.41,4.3,7.59,6.12,49.5,1.41,41.5,10.8,5.48,63.8,5.0,3.2,68,7.0,6.6,11.0,87.9,1.59,10.3,4.6,6.0,0.12,0.24,6.27,27.73882
3,Buffalo Bills,3,2022,28.4,397.6,64.8,6.1,1.69,0.81,22.9,22.6,35.9,258.1,2.19,0.88,6.8,13.3,26.9,139.5,0.94,5.2,8.0,5.75,44.3,1.69,45.0,15.2,9.77,62.9,6.1,2.4,98,7.5,7.6,11.9,95.8,2.06,10.1,5.4,6.9,0.19,0.25,8.07,28.064636
4,Detroit Lions,10,2022,26.6,380.0,64.2,5.9,0.88,0.47,22.1,22.5,34.6,251.8,1.71,0.41,7.0,13.4,28.2,128.2,1.35,4.5,6.71,5.29,45.5,2.0,43.2,8.0,9.53,65.1,4.9,1.2,81,7.6,8.0,11.6,99.3,1.41,9.59,3.9,7.4,0.18,0.18,10.2,27.042691
5,San Francisco 49ers,27,2022,26.5,365.6,61.6,5.9,1.0,0.47,20.4,19.9,30.1,226.8,1.76,0.53,7.1,11.1,29.6,138.8,1.18,4.7,7.76,5.53,42.9,1.59,41.4,9.1,8.41,66.0,5.9,1.8,57,7.9,8.3,12.0,102.3,1.82,11.4,5.7,7.5,0.12,0.12,9.82,25.90257
6,Minnesota Vikings,20,2022,24.9,361.5,66.1,5.5,1.35,0.47,22.5,26.4,39.5,263.8,1.76,0.88,6.2,14.4,23.8,97.7,1.06,4.1,5.47,5.18,40.5,2.65,36.0,11.7,4.46,66.7,4.5,2.2,66,7.2,7.1,10.8,93.1,2.76,19.6,6.5,6.1,0.47,0.47,7.17,24.436709
7,Cincinnati Bengals,6,2022,26.1,360.5,65.8,5.5,1.13,0.38,22.3,26.1,38.1,265.0,2.19,0.75,6.5,13.8,24.9,95.5,0.88,3.8,6.0,5.06,38.9,2.5,41.5,9.7,7.55,68.5,5.7,2.0,60,7.4,7.7,10.8,101.0,2.75,17.5,6.7,6.7,0.19,0.25,9.14,26.013764
8,Seattle Seahawks,28,2022,23.9,351.5,61.4,5.7,1.35,0.65,20.4,23.5,33.7,231.4,1.76,0.71,6.4,12.1,25.0,120.1,0.71,4.8,6.24,6.24,50.2,2.0,40.6,11.2,3.36,69.6,5.2,2.1,54,7.5,7.6,10.7,100.0,2.71,20.5,7.4,6.5,0.12,0.18,4.55,24.489553
9,Jacksonville Jaguars,14,2022,23.8,357.4,63.1,5.7,1.29,0.76,20.7,23.2,35.1,232.9,1.47,0.53,6.3,12.2,26.4,124.5,0.94,4.7,6.71,5.29,39.6,1.76,39.2,12.2,5.6,66.1,4.2,1.5,59,7.0,7.1,10.5,93.9,1.65,11.1,4.5,6.5,0.18,0.12,6.34,23.274803


In [1273]:
#Preparing full dataset for predictions utilizing the best gb model for PA
X_gbr_def = full_def.drop('Tm', axis=1)
X_gbr_def = X_gbr_def.drop('PA', axis=1)
#Using the model to make predictions and then adding PA predictions to full dataset. Also filtering for the 2022 season.
PA_gbrpred = gbrfulld_model.predict(X_gbr_def)
PA_gbrpred_df = pd.DataFrame(PA_gbrpred, columns=['PA_gbrpred'])
full_def['PA_gbr_Pred'] = PA_gbrpred
full_gbrdef_2022 = full_def.loc[full_def['Season'] == 2022]
full_gbrdef_2022

Unnamed: 0,Tm,id,Season,PA,Yds,Ply,Y/P,TO,FL,1stD,P_Cmp,P_Att,P_Yds,P_TD,P_Int,NPY/A,P_1stD,R_Att,R_Yds,R_TD,RY/A,R_1stD,Pen,Pen_Yds,1stPy,Sc%,TO%,EXP,PD_Cmp%,PD_TD%,PD,PD_Int%,PD_Y/A,PD_AY/A,PD_Y/C,PD_Rate,PD_Sk,PD_Sk_Yds,PD_QBHits,PD_TFL,PD_Sk%,APDNY/A,PD_EXP,PA_gbr_Pred
0,San Francisco 49ers,27,2022,16.3,300.6,60.4,5.0,1.76,0.59,17.1,22.9,34.9,222.9,1.18,1.18,5.9,11.1,22.9,77.7,0.65,3.4,4.53,5.65,48.9,1.53,25.7,15.3,4.01,65.6,3.4,4.59,3.4,6.9,6.1,10.5,82.7,2.59,18.1,7.12,4.53,6.9,5.2,1.81,16.190658
1,Buffalo Bills,3,2022,17.9,319.1,62.3,5.1,1.69,0.63,19.6,22.6,35.6,214.6,1.31,1.06,5.6,11.3,24.2,104.6,0.63,4.3,6.44,5.56,47.1,1.81,31.0,14.3,0.68,63.5,3.7,5.63,3.0,6.5,5.9,10.3,82.1,2.5,18.0,6.13,5.81,6.6,5.1,-1.15,19.068041
2,Baltimore Ravens,2,2022,18.5,324.3,61.6,5.3,1.47,0.65,19.0,23.2,34.9,232.2,1.18,0.82,6.2,12.1,23.9,92.1,0.65,3.9,5.35,4.65,38.2,1.53,35.8,13.9,-1.86,66.4,3.4,4.53,2.4,7.2,6.8,10.8,88.7,2.82,17.9,5.59,4.53,7.5,5.8,-3.3,19.308233
3,New York Jets,24,2022,18.6,311.1,64.3,4.8,0.94,0.24,18.2,20.4,32.7,189.4,0.88,0.71,5.4,9.76,28.9,121.6,0.82,4.2,6.88,5.29,41.6,1.59,32.3,8.3,1.53,62.4,2.7,4.29,2.2,6.3,5.9,10.2,80.5,2.65,18.0,7.65,4.76,7.5,5.0,0.22,18.901812
4,Cincinnati Bengals,6,2022,20.1,335.7,62.3,5.4,1.5,0.69,18.4,20.6,34.9,229.1,1.06,0.81,6.2,10.8,25.4,106.6,0.75,4.2,6.38,6.25,52.0,1.25,36.4,13.1,-0.59,58.9,3.0,4.69,2.3,6.8,6.4,11.6,80.1,1.88,10.2,6.38,3.56,5.1,5.8,-1.98,20.129431
5,Dallas Cowboys,8,2022,20.1,330.2,64.8,5.1,1.94,1.0,19.2,20.3,32.4,200.9,1.35,0.94,5.7,10.2,29.2,129.3,0.53,4.4,7.0,5.76,50.1,2.0,33.5,16.2,2.12,62.7,4.2,3.47,2.9,6.8,6.4,10.9,84.7,3.18,20.7,6.47,5.47,8.9,5.2,0.44,19.846132
6,Washington Commanders,31,2022,20.2,304.6,58.7,5.2,1.06,0.53,16.8,18.1,30.2,191.3,1.53,0.53,5.8,9.12,25.9,113.3,0.59,4.4,6.0,5.0,41.6,1.71,31.5,7.6,0.43,59.9,5.1,4.0,1.8,6.9,7.1,11.5,90.2,2.53,16.4,6.29,5.18,7.7,6.0,-1.2,20.030199
7,Philadelphia Eagles,25,2022,20.2,301.5,63.1,4.8,1.59,0.59,19.1,20.6,32.7,179.8,1.29,1.0,4.9,10.9,26.2,121.6,0.88,4.6,6.76,5.59,46.9,1.41,32.4,15.1,0.89,62.9,4.0,5.0,3.1,6.4,5.8,10.1,81.6,4.12,28.9,7.29,5.71,11.2,4.4,3.61,20.230224
8,New Orleans Saints,22,2022,20.3,314.8,63.3,5.0,0.82,0.41,18.8,18.9,31.5,184.4,1.0,0.41,5.4,9.35,28.9,130.5,0.82,4.5,7.18,5.41,46.9,2.24,34.8,7.1,-1.8,60.1,3.2,3.94,1.3,6.4,6.4,10.6,83.8,2.82,16.4,5.12,4.53,8.2,5.4,-1.62,19.542768
9,Pittsburgh Steelers,26,2022,20.4,330.4,60.2,5.5,1.35,0.18,18.5,19.5,31.9,222.3,1.71,1.18,6.5,11.5,26.0,108.1,0.41,4.2,5.59,5.65,42.2,1.35,37.3,13.0,-2.77,61.3,5.4,4.82,3.7,7.5,6.9,12.2,86.8,2.35,16.9,4.94,4.0,6.9,5.9,-3.87,21.295456


In [1274]:
#Creating new DataFrame including both the predicted PF and PA for each optimal GBR model and organizing by team
PF_gbroff_2022 = full_gbroff_2022[['Tm', 'PF_gbr_Pred']]
PA_gbrdef_2022 = full_gbrdef_2022[['Tm', 'PA_gbr_Pred']]
tm_pred_gbr = pd.merge(PF_gbroff_2022, PA_gbrdef_2022, on=['Tm'])
tm_pred_gbr

Unnamed: 0,Tm,PF_gbr_Pred,PA_gbr_Pred
0,Kansas City Chiefs,29.637121,22.165063
1,Philadelphia Eagles,27.605864,20.230224
2,Dallas Cowboys,27.73882,19.846132
3,Buffalo Bills,28.064636,19.068041
4,Detroit Lions,27.042691,25.912179
5,San Francisco 49ers,25.90257,16.190658
6,Minnesota Vikings,24.436709,24.35377
7,Cincinnati Bengals,26.013764,20.129431
8,Seattle Seahawks,24.489553,23.690854
9,Jacksonville Jaguars,23.274803,22.63346


In [1275]:
#Making a copy of the predictions DF in order to organize statistics for two teams to eventually create a potential matchup DF
tm_pred_gbr1 = tm_pred_gbr.copy()
tm_pred_gbr1 = tm_pred_gbr1.rename(columns={'Tm': 'Tm2', 'PF_gbr_Pred':'PF_gbr_Pred2', 'PA_gbr_Pred': 'PA_gbr_Pred2'})
tm_df2 = tm_pred_gbr1[['Tm2']]

tm_pred_gbr_join = tm_pred_gbr.join(tm_pred_gbr1, how='outer')
tm_pred_gbr_join = tm_pred_gbr_join.rename(columns={'Tm': 'Tm1', 'PF_gbr_Pred': 'PF_gbr_Pred1', 'PA_gbr_Pred': 'PA_gbr_Pred1'})
tm_pred_gbr_join

Unnamed: 0,Tm1,PF_gbr_Pred1,PA_gbr_Pred1,Tm2,PF_gbr_Pred2,PA_gbr_Pred2
0,Kansas City Chiefs,29.637121,22.165063,Kansas City Chiefs,29.637121,22.165063
1,Philadelphia Eagles,27.605864,20.230224,Philadelphia Eagles,27.605864,20.230224
2,Dallas Cowboys,27.73882,19.846132,Dallas Cowboys,27.73882,19.846132
3,Buffalo Bills,28.064636,19.068041,Buffalo Bills,28.064636,19.068041
4,Detroit Lions,27.042691,25.912179,Detroit Lions,27.042691,25.912179
5,San Francisco 49ers,25.90257,16.190658,San Francisco 49ers,25.90257,16.190658
6,Minnesota Vikings,24.436709,24.35377,Minnesota Vikings,24.436709,24.35377
7,Cincinnati Bengals,26.013764,20.129431,Cincinnati Bengals,26.013764,20.129431
8,Seattle Seahawks,24.489553,23.690854,Seattle Seahawks,24.489553,23.690854
9,Jacksonville Jaguars,23.274803,22.63346,Jacksonville Jaguars,23.274803,22.63346


### Advanced Offense and Defense GBR Predictions

In [1276]:
#Preparing full dataset for predictions utilizing the best adv gb model for PF
X_gbr_advoff = adv_off.drop('Tm', axis=1)
X_gbr_advoff = X_gbr_advoff.drop('PF', axis=1)
#Using the optimal model to make predictions and then adding PF predictions to full dataset. Also filtering for the 2022 season.
PF_gbrpred_adv = gbradvo_model.predict(X_gbr_advoff)
PF_gbrpred_adv_df = pd.DataFrame(PF_gbrpred_adv, columns=['PF_gbrpred_adv'])
adv_off['PF_gbradv_Pred'] = PF_gbrpred_adv
adv_gbroff_2022 = adv_off.loc[adv_off['Season'] == 2022]
adv_gbroff_2022

Unnamed: 0,Tm,id,Season,G,PF,Pass_Cmp,Pass_Att,Pass_Yds,IAY,IAY/PA,CAY,CAY/Cmp,CAY/PA,YAC,YAC/Cmp,Bats,ThAwy,Spikes,BadTh,Bad%,OnTgt,OnTgt%,Sk,PktTime,Bltz,Hrry,Hits,Prss,Prss%,Scrm,Yds/Scr,Rush_Att,Rush_Yds,Rush_TD,Rush_1D,Rush_YBC,Rush_YBC/Att,Rush_YAC,Rush_YAC/Att,Rush_BrkTkl,Att/Br,Rec_Tgt,Rec,Rec_Yds,Rec_TD,Rec_1D,Rec_YBC,Rec_YBC/R,Rec_YAC,Rec_YAC/R,ADOT,Rec_BrkTkl,Rec/Br,Drop,Drop%,3DAtt,3DConv,3D%,4DAtt,4DConv,4D%,RZAtt,RZTD,RZPct,PF_gbradv_Pred
0,Atlanta Falcons,1,2022,17,21.5,257,415,2699,4011,9.7,1698,6.6,4.1,1229,4.8,4,12,4,85,21.3,287,71.9,37,2.6,142,14,41,92,18.8,37,6.9,559,2718,17,152,1701,3.0,1017,1.8,10,55.9,415,257,2927,17,148,1698,6.6,1229,4.8,9.4,7,36.7,9,2.2,11.5,4.82,41.8,1.06,0.53,50.0,3.18,1.76,55.6,21.499297
1,Buffalo Bills,3,2022,16,28.4,361,574,4129,5262,9.2,2694,7.5,4.7,1597,4.4,14,18,1,93,16.8,407,73.3,33,2.5,150,41,40,114,17.2,57,9.4,430,2232,15,130,1577,3.7,655,1.5,17,25.3,574,361,4291,35,214,2694,7.5,1597,4.4,8.8,11,32.8,38,6.6,12.1,6.06,50.3,0.81,0.44,53.8,3.63,2.19,60.3,28.400784
2,Carolina Panthers,4,2022,17,20.4,267,457,2996,3303,7.2,1465,5.5,3.2,1781,6.7,17,30,3,73,17.2,298,70.3,36,2.5,140,29,24,89,17.4,19,8.2,483,2210,16,119,1336,2.8,874,1.8,19,25.4,457,267,3246,16,138,1486,5.6,1760,6.6,7.0,16,16.7,26,5.7,11.9,3.76,31.7,1.24,0.71,57.1,2.71,1.53,56.5,20.400487
3,Chicago Bears,5,2022,17,19.2,223,377,2219,3437,9.1,1449,6.5,3.8,1149,5.2,12,14,1,72,19.9,254,70.2,58,2.6,112,36,35,129,25.4,73,9.2,558,3014,18,148,1893,3.4,1121,2.0,41,13.6,377,223,2598,19,109,1476,6.6,1122,5.0,8.5,13,17.2,18,4.8,12.9,5.29,40.9,1.47,0.47,32.0,2.94,1.65,56.0,19.200074
4,Cincinnati Bengals,6,2022,16,26.1,418,610,4240,4152,6.8,2350,5.6,3.9,2170,5.2,24,17,1,76,12.8,469,79.2,44,2.2,113,22,48,114,16.7,27,7.5,399,1528,14,97,991,2.5,537,1.3,15,26.6,610,418,4520,35,224,2350,5.6,2170,5.2,6.7,32,13.1,34,5.6,12.9,5.94,46.1,0.75,0.19,25.0,3.56,2.31,64.9,26.099394
5,Cleveland Browns,7,2022,17,21.2,335,540,3444,4379,8.1,2071,6.2,3.8,1639,4.9,9,28,5,71,14.0,381,75.1,44,2.6,137,44,52,140,22.5,37,8.1,532,2490,19,142,1423,2.7,1067,2.0,36,14.8,540,335,3710,19,182,2071,6.2,1639,4.9,7.6,22,15.2,37,6.9,13.6,5.18,38.1,2.47,1.35,54.8,3.29,1.76,53.6,21.201247
6,Indianapolis Colts,13,2022,17,17.0,398,604,3432,3828,6.3,2056,5.2,3.4,1798,4.5,14,19,1,87,14.9,441,75.5,60,2.3,181,45,61,166,24.2,22,7.2,439,1866,8,91,1128,2.6,738,1.7,33,13.3,604,398,3854,17,193,2056,5.2,1798,4.5,6.1,24,16.6,27,4.5,13.4,4.41,32.9,1.53,0.71,46.2,2.82,1.29,45.8,17.001339
7,Arizona Cardinals,0,2022,17,20.0,433,664,3626,4599,6.9,1908,4.4,2.9,2058,4.8,12,26,4,97,15.3,485,76.5,46,2.2,154,51,45,142,18.8,45,6.8,434,1873,15,115,1244,2.9,629,1.4,16,27.1,664,433,3966,17,189,1908,4.4,2058,4.8,6.8,26,16.7,37,5.6,13.7,4.82,35.2,2.41,1.06,43.9,2.59,1.47,56.8,19.998683
8,Dallas Cowboys,8,2022,17,27.5,355,556,3736,4497,8.1,2252,6.3,4.1,1659,4.7,12,8,1,100,18.3,402,73.5,27,2.4,149,43,54,124,20.7,17,4.9,531,2298,24,129,1336,2.5,962,1.8,24,22.1,556,355,3911,28,194,2252,6.3,1659,4.7,8.0,26,13.7,33,5.9,13.6,6.18,45.5,1.12,0.59,52.6,3.29,2.35,71.4,27.498042
9,Denver Broncos,9,2022,17,16.9,345,571,3592,5004,8.8,2019,5.9,3.5,1988,5.8,21,33,1,89,16.6,393,73.2,63,2.5,160,44,74,181,27.3,29,7.7,444,1935,11,105,1236,2.8,699,1.6,24,18.5,571,345,4007,18,166,2019,5.9,1988,5.8,8.3,17,20.3,32,5.6,13.5,3.94,29.1,1.41,0.71,50.0,2.12,1.18,55.6,18.480433


In [1277]:
#Preparing full dataset for predictions utilizing the best adv gb model for PA
X_gbr_advdef = adv_def.drop('Tm', axis=1)
X_gbr_advdef = X_gbr_advdef.drop('PA', axis=1)
#Using the optimal model to make predictions and then adding PA predictions to full dataset. Also filtering for the 2022 season.
PA_gbrpred_adv = gbradvd_model.predict(X_gbr_advdef)
PA_gbrpred_adv_df = pd.DataFrame(PA_gbrpred_adv, columns=['PA_gbrpred_adv'])
adv_def['PA_gbradv_Pred'] = PA_gbrpred_adv
adv_gbrdef_2022 = adv_def.loc[adv_def['Season'] == 2022]
adv_gbrdef_2022

Unnamed: 0,Tm,id,Season,G,PA,Att,Cmp,Yds,TD,DADOT,Air,YAC,Bltz,Bltz%,Hrry,Hrry%,QBKD,QBKD%,Sk,Prss,Prss%,MTkl,3DAtt,3DConv,3D%,4DAtt,4DConv,4D%,RZAtt,RZTD,RZPct,PA_gbradv_Pred
0,Atlanta Falcons,1,2022,17,22.7,561,372,3942,26,8.3,2297,1793,98,16.3,23,3.8,44,7.8,21,88,14.6,54,12.9,5.94,45.9,1.0,0.41,41.2,3.53,1.94,55.0,23.65808
1,Buffalo Bills,3,2022,16,17.9,570,362,3433,21,7.5,2276,1901,124,19.4,46,7.2,56,9.8,40,142,22.2,77,13.0,4.88,37.5,2.06,1.19,57.6,3.06,1.38,44.9,18.303863
2,Carolina Panthers,4,2022,17,22.0,580,383,3868,25,7.9,2434,1664,186,29.2,41,6.4,50,8.6,35,126,19.8,72,13.6,5.59,41.1,1.06,0.47,44.4,3.35,1.88,56.1,22.927502
3,Chicago Bears,5,2022,17,27.2,481,323,3716,22,7.8,1973,1867,97,18.2,45,8.4,20,4.2,20,85,15.9,74,12.0,5.88,49.0,0.88,0.35,40.0,3.65,2.35,64.5,27.180896
4,Cincinnati Bengals,6,2022,16,20.1,559,329,3665,17,8.5,2575,2025,124,20.4,38,6.3,68,12.2,30,136,22.4,69,13.3,5.25,39.6,1.44,0.56,39.1,3.13,1.63,52.0,20.504447
5,Cleveland Browns,7,2022,17,22.4,519,315,3336,20,7.8,1895,1647,130,22.9,42,7.4,27,5.2,34,103,18.2,76,12.6,5.0,39.5,1.06,0.47,44.4,3.41,1.88,55.2,22.602988
6,Indianapolis Colts,13,2022,17,25.1,537,365,3569,25,7.1,1775,2061,100,16.4,53,8.7,37,6.9,44,134,22.0,85,13.2,5.0,37.9,1.47,0.76,52.0,3.12,2.12,67.9,23.658011
7,Arizona Cardinals,0,2022,17,26.4,593,414,3915,29,5.7,1622,2559,225,34.5,48,7.4,63,10.6,36,147,22.5,83,11.9,5.12,42.9,0.94,0.47,50.0,3.88,2.53,65.2,26.541225
8,Dallas Cowboys,8,2022,17,20.1,550,345,3415,23,7.7,2252,2080,162,25.6,54,8.5,54,9.8,54,162,25.6,64,13.6,5.12,37.7,1.29,0.71,54.5,2.94,1.53,52.0,19.918712
9,Denver Broncos,9,2022,17,21.1,605,392,3574,20,6.7,1741,2040,219,32.9,40,6.0,47,7.8,36,123,18.5,98,12.9,4.41,34.1,0.82,0.53,64.3,2.76,1.41,51.1,20.395557


In [1278]:
#Creating new DataFrame including both the predicted PF and PA for each optimal adv full GBR model and organizing by team
PF_gbroff_adv_2022 = adv_gbroff_2022[['Tm', 'PF_gbradv_Pred']]
PA_gbrdef_adv_2022 = adv_gbrdef_2022[['Tm', 'PA_gbradv_Pred']]
tm_pred_gbr_adv = pd.merge(PF_gbroff_adv_2022, PA_gbrdef_adv_2022, on=['Tm'])
tm_pred_gbr_adv

Unnamed: 0,Tm,PF_gbradv_Pred,PA_gbradv_Pred
0,Atlanta Falcons,21.499297,23.65808
1,Buffalo Bills,28.400784,18.303863
2,Carolina Panthers,20.400487,22.927502
3,Chicago Bears,19.200074,27.180896
4,Cincinnati Bengals,26.099394,20.504447
5,Cleveland Browns,21.201247,22.602988
6,Indianapolis Colts,17.001339,23.658011
7,Arizona Cardinals,19.998683,26.541225
8,Dallas Cowboys,27.498042,19.918712
9,Denver Broncos,18.480433,20.395557


In [1279]:
#Making a copy of the predictions DF in order to organize statistics for two teams to eventually create a potential matchup DF
tm_pred_gbr1_adv = tm_pred_gbr_adv.copy()
tm_pred_gbr1_adv = tm_pred_gbr1_adv.rename(columns={'Tm': 'Tm2', 'PF_gbradv_Pred':'PF_gbradv_Pred2', 'PA_gbradv_Pred': 'PA_gbradv_Pred2'})
tm_df2_adv = tm_pred_gbr1_adv[['Tm2']]

tm_pred_gbradv_join = tm_pred_gbr_adv.join(tm_pred_gbr1, how='outer')
tm_pred_gbradv_join = tm_pred_gbradv_join.rename(columns={'Tm': 'Tm1', 'PF_gbradv_Pred': 'PF_gbradv_Pred1', 'PA_gbradv_Pred': 'PA_gbradv_Pred1'})
tm_pred_gbradv_join

Unnamed: 0,Tm1,PF_gbradv_Pred1,PA_gbradv_Pred1,Tm2,PF_gbr_Pred2,PA_gbr_Pred2
0,Atlanta Falcons,21.499297,23.65808,Kansas City Chiefs,29.637121,22.165063
1,Buffalo Bills,28.400784,18.303863,Philadelphia Eagles,27.605864,20.230224
2,Carolina Panthers,20.400487,22.927502,Dallas Cowboys,27.73882,19.846132
3,Chicago Bears,19.200074,27.180896,Buffalo Bills,28.064636,19.068041
4,Cincinnati Bengals,26.099394,20.504447,Detroit Lions,27.042691,25.912179
5,Cleveland Browns,21.201247,22.602988,San Francisco 49ers,25.90257,16.190658
6,Indianapolis Colts,17.001339,23.658011,Minnesota Vikings,24.436709,24.35377
7,Arizona Cardinals,19.998683,26.541225,Cincinnati Bengals,26.013764,20.129431
8,Dallas Cowboys,27.498042,19.918712,Seattle Seahawks,24.489553,23.690854
9,Denver Broncos,18.480433,20.395557,Jacksonville Jaguars,23.274803,22.63346


### Reduced Full Dataset Offense and Defense GBR Predictions

In [1280]:
#Preparing full dataset for predictions utilizing the 2nd best gb model for PF
X_gbr_off_red = full_off_red.drop('Tm', axis=1)
X_gbr_off_red = X_gbr_off_red.drop('PF', axis=1)
#Using the 2nd best model to make predictions and then adding PF predictions to full dataset. Also filtering for the 2022 season.
PF_gbrpred_red = gbrfullo_red_model.predict(X_gbr_off_red)
PF_gbrpred_red_df = pd.DataFrame(PF_gbrpred_red, columns=['PF_gbrpred_red'])
full_off_red['PF_gbrred_Pred'] = PF_gbrpred_red
full_gbroff_red_2022 = full_off_red.loc[full_off_red['Season'] == 2022]
full_gbroff_red_2022

Unnamed: 0,Tm,id,PF,Season,Sc%,Yds,APNY/A,P_TD%,P_TD,R_TD,PF_gbrred_Pred
0,Kansas City Chiefs,15,29.2,2022,46.4,413.6,7.9,6.3,2.41,1.06,29.943062
1,Philadelphia Eagles,25,28.1,2022,42.5,389.1,7.2,4.7,1.47,1.88,27.686473
2,Dallas Cowboys,8,27.5,2022,41.5,354.9,6.0,5.0,1.65,1.41,27.302403
3,Buffalo Bills,3,28.4,2022,45.0,397.6,6.9,6.1,2.19,0.94,27.977903
4,Detroit Lions,10,26.6,2022,43.2,380.0,7.4,4.9,1.71,1.35,26.731218
5,San Francisco 49ers,27,26.5,2022,41.4,365.6,7.5,5.9,1.76,1.18,25.966943
6,Minnesota Vikings,20,24.9,2022,36.0,361.5,6.1,4.5,1.76,1.06,24.602116
7,Cincinnati Bengals,6,26.1,2022,41.5,360.5,6.7,5.7,2.19,0.88,26.551884
8,Seattle Seahawks,28,23.9,2022,40.6,351.5,6.5,5.2,1.76,0.71,24.72738
9,Jacksonville Jaguars,14,23.8,2022,39.2,357.4,6.5,4.2,1.47,0.94,23.28014


In [1281]:
#Preparing full dataset for predictions utilizing the 2nd best gb model for PA
X_gbr_def_red = full_def_red.drop('Tm', axis=1)
X_gbr_def_red = X_gbr_def_red.drop('PA', axis=1)
#Using the 2nd best model to make predictions and then adding PA predictions to full dataset. Also filtering for the 2022 season.
PA_gbrpred_red = gbrfulld_red_model.predict(X_gbr_def_red)
PA_gbrpred_red_df = pd.DataFrame(PA_gbrpred_red, columns=['PA_gbrpred_red'])
full_def_red['PA_gbrred_Pred'] = PA_gbrpred_red
full_gbrdef_red_2022 = full_def_red.loc[full_def_red['Season'] == 2022]
full_gbrdef_red_2022

Unnamed: 0,Tm,id,PA,Season,Sc%,PD_TD%,R_TD,Yds,APDNY/A,P_TD,R_Att,PA_gbrred_Pred
0,San Francisco 49ers,27,16.3,2022,25.7,3.4,0.65,300.6,5.2,1.18,22.9,16.725423
1,Buffalo Bills,3,17.9,2022,31.0,3.7,0.63,319.1,5.1,1.31,24.2,18.969305
2,Baltimore Ravens,2,18.5,2022,35.8,3.4,0.65,324.3,5.8,1.18,23.9,19.51282
3,New York Jets,24,18.6,2022,32.3,2.7,0.82,311.1,5.0,0.88,28.9,19.381783
4,Cincinnati Bengals,6,20.1,2022,36.4,3.0,0.75,335.7,5.8,1.06,25.4,19.552076
5,Dallas Cowboys,8,20.1,2022,33.5,4.2,0.53,330.2,5.2,1.35,29.2,19.668586
6,Washington Commanders,31,20.2,2022,31.5,5.1,0.59,304.6,6.0,1.53,25.9,19.475799
7,Philadelphia Eagles,25,20.2,2022,32.4,4.0,0.88,301.5,4.4,1.29,26.2,20.643006
8,New Orleans Saints,22,20.3,2022,34.8,3.2,0.82,314.8,5.4,1.0,28.9,19.732115
9,Pittsburgh Steelers,26,20.4,2022,37.3,5.4,0.41,330.4,5.9,1.71,26.0,21.435633


In [1282]:
#Creating new DataFrame including both the predicted PF and PA for each GBR model and organizing by team
PF_gbroff_red_2022 = full_gbroff_red_2022[['Tm', 'PF_gbrred_Pred']]
PA_gbrdef_red_2022 = full_gbrdef_red_2022[['Tm', 'PA_gbrred_Pred']]
tm_pred_gbrred = pd.merge(PF_gbroff_red_2022, PA_gbrdef_red_2022, on=['Tm'])
tm_pred_gbrred

Unnamed: 0,Tm,PF_gbrred_Pred,PA_gbrred_Pred
0,Kansas City Chiefs,29.943062,21.6432
1,Philadelphia Eagles,27.686473,20.643006
2,Dallas Cowboys,27.302403,19.668586
3,Buffalo Bills,27.977903,18.969305
4,Detroit Lions,26.731218,26.294944
5,San Francisco 49ers,25.966943,16.725423
6,Minnesota Vikings,24.602116,23.932514
7,Cincinnati Bengals,26.551884,19.552076
8,Seattle Seahawks,24.72738,23.623704
9,Jacksonville Jaguars,23.28014,22.291614


In [1283]:
#Making a copy of the predictions DF in order to organize statistics for two teams to eventually create a potential matchup DF
tm_pred_gbrred1 = tm_pred_gbrred.copy()
tm_pred_gbrred1 = tm_pred_gbrred1.rename(columns={'Tm': 'Tm2', 'PF_gbrred_Pred':'PF_gbrred_Pred2', 'PA_gbrred_Pred': 'PA_gbrred_Pred2'})
tm_red_df2 = tm_pred_gbrred1[['Tm2']]

tm_pred_gbrred_join = tm_pred_gbrred.join(tm_pred_gbrred1, how='outer')
tm_pred_gbrred_join = tm_pred_gbrred_join.rename(columns={'Tm': 'Tm1', 'PF_gbrred_Pred': 'PF_gbrred_Pred1', 'PA_gbrred_Pred': 'PA_gbrred_Pred1'})
tm_pred_gbrred_join

Unnamed: 0,Tm1,PF_gbrred_Pred1,PA_gbrred_Pred1,Tm2,PF_gbrred_Pred2,PA_gbrred_Pred2
0,Kansas City Chiefs,29.943062,21.6432,Kansas City Chiefs,29.943062,21.6432
1,Philadelphia Eagles,27.686473,20.643006,Philadelphia Eagles,27.686473,20.643006
2,Dallas Cowboys,27.302403,19.668586,Dallas Cowboys,27.302403,19.668586
3,Buffalo Bills,27.977903,18.969305,Buffalo Bills,27.977903,18.969305
4,Detroit Lions,26.731218,26.294944,Detroit Lions,26.731218,26.294944
5,San Francisco 49ers,25.966943,16.725423,San Francisco 49ers,25.966943,16.725423
6,Minnesota Vikings,24.602116,23.932514,Minnesota Vikings,24.602116,23.932514
7,Cincinnati Bengals,26.551884,19.552076,Cincinnati Bengals,26.551884,19.552076
8,Seattle Seahawks,24.72738,23.623704,Seattle Seahawks,24.72738,23.623704
9,Jacksonville Jaguars,23.28014,22.291614,Jacksonville Jaguars,23.28014,22.291614


### Reduced Advanced Dataset Offense and Defense GBR Predictions

In [1284]:
#Preparing full dataset for predictions utilizing the best adv_red gbr model for PF
X_gbr_advoff_red = adv_off_red.drop('Tm', axis=1)
X_gbr_advoff_red = X_gbr_advoff_red.drop('PF', axis=1)
#Using the optimal model to make predictions and then adding PF predictions to full dataset. Also filtering for the 2022 season.
PF_gbrpred_adv_red = gbradvo_red_model.predict(X_gbr_advoff_red)
PF_gbrpred_adv_red_df = pd.DataFrame(PF_gbrpred_adv_red, columns=['PF_gbrpred_adv_red'])
adv_off_red['PF_gbradv_Pred_red'] = PF_gbrpred_adv_red
adv_gbroff_red_2022 = adv_off_red.loc[adv_off_red['Season'] == 2022]
adv_gbroff_red_2022

Unnamed: 0,Tm,id,PF,Season,RZTD,Rec_TD,RZAtt,3D%,Sk,Pass_Yds,Rec_Yds,RZPct,Rush_TD,Rec_1D,PF_gbradv_Pred_red
0,Atlanta Falcons,1,21.5,2022,1.76,17,3.18,41.8,37,2699,2927,55.6,17,148,21.114618
1,Buffalo Bills,3,28.4,2022,2.19,35,3.63,50.3,33,4129,4291,60.3,15,214,28.196786
2,Carolina Panthers,4,20.4,2022,1.53,16,2.71,31.7,36,2996,3246,56.5,16,138,20.253297
3,Chicago Bears,5,19.2,2022,1.65,19,2.94,40.9,58,2219,2598,56.0,18,109,19.359229
4,Cincinnati Bengals,6,26.1,2022,2.31,35,3.56,46.1,44,4240,4520,64.9,14,224,26.014972
5,Cleveland Browns,7,21.2,2022,1.76,19,3.29,38.1,44,3444,3710,53.6,19,182,20.852184
6,Indianapolis Colts,13,17.0,2022,1.29,17,2.82,32.9,60,3432,3854,45.8,8,193,17.069022
7,Arizona Cardinals,0,20.0,2022,1.47,17,2.59,35.2,46,3626,3966,56.8,15,189,19.704595
8,Dallas Cowboys,8,27.5,2022,2.35,28,3.29,45.5,27,3736,3911,71.4,24,194,27.561768
9,Denver Broncos,9,16.9,2022,1.18,18,2.12,29.1,63,3592,4007,55.6,11,166,17.776099


In [1285]:
#Preparing full dataset for predictions utilizing the best adv_red gbr model for PA
X_gbr_advdef_red = adv_def_red.drop('Tm', axis=1)
X_gbr_advdef_red = X_gbr_advdef_red.drop('PA', axis=1)
#Using the optimal model to make predictions and then adding PA predictions to full dataset. Also filtering for the 2022 season.
PA_gbrpred_adv_red = gbradvd_red_model.predict(X_gbr_advdef_red)
PA_gbrpred_adv_red_df = pd.DataFrame(PA_gbrpred_adv_red, columns=['PA_gbrpred_adv_red'])
adv_def_red['PA_gbradv_Pred_red'] = PA_gbrpred_adv_red
adv_gbrdef_red_2022 = adv_def_red.loc[adv_def_red['Season'] == 2022]
adv_gbrdef_red_2022

Unnamed: 0,Tm,id,PA,Season,RZTD,RZAtt,TD,Yds,4DAtt,3D%,Att,Prss,DADOT,PA_gbradv_Pred_red
0,Atlanta Falcons,1,22.7,2022,1.94,3.53,26,3942,1.0,45.9,561,88,8.3,23.181483
1,Buffalo Bills,3,17.9,2022,1.38,3.06,21,3433,2.06,37.5,570,142,7.5,17.628941
2,Carolina Panthers,4,22.0,2022,1.88,3.35,25,3868,1.06,41.1,580,126,7.9,22.379602
3,Chicago Bears,5,27.2,2022,2.35,3.65,22,3716,0.88,49.0,481,85,7.8,27.309055
4,Cincinnati Bengals,6,20.1,2022,1.63,3.13,17,3665,1.44,39.6,559,136,8.5,20.63128
5,Cleveland Browns,7,22.4,2022,1.88,3.41,20,3336,1.06,39.5,519,103,7.8,22.679205
6,Indianapolis Colts,13,25.1,2022,2.12,3.12,25,3569,1.47,37.9,537,134,7.1,24.013524
7,Arizona Cardinals,0,26.4,2022,2.53,3.88,29,3915,0.94,42.9,593,147,5.7,26.514306
8,Dallas Cowboys,8,20.1,2022,1.53,2.94,23,3415,1.29,37.7,550,162,7.7,19.837221
9,Denver Broncos,9,21.1,2022,1.41,2.76,20,3574,0.82,34.1,605,123,6.7,20.168281


In [1286]:
#Creating new DataFrame including both the predicted PF and PA for each optimal adv_red GBR model and organizing by team
PF_gbroff_adv_red_2022 = adv_gbroff_red_2022[['Tm', 'PF_gbradv_Pred_red']]
PA_gbrdef_adv_red_2022 = adv_gbrdef_red_2022[['Tm', 'PA_gbradv_Pred_red']]
tm_pred_gbr_adv_red = pd.merge(PF_gbroff_adv_red_2022, PA_gbrdef_adv_red_2022, on=['Tm'])
tm_pred_gbr_adv_red

Unnamed: 0,Tm,PF_gbradv_Pred_red,PA_gbradv_Pred_red
0,Atlanta Falcons,21.114618,23.181483
1,Buffalo Bills,28.196786,17.628941
2,Carolina Panthers,20.253297,22.379602
3,Chicago Bears,19.359229,27.309055
4,Cincinnati Bengals,26.014972,20.63128
5,Cleveland Browns,20.852184,22.679205
6,Indianapolis Colts,17.069022,24.013524
7,Arizona Cardinals,19.704595,26.514306
8,Dallas Cowboys,27.561768,19.837221
9,Denver Broncos,17.776099,20.168281


In [1287]:
#Making a copy of the predictions DF in order to organize statistics for two teams to eventually create a potential matchup DF
tm_pred_gbr1_adv_red = tm_pred_gbr_adv_red.copy()
tm_pred_gbr1_adv_red = tm_pred_gbr1_adv_red.rename(columns={'Tm': 'Tm2', 'PF_gbradv_Pred_red':'PF_gbradv_Pred2_red', 'PA_gbradv_Pred_red': 'PA_gbradv_Pred2_red'})
tm_df2_adv_red = tm_pred_gbr1_adv_red[['Tm2']]

tm_pred_gbradv_red_join = tm_pred_gbr_adv_red.join(tm_pred_gbr1_adv_red, how='outer')
tm_pred_gbradv_red_join = tm_pred_gbradv_red_join.rename(columns={'Tm': 'Tm1', 'PF_gbradv_Pred_red': 'PF_gbradv_Pred1_red', 'PA_gbradv_Pred_red': 'PA_gbradv_Pred1_red'})
tm_pred_gbradv_red_join

Unnamed: 0,Tm1,PF_gbradv_Pred1_red,PA_gbradv_Pred1_red,Tm2,PF_gbradv_Pred2_red,PA_gbradv_Pred2_red
0,Atlanta Falcons,21.114618,23.181483,Atlanta Falcons,21.114618,23.181483
1,Buffalo Bills,28.196786,17.628941,Buffalo Bills,28.196786,17.628941
2,Carolina Panthers,20.253297,22.379602,Carolina Panthers,20.253297,22.379602
3,Chicago Bears,19.359229,27.309055,Chicago Bears,19.359229,27.309055
4,Cincinnati Bengals,26.014972,20.63128,Cincinnati Bengals,26.014972,20.63128
5,Cleveland Browns,20.852184,22.679205,Cleveland Browns,20.852184,22.679205
6,Indianapolis Colts,17.069022,24.013524,Indianapolis Colts,17.069022,24.013524
7,Arizona Cardinals,19.704595,26.514306,Arizona Cardinals,19.704595,26.514306
8,Dallas Cowboys,27.561768,19.837221,Dallas Cowboys,27.561768,19.837221
9,Denver Broncos,17.776099,20.168281,Denver Broncos,17.776099,20.168281


### Combined Reduced Dataset Offense and Defense GBR Predictions

In [1288]:
#Preparing full dataset for predictions utilizing the best gbr comb_red model for PF
X_gbr_off_comb_red = off_comb_red.drop('Tm', axis=1)
X_gbr_off_comb_red = X_gbr_off_comb_red.drop('PF', axis=1)
#Using the best gbr comb_red model to make predictions and then adding PF predictions to full dataset. Also filtering for the 2022 season.
PF_gbrpred_comb_red = gbrcombo_red_model.predict(X_gbr_off_comb_red)
PF_gbrpred_comb_red_df = pd.DataFrame(PF_gbrpred_comb_red, columns=['PF_gbrpred_comb_red'])
off_comb_red['PF_gbrred_comb_Pred'] = PF_gbrpred_comb_red
gbroff_comb_red_2022 = off_comb_red.loc[off_comb_red['Season'] == 2022]
gbroff_comb_red_2022

Unnamed: 0,Tm,id,PF,Season,Sc%,Yds,APNY/A,P_TD%,P_TD,RZTD,Rec_TD,RZAtt,Pass_Yds,3D%,PF_gbrred_comb_Pred
0,Kansas City Chiefs,15,29.2,2022,46.4,413.6,7.9,6.3,2.41,2.94,41,4.24,5062,48.7,29.121285
1,Philadelphia Eagles,25,28.1,2022,42.5,389.1,7.2,4.7,1.47,2.35,25,3.47,4105,45.9,27.083595
2,Dallas Cowboys,8,27.5,2022,41.5,354.9,6.0,5.0,1.65,2.35,28,3.29,3736,45.5,26.16928
3,Buffalo Bills,3,28.4,2022,45.0,397.6,6.9,6.1,2.19,2.19,35,3.63,4129,50.3,28.698861
4,Detroit Lions,10,26.6,2022,43.2,380.0,7.4,4.9,1.71,2.65,29,4.0,4281,40.8,26.72194
5,San Francisco 49ers,27,26.5,2022,41.4,365.6,7.5,5.9,1.76,2.0,30,3.71,3856,45.0,26.638957
6,Minnesota Vikings,20,24.9,2022,36.0,361.5,6.1,4.5,1.76,2.35,30,3.76,4484,41.2,24.602103
7,Cincinnati Bengals,6,26.1,2022,41.5,360.5,6.7,5.7,2.19,2.31,35,3.56,4240,46.1,26.393463
8,Seattle Seahawks,28,23.9,2022,40.6,351.5,6.5,5.2,1.76,1.53,30,3.24,3934,37.8,23.55936
9,Jacksonville Jaguars,14,23.8,2022,39.2,357.4,6.5,4.2,1.47,1.82,25,3.41,3959,41.9,23.953179


In [1289]:
#Preparing full dataset for predictions utilizing the best gbr comb_red model for PA
X_gbr_def_comb_red = def_comb_red.drop('Tm', axis=1)
X_gbr_def_comb_red = X_gbr_def_comb_red.drop('PA', axis=1)
#Using the best gbr comb_red model to make predictions and then adding PA predictions to full dataset. Also filtering for the 2022 season.
PA_gbrpred_comb_red = gbrcombd_red_model.predict(X_gbr_def_comb_red)
PA_gbrpred_comb_red_df = pd.DataFrame(PA_gbrpred_comb_red, columns=['PA_gbrpred_comb_red'])
def_comb_red['PA_gbrred_comb_Pred'] = PA_gbrpred_comb_red
gbrdef_comb_red_2022 = def_comb_red.loc[def_comb_red['Season'] == 2022]
gbrdef_comb_red_2022

Unnamed: 0,Tm,id,PA,Season,Sc%,Yds/A,PD_TD%,RZTD,RZAtt,TD,3D%,4DAtt,RZPct,Sk,PA_gbrred_comb_Pred
0,San Francisco 49ers,27,16.3,2022,25.7,300.6,3.4,1.24,2.18,20,39.0,1.47,56.8,44,17.308602
1,Buffalo Bills,3,17.9,2022,31.0,319.1,3.7,1.38,3.06,21,37.5,2.06,44.9,40,18.261131
2,Baltimore Ravens,2,18.5,2022,35.8,324.3,3.4,1.53,3.29,20,34.9,1.41,46.4,48,20.073103
3,New York Jets,24,18.6,2022,32.3,311.1,2.7,1.29,2.71,15,38.1,1.53,47.8,45,19.219318
4,Cincinnati Bengals,6,20.1,2022,36.4,335.7,3.0,1.63,3.13,17,39.6,1.44,52.0,30,20.582833
5,Dallas Cowboys,8,20.1,2022,33.5,330.2,4.2,1.53,2.94,23,37.7,1.29,52.0,54,20.487142
6,Washington Commanders,31,20.2,2022,31.5,304.6,5.1,1.59,3.06,26,31.9,1.47,51.9,43,20.824805
7,Philadelphia Eagles,25,20.2,2022,32.4,301.5,4.0,1.71,3.24,22,38.6,1.47,52.7,70,20.173075
8,New Orleans Saints,22,20.3,2022,34.8,314.8,3.2,1.29,2.59,17,41.6,1.12,50.0,48,20.201592
9,Pittsburgh Steelers,26,20.4,2022,37.3,330.4,5.4,1.53,2.76,29,39.4,1.24,55.3,40,21.910859


In [1290]:
#Creating new DataFrame including both the predicted PF and PA for each GBR model and organizing by team
PF_gbroff_comb_red_2022 = gbroff_comb_red_2022[['Tm', 'PF_gbrred_comb_Pred']]
PA_gbrdef_comb_red_2022 = gbrdef_comb_red_2022[['Tm', 'PA_gbrred_comb_Pred']]
tm_pred_gbrred_comb = pd.merge(PF_gbroff_comb_red_2022, PA_gbrdef_comb_red_2022, on=['Tm'])
tm_pred_gbrred_comb

Unnamed: 0,Tm,PF_gbrred_comb_Pred,PA_gbrred_comb_Pred
0,Kansas City Chiefs,29.121285,22.271366
1,Philadelphia Eagles,27.083595,20.173075
2,Dallas Cowboys,26.16928,20.487142
3,Buffalo Bills,28.698861,18.261131
4,Detroit Lions,26.72194,25.202876
5,San Francisco 49ers,26.638957,17.308602
6,Minnesota Vikings,24.602103,24.398425
7,Cincinnati Bengals,26.393463,20.582833
8,Seattle Seahawks,23.55936,22.352011
9,Jacksonville Jaguars,23.953179,20.865802


In [1291]:
#Making a copy of the predictions DF in order to organize statistics for two teams to eventually create a potential matchup DF
tm_pred_gbrred1_comb = tm_pred_gbrred_comb.copy()
tm_pred_gbrred1_comb = tm_pred_gbrred1_comb.rename(columns={'Tm': 'Tm2', 'PF_gbrred_comb_Pred':'PF_gbrred_comb_Pred2', 'PA_gbrred_comb_Pred': 'PA_gbrred_comb_Pred2'})
tm_comb_red_df2 = tm_pred_gbrred1_comb[['Tm2']]

tm_pred_gbrred_comb_join = tm_pred_gbrred_comb.join(tm_pred_gbrred1_comb, how='outer')
tm_pred_gbrred_comb_join = tm_pred_gbrred_comb_join.rename(columns={'Tm': 'Tm1', 'PF_gbrred_comb_Pred': 'PF_gbrred_comb_Pred1', 'PA_gbrred_comb_Pred': 'PA_gbrred_comb_Pred1'})
tm_pred_gbrred_comb_join

Unnamed: 0,Tm1,PF_gbrred_comb_Pred1,PA_gbrred_comb_Pred1,Tm2,PF_gbrred_comb_Pred2,PA_gbrred_comb_Pred2
0,Kansas City Chiefs,29.121285,22.271366,Kansas City Chiefs,29.121285,22.271366
1,Philadelphia Eagles,27.083595,20.173075,Philadelphia Eagles,27.083595,20.173075
2,Dallas Cowboys,26.16928,20.487142,Dallas Cowboys,26.16928,20.487142
3,Buffalo Bills,28.698861,18.261131,Buffalo Bills,28.698861,18.261131
4,Detroit Lions,26.72194,25.202876,Detroit Lions,26.72194,25.202876
5,San Francisco 49ers,26.638957,17.308602,San Francisco 49ers,26.638957,17.308602
6,Minnesota Vikings,24.602103,24.398425,Minnesota Vikings,24.602103,24.398425
7,Cincinnati Bengals,26.393463,20.582833,Cincinnati Bengals,26.393463,20.582833
8,Seattle Seahawks,23.55936,22.352011,Seattle Seahawks,23.55936,22.352011
9,Jacksonville Jaguars,23.953179,20.865802,Jacksonville Jaguars,23.953179,20.865802


# In order to apply team PF and PA predictions to specific team matchups, I created a new DataFrame that includes every potential unique matchup with each team's respective predicted PF and PA. 

In [1292]:
import itertools
from itertools import combinations
#Creating a new DF that includes all possible unique NFL matchups
tm_df = tm_pred_gbr[['Tm']]
pairs = set(itertools.combinations(tm_df['Tm'], 2))
pairs_df = pd.DataFrame(pairs, columns=['Tm1', 'Tm2'])
pairs_df

Unnamed: 0,Tm1,Tm2
0,Los Angeles Rams,Indianapolis Colts
1,Philadelphia Eagles,Dallas Cowboys
2,Atlanta Falcons,Tennessee Titans
3,Green Bay Packers,Tampa Bay Buccaneers
4,Cincinnati Bengals,Seattle Seahawks
...,...,...
491,Dallas Cowboys,Cincinnati Bengals
492,Detroit Lions,New York Jets
493,Arizona Cardinals,New Orleans Saints
494,Seattle Seahawks,Los Angeles Chargers


### Full Offense and Defense

In [1293]:
#Merging together the matchup pairs DF and the predicted PF and PA for Tm1. Then removing unnecessary columns and renaming columns for clarity and to remove redundancy.
nfl_matchup1 = pd.merge(pairs_df, tm_pred_gbr_join, on=['Tm1'], how='outer').dropna()
nfl_matchup1 = nfl_matchup1.drop('Tm2_y', axis=1)
nfl_matchup1 = nfl_matchup1.drop('PF_gbr_Pred2', axis=1)
nfl_matchup1 = nfl_matchup1.drop('PA_gbr_Pred2', axis=1)
nfl_matchup1 = nfl_matchup1.rename(columns={'Tm2_x': 'Tm2'})
nfl_matchup1

Unnamed: 0,Tm1,Tm2,PF_gbr_Pred1,PA_gbr_Pred1
0,Los Angeles Rams,Indianapolis Colts,18.384056,22.786295
1,Los Angeles Rams,Denver Broncos,18.384056,22.786295
2,Los Angeles Rams,New York Jets,18.384056,22.786295
3,Los Angeles Rams,Houston Texans,18.384056,22.786295
4,Los Angeles Rams,Tennessee Titans,18.384056,22.786295
...,...,...,...,...
491,Tennessee Titans,Houston Texans,17.988781,20.808765
492,Tennessee Titans,Denver Broncos,17.988781,20.808765
493,Tennessee Titans,New York Jets,17.988781,20.808765
494,Tennessee Titans,Indianapolis Colts,17.988781,20.808765


In [1294]:
#Merging together the matchup pairs DF and the predicted PF and PA for Tm2. Then removing unnecessary columns and renaming columns for clarity and to remove redundancy. Then filtering a final DF for each predicted matchup from the optimal gbr model.
nfl_matchup2 = pd.merge(nfl_matchup1, tm_pred_gbr_join, on=['Tm2'], how='outer').dropna()
nfl_matchup2 = nfl_matchup2.drop('Tm1_y', axis=1)
nfl_matchup2 = nfl_matchup2.drop('PF_gbr_Pred1_y', axis=1)
nfl_matchup2 = nfl_matchup2.drop('PA_gbr_Pred1_y', axis=1)
nfl_matchup2 = nfl_matchup2.rename(columns={'Tm1_x': 'Tm1', 'PF_gbr_Pred1_x': 'PF_gbr_Pred1', 'PA_gbr_Pred1_x': 'PA_gbr_Pred1'})
match_gbrpred = nfl_matchup2.filter(['Tm1', 'PF_gbr_Pred1', 'PA_gbr_Pred1', 'Tm2', 'PF_gbr_Pred2', 'PA_gbr_Pred2'])
match_gbrpred

Unnamed: 0,Tm1,PF_gbr_Pred1,PA_gbr_Pred1,Tm2,PF_gbr_Pred2,PA_gbr_Pred2
0,Los Angeles Rams,18.384056,22.786295,Indianapolis Colts,15.625881,24.883673
1,Philadelphia Eagles,27.605864,20.230224,Indianapolis Colts,15.625881,24.883673
2,Atlanta Falcons,20.734957,23.981173,Indianapolis Colts,15.625881,24.883673
3,Green Bay Packers,22.185518,22.265264,Indianapolis Colts,15.625881,24.883673
4,Cincinnati Bengals,26.013764,20.129431,Indianapolis Colts,15.625881,24.883673
...,...,...,...,...,...,...
491,Cleveland Browns,20.850948,23.565793,New Orleans Saints,19.848193,19.542768
492,Seattle Seahawks,24.489553,23.690854,New Orleans Saints,19.848193,19.542768
493,Arizona Cardinals,20.145702,25.612708,New Orleans Saints,19.848193,19.542768
494,Baltimore Ravens,21.352428,19.308233,New Orleans Saints,19.848193,19.542768


In [1295]:
#Calculating the predicted points for each team based on the model's predictions for the given matchup. In this case, I'm taking the average for the matchup based on combining the PF for one team against the PA for the other. Then figuring out the predicted matchup spread and total by calculating the difference and sum of the Tm1 and Tm2 PF predictions. Then adding them as new columns to the matchup DF.
match_gbrpred['PF_Tm1'] = (match_gbrpred['PF_gbr_Pred1'] + match_gbrpred['PA_gbr_Pred2'])/2
match_gbrpred['PF_Tm2'] = (match_gbrpred['PF_gbr_Pred2'] + match_gbrpred['PA_gbr_Pred1'])/2
match_gbrpred['Pred_Spread'] = round((match_gbrpred['PF_Tm1'] - match_gbrpred['PF_Tm2']).abs(), 3)
match_gbrpred['Pred_Total'] = round(match_gbrpred['PF_Tm1'] + match_gbrpred['PF_Tm2'], 3)
match_gbrpred

Unnamed: 0,Tm1,PF_gbr_Pred1,PA_gbr_Pred1,Tm2,PF_gbr_Pred2,PA_gbr_Pred2,PF_Tm1,PF_Tm2,Pred_Spread,Pred_Total
0,Los Angeles Rams,18.384056,22.786295,Indianapolis Colts,15.625881,24.883673,21.633865,19.206088,2.428,40.840
1,Philadelphia Eagles,27.605864,20.230224,Indianapolis Colts,15.625881,24.883673,26.244769,17.928052,8.317,44.173
2,Atlanta Falcons,20.734957,23.981173,Indianapolis Colts,15.625881,24.883673,22.809315,19.803527,3.006,42.613
3,Green Bay Packers,22.185518,22.265264,Indianapolis Colts,15.625881,24.883673,23.534596,18.945573,4.589,42.480
4,Cincinnati Bengals,26.013764,20.129431,Indianapolis Colts,15.625881,24.883673,25.448719,17.877656,7.571,43.326
...,...,...,...,...,...,...,...,...,...,...
491,Cleveland Browns,20.850948,23.565793,New Orleans Saints,19.848193,19.542768,20.196858,21.706993,1.510,41.904
492,Seattle Seahawks,24.489553,23.690854,New Orleans Saints,19.848193,19.542768,22.016160,21.769524,0.247,43.786
493,Arizona Cardinals,20.145702,25.612708,New Orleans Saints,19.848193,19.542768,19.844235,22.730451,2.886,42.575
494,Baltimore Ravens,21.352428,19.308233,New Orleans Saints,19.848193,19.542768,20.447598,19.578213,0.869,40.026


In [1296]:
#Creating functions to assign which team is the predicted favorite and which is the predicted underdog and then applying to matchup DF. Then filtering the DF for clarity.
def tfp(row):
    if row['PF_Tm1'] > row['PF_Tm2']:
        val = row['Tm1']
    else:
        val = row['Tm2']
    return val
def tup(row):
    if row['PF_Tm1'] < row['PF_Tm2']:
        val = row['Tm1']
    else:
        val = row['Tm2']
    return val

match_gbrpred['Pred_team_favorite'] = match_gbrpred.apply(tfp, axis=1)
match_gbrpred['Pred_team_underdog'] = match_gbrpred.apply(tup, axis=1)
match_gbrpred = match_gbrpred.filter(['Tm1', 'PF_gbr_Pred1', 'PA_gbr_Pred1', 'Tm2', 'PF_gbr_Pred2', 'PA_gbr_Pred2', 'PF_Tm1', 'PF_Tm2', 'Pred_team_favorite', 'Pred_Spread', 'Pred_team_underdog', 'Pred_Total'])
match_gbrpred

Unnamed: 0,Tm1,PF_gbr_Pred1,PA_gbr_Pred1,Tm2,PF_gbr_Pred2,PA_gbr_Pred2,PF_Tm1,PF_Tm2,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_Total
0,Los Angeles Rams,18.384056,22.786295,Indianapolis Colts,15.625881,24.883673,21.633865,19.206088,Los Angeles Rams,2.428,Indianapolis Colts,40.840
1,Philadelphia Eagles,27.605864,20.230224,Indianapolis Colts,15.625881,24.883673,26.244769,17.928052,Philadelphia Eagles,8.317,Indianapolis Colts,44.173
2,Atlanta Falcons,20.734957,23.981173,Indianapolis Colts,15.625881,24.883673,22.809315,19.803527,Atlanta Falcons,3.006,Indianapolis Colts,42.613
3,Green Bay Packers,22.185518,22.265264,Indianapolis Colts,15.625881,24.883673,23.534596,18.945573,Green Bay Packers,4.589,Indianapolis Colts,42.480
4,Cincinnati Bengals,26.013764,20.129431,Indianapolis Colts,15.625881,24.883673,25.448719,17.877656,Cincinnati Bengals,7.571,Indianapolis Colts,43.326
...,...,...,...,...,...,...,...,...,...,...,...,...
491,Cleveland Browns,20.850948,23.565793,New Orleans Saints,19.848193,19.542768,20.196858,21.706993,New Orleans Saints,1.510,Cleveland Browns,41.904
492,Seattle Seahawks,24.489553,23.690854,New Orleans Saints,19.848193,19.542768,22.016160,21.769524,Seattle Seahawks,0.247,New Orleans Saints,43.786
493,Arizona Cardinals,20.145702,25.612708,New Orleans Saints,19.848193,19.542768,19.844235,22.730451,New Orleans Saints,2.886,Arizona Cardinals,42.575
494,Baltimore Ravens,21.352428,19.308233,New Orleans Saints,19.848193,19.542768,20.447598,19.578213,Baltimore Ravens,0.869,New Orleans Saints,40.026


### Advanced Offense and Defense

In [1297]:
#Merging together the matchup pairs DF and the predicted PF and PA for Tm1. Then removing unnecessary columns and renaming columns for clarity and to remove redundancy.
nfl_matchup1_adv = pd.merge(pairs_df, tm_pred_gbradv_join, on=['Tm1'], how='outer').dropna()
nfl_matchup1_adv = nfl_matchup1_adv.drop('Tm2_y', axis=1)
nfl_matchup1_adv = nfl_matchup1_adv.drop('PF_gbr_Pred2', axis=1)
nfl_matchup1_adv = nfl_matchup1_adv.drop('PA_gbr_Pred2', axis=1)
nfl_matchup1_adv = nfl_matchup1_adv.rename(columns={'Tm2_x': 'Tm2'})
nfl_matchup1_adv

Unnamed: 0,Tm1,Tm2,PF_gbradv_Pred1,PA_gbradv_Pred1
0,Los Angeles Rams,Indianapolis Colts,18.098850,21.635873
1,Los Angeles Rams,Denver Broncos,18.098850,21.635873
2,Los Angeles Rams,New York Jets,18.098850,21.635873
3,Los Angeles Rams,Houston Texans,18.098850,21.635873
4,Los Angeles Rams,Tennessee Titans,18.098850,21.635873
...,...,...,...,...
491,Tennessee Titans,Houston Texans,17.501123,21.405609
492,Tennessee Titans,Denver Broncos,17.501123,21.405609
493,Tennessee Titans,New York Jets,17.501123,21.405609
494,Tennessee Titans,Indianapolis Colts,17.501123,21.405609


In [1298]:
#Merging together the matchup pairs DF and the predicted PF and PA for Tm2. Then removing unnecessary columns and renaming columns for clarity and to remove redundancy. Then filtering a final DF for each predicted matchup from the optimal gbr model.
nfl_matchup2_adv = pd.merge(nfl_matchup1_adv, tm_pred_gbradv_join, on=['Tm2'], how='outer').dropna()
nfl_matchup2_adv = nfl_matchup2_adv.drop('Tm1_y', axis=1)
nfl_matchup2_adv = nfl_matchup2_adv.drop('PF_gbradv_Pred1_y', axis=1)
nfl_matchup2_adv = nfl_matchup2_adv.drop('PA_gbradv_Pred1_y', axis=1)
nfl_matchup2_adv = nfl_matchup2_adv.rename(columns={'Tm1_x': 'Tm1', 'PF_gbradv_Pred1_x': 'PF_gbradv_Pred1', 'PA_gbradv_Pred1_x': 'PA_gbradv_Pred1',
                                                   'PF_gbr_Pred2': 'PF_gbradv_Pred2', 'PA_gbr_Pred2': 'PA_gbradv_Pred2'})
match_gbrpred_adv = nfl_matchup2_adv.filter(['Tm1', 'PF_gbradv_Pred1', 'PA_gbradv_Pred1', 'Tm2', 'PF_gbradv_Pred2', 'PA_gbradv_Pred2'])
match_gbrpred_adv

Unnamed: 0,Tm1,PF_gbradv_Pred1,PA_gbradv_Pred1,Tm2,PF_gbradv_Pred2,PA_gbradv_Pred2
0,Los Angeles Rams,18.098850,21.635873,Indianapolis Colts,15.625881,24.883673
1,Philadelphia Eagles,28.098718,19.924910,Indianapolis Colts,15.625881,24.883673
2,Atlanta Falcons,21.499297,23.658080,Indianapolis Colts,15.625881,24.883673
3,Green Bay Packers,21.799536,21.940991,Indianapolis Colts,15.625881,24.883673
4,Cincinnati Bengals,26.099394,20.504447,Indianapolis Colts,15.625881,24.883673
...,...,...,...,...,...,...
491,Cleveland Browns,21.201247,22.602988,New Orleans Saints,19.848193,19.542768
492,Seattle Seahawks,23.899308,22.254408,New Orleans Saints,19.848193,19.542768
493,Arizona Cardinals,19.998683,26.541225,New Orleans Saints,19.848193,19.542768
494,Baltimore Ravens,20.600686,19.527338,New Orleans Saints,19.848193,19.542768


In [1299]:
#Calculating the predicted points for each team based on the model's predictions for the given matchup. In this case, I'm taking the average for the matchup based on combining the PF for one team against the PA for the other. Then figuring out the predicted matchup spread and total by calculating the difference and sum of the Tm1 and Tm2 PF predictions. Then adding them as new columns to the matchup DF.
match_gbrpred_adv['PF_Tm1'] = (match_gbrpred_adv['PF_gbradv_Pred1'] + match_gbrpred_adv['PA_gbradv_Pred2'])/2
match_gbrpred_adv['PF_Tm2'] = (match_gbrpred_adv['PF_gbradv_Pred2'] + match_gbrpred_adv['PA_gbradv_Pred1'])/2
match_gbrpred_adv['Pred_Spread'] = round((match_gbrpred_adv['PF_Tm1'] - match_gbrpred_adv['PF_Tm2']).abs(), 3)
match_gbrpred_adv['Pred_Total'] = round(match_gbrpred_adv['PF_Tm1'] + match_gbrpred_adv['PF_Tm2'], 3)
match_gbrpred_adv

Unnamed: 0,Tm1,PF_gbradv_Pred1,PA_gbradv_Pred1,Tm2,PF_gbradv_Pred2,PA_gbradv_Pred2,PF_Tm1,PF_Tm2,Pred_Spread,Pred_Total
0,Los Angeles Rams,18.098850,21.635873,Indianapolis Colts,15.625881,24.883673,21.491262,18.630877,2.860,40.122
1,Philadelphia Eagles,28.098718,19.924910,Indianapolis Colts,15.625881,24.883673,26.491196,17.775396,8.716,44.267
2,Atlanta Falcons,21.499297,23.658080,Indianapolis Colts,15.625881,24.883673,23.191485,19.641980,3.550,42.833
3,Green Bay Packers,21.799536,21.940991,Indianapolis Colts,15.625881,24.883673,23.341605,18.783436,4.558,42.125
4,Cincinnati Bengals,26.099394,20.504447,Indianapolis Colts,15.625881,24.883673,25.491533,18.065164,7.426,43.557
...,...,...,...,...,...,...,...,...,...,...
491,Cleveland Browns,21.201247,22.602988,New Orleans Saints,19.848193,19.542768,20.372007,21.225591,0.854,41.598
492,Seattle Seahawks,23.899308,22.254408,New Orleans Saints,19.848193,19.542768,21.721038,21.051301,0.670,42.772
493,Arizona Cardinals,19.998683,26.541225,New Orleans Saints,19.848193,19.542768,19.770725,23.194709,3.424,42.965
494,Baltimore Ravens,20.600686,19.527338,New Orleans Saints,19.848193,19.542768,20.071727,19.687766,0.384,39.759


In [1300]:
#Creating functions to assign which team is the predicted favorite and which is the predicted underdog and then applying to matchup DF. Then filtering the DF for clarity.
match_gbrpred_adv['Pred_team_favorite'] = match_gbrpred_adv.apply(tfp, axis=1)
match_gbrpred_adv['Pred_team_underdog'] = match_gbrpred_adv.apply(tup, axis=1)
match_gbrpred_adv = match_gbrpred_adv.filter(['Tm1', 'PF_gbradv_Pred1', 'PA_gbradv_Pred1', 'Tm2', 'PF_gbradv_Pred2', 'PA_gbradv_Pred2', 'PF_Tm1', 'PF_Tm2', 'Pred_team_favorite', 'Pred_Spread', 'Pred_team_underdog', 'Pred_Total'])
match_gbrpred_adv

Unnamed: 0,Tm1,PF_gbradv_Pred1,PA_gbradv_Pred1,Tm2,PF_gbradv_Pred2,PA_gbradv_Pred2,PF_Tm1,PF_Tm2,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_Total
0,Los Angeles Rams,18.098850,21.635873,Indianapolis Colts,15.625881,24.883673,21.491262,18.630877,Los Angeles Rams,2.860,Indianapolis Colts,40.122
1,Philadelphia Eagles,28.098718,19.924910,Indianapolis Colts,15.625881,24.883673,26.491196,17.775396,Philadelphia Eagles,8.716,Indianapolis Colts,44.267
2,Atlanta Falcons,21.499297,23.658080,Indianapolis Colts,15.625881,24.883673,23.191485,19.641980,Atlanta Falcons,3.550,Indianapolis Colts,42.833
3,Green Bay Packers,21.799536,21.940991,Indianapolis Colts,15.625881,24.883673,23.341605,18.783436,Green Bay Packers,4.558,Indianapolis Colts,42.125
4,Cincinnati Bengals,26.099394,20.504447,Indianapolis Colts,15.625881,24.883673,25.491533,18.065164,Cincinnati Bengals,7.426,Indianapolis Colts,43.557
...,...,...,...,...,...,...,...,...,...,...,...,...
491,Cleveland Browns,21.201247,22.602988,New Orleans Saints,19.848193,19.542768,20.372007,21.225591,New Orleans Saints,0.854,Cleveland Browns,41.598
492,Seattle Seahawks,23.899308,22.254408,New Orleans Saints,19.848193,19.542768,21.721038,21.051301,Seattle Seahawks,0.670,New Orleans Saints,42.772
493,Arizona Cardinals,19.998683,26.541225,New Orleans Saints,19.848193,19.542768,19.770725,23.194709,New Orleans Saints,3.424,Arizona Cardinals,42.965
494,Baltimore Ravens,20.600686,19.527338,New Orleans Saints,19.848193,19.542768,20.071727,19.687766,Baltimore Ravens,0.384,New Orleans Saints,39.759


### Reduced Full Dataset Offense and Defense

In [1301]:
#Merging together the matchup pairs DF and the predicted PF and PA for Tm1. Then removing unnecessary columns and renaming columns for clarity and to remove redundancy.
nfl_matchup1_red = pd.merge(pairs_df, tm_pred_gbrred_join, on=['Tm1'], how='outer').dropna()
nfl_matchup1_red = nfl_matchup1_red.drop('Tm2_y', axis=1)
nfl_matchup1_red = nfl_matchup1_red.drop('PF_gbrred_Pred2', axis=1)
nfl_matchup1_red = nfl_matchup1_red.drop('PA_gbrred_Pred2', axis=1)
nfl_matchup1_red = nfl_matchup1_red.rename(columns={'Tm2_x': 'Tm2'})
nfl_matchup1_red

Unnamed: 0,Tm1,Tm2,PF_gbrred_Pred1,PA_gbrred_Pred1
0,Los Angeles Rams,Indianapolis Colts,18.707671,22.649000
1,Los Angeles Rams,Denver Broncos,18.707671,22.649000
2,Los Angeles Rams,New York Jets,18.707671,22.649000
3,Los Angeles Rams,Houston Texans,18.707671,22.649000
4,Los Angeles Rams,Tennessee Titans,18.707671,22.649000
...,...,...,...,...
491,Tennessee Titans,Houston Texans,18.388217,20.314067
492,Tennessee Titans,Denver Broncos,18.388217,20.314067
493,Tennessee Titans,New York Jets,18.388217,20.314067
494,Tennessee Titans,Indianapolis Colts,18.388217,20.314067


In [1302]:
#Merging together the matchup pairs DF and the predicted PF and PA for Tm2. Then removing unnecessary columns and renaming columns for clarity and to remove redundancy. Then filtering a final DF for each predicted matchup from the optimal gbr model.
nfl_matchup2_red = pd.merge(nfl_matchup1_red, tm_pred_gbrred_join, on=['Tm2'], how='outer').dropna()
nfl_matchup2_red = nfl_matchup2_red.drop('Tm1_y', axis=1)
nfl_matchup2_red = nfl_matchup2_red.drop('PF_gbrred_Pred1_y', axis=1)
nfl_matchup2_red = nfl_matchup2_red.drop('PA_gbrred_Pred1_y', axis=1)
nfl_matchup2_red = nfl_matchup2_red.rename(columns={'Tm1_x': 'Tm1', 'PF_gbrred_Pred1_x': 'PF_gbrred_Pred1', 'PA_gbrred_Pred1_x': 'PA_gbrred_Pred1'})
match_gbrpred_red = nfl_matchup2_red.filter(['Tm1', 'PF_gbrred_Pred1', 'PA_gbrred_Pred1', 'Tm2', 'PF_gbrred_Pred2', 'PA_gbrred_Pred2'])
match_gbrpred_red

Unnamed: 0,Tm1,PF_gbrred_Pred1,PA_gbrred_Pred1,Tm2,PF_gbrred_Pred2,PA_gbrred_Pred2
0,Los Angeles Rams,18.707671,22.649000,Indianapolis Colts,15.893827,24.407451
1,Philadelphia Eagles,27.686473,20.643006,Indianapolis Colts,15.893827,24.407451
2,Atlanta Falcons,21.304797,24.272532,Indianapolis Colts,15.893827,24.407451
3,Green Bay Packers,22.390195,23.845098,Indianapolis Colts,15.893827,24.407451
4,Cincinnati Bengals,26.551884,19.552076,Indianapolis Colts,15.893827,24.407451
...,...,...,...,...,...,...
491,Cleveland Browns,20.945524,24.129603,New Orleans Saints,20.152110,19.732115
492,Seattle Seahawks,24.727380,23.623704,New Orleans Saints,20.152110,19.732115
493,Arizona Cardinals,19.847527,25.948287,New Orleans Saints,20.152110,19.732115
494,Baltimore Ravens,21.649245,19.512820,New Orleans Saints,20.152110,19.732115


In [1303]:
#Calculating the predicted points for each team based on the model's predictions for the given matchup. In this case, I'm taking the average for the matchup based on combining the PF for one team against the PA for the other. Then figuring out the predicted matchup spread and total by calculating the difference and sum of the Tm1 and Tm2 PF predictions. Then adding them as new columns to the matchup DF.
match_gbrpred_red['PF_Tm1'] = (match_gbrpred_red['PF_gbrred_Pred1'] + match_gbrpred_red['PA_gbrred_Pred2'])/2
match_gbrpred_red['PF_Tm2'] = (match_gbrpred_red['PF_gbrred_Pred2'] + match_gbrpred_red['PA_gbrred_Pred1'])/2
match_gbrpred_red['Pred_Spread'] = round((match_gbrpred_red['PF_Tm1'] - match_gbrpred_red['PF_Tm2']).abs(), 3)
match_gbrpred_red['Pred_Total'] = round(match_gbrpred_red['PF_Tm1'] + match_gbrpred_red['PF_Tm2'], 3)
match_gbrpred_red

Unnamed: 0,Tm1,PF_gbrred_Pred1,PA_gbrred_Pred1,Tm2,PF_gbrred_Pred2,PA_gbrred_Pred2,PF_Tm1,PF_Tm2,Pred_Spread,Pred_Total
0,Los Angeles Rams,18.707671,22.649000,Indianapolis Colts,15.893827,24.407451,21.557561,19.271413,2.286,40.829
1,Philadelphia Eagles,27.686473,20.643006,Indianapolis Colts,15.893827,24.407451,26.046962,18.268417,7.779,44.315
2,Atlanta Falcons,21.304797,24.272532,Indianapolis Colts,15.893827,24.407451,22.856124,20.083180,2.773,42.939
3,Green Bay Packers,22.390195,23.845098,Indianapolis Colts,15.893827,24.407451,23.398823,19.869463,3.529,43.268
4,Cincinnati Bengals,26.551884,19.552076,Indianapolis Colts,15.893827,24.407451,25.479667,17.722952,7.757,43.203
...,...,...,...,...,...,...,...,...,...,...
491,Cleveland Browns,20.945524,24.129603,New Orleans Saints,20.152110,19.732115,20.338820,22.140856,1.802,42.480
492,Seattle Seahawks,24.727380,23.623704,New Orleans Saints,20.152110,19.732115,22.229747,21.887907,0.342,44.118
493,Arizona Cardinals,19.847527,25.948287,New Orleans Saints,20.152110,19.732115,19.789821,23.050199,3.260,42.840
494,Baltimore Ravens,21.649245,19.512820,New Orleans Saints,20.152110,19.732115,20.690680,19.832465,0.858,40.523


In [1304]:
#GBR reduced final matchup DF
match_gbrpred_red['Pred_team_favorite'] = match_gbrpred_red.apply(tfp, axis=1)
match_gbrpred_red['Pred_team_underdog'] = match_gbrpred_red.apply(tup, axis=1)
match_gbrpred_red = match_gbrpred_red.filter(['Tm1', 'PF_gbrred_Pred1', 'PA_gbrred_Pred1', 'Tm2', 'PF_gbrred_Pred2', 'PA_gbrred_Pred2', 'PF_Tm1', 'PF_Tm2', 'Pred_team_favorite', 'Pred_Spread', 'Pred_team_underdog', 'Pred_Total'])
match_gbrpred_red

Unnamed: 0,Tm1,PF_gbrred_Pred1,PA_gbrred_Pred1,Tm2,PF_gbrred_Pred2,PA_gbrred_Pred2,PF_Tm1,PF_Tm2,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_Total
0,Los Angeles Rams,18.707671,22.649000,Indianapolis Colts,15.893827,24.407451,21.557561,19.271413,Los Angeles Rams,2.286,Indianapolis Colts,40.829
1,Philadelphia Eagles,27.686473,20.643006,Indianapolis Colts,15.893827,24.407451,26.046962,18.268417,Philadelphia Eagles,7.779,Indianapolis Colts,44.315
2,Atlanta Falcons,21.304797,24.272532,Indianapolis Colts,15.893827,24.407451,22.856124,20.083180,Atlanta Falcons,2.773,Indianapolis Colts,42.939
3,Green Bay Packers,22.390195,23.845098,Indianapolis Colts,15.893827,24.407451,23.398823,19.869463,Green Bay Packers,3.529,Indianapolis Colts,43.268
4,Cincinnati Bengals,26.551884,19.552076,Indianapolis Colts,15.893827,24.407451,25.479667,17.722952,Cincinnati Bengals,7.757,Indianapolis Colts,43.203
...,...,...,...,...,...,...,...,...,...,...,...,...
491,Cleveland Browns,20.945524,24.129603,New Orleans Saints,20.152110,19.732115,20.338820,22.140856,New Orleans Saints,1.802,Cleveland Browns,42.480
492,Seattle Seahawks,24.727380,23.623704,New Orleans Saints,20.152110,19.732115,22.229747,21.887907,Seattle Seahawks,0.342,New Orleans Saints,44.118
493,Arizona Cardinals,19.847527,25.948287,New Orleans Saints,20.152110,19.732115,19.789821,23.050199,New Orleans Saints,3.260,Arizona Cardinals,42.840
494,Baltimore Ravens,21.649245,19.512820,New Orleans Saints,20.152110,19.732115,20.690680,19.832465,Baltimore Ravens,0.858,New Orleans Saints,40.523


### Reduced Advanced Dataset Offense and Defense

In [1305]:
#Merging together the matchup pairs DF and the predicted PF and PA for Tm1. Then removing unnecessary columns and renaming columns for clarity and to remove redundancy.
nfl_matchup1_adv_red = pd.merge(pairs_df, tm_pred_gbradv_red_join, on=['Tm1'], how='outer').dropna()
nfl_matchup1_adv_red = nfl_matchup1_adv_red.drop('Tm2_y', axis=1)
nfl_matchup1_adv_red = nfl_matchup1_adv_red.drop('PF_gbradv_Pred2_red', axis=1)
nfl_matchup1_adv_red = nfl_matchup1_adv_red.drop('PA_gbradv_Pred2_red', axis=1)
nfl_matchup1_adv_red = nfl_matchup1_adv_red.rename(columns={'Tm2_x': 'Tm2'})
nfl_matchup1_adv_red

Unnamed: 0,Tm1,Tm2,PF_gbradv_Pred1_red,PA_gbradv_Pred1_red
0,Los Angeles Rams,Indianapolis Colts,18.119786,21.631954
1,Los Angeles Rams,Denver Broncos,18.119786,21.631954
2,Los Angeles Rams,New York Jets,18.119786,21.631954
3,Los Angeles Rams,Houston Texans,18.119786,21.631954
4,Los Angeles Rams,Tennessee Titans,18.119786,21.631954
...,...,...,...,...
491,Tennessee Titans,Houston Texans,17.649514,21.156560
492,Tennessee Titans,Denver Broncos,17.649514,21.156560
493,Tennessee Titans,New York Jets,17.649514,21.156560
494,Tennessee Titans,Indianapolis Colts,17.649514,21.156560


In [1306]:
#Merging together the matchup pairs DF and the predicted PF and PA for Tm2. Then removing unnecessary columns and renaming columns for clarity and to remove redundancy. Then filtering a final DF for each predicted matchup from the optimal gbr model.
nfl_matchup2_adv_red = pd.merge(nfl_matchup1_adv_red, tm_pred_gbradv_red_join, on=['Tm2'], how='outer').dropna()
nfl_matchup2_adv_red = nfl_matchup2_adv_red.drop('Tm1_y', axis=1)
nfl_matchup2_adv_red = nfl_matchup2_adv_red.drop('PF_gbradv_Pred1_red_y', axis=1)
nfl_matchup2_adv_red = nfl_matchup2_adv_red.drop('PA_gbradv_Pred1_red_y', axis=1)
nfl_matchup2_adv_red = nfl_matchup2_adv_red.rename(columns={'Tm1_x': 'Tm1', 'PF_gbradv_Pred1_red_x': 'PF_gbradv_Pred1_red', 'PA_gbradv_Pred1_red_x': 'PA_gbradv_Pred1_red',
                                                   'PF_gbradv_Pred2': 'PF_gbradv_Pred2_red', 'PA_gbradv_Pred2': 'PA_gbradv_Pred2_red'})
match_gbrpred_adv_red = nfl_matchup2_adv_red.filter(['Tm1', 'PF_gbradv_Pred1_red', 'PA_gbradv_Pred1_red', 'Tm2', 'PF_gbradv_Pred2_red', 'PA_gbradv_Pred2_red'])
match_gbrpred_adv_red

Unnamed: 0,Tm1,PF_gbradv_Pred1_red,PA_gbradv_Pred1_red,Tm2,PF_gbradv_Pred2_red,PA_gbradv_Pred2_red
0,Los Angeles Rams,18.119786,21.631954,Indianapolis Colts,17.069022,24.013524
1,Philadelphia Eagles,27.917533,19.752214,Indianapolis Colts,17.069022,24.013524
2,Atlanta Falcons,21.114618,23.181483,Indianapolis Colts,17.069022,24.013524
3,Green Bay Packers,21.956461,22.170322,Indianapolis Colts,17.069022,24.013524
4,Cincinnati Bengals,26.014972,20.631280,Indianapolis Colts,17.069022,24.013524
...,...,...,...,...,...,...
491,Cleveland Browns,20.852184,22.679205,New Orleans Saints,19.243383,19.785249
492,Seattle Seahawks,23.592883,22.338753,New Orleans Saints,19.243383,19.785249
493,Arizona Cardinals,19.704595,26.514306,New Orleans Saints,19.243383,19.785249
494,Baltimore Ravens,20.694819,19.173094,New Orleans Saints,19.243383,19.785249


In [1307]:
#Calculating the predicted points for each team based on the model's predictions for the given matchup. In this case, I'm taking the average for the matchup based on combining the PF for one team against the PA for the other. Then figuring out the predicted matchup spread and total by calculating the difference and sum of the Tm1 and Tm2 PF predictions. Then adding them as new columns to the matchup DF.
match_gbrpred_adv_red['PF_Tm1'] = (match_gbrpred_adv_red['PF_gbradv_Pred1_red'] + match_gbrpred_adv_red['PA_gbradv_Pred2_red'])/2
match_gbrpred_adv_red['PF_Tm2'] = (match_gbrpred_adv_red['PF_gbradv_Pred2_red'] + match_gbrpred_adv_red['PA_gbradv_Pred1_red'])/2
match_gbrpred_adv_red['Pred_Spread'] = round((match_gbrpred_adv_red['PF_Tm1'] - match_gbrpred_adv_red['PF_Tm2']).abs(), 3)
match_gbrpred_adv_red['Pred_Total'] = round(match_gbrpred_adv_red['PF_Tm1'] + match_gbrpred_adv_red['PF_Tm2'], 3)
match_gbrpred_adv_red

Unnamed: 0,Tm1,PF_gbradv_Pred1_red,PA_gbradv_Pred1_red,Tm2,PF_gbradv_Pred2_red,PA_gbradv_Pred2_red,PF_Tm1,PF_Tm2,Pred_Spread,Pred_Total
0,Los Angeles Rams,18.119786,21.631954,Indianapolis Colts,17.069022,24.013524,21.066655,19.350488,1.716,40.417
1,Philadelphia Eagles,27.917533,19.752214,Indianapolis Colts,17.069022,24.013524,25.965528,18.410618,7.555,44.376
2,Atlanta Falcons,21.114618,23.181483,Indianapolis Colts,17.069022,24.013524,22.564071,20.125253,2.439,42.689
3,Green Bay Packers,21.956461,22.170322,Indianapolis Colts,17.069022,24.013524,22.984992,19.619672,3.365,42.605
4,Cincinnati Bengals,26.014972,20.631280,Indianapolis Colts,17.069022,24.013524,25.014248,18.850151,6.164,43.864
...,...,...,...,...,...,...,...,...,...,...
491,Cleveland Browns,20.852184,22.679205,New Orleans Saints,19.243383,19.785249,20.318717,20.961294,0.643,41.280
492,Seattle Seahawks,23.592883,22.338753,New Orleans Saints,19.243383,19.785249,21.689066,20.791068,0.898,42.480
493,Arizona Cardinals,19.704595,26.514306,New Orleans Saints,19.243383,19.785249,19.744922,22.878845,3.134,42.624
494,Baltimore Ravens,20.694819,19.173094,New Orleans Saints,19.243383,19.785249,20.240034,19.208238,1.032,39.448


In [1308]:
#Creating functions to assign which team is the predicted favorite and which is the predicted underdog and then applying to matchup DF. Then filtering the DF for clarity.
match_gbrpred_adv_red['Pred_team_favorite'] = match_gbrpred_adv_red.apply(tfp, axis=1)
match_gbrpred_adv_red['Pred_team_underdog'] = match_gbrpred_adv_red.apply(tup, axis=1)
match_gbrpred_adv_red = match_gbrpred_adv_red.filter(['Tm1', 'PF_gbradv_Pred1_red', 'PA_gbradv_Pred1_red', 'Tm2', 'PF_gbradv_Pred2_red', 'PA_gbradv_Pred2_red', 'PF_Tm1', 'PF_Tm2', 'Pred_team_favorite', 'Pred_Spread', 'Pred_team_underdog', 'Pred_Total'])
match_gbrpred_adv_red

Unnamed: 0,Tm1,PF_gbradv_Pred1_red,PA_gbradv_Pred1_red,Tm2,PF_gbradv_Pred2_red,PA_gbradv_Pred2_red,PF_Tm1,PF_Tm2,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_Total
0,Los Angeles Rams,18.119786,21.631954,Indianapolis Colts,17.069022,24.013524,21.066655,19.350488,Los Angeles Rams,1.716,Indianapolis Colts,40.417
1,Philadelphia Eagles,27.917533,19.752214,Indianapolis Colts,17.069022,24.013524,25.965528,18.410618,Philadelphia Eagles,7.555,Indianapolis Colts,44.376
2,Atlanta Falcons,21.114618,23.181483,Indianapolis Colts,17.069022,24.013524,22.564071,20.125253,Atlanta Falcons,2.439,Indianapolis Colts,42.689
3,Green Bay Packers,21.956461,22.170322,Indianapolis Colts,17.069022,24.013524,22.984992,19.619672,Green Bay Packers,3.365,Indianapolis Colts,42.605
4,Cincinnati Bengals,26.014972,20.631280,Indianapolis Colts,17.069022,24.013524,25.014248,18.850151,Cincinnati Bengals,6.164,Indianapolis Colts,43.864
...,...,...,...,...,...,...,...,...,...,...,...,...
491,Cleveland Browns,20.852184,22.679205,New Orleans Saints,19.243383,19.785249,20.318717,20.961294,New Orleans Saints,0.643,Cleveland Browns,41.280
492,Seattle Seahawks,23.592883,22.338753,New Orleans Saints,19.243383,19.785249,21.689066,20.791068,Seattle Seahawks,0.898,New Orleans Saints,42.480
493,Arizona Cardinals,19.704595,26.514306,New Orleans Saints,19.243383,19.785249,19.744922,22.878845,New Orleans Saints,3.134,Arizona Cardinals,42.624
494,Baltimore Ravens,20.694819,19.173094,New Orleans Saints,19.243383,19.785249,20.240034,19.208238,Baltimore Ravens,1.032,New Orleans Saints,39.448


### Combined Reduced Dataset Offense and Defense

In [1309]:
#Merging together the matchup pairs DF and the predicted PF and PA for Tm1. Then removing unnecessary columns and renaming columns for clarity and to remove redundancy.
nfl_matchup1_comb_red = pd.merge(pairs_df, tm_pred_gbrred_comb_join, on=['Tm1'], how='outer').dropna()
nfl_matchup1_comb_red = nfl_matchup1_comb_red.drop('Tm2_y', axis=1)
nfl_matchup1_comb_red = nfl_matchup1_comb_red.drop('PF_gbrred_comb_Pred2', axis=1)
nfl_matchup1_comb_red = nfl_matchup1_comb_red.drop('PA_gbrred_comb_Pred2', axis=1)
nfl_matchup1_comb_red = nfl_matchup1_comb_red.rename(columns={'Tm2_x': 'Tm2'})
nfl_matchup1_comb_red

Unnamed: 0,Tm1,Tm2,PF_gbrred_comb_Pred1,PA_gbrred_comb_Pred1
0,Los Angeles Rams,Indianapolis Colts,18.191863,22.700040
1,Los Angeles Rams,Denver Broncos,18.191863,22.700040
2,Los Angeles Rams,New York Jets,18.191863,22.700040
3,Los Angeles Rams,Houston Texans,18.191863,22.700040
4,Los Angeles Rams,Tennessee Titans,18.191863,22.700040
...,...,...,...,...
491,Tennessee Titans,Houston Texans,17.694250,21.345722
492,Tennessee Titans,Denver Broncos,17.694250,21.345722
493,Tennessee Titans,New York Jets,17.694250,21.345722
494,Tennessee Titans,Indianapolis Colts,17.694250,21.345722


In [1310]:
#Merging together the matchup pairs DF and the predicted PF and PA for Tm2. Then removing unnecessary columns and renaming columns for clarity and to remove redundancy. Then filtering a final DF for each predicted matchup from the optimal gbr model.
nfl_matchup2_comb_red = pd.merge(nfl_matchup1_comb_red, tm_pred_gbrred_comb_join, on=['Tm2'], how='outer').dropna()
nfl_matchup2_comb_red = nfl_matchup2_comb_red.drop('Tm1_y', axis=1)
nfl_matchup2_comb_red = nfl_matchup2_comb_red.drop('PF_gbrred_comb_Pred1_y', axis=1)
nfl_matchup2_comb_red = nfl_matchup2_comb_red.drop('PA_gbrred_comb_Pred1_y', axis=1)
nfl_matchup2_comb_red = nfl_matchup2_comb_red.rename(columns={'Tm1_x': 'Tm1', 'PF_gbrred_comb_Pred1_x': 'PF_gbrred_comb_Pred1', 'PA_gbrred_comb_Pred1_x': 'PA_gbrred_comb_Pred1'})
match_gbrpred_comb_red = nfl_matchup2_comb_red.filter(['Tm1', 'PF_gbrred_comb_Pred1', 'PA_gbrred_comb_Pred1', 'Tm2', 'PF_gbrred_comb_Pred2', 'PA_gbrred_comb_Pred2'])
match_gbrpred_comb_red

Unnamed: 0,Tm1,PF_gbrred_comb_Pred1,PA_gbrred_comb_Pred1,Tm2,PF_gbrred_comb_Pred2,PA_gbrred_comb_Pred2
0,Los Angeles Rams,18.191863,22.700040,Indianapolis Colts,18.861145,22.055946
1,Philadelphia Eagles,27.083595,20.173075,Indianapolis Colts,18.861145,22.055946
2,Atlanta Falcons,20.824703,24.425055,Indianapolis Colts,18.861145,22.055946
3,Green Bay Packers,21.127359,22.610406,Indianapolis Colts,18.861145,22.055946
4,Cincinnati Bengals,26.393463,20.582833,Indianapolis Colts,18.861145,22.055946
...,...,...,...,...,...,...
491,Cleveland Browns,21.380020,22.722365,New Orleans Saints,19.816302,20.201592
492,Seattle Seahawks,23.559360,22.352011,New Orleans Saints,19.816302,20.201592
493,Arizona Cardinals,19.589672,26.276869,New Orleans Saints,19.816302,20.201592
494,Baltimore Ravens,20.909510,20.073103,New Orleans Saints,19.816302,20.201592


In [1311]:
#Calculating the predicted points for each team based on the model's predictions for the given matchup. In this case, I'm taking the average for the matchup based on combining the PF for one team against the PA for the other. Then figuring out the predicted matchup spread and total by calculating the difference and sum of the Tm1 and Tm2 PF predictions. Then adding them as new columns to the matchup DF.
match_gbrpred_comb_red['PF_Tm1'] = (match_gbrpred_comb_red['PF_gbrred_comb_Pred1'] + match_gbrpred_comb_red['PA_gbrred_comb_Pred2'])/2
match_gbrpred_comb_red['PF_Tm2'] = (match_gbrpred_comb_red['PF_gbrred_comb_Pred2'] + match_gbrpred_comb_red['PA_gbrred_comb_Pred1'])/2
match_gbrpred_comb_red['Pred_Spread'] = round((match_gbrpred_comb_red['PF_Tm1'] - match_gbrpred_comb_red['PF_Tm2']).abs(), 3)
match_gbrpred_comb_red['Pred_Total'] = round(match_gbrpred_comb_red['PF_Tm1'] + match_gbrpred_comb_red['PF_Tm2'], 3)
match_gbrpred_comb_red

Unnamed: 0,Tm1,PF_gbrred_comb_Pred1,PA_gbrred_comb_Pred1,Tm2,PF_gbrred_comb_Pred2,PA_gbrred_comb_Pred2,PF_Tm1,PF_Tm2,Pred_Spread,Pred_Total
0,Los Angeles Rams,18.191863,22.700040,Indianapolis Colts,18.861145,22.055946,20.123904,20.780593,0.657,40.904
1,Philadelphia Eagles,27.083595,20.173075,Indianapolis Colts,18.861145,22.055946,24.569770,19.517110,5.053,44.087
2,Atlanta Falcons,20.824703,24.425055,Indianapolis Colts,18.861145,22.055946,21.440325,21.643100,0.203,43.083
3,Green Bay Packers,21.127359,22.610406,Indianapolis Colts,18.861145,22.055946,21.591653,20.735775,0.856,42.327
4,Cincinnati Bengals,26.393463,20.582833,Indianapolis Colts,18.861145,22.055946,24.224705,19.721989,4.503,43.947
...,...,...,...,...,...,...,...,...,...,...
491,Cleveland Browns,21.380020,22.722365,New Orleans Saints,19.816302,20.201592,20.790806,21.269333,0.479,42.060
492,Seattle Seahawks,23.559360,22.352011,New Orleans Saints,19.816302,20.201592,21.880476,21.084156,0.796,42.965
493,Arizona Cardinals,19.589672,26.276869,New Orleans Saints,19.816302,20.201592,19.895632,23.046586,3.151,42.942
494,Baltimore Ravens,20.909510,20.073103,New Orleans Saints,19.816302,20.201592,20.555551,19.944703,0.611,40.500


In [1312]:
#GBR combined reduced final matchup DF
match_gbrpred_comb_red['Pred_team_favorite'] = match_gbrpred_comb_red.apply(tfp, axis=1)
match_gbrpred_comb_red['Pred_team_underdog'] = match_gbrpred_comb_red.apply(tup, axis=1)
match_gbrpred_comb_red = match_gbrpred_comb_red.filter(['Tm1', 'PF_gbrred_comb_Pred1', 'PA_gbrred_comb_Pred1', 'Tm2', 'PF_gbrred_comb_Pred2', 'PA_gbrred_comb_Pred2', 'PF_Tm1', 'PF_Tm2', 'Pred_team_favorite', 'Pred_Spread', 'Pred_team_underdog', 'Pred_Total'])
match_gbrpred_comb_red

Unnamed: 0,Tm1,PF_gbrred_comb_Pred1,PA_gbrred_comb_Pred1,Tm2,PF_gbrred_comb_Pred2,PA_gbrred_comb_Pred2,PF_Tm1,PF_Tm2,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_Total
0,Los Angeles Rams,18.191863,22.700040,Indianapolis Colts,18.861145,22.055946,20.123904,20.780593,Indianapolis Colts,0.657,Los Angeles Rams,40.904
1,Philadelphia Eagles,27.083595,20.173075,Indianapolis Colts,18.861145,22.055946,24.569770,19.517110,Philadelphia Eagles,5.053,Indianapolis Colts,44.087
2,Atlanta Falcons,20.824703,24.425055,Indianapolis Colts,18.861145,22.055946,21.440325,21.643100,Indianapolis Colts,0.203,Atlanta Falcons,43.083
3,Green Bay Packers,21.127359,22.610406,Indianapolis Colts,18.861145,22.055946,21.591653,20.735775,Green Bay Packers,0.856,Indianapolis Colts,42.327
4,Cincinnati Bengals,26.393463,20.582833,Indianapolis Colts,18.861145,22.055946,24.224705,19.721989,Cincinnati Bengals,4.503,Indianapolis Colts,43.947
...,...,...,...,...,...,...,...,...,...,...,...,...
491,Cleveland Browns,21.380020,22.722365,New Orleans Saints,19.816302,20.201592,20.790806,21.269333,New Orleans Saints,0.479,Cleveland Browns,42.060
492,Seattle Seahawks,23.559360,22.352011,New Orleans Saints,19.816302,20.201592,21.880476,21.084156,Seattle Seahawks,0.796,New Orleans Saints,42.965
493,Arizona Cardinals,19.589672,26.276869,New Orleans Saints,19.816302,20.201592,19.895632,23.046586,New Orleans Saints,3.151,Arizona Cardinals,42.942
494,Baltimore Ravens,20.909510,20.073103,New Orleans Saints,19.816302,20.201592,20.555551,19.944703,Baltimore Ravens,0.611,New Orleans Saints,40.500


# Backtesting how the models would have performed for the 2022 regular season utilizing the spread and total dataset (imported as spreadspoke-scores.csv from Kaggle)

In [1313]:
#Changing name to stdata (spread and total data)
stdata = spreadspoke_scores
#Reducing dataset to only include games from the 2022 regular season
stdata2022 = stdata.loc[stdata['schedule_season'] == 2022]
stdata2022 = stdata2022[stdata2022.schedule_playoff != True]
#Modifying dataset to include a column for the points scored in the game, and then a resulting column (O/U) indiciating if the game went Over or Under when compared to the pre-game total.
stdata2022['actual_total'] = stdata2022['score_away'] + stdata2022['score_home']
stdata2022['over_under_line'] = stdata2022['over_under_line'].astype('float64')
stdata2022['O/U'] = np.select([(stdata2022['actual_total'] > stdata2022['over_under_line']), 
                                    (stdata2022['actual_total'] < stdata2022['over_under_line']),
                                (stdata2022['actual_total'] == stdata2022['over_under_line'])], 
                                   ['O', 'U', 'P'])
#Changing all values in the 'spread_favorite' column to positive numbers (the "-" for betting favorites is assumed) so that subsequent functions comparing the actual spread and betting spread can work.
stdata2022['spread_favorite'] = stdata2022['spread_favorite'].abs()
#Filtering the dataset to remove unnecessary columns and organize the columns for clarity
stdata2022 = stdata2022.filter(['schedule_week', 'team_home', 'score_home', 'score_away', 'team_away', 'team_favorite_id', 'spread_favorite', 'over_under_line', 'actual_total', 'O/U'])

In [1314]:
#Creating various functions to add features to allow the dataset to determine whether or not a favorite or underdog covered the spread
#Winning score
def w(row):
    if row['score_home'] >= row['score_away']:
        val = row['score_home']
    else:
        val = row['score_away']
    return val
#Losing score
def l(row):
    if row['score_home'] <= row['score_away']:
        val = row['score_home']
    else:
        val = row['score_away']
    return val
#Winning Team
def wt(row):
    if row['score_home'] >= row['score_away']:
        val = row['team_home']
    else:
        val = row['team_away']
    return val
#Losing Team
def lt(row):
    if row['score_home'] <= row['score_away']:
        val = row['team_home']
    else:
        val = row['team_away']
    return val
#Applying functions to create new columns
stdata2022['winning_score'] = stdata2022.apply(w, axis=1)
stdata2022['losing_score'] = stdata2022.apply(l, axis=1)
stdata2022['winning_team'] = stdata2022.apply(wt, axis=1)
stdata2022['losing_team'] = stdata2022.apply(lt, axis=1)

In [1315]:
#Changing 'team_favorite_id' column from team abbreviations to full team name in order to be consistent with the winning/losing team columns.
teams = {
    'ARI': 'Arizona Cardinals',
    'ATL': 'Atlanta Falcons',
    'BAL': 'Baltimore Ravens',
    'BUF': 'Buffalo Bills',
    'CAR': 'Carolina Panthers',
    'CHI': 'Chicago Bears',
    'CIN': 'Cincinnati Bengals',
    'CLE': 'Cleveland Browns',
    'DAL': 'Dallas Cowboys',
    'DEN': 'Denver Broncos',
    'DET': 'Detroit Lions',
    'GB': 'Green Bay Packers',
    'HOU': 'Houston Texans',
    'IND': 'Indianapolis Colts',
    'JAX': 'Jacksonville Jaguars',
    'KC': 'Kansas City Chiefs',
    'LVR': 'Las Vegas Raiders',
    'LAC': 'Los Angeles Chargers',
    'LAR': 'Los Angeles Rams',
    'MIA': 'Miami Dolphins',
    'MIN': 'Minnesota Vikings',
    'NE': 'New England Patriots',
    'NO': 'New Orleans Saints',
    'NYG': 'New York Giants',
    'NYJ': 'New York Jets',
    'PHI': 'Philadelphia Eagles',
    'PIT': 'Pittsburgh Steelers',
    'SF': 'San Francisco 49ers',
    'SEA': 'Seattle Seahawks',
    'TB': 'Tampa Bay Buccaneers',
    'TEN': 'Tennessee Titans', 
    'WAS': 'Washington Commanders'}

stdata2022['team_favorite'] = stdata2022['team_favorite_id'].replace(teams)
#Creating function to create 'team_underdog' (tu) column in order to compare team_favorite and team_underdog
def tu(row):
    if row['winning_team'] == row['team_favorite']:
        val = row['losing_team']
    else:
        val = row['winning_team']
    return val
#Applying function to create column
stdata2022['team_underdog'] = stdata2022.apply(tu, axis=1)
#Creating new column indicating the actual winning margin (spread) for the game to compare with the pre-game spread
stdata2022['actual_spread'] = stdata2022['winning_score'] - stdata2022['losing_score']
#Filtering for new columns and clarity
stdata2022 = stdata2022.filter(['schedule_week', 'winning_team', 'winning_score', 'losing_score', 'losing_team', 'team_favorite', 'team_underdog', 'spread_favorite', 'actual_spread', 'over_under_line', 'actual_total', 'O/U'])
stdata2022

Unnamed: 0,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,team_underdog,spread_favorite,actual_spread,over_under_line,actual_total,O/U
13232,1,Buffalo Bills,31,10,Los Angeles Rams,Buffalo Bills,Los Angeles Rams,2.0,21,52.0,41,U
13233,1,Kansas City Chiefs,44,21,Arizona Cardinals,Kansas City Chiefs,Arizona Cardinals,6.0,23,54.0,65,O
13234,1,New Orleans Saints,27,26,Atlanta Falcons,New Orleans Saints,Atlanta Falcons,5.5,1,44.0,53,O
13235,1,Cleveland Browns,26,24,Carolina Panthers,Carolina Panthers,Cleveland Browns,1.0,2,42.0,50,O
13236,1,Chicago Bears,19,10,San Francisco 49ers,San Francisco 49ers,Chicago Bears,6.5,9,38.0,29,U
...,...,...,...,...,...,...,...,...,...,...,...,...
13498,18,Philadelphia Eagles,22,16,New York Giants,Philadelphia Eagles,New York Giants,17.0,6,43.0,38,U
13499,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,Cleveland Browns,2.5,14,40.0,42,O
13500,18,San Francisco 49ers,38,13,Arizona Cardinals,San Francisco 49ers,Arizona Cardinals,14.5,25,40.0,51,O
13501,18,Seattle Seahawks,19,16,Los Angeles Rams,Seattle Seahawks,Los Angeles Rams,4.5,3,43.0,35,U


In [1316]:
#Creating duplicate columns to id teams as 'Tm1' and 'Tm2' in order to merge the matchup prediction DF with the 2022 regular season DF.
stdata2022['Tm1'] = stdata2022['winning_team']
stdata2022['Tm2'] = stdata2022['losing_team']
#Manually dealing with the two games from the 2022 regular season that ended in a tie. Then combining back the tied games into the full stdata2022 DF.
stdata2022_tie = stdata2022.loc[stdata2022['actual_spread'] == 0]
stdata2022_tie['winning_team'] = stdata2022_tie['winning_team'].replace(['Houston Texans'],['Indianapolis Colts'])
stdata2022_tie['winning_team'] = stdata2022_tie['winning_team'].replace(['New York Giants'],['Washington Commanders'])
stdata2022_tie['Tm2'] = stdata2022_tie['Tm2'].replace(['Houston Texans'],['Indianapolis Colts'])
stdata2022_tie['Tm2'] = stdata2022_tie['Tm2'].replace(['New York Giants'],['Washington Commanders'])

stdata2022_both = [stdata2022, stdata2022_tie]
stdata2022 = pd.concat(stdata2022_both).reset_index(drop=True)
stdata2022['schedule_week'] = stdata2022['schedule_week'].astype(np.int64)
stdata2022 = stdata2022.sort_values(by=['schedule_week']).reset_index(drop=True)
stdata2022

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stdata2022_tie['winning_team'] = stdata2022_tie['winning_team'].replace(['Houston Texans'],['Indianapolis Colts'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stdata2022_tie['winning_team'] = stdata2022_tie['winning_team'].replace(['New York Giants'],['Washington Commanders'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#retu

Unnamed: 0,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,team_underdog,spread_favorite,actual_spread,over_under_line,actual_total,O/U,Tm1,Tm2
0,1,Buffalo Bills,31,10,Los Angeles Rams,Buffalo Bills,Los Angeles Rams,2.0,21,52.0,41,U,Buffalo Bills,Los Angeles Rams
1,1,Indianapolis Colts,20,20,Houston Texans,Indianapolis Colts,Houston Texans,7.0,0,45.5,40,U,Houston Texans,Indianapolis Colts
2,1,Seattle Seahawks,17,16,Denver Broncos,Denver Broncos,Seattle Seahawks,6.0,1,44.0,33,U,Seattle Seahawks,Denver Broncos
3,1,Washington Commanders,28,22,Jacksonville Jaguars,Washington Commanders,Jacksonville Jaguars,3.0,6,43.0,50,O,Washington Commanders,Jacksonville Jaguars
4,1,New York Giants,21,20,Tennessee Titans,Tennessee Titans,New York Giants,5.5,1,44.0,41,U,New York Giants,Tennessee Titans
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
268,18,Atlanta Falcons,30,17,Tampa Bay Buccaneers,Atlanta Falcons,Tampa Bay Buccaneers,6.0,13,40.5,47,O,Atlanta Falcons,Tampa Bay Buccaneers
269,18,Kansas City Chiefs,31,13,Las Vegas Raiders,Kansas City Chiefs,Las Vegas Raiders,8.5,18,52.0,44,U,Kansas City Chiefs,Las Vegas Raiders
270,18,Jacksonville Jaguars,20,16,Tennessee Titans,Jacksonville Jaguars,Tennessee Titans,6.5,4,39.5,36,U,Jacksonville Jaguars,Tennessee Titans
271,18,Detroit Lions,20,16,Green Bay Packers,Green Bay Packers,Detroit Lions,4.0,4,48.0,36,U,Detroit Lions,Green Bay Packers


In [1317]:
#Creating function to determine whether or not the team_favorite (F) or team_underdog (U) covered the spread or pushed (P) if the ending spread equaled the original spread
def fu(row):
    if row['team_underdog'] == row['winning_team']:
        val = 'U'
    elif row['actual_spread'] > row['spread_favorite']:
        val = 'F'
    elif row['actual_spread'] == ['spread_favorite']:
        val = 'P'
    else:
        val = 'U'
    return val
#Adding 'F/U_cover' (favorite/underdog) to DataFrame
stdata2022['F/U_cover'] = stdata2022.apply(fu, axis=1)
stdata2022 = stdata2022.filter(['schedule_week', 'winning_team', 
                                'winning_score', 'losing_score', 'losing_team', 'team_favorite', 'team_underdog', 'spread_favorite', 
                                'actual_spread', 'F/U_cover', 'over_under_line', 'actual_total', 'O/U'])

### Full Offense and Defense

In [1318]:
#Merging the matchup Df with the actual schedule DF
stdata2022['Tm1'] = stdata2022['winning_team']
stdata2022['Tm2'] = stdata2022['losing_team']

both_df3 = pd.merge(stdata2022, match_gbrpred, on=['Tm1', 'Tm2'], how='left')
both_df_na = both_df3[both_df3.isna().any(axis=1)]
both_df_na = both_df_na.rename(columns = {'Tm1': 'Tm3', 'Tm2': 'Tm4'})
both_df_na['Tm1'] = both_df_na['Tm4']
both_df_na['Tm2'] = both_df_na['Tm3']
both_df_na = both_df_na.filter(['schedule_week', 'winning_team', 'winning_score', 'losing_score', 'losing_team', 'team_favorite',
                               'team_underdog', 'spread_favorite', 'actual_spread', 'F/U_cover', 'over_under_line', 'actual_total',
                               'O/U', 'Tm1', 'Tm2'])

both_df2 = pd.merge(both_df_na, match_gbrpred, on=['Tm2', 'Tm1'], how='left').dropna()
both_df2

Unnamed: 0,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,team_underdog,spread_favorite,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Tm1,Tm2,PF_gbr_Pred1,PA_gbr_Pred1,PF_gbr_Pred2,PA_gbr_Pred2,PF_Tm1,PF_Tm2,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_Total
0,1,Indianapolis Colts,20,20,Houston Texans,Indianapolis Colts,Houston Texans,7.0,0,U,45.5,40,U,Houston Texans,Indianapolis Colts,17.121341,24.540418,15.625881,24.883673,21.002507,20.083149,Houston Texans,0.919,Indianapolis Colts,41.086
1,1,Washington Commanders,28,22,Jacksonville Jaguars,Washington Commanders,Jacksonville Jaguars,3.0,6,F,43.0,50,O,Jacksonville Jaguars,Washington Commanders,23.274803,22.633460,18.576376,20.030199,21.652501,20.604918,Jacksonville Jaguars,1.048,Washington Commanders,42.257
2,1,Los Angeles Chargers,24,19,Las Vegas Raiders,Los Angeles Chargers,Las Vegas Raiders,3.5,5,F,52.5,43,U,Las Vegas Raiders,Los Angeles Chargers,24.888000,24.093766,23.298167,22.639646,23.763823,23.695966,Las Vegas Raiders,0.068,Los Angeles Chargers,47.460
3,1,Tampa Bay Buccaneers,19,3,Dallas Cowboys,Tampa Bay Buccaneers,Dallas Cowboys,2.5,16,F,49.5,22,U,Dallas Cowboys,Tampa Bay Buccaneers,27.738820,19.846132,18.551291,21.484837,24.611828,19.198711,Dallas Cowboys,5.413,Tampa Bay Buccaneers,43.811
4,1,Pittsburgh Steelers,23,20,Cincinnati Bengals,Cincinnati Bengals,Pittsburgh Steelers,7.0,3,U,44.5,43,U,Cincinnati Bengals,Pittsburgh Steelers,26.013764,20.129431,19.551624,21.295456,23.654610,19.840528,Cincinnati Bengals,3.814,Pittsburgh Steelers,43.495
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82,17,Green Bay Packers,41,17,Minnesota Vikings,Green Bay Packers,Minnesota Vikings,3.5,24,F,47.5,58,O,Minnesota Vikings,Green Bay Packers,24.436709,24.353770,22.185518,22.265264,23.350987,23.269644,Minnesota Vikings,0.081,Green Bay Packers,46.621
83,17,Pittsburgh Steelers,16,13,Baltimore Ravens,Baltimore Ravens,Pittsburgh Steelers,1.0,3,U,35.5,29,U,Baltimore Ravens,Pittsburgh Steelers,21.352428,19.308233,19.551624,21.295456,21.323942,19.429929,Baltimore Ravens,1.894,Pittsburgh Steelers,40.754
84,18,Washington Commanders,26,6,Dallas Cowboys,Dallas Cowboys,Washington Commanders,7.5,20,U,41.0,32,U,Dallas Cowboys,Washington Commanders,27.738820,19.846132,18.576376,20.030199,23.884509,19.211254,Dallas Cowboys,4.673,Washington Commanders,43.096
85,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,Cleveland Browns,2.5,14,F,40.0,42,O,Cleveland Browns,Pittsburgh Steelers,20.850948,23.565793,19.551624,21.295456,21.073202,21.558708,Pittsburgh Steelers,0.486,Cleveland Browns,42.632


In [1319]:
#Merging the rest of the matchup DF with the actual schedule DF. Then filtering for a final DF. 
df_comb = [both_df3, both_df2]
result = pd.concat(df_comb).reset_index(drop=True).dropna()
result['schedule_week'] = result['schedule_week'].astype(np.int64)
final_result = result.sort_values(by=['schedule_week']).reset_index(drop=True)
stdata2022_full = final_result.filter(['schedule_week', 'winning_team', 'winning_score', 'losing_score', 'losing_team',
                                      'team_favorite', 'team_underdog', 'spread_favorite', 'actual_spread', 'F/U_cover',
                                      'over_under_line', 'actual_total', 'O/U', 'PF_Tm1', 'PF_Tm2', 'Pred_team_favorite',
                                      'Pred_Spread', 'Pred_team_underdog', 'Pred_Total'])
def pft(row):
    if row['PF_Tm1'] > row['PF_Tm2']:
        val = row['PF_Tm1']
    else:
        val = row['PF_Tm2']
    return val
def put(row):
    if row['PF_Tm1'] < row['PF_Tm2']:
        val = row['PF_Tm1']
    else:
        val = row['PF_Tm2']
    return val
stdata2022_full['Pred_PF_favorite'] = stdata2022_full.apply(pft, axis=1)
stdata2022_full['Pred_PF_underdog'] = stdata2022_full.apply(put, axis=1)
stdata2022_full = stdata2022_full.filter(['schedule_week', 'winning_team', 'winning_score', 'losing_score', 'losing_team',
                                      'team_favorite', 'spread_favorite', 'team_underdog', 'actual_spread', 'F/U_cover',
                                      'over_under_line', 'actual_total', 'O/U', 'Pred_PF_favorite', 'Pred_team_favorite',
                                    'Pred_Spread', 'Pred_team_underdog', 'Pred_PF_underdog', 'Pred_Total'])
stdata2022_full

Unnamed: 0,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,spread_favorite,team_underdog,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Pred_PF_favorite,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_PF_underdog,Pred_Total
0,1,Buffalo Bills,31,10,Los Angeles Rams,Buffalo Bills,2.0,Los Angeles Rams,21,F,52.0,41,U,25.425465,Buffalo Bills,6.699,Los Angeles Rams,18.726048,44.152
1,1,Seattle Seahawks,17,16,Denver Broncos,Denver Broncos,6.0,Seattle Seahawks,1,U,44.0,33,U,22.271177,Seattle Seahawks,1.995,Denver Broncos,20.276476,42.548
2,1,New York Giants,21,20,Tennessee Titans,Tennessee Titans,5.5,New York Giants,1,U,44.0,41,U,21.297734,New York Giants,0.681,Tennessee Titans,20.617189,41.915
3,1,Baltimore Ravens,24,9,New York Jets,Baltimore Ravens,6.5,New York Jets,15,F,44.0,33,U,20.127120,Baltimore Ravens,1.963,New York Jets,18.164400,38.292
4,1,Miami Dolphins,20,7,New England Patriots,Miami Dolphins,3.0,New England Patriots,13,F,46.5,27,U,21.797734,Miami Dolphins,0.000,New England Patriots,21.797242,43.595
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
266,18,Atlanta Falcons,30,17,Tampa Bay Buccaneers,Atlanta Falcons,6.0,Tampa Bay Buccaneers,13,F,40.5,47,O,21.266232,Tampa Bay Buccaneers,0.156,Atlanta Falcons,21.109897,42.376
267,18,Kansas City Chiefs,31,13,Las Vegas Raiders,Kansas City Chiefs,8.5,Las Vegas Raiders,18,F,52.0,44,U,26.865443,Kansas City Chiefs,3.339,Las Vegas Raiders,23.526532,50.392
268,18,Jacksonville Jaguars,20,16,Tennessee Titans,Jacksonville Jaguars,6.5,Tennessee Titans,4,U,39.5,36,U,22.041784,Jacksonville Jaguars,1.731,Tennessee Titans,20.311120,42.353
269,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,2.5,Cleveland Browns,14,F,40.0,42,O,21.558708,Pittsburgh Steelers,0.486,Cleveland Browns,21.073202,42.632


In [1320]:
#Creating functions that compares the predicted spread bets and predicted total bets with what actually happened in during the 2022 season. 
def psb(row):
    if row['team_underdog'] == row['Pred_team_favorite']:
        val = 'U'
    elif row['Pred_Spread'] < row['spread_favorite']:
        val = 'U'
    else:
        val = 'F'
    return val
def ptb(row):
    if row['Pred_Total'] > row['over_under_line']:
        val = 'O'
    else:
        val = 'U'
    return val
stdata2022_full['Pred_spread_bet'] = stdata2022_full.apply(psb, axis=1)
stdata2022_full['Pred_total_bet'] = stdata2022_full.apply(ptb, axis=1)
#Creating rows that track the W/L for both spread and total based on the predicted bets
def sbwl(row):
    if row['Pred_spread_bet'] == row['F/U_cover']:
        val = 'W'
    else:
        val = 'L'
    return val
def tbwl(row):
    if row['Pred_total_bet'] == row['O/U']:
        val = 'W'
    else:
        val = 'L'
    return val
stdata2022_full['spread_bet_W/L'] = stdata2022_full.apply(sbwl, axis=1)
stdata2022_full['total_bet_W/L'] = stdata2022_full.apply(tbwl, axis=1)
stdata2022_full

Unnamed: 0,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,spread_favorite,team_underdog,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Pred_PF_favorite,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_PF_underdog,Pred_Total,Pred_spread_bet,Pred_total_bet,spread_bet_W/L,total_bet_W/L
0,1,Buffalo Bills,31,10,Los Angeles Rams,Buffalo Bills,2.0,Los Angeles Rams,21,F,52.0,41,U,25.425465,Buffalo Bills,6.699,Los Angeles Rams,18.726048,44.152,F,U,W,W
1,1,Seattle Seahawks,17,16,Denver Broncos,Denver Broncos,6.0,Seattle Seahawks,1,U,44.0,33,U,22.271177,Seattle Seahawks,1.995,Denver Broncos,20.276476,42.548,U,U,W,W
2,1,New York Giants,21,20,Tennessee Titans,Tennessee Titans,5.5,New York Giants,1,U,44.0,41,U,21.297734,New York Giants,0.681,Tennessee Titans,20.617189,41.915,U,U,W,W
3,1,Baltimore Ravens,24,9,New York Jets,Baltimore Ravens,6.5,New York Jets,15,F,44.0,33,U,20.127120,Baltimore Ravens,1.963,New York Jets,18.164400,38.292,U,U,L,W
4,1,Miami Dolphins,20,7,New England Patriots,Miami Dolphins,3.0,New England Patriots,13,F,46.5,27,U,21.797734,Miami Dolphins,0.000,New England Patriots,21.797242,43.595,U,U,L,W
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
266,18,Atlanta Falcons,30,17,Tampa Bay Buccaneers,Atlanta Falcons,6.0,Tampa Bay Buccaneers,13,F,40.5,47,O,21.266232,Tampa Bay Buccaneers,0.156,Atlanta Falcons,21.109897,42.376,U,O,L,W
267,18,Kansas City Chiefs,31,13,Las Vegas Raiders,Kansas City Chiefs,8.5,Las Vegas Raiders,18,F,52.0,44,U,26.865443,Kansas City Chiefs,3.339,Las Vegas Raiders,23.526532,50.392,U,U,L,W
268,18,Jacksonville Jaguars,20,16,Tennessee Titans,Jacksonville Jaguars,6.5,Tennessee Titans,4,U,39.5,36,U,22.041784,Jacksonville Jaguars,1.731,Tennessee Titans,20.311120,42.353,U,O,W,L
269,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,2.5,Cleveland Browns,14,F,40.0,42,O,21.558708,Pittsburgh Steelers,0.486,Cleveland Browns,21.073202,42.632,U,O,L,W


### Advanced Offense and Defense

In [1321]:
#Merging the matchup Df with the actual schedule DF
both_df3_adv = pd.merge(stdata2022, match_gbrpred_adv, on=['Tm1', 'Tm2'], how='left')
both_df_na_adv = both_df3_adv[both_df3_adv.isna().any(axis=1)]
both_df_na_adv = both_df_na_adv.rename(columns = {'Tm1': 'Tm3', 'Tm2': 'Tm4'})
both_df_na_adv['Tm1'] = both_df_na_adv['Tm4']
both_df_na_adv['Tm2'] = both_df_na_adv['Tm3']
both_df_na_adv = both_df_na_adv.filter(['schedule_week', 'winning_team', 'winning_score', 'losing_score', 'losing_team', 'team_favorite',
                               'team_underdog', 'spread_favorite', 'actual_spread', 'F/U_cover', 'over_under_line', 'actual_total',
                               'O/U', 'Tm1', 'Tm2'])
both_df2_adv = pd.merge(both_df_na_adv, match_gbrpred_adv, on=['Tm2', 'Tm1'], how='left').dropna()
both_df2_adv

Unnamed: 0,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,team_underdog,spread_favorite,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Tm1,Tm2,PF_gbradv_Pred1,PA_gbradv_Pred1,PF_gbradv_Pred2,PA_gbradv_Pred2,PF_Tm1,PF_Tm2,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_Total
0,1,Indianapolis Colts,20,20,Houston Texans,Indianapolis Colts,Houston Texans,7.0,0,U,45.5,40,U,Houston Texans,Indianapolis Colts,16.963696,21.935595,15.625881,24.883673,20.923685,18.780738,Houston Texans,2.143,Indianapolis Colts,39.704
1,1,Washington Commanders,28,22,Jacksonville Jaguars,Washington Commanders,Jacksonville Jaguars,3.0,6,F,43.0,50,O,Jacksonville Jaguars,Washington Commanders,23.798374,21.277374,18.576376,20.030199,21.914286,19.926875,Jacksonville Jaguars,1.987,Washington Commanders,41.841
2,1,Los Angeles Chargers,24,19,Las Vegas Raiders,Los Angeles Chargers,Las Vegas Raiders,3.5,5,F,52.5,43,U,Las Vegas Raiders,Los Angeles Chargers,21.918508,24.407197,23.298167,22.639646,22.279077,23.852682,Los Angeles Chargers,1.574,Las Vegas Raiders,46.132
3,1,Tampa Bay Buccaneers,19,3,Dallas Cowboys,Tampa Bay Buccaneers,Dallas Cowboys,2.5,16,F,49.5,22,U,Dallas Cowboys,Tampa Bay Buccaneers,27.498042,19.918712,18.551291,21.484837,24.491439,19.235001,Dallas Cowboys,5.256,Tampa Bay Buccaneers,43.726
4,1,Pittsburgh Steelers,23,20,Cincinnati Bengals,Cincinnati Bengals,Pittsburgh Steelers,7.0,3,U,44.5,43,U,Cincinnati Bengals,Pittsburgh Steelers,26.099394,20.504447,19.551624,21.295456,23.697425,20.028036,Cincinnati Bengals,3.669,Pittsburgh Steelers,43.725
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82,17,Green Bay Packers,41,17,Minnesota Vikings,Green Bay Packers,Minnesota Vikings,3.5,24,F,47.5,58,O,Minnesota Vikings,Green Bay Packers,24.899565,22.699662,22.185518,22.265264,23.582414,22.442590,Minnesota Vikings,1.140,Green Bay Packers,46.025
83,17,Pittsburgh Steelers,16,13,Baltimore Ravens,Baltimore Ravens,Pittsburgh Steelers,1.0,3,U,35.5,29,U,Baltimore Ravens,Pittsburgh Steelers,20.600686,19.527338,19.551624,21.295456,20.948071,19.539481,Baltimore Ravens,1.409,Pittsburgh Steelers,40.488
84,18,Washington Commanders,26,6,Dallas Cowboys,Dallas Cowboys,Washington Commanders,7.5,20,U,41.0,32,U,Dallas Cowboys,Washington Commanders,27.498042,19.918712,18.576376,20.030199,23.764120,19.247544,Dallas Cowboys,4.517,Washington Commanders,43.012
85,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,Cleveland Browns,2.5,14,F,40.0,42,O,Cleveland Browns,Pittsburgh Steelers,21.201247,22.602988,19.551624,21.295456,21.248351,21.077306,Cleveland Browns,0.171,Pittsburgh Steelers,42.326


In [1322]:
#Merging the rest of the matchup DF with the actual schedule DF. Then filtering for a final DF. 
df_comb_adv = [both_df3_adv, both_df2_adv]
result_adv = pd.concat(df_comb_adv).reset_index(drop=True).dropna()
result_adv['schedule_week'] = result_adv['schedule_week'].astype(np.int64)
final_result_adv = result_adv.sort_values(by=['schedule_week']).reset_index(drop=True)
stdata2022_full_adv = final_result_adv.filter(['schedule_week', 'winning_team', 'winning_score', 'losing_score', 'losing_team',
                                      'team_favorite', 'team_underdog', 'spread_favorite', 'actual_spread', 'F/U_cover',
                                      'over_under_line', 'actual_total', 'O/U', 'PF_Tm1', 'PF_Tm2', 'Pred_team_favorite',
                                      'Pred_Spread', 'Pred_team_underdog', 'Pred_Total'])
stdata2022_full_adv['Pred_PF_favorite'] = stdata2022_full_adv.apply(pft, axis=1)
stdata2022_full_adv['Pred_PF_underdog'] = stdata2022_full_adv.apply(put, axis=1)
stdata2022_full_adv = stdata2022_full_adv.filter(['schedule_week', 'winning_team', 'winning_score', 'losing_score', 'losing_team',
                                      'team_favorite', 'spread_favorite', 'team_underdog', 'actual_spread', 'F/U_cover',
                                      'over_under_line', 'actual_total', 'O/U', 'Pred_PF_favorite', 'Pred_team_favorite',
                                    'Pred_Spread', 'Pred_team_underdog', 'Pred_PF_underdog', 'Pred_Total'])
stdata2022_full_adv

Unnamed: 0,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,spread_favorite,team_underdog,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Pred_PF_favorite,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_PF_underdog,Pred_Total
0,1,Buffalo Bills,31,10,Los Angeles Rams,Buffalo Bills,2.0,Los Angeles Rams,21,F,52.0,41,U,25.593539,Buffalo Bills,7.250,Los Angeles Rams,18.343960,43.937
1,1,Seattle Seahawks,17,16,Denver Broncos,Denver Broncos,6.0,Seattle Seahawks,1,U,44.0,33,U,21.976054,Seattle Seahawks,2.418,Denver Broncos,19.558253,41.534
2,1,New York Giants,21,20,Tennessee Titans,Tennessee Titans,5.5,New York Giants,1,U,44.0,41,U,21.355027,New York Giants,0.430,Tennessee Titans,20.924728,42.280
3,1,Baltimore Ravens,24,9,New York Jets,Baltimore Ravens,6.5,New York Jets,15,F,44.0,33,U,19.751249,Baltimore Ravens,1.477,New York Jets,18.273953,38.025
4,1,Miami Dolphins,20,7,New England Patriots,Miami Dolphins,3.0,New England Patriots,13,F,46.5,27,U,22.143559,New England Patriots,0.293,Miami Dolphins,21.850983,43.995
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
266,18,Atlanta Falcons,30,17,Tampa Bay Buccaneers,Atlanta Falcons,6.0,Tampa Bay Buccaneers,13,F,40.5,47,O,21.492067,Atlanta Falcons,0.387,Tampa Bay Buccaneers,21.104685,42.597
267,18,Kansas City Chiefs,31,13,Las Vegas Raiders,Kansas City Chiefs,8.5,Las Vegas Raiders,18,F,52.0,44,U,26.647408,Kansas City Chiefs,3.019,Las Vegas Raiders,23.628528,50.276
268,18,Jacksonville Jaguars,20,16,Tennessee Titans,Jacksonville Jaguars,6.5,Tennessee Titans,4,U,39.5,36,U,22.303569,Jacksonville Jaguars,2.670,Tennessee Titans,19.633077,41.937
269,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,2.5,Cleveland Browns,14,F,40.0,42,O,21.248351,Cleveland Browns,0.171,Pittsburgh Steelers,21.077306,42.326


In [1323]:
#Creating functions that compares the predicted spread bets and predicted total bets with what actually happened in during the 2022 season. 
stdata2022_full_adv['Pred_spread_bet'] = stdata2022_full_adv.apply(psb, axis=1)
stdata2022_full_adv['Pred_total_bet'] = stdata2022_full_adv.apply(ptb, axis=1)
stdata2022_full_adv['spread_bet_W/L'] = stdata2022_full_adv.apply(sbwl, axis=1)
stdata2022_full_adv['total_bet_W/L'] = stdata2022_full_adv.apply(tbwl, axis=1)
stdata2022_full_adv

Unnamed: 0,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,spread_favorite,team_underdog,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Pred_PF_favorite,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_PF_underdog,Pred_Total,Pred_spread_bet,Pred_total_bet,spread_bet_W/L,total_bet_W/L
0,1,Buffalo Bills,31,10,Los Angeles Rams,Buffalo Bills,2.0,Los Angeles Rams,21,F,52.0,41,U,25.593539,Buffalo Bills,7.250,Los Angeles Rams,18.343960,43.937,F,U,W,W
1,1,Seattle Seahawks,17,16,Denver Broncos,Denver Broncos,6.0,Seattle Seahawks,1,U,44.0,33,U,21.976054,Seattle Seahawks,2.418,Denver Broncos,19.558253,41.534,U,U,W,W
2,1,New York Giants,21,20,Tennessee Titans,Tennessee Titans,5.5,New York Giants,1,U,44.0,41,U,21.355027,New York Giants,0.430,Tennessee Titans,20.924728,42.280,U,U,W,W
3,1,Baltimore Ravens,24,9,New York Jets,Baltimore Ravens,6.5,New York Jets,15,F,44.0,33,U,19.751249,Baltimore Ravens,1.477,New York Jets,18.273953,38.025,U,U,L,W
4,1,Miami Dolphins,20,7,New England Patriots,Miami Dolphins,3.0,New England Patriots,13,F,46.5,27,U,22.143559,New England Patriots,0.293,Miami Dolphins,21.850983,43.995,U,U,L,W
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
266,18,Atlanta Falcons,30,17,Tampa Bay Buccaneers,Atlanta Falcons,6.0,Tampa Bay Buccaneers,13,F,40.5,47,O,21.492067,Atlanta Falcons,0.387,Tampa Bay Buccaneers,21.104685,42.597,U,O,L,W
267,18,Kansas City Chiefs,31,13,Las Vegas Raiders,Kansas City Chiefs,8.5,Las Vegas Raiders,18,F,52.0,44,U,26.647408,Kansas City Chiefs,3.019,Las Vegas Raiders,23.628528,50.276,U,U,L,W
268,18,Jacksonville Jaguars,20,16,Tennessee Titans,Jacksonville Jaguars,6.5,Tennessee Titans,4,U,39.5,36,U,22.303569,Jacksonville Jaguars,2.670,Tennessee Titans,19.633077,41.937,U,O,W,L
269,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,2.5,Cleveland Browns,14,F,40.0,42,O,21.248351,Cleveland Browns,0.171,Pittsburgh Steelers,21.077306,42.326,U,O,L,W


### Reduced Full Dataset Offense and Defense

In [1324]:
#Merging the reduced matchup Df with the actual schedule DF
both_df3_red = pd.merge(stdata2022, match_gbrpred_red, on=['Tm1', 'Tm2'], how='left')
both_df_na_red = both_df3_red[both_df3_red.isna().any(axis=1)]
both_df_na_red = both_df_na_red.rename(columns = {'Tm1': 'Tm3', 'Tm2': 'Tm4'})
both_df_na_red['Tm1'] = both_df_na_red['Tm4']
both_df_na_red['Tm2'] = both_df_na_red['Tm3']
both_df_na_red = both_df_na_red.filter(['schedule_week', 'winning_team', 'winning_score', 'losing_score', 'losing_team', 'team_favorite',
                               'team_underdog', 'spread_favorite', 'actual_spread', 'F/U_cover', 'over_under_line', 'actual_total',
                               'O/U', 'Tm1', 'Tm2'])
both_df2_red = pd.merge(both_df_na_red, match_gbrpred_red, on=['Tm2', 'Tm1'], how='left').dropna()
both_df2_red

Unnamed: 0,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,team_underdog,spread_favorite,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Tm1,Tm2,PF_gbrred_Pred1,PA_gbrred_Pred1,PF_gbrred_Pred2,PA_gbrred_Pred2,PF_Tm1,PF_Tm2,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_Total
0,1,Indianapolis Colts,20,20,Houston Texans,Indianapolis Colts,Houston Texans,7.0,0,U,45.5,40,U,Houston Texans,Indianapolis Colts,16.933403,23.321638,15.893827,24.407451,20.670427,19.607732,Houston Texans,1.063,Indianapolis Colts,40.278
1,1,Washington Commanders,28,22,Jacksonville Jaguars,Washington Commanders,Jacksonville Jaguars,3.0,6,F,43.0,50,O,Jacksonville Jaguars,Washington Commanders,23.280140,22.291614,18.146695,19.475799,21.377969,20.219155,Jacksonville Jaguars,1.159,Washington Commanders,41.597
2,1,Los Angeles Chargers,24,19,Las Vegas Raiders,Los Angeles Chargers,Las Vegas Raiders,3.5,5,F,52.5,43,U,Las Vegas Raiders,Los Angeles Chargers,25.196530,24.655310,23.341837,22.800059,23.998295,23.998573,Los Angeles Chargers,0.000,Las Vegas Raiders,47.997
3,1,Tampa Bay Buccaneers,19,3,Dallas Cowboys,Tampa Bay Buccaneers,Dallas Cowboys,2.5,16,F,49.5,22,U,Dallas Cowboys,Tampa Bay Buccaneers,27.302403,19.668586,18.751479,21.467483,24.384943,19.210032,Dallas Cowboys,5.175,Tampa Bay Buccaneers,43.595
4,1,Pittsburgh Steelers,23,20,Cincinnati Bengals,Cincinnati Bengals,Pittsburgh Steelers,7.0,3,U,44.5,43,U,Cincinnati Bengals,Pittsburgh Steelers,26.551884,19.552076,19.727235,21.435633,23.993758,19.639656,Cincinnati Bengals,4.354,Pittsburgh Steelers,43.633
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82,17,Green Bay Packers,41,17,Minnesota Vikings,Green Bay Packers,Minnesota Vikings,3.5,24,F,47.5,58,O,Minnesota Vikings,Green Bay Packers,24.602116,23.932514,22.390195,23.845098,24.223607,23.161354,Minnesota Vikings,1.062,Green Bay Packers,47.385
83,17,Pittsburgh Steelers,16,13,Baltimore Ravens,Baltimore Ravens,Pittsburgh Steelers,1.0,3,U,35.5,29,U,Baltimore Ravens,Pittsburgh Steelers,21.649245,19.512820,19.727235,21.435633,21.542439,19.620027,Baltimore Ravens,1.922,Pittsburgh Steelers,41.162
84,18,Washington Commanders,26,6,Dallas Cowboys,Dallas Cowboys,Washington Commanders,7.5,20,U,41.0,32,U,Dallas Cowboys,Washington Commanders,27.302403,19.668586,18.146695,19.475799,23.389101,18.907640,Dallas Cowboys,4.481,Washington Commanders,42.297
85,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,Cleveland Browns,2.5,14,F,40.0,42,O,Cleveland Browns,Pittsburgh Steelers,20.945524,24.129603,19.727235,21.435633,21.190579,21.928419,Pittsburgh Steelers,0.738,Cleveland Browns,43.119


In [1325]:
#Merging the rest of the reduced matchup DF with the actual schedule DF. Then filtering for a final DF. 
df_comb_red = [both_df3_red, both_df2_red]
result_red = pd.concat(df_comb_red).reset_index(drop=True).dropna()
result_red['schedule_week'] = result_red['schedule_week'].astype(np.int64)
final_result_red = result_red.sort_values(by=['schedule_week']).reset_index(drop=True)
stdata2022_full_red = final_result_red.filter(['schedule_week', 'winning_team', 'winning_score', 'losing_score', 'losing_team',
                                      'team_favorite', 'team_underdog', 'spread_favorite', 'actual_spread', 'F/U_cover',
                                      'over_under_line', 'actual_total', 'O/U', 'PF_Tm1', 'PF_Tm2', 'Pred_team_favorite',
                                      'Pred_Spread', 'Pred_team_underdog', 'Pred_Total'])
stdata2022_full_red['Pred_PF_favorite'] = stdata2022_full_red.apply(pft, axis=1)
stdata2022_full_red['Pred_PF_underdog'] = stdata2022_full_red.apply(put, axis=1)
stdata2022_full_red = stdata2022_full_red.filter(['schedule_week', 'winning_team', 'winning_score', 'losing_score', 'losing_team',
                                      'team_favorite', 'spread_favorite', 'team_underdog', 'actual_spread', 'F/U_cover',
                                      'over_under_line', 'actual_total', 'O/U', 'Pred_PF_favorite', 'Pred_team_favorite',
                                    'Pred_Spread', 'Pred_team_underdog', 'Pred_PF_underdog', 'Pred_Total'])
stdata2022_full_red

Unnamed: 0,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,spread_favorite,team_underdog,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Pred_PF_favorite,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_PF_underdog,Pred_Total
0,1,Buffalo Bills,31,10,Los Angeles Rams,Buffalo Bills,2.0,Los Angeles Rams,21,F,52.0,41,U,25.313451,Buffalo Bills,6.475,Los Angeles Rams,18.838488,44.152
1,1,Seattle Seahawks,17,16,Denver Broncos,Denver Broncos,6.0,Seattle Seahawks,1,U,44.0,33,U,22.355918,Seattle Seahawks,2.016,Denver Broncos,20.340238,42.696
2,1,New York Giants,21,20,Tennessee Titans,Tennessee Titans,5.5,New York Giants,1,U,44.0,41,U,21.276487,Tennessee Titans,0.286,New York Giants,20.990028,42.267
3,1,Baltimore Ravens,24,9,New York Jets,Baltimore Ravens,6.5,New York Jets,15,F,44.0,33,U,20.515514,Baltimore Ravens,2.138,New York Jets,18.377177,38.893
4,1,Miami Dolphins,20,7,New England Patriots,Miami Dolphins,3.0,New England Patriots,13,F,46.5,27,U,21.698237,Miami Dolphins,0.117,New England Patriots,21.581669,43.280
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
266,18,Atlanta Falcons,30,17,Tampa Bay Buccaneers,Atlanta Falcons,6.0,Tampa Bay Buccaneers,13,F,40.5,47,O,21.512006,Tampa Bay Buccaneers,0.126,Atlanta Falcons,21.386140,42.898
267,18,Kansas City Chiefs,31,13,Las Vegas Raiders,Kansas City Chiefs,8.5,Las Vegas Raiders,18,F,52.0,44,U,27.299186,Kansas City Chiefs,3.879,Las Vegas Raiders,23.419865,50.719
268,18,Jacksonville Jaguars,20,16,Tennessee Titans,Jacksonville Jaguars,6.5,Tennessee Titans,4,U,39.5,36,U,21.797103,Jacksonville Jaguars,1.457,Tennessee Titans,20.339916,42.137
269,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,2.5,Cleveland Browns,14,F,40.0,42,O,21.928419,Pittsburgh Steelers,0.738,Cleveland Browns,21.190579,43.119


In [1326]:
#Applying functions that compares the predicted spread bets and predicted total bets with what actually happened in during the 2022 season.
stdata2022_full_red['Pred_spread_bet'] = stdata2022_full_red.apply(psb, axis=1)
stdata2022_full_red['Pred_total_bet'] = stdata2022_full_red.apply(ptb, axis=1)
stdata2022_full_red['spread_bet_W/L'] = stdata2022_full_red.apply(sbwl, axis=1)
stdata2022_full_red['total_bet_W/L'] = stdata2022_full_red.apply(tbwl, axis=1)
stdata2022_full_red

Unnamed: 0,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,spread_favorite,team_underdog,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Pred_PF_favorite,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_PF_underdog,Pred_Total,Pred_spread_bet,Pred_total_bet,spread_bet_W/L,total_bet_W/L
0,1,Buffalo Bills,31,10,Los Angeles Rams,Buffalo Bills,2.0,Los Angeles Rams,21,F,52.0,41,U,25.313451,Buffalo Bills,6.475,Los Angeles Rams,18.838488,44.152,F,U,W,W
1,1,Seattle Seahawks,17,16,Denver Broncos,Denver Broncos,6.0,Seattle Seahawks,1,U,44.0,33,U,22.355918,Seattle Seahawks,2.016,Denver Broncos,20.340238,42.696,U,U,W,W
2,1,New York Giants,21,20,Tennessee Titans,Tennessee Titans,5.5,New York Giants,1,U,44.0,41,U,21.276487,Tennessee Titans,0.286,New York Giants,20.990028,42.267,U,U,W,W
3,1,Baltimore Ravens,24,9,New York Jets,Baltimore Ravens,6.5,New York Jets,15,F,44.0,33,U,20.515514,Baltimore Ravens,2.138,New York Jets,18.377177,38.893,U,U,L,W
4,1,Miami Dolphins,20,7,New England Patriots,Miami Dolphins,3.0,New England Patriots,13,F,46.5,27,U,21.698237,Miami Dolphins,0.117,New England Patriots,21.581669,43.280,U,U,L,W
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
266,18,Atlanta Falcons,30,17,Tampa Bay Buccaneers,Atlanta Falcons,6.0,Tampa Bay Buccaneers,13,F,40.5,47,O,21.512006,Tampa Bay Buccaneers,0.126,Atlanta Falcons,21.386140,42.898,U,O,L,W
267,18,Kansas City Chiefs,31,13,Las Vegas Raiders,Kansas City Chiefs,8.5,Las Vegas Raiders,18,F,52.0,44,U,27.299186,Kansas City Chiefs,3.879,Las Vegas Raiders,23.419865,50.719,U,U,L,W
268,18,Jacksonville Jaguars,20,16,Tennessee Titans,Jacksonville Jaguars,6.5,Tennessee Titans,4,U,39.5,36,U,21.797103,Jacksonville Jaguars,1.457,Tennessee Titans,20.339916,42.137,U,O,W,L
269,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,2.5,Cleveland Browns,14,F,40.0,42,O,21.928419,Pittsburgh Steelers,0.738,Cleveland Browns,21.190579,43.119,U,O,L,W


### Reduced Advanced Dataset Offense and Defense

In [1327]:
#Merging the reduced matchup Df with the actual schedule DF
both_df3_adv_red = pd.merge(stdata2022, match_gbrpred_adv_red, on=['Tm1', 'Tm2'], how='left')
both_df_na_adv_red = both_df3_adv_red[both_df3_adv_red.isna().any(axis=1)]
both_df_na_adv_red = both_df_na_adv_red.rename(columns = {'Tm1': 'Tm3', 'Tm2': 'Tm4'})
both_df_na_adv_red['Tm1'] = both_df_na_adv_red['Tm4']
both_df_na_adv_red['Tm2'] = both_df_na_adv_red['Tm3']
both_df_na_adv_red = both_df_na_adv_red.filter(['schedule_week', 'winning_team', 'winning_score', 'losing_score', 'losing_team', 'team_favorite',
                               'team_underdog', 'spread_favorite', 'actual_spread', 'F/U_cover', 'over_under_line', 'actual_total',
                               'O/U', 'Tm1', 'Tm2'])
both_df2_adv_red = pd.merge(both_df_na_adv_red, match_gbrpred_adv_red, on=['Tm2', 'Tm1'], how='left').dropna()
both_df2_adv_red

Unnamed: 0,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,team_underdog,spread_favorite,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Tm1,Tm2,PF_gbradv_Pred1_red,PA_gbradv_Pred1_red,PF_gbradv_Pred2_red,PA_gbradv_Pred2_red,PF_Tm1,PF_Tm2,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_Total
0,1,Indianapolis Colts,20,20,Houston Texans,Indianapolis Colts,Houston Texans,7.0,0,U,45.5,40,U,Houston Texans,Indianapolis Colts,17.456766,22.120939,17.069022,24.013524,20.735145,19.594981,Houston Texans,1.140,Indianapolis Colts,40.330
1,1,Washington Commanders,28,22,Jacksonville Jaguars,Washington Commanders,Jacksonville Jaguars,3.0,6,F,43.0,50,O,Jacksonville Jaguars,Washington Commanders,23.909244,20.601898,18.006018,21.048638,22.478941,19.303958,Jacksonville Jaguars,3.175,Washington Commanders,41.783
2,1,Los Angeles Chargers,24,19,Las Vegas Raiders,Los Angeles Chargers,Las Vegas Raiders,3.5,5,F,52.5,43,U,Las Vegas Raiders,Los Angeles Chargers,21.787095,24.863807,23.441154,22.761763,22.274429,24.152481,Los Angeles Chargers,1.878,Las Vegas Raiders,46.427
3,1,Tampa Bay Buccaneers,19,3,Dallas Cowboys,Tampa Bay Buccaneers,Dallas Cowboys,2.5,16,F,49.5,22,U,Dallas Cowboys,Tampa Bay Buccaneers,27.561768,19.837221,23.908236,20.713440,24.137604,21.872728,Dallas Cowboys,2.265,Tampa Bay Buccaneers,46.010
4,1,Pittsburgh Steelers,23,20,Cincinnati Bengals,Cincinnati Bengals,Pittsburgh Steelers,7.0,3,U,44.5,43,U,Cincinnati Bengals,Pittsburgh Steelers,26.014972,20.631280,18.455516,21.551922,23.783447,19.543398,Cincinnati Bengals,4.240,Pittsburgh Steelers,43.327
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82,17,Green Bay Packers,41,17,Minnesota Vikings,Green Bay Packers,Minnesota Vikings,3.5,24,F,47.5,58,O,Minnesota Vikings,Green Bay Packers,24.924564,23.644820,21.956461,22.170322,23.547443,22.800640,Minnesota Vikings,0.747,Green Bay Packers,46.348
83,17,Pittsburgh Steelers,16,13,Baltimore Ravens,Baltimore Ravens,Pittsburgh Steelers,1.0,3,U,35.5,29,U,Baltimore Ravens,Pittsburgh Steelers,20.694819,19.173094,18.455516,21.551922,21.123370,18.814305,Baltimore Ravens,2.309,Pittsburgh Steelers,39.938
84,18,Washington Commanders,26,6,Dallas Cowboys,Dallas Cowboys,Washington Commanders,7.5,20,U,41.0,32,U,Dallas Cowboys,Washington Commanders,27.561768,19.837221,18.006018,21.048638,24.305203,18.921619,Dallas Cowboys,5.384,Washington Commanders,43.227
85,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,Cleveland Browns,2.5,14,F,40.0,42,O,Cleveland Browns,Pittsburgh Steelers,20.852184,22.679205,18.455516,21.551922,21.202053,20.567361,Cleveland Browns,0.635,Pittsburgh Steelers,41.769


In [1328]:
#Merging the rest of the matchup DF with the actual schedule DF. Then filtering for a final DF. 
df_comb_adv_red = [both_df3_adv_red, both_df2_adv_red]
result_adv_red = pd.concat(df_comb_adv_red).reset_index(drop=True).dropna()
result_adv_red['schedule_week'] = result_adv_red['schedule_week'].astype(np.int64)
final_result_adv_red = result_adv_red.sort_values(by=['schedule_week']).reset_index(drop=True)
stdata2022_full_adv_red = final_result_adv_red.filter(['schedule_week', 'winning_team', 'winning_score', 'losing_score', 'losing_team',
                                      'team_favorite', 'team_underdog', 'spread_favorite', 'actual_spread', 'F/U_cover',
                                      'over_under_line', 'actual_total', 'O/U', 'PF_Tm1', 'PF_Tm2', 'Pred_team_favorite',
                                      'Pred_Spread', 'Pred_team_underdog', 'Pred_Total'])
stdata2022_full_adv_red['Pred_PF_favorite'] = stdata2022_full_adv_red.apply(pft, axis=1)
stdata2022_full_adv_red['Pred_PF_underdog'] = stdata2022_full_adv_red.apply(put, axis=1)
stdata2022_full_adv_red = stdata2022_full_adv_red.filter(['schedule_week', 'winning_team', 'winning_score', 'losing_score', 'losing_team',
                                      'team_favorite', 'spread_favorite', 'team_underdog', 'actual_spread', 'F/U_cover',
                                      'over_under_line', 'actual_total', 'O/U', 'Pred_PF_favorite', 'Pred_team_favorite',
                                    'Pred_Spread', 'Pred_team_underdog', 'Pred_PF_underdog', 'Pred_Total'])
stdata2022_full_adv_red

Unnamed: 0,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,spread_favorite,team_underdog,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Pred_PF_favorite,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_PF_underdog,Pred_Total
0,1,Buffalo Bills,31,10,Los Angeles Rams,Buffalo Bills,2.0,Los Angeles Rams,21,F,52.0,41,U,24.914370,Buffalo Bills,7.040,Los Angeles Rams,17.874364,42.789
1,1,Seattle Seahawks,17,16,Denver Broncos,Denver Broncos,6.0,Seattle Seahawks,1,U,44.0,33,U,21.880582,Seattle Seahawks,1.823,Denver Broncos,20.057426,41.938
2,1,New York Giants,21,20,Tennessee Titans,Tennessee Titans,5.5,New York Giants,1,U,44.0,41,U,21.165986,New York Giants,0.174,Tennessee Titans,20.992136,42.158
3,1,Baltimore Ravens,24,9,New York Jets,Baltimore Ravens,6.5,New York Jets,15,F,44.0,33,U,19.724773,Baltimore Ravens,1.432,New York Jets,18.292389,38.017
4,1,Miami Dolphins,20,7,New England Patriots,Miami Dolphins,3.0,New England Patriots,13,F,46.5,27,U,22.594027,Miami Dolphins,1.295,New England Patriots,21.298777,43.893
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
266,18,Atlanta Falcons,30,17,Tampa Bay Buccaneers,Atlanta Falcons,6.0,Tampa Bay Buccaneers,13,F,40.5,47,O,23.544860,Tampa Bay Buccaneers,2.631,Atlanta Falcons,20.914029,44.459
267,18,Kansas City Chiefs,31,13,Las Vegas Raiders,Kansas City Chiefs,8.5,Las Vegas Raiders,18,F,52.0,44,U,27.047425,Kansas City Chiefs,5.149,Las Vegas Raiders,21.898002,48.945
268,18,Jacksonville Jaguars,20,16,Tennessee Titans,Jacksonville Jaguars,6.5,Tennessee Titans,4,U,39.5,36,U,22.532902,Jacksonville Jaguars,3.407,Tennessee Titans,19.125706,41.659
269,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,2.5,Cleveland Browns,14,F,40.0,42,O,21.202053,Cleveland Browns,0.635,Pittsburgh Steelers,20.567361,41.769


In [1329]:
#Creating functions that compares the predicted spread bets and predicted total bets with what actually happened in during the 2022 season. 
stdata2022_full_adv_red['Pred_spread_bet'] = stdata2022_full_adv_red.apply(psb, axis=1)
stdata2022_full_adv_red['Pred_total_bet'] = stdata2022_full_adv_red.apply(ptb, axis=1)
stdata2022_full_adv_red['spread_bet_W/L'] = stdata2022_full_adv_red.apply(sbwl, axis=1)
stdata2022_full_adv_red['total_bet_W/L'] = stdata2022_full_adv_red.apply(tbwl, axis=1)
stdata2022_full_adv_red

Unnamed: 0,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,spread_favorite,team_underdog,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Pred_PF_favorite,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_PF_underdog,Pred_Total,Pred_spread_bet,Pred_total_bet,spread_bet_W/L,total_bet_W/L
0,1,Buffalo Bills,31,10,Los Angeles Rams,Buffalo Bills,2.0,Los Angeles Rams,21,F,52.0,41,U,24.914370,Buffalo Bills,7.040,Los Angeles Rams,17.874364,42.789,F,U,W,W
1,1,Seattle Seahawks,17,16,Denver Broncos,Denver Broncos,6.0,Seattle Seahawks,1,U,44.0,33,U,21.880582,Seattle Seahawks,1.823,Denver Broncos,20.057426,41.938,U,U,W,W
2,1,New York Giants,21,20,Tennessee Titans,Tennessee Titans,5.5,New York Giants,1,U,44.0,41,U,21.165986,New York Giants,0.174,Tennessee Titans,20.992136,42.158,U,U,W,W
3,1,Baltimore Ravens,24,9,New York Jets,Baltimore Ravens,6.5,New York Jets,15,F,44.0,33,U,19.724773,Baltimore Ravens,1.432,New York Jets,18.292389,38.017,U,U,L,W
4,1,Miami Dolphins,20,7,New England Patriots,Miami Dolphins,3.0,New England Patriots,13,F,46.5,27,U,22.594027,Miami Dolphins,1.295,New England Patriots,21.298777,43.893,U,U,L,W
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
266,18,Atlanta Falcons,30,17,Tampa Bay Buccaneers,Atlanta Falcons,6.0,Tampa Bay Buccaneers,13,F,40.5,47,O,23.544860,Tampa Bay Buccaneers,2.631,Atlanta Falcons,20.914029,44.459,U,O,L,W
267,18,Kansas City Chiefs,31,13,Las Vegas Raiders,Kansas City Chiefs,8.5,Las Vegas Raiders,18,F,52.0,44,U,27.047425,Kansas City Chiefs,5.149,Las Vegas Raiders,21.898002,48.945,U,U,L,W
268,18,Jacksonville Jaguars,20,16,Tennessee Titans,Jacksonville Jaguars,6.5,Tennessee Titans,4,U,39.5,36,U,22.532902,Jacksonville Jaguars,3.407,Tennessee Titans,19.125706,41.659,U,O,W,L
269,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,2.5,Cleveland Browns,14,F,40.0,42,O,21.202053,Cleveland Browns,0.635,Pittsburgh Steelers,20.567361,41.769,U,O,L,W


### Combined Reduced Dataset Offense and Defense

In [1330]:
#Merging the reduced matchup DF with the actual schedule DF
both_df3_comb_red = pd.merge(stdata2022, match_gbrpred_comb_red, on=['Tm1', 'Tm2'], how='left')
both_df_na_comb_red = both_df3_comb_red[both_df3_comb_red.isna().any(axis=1)]
both_df_na_comb_red = both_df_na_comb_red.rename(columns = {'Tm1': 'Tm3', 'Tm2': 'Tm4'})
both_df_na_comb_red['Tm1'] = both_df_na_comb_red['Tm4']
both_df_na_comb_red['Tm2'] = both_df_na_comb_red['Tm3']
both_df_na_comb_red = both_df_na_comb_red.filter(['schedule_week', 'winning_team', 'winning_score', 'losing_score', 'losing_team', 'team_favorite',
                               'team_underdog', 'spread_favorite', 'actual_spread', 'F/U_cover', 'over_under_line', 'actual_total',
                               'O/U', 'Tm1', 'Tm2'])
both_df2_comb_red = pd.merge(both_df_na_comb_red, match_gbrpred_comb_red, on=['Tm2', 'Tm1'], how='left').dropna()
both_df2_comb_red

Unnamed: 0,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,team_underdog,spread_favorite,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Tm1,Tm2,PF_gbrred_comb_Pred1,PA_gbrred_comb_Pred1,PF_gbrred_comb_Pred2,PA_gbrred_comb_Pred2,PF_Tm1,PF_Tm2,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_Total
0,1,Indianapolis Colts,20,20,Houston Texans,Indianapolis Colts,Houston Texans,7.0,0,U,45.5,40,U,Houston Texans,Indianapolis Colts,18.241579,22.220764,18.861145,22.055946,20.148762,20.540954,Indianapolis Colts,0.392,Houston Texans,40.690
1,1,Washington Commanders,28,22,Jacksonville Jaguars,Washington Commanders,Jacksonville Jaguars,3.0,6,F,43.0,50,O,Jacksonville Jaguars,Washington Commanders,23.953179,20.865802,19.412588,20.824805,22.388992,20.139195,Jacksonville Jaguars,2.250,Washington Commanders,42.528
2,1,Los Angeles Chargers,24,19,Las Vegas Raiders,Los Angeles Chargers,Las Vegas Raiders,3.5,5,F,52.5,43,U,Las Vegas Raiders,Los Angeles Chargers,23.411709,25.064548,23.590743,22.660070,23.035889,24.327646,Los Angeles Chargers,1.292,Las Vegas Raiders,47.364
3,1,Tampa Bay Buccaneers,19,3,Dallas Cowboys,Tampa Bay Buccaneers,Dallas Cowboys,2.5,16,F,49.5,22,U,Dallas Cowboys,Tampa Bay Buccaneers,26.169280,20.487142,21.528702,20.868067,23.518673,21.007922,Dallas Cowboys,2.511,Tampa Bay Buccaneers,44.527
4,1,Pittsburgh Steelers,23,20,Cincinnati Bengals,Cincinnati Bengals,Pittsburgh Steelers,7.0,3,U,44.5,43,U,Cincinnati Bengals,Pittsburgh Steelers,26.393463,20.582833,18.826956,21.910859,24.152161,19.704894,Cincinnati Bengals,4.447,Pittsburgh Steelers,43.857
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82,17,Green Bay Packers,41,17,Minnesota Vikings,Green Bay Packers,Minnesota Vikings,3.5,24,F,47.5,58,O,Minnesota Vikings,Green Bay Packers,24.602103,24.398425,21.127359,22.610406,23.606254,22.762892,Minnesota Vikings,0.843,Green Bay Packers,46.369
83,17,Pittsburgh Steelers,16,13,Baltimore Ravens,Baltimore Ravens,Pittsburgh Steelers,1.0,3,U,35.5,29,U,Baltimore Ravens,Pittsburgh Steelers,20.909510,20.073103,18.826956,21.910859,21.410185,19.450029,Baltimore Ravens,1.960,Pittsburgh Steelers,40.860
84,18,Washington Commanders,26,6,Dallas Cowboys,Dallas Cowboys,Washington Commanders,7.5,20,U,41.0,32,U,Dallas Cowboys,Washington Commanders,26.169280,20.487142,19.412588,20.824805,23.497042,19.949865,Dallas Cowboys,3.547,Washington Commanders,43.447
85,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,Cleveland Browns,2.5,14,F,40.0,42,O,Cleveland Browns,Pittsburgh Steelers,21.380020,22.722365,18.826956,21.910859,21.645439,20.774660,Cleveland Browns,0.871,Pittsburgh Steelers,42.420


In [1331]:
#Merging the rest of the reduced matchup DF with the actual schedule DF. Then filtering for a final DF. 
df_comb_red_comb = [both_df3_comb_red, both_df2_comb_red]
result_comb_red = pd.concat(df_comb_red_comb).reset_index(drop=True).dropna()
result_comb_red['schedule_week'] = result_comb_red['schedule_week'].astype(np.int64)
final_result_comb_red = result_comb_red.sort_values(by=['schedule_week']).reset_index(drop=True)
stdata2022_full_comb_red = final_result_comb_red.filter(['schedule_week', 'winning_team', 'winning_score', 'losing_score', 'losing_team',
                                      'team_favorite', 'team_underdog', 'spread_favorite', 'actual_spread', 'F/U_cover',
                                      'over_under_line', 'actual_total', 'O/U', 'PF_Tm1', 'PF_Tm2', 'Pred_team_favorite',
                                      'Pred_Spread', 'Pred_team_underdog', 'Pred_Total'])
stdata2022_full_comb_red['Pred_PF_favorite'] = stdata2022_full_comb_red.apply(pft, axis=1)
stdata2022_full_comb_red['Pred_PF_underdog'] = stdata2022_full_comb_red.apply(put, axis=1)
stdata2022_full_comb_red = stdata2022_full_comb_red.filter(['schedule_week', 'winning_team', 'winning_score', 'losing_score', 'losing_team',
                                      'team_favorite', 'spread_favorite', 'team_underdog', 'actual_spread', 'F/U_cover',
                                      'over_under_line', 'actual_total', 'O/U', 'Pred_PF_favorite', 'Pred_team_favorite',
                                    'Pred_Spread', 'Pred_team_underdog', 'Pred_PF_underdog', 'Pred_Total'])
stdata2022_full_comb_red

Unnamed: 0,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,spread_favorite,team_underdog,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Pred_PF_favorite,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_PF_underdog,Pred_Total
0,1,Buffalo Bills,31,10,Los Angeles Rams,Buffalo Bills,2.0,Los Angeles Rams,21,F,52.0,41,U,25.699450,Buffalo Bills,7.473,Los Angeles Rams,18.226497,43.926
1,1,Seattle Seahawks,17,16,Denver Broncos,Denver Broncos,6.0,Seattle Seahawks,1,U,44.0,33,U,22.251001,Seattle Seahawks,2.077,Denver Broncos,20.174073,42.425
2,1,New York Giants,21,20,Tennessee Titans,Tennessee Titans,5.5,New York Giants,1,U,44.0,41,U,21.758781,New York Giants,1.260,Tennessee Titans,20.499220,42.258
3,1,Baltimore Ravens,24,9,New York Jets,Baltimore Ravens,6.5,New York Jets,15,F,44.0,33,U,20.064414,Baltimore Ravens,0.971,New York Jets,19.093717,39.158
4,1,Miami Dolphins,20,7,New England Patriots,Miami Dolphins,3.0,New England Patriots,13,F,46.5,27,U,21.406204,Miami Dolphins,0.174,New England Patriots,21.231917,42.638
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
266,18,Atlanta Falcons,30,17,Tampa Bay Buccaneers,Atlanta Falcons,6.0,Tampa Bay Buccaneers,13,F,40.5,47,O,22.976879,Tampa Bay Buccaneers,2.130,Atlanta Falcons,20.846385,43.823
267,18,Kansas City Chiefs,31,13,Las Vegas Raiders,Kansas City Chiefs,8.5,Las Vegas Raiders,18,F,52.0,44,U,27.092916,Kansas City Chiefs,4.251,Las Vegas Raiders,22.841538,49.934
268,18,Jacksonville Jaguars,20,16,Tennessee Titans,Jacksonville Jaguars,6.5,Tennessee Titans,4,U,39.5,36,U,22.649450,Jacksonville Jaguars,3.369,Tennessee Titans,19.280026,41.929
269,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,2.5,Cleveland Browns,14,F,40.0,42,O,21.645439,Cleveland Browns,0.871,Pittsburgh Steelers,20.774660,42.420


In [1332]:
#Applying functions that compares the predicted spread bets and predicted total bets with what actually happened in during the 2022 season.
stdata2022_full_comb_red['Pred_spread_bet'] = stdata2022_full_comb_red.apply(psb, axis=1)
stdata2022_full_comb_red['Pred_total_bet'] = stdata2022_full_comb_red.apply(ptb, axis=1)
stdata2022_full_comb_red['spread_bet_W/L'] = stdata2022_full_comb_red.apply(sbwl, axis=1)
stdata2022_full_comb_red['total_bet_W/L'] = stdata2022_full_comb_red.apply(tbwl, axis=1)
stdata2022_full_comb_red

Unnamed: 0,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,spread_favorite,team_underdog,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Pred_PF_favorite,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_PF_underdog,Pred_Total,Pred_spread_bet,Pred_total_bet,spread_bet_W/L,total_bet_W/L
0,1,Buffalo Bills,31,10,Los Angeles Rams,Buffalo Bills,2.0,Los Angeles Rams,21,F,52.0,41,U,25.699450,Buffalo Bills,7.473,Los Angeles Rams,18.226497,43.926,F,U,W,W
1,1,Seattle Seahawks,17,16,Denver Broncos,Denver Broncos,6.0,Seattle Seahawks,1,U,44.0,33,U,22.251001,Seattle Seahawks,2.077,Denver Broncos,20.174073,42.425,U,U,W,W
2,1,New York Giants,21,20,Tennessee Titans,Tennessee Titans,5.5,New York Giants,1,U,44.0,41,U,21.758781,New York Giants,1.260,Tennessee Titans,20.499220,42.258,U,U,W,W
3,1,Baltimore Ravens,24,9,New York Jets,Baltimore Ravens,6.5,New York Jets,15,F,44.0,33,U,20.064414,Baltimore Ravens,0.971,New York Jets,19.093717,39.158,U,U,L,W
4,1,Miami Dolphins,20,7,New England Patriots,Miami Dolphins,3.0,New England Patriots,13,F,46.5,27,U,21.406204,Miami Dolphins,0.174,New England Patriots,21.231917,42.638,U,U,L,W
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
266,18,Atlanta Falcons,30,17,Tampa Bay Buccaneers,Atlanta Falcons,6.0,Tampa Bay Buccaneers,13,F,40.5,47,O,22.976879,Tampa Bay Buccaneers,2.130,Atlanta Falcons,20.846385,43.823,U,O,L,W
267,18,Kansas City Chiefs,31,13,Las Vegas Raiders,Kansas City Chiefs,8.5,Las Vegas Raiders,18,F,52.0,44,U,27.092916,Kansas City Chiefs,4.251,Las Vegas Raiders,22.841538,49.934,U,U,L,W
268,18,Jacksonville Jaguars,20,16,Tennessee Titans,Jacksonville Jaguars,6.5,Tennessee Titans,4,U,39.5,36,U,22.649450,Jacksonville Jaguars,3.369,Tennessee Titans,19.280026,41.929,U,O,W,L
269,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,2.5,Cleveland Browns,14,F,40.0,42,O,21.645439,Cleveland Browns,0.871,Pittsburgh Steelers,20.774660,42.420,U,O,L,W


# W/L Results for Both Spread and Total Bets for Each GBR Model

### Full Offense and Defense

In [1333]:
#Counting up the W/L to determine model's performance on hypothetical bets for the spread.
stdata2022_full['spread_bet_W/L'].value_counts()

W    156
L    115
Name: spread_bet_W/L, dtype: int64

In [1334]:
#Counting up the W/L to determine model's performance on hypothetical bets for the total. 
stdata2022_full['total_bet_W/L'].value_counts()

W    149
L    122
Name: total_bet_W/L, dtype: int64

In [1335]:
#Printing final W percentages for both spread and total for the optimal full basic GradientBoostingRegressor estimator. 
print("The optimal full basic GradientBoostingRegressor model would have correctly predicted the spread bets at", (round((156/271), 4) * 100), "percent.")
print("The optimal full basic GradientBoostingRegressor model would have correctly predicted the total bets at", (round((149/271), 4) * 100), "percent.")

The optimal full basic GradientBoostingRegressor model would have correctly predicted the spread bets at 57.56 percent.
The optimal full basic GradientBoostingRegressor model would have correctly predicted the total bets at 54.98 percent.


### Advanced Offense and Defense

In [1336]:
#Testing optimal spread model on teasers
stdata2022_full_adv_teas = stdata2022_full_adv.copy()
stdata2022_full_adv_teas['underdog_tease6'] = stdata2022_full_adv_teas['spread_favorite'] + 6
stdata2022_full_adv_teas['favorite_tease6'] = (stdata2022_full_adv_teas['spread_favorite'] - 6).abs()
stdata2022_full_adv_teas['underdog_tease7'] = stdata2022_full_adv_teas['spread_favorite'] + 7
stdata2022_full_adv_teas['favorite_tease7'] = (stdata2022_full_adv_teas['spread_favorite'] - 7).abs()
stdata2022_full_adv_teas['underdog_tease10'] = stdata2022_full_adv_teas['spread_favorite'] + 10
stdata2022_full_adv_teas['favorite_tease10'] = (stdata2022_full_adv_teas['spread_favorite'] - 10).abs()

def psbt6(row):
    if row['Pred_spread_bet'] == 'F':
        val = row['favorite_tease6']
    else:
        val = row['underdog_tease6']
    return val
stdata2022_full_adv_teas['Pred_tease_spread6'] = stdata2022_full_adv_teas.apply(psbt6, axis=1)

def psbt7(row):
    if row['Pred_spread_bet'] == 'F':
        val = row['favorite_tease7']
    else:
        val = row['underdog_tease7']
    return val
stdata2022_full_adv_teas['Pred_tease_spread7'] = stdata2022_full_adv_teas.apply(psbt7, axis=1)

def psbt10(row):
    if row['Pred_spread_bet'] == 'F':
        val = row['favorite_tease10']
    else:
        val = row['underdog_tease10']
    return val
stdata2022_full_adv_teas['Pred_tease_spread10'] = stdata2022_full_adv_teas.apply(psbt10, axis=1)

stdata2022_full_adv_teas

Unnamed: 0,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,spread_favorite,team_underdog,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Pred_PF_favorite,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_PF_underdog,Pred_Total,Pred_spread_bet,Pred_total_bet,spread_bet_W/L,total_bet_W/L,underdog_tease6,favorite_tease6,underdog_tease7,favorite_tease7,underdog_tease10,favorite_tease10,Pred_tease_spread6,Pred_tease_spread7,Pred_tease_spread10
0,1,Buffalo Bills,31,10,Los Angeles Rams,Buffalo Bills,2.0,Los Angeles Rams,21,F,52.0,41,U,25.593539,Buffalo Bills,7.250,Los Angeles Rams,18.343960,43.937,F,U,W,W,8.0,4.0,9.0,5.0,12.0,8.0,4.0,5.0,8.0
1,1,Seattle Seahawks,17,16,Denver Broncos,Denver Broncos,6.0,Seattle Seahawks,1,U,44.0,33,U,21.976054,Seattle Seahawks,2.418,Denver Broncos,19.558253,41.534,U,U,W,W,12.0,0.0,13.0,1.0,16.0,4.0,12.0,13.0,16.0
2,1,New York Giants,21,20,Tennessee Titans,Tennessee Titans,5.5,New York Giants,1,U,44.0,41,U,21.355027,New York Giants,0.430,Tennessee Titans,20.924728,42.280,U,U,W,W,11.5,0.5,12.5,1.5,15.5,4.5,11.5,12.5,15.5
3,1,Baltimore Ravens,24,9,New York Jets,Baltimore Ravens,6.5,New York Jets,15,F,44.0,33,U,19.751249,Baltimore Ravens,1.477,New York Jets,18.273953,38.025,U,U,L,W,12.5,0.5,13.5,0.5,16.5,3.5,12.5,13.5,16.5
4,1,Miami Dolphins,20,7,New England Patriots,Miami Dolphins,3.0,New England Patriots,13,F,46.5,27,U,22.143559,New England Patriots,0.293,Miami Dolphins,21.850983,43.995,U,U,L,W,9.0,3.0,10.0,4.0,13.0,7.0,9.0,10.0,13.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
266,18,Atlanta Falcons,30,17,Tampa Bay Buccaneers,Atlanta Falcons,6.0,Tampa Bay Buccaneers,13,F,40.5,47,O,21.492067,Atlanta Falcons,0.387,Tampa Bay Buccaneers,21.104685,42.597,U,O,L,W,12.0,0.0,13.0,1.0,16.0,4.0,12.0,13.0,16.0
267,18,Kansas City Chiefs,31,13,Las Vegas Raiders,Kansas City Chiefs,8.5,Las Vegas Raiders,18,F,52.0,44,U,26.647408,Kansas City Chiefs,3.019,Las Vegas Raiders,23.628528,50.276,U,U,L,W,14.5,2.5,15.5,1.5,18.5,1.5,14.5,15.5,18.5
268,18,Jacksonville Jaguars,20,16,Tennessee Titans,Jacksonville Jaguars,6.5,Tennessee Titans,4,U,39.5,36,U,22.303569,Jacksonville Jaguars,2.670,Tennessee Titans,19.633077,41.937,U,O,W,L,12.5,0.5,13.5,0.5,16.5,3.5,12.5,13.5,16.5
269,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,2.5,Cleveland Browns,14,F,40.0,42,O,21.248351,Cleveland Browns,0.171,Pittsburgh Steelers,21.077306,42.326,U,O,L,W,8.5,3.5,9.5,4.5,12.5,7.5,8.5,9.5,12.5


In [1337]:
def pteamb(row):
    if row['team_underdog'] == row['Pred_team_favorite']:
        val = row['Pred_team_favorite']
    elif row['Pred_spread_bet'] == 'F':
        val = row['Pred_team_favorite']
    else:
        val = row['Pred_team_underdog']
    return val

def tease6swl(row):
    if row['spread_bet_W/L'] == 'W':
        val = 'W'
    elif row['actual_spread'] > row['Pred_tease_spread6']:
        val = 'L'
    elif row['actual_spread'] == row['Pred_tease_spread6']:
        val = 'P'
    else:
        val = 'W'
    return val

def tease7swl(row):
    if row['spread_bet_W/L'] == 'W':
        val = 'W'
    elif row['actual_spread'] > row['Pred_tease_spread7']:
        val = 'L'
    elif row['actual_spread'] == row['Pred_tease_spread7']:
        val = 'P'
    else:
        val = 'W'
    return val

def tease10swl(row):
    if row['spread_bet_W/L'] == 'W':
        val = 'W'
    elif row['actual_spread'] > row['Pred_tease_spread10']:
        val = 'L'
    elif row['actual_spread'] == row['Pred_tease_spread10']:
        val = 'L'
    else:
        val = 'W'
    return val

stdata2022_full_adv_teas['Pred_team_bet'] = stdata2022_full_adv_teas.apply(pteamb, axis=1)
stdata2022_full_adv_teas['tease6_spread_W/L'] = stdata2022_full_adv_teas.apply(tease6swl, axis=1)
stdata2022_full_adv_teas['tease7_spread_W/L'] = stdata2022_full_adv_teas.apply(tease7swl, axis=1)
stdata2022_full_adv_teas['tease10_spread_W/L'] = stdata2022_full_adv_teas.apply(tease10swl, axis=1)
stdata2022_full_adv_teas = stdata2022_full_adv_teas.filter(['schedule_week', 'winning_team', 'winning_score', 'losing_score', 'losing_team',
                                      'team_favorite', 'spread_favorite', 'team_underdog', 'actual_spread', 'F/U_cover',
                                      'over_under_line', 'actual_total', 'O/U', 'Pred_PF_favorite', 'Pred_team_favorite',
                                    'Pred_Spread', 'Pred_team_underdog', 'Pred_PF_underdog', 'Pred_Total', 'Pred_spread_bet', 'Pred_total_bet',
                                    'spread_bet_W/L', 'total_bet_W/L', 'underdog_tease6', 'favorite_tease6', 'underdog_tease7', 'favorite_tease7',
                                        'underdog_tease10', 'favorite_tease10', 'Pred_team_bet', 'Pred_tease_spread6', 'tease6_spread_W/L',
                                        'Pred_tease_spread7', 'tease7_spread_W/L', 'Pred_tease_spread10', 'tease10_spread_W/L'])
stdata2022_full_adv_teas

Unnamed: 0,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,spread_favorite,team_underdog,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Pred_PF_favorite,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_PF_underdog,Pred_Total,Pred_spread_bet,Pred_total_bet,spread_bet_W/L,total_bet_W/L,underdog_tease6,favorite_tease6,underdog_tease7,favorite_tease7,underdog_tease10,favorite_tease10,Pred_team_bet,Pred_tease_spread6,tease6_spread_W/L,Pred_tease_spread7,tease7_spread_W/L,Pred_tease_spread10,tease10_spread_W/L
0,1,Buffalo Bills,31,10,Los Angeles Rams,Buffalo Bills,2.0,Los Angeles Rams,21,F,52.0,41,U,25.593539,Buffalo Bills,7.250,Los Angeles Rams,18.343960,43.937,F,U,W,W,8.0,4.0,9.0,5.0,12.0,8.0,Buffalo Bills,4.0,W,5.0,W,8.0,W
1,1,Seattle Seahawks,17,16,Denver Broncos,Denver Broncos,6.0,Seattle Seahawks,1,U,44.0,33,U,21.976054,Seattle Seahawks,2.418,Denver Broncos,19.558253,41.534,U,U,W,W,12.0,0.0,13.0,1.0,16.0,4.0,Seattle Seahawks,12.0,W,13.0,W,16.0,W
2,1,New York Giants,21,20,Tennessee Titans,Tennessee Titans,5.5,New York Giants,1,U,44.0,41,U,21.355027,New York Giants,0.430,Tennessee Titans,20.924728,42.280,U,U,W,W,11.5,0.5,12.5,1.5,15.5,4.5,New York Giants,11.5,W,12.5,W,15.5,W
3,1,Baltimore Ravens,24,9,New York Jets,Baltimore Ravens,6.5,New York Jets,15,F,44.0,33,U,19.751249,Baltimore Ravens,1.477,New York Jets,18.273953,38.025,U,U,L,W,12.5,0.5,13.5,0.5,16.5,3.5,New York Jets,12.5,L,13.5,L,16.5,W
4,1,Miami Dolphins,20,7,New England Patriots,Miami Dolphins,3.0,New England Patriots,13,F,46.5,27,U,22.143559,New England Patriots,0.293,Miami Dolphins,21.850983,43.995,U,U,L,W,9.0,3.0,10.0,4.0,13.0,7.0,New England Patriots,9.0,L,10.0,L,13.0,L
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
266,18,Atlanta Falcons,30,17,Tampa Bay Buccaneers,Atlanta Falcons,6.0,Tampa Bay Buccaneers,13,F,40.5,47,O,21.492067,Atlanta Falcons,0.387,Tampa Bay Buccaneers,21.104685,42.597,U,O,L,W,12.0,0.0,13.0,1.0,16.0,4.0,Tampa Bay Buccaneers,12.0,L,13.0,P,16.0,W
267,18,Kansas City Chiefs,31,13,Las Vegas Raiders,Kansas City Chiefs,8.5,Las Vegas Raiders,18,F,52.0,44,U,26.647408,Kansas City Chiefs,3.019,Las Vegas Raiders,23.628528,50.276,U,U,L,W,14.5,2.5,15.5,1.5,18.5,1.5,Las Vegas Raiders,14.5,L,15.5,L,18.5,W
268,18,Jacksonville Jaguars,20,16,Tennessee Titans,Jacksonville Jaguars,6.5,Tennessee Titans,4,U,39.5,36,U,22.303569,Jacksonville Jaguars,2.670,Tennessee Titans,19.633077,41.937,U,O,W,L,12.5,0.5,13.5,0.5,16.5,3.5,Tennessee Titans,12.5,W,13.5,W,16.5,W
269,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,2.5,Cleveland Browns,14,F,40.0,42,O,21.248351,Cleveland Browns,0.171,Pittsburgh Steelers,21.077306,42.326,U,O,L,W,8.5,3.5,9.5,4.5,12.5,7.5,Cleveland Browns,8.5,L,9.5,L,12.5,L


In [1338]:
stdata2022_full_adv_teas['tease6_spread_W/L'].value_counts()

W    211
L     56
P      4
Name: tease6_spread_W/L, dtype: int64

In [1339]:
stdata2022_full_adv_teas['tease7_spread_W/L'].value_counts()

W    218
L     49
P      4
Name: tease7_spread_W/L, dtype: int64

In [1340]:
stdata2022_full_adv_teas['tease10_spread_W/L'].value_counts()

W    233
L     38
Name: tease10_spread_W/L, dtype: int64

In [1341]:
#Counting up the W/L to determine model's performance on hypothetical bets for the spread.
stdata2022_full_adv['spread_bet_W/L'].value_counts()

W    160
L    111
Name: spread_bet_W/L, dtype: int64

In [1342]:
#Counting up the W/L to determine model's performance on hypothetical bets for the total. 
stdata2022_full_adv['total_bet_W/L'].value_counts()

W    153
L    118
Name: total_bet_W/L, dtype: int64

In [1343]:
#Printing final W percentages for both spread and total for the optimal full advanced GradientBoostingRegressor estimator. 
print("The optimal full advanced GradientBoostingRegressor model would have correctly predicted the spread bets at", (round((160/271), 5) * 100), "percent.")
print("The optimal full advanced GradientBoostingRegressor model would have correctly predicted the total bets at", (round((153/271), 4) * 100), "percent.")

The optimal full advanced GradientBoostingRegressor model would have correctly predicted the spread bets at 59.041 percent.
The optimal full advanced GradientBoostingRegressor model would have correctly predicted the total bets at 56.46 percent.


### Reduced Full Dataset

In [1344]:
#Counting up the W/L to determine model's performance on hypothetical bets for the spread.
stdata2022_full_red['spread_bet_W/L'].value_counts()

W    156
L    115
Name: spread_bet_W/L, dtype: int64

In [1345]:
#Counting up the W/L to determine model's performance on hypothetical bets for the total.
stdata2022_full_red['total_bet_W/L'].value_counts()

W    153
L    118
Name: total_bet_W/L, dtype: int64

In [1346]:
#Printing final W percentages for both spread and total for the optimal GradientBoostingRegressor estimator. 
print("The reduced GradientBoostingRegressor model would have correctly predicted the spread bets at", (round((156/271), 4) * 100), "percent.")
print("The reduced GradientBoostingRegressor model would have correctly predicted the total bets at", (round((153/271), 4) * 100), "percent.")

The reduced GradientBoostingRegressor model would have correctly predicted the spread bets at 57.56 percent.
The reduced GradientBoostingRegressor model would have correctly predicted the total bets at 56.46 percent.


### Reduced Advanced Dataset

In [1347]:
#Counting up the W/L to determine model's performance on hypothetical bets for the spread.
stdata2022_full_adv_red['spread_bet_W/L'].value_counts()

W    153
L    118
Name: spread_bet_W/L, dtype: int64

In [1348]:
#Counting up the W/L to determine model's performance on hypothetical bets for the total.
stdata2022_full_adv_red['total_bet_W/L'].value_counts()

W    155
L    116
Name: total_bet_W/L, dtype: int64

In [1349]:
#Printing final W percentages for both spread and total for the optimal GradientBoostingRegressor estimator. 
print("The reduced advanced GradientBoostingRegressor model would have correctly predicted the spread bets at", (round((153/271), 4) * 100), "percent.")
print("The reduced advanced GradientBoostingRegressor model would have correctly predicted the total bets at", (round((155/271), 4) * 100), "percent.")

The reduced advanced GradientBoostingRegressor model would have correctly predicted the spread bets at 56.46 percent.
The reduced advanced GradientBoostingRegressor model would have correctly predicted the total bets at 57.199999999999996 percent.


In [1350]:
#Testing optimal totals model on teasers
stdata2022_full_adv_red_teas = stdata2022_full_adv_red.copy()

def ptbt6(row):
    if row['Pred_total_bet'] == 'U':
        val = row['over_under_line'] + 6
    else:
        val = row['over_under_line'] - 6
    return val
stdata2022_full_adv_red_teas['Pred_tease_total6'] = stdata2022_full_adv_red_teas.apply(ptbt6, axis=1)

def ptbt7(row):
    if row['Pred_total_bet'] == 'U':
        val = row['over_under_line'] + 7
    else:
        val = row['over_under_line'] - 7
    return val
stdata2022_full_adv_red_teas['Pred_tease_total7'] = stdata2022_full_adv_red_teas.apply(ptbt7, axis=1)

def ptbt10(row):
    if row['Pred_total_bet'] == 'U':
        val = row['over_under_line'] + 10
    else:
        val = row['over_under_line'] - 10
    return val
stdata2022_full_adv_red_teas['Pred_tease_total10'] = stdata2022_full_adv_red_teas.apply(ptbt10, axis=1)

stdata2022_full_adv_teas

Unnamed: 0,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,spread_favorite,team_underdog,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Pred_PF_favorite,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_PF_underdog,Pred_Total,Pred_spread_bet,Pred_total_bet,spread_bet_W/L,total_bet_W/L,underdog_tease6,favorite_tease6,underdog_tease7,favorite_tease7,underdog_tease10,favorite_tease10,Pred_team_bet,Pred_tease_spread6,tease6_spread_W/L,Pred_tease_spread7,tease7_spread_W/L,Pred_tease_spread10,tease10_spread_W/L
0,1,Buffalo Bills,31,10,Los Angeles Rams,Buffalo Bills,2.0,Los Angeles Rams,21,F,52.0,41,U,25.593539,Buffalo Bills,7.250,Los Angeles Rams,18.343960,43.937,F,U,W,W,8.0,4.0,9.0,5.0,12.0,8.0,Buffalo Bills,4.0,W,5.0,W,8.0,W
1,1,Seattle Seahawks,17,16,Denver Broncos,Denver Broncos,6.0,Seattle Seahawks,1,U,44.0,33,U,21.976054,Seattle Seahawks,2.418,Denver Broncos,19.558253,41.534,U,U,W,W,12.0,0.0,13.0,1.0,16.0,4.0,Seattle Seahawks,12.0,W,13.0,W,16.0,W
2,1,New York Giants,21,20,Tennessee Titans,Tennessee Titans,5.5,New York Giants,1,U,44.0,41,U,21.355027,New York Giants,0.430,Tennessee Titans,20.924728,42.280,U,U,W,W,11.5,0.5,12.5,1.5,15.5,4.5,New York Giants,11.5,W,12.5,W,15.5,W
3,1,Baltimore Ravens,24,9,New York Jets,Baltimore Ravens,6.5,New York Jets,15,F,44.0,33,U,19.751249,Baltimore Ravens,1.477,New York Jets,18.273953,38.025,U,U,L,W,12.5,0.5,13.5,0.5,16.5,3.5,New York Jets,12.5,L,13.5,L,16.5,W
4,1,Miami Dolphins,20,7,New England Patriots,Miami Dolphins,3.0,New England Patriots,13,F,46.5,27,U,22.143559,New England Patriots,0.293,Miami Dolphins,21.850983,43.995,U,U,L,W,9.0,3.0,10.0,4.0,13.0,7.0,New England Patriots,9.0,L,10.0,L,13.0,L
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
266,18,Atlanta Falcons,30,17,Tampa Bay Buccaneers,Atlanta Falcons,6.0,Tampa Bay Buccaneers,13,F,40.5,47,O,21.492067,Atlanta Falcons,0.387,Tampa Bay Buccaneers,21.104685,42.597,U,O,L,W,12.0,0.0,13.0,1.0,16.0,4.0,Tampa Bay Buccaneers,12.0,L,13.0,P,16.0,W
267,18,Kansas City Chiefs,31,13,Las Vegas Raiders,Kansas City Chiefs,8.5,Las Vegas Raiders,18,F,52.0,44,U,26.647408,Kansas City Chiefs,3.019,Las Vegas Raiders,23.628528,50.276,U,U,L,W,14.5,2.5,15.5,1.5,18.5,1.5,Las Vegas Raiders,14.5,L,15.5,L,18.5,W
268,18,Jacksonville Jaguars,20,16,Tennessee Titans,Jacksonville Jaguars,6.5,Tennessee Titans,4,U,39.5,36,U,22.303569,Jacksonville Jaguars,2.670,Tennessee Titans,19.633077,41.937,U,O,W,L,12.5,0.5,13.5,0.5,16.5,3.5,Tennessee Titans,12.5,W,13.5,W,16.5,W
269,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,2.5,Cleveland Browns,14,F,40.0,42,O,21.248351,Cleveland Browns,0.171,Pittsburgh Steelers,21.077306,42.326,U,O,L,W,8.5,3.5,9.5,4.5,12.5,7.5,Cleveland Browns,8.5,L,9.5,L,12.5,L


In [1351]:
#Creating function to find W/L for predicted teaser totals compared to actual scores
def tease6twl(row):
    if row['Pred_total_bet'] == 'U':
       
        if row['Pred_tease_total6'] > row['actual_total']:
            val = 'W'
        elif row['Pred_tease_total6'] == row['actual_total']:
            val = 'P'
        else:
            val = 'L'
            
    elif row['Pred_total_bet'] == 'O':
        
        if row['Pred_tease_total6'] < row['actual_total']:
            val = 'W'
        elif row['Pred_tease_total6'] == row['actual_total']:
            val = 'P'
        else:
            val = 'L'
            
    else:
        val = 'W'
    return val

def tease7twl(row):
    if row['Pred_total_bet'] == 'U':
       
        if row['Pred_tease_total7'] > row['actual_total']:
            val = 'W'
        elif row['Pred_tease_total7'] == row['actual_total']:
            val = 'P'
        else:
            val = 'L'
            
    elif row['Pred_total_bet'] == 'O':
        
        if row['Pred_tease_total7'] < row['actual_total']:
            val = 'W'
        elif row['Pred_tease_total7'] == row['actual_total']:
            val = 'P'
        else:
            val = 'L'
            
    else:
        val = 'W'
    return val

def tease10twl(row):
    if row['Pred_total_bet'] == 'U':
       
        if row['Pred_tease_total10'] > row['actual_total']:
            val = 'W'
        elif row['Pred_tease_total10'] == row['actual_total']:
            val = 'L'
        else:
            val = 'L'
            
    elif row['Pred_total_bet'] == 'O':
        
        if row['Pred_tease_total10'] < row['actual_total']:
            val = 'W'
        elif row['Pred_tease_total10'] == row['actual_total']:
            val = 'L'
        else:
            val = 'L'
            
    else:
        val = 'W'
    return val

stdata2022_full_adv_red_teas['tease6_total_W/L'] = stdata2022_full_adv_red_teas.apply(tease6twl, axis=1)
stdata2022_full_adv_red_teas['tease7_total_W/L'] = stdata2022_full_adv_red_teas.apply(tease7twl, axis=1)
stdata2022_full_adv_red_teas['tease10_total_W/L'] = stdata2022_full_adv_red_teas.apply(tease10twl, axis=1)
stdata2022_full_adv_red_teas

Flushing oldest 200 entries.
  warn('Output cache limit (currently {sz} entries) hit.\n'


Unnamed: 0,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,spread_favorite,team_underdog,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Pred_PF_favorite,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_PF_underdog,Pred_Total,Pred_spread_bet,Pred_total_bet,spread_bet_W/L,total_bet_W/L,Pred_tease_total6,Pred_tease_total7,Pred_tease_total10,tease6_total_W/L,tease7_total_W/L,tease10_total_W/L
0,1,Buffalo Bills,31,10,Los Angeles Rams,Buffalo Bills,2.0,Los Angeles Rams,21,F,52.0,41,U,24.914370,Buffalo Bills,7.040,Los Angeles Rams,17.874364,42.789,F,U,W,W,58.0,59.0,62.0,W,W,W
1,1,Seattle Seahawks,17,16,Denver Broncos,Denver Broncos,6.0,Seattle Seahawks,1,U,44.0,33,U,21.880582,Seattle Seahawks,1.823,Denver Broncos,20.057426,41.938,U,U,W,W,50.0,51.0,54.0,W,W,W
2,1,New York Giants,21,20,Tennessee Titans,Tennessee Titans,5.5,New York Giants,1,U,44.0,41,U,21.165986,New York Giants,0.174,Tennessee Titans,20.992136,42.158,U,U,W,W,50.0,51.0,54.0,W,W,W
3,1,Baltimore Ravens,24,9,New York Jets,Baltimore Ravens,6.5,New York Jets,15,F,44.0,33,U,19.724773,Baltimore Ravens,1.432,New York Jets,18.292389,38.017,U,U,L,W,50.0,51.0,54.0,W,W,W
4,1,Miami Dolphins,20,7,New England Patriots,Miami Dolphins,3.0,New England Patriots,13,F,46.5,27,U,22.594027,Miami Dolphins,1.295,New England Patriots,21.298777,43.893,U,U,L,W,52.5,53.5,56.5,W,W,W
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
266,18,Atlanta Falcons,30,17,Tampa Bay Buccaneers,Atlanta Falcons,6.0,Tampa Bay Buccaneers,13,F,40.5,47,O,23.544860,Tampa Bay Buccaneers,2.631,Atlanta Falcons,20.914029,44.459,U,O,L,W,34.5,33.5,30.5,W,W,W
267,18,Kansas City Chiefs,31,13,Las Vegas Raiders,Kansas City Chiefs,8.5,Las Vegas Raiders,18,F,52.0,44,U,27.047425,Kansas City Chiefs,5.149,Las Vegas Raiders,21.898002,48.945,U,U,L,W,58.0,59.0,62.0,W,W,W
268,18,Jacksonville Jaguars,20,16,Tennessee Titans,Jacksonville Jaguars,6.5,Tennessee Titans,4,U,39.5,36,U,22.532902,Jacksonville Jaguars,3.407,Tennessee Titans,19.125706,41.659,U,O,W,L,33.5,32.5,29.5,W,W,W
269,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,2.5,Cleveland Browns,14,F,40.0,42,O,21.202053,Cleveland Browns,0.635,Pittsburgh Steelers,20.567361,41.769,U,O,L,W,34.0,33.0,30.0,W,W,W


In [1352]:
stdata2022_full_adv_red_teas['tease6_total_W/L'].value_counts()

W    202
L     65
P      4
Name: tease6_total_W/L, dtype: int64

In [1353]:
stdata2022_full_adv_red_teas['tease7_total_W/L'].value_counts()

W    210
L     57
P      4
Name: tease7_total_W/L, dtype: int64

In [1354]:
stdata2022_full_adv_red_teas['tease10_total_W/L'].value_counts()

W    237
L     34
Name: tease10_total_W/L, dtype: int64

### Combined Reduced Dataset

In [1355]:
#Counting up the W/L to determine model's performance on hypothetical bets for the spread.
stdata2022_full_comb_red['spread_bet_W/L'].value_counts()

W    156
L    115
Name: spread_bet_W/L, dtype: int64

In [1356]:
#Counting up the W/L to determine model's performance on hypothetical bets for the total.
stdata2022_full_comb_red['total_bet_W/L'].value_counts()

W    152
L    119
Name: total_bet_W/L, dtype: int64

In [1357]:
#Printing final W percentages for both spread and total for the optimal GradientBoostingRegressor estimator. 
print("The combined reduced GradientBoostingRegressor model would have correctly predicted the spread bets at", (round((156/271), 4) * 100), "percent.")
print("The combined reduced GradientBoostingRegressor model would have correctly predicted the total bets at", (round((152/271), 5) * 100), "percent.")

The combined reduced GradientBoostingRegressor model would have correctly predicted the spread bets at 57.56 percent.
The combined reduced GradientBoostingRegressor model would have correctly predicted the total bets at 56.089 percent.


# Final Results and Summary of All Optimal GBR Model Performances 

In [1402]:
#Creating W% summary for each optimal GBR model for both spread and total bets. 
final_GBRdata = [{'GBR_Full': '57.56%', 'GBR_Adv': '59.04%', 'GBR_Full_Red': '57.56%', 'GBR_Adv_Red': '56.46%', 'GBR_Comb_Red': '57.56%'},
              {'GBR_Full': '54.98%', 'GBR_Adv': '56.06%', 'GBR_Full_Red': '56.46%', 'GBR_Adv_Red': '57.2%', 'GBR_Comb_Red': '56.09%'}]
final_GBRsumm = pd.DataFrame(final_GBRdata, index=['Spread_Bets_W%',
                                        'Total_Bets_W%'])
final_GBRsumm

Unnamed: 0,GBR_Full,GBR_Adv,GBR_Full_Red,GBR_Adv_Red,GBR_Comb_Red
Spread_Bets_W%,57.56%,59.04%,57.56%,56.46%,57.56%
Total_Bets_W%,54.98%,56.06%,56.46%,57.2%,56.09%


In [1401]:
#Creating W% summary for each optimal GBR and RF model for both spread and total bets. 
final_GBR_RFdata = [{'GBR_Full': '57.56%', 'RF_Full': '57.93%', 'GBR_Adv': '59.04%', 'RF_Adv': '59.04%', 'GBR_Full_Red': '57.56%', 'RF_Full_Red': '57.93%', 'GBR_Adv_Red': '56.46%', 'RF_Adv_Red': '57.2%', 'GBR_Comb_Red': '57.56%', 'RF_Comb_Red': '57.56%'},
              {'GBR_Full': '54.98%', 'RF_Full': '54.24%', 'GBR_Adv': '56.06%', 'RF_Adv': '52.03%', 'GBR_Full_Red': '56.46%', 'RF_Full_Red': '52.4%', 'GBR_Adv_Red': '57.2%', 'RF_Adv_Red': '57.2%', 'GBR_Comb_Red': '56.09%', 'RF_Comb_Red': '54.98%'}]
proj_summ = pd.DataFrame(final_GBR_RFdata, index=['Spread_Bets_W%',
                                        'Total_Bets_W%'])
proj_summ

Unnamed: 0,GBR_Full,RF_Full,GBR_Adv,RF_Adv,GBR_Full_Red,RF_Full_Red,GBR_Adv_Red,RF_Adv_Red,GBR_Comb_Red,RF_Comb_Red
Spread_Bets_W%,57.56%,57.93%,59.04%,59.04%,57.56%,57.93%,56.46%,57.2%,57.56%,57.56%
Total_Bets_W%,54.98%,54.24%,56.06%,52.03%,56.46%,52.4%,57.2%,57.2%,56.09%,54.98%


### Optimal Spread Bet Models (GBR_Adv and RF_Adv - 59.04%)

The GBR_Adv and RF_Adv were the best performing models for this specific project in regards to accuractly predicting the NFL spread bets for the 2022 regular season, with each model returning a 59.04% winning percentage when it's predictions were backtested and compared to both the consensus betting lines before each individual game and the actual spread result. For real world context, this would mean that if a bettor placed a spread bet on every NFL matchup from the 2022 regular season for 110 dollars to win 100 (reflecting the standard -110 betting line) and strictly followed the predictions from either the GBR_Adv or RF_Adv models, this bettor would have profited $3,790 and went 160-111 (W/L) against the spread. These models would have been quite profitable and would be considered incredibly successful if effectively applied to real world betting. 

For full transparency, it would be almost impossible for a bettor to only bet -110 betting lines in real life, as there are often added or reduced "juice" built into betting lines based on how money has been bet on games, however, assuming a bettor strictly only bet spread bets (which essentially represent the fair line for both teams in betting terms), it is reasonable to assume that a bettor would end up averaging around a -110 line. 

### Optimal Total Bet Model (GBR_Adv_Red - 57.2%)

The GBR_Adv_Red was the best performing model for this specific project in regards to accuractly predicting the NFL total bets for the 2022 regular season, with the model returning a 57.2% winning percentage when it's predictions were backtested and compared to both the consensus betting lines before each individual game and the actual total result. For real world context, this would mean that if a bettor placed a total bet on every NFL matchup from the 2022 regular season for 110 dollars to win 100 (reflecting the standard -110 betting line) and strictly followed the predictions from the RF_Adv_Red model, this bettor would have profited $2,740 and went 155-116 (W/L) against the spread. This model would have been quite profitable and would be considered incredibly successful if effectively applied to real world betting. 

For full transparency, it would be almost impossible for a bettor to only bet -110 betting lines in real life, as there are often added or reduced "juice" built into betting lines based on how money has been bet on games, however, assuming a bettor strictly only bet spread bets (which essentially represent the fair line for both teams in betting terms), it is reasonable to assume that a bettor would end up averaging around a -110 line. 

### Further Observations

1. Overall, the models performed remarkably well in accurately predicting spread and total bets. In fact, all 10 models utilized in this project predicted the spread successfully and would have netted a profit if strictly followed for the 2022 regular season. In addition, 8 of the 10 models used predicted the totals successfully, with one model essentially breaking even (RF_Full_Red at 52.4%) and only one model which would have resulted in a slight loss (RF_Adv at 52.03%). 
2. The results demonstrate that the models utilized in this project had a much more successful time in consistently predicting spread bets accurately than total bets, with a narrow and impressive 56.46-59.04% winning percentage range for all 10 models, compared to a slightly wider, and less successful overall, 52.03-57.2% winnining percentage range. 
3. In general, the GBR models performed more consistently and with a narrower success range than the RF models, however, it is interesting that the absolute best model for both betting the spread and betting the total was found through fine tuning and testing RandomForest models (in addition to the GBR_Adv of course). This most likely spreaks to the importance of thorough testing and stresses the importance of tuning hyperparameters and data features throughout the model development process. 
4. Based on the best models ending up being ones that incorporated advanced statistics, it is clear (at least in this specific project), that utilizing advanced statistcs over basic statistics is important when predicting games for betting purposes. It is interesting that models trained on 5 seasons of advanced statistics ended up being the optimal models, even though their evaluation metrics (mse, adjusted r2) were worse than the models trained on 15 seasons of basic/passing statistics. This would indicate to me that for betting purposes, it's crucial to test every model for accuracy regardless of standard evaluation metrics, as the goal isn't to create a model that precisely predicts what happened, but beats the Vegas line betting odds in the long term. 

### Considerations Moving Forward 

1. While the results of this specific project ended up being successful, there is still plenty that could be done in terms of model development and refinement in a continued effort to make models that will be successful when betting the spread or total longterm. 
2. Possible steps for continued improvement could include:
   - Continued efforts to discover significant predictive features and experiment with various combinations of    features in model development and evaluation
   - While a select number of significant features from both the full and advanced datasets were combined in this project, it would be worthwhile to combine all of the features from both statistical datasets (while making sure to perform dimension reduction on similar statistics to reduce potential redundancy) to see if a model using the most features possible could produce a more successful model. Obviously, this would take an incredible amount of time to train and fine tune for optimal parameters, and would just need more time than this project allowed to do.
   - While this project only backtested against the 2022 regular season (also because of lack of time), it would be important to continue to backtest the optimal model performances for more prior seasons (at least within 5 years where a fair amount of team rosters/quarterbacks are similar) to see if the models could potentially produce true longterm success in predicting the outcomes of betting spreads and totals. 
   - For this project, I used average as the statistic of choice of PF and PA for the given teams to try and give a somewhat accurate prediction for what would happen to their predicted scores, the spread, and the total. It worked for in this context, but I'd be interested to know if there were other metrics that could simulate matchups more effectively (i.e. simple difference in PF and PA for each team) and improve the model performances. 
   - While this project only focused on spread and total predictions, the most common type of bets, it would also be interesting to see how the models would do in predicting other betting opportunities like moneyline underdogs or 6 or 7 point teasers to see if they could be even more successful and profitable.
3. Overall, this project was fun to work on and I look forward to working on similar projects in the future. 

## Testing Optimal Models on Last 5 Seasons

In [1360]:
#Reducing dataset to only include games from the 2022 regular season
stdata1822 = stdata.loc[(stdata['schedule_season'] == 2018) | (stdata['schedule_season'] == 2019) | (stdata['schedule_season'] == 2020) | (stdata['schedule_season'] == 2021) | (stdata['schedule_season'] == 2022)]
stdata1822 = stdata1822[stdata1822.schedule_playoff != True]
#Modifying dataset to include a column for the points scored in the game, and then a resulting column (O/U) indiciating if the game went Over or Under when compared to the pre-game total.
stdata1822['actual_total'] = stdata1822['score_away'] + stdata1822['score_home']
stdata1822['over_under_line'] = stdata1822['over_under_line'].astype('float64')
stdata1822['O/U'] = np.select([(stdata1822['actual_total'] > stdata1822['over_under_line']), 
                                    (stdata1822['actual_total'] < stdata1822['over_under_line']),
                                (stdata1822['actual_total'] == stdata1822['over_under_line'])], 
                                   ['O', 'U', 'P'])
#Changing all values in the 'spread_favorite' column to positive numbers (the "-" for betting favorites is assumed) so that subsequent functions comparing the actual spread and betting spread can work.
stdata1822['spread_favorite'] = stdata1822['spread_favorite'].abs()
#Filtering the dataset to remove unnecessary columns and organize the columns for clarity
stdata1822 = stdata1822.filter(['schedule_season', 'schedule_week', 'team_home', 'score_home', 'score_away', 'team_away', 'team_favorite_id', 'spread_favorite', 'over_under_line', 'actual_total', 'O/U'])

In [1361]:
#Creating various functions to add features to allow the dataset to determine whether or not a favorite or underdog covered the spread
#Winning score
def w(row):
    if row['score_home'] >= row['score_away']:
        val = row['score_home']
    else:
        val = row['score_away']
    return val
#Losing score
def l(row):
    if row['score_home'] <= row['score_away']:
        val = row['score_home']
    else:
        val = row['score_away']
    return val
#Winning Team
def wt(row):
    if row['score_home'] >= row['score_away']:
        val = row['team_home']
    else:
        val = row['team_away']
    return val
#Losing Team
def lt(row):
    if row['score_home'] <= row['score_away']:
        val = row['team_home']
    else:
        val = row['team_away']
    return val
#Applying functions to create new columns
stdata1822['winning_score'] = stdata1822.apply(w, axis=1)
stdata1822['losing_score'] = stdata1822.apply(l, axis=1)
stdata1822['winning_team'] = stdata1822.apply(wt, axis=1)
stdata1822['losing_team'] = stdata1822.apply(lt, axis=1)

In [1362]:
#Changing 'team_favorite_id' column from team abbreviations to full team name in order to be consistent with the winning/losing team columns.
teams = {
    'ARI': 'Arizona Cardinals',
    'ATL': 'Atlanta Falcons',
    'BAL': 'Baltimore Ravens',
    'BUF': 'Buffalo Bills',
    'CAR': 'Carolina Panthers',
    'CHI': 'Chicago Bears',
    'CIN': 'Cincinnati Bengals',
    'CLE': 'Cleveland Browns',
    'DAL': 'Dallas Cowboys',
    'DEN': 'Denver Broncos',
    'DET': 'Detroit Lions',
    'GB': 'Green Bay Packers',
    'HOU': 'Houston Texans',
    'IND': 'Indianapolis Colts',
    'JAX': 'Jacksonville Jaguars',
    'KC': 'Kansas City Chiefs',
    'LVR': 'Las Vegas Raiders',
    'LAC': 'Los Angeles Chargers',
    'LAR': 'Los Angeles Rams',
    'MIA': 'Miami Dolphins',
    'MIN': 'Minnesota Vikings',
    'NE': 'New England Patriots',
    'NO': 'New Orleans Saints',
    'NYG': 'New York Giants',
    'NYJ': 'New York Jets',
    'PHI': 'Philadelphia Eagles',
    'PIT': 'Pittsburgh Steelers',
    'SF': 'San Francisco 49ers',
    'SEA': 'Seattle Seahawks',
    'TB': 'Tampa Bay Buccaneers',
    'TEN': 'Tennessee Titans', 
    'WAS': 'Washington Commanders'}

stdata1822['team_favorite'] = stdata1822['team_favorite_id'].replace(teams)
#Creating function to create 'team_underdog' (tu) column in order to compare team_favorite and team_underdog
def tu(row):
    if row['winning_team'] == row['team_favorite']:
        val = row['losing_team']
    else:
        val = row['winning_team']
    return val
#Applying function to create column
stdata1822['team_underdog'] = stdata1822.apply(tu, axis=1)
#Creating new column indicating the actual winning margin (spread) for the game to compare with the pre-game spread
stdata1822['actual_spread'] = stdata1822['winning_score'] - stdata1822['losing_score']
#Filtering for new columns and clarity
stdata1822 = stdata1822.filter(['schedule_season', 'schedule_week', 'winning_team', 'winning_score', 'losing_score', 'losing_team', 'team_favorite', 'team_underdog', 'spread_favorite', 'actual_spread', 'over_under_line', 'actual_total', 'O/U'])
stdata1822

Unnamed: 0,schedule_season,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,team_underdog,spread_favorite,actual_spread,over_under_line,actual_total,O/U
12144,2018,1,Philadelphia Eagles,18,12,Atlanta Falcons,Philadelphia Eagles,Atlanta Falcons,1.0,6,44.5,30,U
12145,2018,1,Washington Redskins,24,6,Arizona Cardinals,Arizona Cardinals,Washington Redskins,2.0,18,43.5,30,U
12146,2018,1,Baltimore Ravens,47,3,Buffalo Bills,Baltimore Ravens,Buffalo Bills,7.5,44,39.0,50,O
12147,2018,1,Carolina Panthers,16,8,Dallas Cowboys,Carolina Panthers,Dallas Cowboys,2.5,8,42.5,24,U
12148,2018,1,Cleveland Browns,21,21,Cleveland Browns,Pittsburgh Steelers,Cleveland Browns,3.5,0,41.0,42,O
...,...,...,...,...,...,...,...,...,...,...,...,...,...
13498,2022,18,Philadelphia Eagles,22,16,New York Giants,Philadelphia Eagles,New York Giants,17.0,6,43.0,38,U
13499,2022,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,Cleveland Browns,2.5,14,40.0,42,O
13500,2022,18,San Francisco 49ers,38,13,Arizona Cardinals,San Francisco 49ers,Arizona Cardinals,14.5,25,40.0,51,O
13501,2022,18,Seattle Seahawks,19,16,Los Angeles Rams,Seattle Seahawks,Los Angeles Rams,4.5,3,43.0,35,U


In [1363]:
#Creating duplicate columns to id teams as 'Tm1' and 'Tm2' in order to merge the matchup prediction DF with the 2022 regular season DF. 
stdata1822['Tm1'] = stdata1822['winning_team']
stdata1822['Tm2'] = stdata1822['losing_team']
#Manually dealing with the two games from the 2022 regular season that ended in a tie. Then combining back the tied games into the full stdata2022 DF.
stdata1822_tie = stdata1822.loc[stdata1822['actual_spread'] == 0]
stdata1822 = stdata1822[stdata1822.actual_spread != 0]

In [1364]:
stdata1822_tie['winning_team'] = stdata1822_tie['winning_team'].replace(['Cleveland Browns'], ['Pittsburgh Steelers'])
stdata1822_tie['winning_team'] = stdata1822_tie['winning_team'].replace(['Green Bay Packers'], ['Minnesota Vikings'])
stdata1822_tie['Tm2'] = stdata1822_tie['Tm2'].replace(['Cleveland Browns'], ['Pittsburgh Steelers'])
stdata1822_tie['Tm2'] = stdata1822_tie['Tm2'].replace(['Green Bay Packers'],['Minnesota Vikings'])
stdata1822_tie['winning_team'] = stdata1822_tie['winning_team'].replace(['Arizona Cardinals'],['Detroit Lions'])
stdata1822_tie['winning_team'] = stdata1822_tie['winning_team'].replace(['Philadelphia Eagles'],['Cincinnati Bengals'])
stdata1822_tie['team_underdog'] = stdata1822_tie['team_underdog'].replace(['Philadelphia Eagles'],['Cincinnati Bengals'])
stdata1822_tie['Tm2'] = stdata1822_tie['Tm2'].replace(['Arizona Cardinals'],['Detroit Lions'])
stdata1822_tie['Tm2'] = stdata1822_tie['Tm2'].replace(['Philadelphia Eagles'],['Cincinnati Bengals'])
stdata1822_tie['winning_team'] = stdata1822_tie['winning_team'].replace(['Pittsburgh Steelers'],['Detroit Lions'])
stdata1822_tie['team_underdog'] = stdata1822_tie['team_underdog'].replace(['Pittsburgh Steelers'],['Detroit Lions'])
stdata1822_tie['Tm2'] = stdata1822_tie['Tm2'].replace(['Pittsburgh Steelers'],['Detroit Lions'])
stdata1822_tie['winning_team'] = stdata1822_tie['winning_team'].replace(['Houston Texans'],['Indianapolis Colts'])
stdata1822_tie['winning_team'] = stdata1822_tie['winning_team'].replace(['New York Giants'],['Washington Commanders'])
stdata1822_tie['Tm2'] = stdata1822_tie['Tm2'].replace(['Houston Texans'],['Indianapolis Colts'])
stdata1822_tie['Tm2'] = stdata1822_tie['Tm2'].replace(['New York Giants'],['Washington Commanders'])

stdata1822_tie.loc[12148, 'winning_team'] = 'Pittsburgh Steelers'
stdata1822_tie.loc[12148, 'Tm2'] = 'Pittsburgh Steelers'
stdata1822_tie

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stdata1822_tie['winning_team'] = stdata1822_tie['winning_team'].replace(['Cleveland Browns'], ['Pittsburgh Steelers'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stdata1822_tie['winning_team'] = stdata1822_tie['winning_team'].replace(['Green Bay Packers'], ['Minnesota Vikings'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#r

Unnamed: 0,schedule_season,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,team_underdog,spread_favorite,actual_spread,over_under_line,actual_total,O/U,Tm1,Tm2
12148,2018,1,Pittsburgh Steelers,21,21,Cleveland Browns,Pittsburgh Steelers,Cleveland Browns,3.5,0,41.0,42,O,Cleveland Browns,Pittsburgh Steelers
12165,2018,2,Minnesota Vikings,29,29,Green Bay Packers,Minnesota Vikings,Green Bay Packers,2.0,0,45.0,58,O,Green Bay Packers,Minnesota Vikings
12412,2019,1,Detroit Lions,27,27,Arizona Cardinals,Detroit Lions,Arizona Cardinals,3.0,0,45.5,54,O,Arizona Cardinals,Detroit Lions
12722,2020,3,Cincinnati Bengals,23,23,Philadelphia Eagles,Philadelphia Eagles,Cincinnati Bengals,4.5,0,47.5,46,U,Philadelphia Eagles,Cincinnati Bengals
13093,2021,10,Detroit Lions,16,16,Pittsburgh Steelers,Pittsburgh Steelers,Detroit Lions,6.0,0,40.5,32,U,Pittsburgh Steelers,Detroit Lions
13240,2022,1,Indianapolis Colts,20,20,Houston Texans,Indianapolis Colts,Houston Texans,7.0,0,45.5,40,U,Houston Texans,Indianapolis Colts
13423,2022,13,Washington Commanders,20,20,New York Giants,Washington Commanders,New York Giants,2.5,0,40.5,40,U,New York Giants,Washington Commanders


In [1365]:
stdata1822_both = [stdata1822, stdata1822_tie]
stdata1822 = pd.concat(stdata1822_both).reset_index(drop=True)
stdata1822['schedule_week'] = stdata1822['schedule_week'].astype(np.int64)
stdata1822['schedule_season'] = stdata1822['schedule_season'].astype(np.int64)
stdata1822 = stdata1822.sort_values(by=['schedule_season', 'schedule_week']).reset_index(drop=True)
stdata1822

Unnamed: 0,schedule_season,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,team_underdog,spread_favorite,actual_spread,over_under_line,actual_total,O/U,Tm1,Tm2
0,2018,1,Philadelphia Eagles,18,12,Atlanta Falcons,Philadelphia Eagles,Atlanta Falcons,1.0,6,44.5,30,U,Philadelphia Eagles,Atlanta Falcons
1,2018,1,Washington Redskins,24,6,Arizona Cardinals,Arizona Cardinals,Washington Redskins,2.0,18,43.5,30,U,Washington Redskins,Arizona Cardinals
2,2018,1,Baltimore Ravens,47,3,Buffalo Bills,Baltimore Ravens,Buffalo Bills,7.5,44,39.0,50,O,Baltimore Ravens,Buffalo Bills
3,2018,1,Carolina Panthers,16,8,Dallas Cowboys,Carolina Panthers,Dallas Cowboys,2.5,8,42.5,24,U,Carolina Panthers,Dallas Cowboys
4,2018,1,Denver Broncos,27,24,Seattle Seahawks,Denver Broncos,Seattle Seahawks,3.0,3,42.5,51,O,Denver Broncos,Seattle Seahawks
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1306,2022,18,Philadelphia Eagles,22,16,New York Giants,Philadelphia Eagles,New York Giants,17.0,6,43.0,38,U,Philadelphia Eagles,New York Giants
1307,2022,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,Cleveland Browns,2.5,14,40.0,42,O,Pittsburgh Steelers,Cleveland Browns
1308,2022,18,San Francisco 49ers,38,13,Arizona Cardinals,San Francisco 49ers,Arizona Cardinals,14.5,25,40.0,51,O,San Francisco 49ers,Arizona Cardinals
1309,2022,18,Seattle Seahawks,19,16,Los Angeles Rams,Seattle Seahawks,Los Angeles Rams,4.5,3,43.0,35,U,Seattle Seahawks,Los Angeles Rams


In [1366]:
#Creating function to determine whether or not the team_favorite (F) or team_underdog (U) covered the spread or pushed (P) if the ending spread equaled the original spread
def fu(row):
    if row['team_underdog'] == row['winning_team']:
        val = 'U'
    elif row['actual_spread'] > row['spread_favorite']:
        val = 'F'
    elif row['actual_spread'] == ['spread_favorite']:
        val = 'P'
    else:
        val = 'U'
    return val
#Adding 'F/U_cover' (favorite/underdog) to DataFrame
stdata1822['F/U_cover'] = stdata1822.apply(fu, axis=1)
stdata1822 = stdata1822.filter(['schedule_season', 'schedule_week', 'winning_team', 
                                'winning_score', 'losing_score', 'losing_team', 'team_favorite', 'team_underdog', 'spread_favorite', 
                                'actual_spread', 'F/U_cover', 'over_under_line', 'actual_total', 'O/U'])
stdata1822

Unnamed: 0,schedule_season,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,team_underdog,spread_favorite,actual_spread,F/U_cover,over_under_line,actual_total,O/U
0,2018,1,Philadelphia Eagles,18,12,Atlanta Falcons,Philadelphia Eagles,Atlanta Falcons,1.0,6,F,44.5,30,U
1,2018,1,Washington Redskins,24,6,Arizona Cardinals,Arizona Cardinals,Washington Redskins,2.0,18,U,43.5,30,U
2,2018,1,Baltimore Ravens,47,3,Buffalo Bills,Baltimore Ravens,Buffalo Bills,7.5,44,F,39.0,50,O
3,2018,1,Carolina Panthers,16,8,Dallas Cowboys,Carolina Panthers,Dallas Cowboys,2.5,8,F,42.5,24,U
4,2018,1,Denver Broncos,27,24,Seattle Seahawks,Denver Broncos,Seattle Seahawks,3.0,3,U,42.5,51,O
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1306,2022,18,Philadelphia Eagles,22,16,New York Giants,Philadelphia Eagles,New York Giants,17.0,6,U,43.0,38,U
1307,2022,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,Cleveland Browns,2.5,14,F,40.0,42,O
1308,2022,18,San Francisco 49ers,38,13,Arizona Cardinals,San Francisco 49ers,Arizona Cardinals,14.5,25,F,40.0,51,O
1309,2022,18,Seattle Seahawks,19,16,Los Angeles Rams,Seattle Seahawks,Los Angeles Rams,4.5,3,U,43.0,35,U


## GBR_Adv (optimal spread)

In [1369]:
#Merging the matchup Df with the actual schedule DF
stdata1822['Tm1'] = stdata1822['winning_team']
stdata1822['Tm2'] = stdata1822['losing_team']

both_df3_5 = pd.merge(stdata1822, match_gbrpred_adv, on=['Tm1', 'Tm2'], how='left')
both_df_na_5 = both_df3_5[both_df3_5.isna().any(axis=1)]
both_df_na_5 = both_df_na_5.rename(columns = {'Tm1': 'Tm3', 'Tm2': 'Tm4'})
both_df_na_5['Tm1'] = both_df_na_5['Tm4']
both_df_na_5['Tm2'] = both_df_na_5['Tm3']
both_df_na_5 = both_df_na_5.filter(['schedule_season', 'schedule_week', 'winning_team', 'winning_score', 'losing_score', 'losing_team', 'team_favorite',
                               'team_underdog', 'spread_favorite', 'actual_spread', 'F/U_cover', 'over_under_line', 'actual_total',
                               'O/U', 'Tm1', 'Tm2'])
both_df2_5 = pd.merge(both_df_na_5, match_gbrpred_adv, on=['Tm2', 'Tm1'], how='left').dropna()
both_df2_5

Unnamed: 0,schedule_season,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,team_underdog,spread_favorite,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Tm1,Tm2,PF_gbradv_Pred1,PA_gbradv_Pred1,PF_gbradv_Pred2,PA_gbradv_Pred2,PF_Tm1,PF_Tm2,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_Total
1,2018,1,Baltimore Ravens,47,3,Buffalo Bills,Baltimore Ravens,Buffalo Bills,7.5,44,F,39.0,50,O,Buffalo Bills,Baltimore Ravens,28.400784,18.303863,21.352428,19.308233,23.854508,19.828146,Buffalo Bills,4.026,Baltimore Ravens,43.683
2,2018,1,Carolina Panthers,16,8,Dallas Cowboys,Carolina Panthers,Dallas Cowboys,2.5,8,F,42.5,24,U,Dallas Cowboys,Carolina Panthers,27.498042,19.918712,20.797886,22.840240,25.169141,20.358299,Dallas Cowboys,4.811,Carolina Panthers,45.527
3,2018,1,Denver Broncos,27,24,Seattle Seahawks,Denver Broncos,Seattle Seahawks,3.0,3,U,42.5,51,O,Seattle Seahawks,Denver Broncos,23.899308,22.254408,16.862098,20.052801,21.976054,19.558253,Seattle Seahawks,2.418,Denver Broncos,41.534
4,2018,1,Minnesota Vikings,24,16,San Francisco 49ers,Minnesota Vikings,San Francisco 49ers,6.0,8,F,46.5,40,U,San Francisco 49ers,Minnesota Vikings,27.433570,19.044422,24.436709,24.353770,25.893670,21.740566,San Francisco 49ers,4.153,Minnesota Vikings,47.634
5,2018,1,Tampa Bay Buccaneers,48,40,New Orleans Saints,New Orleans Saints,Tampa Bay Buccaneers,10.0,8,U,50.0,88,O,New Orleans Saints,Tampa Bay Buccaneers,19.402789,19.819162,18.551291,21.484837,20.443813,19.185226,New Orleans Saints,1.259,Tampa Bay Buccaneers,39.629
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
654,2022,17,New Orleans Saints,20,10,Philadelphia Eagles,Philadelphia Eagles,New Orleans Saints,5.0,10,U,42.0,30,U,Philadelphia Eagles,New Orleans Saints,28.098718,19.924910,19.848193,19.542768,23.820743,19.886552,Philadelphia Eagles,3.934,New Orleans Saints,43.707
655,2022,17,Tampa Bay Buccaneers,30,24,Carolina Panthers,Tampa Bay Buccaneers,Carolina Panthers,3.5,6,F,40.5,54,O,Carolina Panthers,Tampa Bay Buccaneers,20.400487,22.927502,18.551291,21.484837,20.942662,20.739396,Carolina Panthers,0.203,Tampa Bay Buccaneers,41.682
656,2022,18,Denver Broncos,31,28,Los Angeles Chargers,Denver Broncos,Los Angeles Chargers,6.5,3,U,39.0,59,O,Los Angeles Chargers,Denver Broncos,22.999484,22.235236,16.862098,20.052801,21.526143,19.548667,Los Angeles Chargers,1.977,Denver Broncos,41.075
657,2022,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,Cleveland Browns,2.5,14,F,40.0,42,O,Cleveland Browns,Pittsburgh Steelers,21.201247,22.602988,19.551624,21.295456,21.248351,21.077306,Cleveland Browns,0.171,Pittsburgh Steelers,42.326


In [1370]:
#Merging the rest of the matchup DF with the actual schedule DF. Then filtering for a final DF. 
df_comb_5 = [both_df3_5, both_df2_5]
result_5 = pd.concat(df_comb_5).reset_index(drop=True).dropna()
result_5['schedule_week'] = result_5['schedule_week'].astype(np.int64)
result_5['schedule_season'] = result_5['schedule_season'].astype(np.int64)
final_result_5 = result_5.sort_values(by=['schedule_season', 'schedule_week']).reset_index(drop=True)
stdata1822_full = final_result_5.filter(['schedule_season', 'schedule_week', 'winning_team', 'winning_score', 'losing_score', 'losing_team',
                                      'team_favorite', 'team_underdog', 'spread_favorite', 'actual_spread', 'F/U_cover',
                                      'over_under_line', 'actual_total', 'O/U', 'PF_Tm1', 'PF_Tm2', 'Pred_team_favorite',
                                      'Pred_Spread', 'Pred_team_underdog', 'Pred_Total'])
def pft(row):
    if row['PF_Tm1'] > row['PF_Tm2']:
        val = row['PF_Tm1']
    else:
        val = row['PF_Tm2']
    return val
def put(row):
    if row['PF_Tm1'] < row['PF_Tm2']:
        val = row['PF_Tm1']
    else:
        val = row['PF_Tm2']
    return val
stdata1822_full['Pred_PF_favorite'] = stdata1822_full.apply(pft, axis=1)
stdata1822_full['Pred_PF_underdog'] = stdata1822_full.apply(put, axis=1)
stdata1822_full = stdata1822_full.filter(['schedule_season', 'schedule_week', 'winning_team', 'winning_score', 'losing_score', 'losing_team',
                                      'team_favorite', 'spread_favorite', 'team_underdog', 'actual_spread', 'F/U_cover',
                                      'over_under_line', 'actual_total', 'O/U', 'Pred_PF_favorite', 'Pred_team_favorite',
                                    'Pred_Spread', 'Pred_team_underdog', 'Pred_PF_underdog', 'Pred_Total'])
stdata1822_full

Unnamed: 0,schedule_season,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,spread_favorite,team_underdog,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Pred_PF_favorite,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_PF_underdog,Pred_Total
0,2018,1,Philadelphia Eagles,18,12,Atlanta Falcons,Philadelphia Eagles,1.0,Atlanta Falcons,6,F,44.5,30,U,26.039946,Philadelphia Eagles,5.710,Atlanta Falcons,20.329934,46.370
1,2018,1,Green Bay Packers,24,23,Chicago Bears,Green Bay Packers,6.5,Chicago Bears,1,U,45.0,47,O,24.446210,Green Bay Packers,3.636,Chicago Bears,20.809925,45.256
2,2018,1,Cincinnati Bengals,34,23,Indianapolis Colts,Cincinnati Bengals,1.0,Indianapolis Colts,11,F,47.5,57,O,25.491533,Cincinnati Bengals,7.426,Indianapolis Colts,18.065164,43.557
3,2018,1,Kansas City Chiefs,38,28,Los Angeles Chargers,Los Angeles Chargers,3.5,Kansas City Chiefs,10,U,48.0,66,O,25.920349,Kansas City Chiefs,3.087,Los Angeles Chargers,22.833612,48.754
4,2018,1,Miami Dolphins,27,20,Tennessee Titans,Tennessee Titans,1.0,Miami Dolphins,7,U,43.5,47,O,22.268931,Miami Dolphins,1.186,Tennessee Titans,21.083034,43.352
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1209,2022,18,San Francisco 49ers,38,13,Arizona Cardinals,San Francisco 49ers,14.5,Arizona Cardinals,25,F,40.0,51,O,26.523139,San Francisco 49ers,6.928,Arizona Cardinals,19.595062,46.118
1210,2022,18,Seattle Seahawks,19,16,Los Angeles Rams,Seattle Seahawks,4.5,Los Angeles Rams,3,U,43.0,35,U,23.342801,Seattle Seahawks,3.024,Los Angeles Rams,20.319232,43.662
1211,2022,18,Denver Broncos,31,28,Los Angeles Chargers,Denver Broncos,6.5,Los Angeles Chargers,3,U,39.0,59,O,21.526143,Los Angeles Chargers,1.977,Denver Broncos,19.548667,41.075
1212,2022,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,2.5,Cleveland Browns,14,F,40.0,42,O,21.248351,Cleveland Browns,0.171,Pittsburgh Steelers,21.077306,42.326


In [1371]:
#Creating functions that compares the predicted spread bets and predicted total bets with what actually happened in during the 2022 season. 
def psb(row):
    if row['team_underdog'] == row['Pred_team_favorite']:
        val = 'U'
    elif row['Pred_Spread'] < row['spread_favorite']:
        val = 'U'
    else:
        val = 'F'
    return val
def ptb(row):
    if row['Pred_Total'] > row['over_under_line']:
        val = 'O'
    else:
        val = 'U'
    return val
stdata1822_full['Pred_spread_bet'] = stdata1822_full.apply(psb, axis=1)
stdata1822_full['Pred_total_bet'] = stdata1822_full.apply(ptb, axis=1)
#Creating rows that track the W/L for both spread and total based on the predicted bets
def sbwl(row):
    if row['Pred_spread_bet'] == row['F/U_cover']:
        val = 'W'
    else:
        val = 'L'
    return val
def tbwl(row):
    if row['Pred_total_bet'] == row['O/U']:
        val = 'W'
    else:
        val = 'L'
    return val
stdata1822_full['spread_bet_W/L'] = stdata1822_full.apply(sbwl, axis=1)
stdata1822_full['total_bet_W/L'] = stdata1822_full.apply(tbwl, axis=1)
stdata1822_full

Unnamed: 0,schedule_season,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,spread_favorite,team_underdog,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Pred_PF_favorite,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_PF_underdog,Pred_Total,Pred_spread_bet,Pred_total_bet,spread_bet_W/L,total_bet_W/L
0,2018,1,Philadelphia Eagles,18,12,Atlanta Falcons,Philadelphia Eagles,1.0,Atlanta Falcons,6,F,44.5,30,U,26.039946,Philadelphia Eagles,5.710,Atlanta Falcons,20.329934,46.370,F,O,W,L
1,2018,1,Green Bay Packers,24,23,Chicago Bears,Green Bay Packers,6.5,Chicago Bears,1,U,45.0,47,O,24.446210,Green Bay Packers,3.636,Chicago Bears,20.809925,45.256,U,O,W,W
2,2018,1,Cincinnati Bengals,34,23,Indianapolis Colts,Cincinnati Bengals,1.0,Indianapolis Colts,11,F,47.5,57,O,25.491533,Cincinnati Bengals,7.426,Indianapolis Colts,18.065164,43.557,F,U,W,L
3,2018,1,Kansas City Chiefs,38,28,Los Angeles Chargers,Los Angeles Chargers,3.5,Kansas City Chiefs,10,U,48.0,66,O,25.920349,Kansas City Chiefs,3.087,Los Angeles Chargers,22.833612,48.754,U,O,W,W
4,2018,1,Miami Dolphins,27,20,Tennessee Titans,Tennessee Titans,1.0,Miami Dolphins,7,U,43.5,47,O,22.268931,Miami Dolphins,1.186,Tennessee Titans,21.083034,43.352,U,U,W,L
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1209,2022,18,San Francisco 49ers,38,13,Arizona Cardinals,San Francisco 49ers,14.5,Arizona Cardinals,25,F,40.0,51,O,26.523139,San Francisco 49ers,6.928,Arizona Cardinals,19.595062,46.118,U,O,L,W
1210,2022,18,Seattle Seahawks,19,16,Los Angeles Rams,Seattle Seahawks,4.5,Los Angeles Rams,3,U,43.0,35,U,23.342801,Seattle Seahawks,3.024,Los Angeles Rams,20.319232,43.662,U,O,W,L
1211,2022,18,Denver Broncos,31,28,Los Angeles Chargers,Denver Broncos,6.5,Los Angeles Chargers,3,U,39.0,59,O,21.526143,Los Angeles Chargers,1.977,Denver Broncos,19.548667,41.075,U,O,W,W
1212,2022,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,2.5,Cleveland Browns,14,F,40.0,42,O,21.248351,Cleveland Browns,0.171,Pittsburgh Steelers,21.077306,42.326,U,O,L,W


In [1373]:
stdata1822_full['spread_bet_W/L'].value_counts()

W    669
L    545
Name: spread_bet_W/L, dtype: int64

In [1374]:
stdata1822_full['total_bet_W/L'].value_counts()

W    625
L    589
Name: total_bet_W/L, dtype: int64

## Teasers on 5 Year Optimal Spreads Model

In [1396]:
#Testing optimal spread model on teasers
stdata1822_full_adv_teas = stdata1822_full.copy()
stdata1822_full_adv_teas['underdog_tease6'] = stdata1822_full_adv_teas['spread_favorite'] + 6
stdata1822_full_adv_teas['favorite_tease6'] = (stdata1822_full_adv_teas['spread_favorite'] - 6).abs()
stdata1822_full_adv_teas['underdog_tease7'] = stdata1822_full_adv_teas['spread_favorite'] + 7
stdata1822_full_adv_teas['favorite_tease7'] = (stdata1822_full_adv_teas['spread_favorite'] - 7).abs()
stdata1822_full_adv_teas['underdog_tease10'] = stdata1822_full_adv_teas['spread_favorite'] + 10
stdata1822_full_adv_teas['favorite_tease10'] = (stdata1822_full_adv_teas['spread_favorite'] - 10).abs()

def psbt6(row):
    if row['Pred_spread_bet'] == 'F':
        val = row['favorite_tease6']
    else:
        val = row['underdog_tease6']
    return val
stdata1822_full_adv_teas['Pred_tease_spread6'] = stdata1822_full_adv_teas.apply(psbt6, axis=1)

def psbt7(row):
    if row['Pred_spread_bet'] == 'F':
        val = row['favorite_tease7']
    else:
        val = row['underdog_tease7']
    return val
stdata1822_full_adv_teas['Pred_tease_spread7'] = stdata1822_full_adv_teas.apply(psbt7, axis=1)

def psbt10(row):
    if row['Pred_spread_bet'] == 'F':
        val = row['favorite_tease10']
    else:
        val = row['underdog_tease10']
    return val
stdata1822_full_adv_teas['Pred_tease_spread10'] = stdata1822_full_adv_teas.apply(psbt10, axis=1)

stdata1822_full_adv_teas

Unnamed: 0,schedule_season,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,spread_favorite,team_underdog,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Pred_PF_favorite,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_PF_underdog,Pred_Total,Pred_spread_bet,Pred_total_bet,spread_bet_W/L,total_bet_W/L,underdog_tease6,favorite_tease6,underdog_tease7,favorite_tease7,underdog_tease10,favorite_tease10,Pred_tease_spread6,Pred_tease_spread7,Pred_tease_spread10
0,2018,1,Philadelphia Eagles,18,12,Atlanta Falcons,Philadelphia Eagles,1.0,Atlanta Falcons,6,F,44.5,30,U,26.039946,Philadelphia Eagles,5.710,Atlanta Falcons,20.329934,46.370,F,O,W,L,7.0,5.0,8.0,6.0,11.0,9.0,5.0,6.0,9.0
1,2018,1,Green Bay Packers,24,23,Chicago Bears,Green Bay Packers,6.5,Chicago Bears,1,U,45.0,47,O,24.446210,Green Bay Packers,3.636,Chicago Bears,20.809925,45.256,U,O,W,W,12.5,0.5,13.5,0.5,16.5,3.5,12.5,13.5,16.5
2,2018,1,Cincinnati Bengals,34,23,Indianapolis Colts,Cincinnati Bengals,1.0,Indianapolis Colts,11,F,47.5,57,O,25.491533,Cincinnati Bengals,7.426,Indianapolis Colts,18.065164,43.557,F,U,W,L,7.0,5.0,8.0,6.0,11.0,9.0,5.0,6.0,9.0
3,2018,1,Kansas City Chiefs,38,28,Los Angeles Chargers,Los Angeles Chargers,3.5,Kansas City Chiefs,10,U,48.0,66,O,25.920349,Kansas City Chiefs,3.087,Los Angeles Chargers,22.833612,48.754,U,O,W,W,9.5,2.5,10.5,3.5,13.5,6.5,9.5,10.5,13.5
4,2018,1,Miami Dolphins,27,20,Tennessee Titans,Tennessee Titans,1.0,Miami Dolphins,7,U,43.5,47,O,22.268931,Miami Dolphins,1.186,Tennessee Titans,21.083034,43.352,U,U,W,L,7.0,5.0,8.0,6.0,11.0,9.0,7.0,8.0,11.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1209,2022,18,San Francisco 49ers,38,13,Arizona Cardinals,San Francisco 49ers,14.5,Arizona Cardinals,25,F,40.0,51,O,26.523139,San Francisco 49ers,6.928,Arizona Cardinals,19.595062,46.118,U,O,L,W,20.5,8.5,21.5,7.5,24.5,4.5,20.5,21.5,24.5
1210,2022,18,Seattle Seahawks,19,16,Los Angeles Rams,Seattle Seahawks,4.5,Los Angeles Rams,3,U,43.0,35,U,23.342801,Seattle Seahawks,3.024,Los Angeles Rams,20.319232,43.662,U,O,W,L,10.5,1.5,11.5,2.5,14.5,5.5,10.5,11.5,14.5
1211,2022,18,Denver Broncos,31,28,Los Angeles Chargers,Denver Broncos,6.5,Los Angeles Chargers,3,U,39.0,59,O,21.526143,Los Angeles Chargers,1.977,Denver Broncos,19.548667,41.075,U,O,W,W,12.5,0.5,13.5,0.5,16.5,3.5,12.5,13.5,16.5
1212,2022,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,2.5,Cleveland Browns,14,F,40.0,42,O,21.248351,Cleveland Browns,0.171,Pittsburgh Steelers,21.077306,42.326,U,O,L,W,8.5,3.5,9.5,4.5,12.5,7.5,8.5,9.5,12.5


In [1397]:
def pteamb(row):
    if row['team_underdog'] == row['Pred_team_favorite']:
        val = row['Pred_team_favorite']
    elif row['Pred_spread_bet'] == 'F':
        val = row['Pred_team_favorite']
    else:
        val = row['Pred_team_underdog']
    return val

def tease6swl(row):
    if row['spread_bet_W/L'] == 'W':
        val = 'W'
    elif row['actual_spread'] > row['Pred_tease_spread6']:
        val = 'L'
    elif row['actual_spread'] == row['Pred_tease_spread6']:
        val = 'P'
    else:
        val = 'W'
    return val

def tease7swl(row):
    if row['spread_bet_W/L'] == 'W':
        val = 'W'
    elif row['actual_spread'] > row['Pred_tease_spread7']:
        val = 'L'
    elif row['actual_spread'] == row['Pred_tease_spread7']:
        val = 'P'
    else:
        val = 'W'
    return val

def tease10swl(row):
    if row['spread_bet_W/L'] == 'W':
        val = 'W'
    elif row['actual_spread'] > row['Pred_tease_spread10']:
        val = 'L'
    elif row['actual_spread'] == row['Pred_tease_spread10']:
        val = 'L'
    else:
        val = 'W'
    return val

stdata1822_full_adv_teas['Pred_team_bet'] = stdata1822_full_adv_teas.apply(pteamb, axis=1)
stdata1822_full_adv_teas['tease6_spread_W/L'] = stdata1822_full_adv_teas.apply(tease6swl, axis=1)
stdata1822_full_adv_teas['tease7_spread_W/L'] = stdata1822_full_adv_teas.apply(tease7swl, axis=1)
stdata1822_full_adv_teas['tease10_spread_W/L'] = stdata1822_full_adv_teas.apply(tease10swl, axis=1)
stdata1822_full_adv_teas = stdata1822_full_adv_teas.filter(['schedule_week', 'winning_team', 'winning_score', 'losing_score', 'losing_team',
                                      'team_favorite', 'spread_favorite', 'team_underdog', 'actual_spread', 'F/U_cover',
                                      'over_under_line', 'actual_total', 'O/U', 'Pred_PF_favorite', 'Pred_team_favorite',
                                    'Pred_Spread', 'Pred_team_underdog', 'Pred_PF_underdog', 'Pred_Total', 'Pred_spread_bet', 'Pred_total_bet',
                                    'spread_bet_W/L', 'total_bet_W/L', 'underdog_tease6', 'favorite_tease6', 'underdog_tease7', 'favorite_tease7',
                                        'underdog_tease10', 'favorite_tease10', 'Pred_team_bet', 'Pred_tease_spread6', 'tease6_spread_W/L',
                                        'Pred_tease_spread7', 'tease7_spread_W/L', 'Pred_tease_spread10', 'tease10_spread_W/L'])
stdata1822_full_adv_teas

Unnamed: 0,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,spread_favorite,team_underdog,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Pred_PF_favorite,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_PF_underdog,Pred_Total,Pred_spread_bet,Pred_total_bet,spread_bet_W/L,total_bet_W/L,underdog_tease6,favorite_tease6,underdog_tease7,favorite_tease7,underdog_tease10,favorite_tease10,Pred_team_bet,Pred_tease_spread6,tease6_spread_W/L,Pred_tease_spread7,tease7_spread_W/L,Pred_tease_spread10,tease10_spread_W/L
0,1,Philadelphia Eagles,18,12,Atlanta Falcons,Philadelphia Eagles,1.0,Atlanta Falcons,6,F,44.5,30,U,26.039946,Philadelphia Eagles,5.710,Atlanta Falcons,20.329934,46.370,F,O,W,L,7.0,5.0,8.0,6.0,11.0,9.0,Philadelphia Eagles,5.0,W,6.0,W,9.0,W
1,1,Green Bay Packers,24,23,Chicago Bears,Green Bay Packers,6.5,Chicago Bears,1,U,45.0,47,O,24.446210,Green Bay Packers,3.636,Chicago Bears,20.809925,45.256,U,O,W,W,12.5,0.5,13.5,0.5,16.5,3.5,Chicago Bears,12.5,W,13.5,W,16.5,W
2,1,Cincinnati Bengals,34,23,Indianapolis Colts,Cincinnati Bengals,1.0,Indianapolis Colts,11,F,47.5,57,O,25.491533,Cincinnati Bengals,7.426,Indianapolis Colts,18.065164,43.557,F,U,W,L,7.0,5.0,8.0,6.0,11.0,9.0,Cincinnati Bengals,5.0,W,6.0,W,9.0,W
3,1,Kansas City Chiefs,38,28,Los Angeles Chargers,Los Angeles Chargers,3.5,Kansas City Chiefs,10,U,48.0,66,O,25.920349,Kansas City Chiefs,3.087,Los Angeles Chargers,22.833612,48.754,U,O,W,W,9.5,2.5,10.5,3.5,13.5,6.5,Kansas City Chiefs,9.5,W,10.5,W,13.5,W
4,1,Miami Dolphins,27,20,Tennessee Titans,Tennessee Titans,1.0,Miami Dolphins,7,U,43.5,47,O,22.268931,Miami Dolphins,1.186,Tennessee Titans,21.083034,43.352,U,U,W,L,7.0,5.0,8.0,6.0,11.0,9.0,Miami Dolphins,7.0,W,8.0,W,11.0,W
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1209,18,San Francisco 49ers,38,13,Arizona Cardinals,San Francisco 49ers,14.5,Arizona Cardinals,25,F,40.0,51,O,26.523139,San Francisco 49ers,6.928,Arizona Cardinals,19.595062,46.118,U,O,L,W,20.5,8.5,21.5,7.5,24.5,4.5,Arizona Cardinals,20.5,L,21.5,L,24.5,L
1210,18,Seattle Seahawks,19,16,Los Angeles Rams,Seattle Seahawks,4.5,Los Angeles Rams,3,U,43.0,35,U,23.342801,Seattle Seahawks,3.024,Los Angeles Rams,20.319232,43.662,U,O,W,L,10.5,1.5,11.5,2.5,14.5,5.5,Los Angeles Rams,10.5,W,11.5,W,14.5,W
1211,18,Denver Broncos,31,28,Los Angeles Chargers,Denver Broncos,6.5,Los Angeles Chargers,3,U,39.0,59,O,21.526143,Los Angeles Chargers,1.977,Denver Broncos,19.548667,41.075,U,O,W,W,12.5,0.5,13.5,0.5,16.5,3.5,Los Angeles Chargers,12.5,W,13.5,W,16.5,W
1212,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,2.5,Cleveland Browns,14,F,40.0,42,O,21.248351,Cleveland Browns,0.171,Pittsburgh Steelers,21.077306,42.326,U,O,L,W,8.5,3.5,9.5,4.5,12.5,7.5,Cleveland Browns,8.5,L,9.5,L,12.5,L


In [1398]:
stdata1822_full_adv_teas['tease6_spread_W/L'].value_counts()

W    874
L    323
P     17
Name: tease6_spread_W/L, dtype: int64

In [1399]:
stdata1822_full_adv_teas['tease7_spread_W/L'].value_counts()

W    906
L    294
P     14
Name: tease7_spread_W/L, dtype: int64

In [1400]:
stdata1822_full_adv_teas['tease10_spread_W/L'].value_counts()

W    970
L    244
Name: tease10_spread_W/L, dtype: int64

## GBR_Adv_Red (optimal totals)

In [1376]:
both_df3_adv_red_5 = pd.merge(stdata1822, match_gbrpred_adv_red, on=['Tm1', 'Tm2'], how='left')
both_df_na_adv_red_5 = both_df3_adv_red_5[both_df3_adv_red_5.isna().any(axis=1)]
both_df_na_adv_red_5 = both_df_na_adv_red_5.rename(columns = {'Tm1': 'Tm3', 'Tm2': 'Tm4'})
both_df_na_adv_red_5['Tm1'] = both_df_na_adv_red_5['Tm4']
both_df_na_adv_red_5['Tm2'] = both_df_na_adv_red_5['Tm3']
both_df_na_adv_red_5 = both_df_na_adv_red_5.filter(['schedule_season', 'schedule_week', 'winning_team', 'winning_score', 'losing_score', 'losing_team', 'team_favorite',
                               'team_underdog', 'spread_favorite', 'actual_spread', 'F/U_cover', 'over_under_line', 'actual_total',
                               'O/U', 'Tm1', 'Tm2'])
both_df2_adv_red_5 = pd.merge(both_df_na_adv_red_5, match_gbrpred_adv_red, on=['Tm2', 'Tm1'], how='left').dropna()
both_df2_adv_red_5

Unnamed: 0,schedule_season,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,team_underdog,spread_favorite,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Tm1,Tm2,PF_gbradv_Pred1_red,PA_gbradv_Pred1_red,PF_gbradv_Pred2_red,PA_gbradv_Pred2_red,PF_Tm1,PF_Tm2,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_Total
1,2018,1,Baltimore Ravens,47,3,Buffalo Bills,Baltimore Ravens,Buffalo Bills,7.5,44,F,39.0,50,O,Buffalo Bills,Baltimore Ravens,28.196786,17.628941,20.694819,19.173094,23.684940,19.161880,Buffalo Bills,4.523,Baltimore Ravens,42.847
2,2018,1,Carolina Panthers,16,8,Dallas Cowboys,Carolina Panthers,Dallas Cowboys,2.5,8,F,42.5,24,U,Dallas Cowboys,Carolina Panthers,27.561768,19.837221,20.253297,22.379602,24.970685,20.045259,Dallas Cowboys,4.925,Carolina Panthers,45.016
3,2018,1,Denver Broncos,27,24,Seattle Seahawks,Denver Broncos,Seattle Seahawks,3.0,3,U,42.5,51,O,Seattle Seahawks,Denver Broncos,23.592883,22.338753,17.776099,20.168281,21.880582,20.057426,Seattle Seahawks,1.823,Denver Broncos,41.938
4,2018,1,Minnesota Vikings,24,16,San Francisco 49ers,Minnesota Vikings,San Francisco 49ers,6.0,8,F,46.5,40,U,San Francisco 49ers,Minnesota Vikings,27.415036,18.478367,24.924564,23.644820,25.529928,21.701466,San Francisco 49ers,3.828,Minnesota Vikings,47.231
5,2018,1,Tampa Bay Buccaneers,48,40,New Orleans Saints,New Orleans Saints,Tampa Bay Buccaneers,10.0,8,U,50.0,88,O,New Orleans Saints,Tampa Bay Buccaneers,19.243383,19.785249,23.908236,20.713440,19.978412,21.846742,Tampa Bay Buccaneers,1.868,New Orleans Saints,41.825
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
654,2022,17,New Orleans Saints,20,10,Philadelphia Eagles,Philadelphia Eagles,New Orleans Saints,5.0,10,U,42.0,30,U,Philadelphia Eagles,New Orleans Saints,27.917533,19.752214,19.243383,19.785249,23.851391,19.497799,Philadelphia Eagles,4.354,New Orleans Saints,43.349
655,2022,17,Tampa Bay Buccaneers,30,24,Carolina Panthers,Tampa Bay Buccaneers,Carolina Panthers,3.5,6,F,40.5,54,O,Carolina Panthers,Tampa Bay Buccaneers,20.253297,22.379602,23.908236,20.713440,20.483368,23.143919,Tampa Bay Buccaneers,2.661,Carolina Panthers,43.627
656,2022,18,Denver Broncos,31,28,Los Angeles Chargers,Denver Broncos,Los Angeles Chargers,6.5,3,U,39.0,59,O,Los Angeles Chargers,Denver Broncos,23.441154,22.761763,17.776099,20.168281,21.804718,20.268931,Los Angeles Chargers,1.536,Denver Broncos,42.074
657,2022,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,Cleveland Browns,2.5,14,F,40.0,42,O,Cleveland Browns,Pittsburgh Steelers,20.852184,22.679205,18.455516,21.551922,21.202053,20.567361,Cleveland Browns,0.635,Pittsburgh Steelers,41.769


In [1377]:
#Merging the rest of the matchup DF with the actual schedule DF. Then filtering for a final DF. 
df_comb_adv_red_5 = [both_df3_adv_red_5, both_df2_adv_red_5]
result_adv_red_5 = pd.concat(df_comb_adv_red_5).reset_index(drop=True).dropna()
result_adv_red_5['schedule_week'] = result_adv_red_5['schedule_week'].astype(np.int64)
result_adv_red_5['schedule_season'] = result_adv_red_5['schedule_season'].astype(np.int64)
final_result_adv_red_5 = result_adv_red_5.sort_values(by=['schedule_season', 'schedule_week']).reset_index(drop=True)
stdata1822_full_adv_red = final_result_adv_red_5.filter(['schedule_season', 'schedule_week', 'winning_team', 'winning_score', 'losing_score', 'losing_team',
                                      'team_favorite', 'team_underdog', 'spread_favorite', 'actual_spread', 'F/U_cover',
                                      'over_under_line', 'actual_total', 'O/U', 'PF_Tm1', 'PF_Tm2', 'Pred_team_favorite',
                                      'Pred_Spread', 'Pred_team_underdog', 'Pred_Total'])
def pft(row):
    if row['PF_Tm1'] > row['PF_Tm2']:
        val = row['PF_Tm1']
    else:
        val = row['PF_Tm2']
    return val
def put(row):
    if row['PF_Tm1'] < row['PF_Tm2']:
        val = row['PF_Tm1']
    else:
        val = row['PF_Tm2']
    return val
stdata1822_full_adv_red['Pred_PF_favorite'] = stdata1822_full_adv_red.apply(pft, axis=1)
stdata1822_full_adv_red['Pred_PF_underdog'] = stdata1822_full_adv_red.apply(put, axis=1)
stdata1822_full_adv_red = stdata1822_full_adv_red.filter(['schedule_season', 'schedule_week', 'winning_team', 'winning_score', 'losing_score', 'losing_team',
                                      'team_favorite', 'spread_favorite', 'team_underdog', 'actual_spread', 'F/U_cover',
                                      'over_under_line', 'actual_total', 'O/U', 'Pred_PF_favorite', 'Pred_team_favorite',
                                    'Pred_Spread', 'Pred_team_underdog', 'Pred_PF_underdog', 'Pred_Total'])
stdata1822_full_adv_red

Unnamed: 0,schedule_season,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,spread_favorite,team_underdog,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Pred_PF_favorite,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_PF_underdog,Pred_Total
0,2018,1,Philadelphia Eagles,18,12,Atlanta Falcons,Philadelphia Eagles,1.0,Atlanta Falcons,6,F,44.5,30,U,25.549508,Philadelphia Eagles,5.116,Atlanta Falcons,20.433416,45.983
1,2018,1,Green Bay Packers,24,23,Chicago Bears,Green Bay Packers,6.5,Chicago Bears,1,U,45.0,47,O,24.632758,Green Bay Packers,3.868,Chicago Bears,20.764776,45.398
2,2018,1,Cincinnati Bengals,34,23,Indianapolis Colts,Cincinnati Bengals,1.0,Indianapolis Colts,11,F,47.5,57,O,25.014248,Cincinnati Bengals,6.164,Indianapolis Colts,18.850151,43.864
3,2018,1,Kansas City Chiefs,38,28,Los Angeles Chargers,Los Angeles Chargers,3.5,Kansas City Chiefs,10,U,48.0,66,O,25.996404,Kansas City Chiefs,3.271,Los Angeles Chargers,22.725032,48.721
4,2018,1,Miami Dolphins,27,20,Tennessee Titans,Tennessee Titans,1.0,Miami Dolphins,7,U,43.5,47,O,22.360604,Miami Dolphins,1.411,Tennessee Titans,20.949818,43.310
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1209,2022,18,San Francisco 49ers,38,13,Arizona Cardinals,San Francisco 49ers,14.5,Arizona Cardinals,25,F,40.0,51,O,26.964671,San Francisco 49ers,7.873,Arizona Cardinals,19.091481,46.056
1210,2022,18,Seattle Seahawks,19,16,Los Angeles Rams,Seattle Seahawks,4.5,Los Angeles Rams,3,U,43.0,35,U,22.612419,Seattle Seahawks,2.383,Los Angeles Rams,20.229270,42.842
1211,2022,18,Denver Broncos,31,28,Los Angeles Chargers,Denver Broncos,6.5,Los Angeles Chargers,3,U,39.0,59,O,21.804718,Los Angeles Chargers,1.536,Denver Broncos,20.268931,42.074
1212,2022,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,2.5,Cleveland Browns,14,F,40.0,42,O,21.202053,Cleveland Browns,0.635,Pittsburgh Steelers,20.567361,41.769


In [1378]:
#Creating functions that compares the predicted spread bets and predicted total bets with what actually happened in during the 2022 season. 
def psb(row):
    if row['team_underdog'] == row['Pred_team_favorite']:
        val = 'U'
    elif row['Pred_Spread'] < row['spread_favorite']:
        val = 'U'
    else:
        val = 'F'
    return val
def ptb(row):
    if row['Pred_Total'] > row['over_under_line']:
        val = 'O'
    else:
        val = 'U'
    return val
stdata1822_full_adv_red['Pred_spread_bet'] = stdata1822_full_adv_red.apply(psb, axis=1)
stdata1822_full_adv_red['Pred_total_bet'] = stdata1822_full_adv_red.apply(ptb, axis=1)
#Creating rows that track the W/L for both spread and total based on the predicted bets
def sbwl(row):
    if row['Pred_spread_bet'] == row['F/U_cover']:
        val = 'W'
    else:
        val = 'L'
    return val
def tbwl(row):
    if row['Pred_total_bet'] == row['O/U']:
        val = 'W'
    else:
        val = 'L'
    return val
stdata1822_full_adv_red['spread_bet_W/L'] = stdata1822_full_adv_red.apply(sbwl, axis=1)
stdata1822_full_adv_red['total_bet_W/L'] = stdata1822_full_adv_red.apply(tbwl, axis=1)
stdata1822_full_adv_red

Unnamed: 0,schedule_season,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,spread_favorite,team_underdog,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Pred_PF_favorite,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_PF_underdog,Pred_Total,Pred_spread_bet,Pred_total_bet,spread_bet_W/L,total_bet_W/L
0,2018,1,Philadelphia Eagles,18,12,Atlanta Falcons,Philadelphia Eagles,1.0,Atlanta Falcons,6,F,44.5,30,U,25.549508,Philadelphia Eagles,5.116,Atlanta Falcons,20.433416,45.983,F,O,W,L
1,2018,1,Green Bay Packers,24,23,Chicago Bears,Green Bay Packers,6.5,Chicago Bears,1,U,45.0,47,O,24.632758,Green Bay Packers,3.868,Chicago Bears,20.764776,45.398,U,O,W,W
2,2018,1,Cincinnati Bengals,34,23,Indianapolis Colts,Cincinnati Bengals,1.0,Indianapolis Colts,11,F,47.5,57,O,25.014248,Cincinnati Bengals,6.164,Indianapolis Colts,18.850151,43.864,F,U,W,L
3,2018,1,Kansas City Chiefs,38,28,Los Angeles Chargers,Los Angeles Chargers,3.5,Kansas City Chiefs,10,U,48.0,66,O,25.996404,Kansas City Chiefs,3.271,Los Angeles Chargers,22.725032,48.721,U,O,W,W
4,2018,1,Miami Dolphins,27,20,Tennessee Titans,Tennessee Titans,1.0,Miami Dolphins,7,U,43.5,47,O,22.360604,Miami Dolphins,1.411,Tennessee Titans,20.949818,43.310,U,U,W,L
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1209,2022,18,San Francisco 49ers,38,13,Arizona Cardinals,San Francisco 49ers,14.5,Arizona Cardinals,25,F,40.0,51,O,26.964671,San Francisco 49ers,7.873,Arizona Cardinals,19.091481,46.056,U,O,L,W
1210,2022,18,Seattle Seahawks,19,16,Los Angeles Rams,Seattle Seahawks,4.5,Los Angeles Rams,3,U,43.0,35,U,22.612419,Seattle Seahawks,2.383,Los Angeles Rams,20.229270,42.842,U,U,W,W
1211,2022,18,Denver Broncos,31,28,Los Angeles Chargers,Denver Broncos,6.5,Los Angeles Chargers,3,U,39.0,59,O,21.804718,Los Angeles Chargers,1.536,Denver Broncos,20.268931,42.074,U,O,W,W
1212,2022,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,2.5,Cleveland Browns,14,F,40.0,42,O,21.202053,Cleveland Browns,0.635,Pittsburgh Steelers,20.567361,41.769,U,O,L,W


In [1379]:
stdata1822_full_adv_red['total_bet_W/L'].value_counts()

W    642
L    572
Name: total_bet_W/L, dtype: int64

In [1380]:
stdata1822_full_adv_red['spread_bet_W/L'].value_counts()

W    658
L    556
Name: spread_bet_W/L, dtype: int64

## Teasers on Optimal 5 Year Totals Model

In [1390]:
#Testing optimal totals model on teasers
stdata1822_full_adv_red_teas = stdata1822_full_adv_red.copy()

def ptbt6(row):
    if row['Pred_total_bet'] == 'U':
        val = row['over_under_line'] + 6
    else:
        val = row['over_under_line'] - 6
    return val
stdata1822_full_adv_red_teas['Pred_tease_total6'] = stdata1822_full_adv_red_teas.apply(ptbt6, axis=1)

def ptbt7(row):
    if row['Pred_total_bet'] == 'U':
        val = row['over_under_line'] + 7
    else:
        val = row['over_under_line'] - 7
    return val
stdata1822_full_adv_red_teas['Pred_tease_total7'] = stdata1822_full_adv_red_teas.apply(ptbt7, axis=1)

def ptbt10(row):
    if row['Pred_total_bet'] == 'U':
        val = row['over_under_line'] + 10
    else:
        val = row['over_under_line'] - 10
    return val
stdata1822_full_adv_red_teas['Pred_tease_total10'] = stdata1822_full_adv_red_teas.apply(ptbt10, axis=1)

stdata1822_full_adv_red_teas

Unnamed: 0,schedule_season,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,spread_favorite,team_underdog,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Pred_PF_favorite,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_PF_underdog,Pred_Total,Pred_spread_bet,Pred_total_bet,spread_bet_W/L,total_bet_W/L,Pred_tease_total6,Pred_tease_total7,Pred_tease_total10
0,2018,1,Philadelphia Eagles,18,12,Atlanta Falcons,Philadelphia Eagles,1.0,Atlanta Falcons,6,F,44.5,30,U,25.549508,Philadelphia Eagles,5.116,Atlanta Falcons,20.433416,45.983,F,O,W,L,38.5,37.5,34.5
1,2018,1,Green Bay Packers,24,23,Chicago Bears,Green Bay Packers,6.5,Chicago Bears,1,U,45.0,47,O,24.632758,Green Bay Packers,3.868,Chicago Bears,20.764776,45.398,U,O,W,W,39.0,38.0,35.0
2,2018,1,Cincinnati Bengals,34,23,Indianapolis Colts,Cincinnati Bengals,1.0,Indianapolis Colts,11,F,47.5,57,O,25.014248,Cincinnati Bengals,6.164,Indianapolis Colts,18.850151,43.864,F,U,W,L,53.5,54.5,57.5
3,2018,1,Kansas City Chiefs,38,28,Los Angeles Chargers,Los Angeles Chargers,3.5,Kansas City Chiefs,10,U,48.0,66,O,25.996404,Kansas City Chiefs,3.271,Los Angeles Chargers,22.725032,48.721,U,O,W,W,42.0,41.0,38.0
4,2018,1,Miami Dolphins,27,20,Tennessee Titans,Tennessee Titans,1.0,Miami Dolphins,7,U,43.5,47,O,22.360604,Miami Dolphins,1.411,Tennessee Titans,20.949818,43.310,U,U,W,L,49.5,50.5,53.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1209,2022,18,San Francisco 49ers,38,13,Arizona Cardinals,San Francisco 49ers,14.5,Arizona Cardinals,25,F,40.0,51,O,26.964671,San Francisco 49ers,7.873,Arizona Cardinals,19.091481,46.056,U,O,L,W,34.0,33.0,30.0
1210,2022,18,Seattle Seahawks,19,16,Los Angeles Rams,Seattle Seahawks,4.5,Los Angeles Rams,3,U,43.0,35,U,22.612419,Seattle Seahawks,2.383,Los Angeles Rams,20.229270,42.842,U,U,W,W,49.0,50.0,53.0
1211,2022,18,Denver Broncos,31,28,Los Angeles Chargers,Denver Broncos,6.5,Los Angeles Chargers,3,U,39.0,59,O,21.804718,Los Angeles Chargers,1.536,Denver Broncos,20.268931,42.074,U,O,W,W,33.0,32.0,29.0
1212,2022,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,2.5,Cleveland Browns,14,F,40.0,42,O,21.202053,Cleveland Browns,0.635,Pittsburgh Steelers,20.567361,41.769,U,O,L,W,34.0,33.0,30.0


In [1391]:
#Creating function to find W/L for predicted teaser totals compared to actual scores
def tease6twl(row):
    if row['Pred_total_bet'] == 'U':
       
        if row['Pred_tease_total6'] > row['actual_total']:
            val = 'W'
        elif row['Pred_tease_total6'] == row['actual_total']:
            val = 'P'
        else:
            val = 'L'
            
    elif row['Pred_total_bet'] == 'O':
        
        if row['Pred_tease_total6'] < row['actual_total']:
            val = 'W'
        elif row['Pred_tease_total6'] == row['actual_total']:
            val = 'P'
        else:
            val = 'L'
            
    else:
        val = 'W'
    return val

def tease7twl(row):
    if row['Pred_total_bet'] == 'U':
       
        if row['Pred_tease_total7'] > row['actual_total']:
            val = 'W'
        elif row['Pred_tease_total7'] == row['actual_total']:
            val = 'P'
        else:
            val = 'L'
            
    elif row['Pred_total_bet'] == 'O':
        
        if row['Pred_tease_total7'] < row['actual_total']:
            val = 'W'
        elif row['Pred_tease_total7'] == row['actual_total']:
            val = 'P'
        else:
            val = 'L'
            
    else:
        val = 'W'
    return val

def tease10twl(row):
    if row['Pred_total_bet'] == 'U':
       
        if row['Pred_tease_total10'] > row['actual_total']:
            val = 'W'
        elif row['Pred_tease_total10'] == row['actual_total']:
            val = 'L'
        else:
            val = 'L'
            
    elif row['Pred_total_bet'] == 'O':
        
        if row['Pred_tease_total10'] < row['actual_total']:
            val = 'W'
        elif row['Pred_tease_total10'] == row['actual_total']:
            val = 'L'
        else:
            val = 'L'
            
    else:
        val = 'W'
    return val

stdata1822_full_adv_red_teas['tease6_total_W/L'] = stdata1822_full_adv_red_teas.apply(tease6twl, axis=1)
stdata1822_full_adv_red_teas['tease7_total_W/L'] = stdata1822_full_adv_red_teas.apply(tease7twl, axis=1)
stdata1822_full_adv_red_teas['tease10_total_W/L'] = stdata1822_full_adv_red_teas.apply(tease10twl, axis=1)
stdata1822_full_adv_red_teas

Unnamed: 0,schedule_season,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,spread_favorite,team_underdog,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Pred_PF_favorite,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_PF_underdog,Pred_Total,Pred_spread_bet,Pred_total_bet,spread_bet_W/L,total_bet_W/L,Pred_tease_total6,Pred_tease_total7,Pred_tease_total10,tease6_total_W/L,tease7_total_W/L,tease10_total_W/L
0,2018,1,Philadelphia Eagles,18,12,Atlanta Falcons,Philadelphia Eagles,1.0,Atlanta Falcons,6,F,44.5,30,U,25.549508,Philadelphia Eagles,5.116,Atlanta Falcons,20.433416,45.983,F,O,W,L,38.5,37.5,34.5,L,L,L
1,2018,1,Green Bay Packers,24,23,Chicago Bears,Green Bay Packers,6.5,Chicago Bears,1,U,45.0,47,O,24.632758,Green Bay Packers,3.868,Chicago Bears,20.764776,45.398,U,O,W,W,39.0,38.0,35.0,W,W,W
2,2018,1,Cincinnati Bengals,34,23,Indianapolis Colts,Cincinnati Bengals,1.0,Indianapolis Colts,11,F,47.5,57,O,25.014248,Cincinnati Bengals,6.164,Indianapolis Colts,18.850151,43.864,F,U,W,L,53.5,54.5,57.5,L,L,W
3,2018,1,Kansas City Chiefs,38,28,Los Angeles Chargers,Los Angeles Chargers,3.5,Kansas City Chiefs,10,U,48.0,66,O,25.996404,Kansas City Chiefs,3.271,Los Angeles Chargers,22.725032,48.721,U,O,W,W,42.0,41.0,38.0,W,W,W
4,2018,1,Miami Dolphins,27,20,Tennessee Titans,Tennessee Titans,1.0,Miami Dolphins,7,U,43.5,47,O,22.360604,Miami Dolphins,1.411,Tennessee Titans,20.949818,43.310,U,U,W,L,49.5,50.5,53.5,W,W,W
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1209,2022,18,San Francisco 49ers,38,13,Arizona Cardinals,San Francisco 49ers,14.5,Arizona Cardinals,25,F,40.0,51,O,26.964671,San Francisco 49ers,7.873,Arizona Cardinals,19.091481,46.056,U,O,L,W,34.0,33.0,30.0,W,W,W
1210,2022,18,Seattle Seahawks,19,16,Los Angeles Rams,Seattle Seahawks,4.5,Los Angeles Rams,3,U,43.0,35,U,22.612419,Seattle Seahawks,2.383,Los Angeles Rams,20.229270,42.842,U,U,W,W,49.0,50.0,53.0,W,W,W
1211,2022,18,Denver Broncos,31,28,Los Angeles Chargers,Denver Broncos,6.5,Los Angeles Chargers,3,U,39.0,59,O,21.804718,Los Angeles Chargers,1.536,Denver Broncos,20.268931,42.074,U,O,W,W,33.0,32.0,29.0,W,W,W
1212,2022,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,2.5,Cleveland Browns,14,F,40.0,42,O,21.202053,Cleveland Browns,0.635,Pittsburgh Steelers,20.567361,41.769,U,O,L,W,34.0,33.0,30.0,W,W,W


In [1392]:
stdata1822_full_adv_red_teas['tease6_total_W/L'].value_counts()

W    822
L    375
P     17
Name: tease6_total_W/L, dtype: int64

In [1393]:
stdata1822_full_adv_red_teas['tease7_total_W/L'].value_counts()

W    860
L    333
P     21
Name: tease7_total_W/L, dtype: int64

In [1394]:
stdata1822_full_adv_red_teas['tease10_total_W/L'].value_counts()

W    961
L    253
Name: tease10_total_W/L, dtype: int64

## GBR_Full_Red (optimal total)

In [1383]:
both_df3_full_red_5 = pd.merge(stdata1822, match_gbrpred_red, on=['Tm1', 'Tm2'], how='left')
both_df_na_full_red_5 = both_df3_full_red_5[both_df3_full_red_5.isna().any(axis=1)]
both_df_na_full_red_5 = both_df_na_full_red_5.rename(columns = {'Tm1': 'Tm3', 'Tm2': 'Tm4'})
both_df_na_full_red_5['Tm1'] = both_df_na_full_red_5['Tm4']
both_df_na_full_red_5['Tm2'] = both_df_na_full_red_5['Tm3']
both_df_na_full_red_5 = both_df_na_full_red_5.filter(['schedule_season', 'schedule_week', 'winning_team', 'winning_score', 'losing_score', 'losing_team', 'team_favorite',
                               'team_underdog', 'spread_favorite', 'actual_spread', 'F/U_cover', 'over_under_line', 'actual_total',
                               'O/U', 'Tm1', 'Tm2'])
both_df2_full_red_5 = pd.merge(both_df_na_full_red_5, match_gbrpred_red, on=['Tm2', 'Tm1'], how='left').dropna()
both_df2_full_red_5

Unnamed: 0,schedule_season,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,team_underdog,spread_favorite,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Tm1,Tm2,PF_gbrred_Pred1,PA_gbrred_Pred1,PF_gbrred_Pred2,PA_gbrred_Pred2,PF_Tm1,PF_Tm2,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_Total
1,2018,1,Baltimore Ravens,47,3,Buffalo Bills,Baltimore Ravens,Buffalo Bills,7.5,44,F,39.0,50,O,Buffalo Bills,Baltimore Ravens,27.977903,18.969305,21.649245,19.512820,23.745361,20.309275,Buffalo Bills,3.436,Baltimore Ravens,44.055
2,2018,1,Carolina Panthers,16,8,Dallas Cowboys,Carolina Panthers,Dallas Cowboys,2.5,8,F,42.5,24,U,Dallas Cowboys,Carolina Panthers,27.302403,19.668586,20.706828,22.593123,24.947763,20.187707,Dallas Cowboys,4.760,Carolina Panthers,45.135
3,2018,1,Denver Broncos,27,24,Seattle Seahawks,Denver Broncos,Seattle Seahawks,3.0,3,U,42.5,51,O,Seattle Seahawks,Denver Broncos,24.727380,23.623704,17.056772,19.984455,22.355918,20.340238,Seattle Seahawks,2.016,Denver Broncos,42.696
4,2018,1,Minnesota Vikings,24,16,San Francisco 49ers,Minnesota Vikings,San Francisco 49ers,6.0,8,F,46.5,40,U,San Francisco 49ers,Minnesota Vikings,25.966943,16.725423,24.602116,23.932514,24.949729,20.663770,San Francisco 49ers,4.286,Minnesota Vikings,45.613
5,2018,1,Tampa Bay Buccaneers,48,40,New Orleans Saints,New Orleans Saints,Tampa Bay Buccaneers,10.0,8,U,50.0,88,O,New Orleans Saints,Tampa Bay Buccaneers,20.152110,19.732115,18.751479,21.467483,20.809797,19.241797,New Orleans Saints,1.568,Tampa Bay Buccaneers,40.052
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
654,2022,17,New Orleans Saints,20,10,Philadelphia Eagles,Philadelphia Eagles,New Orleans Saints,5.0,10,U,42.0,30,U,Philadelphia Eagles,New Orleans Saints,27.686473,20.643006,20.152110,19.732115,23.709294,20.397558,Philadelphia Eagles,3.312,New Orleans Saints,44.107
655,2022,17,Tampa Bay Buccaneers,30,24,Carolina Panthers,Tampa Bay Buccaneers,Carolina Panthers,3.5,6,F,40.5,54,O,Carolina Panthers,Tampa Bay Buccaneers,20.706828,22.593123,18.751479,21.467483,21.087155,20.672301,Carolina Panthers,0.415,Tampa Bay Buccaneers,41.759
656,2022,18,Denver Broncos,31,28,Los Angeles Chargers,Denver Broncos,Los Angeles Chargers,6.5,3,U,39.0,59,O,Los Angeles Chargers,Denver Broncos,23.341837,22.800059,17.056772,19.984455,21.663146,19.928416,Los Angeles Chargers,1.735,Denver Broncos,41.592
657,2022,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,Cleveland Browns,2.5,14,F,40.0,42,O,Cleveland Browns,Pittsburgh Steelers,20.945524,24.129603,19.727235,21.435633,21.190579,21.928419,Pittsburgh Steelers,0.738,Cleveland Browns,43.119


In [1384]:
df_comb_full_red_5 = [both_df3_full_red_5, both_df2_full_red_5]
result_full_red_5 = pd.concat(df_comb_full_red_5).reset_index(drop=True).dropna()
result_full_red_5['schedule_week'] = result_full_red_5['schedule_week'].astype(np.int64)
result_full_red_5['schedule_season'] = result_full_red_5['schedule_season'].astype(np.int64)
final_result_full_red_5 = result_full_red_5.sort_values(by=['schedule_season', 'schedule_week']).reset_index(drop=True)
stdata1822_full_red = final_result_full_red_5.filter(['schedule_season', 'schedule_week', 'winning_team', 'winning_score', 'losing_score', 'losing_team',
                                      'team_favorite', 'team_underdog', 'spread_favorite', 'actual_spread', 'F/U_cover',
                                      'over_under_line', 'actual_total', 'O/U', 'PF_Tm1', 'PF_Tm2', 'Pred_team_favorite',
                                      'Pred_Spread', 'Pred_team_underdog', 'Pred_Total'])
def pft(row):
    if row['PF_Tm1'] > row['PF_Tm2']:
        val = row['PF_Tm1']
    else:
        val = row['PF_Tm2']
    return val
def put(row):
    if row['PF_Tm1'] < row['PF_Tm2']:
        val = row['PF_Tm1']
    else:
        val = row['PF_Tm2']
    return val
stdata1822_full_red['Pred_PF_favorite'] = stdata1822_full_red.apply(pft, axis=1)
stdata1822_full_red['Pred_PF_underdog'] = stdata1822_full_red.apply(put, axis=1)
stdata1822_full_red = stdata1822_full_red.filter(['schedule_season', 'schedule_week', 'winning_team', 'winning_score', 'losing_score', 'losing_team',
                                      'team_favorite', 'spread_favorite', 'team_underdog', 'actual_spread', 'F/U_cover',
                                      'over_under_line', 'actual_total', 'O/U', 'Pred_PF_favorite', 'Pred_team_favorite',
                                    'Pred_Spread', 'Pred_team_underdog', 'Pred_PF_underdog', 'Pred_Total'])
stdata1822_full_red

Unnamed: 0,schedule_season,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,spread_favorite,team_underdog,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Pred_PF_favorite,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_PF_underdog,Pred_Total
0,2018,1,Philadelphia Eagles,18,12,Atlanta Falcons,Philadelphia Eagles,1.0,Atlanta Falcons,6,F,44.5,30,U,25.979503,Philadelphia Eagles,5.006,Atlanta Falcons,20.973901,46.953
1,2018,1,Green Bay Packers,24,23,Chicago Bears,Green Bay Packers,6.5,Chicago Bears,1,U,45.0,47,O,24.974397,Green Bay Packers,2.509,Chicago Bears,22.465440,47.440
2,2018,1,Cincinnati Bengals,34,23,Indianapolis Colts,Cincinnati Bengals,1.0,Indianapolis Colts,11,F,47.5,57,O,25.479667,Cincinnati Bengals,7.757,Indianapolis Colts,17.722952,43.203
3,2018,1,Kansas City Chiefs,38,28,Los Angeles Chargers,Los Angeles Chargers,3.5,Kansas City Chiefs,10,U,48.0,66,O,26.371561,Kansas City Chiefs,3.879,Los Angeles Chargers,22.492518,48.864
4,2018,1,Miami Dolphins,27,20,Tennessee Titans,Tennessee Titans,1.0,Miami Dolphins,7,U,43.5,47,O,22.062245,Miami Dolphins,1.159,Tennessee Titans,20.903325,42.966
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1209,2022,18,San Francisco 49ers,38,13,Arizona Cardinals,San Francisco 49ers,14.5,Arizona Cardinals,25,F,40.0,51,O,25.957615,San Francisco 49ers,7.671,Arizona Cardinals,18.286475,44.244
1210,2022,18,Seattle Seahawks,19,16,Los Angeles Rams,Seattle Seahawks,4.5,Los Angeles Rams,3,U,43.0,35,U,23.688190,Seattle Seahawks,2.523,Los Angeles Rams,21.165688,44.854
1211,2022,18,Denver Broncos,31,28,Los Angeles Chargers,Denver Broncos,6.5,Los Angeles Chargers,3,U,39.0,59,O,21.663146,Los Angeles Chargers,1.735,Denver Broncos,19.928416,41.592
1212,2022,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,2.5,Cleveland Browns,14,F,40.0,42,O,21.928419,Pittsburgh Steelers,0.738,Cleveland Browns,21.190579,43.119


In [1385]:
def psb(row):
    if row['team_underdog'] == row['Pred_team_favorite']:
        val = 'U'
    elif row['Pred_Spread'] < row['spread_favorite']:
        val = 'U'
    else:
        val = 'F'
    return val
def ptb(row):
    if row['Pred_Total'] > row['over_under_line']:
        val = 'O'
    else:
        val = 'U'
    return val
stdata1822_full_red['Pred_spread_bet'] = stdata1822_full_red.apply(psb, axis=1)
stdata1822_full_red['Pred_total_bet'] = stdata1822_full_red.apply(ptb, axis=1)
#Creating rows that track the W/L for both spread and total based on the predicted bets
def sbwl(row):
    if row['Pred_spread_bet'] == row['F/U_cover']:
        val = 'W'
    else:
        val = 'L'
    return val
def tbwl(row):
    if row['Pred_total_bet'] == row['O/U']:
        val = 'W'
    else:
        val = 'L'
    return val
stdata1822_full_red['spread_bet_W/L'] = stdata1822_full_red.apply(sbwl, axis=1)
stdata1822_full_red['total_bet_W/L'] = stdata1822_full_red.apply(tbwl, axis=1)
stdata1822_full_red

Unnamed: 0,schedule_season,schedule_week,winning_team,winning_score,losing_score,losing_team,team_favorite,spread_favorite,team_underdog,actual_spread,F/U_cover,over_under_line,actual_total,O/U,Pred_PF_favorite,Pred_team_favorite,Pred_Spread,Pred_team_underdog,Pred_PF_underdog,Pred_Total,Pred_spread_bet,Pred_total_bet,spread_bet_W/L,total_bet_W/L
0,2018,1,Philadelphia Eagles,18,12,Atlanta Falcons,Philadelphia Eagles,1.0,Atlanta Falcons,6,F,44.5,30,U,25.979503,Philadelphia Eagles,5.006,Atlanta Falcons,20.973901,46.953,F,O,W,L
1,2018,1,Green Bay Packers,24,23,Chicago Bears,Green Bay Packers,6.5,Chicago Bears,1,U,45.0,47,O,24.974397,Green Bay Packers,2.509,Chicago Bears,22.465440,47.440,U,O,W,W
2,2018,1,Cincinnati Bengals,34,23,Indianapolis Colts,Cincinnati Bengals,1.0,Indianapolis Colts,11,F,47.5,57,O,25.479667,Cincinnati Bengals,7.757,Indianapolis Colts,17.722952,43.203,F,U,W,L
3,2018,1,Kansas City Chiefs,38,28,Los Angeles Chargers,Los Angeles Chargers,3.5,Kansas City Chiefs,10,U,48.0,66,O,26.371561,Kansas City Chiefs,3.879,Los Angeles Chargers,22.492518,48.864,U,O,W,W
4,2018,1,Miami Dolphins,27,20,Tennessee Titans,Tennessee Titans,1.0,Miami Dolphins,7,U,43.5,47,O,22.062245,Miami Dolphins,1.159,Tennessee Titans,20.903325,42.966,U,U,W,L
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1209,2022,18,San Francisco 49ers,38,13,Arizona Cardinals,San Francisco 49ers,14.5,Arizona Cardinals,25,F,40.0,51,O,25.957615,San Francisco 49ers,7.671,Arizona Cardinals,18.286475,44.244,U,O,L,W
1210,2022,18,Seattle Seahawks,19,16,Los Angeles Rams,Seattle Seahawks,4.5,Los Angeles Rams,3,U,43.0,35,U,23.688190,Seattle Seahawks,2.523,Los Angeles Rams,21.165688,44.854,U,O,W,L
1211,2022,18,Denver Broncos,31,28,Los Angeles Chargers,Denver Broncos,6.5,Los Angeles Chargers,3,U,39.0,59,O,21.663146,Los Angeles Chargers,1.735,Denver Broncos,19.928416,41.592,U,O,W,W
1212,2022,18,Pittsburgh Steelers,28,14,Cleveland Browns,Pittsburgh Steelers,2.5,Cleveland Browns,14,F,40.0,42,O,21.928419,Pittsburgh Steelers,0.738,Cleveland Browns,21.190579,43.119,U,O,L,W


In [1387]:
stdata1822_full_red['total_bet_W/L'].value_counts()

W    630
L    584
Name: total_bet_W/L, dtype: int64

In [1388]:
stdata1822_full_red['spread_bet_W/L'].value_counts()

W    664
L    550
Name: spread_bet_W/L, dtype: int64