In [1]:
import pandas as pd
import numpy as np
import pickle

import matplotlib.pyplot as plt
import seaborn as sns

# from sklearn.tree import DecisionTreeClassifier, plot_tree
# from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
# import lightgbm as lgb
# from catboost import CatBoostClassifier

import warnings
warnings.filterwarnings('ignore')

### Models

In [2]:
# Locations of the pickled model objects, relative to this notebook.
# NOTE(review): `lgb` is also the usual alias for the lightgbm package (see the
# commented-out import in the first cell); re-enabling that import before this
# cell would be shadowed by the path string below.
logit = '../modeling/logit.pkl'
rf = '../modeling/rf.pkl'
gbm = '../modeling/gbm.pkl'
lgb = '../modeling/lgb.pkl'
catboost = '../modeling/catboost.pkl'


def _load_pickled_model(path):
    """Open ``path`` in binary mode and return the object unpickled from it.

    SECURITY: ``pickle.load`` can execute arbitrary code while deserializing,
    so only point this at files produced by a trusted source.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# One upper-case name per loaded model, in the same order as the paths above.
LOGIT = _load_pickled_model(logit)
RF = _load_pickled_model(rf)
GBM = _load_pickled_model(gbm)
LGB = _load_pickled_model(lgb)
CATBOOST = _load_pickled_model(catboost)

### Data (in necessary format)

In [3]:
# Load the labeled modeling table (path is relative to this notebook).
data = pd.read_csv('../data/scraped_for_modeling_labeled_2022.csv')

# Replace every missing value with 0; per the original note these gaps come
# from divisions with 0. Assignment is used instead of `inplace=True` so the
# step reads as an explicit transformation.
data = data.fillna(0)

# Feature frame: everything except the two label columns.
# `columns=` replaces the positional `axis` argument (`drop([...], 1)`), which
# was deprecated in pandas 1.3 and raises TypeError from pandas 2.0 onwards.
X = data.drop(columns=['winner', 'played']).copy()

# Target used in this notebook: the `played` label.
y = data['played'].copy()

In [4]:
# Feature selection, attributed by the original note to the Boruta elimination
# method. The commented-out list below is an alternative selection kept for
# reference.
# NOTE(review): confirm which of the two lists is the current Boruta output.
#keep_cols = ['offense_points_per_game', 'offense_passing_AVG', 'offense_passing_YDS/G', 'offense_passing_RTG', 
#             'offense_receiving_AVG', 'offense_rushing_YDS/G', 'defense_points_per_game', 'defense_passing_AVG', 
#             'defense_receiving_AVG', 'defense_rushing_YDS/G', 'defense_passing_SYL_per_game', 
#             'offense_downs_Third Downs_ATT_per_game', 'defense_downs_Third Downs_ATT_per_game', 
#             'defense_downs_Fourth Downs_ATT_per_game', 'defense_pass_to_rush_ratio', 
#             'defense_downs_First Downs_penalty_ratio', 'offense_passing_TD_per_game', 'offense_pass_TD_to_INT']

# Active feature list; the order here is the column order of X after this cell.
keep_cols = [
    'offense_points_per_game',
    'offense_downs_Fourth Downs_PCT',
    'offense_passing_AVG',
    'offense_passing_YDS/G',
    'offense_passing_RTG',
    'offense_receiving_AVG',
    'defense_points_per_game',
    'defense_passing_AVG',
    'defense_receiving_AVG',
    'defense_rushing_YDS/G',
    'defense_passing_SYL_per_game',
    'offense_downs_Third Downs_ATT_per_game',
    'defense_downs_Third Downs_ATT_per_game',
    'defense_downs_First Downs_penalty_ratio',
    'offense_passing_TD_per_game',
    'defense_rushing_TD_per_game',
    'defense_pass_TD_per_rush_TD',
    'offense_pass_TD_to_INT',
]

# Keep the three non-feature columns in front, followed by the selected features.
leading_cols = ['Team', 'games_played', 'season']
X = X[leading_cols + keep_cols].copy()

### Prediction

In [5]:
# Start from the identifier and label columns; one score column per model is
# appended below.
predictions = data[['Team', 'season', 'winner', 'played']].copy()

# Model inputs: X without its non-feature columns, built once and shared by
# every model. `columns=` replaces the positional `axis` argument
# (`drop([...], 1)`), which raises TypeError from pandas 2.0 onwards.
model_inputs = X.drop(columns=['Team', 'games_played', 'season'])

# Column 1 of predict_proba is stored for each model.
# NOTE(review): presumably this is the probability of the positive class
# (played == 1) -- confirm against each model's `classes_`.
fitted_models = {
    'pred_logit': LOGIT,
    'pred_rf': RF,
    'pred_gbm': GBM,
    'pred_lgb': LGB,
    'pred_catboost': CATBOOST,
}
for score_col, model in fitted_models.items():
    predictions[score_col] = model.predict_proba(model_inputs)[:, 1]

# Unweighted average of the five model scores.
predictions['pred_mean'] = (
    predictions['pred_logit']
    + predictions['pred_rf']
    + predictions['pred_gbm']
    + predictions['pred_lgb']
    + predictions['pred_catboost']
) / 5

# Weighted blend: 1 part logit to 7 parts catboost.
predictions['pred_opt_combo'] = (predictions['pred_logit'] + 7*predictions['pred_catboost']) / 8

predictions.head(3)

Unnamed: 0,Team,season,winner,played,pred_logit,pred_rf,pred_gbm,pred_lgb,pred_catboost,pred_mean,pred_opt_combo
0,Kansas City Chiefs,2004,0,0,0.013104,0.169245,0.099372,0.208321,0.356028,0.169214,0.313162
1,Indianapolis Colts,2004,0,0,0.690508,0.305955,0.197491,0.295406,0.385753,0.375023,0.423848
2,Green Bay Packers,2004,0,0,0.017505,0.020536,0.04472,0.181967,0.339043,0.120754,0.298851


### Compare predictions vs actuals

Top - high probs and played

In [6]:
# Rows labeled played == 1, ten highest blended scores first.
(
    predictions
    .loc[predictions['played'].eq(1)]
    .sort_values(by='pred_opt_combo', ascending=False)
    .head(10)
)

Unnamed: 0,Team,season,winner,played,pred_logit,pred_rf,pred_gbm,pred_lgb,pred_catboost,pred_mean,pred_opt_combo
96,New England Patriots,2007,0,1,0.875535,0.364412,0.283844,0.462269,0.43741,0.484694,0.492176
387,New England Patriots,2016,1,1,0.880848,0.323557,0.240181,0.4286,0.404565,0.45555,0.4641
385,Atlanta Falcons,2016,0,1,0.569314,0.342743,0.23188,0.400921,0.411814,0.391334,0.431502
305,Seattle Seahawks,2013,1,1,0.469181,0.331712,0.235539,0.460687,0.423079,0.384039,0.428841
8,Philadelphia Eagles,2004,0,1,0.535147,0.304324,0.180364,0.407294,0.397868,0.364999,0.415028
362,Carolina Panthers,2015,0,1,0.643624,0.270108,0.174958,0.355561,0.381218,0.365094,0.414019
518,Tampa Bay Buccaneers,2020,1,1,0.53258,0.228415,0.166639,0.362309,0.390435,0.336076,0.408203
288,Denver Broncos,2013,0,1,0.538232,0.248852,0.161775,0.329655,0.383462,0.332395,0.402808
485,Kansas City Chiefs,2019,1,1,0.555986,0.328593,0.186751,0.335105,0.378048,0.356896,0.40029
205,Pittsburgh Steelers,2010,0,1,0.404146,0.312962,0.241048,0.42019,0.39804,0.355277,0.398803


Top - high probs but missed

In [7]:
# Rows labeled played == 0, ten highest blended scores first.
(
    predictions
    .loc[predictions['played'].eq(0)]
    .sort_values(by='pred_opt_combo', ascending=False)
    .head(10)
)

Unnamed: 0,Team,season,winner,played,pred_logit,pred_rf,pred_gbm,pred_lgb,pred_catboost,pred_mean,pred_opt_combo
1,Indianapolis Colts,2004,0,0,0.690508,0.305955,0.197491,0.295406,0.385753,0.375023,0.423848
34,Indianapolis Colts,2005,0,0,0.532529,0.336384,0.207055,0.303619,0.382941,0.352506,0.401639
352,Arizona Cardinals,2015,0,0,0.630574,0.311646,0.164962,0.235409,0.367161,0.34195,0.400087
226,Green Bay Packers,2011,0,0,0.384745,0.283433,0.177397,0.297726,0.387499,0.30616,0.387154
259,Denver Broncos,2012,0,0,0.445885,0.24169,0.163424,0.288458,0.37632,0.303155,0.385015
357,New England Patriots,2015,0,0,0.433276,0.24256,0.157694,0.230721,0.369562,0.286763,0.377527
448,Kansas City Chiefs,2018,0,0,0.380266,0.283727,0.157704,0.276318,0.373942,0.294391,0.374732
199,New England Patriots,2010,0,0,0.462068,0.234228,0.120529,0.276796,0.361765,0.291077,0.374303
323,Denver Broncos,2014,0,0,0.432136,0.272721,0.144139,0.264457,0.364857,0.295662,0.373267
354,Pittsburgh Steelers,2015,0,0,0.438431,0.105876,0.092361,0.222389,0.359518,0.243715,0.369382


Top - low probs but played (unexpected SB candidates!)

In [8]:
# Rows labeled played == 1, ten lowest blended scores first.
(
    predictions
    .loc[predictions['played'].eq(1)]
    .sort_values(by='pred_opt_combo', ascending=True)
    .head(10)
)

Unnamed: 0,Team,season,winner,played,pred_logit,pred_rf,pred_gbm,pred_lgb,pred_catboost,pred_mean,pred_opt_combo
271,Baltimore Ravens,2012,1,1,0.047103,0.021728,0.070097,0.180201,0.326795,0.129185,0.291833
111,New York Giants,2007,1,1,0.02216,0.044278,0.074471,0.213723,0.332663,0.137459,0.29385
131,Arizona Cardinals,2008,0,1,0.020384,0.209132,0.140193,0.336752,0.349016,0.211096,0.307937
452,New England Patriots,2018,1,1,0.079612,0.163586,0.163081,0.355017,0.359394,0.224138,0.324421
231,New York Giants,2011,1,1,0.104603,0.11343,0.120067,0.26182,0.360305,0.192045,0.328343
367,Denver Broncos,2015,1,1,0.216068,0.099339,0.112205,0.246759,0.350397,0.204954,0.333606
330,New England Patriots,2014,1,1,0.074781,0.160795,0.140599,0.312798,0.371294,0.212053,0.33423
66,Indianapolis Colts,2006,1,1,0.070651,0.218744,0.129607,0.266816,0.374086,0.211981,0.336157
422,Philadelphia Eagles,2017,1,1,0.116667,0.202722,0.170783,0.351837,0.368711,0.242144,0.337206
160,New Orleans Saints,2009,1,1,0.149743,0.299289,0.175239,0.359389,0.377776,0.272287,0.349272


Top - low probs and missed (correctly predicting absence)

In [9]:
# Rows labeled played == 0, ten lowest blended scores first.
(
    predictions
    .loc[predictions['played'].eq(0)]
    .sort_values(by='pred_opt_combo', ascending=True)
    .head(10)
)

Unnamed: 0,Team,season,winner,played,pred_logit,pred_rf,pred_gbm,pred_lgb,pred_catboost,pred_mean,pred_opt_combo
382,San Francisco 49ers,2015,0,0,0.000345,0.000796,0.039717,0.179305,0.314175,0.106868,0.274947
413,Cleveland Browns,2016,0,0,9.4e-05,0.002946,0.039877,0.179305,0.314489,0.107342,0.27519
25,San Francisco 49ers,2004,0,0,0.000102,0.002946,0.04037,0.179305,0.315475,0.10764,0.276053
505,Cincinnati Bengals,2019,0,0,0.000184,0.000796,0.039962,0.179305,0.315502,0.10715,0.276087
147,Jacksonville Jaguars,2008,0,0,0.000569,0.000796,0.039717,0.179305,0.315507,0.107179,0.27614
250,Kansas City Chiefs,2011,0,0,0.0002,0.000796,0.039969,0.179305,0.315814,0.107217,0.276362
121,New York Jets,2007,0,0,0.000616,0.000796,0.040131,0.179305,0.316036,0.107377,0.276608
61,Houston Texans,2005,0,0,2.1e-05,0.000796,0.040785,0.179305,0.31616,0.107413,0.276642
177,Jacksonville Jaguars,2009,0,0,0.00045,0.000796,0.039962,0.179305,0.316157,0.107334,0.276694
284,Jacksonville Jaguars,2012,0,0,0.000288,0.004214,0.040484,0.179305,0.316239,0.108106,0.276745


Very high probs but did not win (includes teams that missed the game)

In [10]:
# Rows labeled winner == 0, ten highest blended scores first.
# The filter is on `winner` only, so rows with played == 0 are included
# alongside rows with played == 1.
(
    predictions
    .loc[predictions['winner'].eq(0)]
    .sort_values(by='pred_opt_combo', ascending=False)
    .head(10)
)

Unnamed: 0,Team,season,winner,played,pred_logit,pred_rf,pred_gbm,pred_lgb,pred_catboost,pred_mean,pred_opt_combo
96,New England Patriots,2007,0,1,0.875535,0.364412,0.283844,0.462269,0.43741,0.484694,0.492176
385,Atlanta Falcons,2016,0,1,0.569314,0.342743,0.23188,0.400921,0.411814,0.391334,0.431502
1,Indianapolis Colts,2004,0,0,0.690508,0.305955,0.197491,0.295406,0.385753,0.375023,0.423848
8,Philadelphia Eagles,2004,0,1,0.535147,0.304324,0.180364,0.407294,0.397868,0.364999,0.415028
362,Carolina Panthers,2015,0,1,0.643624,0.270108,0.174958,0.355561,0.381218,0.365094,0.414019
288,Denver Broncos,2013,0,1,0.538232,0.248852,0.161775,0.329655,0.383462,0.332395,0.402808
34,Indianapolis Colts,2005,0,0,0.532529,0.336384,0.207055,0.303619,0.382941,0.352506,0.401639
352,Arizona Cardinals,2015,0,0,0.630574,0.311646,0.164962,0.235409,0.367161,0.34195,0.400087
205,Pittsburgh Steelers,2010,0,1,0.404146,0.312962,0.241048,0.42019,0.39804,0.355277,0.398803
33,Seattle Seahawks,2005,0,1,0.303248,0.221699,0.203207,0.442231,0.402319,0.314541,0.389935


Very low probs but won

In [11]:
# Rows labeled winner == 1, ten lowest blended scores first.
(
    predictions
    .loc[predictions['winner'].eq(1)]
    .sort_values(by='pred_opt_combo', ascending=True)
    .head(10)
)

Unnamed: 0,Team,season,winner,played,pred_logit,pred_rf,pred_gbm,pred_lgb,pred_catboost,pred_mean,pred_opt_combo
271,Baltimore Ravens,2012,1,1,0.047103,0.021728,0.070097,0.180201,0.326795,0.129185,0.291833
111,New York Giants,2007,1,1,0.02216,0.044278,0.074471,0.213723,0.332663,0.137459,0.29385
452,New England Patriots,2018,1,1,0.079612,0.163586,0.163081,0.355017,0.359394,0.224138,0.324421
231,New York Giants,2011,1,1,0.104603,0.11343,0.120067,0.26182,0.360305,0.192045,0.328343
367,Denver Broncos,2015,1,1,0.216068,0.099339,0.112205,0.246759,0.350397,0.204954,0.333606
330,New England Patriots,2014,1,1,0.074781,0.160795,0.140599,0.312798,0.371294,0.212053,0.33423
66,Indianapolis Colts,2006,1,1,0.070651,0.218744,0.129607,0.266816,0.374086,0.211981,0.336157
422,Philadelphia Eagles,2017,1,1,0.116667,0.202722,0.170783,0.351837,0.368711,0.242144,0.337206
160,New Orleans Saints,2009,1,1,0.149743,0.299289,0.175239,0.359389,0.377776,0.272287,0.349272
6,New England Patriots,2004,1,1,0.307621,0.139856,0.082169,0.22689,0.356854,0.222678,0.350699
