In [1]:
import pandas as pd
import numpy as np
import pickle

import matplotlib.pyplot as plt
import seaborn as sns

# from sklearn.tree import DecisionTreeClassifier, plot_tree
# from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
# import lightgbm as lgb
# from catboost import CatBoostClassifier

import warnings
warnings.filterwarnings('ignore')

### Models

In [3]:
# Paths of the pickled model objects (relative to this notebook's directory).
rf = '../modeling/rf.pkl'
gbm = '../modeling/gbm.pkl'
lgb = '../modeling/lgb.pkl'
catboost = '../modeling/catboost.pkl'


def _unpickle(path):
    """Open ``path`` in binary mode and return the object pickled inside it."""
    # NOTE(review): pickle.load can execute arbitrary code stored in the file;
    # only point this at model files you produced or otherwise trust.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# One loaded model per path, in the same order as the paths above.
RF, GBM, LGB, CATBOOST = (_unpickle(path) for path in (rf, gbm, lgb, catboost))

### Data (in necessary format)

In [4]:
# Labeled modeling table; later cells read its `Team`, `season`, `winner`
# and `played` columns alongside the feature columns.
data = pd.read_csv('../data/scraped_for_modeling_labeled.csv')

# Replace missing values with 0 (per the original note they stem from
# divisions with 0). Reassigning instead of `inplace=True` keeps the cell
# idempotent and chain-friendly.
data = data.fillna(0)

# Feature frame = everything except the two label columns.
# `columns=` is used because the positional axis argument (`drop(labels, 1)`)
# was deprecated in pandas 1.3 and raises a TypeError from pandas 2.0 on.
X = data.drop(columns=['winner', 'played']).copy()

# Target used for the predictions below: the `played` flag.
y = data['played'].copy()

In [5]:
# Feature columns retained by the Boruta elimination method.
keep_cols = [
    'offense_points_per_game',
    'offense_passing_AVG',
    'offense_passing_YDS/G',
    'offense_passing_RTG',
    'offense_receiving_AVG',
    'offense_rushing_YDS/G',
    'defense_points_per_game',
    'defense_passing_AVG',
    'defense_receiving_AVG',
    'defense_rushing_YDS/G',
    'defense_passing_SYL_per_game',
    'offense_downs_Third Downs_ATT_per_game',
    'defense_downs_Third Downs_ATT_per_game',
    'defense_downs_Fourth Downs_ATT_per_game',
    'defense_pass_to_rush_ratio',
    'defense_downs_First Downs_penalty_ratio',
    'offense_passing_TD_per_game',
    'offense_pass_TD_to_INT',
]

# Narrow X to the three identifier columns followed by the selected features.
X = X.loc[:, ['Team', 'games_played', 'season', *keep_cols]].copy()

### Prediction

In [6]:
# Identifier and label columns that accompany every model score.
predictions = data[['Team', 'season', 'winner', 'played']].copy()

# Build the model input once instead of re-dropping the identifier columns for
# every model. `columns=` replaces the positional axis argument
# (`drop(labels, 1)`), which pandas 2.0 no longer accepts.
model_input = X.drop(columns=['Team', 'games_played', 'season'])

# Output column name -> loaded model that fills it.
scorers = {
    'pred_rf': RF,
    'pred_gbm': GBM,
    'pred_lgb': LGB,
    'pred_catboost': CATBOOST,
}

# Keep column 1 of predict_proba, i.e. the probability of the second class
# (presumably `played == 1`; the models were trained elsewhere -- confirm).
for pred_col, model in scorers.items():
    predictions[pred_col] = model.predict_proba(model_input)[:, 1]

# Unweighted average of the four model scores. skipna=False keeps a NaN score
# visible in the mean instead of silently averaging over fewer models.
predictions['pred_mean'] = predictions[list(scorers)].mean(axis=1, skipna=False)

predictions.head(3)

Unnamed: 0,Team,season,winner,played,pred_rf,pred_gbm,pred_lgb,pred_catboost,pred_mean
0,Kansas City Chiefs,2004,0,0,0.196649,0.124841,0.160476,0.184038,0.166501
1,Indianapolis Colts,2004,0,0,0.328111,0.294157,0.278968,0.27849,0.294932
2,Green Bay Packers,2004,0,0,0.045855,0.032971,0.07135,0.163598,0.078443


### Compare predictions vs actuals

Top - high probs and played

In [11]:
# Rows with played == 1, highest ensemble score first; show the top 10.
played_mask = predictions['played'].eq(1)
predictions.loc[played_mask].sort_values(by='pred_mean', ascending=False).head(10)

Unnamed: 0,Team,season,winner,played,pred_rf,pred_gbm,pred_lgb,pred_catboost,pred_mean
96,New England Patriots,2007,0,1,0.351968,0.434264,0.685798,0.516131,0.49704
362,Carolina Panthers,2015,0,1,0.317189,0.349817,0.727457,0.43853,0.458248
305,Seattle Seahawks,2013,1,1,0.290212,0.377354,0.665427,0.489901,0.455724
483,San Francisco 49ers,2019,0,1,0.328725,0.363684,0.610618,0.446373,0.43735
485,Kansas City Chiefs,2019,1,1,0.332993,0.353904,0.572082,0.37675,0.408932
205,Pittsburgh Steelers,2010,0,1,0.289912,0.357756,0.580732,0.39992,0.40708
46,Pittsburgh Steelers,2005,1,1,0.244743,0.303196,0.682187,0.369739,0.399966
33,Seattle Seahawks,2005,0,1,0.249388,0.293944,0.690351,0.345195,0.394719
8,Philadelphia Eagles,2004,0,1,0.234348,0.289649,0.634837,0.387625,0.386615
266,San Francisco 49ers,2012,0,1,0.256329,0.29903,0.516041,0.438393,0.377448


Top - high probs but missed

In [14]:
# Rows with played == 0, highest ensemble score first; show the top 10.
(
    predictions
    .query('played == 0')
    .sort_values(by='pred_mean', ascending=False)
    .head(10)
)

Unnamed: 0,Team,season,winner,played,pred_rf,pred_gbm,pred_lgb,pred_catboost,pred_mean
259,Denver Broncos,2012,0,0,0.29666,0.328105,0.400581,0.260068,0.321353
448,Kansas City Chiefs,2018,0,0,0.31275,0.282516,0.307828,0.299755,0.300712
34,Indianapolis Colts,2005,0,0,0.272242,0.272539,0.338536,0.306142,0.297365
1,Indianapolis Colts,2004,0,0,0.328111,0.294157,0.278968,0.27849,0.294932
226,Green Bay Packers,2011,0,0,0.278301,0.23741,0.398967,0.246741,0.290355
357,New England Patriots,2015,0,0,0.272901,0.256415,0.332913,0.249379,0.277902
224,New Orleans Saints,2011,0,0,0.257567,0.242703,0.298754,0.284658,0.27092
513,Buffalo Bills,2020,0,0,0.250644,0.236445,0.276196,0.253335,0.254155
323,Denver Broncos,2014,0,0,0.264951,0.233704,0.225564,0.263713,0.246983
100,Indianapolis Colts,2007,0,0,0.236938,0.190934,0.29484,0.26487,0.246895


Top - low probs but played (unexpected Super Bowl participants!)

In [15]:
# Rows with played == 1, lowest ensemble score first; show the first 10.
played_mask = predictions['played'].eq(1)
predictions.loc[played_mask].sort_values(by='pred_mean', ascending=True).head(10)

Unnamed: 0,Team,season,winner,played,pred_rf,pred_gbm,pred_lgb,pred_catboost,pred_mean
452,New England Patriots,2018,1,1,0.074188,0.056628,0.086802,0.177895,0.098879
271,Baltimore Ravens,2012,1,1,0.064515,0.085987,0.094661,0.15323,0.099598
330,New England Patriots,2014,1,1,0.154185,0.095716,0.07956,0.17827,0.126933
111,New York Giants,2007,1,1,0.077548,0.105452,0.211487,0.161602,0.139022
66,Indianapolis Colts,2006,1,1,0.130318,0.111567,0.123399,0.205296,0.142645
131,Arizona Cardinals,2008,0,1,0.130491,0.121578,0.157217,0.19938,0.152167
231,New York Giants,2011,1,1,0.165267,0.127785,0.271817,0.23167,0.199135
422,Philadelphia Eagles,2017,1,1,0.212203,0.147819,0.302002,0.232899,0.223731
367,Denver Broncos,2015,1,1,0.194381,0.159757,0.35477,0.222455,0.232841
200,Green Bay Packers,2010,1,1,0.200861,0.201729,0.289242,0.270868,0.240675


Top - low probs and missed (correctly predicting absence)

In [17]:
# Rows with played == 0, lowest ensemble score first; show the first 10.
(
    predictions
    .query('played == 0')
    .sort_values(by='pred_mean', ascending=True)
    .head(10)
)

Unnamed: 0,Team,season,winner,played,pred_rf,pred_gbm,pred_lgb,pred_catboost,pred_mean
413,Cleveland Browns,2016,0,0,0.000167,0.024464,0.037912,0.132052,0.048649
382,San Francisco 49ers,2015,0,0,0.0,0.024221,0.037912,0.132523,0.048664
48,Tennessee Titans,2005,0,0,0.0,0.024647,0.037912,0.132256,0.048704
25,San Francisco 49ers,2004,0,0,0.000559,0.025892,0.037912,0.13078,0.048786
446,Indianapolis Colts,2017,0,0,0.0,0.023843,0.037912,0.133594,0.048837
123,Miami Dolphins,2007,0,0,0.000392,0.02551,0.037912,0.131579,0.048848
253,Indianapolis Colts,2011,0,0,0.000871,0.024817,0.037912,0.13188,0.04887
376,Cleveland Browns,2015,0,0,0.0,0.024138,0.037912,0.133952,0.049
121,New York Jets,2007,0,0,0.000479,0.024213,0.037912,0.13373,0.049083
441,Green Bay Packers,2017,0,0,0.0,0.024207,0.037912,0.134319,0.049109


Very high probs but lost

In [18]:
# "Lost" is taken to mean: played (played == 1) but did not win (winner == 0).
# Filtering on winner == 0 alone also admits rows with played == 0, so the
# table would only show actual losers by coincidence of the score ranking.
# NOTE(review): confirm that "lost" is meant as "played and lost".
lost_mask = predictions['played'].eq(1) & predictions['winner'].eq(0)
predictions.loc[lost_mask].sort_values(by='pred_mean', ascending=False).head(10)

Unnamed: 0,Team,season,winner,played,pred_rf,pred_gbm,pred_lgb,pred_catboost,pred_mean
96,New England Patriots,2007,0,1,0.351968,0.434264,0.685798,0.516131,0.49704
362,Carolina Panthers,2015,0,1,0.317189,0.349817,0.727457,0.43853,0.458248
483,San Francisco 49ers,2019,0,1,0.328725,0.363684,0.610618,0.446373,0.43735
205,Pittsburgh Steelers,2010,0,1,0.289912,0.357756,0.580732,0.39992,0.40708
33,Seattle Seahawks,2005,0,1,0.249388,0.293944,0.690351,0.345195,0.394719
8,Philadelphia Eagles,2004,0,1,0.234348,0.289649,0.634837,0.387625,0.386615
266,San Francisco 49ers,2012,0,1,0.256329,0.29903,0.516041,0.438393,0.377448
385,Atlanta Falcons,2016,0,1,0.30654,0.2863,0.51123,0.381049,0.37128
288,Denver Broncos,2013,0,1,0.294944,0.28222,0.52456,0.37782,0.369886
416,New England Patriots,2017,0,1,0.287721,0.291674,0.473017,0.301003,0.338354


Very low probs but won

In [19]:
# Rows with winner == 1, lowest ensemble score first; show the first 10.
winner_mask = predictions['winner'].eq(1)
predictions.loc[winner_mask].sort_values(by='pred_mean', ascending=True).head(10)

Unnamed: 0,Team,season,winner,played,pred_rf,pred_gbm,pred_lgb,pred_catboost,pred_mean
452,New England Patriots,2018,1,1,0.074188,0.056628,0.086802,0.177895,0.098879
271,Baltimore Ravens,2012,1,1,0.064515,0.085987,0.094661,0.15323,0.099598
330,New England Patriots,2014,1,1,0.154185,0.095716,0.07956,0.17827,0.126933
111,New York Giants,2007,1,1,0.077548,0.105452,0.211487,0.161602,0.139022
66,Indianapolis Colts,2006,1,1,0.130318,0.111567,0.123399,0.205296,0.142645
231,New York Giants,2011,1,1,0.165267,0.127785,0.271817,0.23167,0.199135
422,Philadelphia Eagles,2017,1,1,0.212203,0.147819,0.302002,0.232899,0.223731
367,Denver Broncos,2015,1,1,0.194381,0.159757,0.35477,0.222455,0.232841
200,Green Bay Packers,2010,1,1,0.200861,0.201729,0.289242,0.270868,0.240675
6,New England Patriots,2004,1,1,0.187741,0.239652,0.38539,0.305765,0.279637
