In [1]:
from scrapers.scrape_2021 import get_2021_data
import pandas as pd
import numpy as np
import pickle

import warnings
warnings.filterwarnings('ignore')

### Models

In [2]:
# Paths of the pickled models used for prediction further down.
rf = 'modeling/rf.pkl'
gbm = 'modeling/gbm.pkl'
lgb = 'modeling/lgb.pkl'
catboost = 'modeling/catboost.pkl'


def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    SECURITY: ``pickle.load`` can execute arbitrary code while
    deserialising, so only use this on files produced by this project.
    """
    with open(path, 'rb') as file:
        return pickle.load(file)


# One model object per file; the upper-case names are what later cells use.
RF = _load_pickle(rf)
GBM = _load_pickle(gbm)
LGB = _load_pickle(lgb)
CATBOOST = _load_pickle(catboost)

### 2021 data

In [9]:
# Load the 2021 data through the project's scraper module.
X = get_2021_data()

# Feature subset selected by the Boruta elimination method.
# The order is kept exactly as listed.
keep_cols = [
    'offense_downs_Fourth Downs_PCT',
    'offense_passing_AVG',
    'offense_passing_YDS/G',
    'offense_passing_RTG',
    'offense_receiving_AVG',
    'defense_passing_AVG',
    'defense_passing_RTG',
    'defense_receiving_AVG',
    'punting_punts_per_game',
    'defense_passing_SYL_per_game',
    'offense_downs_Third Downs_ATT_per_game',
    'defense_downs_Third Downs_ATT_per_game',
    'defense_downs_Fourth Downs_ATT_per_game',
    'defense_downs_First Downs_penalty_ratio',
    'offense_passing_TD_per_game',
    'offense_passing_INT_per_game',
    'defense_passing_INT_per_game',
    'offense_pass_TD_to_INT',
    'defense_pass_TD_to_INT',
]

# Keep the three identifier columns followed by the selected features;
# .copy() detaches the result from the frame returned by the scraper.
X = X[['Team', 'games_played', 'season', *keep_cols]].copy()

X.head(2)

Unnamed: 0,Team,games_played,season,offense_downs_Fourth Downs_PCT,offense_passing_AVG,offense_passing_YDS/G,offense_passing_RTG,offense_receiving_AVG,defense_passing_AVG,defense_passing_RTG,...,defense_passing_SYL_per_game,offense_downs_Third Downs_ATT_per_game,defense_downs_Third Downs_ATT_per_game,defense_downs_Fourth Downs_ATT_per_game,defense_downs_First Downs_penalty_ratio,offense_passing_TD_per_game,offense_passing_INT_per_game,defense_passing_INT_per_game,offense_pass_TD_to_INT,defense_pass_TD_to_INT
0,Las Vegas Raiders,1,2021,0.0,7.8,409.0,89.5,12.8,7.8,98.6,...,18.0,15.0,12.0,2.0,0.05,2.0,1.0,0.0,2.0,inf
1,Dallas Cowboys,1,2021,0.0,6.9,391.0,101.4,9.6,7.6,97.0,...,0.0,17.0,11.0,0.0,0.041667,3.0,1.0,2.0,3.0,2.0


In [16]:
# Replace missing values and +/-inf with 0 so every feature is finite.
# Built as a new frame (no inplace=True) so re-running the cell never
# mutates an object that an earlier cell already displayed.
# NOTE(review): inf shows up in the ratio columns (see the `inf` in
# defense_pass_TD_to_INT in the preview output); mapping it to 0 presumably
# mirrors the preprocessing used when the models were trained - confirm.
X = X.fillna(0).replace([np.inf, -np.inf], 0)

In [18]:
# Start from the identifier columns; one probability column per model is added.
predictions = X[['Team', 'season']].copy()

# Model inputs: everything except the identifier / bookkeeping columns.
# `columns=` is used because passing `axis` positionally to DataFrame.drop
# was deprecated in pandas 1.3 and raises TypeError from pandas 2.0 on.
# Computed once and shared by all models instead of re-dropping per model.
model_inputs = X.drop(columns=['Team', 'games_played', 'season'])

fitted_models = {
    'pred_rf': RF,
    'pred_gbm': GBM,
    'pred_lgb': LGB,
    'pred_catboost': CATBOOST,
}

for pred_col, fitted_model in fitted_models.items():
    # Column 1 of predict_proba is taken as the score; presumably this is
    # the positive class - confirm against each model's classes_ ordering.
    predictions[pred_col] = fitted_model.predict_proba(model_inputs)[:, 1]

# Unweighted average of the individual model probabilities.
predictions['pred_mean'] = predictions[list(fitted_models)].mean(axis=1)

predictions.head(3)

Unnamed: 0,Team,season,pred_rf,pred_gbm,pred_lgb,pred_catboost,pred_mean
0,Las Vegas Raiders,2021,0.127655,0.079768,0.020914,0.262384,0.12268
1,Dallas Cowboys,2021,0.1233,0.086218,0.018544,0.266005,0.123517
2,Tampa Bay Buccaneers,2021,0.183365,0.170964,0.582563,0.282579,0.304868


In [19]:
# TODO: add a week marker to each run's predictions so that, after every week,
# the weekly results can be joined together column-wise:
# team - season - week1 - week2 - ... - week17

In [23]:
# Rank the teams from highest to lowest averaged model probability.
predictions.sort_values(by='pred_mean', ascending=False)

Unnamed: 0,Team,season,pred_rf,pred_gbm,pred_lgb,pred_catboost,pred_mean
8,Los Angeles Rams,2021,0.338434,0.399649,0.830483,0.437053,0.501405
9,San Francisco 49ers,2021,0.327355,0.405187,0.835084,0.406737,0.493591
12,Arizona Cardinals,2021,0.343278,0.341233,0.339986,0.42578,0.362569
5,Kansas City Chiefs,2021,0.248749,0.201192,0.500111,0.328736,0.319697
2,Tampa Bay Buccaneers,2021,0.183365,0.170964,0.582563,0.282579,0.304868
19,Seattle Seahawks,2021,0.290345,0.281733,0.074999,0.357663,0.251185
22,Baltimore Ravens,2021,0.152085,0.163704,0.410286,0.27088,0.249239
21,Cincinnati Bengals,2021,0.266793,0.233474,0.076343,0.336191,0.2282
11,Houston Texans,2021,0.220041,0.164069,0.204398,0.316454,0.226241
15,Philadelphia Eagles,2021,0.21179,0.168021,0.162648,0.329015,0.217869
