In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sportsreference.nfl.boxscore import Boxscores, Boxscore
from functions import display,get_schedule, game_data, game_data_up_to_week, agg_weekly_data, get_elo, merge_rankings, prep_test_train

In [2]:
current_week = 17
weeks = list(range(1,current_week + 1))
year = 2020

pred_games_df, comp_games_df = prep_test_train(current_week, weeks, year)

In [3]:
msk = np.random.rand(len(comp_games_df)) < 0.8

train_df = comp_games_df[msk]
test_df = comp_games_df[~msk]

X_train = train_df.drop(columns = ['away_name', 'away_abbr', 'home_name', 'home_abbr', 'week','result'])
y_train = train_df[['result']] 
X_test = test_df.drop(columns = ['away_name', 'away_abbr', 'home_name', 'home_abbr', 'week','result'])
y_test = test_df[['result']]

Reviewing Logistic Regression

In [4]:
clf = LogisticRegression(penalty='l1', dual=False, tol=0.001, C=1.0, fit_intercept=True, 
                   intercept_scaling=1, class_weight='balanced', random_state=None, 
                   solver='liblinear', max_iter=10000, multi_class='ovr', verbose=0)

clf.fit(X_train, np.ravel(y_train.values))
y_pred = clf.predict_proba(X_test)
y_pred = y_pred[:,1]

display(y_pred,test_df)

The Minnesota Vikings have a probability of 0.21 of beating the Indianapolis Colts.
The Los Angeles Rams have a probability of 0.59 of beating the Philadelphia Eagles.
The Houston Texans have a probability of 0.11 of beating the Pittsburgh Steelers.
The Dallas Cowboys have a probability of 0.09 of beating the Seattle Seahawks.
The Green Bay Packers have a probability of 0.83 of beating the New Orleans Saints.
The Arizona Cardinals have a probability of 0.71 of beating the Carolina Panthers.
The New York Giants have a probability of 0.4 of beating the Los Angeles Rams.
The Philadelphia Eagles have a probability of 0.58 of beating the San Francisco 49ers.
The Philadelphia Eagles have a probability of 0.28 of beating the Pittsburgh Steelers.
The Las Vegas Raiders have a probability of 0.15 of beating the Kansas City Chiefs.
The Minnesota Vikings have a probability of 0.21 of beating the Seattle Seahawks.
The Cincinnati Bengals have a probability of 0.2 of beating the Indianapolis Colts.
T

In [5]:
accuracy_score(y_test,np.round(y_pred))

0.6190476190476191

Reviewing Gradient Boosting Model

In [6]:
from sklearn.ensemble import GradientBoostingClassifier

In [7]:
GradientBoost = GradientBoostingClassifier(loss='deviance', learning_rate=0.1, n_estimators=10000, subsample=1.0,criterion='friedman_mse', min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_depth=3, min_impurity_decrease=0.0,min_impurity_split=None, init=None, random_state=None, max_features=None, verbose=0, max_leaf_nodes=None, warm_start=False, validation_fraction=0.1,n_iter_no_change=None, tol=0.0001, ccp_alpha=0.0)

In [8]:
GradientBoost.fit(X_train, np.ravel(y_train.values))
GradientBoost.score(X_test, y_test)

0.6190476190476191

In [9]:
gb_y_pred = GradientBoost.predict_proba(X_test)
gb_y_pred = gb_y_pred[:,1]

array([7.31738449e-33, 1.86652795e-32, 1.56786118e-27, 5.98208598e-31,
       9.98688175e-01, 1.14636640e-06, 1.38935583e-49, 7.02945975e-08,
       1.69205746e-26, 4.12246277e-24, 2.20416103e-12, 7.74404040e-01,
       3.23440675e-29, 7.94991631e-01, 1.00000000e+00, 1.20522570e-08,
       7.04912370e-30, 1.00000000e+00, 4.97781380e-01, 9.99999993e-01,
       7.96939227e-01, 6.89631324e-07, 8.49328892e-03, 1.00000000e+00,
       3.19260080e-09, 2.92128092e-05, 1.00000000e+00, 9.96636275e-01,
       9.42275833e-10, 6.33061480e-33, 4.30038813e-24, 9.99813818e-01,
       3.24289705e-28, 9.99999996e-01, 8.26475938e-12, 2.24256013e-29,
       9.99981839e-01, 3.54555282e-23, 4.63846897e-12, 9.99996975e-01,
       9.99999882e-01, 9.99995591e-01])

In [10]:
display(y_pred,test_df)

The Minnesota Vikings have a probability of 0.21 of beating the Indianapolis Colts.
The Los Angeles Rams have a probability of 0.59 of beating the Philadelphia Eagles.
The Houston Texans have a probability of 0.11 of beating the Pittsburgh Steelers.
The Dallas Cowboys have a probability of 0.09 of beating the Seattle Seahawks.
The Green Bay Packers have a probability of 0.83 of beating the New Orleans Saints.
The Arizona Cardinals have a probability of 0.71 of beating the Carolina Panthers.
The New York Giants have a probability of 0.4 of beating the Los Angeles Rams.
The Philadelphia Eagles have a probability of 0.58 of beating the San Francisco 49ers.
The Philadelphia Eagles have a probability of 0.28 of beating the Pittsburgh Steelers.
The Las Vegas Raiders have a probability of 0.15 of beating the Kansas City Chiefs.
The Minnesota Vikings have a probability of 0.21 of beating the Seattle Seahawks.
The Cincinnati Bengals have a probability of 0.2 of beating the Indianapolis Colts.
T