In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sportsreference.nfl.boxscore import Boxscores, Boxscore
from functions import display,get_schedule, game_data, game_data_up_to_week, agg_weekly_data, get_elo, merge_rankings, prep_test_train

In [2]:
current_week = 17
weeks = list(range(1,current_week + 1))
year = 2020

pred_games_df, comp_games_df = prep_test_train(current_week, weeks, year)

In [3]:
msk = np.random.rand(len(comp_games_df)) < 0.8

train_df = comp_games_df[msk]
test_df = comp_games_df[~msk]

X_train = train_df.drop(columns = ['away_name', 'away_abbr', 'home_name', 'home_abbr', 'week','result'])
y_train = train_df[['result']] 
X_test = test_df.drop(columns = ['away_name', 'away_abbr', 'home_name', 'home_abbr', 'week','result'])
y_test = test_df[['result']]

Reviewing Logistic Regression

In [4]:
clf = LogisticRegression(penalty='l1', dual=False, tol=0.001, C=1.0, fit_intercept=True, 
                   intercept_scaling=1, class_weight='balanced', random_state=None, 
                   solver='liblinear', max_iter=10000, multi_class='ovr', verbose=0)

clf.fit(X_train, np.ravel(y_train.values))
y_pred = clf.predict_proba(X_test)
y_pred = y_pred[:,1]

display(y_pred,test_df)

The New York Giants have a probability of 0.17 of beating the Chicago Bears.
The Denver Broncos have a probability of 0.29 of beating the Pittsburgh Steelers.
The Washington Football Team have a probability of 0.21 of beating the Arizona Cardinals.
The Baltimore Ravens have a probability of 0.61 of beating the Houston Texans.
The New Orleans Saints have a probability of 0.77 of beating the Las Vegas Raiders.
The Los Angeles Rams have a probability of 0.23 of beating the Buffalo Bills.
The Houston Texans have a probability of 0.13 of beating the Pittsburgh Steelers.
The Las Vegas Raiders have a probability of 0.47 of beating the New England Patriots.
The Indianapolis Colts have a probability of 0.41 of beating the Chicago Bears.
The Arizona Cardinals have a probability of 0.68 of beating the Carolina Panthers.
The New York Giants have a probability of 0.4 of beating the Los Angeles Rams.
The Indianapolis Colts have a probability of 0.69 of beating the Cleveland Browns.
The Houston Texan

In [5]:
accuracy_score(y_test,np.round(y_pred))

0.6111111111111112

Reviewing Gradient Boosting Model

In [6]:
from sklearn.ensemble import GradientBoostingClassifier

In [7]:
GradientBoost = GradientBoostingClassifier(loss='deviance', learning_rate=0.1, n_estimators=10000, subsample=1.0,criterion='friedman_mse', min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_depth=3, min_impurity_decrease=0.0,min_impurity_split=None, init=None, random_state=None, max_features=None, verbose=0, max_leaf_nodes=None, warm_start=False, validation_fraction=0.1,n_iter_no_change=None, tol=0.0001, ccp_alpha=0.0)

In [8]:
GradientBoost.fit(X_train, np.ravel(y_train.values))

GradientBoostingClassifier(n_estimators=10000)

In [9]:
gb_y_pred = GradientBoost.predict_proba(X_test)
gb_y_pred = gb_y_pred[:,1]

In [10]:
display(y_pred,test_df)

The New York Giants have a probability of 0.17 of beating the Chicago Bears.
The Denver Broncos have a probability of 0.29 of beating the Pittsburgh Steelers.
The Washington Football Team have a probability of 0.21 of beating the Arizona Cardinals.
The Baltimore Ravens have a probability of 0.61 of beating the Houston Texans.
The New Orleans Saints have a probability of 0.77 of beating the Las Vegas Raiders.
The Los Angeles Rams have a probability of 0.23 of beating the Buffalo Bills.
The Houston Texans have a probability of 0.13 of beating the Pittsburgh Steelers.
The Las Vegas Raiders have a probability of 0.47 of beating the New England Patriots.
The Indianapolis Colts have a probability of 0.41 of beating the Chicago Bears.
The Arizona Cardinals have a probability of 0.68 of beating the Carolina Panthers.
The New York Giants have a probability of 0.4 of beating the Los Angeles Rams.
The Indianapolis Colts have a probability of 0.69 of beating the Cleveland Browns.
The Houston Texan

In [11]:

accuracy_score(y_test,np.round(gb_y_pred))

0.5555555555555556