In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier, NearestNeighbors, LocalOutlierFactor
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import nfl_data_py as nfl
import datetime as dt
# Calendar date at execution time; `year` is reused below as the last season
# to download and as the season to predict.
today = dt.date.today()
year = today.year

# Lift pandas' display limits so wide frames and long cell values are shown in full.
pd.options.display.max_columns = None
pd.options.display.max_colwidth = None

In [None]:
# Download the schedule rows for seasons 2000 through `year` (inclusive).
df = nfl.import_schedules(years=range(2000, year + 1))

# Rows belonging to the season that matches the current calendar year.
currSeason = df.loc[df['season'] == year]

# Week to predict: the highest week number in the current season that already
# has a posted total line. Evaluates to NaN when no such row exists.
predWeek = currSeason.loc[currSeason['total_line'].notna(), 'week'].max()

In [None]:
# Build the modelling table from the raw schedule frame.
#
# The whole transformation is one method chain, so every step works on a fresh
# object: no column is assigned onto a slice of the schedule frame (which raised
# SettingWithCopyWarning) and nothing is mutated in place.
#
# Label semantics:
#   * Over / Under / Push are 0/1 flags comparing the final total with the line.
#   * A comparison against NaN is False, so a game without a final total gets 0
#     in all three flags. Those rows are kept on purpose: `total` is dropped
#     BEFORE `dropna()`, so only rows missing a feature/line value are removed.
#   * Pushes are discarded, leaving a binary Over-vs-Under problem.
df = (
    df[['home_team', 'away_team', 'season', 'total', 'week', 'gametime',
        'spread_line', 'total_line', 'under_odds']]
    .assign(
        Over=lambda games: np.where(games['total'] > games['total_line'], 1, 0),
        Under=lambda games: np.where(games['total'] < games['total_line'], 1, 0),
        Push=lambda games: np.where(games['total'] == games['total_line'], 1, 0),
    )
    .loc[lambda games: games['Push'] != 1]
    .drop(columns='total')
    .dropna()
    # Reset last so the final index is contiguous (resetting before dropna
    # left gaps behind again).
    .reset_index(drop=True)
)

In [None]:
# Model building: back-test the classifier on a held-out season.
features = ['spread_line', 'total_line', 'under_odds']
target = 'Under'

# Hold out the season immediately before the latest one present in `df`;
# train on every season older than that.
holdout_season = df.season.max() - 1
train_df = df[df.season < holdout_season]
test_df = df[df.season == holdout_season]
X_train, y_train = train_df[features], train_df[target]
X_test, y_test = test_df[features], test_df[target]

# k-nearest-neighbours classifier on standardised features.
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('model', KNeighborsClassifier(n_neighbors=7))
])

classif = pipe.fit(X_train, y_train)

# Local Outlier Factor in novelty mode, fitted on the training features only,
# is used to decide which held-out games resemble the training data.
pipe2 = Pipeline([
    ('scaler', StandardScaler()),
    ('lof', LocalOutlierFactor(novelty=True))
])

pipe2.fit(X_train)
y_test_nov = pipe2.predict(X_test)

# LOF's predict() returns +1 for inliers and -1 for outliers; only inliers are scored.
mask = y_test_nov == 1

X_test = X_test[mask]
y_test = y_test[mask]
y_pred = classif.predict(X_test)
y_true = y_test

# Pin the label order explicitly so the report and the confusion matrix always
# have both classes in the same positions, even if one class is missing from
# the filtered hold-out set or from the predictions.
class_labels = [0, 1]
class_names = ['Over', 'Under']

print(f'Total accuracy score={accuracy_score(y_true, y_pred):.2%}')
print('\nClassification Report:')
print(classification_report(y_true, y_pred, labels=class_labels, target_names=class_names))

cm = confusion_matrix(y_true, y_pred, labels=class_labels)
# Named `cm_display` rather than `display` so IPython's built-in display()
# is not shadowed for the rest of the notebook session.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
cm_display.plot()
plt.grid(False)
plt.show()

In [None]:
# Refit on everything played before the target week, then score the target week.
#
# Training rows:
#   * every earlier season, plus the weeks of the current season before `predWeek`
#     (the original mask compared `season < year` in both clauses, which silently
#     excluded the current season's completed weeks);
#   * only rows that actually have a result. Pushes are already removed, so a row
#     with Over == Under == 0 has no final total and its `Under` value is just a
#     placeholder, not a label.
has_result = (df['Over'] + df['Under']) == 1
earlier_season = df.season < year
earlier_week = (df.season == year) & (df.week < predWeek)
train_df = df[has_result & (earlier_season | earlier_week)]
test_df = df[(df.season == year) & (df.week == predWeek)]
X_train = train_df[features]
y_train = train_df[target]
X_test = test_df[features]
# For games without a final total this is the placeholder 0, not a real outcome.
y_test = test_df[target]

pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('model', KNeighborsClassifier(n_neighbors=7))
])

classif = pipe.fit(X_train, y_train)

pipe2 = Pipeline([
    ('scaler', StandardScaler()),
    ('lof', LocalOutlierFactor(novelty=True))
])

pipe2.fit(X_train)

# scikit-learn estimators reject zero-row input, so an empty slate (e.g. no
# lines posted yet, `predWeek` is NaN) is short-circuited to empty results.
if X_test.empty:
    y_test_nov = np.empty(0, dtype=int)
else:
    y_test_nov = pipe2.predict(X_test)

# LOF's predict() returns +1 for inliers and -1 for outliers; only inliers are scored.
mask = y_test_nov == 1
X_test = X_test[mask]
y_test = y_test[mask]
y_pred = classif.predict(X_test) if not X_test.empty else np.empty(0, dtype=int)
y_true = y_test

# Predicted Plays log
# `X_test` can hold fewer games than the week's schedule (rows dropped for
# missing values, pushes, or LOF outliers), so predictions are attached to the
# schedule by game keys instead of by position.
game_keys = ['season', 'week', 'home_team', 'away_team']
scored_games = test_df.loc[X_test.index, game_keys].copy()
scored_games['Predicted Outcome'] = y_pred

nextPlays = currSeason[currSeason.week == predWeek].merge(scored_games, on=game_keys, how='inner')
# Keep only the games predicted to go Under.
nextPlays = nextPlays[nextPlays['Predicted Outcome'] == 1]

# Select the output columns and give them presentation names.
column_labels = {
    'game_id': 'Game ID',
    'season': 'Season',
    'week': 'Week',
    'home_team': 'Home',
    'away_team': 'Away',
    'gametime': 'Start Time',
    'weekday': 'Day',
    'total_line': 'Total Line',
    'under_odds': 'Under Odds',
}
nextPlays = (
    nextPlays[list(column_labels)]
    .rename(columns=column_labels)
    .reset_index(drop=True)
)

In [None]:
# Render the predicted-plays table (games predicted as Under for `predWeek`).
nextPlays