In [2]:
import warnings

import nfl_data_py as nfl
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import precision_score
from sklearn.model_selection import train_test_split

from helper_functions import data_load, data_split, rf_model, google_export
# Notebook-wide settings: silence warnings and lift pandas' display limits so
# wide frames and long cell values are shown in full.
# NOTE(review): 'ignore' hides every warning category, including deprecation
# notices from pandas/scikit-learn - consider narrowing the filter.
warnings.filterwarnings(action='ignore')
for option_name in ("display.max_columns", "display.max_colwidth"):
    pd.set_option(option_name, None)  # None removes the display limit

In [3]:
# Run configuration: which slate of games the notebook evaluates.
year, week = 2024, 5  # season and week number passed to data_load / data_split
day = 0  # weekday code: 0 = Sunday, 1 = Monday, 4 = Thursday

In [4]:
# Load the two frames used throughout the notebook via the project helper.
# NOTE(review): presumably `allSeasons` is the multi-season history and
# `currSeason` the current season's games - confirm in helper_functions.data_load.
season_frames = data_load(year, week)
allSeasons, currSeason = season_frames

In [5]:
# Model inputs: every column of `allSeasons` except the ones listed here.
# NOTE(review): the excluded names look like target labels, final-score fields,
# identifiers and free-text metadata; any outcome-derived column NOT listed
# would leak the target into the model - verify against data_load's schema.
features = allSeasons.drop(
    columns=[
        'Under', 'Push',
        'gameday', 'game_id', 'surface',
        'home_score', 'away_score', 'result', 'total', 'overtime',
        'old_game_id', 'gsis', 'nfl_detail_id', 'pfr', 'pff', 'espn', 'ftn',
        'away_qb_id', 'home_qb_id', 'away_qb_name', 'home_qb_name',
        'away_coach', 'home_coach', 'referee', 'stadium',
        'wind', 'temp',
    ]
).columns

# Accuracy Testing

In [6]:
# Model building
# Estimate hold-out accuracy and precision of the random forest by repeating a
# stratified 75/25 train/test split over 25 split seeds, and for each split
# fitting 10 forests with different seeds (25 x 10 = 250 fits in total).
# One accuracy and one precision value per fit are collected in `acc_array`
# and `precis_array` for averaging in the following cells.

# Keep only fully populated rows; chaining (rather than inplace=True) leaves
# `allSeasons` untouched and makes this cell safe to re-run.
df_acc = allSeasons.dropna().reset_index(drop=True)
y = df_acc.Under
X = df_acc[features]

precis_array = []
acc_array = []
for i in range(1, 26):
    # `i` seeds the split so every repetition sees a different hold-out set;
    # stratify=y keeps the Under / not-Under ratio equal in both partitions.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=i, stratify=y)
    for j in range(1, 11):
        # `j` seeds the forest so model randomness is averaged out as well.
        rf = RandomForestClassifier(n_estimators=50, min_samples_split=10, random_state=j)
        rf.fit(X_train, y_train)
        preds = rf.predict(X_test)
        acc = accuracy_score(y_test, preds)
        precis = precision_score(y_test, preds)
        precis_array.append(precis)
        acc_array.append(acc)

In [7]:
def find_mean(array):
    """Return the arithmetic mean of ``array``.

    Parameters
    ----------
    array : sized iterable of numbers
        Values to average; must support ``len()`` and ``sum()``.

    Returns
    -------
    int or float
        ``sum(array) / len(array)``, or the integer ``0`` when ``array`` is
        empty (avoids a ZeroDivisionError).
    """
    count = len(array)
    return sum(array) / count if count else 0
# Mean of the per-fit precision scores collected in precis_array.
find_mean(precis_array)

0.596502913317647

In [8]:
# Mean of the per-fit accuracy scores collected in acc_array.
find_mean(acc_array)

0.5502222222222222

# Weekly Plays

In [9]:
# Build the train/test partitions for the configured year/week/day and get the
# model's predictions from the project helpers.
X_train, y_train, X_test, y_test = data_split(allSeasons, features, year, week, day)
prediction_df = rf_model(X_train, y_train, X_test)

# Predicted Plays log
# Source column -> display label, in output order. The `_x` suffix is the one
# pandas' merge gives the left-hand (currSeason) copy of a column that exists
# in both frames.
play_columns = {
    'game_id': 'Game ID',
    'season_x': 'Season',
    'week_x': 'Week',
    'home_team': 'Home',
    'away_team': 'Away',
    'gametime_x': 'Start Time',
    'weekday_x': 'Day',
    'total_line_x': 'Total Line',
    'under_odds_x': 'Under Odds',
}
nextPlays = (
    currSeason
    # Index-aligned left join: keep every currSeason row, attach predictions.
    .merge(prediction_df, how='left', left_index=True, right_index=True)
    # Keep only rows whose Prediction equals 1, and only the columns shown.
    .loc[lambda plays: plays.Prediction == 1, list(play_columns)]
    .rename(columns=play_columns)
)
nextPlays

Unnamed: 0,Game ID,Season,Week,Home,Away,Start Time,Day,Total Line,Under Odds
6771,2024_05_NYJ_MIN,2024,5,MIN,NYJ,09:30,Sunday,42.0,-112.0
6772,2024_05_CAR_CHI,2024,5,CHI,CAR,13:00,Sunday,40.0,-108.0
6773,2024_05_BAL_CIN,2024,5,CIN,BAL,13:00,Sunday,48.5,-112.0
6774,2024_05_BUF_HOU,2024,5,HOU,BUF,13:00,Sunday,48.0,-110.0
6775,2024_05_IND_JAX,2024,5,JAX,IND,13:00,Sunday,45.0,-110.0
6778,2024_05_LV_DEN,2024,5,DEN,LV,16:05,Sunday,36.0,-108.0
6779,2024_05_ARI_SF,2024,5,SF,ARI,16:05,Sunday,48.5,-108.0
6780,2024_05_GB_LA,2024,5,LA,GB,16:25,Sunday,49.5,-112.0
6782,2024_05_DAL_PIT,2024,5,PIT,DAL,20:20,Sunday,43.5,-112.0
