In [1]:
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score
from helper_functions import data_load, data_split, rf_model, google_export
import warnings

# Suppress every warning for the rest of the session so cell output stays compact.
warnings.filterwarnings(action="ignore")
# Lift pandas' display limits: no cap on cell text width or on the number of columns shown.
pd.set_option("display.max_colwidth", None)
pd.set_option("display.max_columns", None)

In [2]:
# Run parameters: `year` and `week` are passed to data_load, and all three to data_split, in later cells.
year = 2024
week = 5
day = 0 # 0-Sun, 1-Mon, 4-Thu

In [3]:
# Load the data through the project helper, which returns two objects for the chosen year/week.
# NOTE(review): presumably `allSeasons` is the full history and `currSeason` the current season's
# games -- confirm against helper_functions.data_load.
allSeasons, currSeason = data_load(year, week)

In [4]:
# Columns used as model inputs, grouped by theme; order is unchanged from the flat list.
features = [
    # schedule
    'season', 'week', 'weekday', 'gametime',
    # teams and rest
    'away_team', 'home_team', 'away_rest', 'home_rest',
    # betting lines
    'away_moneyline', 'home_moneyline', 'spread_line',
    'total_line', 'under_odds', 'over_odds',
    # matchup flag
    'div_game',
]

# Accuracy Testing

In [5]:
# Model building
# Repeated hold-out evaluation: 25 stratified 75/25 splits x 10 forest seeds = 250 fits.
# Rows containing any missing value are dropped before splitting.
accuracy_df = allSeasons.dropna(axis=0)
X = accuracy_df[features]
y = accuracy_df["Under"]

precis_array, acc_array = [], []
for split_seed in range(1, 26):
    # A new seed per outer iteration gives a different stratified train/test partition.
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.25,
        stratify=y,
        random_state=split_seed,
    )
    for model_seed in range(1, 11):
        # Same hyper-parameters each time; only the forest's random seed varies.
        rf = RandomForestClassifier(
            n_estimators=50,
            min_samples_split=10,
            random_state=model_seed,
        )
        # fit() returns the fitted estimator, so predict() can be chained onto it.
        preds = rf.fit(X_train, y_train).predict(X_test)
        acc, precis = accuracy_score(y_test, preds), precision_score(y_test, preds)
        acc_array.append(acc)
        precis_array.append(precis)

In [6]:
def find_mean(array):
    """Return the arithmetic mean of ``array``; an empty input yields 0."""
    count = len(array)
    # The condition is evaluated first, so an empty input never reaches the division.
    return sum(array) / count if count else 0
# Average precision over every split/model-seed run collected in precis_array.
find_mean(precis_array)

0.5951603915915791

In [7]:
# Average accuracy over every split/model-seed run collected in acc_array.
find_mean(acc_array)

0.5460444444444443

# Weekly Plays

In [8]:
# Build the partitions for the selected year/week/day with the project helper.
# NOTE(review): the helper returns (X_train, y_train, X_test, y_test), which is not sklearn's order.
X_train, y_train, X_test, y_test = data_split(allSeasons, features, year, week, day)

prediction_df = rf_model(X_train, y_train, X_test)

# Predicted Plays log
# Left-join predictions onto the current-season rows by index, keep rows whose Prediction is 1,
# then expose display-friendly column names. The `_x` suffix is what pandas adds to left-frame
# columns whose names collide with the right frame, so those values come from currSeason.
# NOTE(review): presumably Prediction == 1 marks a predicted Under -- confirm in rf_model.
nextPlays = (
    currSeason
    .merge(prediction_df, how='left', left_index=True, right_index=True)
    .loc[
        lambda games: games['Prediction'] == 1,
        [
            'game_id', 'season_x', 'week_x', 'home_team_x', 'away_team_x',
            'gametime_x', 'weekday_x', 'total_line_x', 'under_odds_x',
        ],
    ]
    .rename(columns={
        'game_id': 'Game ID',
        'season_x': 'Season',
        'week_x': 'Week',
        'home_team_x': 'Home',
        'away_team_x': 'Away',
        'gametime_x': 'Start Time',
        'weekday_x': 'Day',
        'total_line_x': 'Total Line',
        'under_odds_x': 'Under Odds',
    })
)
nextPlays

Unnamed: 0,Game ID,Season,Week,Home,Away,Start Time,Day,Total Line,Under Odds
6771,2024_05_NYJ_MIN,2024,5,MIN,NYJ,09:30,Sunday,42.0,-112.0
6772,2024_05_CAR_CHI,2024,5,CHI,CAR,13:00,Sunday,40.0,-108.0
6773,2024_05_BAL_CIN,2024,5,CIN,BAL,13:00,Sunday,48.5,-112.0
6774,2024_05_BUF_HOU,2024,5,HOU,BUF,13:00,Sunday,48.0,-110.0
6775,2024_05_IND_JAX,2024,5,JAX,IND,13:00,Sunday,45.0,-110.0
6776,2024_05_MIA_NE,2024,5,NE,MIA,13:00,Sunday,37.0,-108.0
6777,2024_05_CLE_WAS,2024,5,WAS,CLE,13:00,Sunday,44.0,-112.0
6778,2024_05_LV_DEN,2024,5,DEN,LV,16:05,Sunday,36.0,-108.0
6780,2024_05_GB_LA,2024,5,LA,GB,16:25,Sunday,49.5,-112.0
6781,2024_05_NYG_SEA,2024,5,SEA,NYG,16:25,Sunday,42.5,-110.0
