In [63]:
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score
from helper_functions import data_load, data_split, rf_model, google_export
import warnings

# Session-wide settings.
# NOTE(review): this silences *every* warning category for the whole kernel,
# including deprecation and convergence notices — confirm that is intended.
warnings.filterwarnings(action='ignore')

# Passing None removes pandas' display limit for both options, so wide frames
# and long cell values are rendered without truncation.
for display_option in ('display.max_columns', 'display.max_colwidth'):
    pd.set_option(display_option, None)

In [64]:
# Run parameters consumed by the data-loading / splitting helper calls.
year, week = 2024, 6
day = 0  # day code: 0 = Sun, 1 = Mon, 4 = Thu

In [65]:
# data_load is imported from the project's helper_functions module; its two
# return values are bound here as allSeasons and currSeason.
# NOTE(review): presumably the multi-season history and the current-season
# rows for the requested year/week — confirm against helper_functions.
allSeasons, currSeason = data_load(year, week)

In [66]:
# Columns used as model inputs, in the order they are handed to the model.
features = [
    'season',
    'week',
    'weekday',
    'gametime',
    'away_team',
    'home_team',
    'away_rest',
    'home_rest',
    'away_moneyline',
    'home_moneyline',
    'spread_line',
    'total_line',
    'under_odds',
    'over_odds',
    'div_game',
]

# Accuracy Testing

In [67]:
# Model building
# Repeated hold-out evaluation: 25 stratified 75/25 splits (split seeds 1..25),
# each scored with 10 random forests (forest seeds 1..10), i.e. 250 fits.
accuracy_df = allSeasons.dropna()  # keep only rows with no missing values
X = accuracy_df[features]
y = accuracy_df["Under"]

# One precision and one accuracy score is collected per (split, forest) pair.
precis_array, acc_array = [], []
for i in range(1, 26):
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, stratify=y, random_state=i
    )
    for j in range(1, 11):
        rf = RandomForestClassifier(
            n_estimators=50,
            min_samples_split=10,
            random_state=j,
        )
        preds = rf.fit(X_train, y_train).predict(X_test)
        acc = accuracy_score(y_test, preds)
        precis = precision_score(y_test, preds)
        acc_array.append(acc)
        precis_array.append(precis)

In [68]:
def find_mean(array):
    """Return the arithmetic mean of ``array``.

    Args:
        array: A sized collection of numbers (anything supporting ``len`` and
            ``sum``).

    Returns:
        ``sum(array) / len(array)``, or the integer ``0`` when ``array`` is
        empty so that no division by zero occurs.
    """
    count = len(array)
    return sum(array) / count if count != 0 else 0
# Display the mean of the precision scores stored in precis_array.
find_mean(precis_array)

0.5951603915915791

In [69]:
# Display the mean of the accuracy scores stored in acc_array.
find_mean(acc_array)

0.5460444444444443

# Weekly Plays

In [70]:
# Build the train/test inputs for the requested year/week/day with the project
# helper, then obtain the prediction frame from the project's rf_model helper.
X_train, y_train, X_test, y_test = data_split(allSeasons, features, year, week, day)
prediction_df = rf_model(X_train, y_train, X_test)

# Predicted Plays log
# Source column (left-hand "_x" side of the merge) -> label shown in the log.
play_columns = {
    'game_id': 'Game ID',
    'season_x': 'Season',
    'week_x': 'Week',
    'home_team_x': 'Home',
    'away_team_x': 'Away',
    'gametime_x': 'Start Time',
    'weekday_x': 'Day',
    'total_line_x': 'Total Line',
    'under_odds_x': 'Under Odds',
}

# Left join on the index keeps every currSeason row; rows with no matching
# prediction get NaN in Prediction and are removed by the == 1 filter.
plays_merged = pd.merge(
    currSeason, prediction_df, how='left', left_index=True, right_index=True
)
plays_flagged = plays_merged[plays_merged['Prediction'] == 1]
nextPlays = plays_flagged[list(play_columns)].rename(columns=play_columns)
nextPlays

Unnamed: 0,Game ID,Season,Week,Home,Away,Start Time,Day,Total Line,Under Odds
6785,2024_06_JAX_CHI,2024,6,CHI,JAX,09:30,Sunday,44.5,-110.0
6787,2024_06_ARI_GB,2024,6,GB,ARI,13:00,Sunday,47.0,-108.0
6788,2024_06_HOU_NE,2024,6,NE,HOU,13:00,Sunday,37.5,-110.0
6789,2024_06_TB_NO,2024,6,NO,TB,13:00,Sunday,42.0,-110.0
6791,2024_06_IND_TEN,2024,6,TEN,IND,13:00,Sunday,43.0,-110.0
6792,2024_06_LAC_DEN,2024,6,DEN,LAC,16:05,Sunday,35.5,-108.0
6793,2024_06_PIT_LV,2024,6,LV,PIT,16:05,Sunday,36.5,-108.0
6794,2024_06_ATL_CAR,2024,6,CAR,ATL,16:25,Sunday,47.0,-112.0
6795,2024_06_DET_DAL,2024,6,DAL,DET,16:25,Sunday,52.5,-110.0
6796,2024_06_CIN_NYG,2024,6,NYG,CIN,20:20,Sunday,47.0,-112.0
