In [1]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score, precision_score
import warnings
from helper_functions import data_load, data_split, xgb_model, data_split_tune

# Silence every warning for the rest of the session. This keeps cell output
# compact, but it also hides deprecation notices from the imported libraries.
warnings.simplefilter('ignore')

# Render DataFrames without truncating the column list or the cell contents.
pd.options.display.max_columns = None
pd.options.display.max_colwidth = None

In [2]:
# Run parameters: the season year and week handed to data_load/data_split below,
# plus the weekday code handed to data_split.
year, week = 2024, 6
day = 0  # weekday code: 0-Sun, 1-Mon, 4-Thu

In [3]:
# Load the game data through the project helper for the configured year/week.
# The helper returns two objects, unpacked here as `allSeasons` and `currSeason`.
# NOTE(review): presumably the full history vs. the current season only —
# confirm against helper_functions.data_load.
allSeasons, currSeason = data_load(year, week)

In [4]:
# Column names handed to the data_split* helpers as the model's feature set.
features = [
    # schedule
    'season', 'week', 'weekday', 'gametime',
    # matchup
    'away_team', 'home_team', 'away_rest', 'home_rest',
    # betting lines
    'away_moneyline', 'home_moneyline', 'spread_line',
    'total_line', 'under_odds', 'over_odds',
    # flags
    'div_game',
]

# Model Tuning

In [5]:
# Data used for hyperparameter tuning; the helper's two return values are
# passed to GridSearchCV.fit below as the feature matrix and the labels.
X_train, y_train = data_split_tune(allSeasons, features)

# Search space: 5 hyperparameters x 3 values each = 243 candidates,
# i.e. 729 fits with 3-fold cross-validation.
param_grid = dict(
    max_depth=[3, 6, 9],
    learning_rate=[0.01, 0.1, 0.2],
    n_estimators=[100, 500, 1000],
    reg_alpha=[0, 0.1, 0.5],
    reg_lambda=[0.1, 0.5, 1.0],
)

xgb_clf = xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss')

# Exhaustive search; candidates are ranked by cross-validated ROC AUC.
grid_search = GridSearchCV(
    estimator=xgb_clf,
    param_grid=param_grid,
    cv=3,
    scoring='roc_auc',
    verbose=1,
)
grid_search.fit(X_train, y_train)

print("Best parameters found: ", grid_search.best_params_)
print("Best cross-validation score: ", grid_search.best_score_)

Fitting 3 folds for each of 243 candidates, totalling 729 fits


# Accuracy Testing

In [None]:
# Model building
# Repeated hold-out evaluation of the tuned hyperparameters: 10 random 75/25
# train/test splits (seeds 1-10), recording precision and accuracy per split.

# Build the evaluation data explicitly so this cell runs on a fresh kernel
# instead of relying on `X`/`y` left over from an earlier session.
# NOTE(review): assumes evaluation should use the same data as tuning — confirm.
# If so, the scores are measured on data that also guided the parameter search.
X, y = data_split_tune(allSeasons, features)

precis_array = []
acc_array = []
for seed in range(1, 11):
    # Split-local names keep the notebook-level X_train/y_train/X_test untouched.
    X_fit, X_holdout, y_fit, y_holdout = train_test_split(
        X, y, test_size=0.25, random_state=seed
    )

    model = xgb.XGBClassifier(
        use_label_encoder=False,
        eval_metric='logloss',
        **grid_search.best_params_
    )

    # Evaluation set: 20% of the training portion, passed to fit() as eval_set.
    # NOTE(review): no early stopping is configured here, so this set looks
    # monitoring-only while still shrinking the training data — confirm intent.
    X_fit, X_val, y_fit, y_val = train_test_split(
        X_fit, y_fit, test_size=0.2, random_state=seed
    )

    model.fit(
        X_fit, y_fit,
        eval_set=[(X_val, y_val)],
        verbose=False
    )

    # Score the hard class predictions on the held-out 25%.
    y_pred = model.predict(X_holdout)
    precis_array.append(precision_score(y_holdout, y_pred))
    acc_array.append(accuracy_score(y_holdout, y_pred))

In [None]:
def find_mean(array):
    """Return the arithmetic mean of ``array``.

    Args:
        array: A sized collection of numbers (anything supporting ``len`` and ``sum``).

    Returns:
        ``sum(array) / len(array)``, or ``0`` when ``array`` has no elements.
    """
    count = len(array)
    # An empty input yields 0 rather than a ZeroDivisionError.
    return sum(array) / count if count != 0 else 0
# Mean precision across the repeated train/test splits collected in `precis_array`.
find_mean(precis_array)

0.5716737603404856

In [None]:
# Mean accuracy across the repeated train/test splits collected in `acc_array`.
find_mean(acc_array)

0.49777777777777776

# Weekly Plays

In [None]:
# Split the data for the configured year/week/day via the project helper.
X_train, y_train, X_test, y_test = data_split(allSeasons, features, year, week, day)

# Fit and predict with the tuned hyperparameters; the return value is not used.
# NOTE(review): presumably this adds the 'Prediction' and 'Under Probability'
# columns to X_test in place — confirm in helper_functions.xgb_model.
xgb_model(X_train, y_train, X_test, grid_search.best_params_)

# Predicted Plays log
# Merged column name -> label shown in the final table (order = display order).
# The `_x` suffix marks columns that come from the left frame (currSeason).
display_names = {
    'game_id': 'Game ID',
    'season_x': 'Season',
    'week_x': 'Week',
    'home_team_x': 'Home',
    'away_team_x': 'Away',
    'gametime_x': 'Start Time',
    'weekday_x': 'Day',
    'total_line_x': 'Total Line',
    'under_odds_x': 'Under Odds',
    'Under Probability': 'Under Probability',
}

nextPlays = (
    # Left-join on the index so every currSeason row is kept ...
    pd.merge(currSeason, X_test, how='left', left_index=True, right_index=True)
    # ... then keep only rows whose Prediction is >= 0 (NaN rows fail the test).
    .loc[lambda merged: merged['Prediction'] >= 0, list(display_names)]
    .rename(columns=display_names)
)
nextPlays

Unnamed: 0,Game ID,Season,Week,Home,Away,Start Time,Day,Total Line,Under Odds,Under Probability
6785,2024_06_JAX_CHI,2024,6,CHI,JAX,09:30,Sunday,44.5,-108.0,0.451535
6786,2024_06_WAS_BAL,2024,6,BAL,WAS,13:00,Sunday,51.5,-110.0,0.459519
6787,2024_06_ARI_GB,2024,6,GB,ARI,13:00,Sunday,47.0,-108.0,0.501186
6788,2024_06_HOU_NE,2024,6,NE,HOU,13:00,Sunday,37.5,-110.0,0.432707
6789,2024_06_TB_NO,2024,6,NO,TB,13:00,Sunday,42.5,-112.0,0.673949
6790,2024_06_CLE_PHI,2024,6,PHI,CLE,13:00,Sunday,42.5,-112.0,0.471201
6791,2024_06_IND_TEN,2024,6,TEN,IND,13:00,Sunday,43.0,-112.0,0.528771
6792,2024_06_LAC_DEN,2024,6,DEN,LAC,16:05,Sunday,35.5,-108.0,0.453427
6793,2024_06_PIT_LV,2024,6,LV,PIT,16:05,Sunday,36.5,-112.0,0.553562
6794,2024_06_ATL_CAR,2024,6,CAR,ATL,16:25,Sunday,46.0,-108.0,0.451669
