In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.pipeline import Pipeline

import warnings
warnings.filterwarnings("ignore")


data = [
    # Week 2
    {"Week": 2, "NBA": "Rockets vs Spurs",       "Outcome": "Rockets",      "ITM?": "Y", "Date": "10/28", "Day_Before_Odd": 58,   "Game_Time_Odds": 50},
    {"Week": 2, "NBA": "Grizzlies vs Bulls",     "Outcome": "Bulls",        "ITM?": "N", "Date": "10/28", "Day_Before_Odd": 71,   "Game_Time_Odds": 50},
    {"Week": 2, "NBA": "Mavs vs Timberwolves",   "Outcome": "Mavs",         "ITM?": "N", "Date": "10/29", "Day_Before_Odd": 37.5, "Game_Time_Odds": 50},
    {"Week": 2, "NBA": "Kings vs Jazz",          "Outcome": "Kings",        "ITM?": "Y", "Date": "10/29", "Day_Before_Odd": 68,   "Game_Time_Odds": 50},
    {"Week": 2, "NBA": "Cavs vs Lakers",         "Outcome": "Cavs",         "ITM?": "Y", "Date": "10/30", "Day_Before_Odd": 59.5, "Game_Time_Odds": 50},
    {"Week": 2, "NBA": "Celtics vs Pacers",      "Outcome": "Pacers",       "ITM?": "N", "Date": "10/30", "Day_Before_Odd": 70.5, "Game_Time_Odds": 50},
    {"Week": 2, "NBA": "Heat vs Knicks",         "Outcome": "Heat",         "ITM?": "N", "Date": "10/30", "Day_Before_Odd": 55,   "Game_Time_Odds": 50},
    {"Week": 2, "NBA": "Clippers vs Trailblazers","Outcome": "Trailblazers","ITM?": "N", "Date": "10/30", "Day_Before_Odd": 74,   "Game_Time_Odds": 50},
    {"Week": 2, "NBA": "Bucks vs Grizzlies",     "Outcome": "Grizzlies",    "ITM?": "N", "Date": "10/31", "Day_Before_Odd": 64,   "Game_Time_Odds": 50},
    {"Week": 2, "NBA": "Spurs vs Jazz",          "Outcome": "Spurs",        "ITM?": "N", "Date": "10/31", "Day_Before_Odd": 47,   "Game_Time_Odds": 50},
    {"Week": 2, "NBA": "Suns vs Clippers",       "Outcome": "Suns",         "ITM?": "Y", "Date": "10/31", "Day_Before_Odd": 64,   "Game_Time_Odds": 50},
    {"Week": 2, "NBA": "Celtics vs Hornets",     "Outcome": "Celtics",      "ITM?": "Y", "Date": "11/1",  "Day_Before_Odd": 81,   "Game_Time_Odds": 50},
    {"Week": 2, "NBA": "Lakers vs Raptors",      "Outcome": "Lakers",       "ITM?": "Y", "Date": "11/1",  "Day_Before_Odd": 77,   "Game_Time_Odds": 50},
    {"Week": 2, "NBA": "Grizzlies vs 76ers",     "Outcome": "Grizzlies",    "ITM?": "Y", "Date": "11/2",  "Day_Before_Odd": 56.5, "Game_Time_Odds": 50.5},

    # Week 3
    {"Week": 3, "NBA": "Cavs vs Bucks",          "Outcome": "Cavs",         "ITM?": "Y", "Date": "11/4",  "Day_Before_Odd": 73,   "Game_Time_Odds": 50},
    {"Week": 3, "NBA": "Grizzlies vs Nets",      "Outcome": "Nets",         "ITM?": "N", "Date": "11/4",  "Day_Before_Odd": 65.5, "Game_Time_Odds": 64},
    {"Week": 3, "NBA": "Heat vs Kings",          "Outcome": "Kings",        "ITM?": "N", "Date": "11/4",  "Day_Before_Odd": 54,   "Game_Time_Odds": 73},
    {"Week": 3, "NBA": "Mavs vs Pacers",         "Outcome": "Pacers",       "ITM?": "N", "Date": "11/4",  "Day_Before_Odd": 65,   "Game_Time_Odds": 67.5},
    {"Week": 3, "NBA": "Clppers vs 76ers",       "Outcome": "Clippers",     "ITM?": "Y", "Date": "11/6",  "Day_Before_Odd": 53.5, "Game_Time_Odds": 55},
    {"Week": 3, "NBA": "Kings vs Raptors",       "Outcome": "Kings",        "ITM?": "Y", "Date": "11/6",  "Day_Before_Odd": 83.5, "Game_Time_Odds": 50},
    {"Week": 3, "NBA": "Bucks vs Jazz",          "Outcome": "Bucks",        "ITM?": "Y", "Date": "11/7",  "Day_Before_Odd": 76.5, "Game_Time_Odds": 50},
    {"Week": 3, "NBA": "Spurs vs Trail Blazers", "Outcome": "Spurs",        "ITM?": "Y", "Date": "11/7",  "Day_Before_Odd": 64,   "Game_Time_Odds": 50},
    {"Week": 3, "NBA": "Wolves vs Bulls",        "Outcome": "Wolves",       "ITM?": "Y", "Date": "11/7",  "Day_Before_Odd": 71.5, "Game_Time_Odds": 50},
    {"Week": 3, "NBA": "Magic vs Pelicans",      "Outcome": "Magic",        "ITM?": "Y", "Date": "11/8",  "Day_Before_Odd": 59,   "Game_Time_Odds": 50},
    {"Week": 3, "NBA": "Pacers vs Hornets",      "Outcome": "Hornets",      "ITM?": "N", "Date": "11/8",  "Day_Before_Odd": 70,   "Game_Time_Odds": 50},
    {"Week": 3, "NBA": "Celtics vs Nets",        "Outcome": "Celtics",      "ITM?": "Y", "Date": "11/8",  "Day_Before_Odd": 86,   "Game_Time_Odds": 86},
    {"Week": 3, "NBA": "Knicks vs Pacers",       "Outcome": "Knicks",       "ITM?": "Y", "Date": "11/10", "Day_Before_Odd": 56.5, "Game_Time_Odds": 58},
    {"Week": 3, "NBA": "Nuggets vs Mavs",        "Outcome": "Nuggets",      "ITM?": "Y", "Date": "11/10", "Day_Before_Odd": 65,   "Game_Time_Odds": 55},
    {"Week": 3, "NBA": "Kings vs Suns",          "Outcome": "Kings",        "ITM?": "N", "Date": "11/10", "Day_Before_Odd": 43,   "Game_Time_Odds": 30},
    {"Week": 3, "NBA": "Grizzlies vs Trail Blazers","Outcome": "Grizzlies","ITM?": "Y", "Date": "11/10", "Day_Before_Odd": 55,   "Game_Time_Odds": 51},

    # Week 4
    {"Week": 4, "NBA": "Kings vs Spurs",         "Outcome": "Spurs",        "ITM?": "N", "Date": "11/11", "Day_Before_Odd": 73.5, "Game_Time_Odds": 50},
    {"Week": 4, "NBA": "Cavs vs Bulls",          "Outcome": "Cavs",         "ITM?": "Y", "Date": "11/11", "Day_Before_Odd": 75,   "Game_Time_Odds": 50},
    {"Week": 4, "NBA": "Celtics vs Hawks",       "Outcome": "Hawks",        "ITM?": "N", "Date": "11/12", "Day_Before_Odd": 90.5, "Game_Time_Odds": 90.5},
    {"Week": 4, "NBA": "Celtics vs Nets",        "Outcome": "Celtics",      "ITM?": "Y", "Date": "11/13", "Day_Before_Odd": 80.5, "Game_Time_Odds": 50},
    {"Week": 4, "NBA": "Magic vs 76ers",         "Outcome": "Magic",        "ITM?": "Y", "Date": "11/15", "Day_Before_Odd": 51.5, "Game_Time_Odds": 50},
    {"Week": 4, "NBA": "Heat vs Pacers",         "Outcome": "Pacers",       "ITM?": "N", "Date": "11/15", "Day_Before_Odd": 39.5, "Game_Time_Odds": 50},
    {"Week": 4, "NBA": "Raptors vs Pistons",     "Outcome": "Pistons",      "ITM?": "N", "Date": "11/15", "Day_Before_Odd": 47,   "Game_Time_Odds": 50},
    {"Week": 4, "NBA": "Lakers vs Spurs",        "Outcome": "Lakers",       "ITM?": "Y", "Date": "11/15", "Day_Before_Odd": 56.5, "Game_Time_Odds": 50},
    {"Week": 4, "NBA": "Knicks vs Nets",         "Outcome": "Knicks",       "ITM?": "Y", "Date": "11/15", "Day_Before_Odd": 80,   "Game_Time_Odds": 41},
    {"Week": 4, "NBA": "Cavs vs Bulls",          "Outcome": "Cavs",         "ITM?": "Y", "Date": "11/15", "Day_Before_Odd": 81.5, "Game_Time_Odds": 50},
    {"Week": 4, "NBA": "Rockets vs Clippers",    "Outcome": "Rockets",      "ITM?": "Y", "Date": "11/15", "Day_Before_Odd": 64,   "Game_Time_Odds": 50},
    {"Week": 4, "NBA": "Thunder vs Suns",        "Outcome": "Thunder",      "ITM?": "Y", "Date": "11/15", "Day_Before_Odd": 73,   "Game_Time_Odds": 72},

    # Week 5
    {"Week": 5, "NBA": "Bucks vs Rockets",       "Outcome": "Bucks",        "ITM?": "Y", "Date": "11/18", "Day_Before_Odd": 56.5, "Game_Time_Odds": 59.5},
    {"Week": 5, "NBA": "Nets vs Hornets",        "Outcome": "Nets",         "ITM?": "Y", "Date": "11/19", "Day_Before_Odd": 58,   "Game_Time_Odds": 51.5},
    {"Week": 5, "NBA": "Cavs vs Pelicans",       "Outcome": "Cavs",         "ITM?": "Y", "Date": "11/20", "Day_Before_Odd": 90,   "Game_Time_Odds": 50},
    {"Week": 5, "NBA": "Bucks vs Bulls",         "Outcome": "Bucks",        "ITM?": "Y", "Date": "11/20", "Day_Before_Odd": 73.5, "Game_Time_Odds": 50},
    {"Week": 5, "NBA": "Bulls vs Hawks",         "Outcome": "Bulls",        "ITM?": "Y", "Date": "11/22", "Day_Before_Odd": 52,   "Game_Time_Odds": 53.5},
    {"Week": 5, "NBA": "Bucks vs Pacers",        "Outcome": "Bucks",        "ITM?": "Y", "Date": "11/22", "Day_Before_Odd": 64,   "Game_Time_Odds": 50},
    {"Week": 5, "NBA": "Bucks vs Hornets",       "Outcome": "Bucks",        "ITM?": "Y", "Date": "11/23", "Day_Before_Odd": 73.5, "Game_Time_Odds": 50},
    {"Week": 5, "NBA": "Rockets vs Trailblazers","Outcome": "Trailblazers","ITM?": "N", "Date": "11/23", "Day_Before_Odd": 88,   "Game_Time_Odds": 50},
    {"Week": 5, "NBA": "Celtics vs Wolves",      "Outcome": "Celtics",      "ITM?": "Y", "Date": "11/24", "Day_Before_Odd": 76.5, "Game_Time_Odds": 50},

    # Week 6
    {"Week": 6, "NBA": "Bulls vs Wizards",       "Outcome": "Bulls",        "ITM?": "Y", "Date": "11/26", "Day_Before_Odd": 62.5, "Game_Time_Odds": 50},
    {"Week": 6, "NBA": "Heat vs Bucks",          "Outcome": "Bucks",        "ITM?": "N", "Date": "11/26", "Day_Before_Odd": 55.5, "Game_Time_Odds": 68},
    {"Week": 6, "NBA": "Cavs vs Hawks",          "Outcome": "Hawks",        "ITM?": "N", "Date": "11/27", "Day_Before_Odd": 84.5, "Game_Time_Odds": 50},
    {"Week": 6, "NBA": "Pacers vs Blazers",      "Outcome": "Pacers",       "ITM?": "Y", "Date": "11/27", "Day_Before_Odd": 82.5, "Game_Time_Odds": 50},
    {"Week": 6, "NBA": "Magic vs Bulls",         "Outcome": "Magic",        "ITM?": "Y", "Date": "11/27", "Day_Before_Odd": 81,   "Game_Time_Odds": 50},
    {"Week": 6, "NBA": "Rockets vs 76ers",       "Outcome": "Rockets",      "ITM?": "Y", "Date": "11/27", "Day_Before_Odd": 67,   "Game_Time_Odds": 50},
    {"Week": 6, "NBA": "Pelicans vs Raptors",    "Outcome": "Raptors",      "ITM?": "N", "Date": "11/27", "Day_Before_Odd": None, "Game_Time_Odds": None},
    {"Week": 6, "NBA": "Hawks vs Hornets",       "Outcome": "Hawks",        "ITM?": "Y", "Date": "11/30", "Day_Before_Odd": 59.5, "Game_Time_Odds": 70.5},
    {"Week": 6, "NBA": "Suns vs Warriors",       "Outcome": "Warriors",     "ITM?": "N", "Date": "11/30", "Day_Before_Odd": None, "Game_Time_Odds": None},
    {"Week": 6, "NBA": "Grizzlies vs Pacers",    "Outcome": "Grizzlies",    "ITM?": "Y", "Date": "12/1",  "Day_Before_Odd": 73.5, "Game_Time_Odds": 50},
    {"Week": 6, "NBA": "Magic vs Nets",          "Outcome": "Magic",        "ITM?": "Y", "Date": "12/1",  "Day_Before_Odd": 77,   "Game_Time_Odds": 50},
    {"Week": 6, "NBA": "Cavs vs Celtics",        "Outcome": "Cavs",         "ITM?": "N", "Date": "12/1",  "Day_Before_Odd": 48,   "Game_Time_Odds": 50.5},
    {"Week": 6, "NBA": "Thunder vs Rockets",     "Outcome": "Rockets",      "ITM?": "N", "Date": "12/1",  "Day_Before_Odd": 52,   "Game_Time_Odds": 54.5},
]

df = pd.DataFrame(data)
df


Unnamed: 0,Week,NBA,Outcome,ITM?,Date,Day_Before_Odd,Game_Time_Odds
0,2,Rockets vs Spurs,Rockets,Y,10/28,58.0,50.0
1,2,Grizzlies vs Bulls,Bulls,N,10/28,71.0,50.0
2,2,Mavs vs Timberwolves,Mavs,N,10/29,37.5,50.0
3,2,Kings vs Jazz,Kings,Y,10/29,68.0,50.0
4,2,Cavs vs Lakers,Cavs,Y,10/30,59.5,50.0
...,...,...,...,...,...,...,...
59,6,Suns vs Warriors,Warriors,N,11/30,,
60,6,Grizzlies vs Pacers,Grizzlies,Y,12/1,73.5,50.0
61,6,Magic vs Nets,Magic,Y,12/1,77.0,50.0
62,6,Cavs vs Celtics,Cavs,N,12/1,48.0,50.5


In [2]:
# 1. Calculate Odds Difference and Percentage Change
df["Odds_Diff"] = df["Game_Time_Odds"] - df["Day_Before_Odd"]
df["Pct_Change"] = (df["Odds_Diff"] / df["Day_Before_Odd"]) * 100

#summary
summary = df.groupby("ITM?").agg({
    "Odds_Diff":   ["mean", "median", "min", "max"],
    "Pct_Change":  ["mean", "median", "min", "max"]
})
print("Summary of Odds Difference and Pct Change by ITM:\n", summary)

#Classification
df["ITM_binary"] = df["ITM?"].map({"Y": 1, "N": 0})

Summary of Odds Difference and Pct Change by ITM:
       Odds_Diff                    Pct_Change                                 
           mean median   min   max       mean     median        min        max
ITM?                                                                          
N     -7.000000   -1.5 -38.0  19.0  -7.375211  -2.290076 -43.181818  35.185185
Y    -16.170732  -14.0 -40.0  11.0 -21.671146 -21.875000 -48.750000  18.487395


In [3]:
#Logistic Regression & Decision Boundary for Odds_Diff
X_lr = df[["Odds_Diff"]].values
y_lr = df["ITM_binary"].values

# Remove NaN in X_lr
nan_mask = ~np.isnan(X_lr).ravel()
X_lr = X_lr[nan_mask]
y_lr = y_lr[nan_mask]

lr = LogisticRegression()
lr.fit(X_lr, y_lr)

beta0 = lr.intercept_[0]
beta1 = lr.coef_[0][0]

lr_threshold = -beta0 / beta1

# Predict again using this boundary
y_pred_lr = (X_lr[:, 0] >= lr_threshold).astype(int)
acc_lr = accuracy_score(y_lr, y_pred_lr)

print(f"Logistic Regression boundary = {lr_threshold:.4f}, Accuracy = {acc_lr:.4f}")

Logistic Regression boundary = 2.8811, Accuracy = 0.2742


In [4]:
# Drop rows with NaN in "Odds_Diff", "Pct_Change", and "ITM_binary"
df = df.dropna(subset=["Odds_Diff", "Pct_Change", "ITM_binary"])

X = df[["Odds_Diff", "Pct_Change"]].values
y = df["ITM_binary"].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 4. Build a Pipeline for polynomial expansion, scaling, and logistic regression
#    Then use GridSearchCV to tune hyperparameters:
# ------------------------------------------------------------------------------
pipeline = Pipeline([
    ("poly", PolynomialFeatures(include_bias=False)),  # we can vary 'degree'
    ("scaler", StandardScaler()),
    ("lr", LogisticRegression())
])

param_grid = {
    "poly__degree": [1, 2, 3],             # test linear (1) and quadratic (2)
    "lr__C": [0.001, 0.01, 0.1, 1, 5],  # regularization strength
    "lr__penalty": ["l1"],           # Lasso regularization
    "lr__solver": ["liblinear"]    # L1 regularization requires this solver
}

grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring="accuracy",
    cv=5,             # 5-fold cross-validation
)
grid_search.fit(X_train, y_train)

best_model = grid_search.best_estimator_

print("Best Parameters:", grid_search.best_params_)
print("Best Cross-Validation Accuracy: {:.4f}".format(grid_search.best_score_))

#Evaluate the best model on the test set
y_pred_test = best_model.predict(X_test)
test_acc = accuracy_score(y_test, y_pred_test)
print(f"Test Accuracy (default threshold=0.5): {test_acc:.4f}")


Best Parameters: {'lr__C': 5, 'lr__penalty': 'l1', 'lr__solver': 'liblinear', 'poly__degree': 2}
Best Cross-Validation Accuracy: 0.7111
Test Accuracy (default threshold=0.5): 0.7692


In [5]:
X_full = df[["Odds_Diff", "Pct_Change"]].values
y_full = df["ITM_binary"].values
day_before_full = df["Day_Before_Odd"].values

# Predict probabilities on the entire dataset
y_proba_full = best_model.predict_proba(X_full)[:, 1]

day_before_values = df["Day_Before_Odd"].dropna().unique()
best_threshold = None
best_model_rate = 0.0

for t in day_before_values:
    # Indices of rows with Day_Before_Odd >= t
    mask = (day_before_full >= t) & (~np.isnan(day_before_full))
    if np.sum(mask) == 0:
        continue
    
    # Subset the predicted probabilities for these rows
    proba_subset = y_proba_full[mask]
    # If you want the fraction that the MODEL would label as "Y" (prob >= 0.5):
    model_pred_binary = (proba_subset >= 0.5).astype(int)
    rate_model = model_pred_binary.mean()  # fraction predicted as Y

    if rate_model > best_model_rate:
        best_model_rate = rate_model
        best_threshold = t

print(f"Best 1-day-before-odd threshold (by model's predicted Y-rate) = {best_threshold:.2f}")
print(f"Model Y-rate in that subset = {best_model_rate:.3f}")


Best 1-day-before-odd threshold (by model's predicted Y-rate) = 71.00
Model Y-rate in that subset = 1.000


In [6]:
#Alternate Approach without model
unique_odds = df["Day_Before_Odd"].unique()

best_threshold = None
best_rate = 0.0

for t in unique_odds:
    # Subset of games with day before odds >= t
    subset = df[df["Day_Before_Odd"] >= t]
    
    if len(subset) > 0:
        # fraction of "Y" among these games
        rate_y = subset["ITM_binary"].mean() 
        if rate_y > best_rate:
            best_rate = rate_y
            best_threshold = t

print(f"Best 1-day-before-odd threshold = {best_threshold:.2f}")
print(f"Conditional probability of being ITM above that threshold = {best_rate:.3f}")

Best 1-day-before-odd threshold = 75.00
Conditional probability of being ITM above that threshold = 0.824
