In [2]:
import pandas as pd
df = pd.read_parquet('df_final.parquet')

In [5]:
cutoff_date = "2020-01-01"
begin_date = "2016-01-01"
train = df[(df["game_date"] < cutoff_date) & (df["game_date"] >= begin_date)]
test = df[df["game_date"] >= cutoff_date]

In [6]:
def make_training_rows(df):
    rows = []
    for _, row in df.iterrows():
        w, l = row["Winner"], row["Loser"]

        # features
        w_feats = {
            "elo_diff": row["winner_elo_pre"] - row["loser_elo_pre"],
            "elo_surf_diff": row["winner_elo_surf_pre"] - row["loser_elo_surf_pre"],
            "h2h_pre": row["h2h_pre"],
            "recent_form_diff": row["recent_form_diff"],
            "label": 1  # winner perspective
        }
        l_feats = {
            "elo_diff": row["loser_elo_pre"] - row["winner_elo_pre"],
            "elo_surf_diff": row["loser_elo_surf_pre"] - row["winner_elo_surf_pre"],
            "h2h_pre": -row["h2h_pre"],  # flip perspective
            "recent_form_diff": -row["recent_form_diff"],
            "label": 0  # loser perspective
        }

        rows.append(w_feats)
        rows.append(l_feats)

    return pd.DataFrame(rows)

In [7]:
train_data = make_training_rows(train)
test_data = make_training_rows(test)

X_train = train_data.drop(columns=["label"])
y_train = train_data["label"]

X_test = test_data.drop(columns=["label"])
y_test = test_data["label"]

print(y_train.value_counts())  # should now show both 0 and 1

label
1    5450
0    5450
Name: count, dtype: int64


In [8]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

# Linear Models

In [9]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.6126730103806228
              precision    recall  f1-score   support

           0       0.61      0.61      0.61      4624
           1       0.61      0.61      0.61      4624

    accuracy                           0.61      9248
   macro avg       0.61      0.61      0.61      9248
weighted avg       0.61      0.61      0.61      9248



In [10]:
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(),
                    SGDClassifier(max_iter=1000, tol=1e-3))
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.6126730103806228
              precision    recall  f1-score   support

           0       0.62      0.58      0.60      4624
           1       0.61      0.65      0.63      4624

    accuracy                           0.61      9248
   macro avg       0.61      0.61      0.61      9248
weighted avg       0.61      0.61      0.61      9248



In [11]:
from sklearn.linear_model import RidgeClassifier

model = RidgeClassifier()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.6126730103806228
              precision    recall  f1-score   support

           0       0.61      0.61      0.61      4624
           1       0.61      0.61      0.61      4624

    accuracy                           0.61      9248
   macro avg       0.61      0.61      0.61      9248
weighted avg       0.61      0.61      0.61      9248



In [12]:
from sklearn.linear_model import Perceptron

model = Perceptron(tol=1e-3, random_state=0)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.38538062283737023
              precision    recall  f1-score   support

           0       0.39      0.39      0.39      4624
           1       0.39      0.39      0.39      4624

    accuracy                           0.39      9248
   macro avg       0.39      0.39      0.39      9248
weighted avg       0.39      0.39      0.39      9248



# Support Vector Machines

In [13]:
from sklearn.svm import LinearSVC

model = make_pipeline(StandardScaler(),
                    LinearSVC(random_state=0, tol=1e-5))
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.6126730103806228
              precision    recall  f1-score   support

           0       0.61      0.61      0.61      4624
           1       0.61      0.61      0.61      4624

    accuracy                           0.61      9248
   macro avg       0.61      0.61      0.61      9248
weighted avg       0.61      0.61      0.61      9248



# Tree Based

In [14]:
from sklearn.tree import DecisionTreeClassifier

model = DecisionTreeClassifier(random_state=0)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.5367647058823529
              precision    recall  f1-score   support

           0       0.54      0.53      0.53      4624
           1       0.54      0.54      0.54      4624

    accuracy                           0.54      9248
   macro avg       0.54      0.54      0.54      9248
weighted avg       0.54      0.54      0.54      9248



In [15]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import ExtraTreeClassifier

extra_tree = ExtraTreeClassifier(random_state=0)
model = BaggingClassifier(extra_tree, random_state=0).fit(
   X_train, y_train)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.5618512110726643
              precision    recall  f1-score   support

           0       0.55      0.63      0.59      4624
           1       0.57      0.50      0.53      4624

    accuracy                           0.56      9248
   macro avg       0.56      0.56      0.56      9248
weighted avg       0.56      0.56      0.56      9248



In [16]:
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(max_depth=2, random_state=0)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.6151600346020761
              precision    recall  f1-score   support

           0       0.62      0.61      0.61      4624
           1       0.61      0.62      0.62      4624

    accuracy                           0.62      9248
   macro avg       0.62      0.62      0.62      9248
weighted avg       0.62      0.62      0.62      9248



In [17]:
from sklearn.ensemble import GradientBoostingClassifier

model = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
    max_depth=1, random_state=0).fit(X_train, y_train)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.615916955017301
              precision    recall  f1-score   support

           0       0.62      0.59      0.61      4624
           1       0.61      0.64      0.62      4624

    accuracy                           0.62      9248
   macro avg       0.62      0.62      0.62      9248
weighted avg       0.62      0.62      0.62      9248



In [18]:
from sklearn.ensemble import AdaBoostClassifier

model = AdaBoostClassifier(n_estimators=100, random_state=0)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.6164576124567474
              precision    recall  f1-score   support

           0       0.61      0.64      0.62      4624
           1       0.62      0.59      0.61      4624

    accuracy                           0.62      9248
   macro avg       0.62      0.62      0.62      9248
weighted avg       0.62      0.62      0.62      9248



# Nearest Neighbors

In [19]:
from sklearn.neighbors import KNeighborsClassifier

model = KNeighborsClassifier(n_neighbors=5)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.5741782006920415
              precision    recall  f1-score   support

           0       0.57      0.57      0.57      4624
           1       0.57      0.57      0.57      4624

    accuracy                           0.57      9248
   macro avg       0.57      0.57      0.57      9248
weighted avg       0.57      0.57      0.57      9248



# Naive Bayes

In [20]:
from sklearn.naive_bayes import GaussianNB

model = GaussianNB()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.6068339100346021
              precision    recall  f1-score   support

           0       0.61      0.61      0.61      4624
           1       0.61      0.61      0.61      4624

    accuracy                           0.61      9248
   macro avg       0.61      0.61      0.61      9248
weighted avg       0.61      0.61      0.61      9248



In [21]:
from sklearn.naive_bayes import BernoulliNB
model = BernoulliNB()

model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.6133217993079585
              precision    recall  f1-score   support

           0       0.61      0.61      0.61      4624
           1       0.61      0.61      0.61      4624

    accuracy                           0.61      9248
   macro avg       0.61      0.61      0.61      9248
weighted avg       0.61      0.61      0.61      9248



# LightGBM

In [23]:
import lightgbm as lgb
model = lgb.LGBMClassifier()

model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

[LightGBM] [Info] Number of positive: 5450, number of negative: 5450
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000559 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 707
[LightGBM] [Info] Number of data points in the train set: 10900, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
Accuracy: 0.6078070934256056
              precision    recall  f1-score   support

           0       0.61      0.61      0.61      4624
           1       0.61      0.60      0.61      4624

    accuracy                           0.61      9248
   macro avg       0.61      0.61      0.61      9248
weighted avg       0.61      0.61      0.61      9248



# Discriminant Analysis

In [24]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
model = LinearDiscriminantAnalysis()

model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.6126730103806228
              precision    recall  f1-score   support

           0       0.61      0.61      0.61      4624
           1       0.61      0.61      0.61      4624

    accuracy                           0.61      9248
   macro avg       0.61      0.61      0.61      9248
weighted avg       0.61      0.61      0.61      9248



In [25]:
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
model = QuadraticDiscriminantAnalysis()

model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.613538062283737
              precision    recall  f1-score   support

           0       0.61      0.61      0.61      4624
           1       0.61      0.61      0.61      4624

    accuracy                           0.61      9248
   macro avg       0.61      0.61      0.61      9248
weighted avg       0.61      0.61      0.61      9248





# XGBoost

In [26]:
from xgboost import XGBClassifier

model = XGBClassifier(
    n_estimators=200, max_depth=6, learning_rate=0.05, eval_metric="logloss"
)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.6115916955017301
              precision    recall  f1-score   support

           0       0.61      0.61      0.61      4624
           1       0.61      0.62      0.61      4624

    accuracy                           0.61      9248
   macro avg       0.61      0.61      0.61      9248
weighted avg       0.61      0.61      0.61      9248



# Neural Network

In [33]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np

# Assume your data is already loaded and split as in your prompt
# X_train, y_train, X_test, y_test

# Convert pandas DataFrames to numpy arrays
X_train_np = X_train.values.astype(np.float32)
y_train_np = y_train.values.astype(np.int64) # Use int64 for CrossEntropyLoss

X_test_np = X_test.values.astype(np.float32)
y_test_np = y_test.values.astype(np.int64)

# Create a custom PyTorch Dataset
class TabularDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# Create Dataset and DataLoader instances
train_dataset = TabularDataset(X_train_np, y_train_np)
test_dataset = TabularDataset(X_test_np, y_test_np)

batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Define number of input features and classes
input_dim = X_train.shape[1]
output_dim = len(y_train.value_counts())

In [35]:
class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim1, hidden_dim2, output_dim):
        super(MLP, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_dim, hidden_dim1),
            nn.ReLU(),
            nn.Linear(hidden_dim1, hidden_dim2),
            nn.ReLU(),
            nn.Linear(hidden_dim2, hidden_dim2//2),
            nn.ReLU(),
            nn.Linear(hidden_dim2//2, output_dim)
        )

    def forward(self, x):
        return self.layers(x)

# Model, Loss, and Optimizer
model_mlp = MLP(input_dim=input_dim, hidden_dim1=256, hidden_dim2=64, output_dim=output_dim)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_mlp.parameters(), lr=0.001)

# Training loop
epochs = 20
for epoch in range(epochs):
    model_mlp.train()
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model_mlp(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}")

# Evaluation
model_mlp.eval()
correct = 0
total = 0
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        outputs = model_mlp(X_batch)
        _, predicted = torch.max(outputs.data, 1)
        total += y_batch.size(0)
        correct += (predicted == y_batch).sum().item()

print(f"Accuracy of MLP on test data: {100 * correct / total:.2f}%")

Epoch 1/20, Loss: 0.7642
Epoch 2/20, Loss: 0.8948
Epoch 3/20, Loss: 0.8299
Epoch 4/20, Loss: 0.6999
Epoch 5/20, Loss: 0.6923
Epoch 6/20, Loss: 0.6099
Epoch 7/20, Loss: 0.5960
Epoch 8/20, Loss: 0.6636
Epoch 9/20, Loss: 0.6682
Epoch 10/20, Loss: 0.6256
Epoch 11/20, Loss: 0.6260
Epoch 12/20, Loss: 0.7220
Epoch 13/20, Loss: 0.5828
Epoch 14/20, Loss: 0.6118
Epoch 15/20, Loss: 0.4654
Epoch 16/20, Loss: 0.5154
Epoch 17/20, Loss: 0.6035
Epoch 18/20, Loss: 0.6113
Epoch 19/20, Loss: 0.6205
Epoch 20/20, Loss: 0.6997
Accuracy of MLP on test data: 61.40%


# Stacking Classifer

In [38]:
from sklearn.ensemble import StackingClassifier

stack = StackingClassifier(
    estimators=[
        ('logreg', LogisticRegression()),
        ('svc', LinearSVC()),
        ('adb', AdaBoostClassifier(n_estimators=100, random_state=0)),
        ('rf', RandomForestClassifier(max_depth=2, random_state=0))
    ],
    final_estimator=LogisticRegression()  # meta-learner
)
stack.fit(X_train, y_train)
print("Stacking accuracy:", stack.score(X_test, y_test))

Stacking accuracy: 0.614294982698962


In [40]:
# Get predictions from sklearn stack
sk_pred = stack.predict_proba(X_test)

# Get predictions from PyTorch MLP
mlp_pred = []
with torch.no_grad():
    for X_batch, _ in test_loader:
        probs = torch.softmax(model_mlp(X_batch), dim=1).cpu().numpy()
        mlp_pred.append(probs)
mlp_pred = np.vstack(mlp_pred)

# Combine (average probs)
final_pred = (sk_pred + mlp_pred) / 2
y_final = np.argmax(final_pred, axis=1)

accuracy = (y_final == y_test).mean()
print(f"Ensemble Accuracy (sklearn + MLP): {accuracy:.4f}")

Ensemble Accuracy (sklearn + MLP): 0.6126


In [43]:
from skorch import NeuralNetClassifier

# Wrap your MLP into sklearn-compatible estimator
net = NeuralNetClassifier(
    MLP,
    module__input_dim=input_dim,
    module__hidden_dim1=256,
    module__hidden_dim2=64,
    module__output_dim=output_dim,
    max_epochs=20,
    lr=0.001,
    optimizer=torch.optim.Adam,
    criterion=nn.CrossEntropyLoss,
    batch_size=64,
    device='cuda' if torch.cuda.is_available() else 'cpu'
)

# Add it to stacking ensemble
stack = StackingClassifier(
    estimators=[
        ('logreg', LogisticRegression(max_iter=1000)),
        ('svc', LinearSVC()),
        ('adb', AdaBoostClassifier(n_estimators=100, random_state=0)),
        ('rf', RandomForestClassifier(max_depth=2, random_state=0)),
        ('mlp', net)  # torch model wrapped
    ],
    final_estimator=LogisticRegression(max_iter=1000)
)

stack.fit(X_train_np, y_train_np)
print("Stacked Ensemble Accuracy:", stack.score(X_test_np, y_test_np))


  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m0.7092[0m       [32m0.6514[0m        [35m0.6301[0m  0.6385
      2        [36m0.6627[0m       [32m0.6537[0m        [35m0.6216[0m  0.6198
      3        [36m0.6512[0m       0.6505        0.6234  0.7988
      4        [36m0.6493[0m       [32m0.6560[0m        [35m0.6196[0m  0.7085
      5        [36m0.6479[0m       [32m0.6619[0m        [35m0.6192[0m  0.4398
      6        [36m0.6461[0m       0.6601        [35m0.6179[0m  0.5049
      7        [36m0.6453[0m       [32m0.6638[0m        [35m0.6178[0m  0.5041
      8        0.6453       [32m0.6670[0m        [35m0.6174[0m  0.5191
      9        [36m0.6451[0m       0.6670        0.6178  0.4892
     10        [36m0.6450[0m       0.6670        0.6181  0.4205
     11        [36m0.6449[0m       0.6610        0.6184  0.4523
     12        0.6460       [32m0.6688[0m        0