In [653]:
import numbers

import pandas as pd
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split

In [654]:
# converts 'MM:SS' strings to float minutes
def convert_mp(mp):
    """Convert a minutes-played value to float minutes.

    Parameters
    ----------
    mp : object
        An ``'MM:SS'`` string, a plain numeric string such as ``'34.5'``,
        or an already-numeric value (any ``numbers.Real``, which also
        covers numpy integer and floating scalars).

    Returns
    -------
    float or None
        The duration in minutes, with seconds expressed as a fraction of a
        minute. ``None`` when the value cannot be interpreted (unparseable
        string, wrong number of ``:``-separated parts, or a non-string,
        non-real input).
    """
    if isinstance(mp, str):
        try:
            if ':' in mp:
                # Unpacking raises ValueError unless there are exactly two parts.
                mins, secs = mp.split(':')
                return int(mins) + int(secs) / 60
            return float(mp)
        except ValueError:
            # Only parsing failures are treated as "no value"; anything else
            # (e.g. KeyboardInterrupt) is allowed to propagate.
            return None
    if isinstance(mp, numbers.Real):
        return float(mp)
    return None


In [655]:
# import data
data = pd.read_csv('../data-collection/bbref_players_games_simple/g/gilgesh01_Shai_Gilgeous-Alexander_last3.csv')

# make sure games are ordered oldest -> newest
if 'Date' in data.columns:
    data = data.sort_values('Date')

# is_home: 1 if home, 0 if away
# (computed for reference; not part of the `features` list used below)
data['is_home'] = (data['Unnamed: 5'] != '@').astype(float)
# result_win: 1 if team won, 0 if lost
# (computed for reference; not part of the `features` list used below)
data['result_win'] = data['Result'].str.startswith('W').astype(float)

# features (previous game)
features = [
    'MP', 'FGA', '3PA', 'FTA',
    'FG%', '3P%', '2P%', 'eFG%', 'FT%',
    'TRB', 'AST', 'TOV',
    'GmSc', '+/-', 'PTS'
]

# convert MP to float minutes
data['MP'] = data['MP'].apply(convert_mp)

# convert numeric columns; anything unparseable becomes NaN
# ('PTS' is already in `features`, so it is converted here as well)
for col in features:
    data[col] = pd.to_numeric(data[col], errors='coerce')

# keep only rows with a numeric PTS value. A NaN compared against the
# threshold below evaluates to False, so without this filter such rows would
# be silently labelled 0.0 ("not above") instead of being excluded.
data = data[data['PTS'].notna()].copy()

# create TARGET from *current* game points (before shifting)
# NOTE(review): 32.4 is a hard-coded scoring average used as the label
# threshold -- confirm it is still the intended value.
current_ppg = 32.4
data['above_ppg'] = (data['PTS'] > current_ppg).astype(float)
target = ['above_ppg']

# now shift features so they refer to the *previous* game
# (after the filter above, "previous" means the previous row with numeric PTS)
data[features] = data[features].shift(1)

# drop first row (no previous game) and rows with NaNs in any feature
data = data.dropna(subset=features + target)

# numpy arrays: X is (n_games, n_features), y is (n_games, 1)
X_np = data[features].values
y_np = data[target].values

# chronological split: last 20 games as test
n_test = 20
# with n_test or fewer rows the training slice below would be empty
assert len(X_np) > n_test, f"need more than {n_test} usable games, got {len(X_np)}"
X_train_np, X_test_np = X_np[:-n_test], X_np[-n_test:]
y_train_np, y_test_np = y_np[:-n_test], y_np[-n_test:]

# to torch
X_train = torch.tensor(X_train_np, dtype=torch.float32)
y_train = torch.tensor(y_train_np, dtype=torch.float32)
X_test = torch.tensor(X_test_np, dtype=torch.float32)
y_test = torch.tensor(y_test_np, dtype=torch.float32)

In [656]:
# seed the global torch RNG so the initial weights -- and therefore the
# weights printed below -- are the same on every Restart & Run All
torch.manual_seed(0)

# define model, loss function, and optimizer
# a single linear layer followed by a sigmoid, i.e. logistic regression that
# outputs a probability for the "above" class
model = nn.Sequential(
    nn.Linear(X_train.shape[1], 1),
    nn.Sigmoid()
)
# BCELoss expects probabilities, which the Sigmoid layer above provides
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-4)

# number of epochs
epochs = 1000

# training loop: every step uses the full training set (no mini-batches)
for _ in range(epochs):
    optimizer.zero_grad()
    loss = criterion(model(X_train), y_train)
    loss.backward()
    optimizer.step()

print("Weights:", model[0].weight.data)
print("Bias:", model[0].bias.data)

Weights: tensor([[ 0.0111, -0.1323, -0.0885, -0.0961,  0.0895, -1.0935, -1.2409,  0.0352,
          1.5448, -0.0268,  0.0287,  0.1552, -0.0805,  0.0051,  0.1605]])
Bias: tensor([-0.4792])


In [None]:
# run the model on the held-out games without tracking gradients
with torch.no_grad():
    y_pred = model(X_test)

# turn probabilities / 0-1 targets into labels using a 0.5 cut-off
predicted_above_below = [
    "Above Average" if prob.item() > 0.5 else "Below Average"
    for prob in y_pred
]
actual_above_below = [
    "Above Average" if truth.item() > 0.5 else "Below Average"
    for truth in y_test
]

# tally how many predicted labels agree with the actual labels
correct = sum(
    1
    for guess, truth in zip(predicted_above_below, actual_above_below)
    if guess == truth
)
incorrect = len(y_test) - correct

# printed as "<correct>-<incorrect>"
print(f"{correct}-{incorrect}")
# accuracy is hovering right around 50%
# we will want to include the option to run against 
# the sports betting line as well and compare to odds and 
# implied probabilities and return on investment

11-9


In [None]:
# hardcoded to test today's game
# one row of inputs, which must be in the same column order the model was
# trained on:
# MP, FGA, 3PA, FTA, FG%, 3P%, 2P%, eFG%, FT%, TRB, AST, TOV, GmSc, +/-, PTS
# NOTE(review): presumably these are the stats of the most recent completed
# game, since the training features refer to the previous game -- confirm.
X_test_np_today = [[36.6, 23, 7, 4, .522, .143, .688, .543, 1.000, 4, 5, 5, 16.5, 2, 29]]
X_test_today = torch.tensor(X_test_np_today, dtype=torch.float32)

# catch a mistyped vector early instead of failing inside the linear layer
assert X_test_today.shape[1] == model[0].in_features, (
    f"expected {model[0].in_features} feature values, got {X_test_today.shape[1]}"
)

# inference only: no gradients needed
with torch.no_grad():
    y_today = model(X_test_today)

# above average vs. below average calculation (0.5 probability cut-off)
predicted_above_below = [
    "Above Average" if pred.item() > 0.5 else "Below Average"
    for pred in y_today
]

predicted_above_below