In [6]:
import torch
import torch.nn as nn

#Using Softmax for multiclass classification

#Two Hidden layer MLP; ReLU for nonlinearity; Softmax at output layer
class SeasonSoftmaxNN(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Linear(input_dim, 32),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, 1)
        )

    def forward(self, x):
        logits = self.mlp(x)         
        logits = logits.squeeze(-1)  
        return logits


In [7]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

df = pd.read_csv("team_season_features_v2_clean-2.csv")

# same as your logistic setup
num_cols = df.select_dtypes(include="number").columns.tolist()
for col in ["champion", "season"]:
    if col in num_cols:
        num_cols.remove(col)

X_full = df[num_cols].replace([np.inf, -np.inf], np.nan).values

imputer = SimpleImputer(strategy="median")
X_imp = imputer.fit_transform(X_full)

scaler = StandardScaler(with_mean=True, with_std=True)
X_scaled = scaler.fit_transform(X_imp)

# write scaled features back into df
df_proc = df.copy()
for i, col in enumerate(num_cols):
    df_proc[col] = X_scaled[:, i]


In [8]:
def build_season_dict(df_proc, feature_cols):
    season_data = {}
    for season, g in df_proc.groupby("season"):
        X_season = g[feature_cols].values.astype(np.float32)  # (n_teams, F)
        champ_idx_arr = np.where(g["champion"].values == 1)[0]
        if len(champ_idx_arr) != 1:
            continue
        champ_idx = int(champ_idx_arr[0])
        teams = g["team"].tolist()
        season_data[season] = (X_season, champ_idx, teams)
    return season_data

season_data = build_season_dict(df_proc, num_cols)

train_seasons = {s: v for s, v in season_data.items() if s <= 2015}
test_seasons  = {s: v for s, v in season_data.items() if s > 2015}


In [9]:
import torch
import torch.nn.functional as F
import torch.optim as optim

input_dim = len(num_cols)
model = SeasonSoftmaxNN(input_dim)
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)

num_epochs = 500

for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    for season, (X_season, champ_idx, _) in train_seasons.items():
        X_tensor = torch.from_numpy(X_season).unsqueeze(0)
        y_tensor = torch.tensor([champ_idx], dtype=torch.long) 

        logits = model(X_tensor)              
        loss = F.cross_entropy(logits, y_tensor) 

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    if (epoch + 1) % 50 == 0:
        avg_loss = total_loss / len(train_seasons)
        print(f"Epoch {epoch+1}/{num_epochs} - train loss: {avg_loss:.4f}")


Epoch 50/500 - train loss: 0.1924
Epoch 100/500 - train loss: 0.0714
Epoch 150/500 - train loss: 0.0650
Epoch 200/500 - train loss: 0.0153
Epoch 250/500 - train loss: 0.0090
Epoch 300/500 - train loss: 0.0166
Epoch 350/500 - train loss: 0.0021
Epoch 400/500 - train loss: 0.0003
Epoch 450/500 - train loss: 0.0041
Epoch 500/500 - train loss: 0.0040


In [10]:
model.eval()
correct = 0
total = 0
results = []

with torch.no_grad():
    for season, (X_season, champ_idx, teams) in test_seasons.items():
        X_tensor = torch.from_numpy(X_season).unsqueeze(0)
        logits = model(X_tensor)                 # (1, n_teams)
        probs = F.softmax(logits, dim=1).squeeze(0).numpy()  # (n_teams,)

        pred_idx = probs.argmax()
        pred_team = teams[pred_idx]
        true_team = teams[champ_idx]
        is_correct = int(pred_idx == champ_idx)

        results.append((season, pred_team, true_team, probs[pred_idx], is_correct))
        correct += is_correct
        total += 1

top1_acc = correct / total
print("Softmax NN Top-1 accuracy:", top1_acc)

print("\nPredicted vs True Champions (Softmax NN):")
for season, pred_team, true_team, prob, ok in sorted(results):
    print(season, "-", pred_team, "(pred)", "|", true_team, "(true)",
          "| prob:", f"{prob:.3f}", "| correct:", ok)


Softmax NN Top-1 accuracy: 0.4

Predicted vs True Champions (Softmax NN):
2016 - Warriors (pred) | Cavaliers (true) | prob: 0.886 | correct: 0
2017 - Warriors (pred) | Warriors (true) | prob: 1.000 | correct: 1
2018 - Rockets (pred) | Warriors (true) | prob: 0.903 | correct: 0
2019 - Nuggets (pred) | Raptors (true) | prob: 0.766 | correct: 0
2020 - Lakers (pred) | Lakers (true) | prob: 1.000 | correct: 1
2021 - Nuggets (pred) | Bucks (true) | prob: 0.944 | correct: 0
2022 - Heat (pred) | Warriors (true) | prob: 0.975 | correct: 0
2023 - Nuggets (pred) | Nuggets (true) | prob: 0.995 | correct: 1
2024 - Knicks (pred) | Celtics (true) | prob: 0.667 | correct: 0
2025 - Thunder (pred) | Thunder (true) | prob: 1.000 | correct: 1
