NN Using Softmax and ReLU

In [2]:
import pandas as pd
import numpy as np

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils import clip_grad_norm_
import random

seed = 42

# Seed the Python, NumPy and PyTorch (CPU + all CUDA devices) generators
# with the same value, in that order.
for _seed_rng in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_rng(seed)

# Ask cuDNN for deterministic kernels and turn off its autotuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True


# Using Softmax for multiclass classification

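# Two-hidden-layer MLP with ReLU nonlinearities. The network itself returns
# raw logits; the softmax over a season's teams is applied by the loss
# (F.cross_entropy) and by the reporting code (F.softmax).
# Each team produces a single logit score.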
class SeasonSoftmaxNN(nn.Module):
    """Score every team of a season independently with a small MLP.

    The same two-hidden-layer network (64 -> 32 -> 1 units, ReLU, dropout)
    is applied to each team's feature vector, producing one logit per team
    slot. Callers turn the logits of a season into a distribution with a
    softmax over the team axis.
    """

    def __init__(self, input_dim):
        """Build the layers.

        Args:
            input_dim: number of features per team.
        """
        super().__init__()

        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 32)
        self.out = nn.Linear(32, 1)

        self.act = nn.ReLU()
        # Light dropout as regularization against overfitting.
        self.dropout = nn.Dropout(0.15)

        self._init_weights()

    def _init_weights(self):
        """Kaiming-uniform weights (a=0, the ReLU gain) and zero biases."""
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_uniform_(m.weight, a=0)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

    def forward(self, X, lengths):
        """Return one logit per team slot.

        Args:
            X: tensor of shape (batch, max_teams, features); rows past a
                season's real team count are padding.
            lengths: integer tensor of shape (batch,) giving the number of
                real teams per season, or None to skip padding masking.

        Returns:
            Tensor of shape (batch, max_teams). Padded slots are set to
            -1e9 (the same fill value SeasonRNN uses) so that they receive
            zero softmax probability and can never win an argmax.
        """
        # Local names deliberately avoid `F`, which is the module-level
        # alias for torch.nn.functional.
        batch_size, n_slots, n_features = X.shape

        # reshape (rather than view) also accepts non-contiguous input.
        x = X.reshape(batch_size * n_slots, n_features)

        x = self.act(self.fc1(x))
        x = self.dropout(x)

        x = self.act(self.fc2(x))
        x = self.dropout(x)

        logits = self.out(x).view(batch_size, n_slots)

        if lengths is not None:
            # Without this mask the all-zero padding rows still produce a
            # (bias-driven) logit and compete with real teams.
            # NOTE: with label-smoothed cross-entropy the masked classes
            # inflate the reported loss value; masked_fill passes no
            # gradient to the masked entries.
            lengths = lengths.to(logits.device)
            slot_idx = torch.arange(n_slots, device=logits.device).unsqueeze(0)
            is_padding = slot_idx >= lengths.unsqueeze(1)
            logits = logits.masked_fill(is_padding, -1e9)

        return logits

class SeasonRNN(nn.Module):
    """Score the teams of a season with a single-layer, unidirectional LSTM.

    The padded team axis is treated as the sequence axis; a linear head maps
    each LSTM output to one logit, and padded positions are filled with -1e9.
    """

    def __init__(self, input_dim, hidden_dim=64, dropout_p=0.15):
        """Create the LSTM, the dropout layer and the 1-unit output head."""
        super().__init__()
        self.rnn = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=1,
            batch_first=True,
            bidirectional=False,
        )
        self.dropout = nn.Dropout(dropout_p)
        self.out = nn.Linear(hidden_dim, 1)

        self._init_weights()

    def _init_weights(self):
        """Re-initialise only the output head; the LSTM keeps its defaults."""
        head = self.out
        nn.init.kaiming_uniform_(head.weight, a=0)
        nn.init.zeros_(head.bias)

    def forward(self, X, lengths):
        """Return logits of shape (batch, T) for input X of shape (batch, T, features).

        `lengths` holds the number of real teams per season; positions at or
        beyond that count come back as -1e9.
        """
        _, seq_len, _ = X.shape

        # Pack so the LSTM skips padded steps; lengths must live on the CPU.
        packed_in = nn.utils.rnn.pack_padded_sequence(
            X, lengths.cpu(), batch_first=True, enforce_sorted=False
        )
        packed_hidden, _ = self.rnn(packed_in)
        # total_length restores the original padded width.
        hidden, _ = nn.utils.rnn.pad_packed_sequence(
            packed_hidden, batch_first=True, total_length=seq_len
        )

        scores = self.out(self.dropout(hidden)).squeeze(-1)

        positions = torch.arange(seq_len, device=lengths.device).unsqueeze(0)
        is_padding = positions >= lengths.unsqueeze(1)
        return scores.masked_fill(is_padding, -1e9)





# DATA IMPORT

df = pd.read_csv("team_season_features_v2_clean-2.csv")

# Model features: every numeric column except the label ("champion") and the
# split key ("season").
num_cols = [
    c
    for c in df.select_dtypes(include="number").columns.tolist()
    if c not in ("champion", "season")
]

# Infinities are turned into NaN so the imputer below can fill them.
X_full = df[num_cols].replace([np.inf, -np.inf], np.nan).to_numpy()


# Temporal split (season bounds are inclusive)
# ...  -> 2010 : train
# 2011 -> 2015 : validation
# 2016 -> ...  : test
train_mask = df["season"] <= 2010

# Median imputation and standardisation are both fitted on the training
# seasons only, then applied to every season.
imputer = SimpleImputer(strategy="median")
X_train_imp = imputer.fit_transform(X_full[train_mask])

scaler = StandardScaler(with_mean=True, with_std=True).fit(X_train_imp)

X_all_scaled = scaler.transform(imputer.transform(X_full))

# Copy of the frame whose feature columns hold the scaled values.
df_proc = df.copy()
for col_pos, col_name in enumerate(num_cols):
    df_proc[col_name] = X_all_scaled[:, col_pos]





# BUILD DICT

# Group by season
def build_season_dict(df_proc, feature_cols):
    """Group the processed frame by season.

    Returns a dict mapping each season to a tuple
    ``(features, champion_row, team_names)`` where ``features`` is a float32
    array with one row per team, ``champion_row`` is the row position of the
    team whose ``champion`` value is 1, and ``team_names`` is the ``team``
    column as a list. Seasons that do not have exactly one champion row are
    left out.
    """
    by_season = {}
    for season, group in df_proc.groupby("season"):
        features = group[feature_cols].values.astype(np.float32)
        champion_rows = np.flatnonzero(group["champion"].values == 1)
        if champion_rows.size != 1:
            # Zero or several champions: the season has no usable label.
            continue
        by_season[season] = (
            features,
            int(champion_rows[0]),
            group["team"].tolist(),
        )
    return by_season

season_data = build_season_dict(df_proc, num_cols)


# Temporal split (season bounds are inclusive)
# ...  -> 2010 : train
# 2011 -> 2015 : validation
# 2016 -> ...  : test
train_seasons, val_seasons, test_seasons = {}, {}, {}
for season_key, season_entry in season_data.items():
    if season_key <= 2010:
        train_seasons[season_key] = season_entry
    elif season_key <= 2015:
        val_seasons[season_key] = season_entry
    else:
        test_seasons[season_key] = season_entry

# Padding width: the largest team count found in any season.
max_teams = max(len(X) for X, _, _ in season_data.values())
feature_dim = len(num_cols)




# SeasonDataset

# Padding features; team index
class SeasonDataset(Dataset):
    """One item per season, with features zero-padded to ``max_teams`` rows.

    ``season_dict`` maps a season to ``(features, champ_idx, teams)``; items
    are served in ascending season order.
    """

    def __init__(self, season_dict, max_teams, feature_dim):
        self.seasons = sorted(season_dict.keys())
        self.X_list, self.lengths = [], []
        self.champs, self.team_lists = [], []

        for season in self.seasons:
            features, champ_idx, teams = season_dict[season]
            n_teams = features.shape[0]

            # Real teams fill the first n_teams rows; the rest stay zero.
            padded = np.zeros((max_teams, feature_dim), dtype=np.float32)
            padded[:n_teams] = features

            self.X_list.append(padded)
            self.lengths.append(n_teams)
            self.champs.append(champ_idx)
            self.team_lists.append(teams)

    def __len__(self):
        return len(self.seasons)

    def __getitem__(self, idx):
        """Return the season at position ``idx`` as a dict of its fields."""
        item = {
            "season": self.seasons[idx],
            "X": torch.from_numpy(self.X_list[idx]),
            "length": self.lengths[idx],
            "champ_idx": self.champs[idx],
            "teams": self.team_lists[idx],
        }
        return item

# Create batches for train, val, and test
def collate_fn(batch):
    """Merge SeasonDataset items into ``(seasons, X, lengths, champs, teams)``.

    ``X`` is the stacked feature tensor, ``lengths`` and ``champs`` are int64
    tensors, and ``seasons`` / ``teams`` stay plain Python lists.
    """
    seasons, features, team_counts, champ_ids, teams = [], [], [], [], []
    for item in batch:
        seasons.append(item["season"])
        features.append(item["X"])
        team_counts.append(item["length"])
        champ_ids.append(item["champ_idx"])
        teams.append(item["teams"])

    return (
        seasons,
        torch.stack(features, dim=0),
        torch.tensor(team_counts, dtype=torch.long),
        torch.tensor(champ_ids, dtype=torch.long),
        teams,
    )

batch_size = 8

# One padded dataset per split, all sharing the same padding width.
train_ds, val_ds, test_ds = (
    SeasonDataset(split, max_teams, feature_dim)
    for split in (train_seasons, val_seasons, test_seasons)
)

# Only the training loader shuffles. The test loader uses batch_size=1, so
# each batch it yields holds exactly one season.
train_loader = DataLoader(
    train_ds, batch_size=batch_size, shuffle=True, collate_fn=collate_fn
)
val_loader = DataLoader(
    val_ds, batch_size=batch_size, shuffle=False, collate_fn=collate_fn
)
test_loader = DataLoader(
    test_ds, batch_size=1, shuffle=False, collate_fn=collate_fn
)




# TRAINING LOOP

# Ensure prediction is confident
def season_margin_loss(logits, champs, margin=0.5):
    """Hinge penalty on the gap between the champion and its best rival.

    For each row of ``logits`` (shape ``(batch, teams)``) the loss is
    ``relu(margin - (champion_logit - highest_other_logit))``; the mean over
    the batch is returned. ``champs`` holds the champion's column per row.
    """
    n_rows, _ = logits.shape
    rows = torch.arange(n_rows)

    # Hide the champion's own logit before looking for the strongest rival.
    is_champ = torch.zeros_like(logits, dtype=torch.bool)
    is_champ[rows, champs] = True
    best_rival = logits.masked_fill(is_champ, -1e9).max(dim=1).values

    gap = logits[rows, champs] - best_rival
    return F.relu(margin - gap).mean()


# Model choice: the MLP scorer is used here. SeasonRNN takes the same
# (X, lengths) inputs and could be constructed in its place.
model = SeasonSoftmaxNN(feature_dim)
optimizer = torch.optim.AdamW(
    params=model.parameters(),
    lr=1e-3,
    weight_decay=1e-4,
)

def evaluate(model, loader):
    """Return top-1 accuracy of ``model`` over ``loader``.

    The model is switched to eval mode and run without gradients. A season
    counts as a hit when the arg-max logit is the champion's index. Returns
    0.0 when the loader yields no seasons.
    """
    model.eval()
    n_hits = 0
    n_seasons = 0
    with torch.no_grad():
        for _seasons, X, lengths, champs, _teams in loader:
            predicted = model(X, lengths).argmax(dim=1)
            n_hits += (predicted == champs).sum().item()
            n_seasons += champs.size(0)

    if n_seasons == 0:
        return 0.0
    return n_hits / n_seasons

num_epochs = 200
patience = 20
# Weight of the margin term added to the cross-entropy loss.
lambda_margin = .5

best_val_top1 = 0.0
best_state = None
epochs_no_improve = 0


for epoch in range(1, num_epochs + 1):
    model.train()
    total_loss = 0.0
    n_batches = 0

    for seasons, X, lengths, champs, teams in train_loader:
        optimizer.zero_grad()

        logits = model(X, lengths)
        # Label-smoothed cross-entropy over a season's teams plus the
        # weighted champion-vs-best-rival margin penalty.
        loss = (
            F.cross_entropy(logits, champs, label_smoothing=0.1)
            + lambda_margin * season_margin_loss(logits, champs, margin=0.5)
        )

        loss.backward()
        clip_grad_norm_(model.parameters(), max_norm=3.0)
        optimizer.step()

        total_loss += loss.item()
        n_batches += 1

    avg_train_loss = total_loss / max(1, n_batches)
    val_top1 = evaluate(model, val_loader)

    if val_top1 > best_val_top1 + 1e-4:
        # New best validation accuracy: snapshot the weights on the CPU.
        best_val_top1 = val_top1
        best_state = {
            name: tensor.cpu().clone()
            for name, tensor in model.state_dict().items()
        }
        epochs_no_improve = 0
        continue

    epochs_no_improve += 1
    if epochs_no_improve >= patience:
        print(f"Early stopping at epoch {epoch}. Best val top-1: {best_val_top1:.3f}")
        break

# Restore the best snapshot, if any epoch ever improved on the initial 0.0.
if best_state is not None:
    model.load_state_dict(best_state)




# EVAL


model.eval()
test_results = []
correct = 0
total = 0

with torch.no_grad():
    for seasons, X, lengths, champs, teams in test_loader:
        # Only element 0 of each batch is read (test_loader uses batch_size=1).
        season, team_list = seasons[0], teams[0]
        probs = F.softmax(model(X, lengths), dim=1).cpu().numpy()[0]

        pred_idx = probs.argmax()
        true_idx = champs.item()
        is_correct = int(pred_idx == true_idx)

        test_results.append(
            (
                season,
                team_list[pred_idx],
                team_list[true_idx],
                probs[pred_idx],
                is_correct,
            )
        )
        correct += is_correct
        total += 1

top1 = correct / total
print("Softmax NN (batched) Top-1 accuracy:", top1)

print("\nPredicted vs True Champions (Softmax NN batched):")
for season, pred_team, true_team, prob, ok in sorted(test_results):
    print(season, "-", pred_team, "(pred) |", true_team, "(true) |",
          "prob:", f"{prob:.3f}", "| correct:", ok)


def evaluate_average_precision(model, loader):
    """Mean of ``1 / rank`` of the true champion across all seasons.

    Teams of a season are ranked by descending softmax probability; rank 1
    is the most likely team. Returns 0.0 when the loader yields no seasons.
    """
    model.eval()
    reciprocal_ranks = []

    with torch.no_grad():
        for _seasons, X, lengths, champs, _teams in loader:
            batch_probs = F.softmax(model(X, lengths), dim=1).cpu().numpy()

            for season_probs, champ_idx in zip(batch_probs, champs.tolist()):
                # Negating the probabilities makes argsort order them high to low.
                order = np.argsort(-season_probs)
                position = int(np.flatnonzero(order == champ_idx)[0])
                reciprocal_ranks.append(1.0 / (position + 1))

    if not reciprocal_ranks:
        return 0.0
    return float(np.mean(reciprocal_ranks))

# Mean over the test seasons of 1 / (rank of the true champion).
test_ap = evaluate_average_precision(model, test_loader)
print(f"Test Average Precision (per season): {test_ap:.3f}")


Early stopping at epoch 33. Best val top-1: 0.600
Softmax NN (batched) Top-1 accuracy: 0.6

Predicted vs True Champions (Softmax NN batched):
2016 - Warriors (pred) | Cavaliers (true) | prob: 0.436 | correct: 0
2017 - Warriors (pred) | Warriors (true) | prob: 0.562 | correct: 1
2018 - Warriors (pred) | Warriors (true) | prob: 0.529 | correct: 1
2019 - Raptors (pred) | Raptors (true) | prob: 0.295 | correct: 1
2020 - Lakers (pred) | Lakers (true) | prob: 0.203 | correct: 1
2021 - Clippers (pred) | Bucks (true) | prob: 0.206 | correct: 0
2022 - Celtics (pred) | Warriors (true) | prob: 0.264 | correct: 0
2023 - Celtics (pred) | Nuggets (true) | prob: 0.162 | correct: 0
2024 - Celtics (pred) | Celtics (true) | prob: 0.449 | correct: 1
2025 - Thunder (pred) | Thunder (true) | prob: 0.764 | correct: 1
Test Average Precision (per season): 0.725


In [3]:
import matplotlib.pyplot as plt

model.eval()
season_stats = []

with torch.no_grad():
    for seasons, X, lengths, champs, teams in test_loader:
        # Only element 0 of each batch is read (test_loader uses batch_size=1).
        probs = F.softmax(model(X, lengths), dim=1)[0]
        team_names = teams[0]
        champ_idx = champs.item()

        # Position of the champion in the descending probability order,
        # counted from 1.
        sorted_idx = torch.argsort(probs, descending=True)
        rank = (sorted_idx == champ_idx).nonzero(as_tuple=True)[0].item() + 1
        top_idx = sorted_idx[0].item()

        row = {
            "season": seasons[0],
            "champ_team": team_names[champ_idx],
            "champ_prob": probs[champ_idx].item(),
            "rank": rank,
            "top_team": team_names[top_idx],
            "top_prob": probs[top_idx].item(),
        }
        season_stats.append(row)

season_df = pd.DataFrame(season_stats).sort_values("season")
print(season_df)

   season champ_team  champ_prob  rank  top_team  top_prob
0    2016  Cavaliers    0.064645     4  Warriors  0.436071
1    2017   Warriors    0.562497     1  Warriors  0.562497
2    2018   Warriors    0.529278     1  Warriors  0.529278
3    2019    Raptors    0.294785     1   Raptors  0.294785
4    2020     Lakers    0.202915     1    Lakers  0.202915
5    2021      Bucks    0.067018     6  Clippers  0.205720
6    2022   Warriors    0.203903     2   Celtics  0.263656
7    2023    Nuggets    0.134823     3   Celtics  0.162370
8    2024    Celtics    0.449129     1   Celtics  0.449129
9    2025    Thunder    0.763685     1   Thunder  0.763685


In [5]:
df_2026 = pd.read_csv("2026_stats_december.csv")

# Same preprocessing as the historical data: inf -> NaN, then the imputer
# and scaler that were fitted on the training seasons.
X_2026_raw = df_2026[num_cols].replace([np.inf, -np.inf], np.nan).values
X_2026_scaled = scaler.transform(imputer.transform(X_2026_raw)).astype(np.float32)

n_teams_2026 = X_2026_scaled.shape[0]
# The model input is padded to max_teams rows, so the 2026 table must fit
# into that width (and must not be empty).
if not 0 < n_teams_2026 <= max_teams:
    raise ValueError(
        f"2026 data has {n_teams_2026} teams; expected between 1 and {max_teams}"
    )

X_2026_pad = np.zeros((1, max_teams, feature_dim), dtype=np.float32)
X_2026_pad[0, :n_teams_2026, :] = X_2026_scaled

X_2026_tensor = torch.from_numpy(X_2026_pad)
lengths_2026 = torch.tensor([n_teams_2026], dtype=torch.long)

model.eval()
with torch.no_grad():
    logits_2026 = model(X_2026_tensor, lengths_2026)
    # Normalise over the real teams only. Padded slots must not take
    # probability mass or win the argmax, because their index has no row
    # in df_2026.
    probs_2026 = F.softmax(logits_2026[:, :n_teams_2026], dim=1).cpu().numpy()[0]

pred_idx_2026 = probs_2026.argmax()
pred_team_2026 = df_2026.iloc[pred_idx_2026]["team"]
pred_prob_2026 = probs_2026[pred_idx_2026]

print(f"Predicted 2026 champion: {pred_team_2026} (prob = {pred_prob_2026:.3f})")

topk = 5
top_indices = probs_2026.argsort()[::-1][:topk]
# The header follows topk so the two cannot drift apart.
print(f"\nTop-{topk} 2026 contenders:")
for i in top_indices:
    team = df_2026.iloc[i]["team"]
    print(f"{team}: {probs_2026[i]:.3f}")


def evaluate_average_precision(model, loader):
    """Average, over seasons, of the reciprocal rank of the true champion.

    This cell re-declares the same metric that an earlier cell of the
    notebook defines. Ranks come from sorting each season's softmax
    probabilities in descending order; an empty loader gives 0.0.
    """
    model.eval()
    scores = []

    with torch.no_grad():
        for _seasons, X, lengths, champs, _teams in loader:
            probs = F.softmax(model(X, lengths), dim=1).cpu().numpy()

            for row, champ_idx in zip(probs, champs.tolist()):
                ranking = np.argsort(-row)
                champ_rank = int(np.flatnonzero(ranking == champ_idx)[0]) + 1
                scores.append(1.0 / champ_rank)

    return float(np.mean(scores)) if scores else 0.0

# Recompute the test-set metric with the function defined just above in this cell.
test_ap = evaluate_average_precision(model, test_loader)
print(f"Test Average Precision (per season): {test_ap:.3f}")


Predicted 2026 champion: Thunder (prob = 0.998)

Top-5 2026 contenders:
Thunder: 0.998
Bucks: 0.002
Heat: 0.000
Lakers: 0.000
Mavericks: 0.000
Test Average Precision (per season): 0.725
