In [None]:
import torch
import torch.nn as nn

#Using Softmax for multiclass classification

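# Two-hidden-layer MLP with ReLU nonlinearities. The final layer emits one raw logit per team;
# the softmax over a season's teams is applied inside the cross-entropy loss, not in the model.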
class SeasonSoftmaxNN(nn.Module):
    """Small MLP that maps each feature vector to a single raw score.

    Architecture: ``input_dim -> 32 -> ReLU -> Dropout(0.2) -> 16 -> ReLU -> 1``.
    No softmax is applied here; ``forward`` returns unnormalised logits with the
    trailing singleton dimension removed.
    """

    def __init__(self, input_dim):
        """Build the layer stack for inputs with ``input_dim`` features."""
        super().__init__()
        hidden_1, hidden_2 = 32, 16
        # Layers are created in the same order they are applied, so the
        # Sequential indices (0, 3, 5 for the Linear layers) stay stable.
        layers = [
            nn.Linear(input_dim, hidden_1),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_1, hidden_2),
            nn.ReLU(),
            nn.Linear(hidden_2, 1),
        ]
        self.mlp = nn.Sequential(*layers)

    def forward(self, x):
        """Return one logit per input row: ``(..., input_dim) -> (...)``."""
        # The last Linear layer yields shape (..., 1); drop that final axis.
        return self.mlp(x).squeeze(-1)


In [4]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

# Last season that belongs to the training split. The imputer and scaler are
# fit on these rows only, so statistics from held-out seasons do not leak into
# the training features. Keep in sync with the `<= 2015` train/test split.
TRAIN_END_SEASON = 2015

df = pd.read_csv("team_season_features_v2_clean-2.csv")

# Every numeric column is a feature, except the label and the grouping key.
num_cols = [
    col
    for col in df.select_dtypes(include="number").columns
    if col not in ("champion", "season")
]

# Treat +/-inf as missing so the median imputer can fill them.
features = df[num_cols].replace([np.inf, -np.inf], np.nan)

train_mask = (df["season"] <= TRAIN_END_SEASON).to_numpy()
if not train_mask.any():
    raise ValueError(
        f"No rows with season <= {TRAIN_END_SEASON}; cannot fit imputer/scaler."
    )

# SimpleImputer discards columns that have no observed value at fit time,
# which would shift every later column when the result is written back by
# position. Drop such columns explicitly so names and data stay aligned.
all_missing = features.loc[train_mask].isna().all(axis=0)
empty_cols = all_missing.index[all_missing.to_numpy()].tolist()
if empty_cols:
    print(f"Dropping feature columns with no observed training values: {empty_cols}")
    num_cols = [col for col in num_cols if col not in empty_cols]
    features = features[num_cols]

X_full = features.values

# Fit on training seasons only, then transform every row.
imputer = SimpleImputer(strategy="median")
imputer.fit(X_full[train_mask])
X_imp = imputer.transform(X_full)

scaler = StandardScaler(with_mean=True, with_std=True)
scaler.fit(X_imp[train_mask])
X_scaled = scaler.transform(X_imp)

assert X_scaled.shape[1] == len(num_cols), "feature/column count mismatch"

# Write scaled features back into a copy so the raw frame stays untouched.
df_proc = df.copy()
for col, scaled_values in zip(num_cols, X_scaled.T):
    df_proc[col] = scaled_values


FileNotFoundError: [Errno 2] No such file or directory: 'team_season_features_v2_clean-2.csv'

In [None]:
def build_season_dict(df_proc, feature_cols):
    """Group rows by season into per-season training examples.

    Args:
        df_proc: DataFrame with "season", "champion" and "team" columns plus
            every column named in ``feature_cols``.
        feature_cols: Columns to use as the feature matrix.

    Returns:
        Dict mapping season -> ``(X, champ_idx, teams)`` where ``X`` is a
        float32 array of shape (n_teams, n_features), ``champ_idx`` is the row
        position (within the season) of the team whose "champion" value is 1,
        and ``teams`` is the list of "team" values in the same row order.
        Seasons that do not have exactly one champion row are left out.
    """
    per_season = {}
    for season_key, group in df_proc.groupby("season"):
        feature_matrix = group[feature_cols].values.astype(np.float32)
        champion_rows = np.nonzero(group["champion"].values == 1)[0]
        # Keep only seasons with a single, unambiguous champion.
        if len(champion_rows) == 1:
            per_season[season_key] = (
                feature_matrix,
                int(champion_rows[0]),
                group["team"].tolist(),
            )
    return per_season

# One (features, champion index, team names) entry per usable season.
season_data = build_season_dict(df_proc, num_cols)

# Chronological split: seasons up to and including 2015 train, later ones test.
train_seasons = dict(item for item in season_data.items() if item[0] <= 2015)
test_seasons = dict(item for item in season_data.items() if item[0] > 2015)


In [None]:
import torch
import torch.nn.functional as F
import torch.optim as optim

# Seed before building the model so weight initialisation and dropout masks
# are the same on every Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

input_dim = len(num_cols)
model = SeasonSoftmaxNN(input_dim)
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)

num_epochs = 500

if not train_seasons:
    raise ValueError("train_seasons is empty; there is nothing to train on.")

# Convert each season to tensors once instead of on every epoch.
# Each season is one example: features (1, n_teams, F), target (1,) holding
# the champion's row index, so cross-entropy softmaxes over that season's teams.
train_batches = [
    (
        torch.from_numpy(X_season).unsqueeze(0),
        torch.tensor([champ_idx], dtype=torch.long),
    )
    for X_season, champ_idx, _ in train_seasons.values()
]

for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    for X_tensor, y_tensor in train_batches:
        logits = model(X_tensor)                  # (1, n_teams)
        loss = F.cross_entropy(logits, y_tensor)  # softmax across teams

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    if (epoch + 1) % 50 == 0:
        avg_loss = total_loss / len(train_batches)
        print(f"Epoch {epoch+1}/{num_epochs} - train loss: {avg_loss:.4f}")
