In [2]:
# --- Imports ---

import torch
from torch.utils.data import Dataset, DataLoader, Subset
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error

import pandas as pd
import numpy as np

from tqdm import tqdm
import time
import math

import os

In [3]:
# -- Thread management --

# Restrict the numerical back-ends to a single thread each.
# The environment variables only affect libraries that have not initialised their
# thread pools yet; because this cell runs after `import torch`, the OpenMP/MKL
# runtimes used by PyTorch may already have read them.
os.environ.update(
    dict.fromkeys(
        (
            "OMP_NUM_THREADS",
            "MKL_NUM_THREADS",
            "VECLIB_MAXIMUM_THREADS",
            "NUMEXPR_NUM_THREADS",
        ),
        "1",
    )
)
# Apply the same limit to PyTorch's intra-op thread pool explicitly, so the setting
# takes effect regardless of import order.
torch.set_num_threads(1)

In [29]:
# --- Configurations ---

class CONFIG:
    """Central place for the paths and hyperparameters read by the notebook."""

    # Directory holding the competition input files.
    DATA_PATH = "./kaggle/input/nfl-big-data-bowl-2026-prediction/"

    # Cross-validation and training length.
    N_FOLDS = 5
    EPOCHS = 20

    # ReduceLROnPlateau settings: wait PATIENCE epochs, then multiply the LR by FACTOR.
    PATIENCE = 3
    FACTOR = 0.5

    # Optimiser and regularisation.
    LEARNING_RATE = 2e-5
    DROPOUT = 0.01

    # Upper and lower bounds of the teacher-forcing ratio used during training.
    FORCE_MAX = 0.8
    FORCE_MIN = 0.2


# Instance kept for attribute-style access; the class attributes are shared.
config = CONFIG()

In [5]:
# -- Dataset -- 

class TrainDataset(Dataset):
    """Per-example container that converts the sequence fields to float32 tensors on access.

    Notation used below: N = number of examples, L1 = input sequence length,
    L2 = output sequence length. The per-item shapes are carried over from the
    original annotations and are not enforced by this class.
    """

    def __init__(self, ids, x0, x1a, x1b, x2, duration, targets):
        # Identifiers and constant features
        self.ids = ids                  # N strings
        self.x0 = x0                    # N items, each annotated as tensor(27)
        # Input sequences (x1b entries may be None)
        self.x1a = x1a                  # N items, each annotated as array (L1, 10)
        self.x1b = x1b                  # N items, each annotated as array (L1, 10)
        # Decoder seed, targets and horizon
        self.x2 = x2                    # N items, each annotated as tensor(2)
        self.targets = targets          # N items, each annotated as array (L2, 2)
        self.duration = duration        # N ints

    def __len__(self):
        # The constant-feature list defines the dataset size.
        return len(self.x0)

    def __getitem__(self, idx):
        """Return ``(id, x0, x1a, x1b, x2, targets, duration)`` for example ``idx``."""
        seq_a = torch.tensor(self.x1a[idx], dtype=torch.float32)

        raw_b = self.x1b[idx]
        if raw_b is not None:
            seq_b = torch.tensor(raw_b, dtype=torch.float32)
        else:
            # Missing second sequence: zero time steps, same feature width as x1a.
            seq_b = torch.empty(0, seq_a.shape[1], dtype=torch.float32)

        future = torch.tensor(self.targets[idx], dtype=torch.float32)

        return (
            self.ids[idx],
            self.x0[idx],
            seq_a,
            seq_b,
            self.x2[idx],
            future,
            self.duration[idx],
        )

# Load the pre-built training dataset object from disk.
# NOTE(review): weights_only=False makes torch.load unpickle arbitrary Python objects,
# which can execute code on load; only use a "train_set.pt" from a trusted source.
# Presumably the file holds a pickled TrainDataset instance, in which case the class
# must be defined before this line runs — TODO confirm.
train_set = torch.load("train_set.pt", weights_only=False)

In [6]:
# -- Model --

class DotAttention(nn.Module):
    """Parameter-free dot-product attention.

    ``forward(s_t, memory)`` scores every memory slot against the query by a dot
    product, normalises the scores with a softmax over the slots and returns the
    weighted sum of the memory.

    s_t:     (B, H) query vectors
    memory:  (B, T, H) memory bank
    returns: (B, H) context vectors
    """

    def forward(self, s_t, memory):
        query = s_t.unsqueeze(1)                              # (B, 1, H)
        logits = torch.bmm(query, memory.transpose(1, 2))     # (B, 1, T)
        attn = F.softmax(logits, dim=2)                       # normalise over the T slots
        return torch.bmm(attn, memory).squeeze(1)             # (B, H)



class Encoder(nn.Module):
    """LSTM sequence encoder with a learned fallback state for empty sequences.

    Args:
        in_dim:   number of features per time step.
        hid_dim:  LSTM hidden size.
        n_layers: number of stacked LSTM layers.

    ``forward(x1)`` takes ``x1`` of shape (B, L, in_dim) and returns
    ``(outputs, h, c)`` with shapes (B, L, hid_dim), (n_layers, B, hid_dim) and
    (n_layers, B, hid_dim).
    """

    def __init__(self, in_dim=10, hid_dim=256, n_layers=2):
        super().__init__()
        self.rnn = nn.LSTM(in_dim, hid_dim, n_layers, batch_first=True, dropout=CONFIG.DROPOUT)
        # Learned hidden/cell state returned when the input has no time steps.
        self.h0 = nn.Parameter(torch.randn(n_layers, 1, hid_dim))
        self.c0 = nn.Parameter(torch.randn(n_layers, 1, hid_dim))

    def forward(self, x1):
        if x1.size(1) > 0:
            e1, (h1, c1) = self.rnn(x1)
        else:
            # Empty sequence: the LSTM cannot run, so fall back to the learned state.
            # The state is broadcast to the actual batch size (a no-op for batch size 1)
            # and the output sequence is an empty (B, 0, hid_dim) tensor on the input's
            # device, so it has the same rank and dtype as the non-empty case.
            batch_size = x1.size(0)
            e1 = x1.new_zeros(batch_size, 0, self.h0.size(-1))
            h1 = self.h0.expand(-1, batch_size, -1).contiguous()
            c1 = self.c0.expand(-1, batch_size, -1).contiguous()
        return e1, h1, c1


class Decoder(nn.Module):
    """Single-step LSTM decoder with dot-product attention over an encoder memory.

    Args:
        out_dim:  size of each predicted step (also the size of the step fed back in).
        hid_dim:  LSTM hidden size; the memory must use the same width.
        n_layers: number of stacked LSTM layers.
    """

    def __init__(self, out_dim=2, hid_dim=256, n_layers=2):
        super().__init__()
        self.rnn = nn.LSTM(out_dim, hid_dim, n_layers, batch_first=True, dropout=CONFIG.DROPOUT)
        self.attn = DotAttention()
        # Projects [LSTM output ; attention context] to the prediction. The trailing
        # ReLU clamps predictions to be non-negative.
        self.out = nn.Sequential(
            nn.Linear(2 * hid_dim, out_dim),
            nn.ReLU(),
        )

    def forward(self, y0, h_c, mem):
        """Advance the decoder by one step.

        y0:   (B, 1, out_dim) previous output (or seed value)
        h_c:  tuple ``(h, c)`` of LSTM states
        mem:  (B, T, hid_dim) memory attended over
        Returns ``(prediction, (h, c))`` where prediction is (B, out_dim).
        """
        step, h_c = self.rnn(y0, h_c)
        step = step.squeeze(dim=1)                  # drop the length-1 time axis
        context = self.attn(step, mem)
        prediction = self.out(torch.cat((step, context), dim=1))
        return prediction, h_c
        

class MultiScaleCNNLSTMSeq2Seq(nn.Module):
    """Seq2seq model combining two LSTM encoders with three CNN branches of different kernel size.

    The decoder attends over a memory built by concatenating, along the time axis,
    the first encoder's outputs and the outputs of the three convolution stacks
    (kernel sizes 3, 5 and 7) applied to the same input sequence. Its initial
    hidden/cell states mix projections of the constant features with the final
    states of both encoders.

    Args:
        in_dim:    features per time step of the input sequences.
        hid_dim:   hidden width shared by encoders, CNN branches and decoder.
        out_dim:   size of each predicted step.
        const_dim: number of constant (per-example) features.
    """

    def __init__(self, in_dim=10, hid_dim=256, out_dim=2, const_dim=27):
        super().__init__()
        # in_dim / out_dim are forwarded explicitly so that non-default values produce
        # matching encoder and decoder layers (previously the sub-module defaults were
        # always used).
        self.enc1 = Encoder(in_dim=in_dim, hid_dim=hid_dim)
        self.enc2 = Encoder(in_dim=in_dim, hid_dim=hid_dim)
        self.dec = Decoder(out_dim=out_dim, hid_dim=hid_dim)

        # Three parallel receptive fields over the same input sequence.
        self.conv1 = self._conv_stack(in_dim, hid_dim, kernel_size=3)
        self.conv2 = self._conv_stack(in_dim, hid_dim, kernel_size=5)
        self.conv3 = self._conv_stack(in_dim, hid_dim, kernel_size=7)

        # lin1/lin2 seed the hidden state of decoder layers 0/1; lin3/lin4 the cell state.
        self.lin1 = nn.Linear(const_dim, hid_dim)
        self.lin2 = nn.Linear(const_dim, hid_dim)
        self.lin3 = nn.Linear(const_dim, hid_dim)
        self.lin4 = nn.Linear(const_dim, hid_dim)
        # lin5/lin6 fuse [constant projection ; enc1 state ; enc2 state] back to hid_dim.
        self.lin5 = nn.Linear(3*hid_dim, hid_dim)
        self.lin6 = nn.Linear(3*hid_dim, hid_dim)

    @staticmethod
    def _conv_stack(in_dim, hid_dim, kernel_size):
        """Build three Conv1d+ReLU layers whose padding keeps the sequence length unchanged."""
        pad = kernel_size // 2
        return nn.Sequential(
            nn.Conv1d(in_dim, hid_dim, kernel_size, padding=pad),
            nn.ReLU(),
            nn.Conv1d(hid_dim, hid_dim, kernel_size, padding=pad),
            nn.ReLU(),
            nn.Conv1d(hid_dim, hid_dim, kernel_size, padding=pad),
            nn.ReLU(),
        )

    def forward(self, x0, x1a, x1b, x2, dur, force=False, trg=None):
        """Predict ``dur`` steps autoregressively.

        Args:
            x0:    (B, const_dim) constant features.
            x1a:   (B, L1, in_dim) primary input sequence (encoded by LSTM and CNNs).
            x1b:   (B, L, in_dim) secondary input sequence; may have zero time steps.
            x2:    (B, out_dim) value fed to the decoder at the first step.
            dur:   number of steps to decode (anything accepted by ``range``).
            force: if True, feed the ground truth ``trg`` back instead of the prediction.
            trg:   (B, dur, out_dim) targets; required when ``force`` is True.

        Returns:
            Tensor of shape (B, dur, out_dim).

        Raises:
            ValueError: if ``force`` is True but ``trg`` is not provided.
        """
        if force and trg is None:
            raise ValueError("Teacher forcing (force=True) requires `trg` to be provided.")

        e1, h1, c1 = self.enc1(x1a)
        _, h2, c2 = self.enc2(x1b)

        # Conv1d expects (B, channels, length); transpose in and back out.
        x1a = torch.transpose(x1a, 1, 2)
        e2 = torch.transpose(self.conv1(x1a), 1, 2)
        e3 = torch.transpose(self.conv2(x1a), 1, 2)
        e4 = torch.transpose(self.conv3(x1a), 1, 2)

        # Attention memory: all four feature sequences stacked along the time axis.
        mem = torch.concat([e1, e2, e3, e4], dim=1)
        y0 = torch.unsqueeze(x2, 1)

        # Initial decoder states, one slice per LSTM layer (two layers).
        h0 = torch.stack([self.lin1(x0), self.lin2(x0)], dim=0)
        h0 = torch.concat([h0, h1, h2], dim=2)
        h0 = F.relu(self.lin5(h0))

        c0 = torch.stack([self.lin3(x0), self.lin4(x0)], dim=0)
        c0 = torch.concat([c0, c1, c2], dim=2)
        c0 = F.relu(self.lin6(c0))

        output = []

        for t in range(dur):
            d0, (h0, c0) = self.dec(y0, (h0, c0), mem)
            output.append(d0)
            if force:
                # Teacher forcing: next input is the ground-truth step.
                y0 = trg[:, t:(t+1), :]
            else:
                # Free running: next input is the model's own prediction.
                y0 = torch.unsqueeze(d0, dim=1)

        # (dur, B, out_dim) -> (B, dur, out_dim)
        output = torch.stack(output).transpose(0, 1)
        return output
            
        

In [None]:
# -- 5 Fold Cross Validation Training --
kf = KFold(n_splits=CONFIG.N_FOLDS, shuffle=True, random_state=1)

for fold, (train_idx, val_idx) in enumerate(kf.split(train_set)):
    print(f"Fold #{fold}")

    # Data prep. batch_size=1 because sequences differ in length and are not padded.
    train_subset = Subset(train_set, train_idx)
    val_subset   = Subset(train_set, val_idx)
    train_loader = DataLoader(train_subset, batch_size=1, shuffle=True)
    val_loader   = DataLoader(val_subset, batch_size=1)

    # Model prep: every fold starts from the same pre-trained checkpoint. The checkpoint
    # may have been saved from a torch.compile-wrapped model, whose parameter names carry
    # an "_orig_mod." prefix, so the prefix is stripped before loading.
    model = MultiScaleCNNLSTMSeq2Seq(hid_dim=64)
    state_dict = torch.load("MS-CNN-LSTM-Seq2Seq-10.pth", map_location="cpu", weights_only=True)
    clean_state_dict = {k.replace("_orig_mod.", ""): v for k, v in state_dict.items()}
    model.load_state_dict(clean_state_dict)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=CONFIG.LEARNING_RATE)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode = 'min',
        factor = CONFIG.FACTOR,
        patience = CONFIG.PATIENCE,
    )
    model = torch.compile(model)

    for epoch in range(1, CONFIG.EPOCHS+1):
        # Teacher-forcing probability decays linearly from FORCE_MAX towards FORCE_MIN.
        force_ratio = CONFIG.FORCE_MAX - ((epoch-1)/CONFIG.EPOCHS)*(CONFIG.FORCE_MAX - CONFIG.FORCE_MIN)

        # Training
        # Validation below switches the model to eval mode, so training mode must be
        # restored at the start of every epoch (otherwise dropout stays disabled from
        # epoch 2 onwards).
        model.train()
        # Reset per epoch so the previous epoch's average does not leak into this sum.
        train_rmse = 0.0
        pbar = tqdm(train_loader, leave=False)

        for index, (_, x0, x1a, x1b, x2, target, dur) in enumerate(pbar):
            optimizer.zero_grad()
            # Decide per sample whether the decoder is fed ground truth or its own output.
            force = torch.rand(1).item() < force_ratio

            pred = model(x0, x1a, x1b, x2, dur, force=force, trg=target)
            loss = criterion(pred, target)
            loss.backward()
            optimizer.step()
            rmse = (loss.item()) ** 0.5
            train_rmse += rmse
            if index%200==0:
                pbar.set_postfix({'Loss': f"{rmse:.4f}"})

        train_rmse = train_rmse/len(train_loader)

        # Validation (always free-running, no teacher forcing)
        val_rmse = 0.0
        model.eval()
        with torch.no_grad():
            for _, x0, x1a, x1b, x2, target, dur in tqdm(val_loader, leave=False):
                pred = model(x0, x1a, x1b, x2, dur, force=False)
                loss = criterion(pred, target)
                val_rmse += (loss.item()) ** 0.5
        val_rmse = val_rmse/len(val_loader)

        # Epoch over
        print(f"Epoch {epoch} Train loss: {(train_rmse):.4f} Val loss: {(val_rmse):.4f}")
        scheduler.step(val_rmse)
        # NOTE: the same file is overwritten every epoch and every fold, and the state
        # dict of the compiled model keeps the "_orig_mod." key prefix.
        torch.save(model.state_dict(), "MS-CNN-LSTM-Seq2Seq.pth")
        

    

Fold #0


                                                                   

Epoch 1 Train loss: 0.3084 Val loss: 0.3852


                                                                  

Epoch 2 Train loss: 0.2307 Val loss: 0.3708


                                                                  

Epoch 3 Train loss: 0.2268 Val loss: 0.3692


                                                                  

Epoch 4 Train loss: 0.2303 Val loss: 0.3770


                                                                  

Epoch 5 Train loss: 0.2342 Val loss: 0.3712


                                                                  

Epoch 6 Train loss: 0.2396 Val loss: 0.3683


                                                                  

Epoch 7 Train loss: 0.2450 Val loss: 0.3755


                                                                  

Epoch 8 Train loss: 0.2501 Val loss: 0.3678


                                                                  

Epoch 9 Train loss: 0.2573 Val loss: 0.3699


                                                                  

Epoch 10 Train loss: 0.2627 Val loss: 0.3746


                                                                  

Epoch 11 Train loss: 0.2690 Val loss: 0.3896


                                                                  

Epoch 12 Train loss: 0.2748 Val loss: 0.3796


                                                                  

Epoch 13 Train loss: 0.2727 Val loss: 0.3684


                                                                  

Epoch 14 Train loss: 0.2791 Val loss: 0.3692


                                                                  

Epoch 15 Train loss: 0.2842 Val loss: 0.3892


                                                                  

Epoch 16 Train loss: 0.2893 Val loss: 0.3699


                                                                  

Epoch 17 Train loss: 0.2922 Val loss: 0.3652


                                                                  

Epoch 18 Train loss: 0.2969 Val loss: 0.3646


                                                                  

Epoch 19 Train loss: 0.3034 Val loss: 0.3654


                                                                  

Epoch 20 Train loss: 0.3074 Val loss: 0.3668
Fold #1


                                                                  

Epoch 1 Train loss: 0.3121 Val loss: 0.3739


                                                                  

Epoch 2 Train loss: 0.2270 Val loss: 0.3669


                                                                  

Epoch 3 Train loss: 0.2259 Val loss: 0.3693


                                                                  

Epoch 4 Train loss: 0.2270 Val loss: 0.3676


                                                                  

Epoch 5 Train loss: 0.2318 Val loss: 0.3723


                                                                  

Epoch 6 Train loss: 0.2381 Val loss: 0.3716


                                                                  

Epoch 7 Train loss: 0.2382 Val loss: 0.3672


                                                                  

Epoch 8 Train loss: 0.2428 Val loss: 0.3690


                                                                  

Epoch 9 Train loss: 0.2481 Val loss: 0.3689


                                                                  

Epoch 10 Train loss: 0.2557 Val loss: 0.3676


                                                                  

Epoch 11 Train loss: 0.2584 Val loss: 0.3642


                                                                  

Epoch 12 Train loss: 0.2628 Val loss: 0.3646


                                                                  

Epoch 13 Train loss: 0.2680 Val loss: 0.3625


                                                                  

Epoch 14 Train loss: 0.2736 Val loss: 0.3632


                                                                  

Epoch 15 Train loss: 0.2787 Val loss: 0.3644


                                                                  

Epoch 16 Train loss: 0.2846 Val loss: 0.3656


                                                                  

Epoch 17 Train loss: 0.2918 Val loss: 0.3684


                                                                  

Epoch 18 Train loss: 0.2951 Val loss: 0.3622


                                                                  

Epoch 19 Train loss: 0.3000 Val loss: 0.3625


                                                                  

Epoch 20 Train loss: 0.3063 Val loss: 0.3621
Fold #2


                                                                   

Epoch 1 Train loss: 0.3325 Val loss: 0.3666


                                                                  

Epoch 2 Train loss: 0.2249 Val loss: 0.3546


                                                                  

Epoch 3 Train loss: 0.2234 Val loss: 0.3558


                                                                  

Epoch 4 Train loss: 0.2282 Val loss: 0.3678


                                                                  

Epoch 5 Train loss: 0.2322 Val loss: 0.3567


                                                                  

Epoch 6 Train loss: 0.2379 Val loss: 0.3582


                                                                  

Epoch 7 Train loss: 0.2372 Val loss: 0.3513


                                                                  

Epoch 8 Train loss: 0.2410 Val loss: 0.3474


                                                                  

Epoch 9 Train loss: 0.2511 Val loss: 0.3470


                                                                  

Epoch 10 Train loss: 0.2543 Val loss: 0.3475


                                                                  

Epoch 11 Train loss: 0.2601 Val loss: 0.3493


                                                                  

Epoch 12 Train loss: 0.2660 Val loss: 0.3491


                                                                  

Epoch 13 Train loss: 0.2710 Val loss: 0.3536


                                                                  

Epoch 14 Train loss: 0.2748 Val loss: 0.3447


                                                                  

Epoch 15 Train loss: 0.2799 Val loss: 0.3462


                                                                  

Epoch 16 Train loss: 0.2828 Val loss: 0.3503


                                                                  

Epoch 17 Train loss: 0.2903 Val loss: 0.3480


                                                                  

Epoch 18 Train loss: 0.2947 Val loss: 0.3462


                                                                  

Epoch 19 Train loss: 0.2989 Val loss: 0.3441


                                                                  

Epoch 20 Train loss: 0.3060 Val loss: 0.3446
Fold #3


                                                                  

Epoch 1 Train loss: 0.3178 Val loss: 0.3669


                                                                  

Epoch 2 Train loss: 0.2244 Val loss: 0.3596


                                                                  

Epoch 3 Train loss: 0.2215 Val loss: 0.3517


                                                                  

Epoch 4 Train loss: 0.2251 Val loss: 0.3552


                                                                  

Epoch 5 Train loss: 0.2294 Val loss: 0.3515


                                                                  

Epoch 6 Train loss: 0.2361 Val loss: 0.3568


                                                                  

Epoch 7 Train loss: 0.2423 Val loss: 0.3674


                                                                  

Epoch 8 Train loss: 0.2490 Val loss: 0.3584


                                                                  

Epoch 9 Train loss: 0.2549 Val loss: 0.3587


                                                                  

Epoch 10 Train loss: 0.2503 Val loss: 0.3585


                                                                  

Epoch 11 Train loss: 0.2578 Val loss: 0.3560


                                                                  

Epoch 12 Train loss: 0.2636 Val loss: 0.3502


                                                                  

Epoch 13 Train loss: 0.2682 Val loss: 0.3513


                                                                  

Epoch 14 Train loss: 0.2760 Val loss: 0.3553


                                                                  

Epoch 15 Train loss: 0.2820 Val loss: 0.3569


                                                                  

Epoch 16 Train loss: 0.2895 Val loss: 0.3587


                                                                  

Epoch 17 Train loss: 0.2882 Val loss: 0.3486


                                                                  

Epoch 18 Train loss: 0.2958 Val loss: 0.3481


                                                                  

Epoch 19 Train loss: 0.3012 Val loss: 0.3499


                                                                  

Epoch 20 Train loss: 0.3063 Val loss: 0.3495
Fold #4


                                                                  

Epoch 1 Train loss: 0.3220 Val loss: 0.3578


                                                                  

Epoch 2 Train loss: 0.2227 Val loss: 0.3456


                                                                  

Epoch 3 Train loss: 0.2223 Val loss: 0.3491


                                                                  

Epoch 4 Train loss: 0.2278 Val loss: 0.3440


                                                                  

Epoch 5 Train loss: 0.2294 Val loss: 0.3468


                                                                  

Epoch 6 Train loss: 0.2375 Val loss: 0.3425


                                                                  

Epoch 7 Train loss: 0.2440 Val loss: 0.3486


                                                                  

Epoch 8 Train loss: 0.2495 Val loss: 0.3696


                                                                  

Epoch 9 Train loss: 0.2543 Val loss: 0.3465


                                                                  

Epoch 10 Train loss: 0.2614 Val loss: 0.3585


                                                                  

Epoch 11 Train loss: 0.2604 Val loss: 0.3426


                                                                  

Epoch 12 Train loss: 0.2664 Val loss: 0.3433


                                                                  

Epoch 13 Train loss: 0.2717 Val loss: 0.3461


                                                                  

Epoch 14 Train loss: 0.2783 Val loss: 0.3581


                                                                  

Epoch 15 Train loss: 0.2807 Val loss: 0.3402


                                                                  

Epoch 16 Train loss: 0.2862 Val loss: 0.3420


                                                                  

Epoch 17 Train loss: 0.2893 Val loss: 0.3392


                                                                  

Epoch 18 Train loss: 0.2971 Val loss: 0.3418


                                                                  

Epoch 19 Train loss: 0.3017 Val loss: 0.3398


                                                                  

Epoch 20 Train loss: 0.3077 Val loss: 0.3389




In [None]:
# -- Validation --
# NOTE(review): random_state differs from the training cell (1 vs 2), so these folds are
# not the folds the model was trained/validated on — confirm this is intended.
kf = KFold(n_splits=CONFIG.N_FOLDS, shuffle=True, random_state=2)

# NOTE(review): as written this evaluates a freshly initialised model; uncomment the
# block below to evaluate the saved checkpoint instead.
model = MultiScaleCNNLSTMSeq2Seq(hid_dim=64)

# Uncomment if loading saved model
# state_dict = torch.load("MS-CNN-LSTM-Seq2Seq.pth", map_location="cpu", weights_only=True)
# clean_state_dict = {k.replace("_orig_mod.", ""): v for k, v in state_dict.items()}
# model.load_state_dict(clean_state_dict)

model = torch.compile(model)

criterion = nn.MSELoss()

# Number of predicted frames accumulated before a pooled RMSE is emitted.
FRAMES_PER_CHUNK = 4000

for fold, (_, val_idx) in enumerate(kf.split(train_set)):

    # Data prep (only the validation split is needed here)
    val_subset = Subset(train_set, val_idx)
    val_loader = DataLoader(val_subset, batch_size=1)

    # "Val loss 1": mean of per-sample RMSEs.
    val_rmse = 0.0

    # "Val loss 2": RMSE pooled over chunks of more than FRAMES_PER_CHUNK frames
    # (each sample's MSE weighted by its number of frames), averaged over the chunks.
    counter = 0          # frames in the current chunk
    running_rmse = 0.0   # frame-weighted sum of MSEs in the current chunk
    avg_rmse = 0.0       # sum of the chunk RMSEs
    b_count = 0          # number of chunks emitted

    model.eval()
    with torch.no_grad():
        for _, x0, x1a, x1b, x2, target, dur in tqdm(val_loader, leave=False):
            pred = model(x0, x1a, x1b, x2, dur, force=False)
            loss = criterion(pred, target)
            val_rmse += (loss.item()) ** 0.5

            counter += dur.item()
            running_rmse += ((loss.item())*dur.item())
            if (counter > FRAMES_PER_CHUNK):
                avg_rmse += (running_rmse/counter)**0.5
                b_count += 1
                running_rmse = 0
                counter = 0

        # Flush the final partial chunk. Skipped when it is empty (the last sample just
        # closed a chunk), which previously caused a division by zero.
        if counter > 0:
            avg_rmse += (running_rmse/counter)**0.5
            b_count += 1

    val_rmse = val_rmse/len(val_loader)
    avg_rmse = avg_rmse/b_count if b_count else float("nan")

    print(f"Fold {fold} Val loss 1: {(val_rmse):.4f} Val loss 2: {(avg_rmse):.4f}")



                                                    

Fold 0 Val loss 1: 0.3415 Val loss 2: 0.6302


                                                    

Fold 1 Val loss 1: 0.3523 Val loss 2: 0.6694


                                                    

Fold 2 Val loss 1: 0.3454 Val loss 2: 0.6181


                                                    

Fold 3 Val loss 1: 0.3474 Val loss 2: 0.6241


                                                    

Fold 4 Val loss 1: 0.3424 Val loss 2: 0.6135




In [None]:
#torch.save(model.state_dict(), "MS-CNN-LSTM-Seq2Seq.pth")
