In [1]:
# --- Imports ---

import torch
from torch.utils.data import Dataset, DataLoader, Subset
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error

import pandas as pd
import numpy as np

from tqdm import tqdm
import time
import math

In [2]:
# --- Configurations ---

class CONFIG:
    """Notebook-wide settings, read either as ``CONFIG.X`` or through ``config``."""

    # Prefix for the competition CSVs. It is concatenated directly with the
    # relative file names, so the trailing slash is required.
    DATA_PATH = "./kaggle/input/nfl-big-data-bowl-2026-prediction/"

    # --- Cross-validation / schedule ---
    N_FOLDS = 5              # n_splits handed to KFold
    EPOCHS = 20              # epochs per fold

    # --- Optimizer and learning-rate scheduler ---
    LEARNING_RATE = 5e-4     # Adam learning rate
    FACTOR = 0.9             # ReduceLROnPlateau multiplicative factor
    PATIENCE = 5             # ReduceLROnPlateau patience (epochs)

    # --- Model ---
    DROPOUT = 0.01           # dropout argument of the nn.LSTM layers

    # --- Teacher forcing ---
    # The training loop interpolates the per-epoch forcing probability
    # linearly, starting at FORCE_MAX and moving toward FORCE_MIN.
    FORCE_MAX = 0.8
    FORCE_MIN = 0.2


config = CONFIG()

In [None]:
# --- Data import ---

# One input file and one output file per week; weeks 1..18 are zero-padded
# to two digits in the file name.
train_input_files = [
    f"{config.DATA_PATH}train/input_2023_w{w:02d}.csv" for w in range(1, 19)
]
train_output_files = [
    f"{config.DATA_PATH}train/output_2023_w{w:02d}.csv" for w in range(1, 19)
]

# Stack the weekly frames. The per-file row index is kept as-is (it repeats
# across weeks); downstream code resets it after filtering.
train_input = pd.concat(list(map(pd.read_csv, train_input_files)))
train_output = pd.concat(list(map(pd.read_csv, train_output_files)))

# Lookup table: (game_id, play_id) -> all input rows of that play.
train_input_dict = dict(iter(train_input.groupby(['game_id', 'play_id'])))

In [4]:
# -- Feature engineering helpers --
def height_to_feet(height_str):
    """Convert a ``"feet-inches"`` height string (e.g. ``"6-2"``) to feet.

    Args:
        height_str: Height written as two integers separated by ``-``. Any
            object is accepted; it is converted with ``str()`` first.

    Returns:
        float: ``feet + inches / 12``, or the fallback ``6.0`` when the value
        cannot be parsed (missing value, wrong number of parts, non-integer
        parts).
    """
    try:
        ft, inches = map(int, str(height_str).split('-'))
    except (ValueError, TypeError):
        # Only parsing failures fall back to the default. A bare ``except``
        # would also swallow KeyboardInterrupt / SystemExit.
        return 6.0
    return ft + inches/12

def encode_play_dir(height_str):
    """Binary-encode the play direction.

    Args:
        height_str: The play-direction value. The parameter name is a
            leftover from the height helper; callers pass the
            ``play_direction`` column value here.

    Returns:
        int: ``1`` when the value equals ``"Left"``, otherwise ``0``.
    """
    if height_str == "Left":
        return 1
    return 0

def encode_player_role(height_str):
    """Binary-encode whether the player is the targeted receiver.

    Args:
        height_str: The player-role value. The parameter name is a leftover
            from the height helper; callers pass the ``player_role`` column
            value here.

    Returns:
        int: ``1`` when the value equals ``"Targeted Receiver"``, otherwise ``0``.
    """
    if height_str == "Targeted Receiver":
        return 1
    return 0


In [5]:
# -- Input constructors --

def build_x0a(player_df):
    """Build the per-play constant feature vector of the predicted player.

    Args:
        player_df: Input rows of one player for one play, indexed 0..n-1
            (label ``0`` is read as the first row, position ``-1`` as the
            last row).

    Returns:
        tuple: ``(features, duration)`` where ``features`` is a 16-element
        array ``[height, weight, land_x, land_y, start_x, start_y, end_x,
        end_y, end_a, end_vx, end_vy, end_ox, end_oy, play_dir, role,
        duration]`` and ``duration`` is the first row's ``num_frames_output``.
    """
    def first(col):
        # Value of `col` in the row labelled 0.
        return player_df.loc[0, col]

    last = player_df.iloc[-1]

    # Angles are stored in degrees; sin gives the x component and cos the
    # y component, both for the movement direction and the orientation.
    heading = math.radians(last['dir'])
    facing = math.radians(last['o'])
    speed = last['s']

    duration = first('num_frames_output')

    features = [
        height_to_feet(first('player_height')),
        first('player_weight'),
        first('ball_land_x'),
        first('ball_land_y'),
        first('x'),
        first('y'),
        last['x'],
        last['y'],
        last['a'],
        speed * math.sin(heading),
        speed * math.cos(heading),
        math.sin(facing),
        math.cos(facing),
        encode_play_dir(first('play_direction')),
        encode_player_role(first('player_role')),
        duration,
    ]
    return (np.array(features), duration)

def build_x0b(qb_df):
    """Build the per-play constant feature vector of the passer.

    Args:
        qb_df: Input rows of the passer for one play, indexed 0..n-1. May be
            empty when the play has no row with the passer role.

    Returns:
        np.ndarray: 11 values ``[height, weight, start_x, start_y, end_x,
        end_y, end_a, end_vx, end_vy, end_ox, end_oy]``. For an empty frame a
        fixed placeholder vector is returned instead (presumably a typical
        passer build at a central field position -- confirm with the author).
    """
    if qb_df.empty:
        return np.array([6.25, 220, 60, 26.6, 60, 26.6, 0, 0, 0, 0, 0])

    def first(col):
        # Value of `col` in the row labelled 0.
        return qb_df.loc[0, col]

    last = qb_df.iloc[-1]

    # Degrees -> radians; sin is the x component, cos the y component.
    heading = math.radians(last['dir'])
    facing = math.radians(last['o'])
    speed = last['s']

    features = [
        height_to_feet(first('player_height')),
        first('player_weight'),
        first('x'),
        first('y'),
        last['x'],
        last['y'],
        last['a'],
        speed * math.sin(heading),
        speed * math.cos(heading),
        math.sin(facing),
        math.cos(facing),
    ]
    return np.array(features)


def build_x1(player_df):
    """Build the per-frame feature matrix for one player's input sequence.

    The input frame is left untouched: derived columns are computed on a new
    frame instead of being written back into ``player_df``.

    Args:
        player_df: Rows of one player for one play with columns ``x``, ``y``,
            ``s``, ``a``, ``dir`` and ``o`` (``dir`` / ``o`` in degrees). May
            be empty.

    Returns:
        np.ndarray: Array with one row per input row and the 10 columns
        ``[x, y, vx, vy, ax, ay, s, a, ox, oy]``; shape ``(0, 10)`` for an
        empty frame.
    """
    dir_radians = np.deg2rad(player_df['dir'])
    o_radians = np.deg2rad(player_df['o'])

    # Velocity components: sin -> x, cos -> y.
    vx = player_df['s'] * np.sin(dir_radians)
    vy = player_df['s'] * np.cos(dir_radians)

    features = player_df[['x', 'y', 's', 'a']].assign(
        vx=vx,
        vy=vy,
        # Row-to-row difference of the velocity components (not divided by a
        # time step); the first row has no predecessor and is set to 0.
        ax=vx.diff().fillna(0),
        ay=vy.diff().fillna(0),
        ox=np.sin(o_radians),
        oy=np.cos(o_radians),
    )
    return features[['x', 'y', 'vx', 'vy', 'ax', 'ay', 's', 'a', 'ox', 'oy']].to_numpy()

def build_x2(player_df):
    """Return the player's last input position.

    Args:
        player_df: Rows of one player for one play with columns ``x`` and ``y``.

    Returns:
        np.ndarray: ``[x, y]`` taken from the last row.
    """
    last = player_df.iloc[-1]
    return np.array([last['x'], last['y']])


def build_inputs(gid, pid, nid):
    """Assemble all model inputs for one (game, play, player) triple.

    Args:
        gid: ``game_id`` of the play.
        pid: ``play_id`` of the play.
        nid: ``nfl_id`` of the player to predict.

    Returns:
        tuple: ``(x0, x1a, x1b, x2, dur)`` where
            * ``x0``  - float tensor: player constants followed by passer constants,
            * ``x1a`` - ndarray: per-frame features of the player,
            * ``x1b`` - ndarray of per-frame passer features, or ``None`` when
              the play has no passer rows,
            * ``x2``  - float tensor: the player's last input ``[x, y]``,
            * ``dur`` - the player's ``num_frames_output``.
    """
    play_df = train_input_dict[(gid, pid)]

    # Copies with a fresh 0..n-1 index: the builders read label 0 as the first row.
    is_player = play_df['nfl_id'] == nid
    is_passer = play_df['player_role'] == 'Passer'
    player_df = play_df[is_player].copy().reset_index(drop=True)
    qb_df = play_df[is_passer].copy().reset_index(drop=True)

    player_const, dur = build_x0a(player_df)
    passer_const = build_x0b(qb_df)
    x0 = torch.from_numpy(np.concatenate((player_const, passer_const), axis=0)).float()

    x1a = build_x1(player_df)
    x1b = build_x1(qb_df)
    if x1b.size == 0:
        # No passer rows: flag it with None so the dataset can substitute an
        # empty sequence.
        x1b = None

    x2 = torch.from_numpy(build_x2(player_df)).float()

    return (x0, x1a, x1b, x2, dur)


In [6]:
# -- Dataset and Dataloader-- 

class TrainDataset(Dataset):
    """Map-style dataset holding one precomputed sample per (game, play, player).

    All arguments are index-aligned containers of length N. Sequence features
    and targets are converted to float32 tensors on access; ``x0`` and ``x2``
    are returned as stored.
    """

    def __init__(self, ids, x0, x1a, x1b, x2, duration, targets):
        # N: number of samples, L1: input sequence length, L2: output sequence length
        self.ids = ids                  # N sample identifiers
        self.x0 = x0                    # N constant-feature tensors (27 values each)
        self.x1a = x1a                  # N player sequences, each (L1, 10)
        self.x1b = x1b                  # N passer sequences, each (L1, 10) or None
        self.x2 = x2                    # N last-position tensors (2 values each)
        self.targets = targets          # N target sequences, each (L2, 2)
        self.duration = duration        # N output lengths

    def __len__(self):
        return len(self.x0)

    def __getitem__(self, idx):
        """Return ``(id, x0, x1a, x1b, x2, targets, dur)`` for sample ``idx``."""
        player_seq = torch.tensor(self.x1a[idx], dtype=torch.float32)

        passer_raw = self.x1b[idx]
        if passer_raw is not None:
            passer_seq = torch.tensor(passer_raw, dtype=torch.float32)
        else:
            # Missing passer: zero-length sequence with the same feature width.
            passer_seq = torch.empty(0, player_seq.shape[1], dtype=torch.float32)

        target_seq = torch.tensor(self.targets[idx], dtype=torch.float32)

        return (
            self.ids[idx],
            self.x0[idx],
            player_seq,
            passer_seq,
            self.x2[idx],
            target_seq,
            self.duration[idx],
        )

def build_train_ds(train_output):
    """Create the training dataset from the output (target) tracking frame.

    Args:
        train_output: Frame with ``game_id``, ``play_id``, ``nfl_id`` and the
            target coordinates ``x`` / ``y``, one row per output frame.

    Returns:
        TrainDataset: One sample per (game_id, play_id, nfl_id) group.
    """
    key_cols = ['game_id', 'play_id', 'nfl_id']

    # One row per group; `targets` holds that group's (n_rows, 2) x/y array.
    train = (
        train_output
        .groupby(key_cols)[['x', 'y']]
        .apply(lambda a: a.to_numpy())
        .reset_index(name = 'targets')
    )

    def inputs_for(row):
        # Wrap the 5-tuple in a Series so apply() spreads it over 5 columns.
        return pd.Series(build_inputs(row['game_id'], row['play_id'], row['nfl_id']))

    train[['x0', 'x1a', 'x1b', 'x2', 'dur']] = train.apply(inputs_for, axis=1)

    # Sample identifier: "<game_id>_<play_id>_<nfl_id>".
    train["id"] = train["game_id"].astype(str) + "_" + train["play_id"].astype(str) + "_" + train["nfl_id"].astype(str)

    dataset = TrainDataset(
        train['id'],
        train['x0'].to_numpy(),
        train['x1a'].to_numpy(),
        train['x1b'].to_numpy(),
        train['x2'].to_numpy(),
        train['dur'].to_numpy(),
        targets = train['targets'].to_numpy(),
    )
    return dataset

# Materialise every training sample up front: build_inputs is called once per
# (game_id, play_id, nfl_id) group in train_output.
train_set = build_train_ds(train_output)

In [7]:
# -- Model --

class DotAttention(nn.Module):
    """Parameter-free dot-product attention of one query over a memory sequence."""

    def forward(self, s_t, memory):
        """Return the softmax-weighted sum of the memory rows.

        Args:
            s_t: Query of shape ``(B, H)``.
            memory: Memory of shape ``(B, L, H)``.

        Returns:
            Tensor of shape ``(B, H)``.
        """
        query = torch.unsqueeze(s_t, 1)                                # (B, 1, H)
        similarity = torch.bmm(query, torch.transpose(memory, 1, 2))   # (B, 1, L)
        # Normalise over the memory positions.
        alpha = F.softmax(similarity, dim=2)                           # (B, 1, L)
        return torch.bmm(alpha, memory).squeeze(1)                     # (B, H)



class Encoder(nn.Module):
    """LSTM sequence encoder with a learned fallback state for empty sequences.

    Args:
        in_dim: Number of features per time step.
        hid_dim: LSTM hidden size.
        n_layers: Number of stacked LSTM layers.
    """

    def __init__(self, in_dim=10, hid_dim=256, n_layers=2):
        super().__init__()
        self.rnn = nn.LSTM(in_dim, hid_dim, n_layers, batch_first=True, dropout=CONFIG.DROPOUT)
        # Learned final states used when the input has zero time steps.
        self.h0 = nn.Parameter(torch.randn(n_layers, 1, hid_dim))
        self.c0 = nn.Parameter(torch.randn(n_layers, 1, hid_dim))

    def forward(self, x1):
        """Encode a batch-first sequence.

        Args:
            x1: Tensor of shape ``(B, L, in_dim)``; ``L`` may be 0.

        Returns:
            tuple: ``(e1, h1, c1)`` - per-step outputs ``(B, L, hid_dim)`` and
            the final hidden / cell states ``(n_layers, B, hid_dim)``. For
            ``L == 0`` the outputs are an empty ``(B, 0, hid_dim)`` tensor and
            the states are the learned fallbacks broadcast over the batch.
        """
        if x1.size(1) > 0:
            e1, (h1, c1) = self.rnn(x1)
        else:
            batch = x1.size(0)
            # Empty output on the input's device/dtype instead of a 1-element
            # CPU int placeholder.
            e1 = x1.new_zeros((batch, 0, self.rnn.hidden_size))
            # The parameters are stored with batch dim 1; broadcast them so
            # the states match the LSTM branch for any batch size.
            h1 = self.h0.expand(-1, batch, -1).contiguous()
            c1 = self.c0.expand(-1, batch, -1).contiguous()
        return  e1, h1, c1


class Decoder(nn.Module):
    """Single-step LSTM decoder with dot-product attention over a memory.

    Args:
        out_dim: Size of the predicted vector (also the LSTM input size, since
            the previous prediction is fed back in).
        hid_dim: LSTM hidden size.
        n_layers: Number of stacked LSTM layers.
    """

    def __init__(self, out_dim=2, hid_dim=256, n_layers=2):
        super().__init__()
        self.rnn = nn.LSTM(out_dim, hid_dim, n_layers, batch_first=True, dropout=CONFIG.DROPOUT)
        self.attn = DotAttention()
        # Projects [LSTM output ; attention context] to the prediction; the
        # ReLU keeps every predicted value non-negative.
        self.out = nn.Sequential(nn.Linear(2*hid_dim, out_dim), nn.ReLU())

    def forward(self, y0, h_c, mem):
        """Run one decoding step.

        Args:
            y0: Previous output, shape ``(B, 1, out_dim)``.
            h_c: ``(h, c)`` LSTM state tuple.
            mem: Attention memory, shape ``(B, M, hid_dim)``.

        Returns:
            tuple: ``(prediction, (h, c))`` with ``prediction`` of shape
            ``(B, out_dim)`` and the updated LSTM state.
        """
        step_out, h_c = self.rnn(y0, h_c)
        state = step_out.squeeze(1)
        context = self.attn(state, mem)
        prediction = self.out(torch.cat((state, context), dim=1))
        return prediction, h_c
        

class MultiScaleCNNLSTMSeq2Seq(nn.Module):
    """Seq2seq trajectory model: LSTM + multi-scale CNN encoders, attention decoder.

    The player sequence is encoded by an LSTM and by three convolution stacks
    (kernel sizes 3, 5 and 7); their per-step outputs are concatenated along
    the time axis to form the attention memory. The passer sequence is encoded
    by a second LSTM whose final state, together with projections of the
    constant features, initialises the decoder state.

    Args:
        in_dim: Features per input time step.
        hid_dim: Hidden size shared by encoders, conv stacks and decoder.
        out_dim: Size of each predicted step.
        const_dim: Size of the constant feature vector ``x0``.

    Note:
        The initial decoder state is built from exactly two projections per
        state, so the encoders/decoder are used with their default of two
        LSTM layers.
    """

    def __init__(self, in_dim=10, hid_dim=256, out_dim=2, const_dim=27):
        super().__init__()
        # Forward the sizes so that non-default dimensions stay consistent
        # across all sub-modules (they were previously ignored here).
        self.enc1 = Encoder(in_dim=in_dim, hid_dim=hid_dim)
        self.enc2 = Encoder(in_dim=in_dim, hid_dim=hid_dim)
        self.dec = Decoder(out_dim=out_dim, hid_dim=hid_dim)

        self.conv1 = self._conv_stack(in_dim, hid_dim, kernel_size=3)
        self.conv2 = self._conv_stack(in_dim, hid_dim, kernel_size=5)
        self.conv3 = self._conv_stack(in_dim, hid_dim, kernel_size=7)

        # lin1/lin2 -> per-layer initial hidden state, lin3/lin4 -> per-layer
        # initial cell state, both derived from the constant features.
        self.lin1 = nn.Linear(const_dim, hid_dim)
        self.lin2 = nn.Linear(const_dim, hid_dim)
        self.lin3 = nn.Linear(const_dim, hid_dim)
        self.lin4 = nn.Linear(const_dim, hid_dim)
        # Fuse [constants ; player encoder ; passer encoder] back to hid_dim.
        self.lin5 = nn.Linear(3*hid_dim, hid_dim)
        self.lin6 = nn.Linear(3*hid_dim, hid_dim)

    @staticmethod
    def _conv_stack(in_dim, hid_dim, kernel_size):
        """Three Conv1d+ReLU layers whose padding preserves the sequence length."""
        pad = kernel_size // 2
        return nn.Sequential(
            nn.Conv1d(in_dim, hid_dim, kernel_size, padding=pad),
            nn.ReLU(),
            nn.Conv1d(hid_dim, hid_dim, kernel_size, padding=pad),
            nn.ReLU(),
            nn.Conv1d(hid_dim, hid_dim, kernel_size, padding=pad),
            nn.ReLU(),
        )

    def forward(self, x0, x1a, x1b, x2, dur, force=False, trg=None):
        """Predict ``dur`` future steps.

        Args:
            x0: Constant features, ``(B, const_dim)``.
            x1a: Player input sequence, ``(B, L, in_dim)``.
            x1b: Passer input sequence, ``(B, Lq, in_dim)``; ``Lq`` may be 0.
            x2: First decoder input, ``(B, out_dim)``.
            dur: Number of steps to decode (anything ``range()`` accepts).
            force: When True, feed the ground truth back in (teacher forcing)
                instead of the model's own prediction.
            trg: Ground-truth sequence ``(B, dur, out_dim)``; required when
                ``force`` is True.

        Returns:
            Tensor of shape ``(B, dur, out_dim)``.
        """
        e1, h1, c1 = self.enc1(x1a)
        _, h2, c2 = self.enc2(x1b)

        # Conv1d expects (B, channels, L); transpose in and back out.
        x1a = torch.transpose(x1a, 1, 2)
        e2 = torch.transpose(self.conv1(x1a), 1, 2)
        e3 = torch.transpose(self.conv2(x1a), 1, 2)
        e4 = torch.transpose(self.conv3(x1a), 1, 2)

        # Memory stacks all four encodings along the time axis.
        mem = torch.concat([e1,e2,e3,e4], dim=1)
        y0 = torch.unsqueeze(x2, 1)

        h0 = torch.stack([self.lin1(x0), self.lin2(x0)], dim=0)
        h0 = torch.concat([h0, h1, h2], dim=2)
        h0 = F.relu(self.lin5(h0))

        c0 = torch.stack([self.lin3(x0), self.lin4(x0)], dim=0)
        c0 = torch.concat([c0, c1, c2], dim=2)
        c0 = F.relu(self.lin6(c0))

        output = []

        for t in range(dur):
            d0, (h0, c0) = self.dec(y0, (h0,c0), mem)
            output.append(d0)
            if force:
                # Next input is the true value of the step just predicted.
                y0 = trg[:, t:(t+1), :]
            else:
                y0 = torch.unsqueeze(d0, dim=1)

        # (dur, B, out_dim) -> (B, dur, out_dim)
        output = torch.stack(output).transpose(0,1)
        return output
            
        

In [8]:
# 5 Fold Cross Validation Training
kf = KFold(n_splits=CONFIG.N_FOLDS, shuffle=True, random_state=1)

for fold, (train_idx, val_idx) in enumerate(kf.split(train_set)):
    print(f"Fold #{fold}")

    # Data prep. batch_size=1 because sequences and output lengths vary per
    # sample and are not padded.
    train_subset = Subset(train_set, train_idx)
    val_subset   = Subset(train_set, val_idx)
    train_loader = DataLoader(train_subset, batch_size=1, shuffle=True)
    val_loader   = DataLoader(val_subset, batch_size=1)

    # Model prep
    model = MultiScaleCNNLSTMSeq2Seq()
    #checkpoint = torch.load("MS-CNN-LSTM-Seq2Seq-1.pth", weights_only=True)
    #model.load_state_dict(checkpoint)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=CONFIG.LEARNING_RATE)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode = 'min',
        factor = CONFIG.FACTOR,
        patience = CONFIG.PATIENCE,
    )

    for epoch in range(1, CONFIG.EPOCHS+1):

        # --- Training ---
        # Validation below switches the model to eval mode, so training mode
        # has to be restored at the start of every epoch (otherwise dropout
        # stays disabled from epoch 2 on).
        model.train()
        # Reset per epoch so the reported mean is not contaminated by the
        # previous epoch's value.
        train_rmse = 0.0

        # Teacher-forcing probability: FORCE_MAX in epoch 1, decreasing
        # linearly toward FORCE_MIN over the epochs.
        force_ratio = CONFIG.FORCE_MAX - ((epoch-1)/CONFIG.EPOCHS)*(CONFIG.FORCE_MAX - CONFIG.FORCE_MIN)
        pbar = tqdm(train_loader, leave=False)

        for index, (sample_id, x0, x1a, x1b, x2, target, dur) in enumerate(pbar):
            optimizer.zero_grad()
            # Teacher forcing is decided once per sample, not per step.
            force = torch.rand(1).item() < force_ratio

            pred = model(x0, x1a, x1b, x2, dur, force=force, trg=target)
            loss = criterion(pred, target)
            loss.backward()
            optimizer.step()
            rmse = (loss.item()) ** 0.5
            train_rmse += rmse
            if index%200==0:pbar.set_postfix({'Loss': f"{rmse:.4f}"})

        train_rmse = train_rmse/len(train_loader)

        # --- Validation (no teacher forcing, no gradients) ---
        val_rmse = 0.0
        model.eval()
        with torch.no_grad():
            for sample_id, x0, x1a, x1b, x2, target, dur in val_loader:
                pred = model(x0, x1a, x1b, x2, dur, force=False)
                loss = criterion(pred, target)
                val_rmse += (loss.item()) ** 0.5
        val_rmse = val_rmse/len(val_loader)

        # Epoch over
        print(f"Epoch {epoch} Train loss: {(train_rmse):.4f} Val loss: {(val_rmse):.4f}")
        scheduler.step(val_rmse)
        # Overwrites the checkpoint after every epoch, regardless of val score.
        torch.save(model.state_dict(), "MS-CNN-LSTM-Seq2Seq-1.pth")

    # Only the first fold is trained; the remaining splits are skipped.
    break

    

Fold #0


                                                                

KeyboardInterrupt: 