In [8]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn

import pandas as pd
import numpy as np

from tqdm import tqdm


In [9]:
# Seed the global PyTorch RNG before anything stochastic runs, so that weight
# initialisation and DataLoader shuffling repeat across Restart & Run All.
# NOTE(review): some CUDA kernels may still be non-deterministic; this only pins the RNG.
seed = 42
torch.manual_seed(seed)

# Directory holding the weekly training CSVs (input_2023_wXX.csv / output_2023_wXX.csv).
base_path = './kaggle/input/nfl-big-data-bowl-2026-prediction/train'
# Learning rate handed to the optimiser.
lr = .001
# Number of full passes over the training set.
num_epochs = 3
# Samples per mini-batch for the training DataLoader.
batch_size = 32

In [10]:
def format_input(input):
    """Turn raw tracking frames into a per-player lookup table.

    Args:
        input: iterable of DataFrames; they are concatenated row-wise. Each
            must provide the columns 'player_to_predict', 'dir', 's',
            'game_id', 'play_id', 'nfl_id', 'x', 'y', 'ball_land_x' and
            'ball_land_y'.

    Returns:
        dict mapping "<game_id>_<play_id>_<nfl_id>" to the DataFrame of that
        player's rows (only rows flagged player_to_predict), restricted to the
        id, position, ball-landing and derived velocity columns. Row order
        within each group is the order in which rows appeared in the input.
    """
    frames = pd.concat(input, ignore_index=True)
    # Keep only the players flagged for prediction; copy so the new columns
    # below are written to an independent frame.
    targets = frames.loc[frames['player_to_predict'] == True].copy(deep=True)

    # 'dir' is treated as degrees; speed 's' is split into components with
    # sin -> x and cos -> y.
    # NOTE(review): this presumes 'dir' is measured from the +y axis - confirm against the data dictionary.
    heading = np.deg2rad(targets['dir'])
    targets['vx'] = targets['s'] * np.sin(heading)
    targets['vy'] = targets['s'] * np.cos(heading)

    kept_columns = ['game_id', 'play_id', 'nfl_id', 'x', 'y', 'ball_land_x', 'ball_land_y', 'vx', 'vy']
    targets = targets[kept_columns]

    per_player = {}
    for (gid, pid, nid), group in targets.groupby(['game_id', 'play_id', 'nfl_id']):
        per_player[f"{gid}_{pid}_{nid}"] = group
    return per_player


In [11]:
# Read the nine weekly tracking-input files (w01..w09) into a list of frames.
raw_input_train = [
    pd.read_csv(f'{base_path}/input_2023_w0{i}.csv')
    for i in range(1,10)
]
# Read the matching weekly target files.
raw_output_train = [
    pd.read_csv(f'{base_path}/output_2023_w0{i}.csv')
    for i in range(1,10)
]

# Inputs become a per-player lookup; targets become one long frame with one row per sample.
in_train = format_input(raw_input_train)
out_train = pd.concat(objs=raw_output_train, ignore_index=True)

In [12]:
class TrainDataset(Dataset):
    """Map-style dataset with one sample per row of the target frame.

    Each sample pairs a target row (future position at a given frame_id) with
    the last row stored for the same player in the input lookup.
    """

    def __init__(self, train_out, train_in):
        """Store the target frame and the per-player input lookup.

        Args:
            train_out: DataFrame with columns 'game_id', 'play_id', 'nfl_id',
                'frame_id', 'x', 'y'.
            train_in: dict keyed "<game_id>_<play_id>_<nfl_id>" whose values
                are DataFrames with 'x', 'y', 'ball_land_x', 'ball_land_y',
                'vx', 'vy'.
        """
        self.train_out = train_out
        self.train_in = train_in

    def __len__(self):
        """Number of samples, i.e. rows in the target frame."""
        return self.train_out.shape[0]

    def __getitem__(self, idx):
        """Build the (x1, x2, y) float32 tensors for target row ``idx``.

        Returns:
            x1: [ball_land_x, ball_land_y, vx, vy, frame_id], shape (5,).
            x2: last known [x, y] of the player, shape (2,).
            y:  target [x, y] from the target row, shape (2,).
        """
        target = self.train_out.iloc[idx]
        player_key = "_".join(
            str(int(target[col])) for col in ('game_id', 'play_id', 'nfl_id')
        )
        # Only the final stored row of the player's input history is used.
        last_frame = self.train_in[player_key].iloc[-1]

        context = last_frame[['ball_land_x', 'ball_land_y', 'vx', 'vy']].tolist()
        context.append(target['frame_id'])
        x1 = torch.tensor(context, dtype=torch.float32)

        x2 = torch.tensor(last_frame[['x', 'y']].to_numpy(), dtype=torch.float32)
        y = torch.tensor(target[['x', 'y']].to_numpy(), dtype=torch.float32)

        return x1, x2, y


# Wrap the target frame and per-player lookup, then batch and reshuffle every epoch.
train_dataset = TrainDataset(train_out=out_train, train_in=in_train)
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

In [13]:
class MLP(nn.Module):
    """Residual position predictor.

    A 5 -> 16 -> 2 perceptron produces an offset that is added to a
    2-component base tensor supplied by the caller.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in network order so parameter initialisation
        # consumes the RNG in the same sequence as a literal Sequential(...).
        hidden = nn.Linear(5, 16)
        head = nn.Linear(16, 2)
        self.mlp = nn.Sequential(hidden, nn.ReLU(), head)

    def forward(self, x1, x2):
        """Return ``x2`` plus the offset the network predicts from ``x1``.

        Args:
            x1: tensor whose last dimension has size 5.
            x2: tensor broadcastable with the 2-wide network output.
        """
        return x2 + self.mlp(x1)
    
# Prefer the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Build the network on the chosen device and wrap it with torch.compile.
model = torch.compile(MLP().to(device))
# Adam over all model parameters, trained against mean squared error.
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)
criterion = nn.MSELoss()

In [14]:
# --- Training Loop ---
# One optimiser step per mini-batch; the mean batch loss is reported per epoch.
for epoch in range(num_epochs):
    total_loss = 0.0
    progress = tqdm(train_loader, desc=f"Epoch {epoch+1}", leave=False)
    for batch in progress:
        # Each batch is (context features, last known position, target position).
        context, last_pos, target = (tensor.to(device) for tensor in batch)

        # Clear stale gradients, then forward / backward / update.
        optimizer.zero_grad()
        loss = criterion(model(context, last_pos), target)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}: Loss = {total_loss / len(train_loader):.4f}")


                                                             

Epoch 1: Loss = 3.9110


                                                             

Epoch 2: Loss = 2.5523


                                                             

Epoch 3: Loss = 2.3934




In [15]:
# Test data preprocessing
# `instances` lists the rows to predict (one per game/play/player/frame).
instances = pd.read_csv('./kaggle/input/nfl-big-data-bowl-2026-prediction/test.csv')

# Tracking history for the test plays, reshaped into the same per-player
# lookup that the training inputs use.
eval_in = format_input(
    [pd.read_csv('./kaggle/input/nfl-big-data-bowl-2026-prediction/test_input.csv')]
)

In [16]:
def compute_pos(row):
    """Predict one (x, y) position for a single row of the test instances.

    Reads the module-level ``eval_in`` lookup, ``model`` and ``device``.

    Args:
        row: Series-like with 'game_id', 'play_id', 'nfl_id' and 'frame_id'.
            Values may be ints or integral floats; ``DataFrame.apply(axis=1)``
            passes each row as a Series with one common dtype, so ids can
            arrive as floats when the frame has any float column.

    Returns:
        pd.Series ``[full_id, pred_x, pred_y]`` where ``full_id`` is
        "<game_id>_<play_id>_<nfl_id>_<frame_id>" with integer formatting.

    Raises:
        KeyError: if the player has no entry in ``eval_in``.
    """
    # Cast every id once so the submission id and the lookup key agree and a
    # float-typed row cannot leak a ".0" suffix into either string.
    game_id, play_id, nfl_id, frame_id = (
        int(row[col]) for col in ('game_id', 'play_id', 'nfl_id', 'frame_id')
    )
    player_key = f"{game_id}_{play_id}_{nfl_id}"
    full_id = f"{player_key}_{frame_id}"

    # Model implementation dependent --------------
    # Only the final stored row of the player's input history is used.
    last_frame = eval_in[player_key].iloc[-1]

    features = last_frame[['ball_land_x', 'ball_land_y', 'vx', 'vy']].tolist()
    features.append(float(frame_id))
    # Add an explicit batch dimension so the trailing squeeze(0) really
    # removes something and the model sees 2-D input as it did in training.
    x1 = torch.tensor(features, dtype=torch.float32).unsqueeze(0).to(device)

    x2 = torch.tensor(last_frame[['x', 'y']].tolist(), dtype=torch.float32)
    x2 = x2.unsqueeze(0).to(device)
    # ----------------------------------------

    # Inference only: never build an autograd graph, whatever the caller's context.
    with torch.no_grad():
        pred_x, pred_y = model(x1, x2).squeeze(0).tolist()
    return pd.Series([full_id, pred_x, pred_y])

In [17]:
# Final submission df
model.eval()  # switch the network to evaluation mode before predicting

# Row-wise inference over every requested instance, without tracking gradients.
with torch.no_grad():
    predicted = instances.apply(compute_pos, axis=1)

# Attach the three predicted columns to a copy of the instances, then keep only those.
submission = instances.copy(deep=True)
submission[['id', 'x', 'y']] = predicted
submission = submission.loc[:, ['id', 'x', 'y']]
submission.head(10)

Unnamed: 0,id,x,y
0,2024120805_74_54586_1,88.46032,33.482166
1,2024120805_74_54586_2,88.618599,33.436996
2,2024120805_74_54586_3,88.787148,33.376438
3,2024120805_74_54586_4,88.971947,33.424522
4,2024120805_74_54586_5,89.168053,33.651058
5,2024120805_74_54586_6,89.364166,33.877594
6,2024120805_74_54586_7,89.56028,34.104126
7,2024120805_74_54586_8,89.756386,34.330662
8,2024120805_74_54586_9,89.952499,34.557198
9,2024120805_74_54586_10,90.117966,34.754608


In [18]:
# Submission to csv
# Write id/x/y only; the positional DataFrame index is left out of the file.
submission.to_csv(path_or_buf='submission.csv', index=False)