In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input mount and print the full path of every file found
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/nfl-big-data-bowl-2026-prediction/test_input.csv
/kaggle/input/nfl-big-data-bowl-2026-prediction/test.csv
/kaggle/input/nfl-big-data-bowl-2026-prediction/kaggle_evaluation/nfl_inference_server.py
/kaggle/input/nfl-big-data-bowl-2026-prediction/kaggle_evaluation/nfl_gateway.py
/kaggle/input/nfl-big-data-bowl-2026-prediction/kaggle_evaluation/__init__.py
/kaggle/input/nfl-big-data-bowl-2026-prediction/kaggle_evaluation/core/templates.py
/kaggle/input/nfl-big-data-bowl-2026-prediction/kaggle_evaluation/core/base_gateway.py
/kaggle/input/nfl-big-data-bowl-2026-prediction/kaggle_evaluation/core/relay.py
/kaggle/input/nfl-big-data-bowl-2026-prediction/kaggle_evaluation/core/kaggle_evaluation.proto
/kaggle/input/nfl-big-data-bowl-2026-prediction/kaggle_evaluation/core/__init__.py
/kaggle/input/nfl-big-data-bowl-2026-prediction/kaggle_evaluation/core/generated/kaggle_evaluation_pb2.py
/kaggle/input/nfl-big-data-bowl-2026-prediction/kaggle_evaluation/core/generated/kaggle_evaluati

# NFL Big Data Bowl 2026 – Player Movement Prediction Project

**1. IMPORTS**

*Purpose:*
All required Python libraries are imported here, including data manipulation, numerical computation, machine learning frameworks, and deep learning libraries.

*Libraries used and reasoning:*

- pandas & numpy: Efficient data manipulation and numerical computation.

- torch & nn: For building and training the LSTM sequence model.

- xgboost, lightgbm, catboost: For tree-based residual models that refine LSTM outputs.

- os, sys: For checking that model and data files exist, and for exiting early when a required input is missing.

*Why this approach:*
Using separate imports and modularizing them makes the code easier to maintain, debug, and ensures all dependencies are listed upfront.

**2. FEATURE ENGINEERING FUNCTIONS**

*Purpose:*
Transform raw player tracking data into meaningful features for model input.

*Key logic:*

- Input data contains positions, velocities, accelerations, and other metrics per player per frame.

- Additional features such as heading and orientation unit vectors, velocity, acceleration and momentum components, kinetic energy, and BMI are computed from the raw tracking columns.

- Feature selection focused on columns used consistently across tree and LSTM models (TREE_FEATURE_COLS).

*Why this method:*
Proper feature engineering captures the spatial and temporal context for player movement, enabling both LSTM and tree models to make better predictions.

**3. MODEL ARCHITECTURE (LSTM)**

*Purpose:*
Predict the future positions of players over a sequence of frames.

*Model details:*

Encoder-Decoder LSTM with:

- Encoder: processes historical player trajectories.

- Decoder: predicts next positions frame by frame.

- Linear layer: maps hidden states to (x, y) coordinates.

- Handles variable sequence lengths using pack_padded_sequence.

- Uses last observed (x, y) as input for next frame prediction.

*Why LSTM:*
Player trajectories are sequential and time-dependent. LSTMs can capture temporal dependencies better than tree models alone.

**4. UTILITY FUNCTIONS**

*Purpose:*
Handle data preprocessing and batching for models.

*Functions include:*

- pad_groups(): Pads sequences to maximum length in batch for LSTM.

- make_groups_meta(): Organizes player trajectories by nfl_id and generates metadata for reconstructing predictions.

- Model loading utilities: load_lstm_model(), load_xgb_model(), load_lgb_model(), load_cat_model().

*Why this approach:*

Ensures compatibility between variable-length input sequences and fixed-size batch processing in PyTorch.

Simplifies model deployment by providing functions to load trained models safely.

**5. predict_play_array() — Multi-frame inference (LSTM + Residual Tree Models)**

*Purpose:*
Predict player positions over multiple frames using LSTM and optionally refine with tree-based residuals.

*Logic:*

- Generate features per player and batch into sequences.

- Run LSTM to predict (x, y) for T_out frames.

*If tree models exist:*

- Predict residuals for (x, y) using last input frame features.

- Ensemble residuals from XGBoost, LightGBM, and CatBoost using predefined weights.

- Apply residuals to LSTM output.

- Clip predicted coordinates to field boundaries.

*Why this method:*

- LSTM captures the sequential patterns in player movement.

- Tree models correct small systematic errors (residuals) using learned relationships from historical data.

- Ensures physically valid predictions.

**6. predict_one_play()**

*Purpose:*
Simplifies single-play prediction by integrating LSTM and tree model predictions.

*Logic:*

- Accepts a single play DataFrame.

- Calls predict_play() with preloaded models.

- Returns the final output in required schema: (game_id, play_id, nfl_id, frame_id, x, y).

*Why this method:*
Modularizes prediction for single-play use, which is necessary for the Kaggle evaluation loop or local testing.

**7. offline_predict_and_save() — Offline Prediction Loop**

*Purpose:*
Run predictions for the Kaggle Big Data Bowl using local CSV inputs, without requiring the Kaggle-specific nflrush environment.

*Logic:*

- Loads LSTM and tree-based residual models (XGBoost, LightGBM, CatBoost if available).

- Reads the observed tracking history from test_input.csv and the rows to be predicted from test.csv.

- Applies predict_play_array() to each play to generate predicted player positions for all plays.

- Fills missing predictions with default values (0.0) to ensure no NaNs in the submission.

- Merges predictions with required Kaggle columns (game_id, play_id, nfl_id, frame_id).

- Saves the final predictions to a CSV file (submission.csv) for direct Kaggle submission.

*Why this method:*

- Fully offline and internet-free, compliant with Kaggle submission rules.

- Modular, easy to maintain, and flexible for testing with different model combinations.

- Avoids dependency on environment-specific packages while ensuring correct submission format.

**8. main / Local Testing**

*Purpose:*

Enable local execution and validation of the full prediction pipeline before submission.

*Logic:*

- Loads the full test dataset and model artifacts.

- Runs predict_one_play() on all test plays.

- Generates predictions in the Kaggle-required format.

- Saves or previews the output locally for debugging and performance checks.

*Why this approach:*

- Allows full validation of sequence and residual model predictions offline.

- Eliminates the risk of runtime errors due to missing environment-specific packages.

- Enables iterative performance tuning and debugging before submission.

**Summary**

- The project combines an LSTM model for sequential player movement prediction with tree-based residual models for accuracy enhancement.

- Modular architecture separates data preprocessing, model inference, and submission logic.

- Residual models correct errors from the LSTM predictions, improving overall performance.

- Local CSV-based workflow ensures safe, flexible, and reproducible testing and submission.

This structure is fully compatible with Kaggle Big Data Bowl rules and does not require internet access or nflrush.

# 1. IMPORTS

In [2]:
import os
import sys
import math
import numpy as np
import pandas as pd
import torch
import torch.nn as nn

# Try to import tree libraries; if missing, we'll skip those models
try:
    import xgboost as xgb
except Exception:
    xgb = None

try:
    import lightgbm as lgb
except Exception:
    lgb = None

try:
    import catboost as cb
except Exception:
    cb = None


In [3]:
# All inference runs on CPU; this device is used when loading the LSTM weights
# and when building the padded input tensors.
DEVICE = torch.device("cpu")
# Expected locations of the trained model artifacts. Each loader checks that the
# file exists before trying to read it.
# NOTE(review): these paths point inside the competition input directory, and the
# recorded run output reports the LSTM checkpoint as missing there — confirm
# where the trained artifacts are actually mounted.
LSTM_MODEL_PATH = "/kaggle/input/nfl-big-data-bowl-2026-prediction/best_lstm.pth"
XGB_MODEL_PATH = "/kaggle/input/nfl-big-data-bowl-2026-prediction/best_xgb.json"
LGB_MODEL_PATH = "/kaggle/input/nfl-big-data-bowl-2026-prediction/best_lgb.txt"
CAT_MODEL_PATH = "/kaggle/input/nfl-big-data-bowl-2026-prediction/best_cat.cbm"

# Ensemble blending weights for residuals (sum <= 1)
# Each tree model's predicted residual is multiplied by its weight and the
# results are summed; weights are not renormalised when a model is unavailable.
ENSEMBLE_WEIGHTS = {"xgb": 0.3, "lgb": 0.3, "cat": 0.4}

# Field bounds
# Final predictions are clipped to [0, FIELD_X_MAX] and [0, FIELD_Y_MAX].
FIELD_X_MAX = 120.0
FIELD_Y_MAX = 53.3

# 2. FEATURE ENGINEERING FUNCTIONS

In [4]:
def to_inches(h):
    """Convert a player height to whole inches.

    Accepts either a "feet-inches" string such as ``"6-2"`` or a value that is
    already expressed in inches (``"74"``, ``74``, ``74.0``).

    Args:
        h: Raw height value; may be missing (None / NaN) or malformed.

    Returns:
        int: Height in inches. Falls back to 72 when the value is missing,
        cannot be parsed, or parses to a non-positive number.
    """
    default_inches = 72
    try:
        if pd.isna(h):
            return default_inches
        text = str(h).strip()
        if "-" in text:
            feet, inches = text.split("-")
            total = int(feet) * 12 + int(inches)
        else:
            # sometimes given already in inches
            total = int(float(text))
    except Exception:
        return default_inches
    # A zero or negative height is a placeholder, not a measurement; returning
    # it would make any height-based ratio (such as BMI) blow up.
    return total if total > 0 else default_inches

def engineer_features(df: pd.DataFrame) -> pd.DataFrame:
    """Derive physics-style features from raw per-frame tracking rows.

    Works on a copy of ``df``; the caller's frame is not modified. Missing
    tracking columns (x, y, s, a, o, dir) are created as 0.0, and non-numeric
    values in s / a / o / dir are treated as 0.0 when deriving features.

    Args:
        df: Tracking rows. ``player_height`` and ``player_weight`` are used
            when present.

    Returns:
        A new DataFrame with the original columns plus: height_inches,
        weight_lbs, bmi, heading_x/y, orient_x/y, dir_orient_diff,
        velocity_x/y, acceleration_x/y, speed_squared, accel_magnitude,
        momentum_x/y, momentum_magnitude and kinetic_energy. Infinite and
        missing values in the result are replaced with 0.0.
    """
    df = df.copy()

    # ensure numeric columns exist
    for col in ['x', 'y', 's', 'a', 'o', 'dir']:
        if col not in df.columns:
            df[col] = 0.0

    # Missing heights are passed through unchanged so that to_inches applies
    # its missing-value default; substituting "0" first would be parsed as a
    # height of zero inches and produce an enormous BMI.
    if 'player_height' in df.columns:
        df['height_inches'] = df['player_height'].apply(to_inches)
    else:
        df['height_inches'] = 72

    if 'player_weight' in df.columns:
        df['weight_lbs'] = pd.to_numeric(df['player_weight'], errors='coerce').fillna(200)
    else:
        df['weight_lbs'] = 200

    # 703 * weight / height^2; the epsilon guards against division by zero
    df['bmi'] = (df['weight_lbs'] / (df['height_inches']**2 + 1e-6)) * 703.0

    # Coerce the angle columns once and reuse them below
    dcol = pd.to_numeric(df['dir'], errors='coerce').fillna(0.0)
    ocol = pd.to_numeric(df['o'], errors='coerce').fillna(0.0)

    # The x component uses sin and the y component uses cos of the angle
    dir_rad = np.radians(dcol)
    df['heading_x'] = np.sin(dir_rad)
    df['heading_y'] = np.cos(dir_rad)

    orient_rad = np.radians(ocol)
    df['orient_x'] = np.sin(orient_rad)
    df['orient_y'] = np.cos(orient_rad)

    # Smallest absolute angle between movement direction and orientation
    diff = np.abs(dcol - ocol)
    df['dir_orient_diff'] = np.minimum(diff, 360 - diff)

    s = pd.to_numeric(df['s'], errors='coerce').fillna(0.0)
    a = pd.to_numeric(df['a'], errors='coerce').fillna(0.0)

    # Speed and acceleration are both projected along the movement heading
    df['velocity_x'] = s * df['heading_x']
    df['velocity_y'] = s * df['heading_y']
    df['acceleration_x'] = a * df['heading_x']
    df['acceleration_y'] = a * df['heading_y']

    df['speed_squared'] = s**2
    df['accel_magnitude'] = np.sqrt(df['acceleration_x']**2 + df['acceleration_y']**2)

    df['momentum_x'] = df['weight_lbs'] * df['velocity_x']
    df['momentum_y'] = df['weight_lbs'] * df['velocity_y']
    df['momentum_magnitude'] = np.sqrt(df['momentum_x']**2 + df['momentum_y']**2)

    df['kinetic_energy'] = 0.5 * df['weight_lbs'] * df['speed_squared']

    df = df.replace([np.inf, -np.inf], 0.0).fillna(0.0)
    return df

# Ordered list of feature columns fed to the models. The LSTM's input width is
# len(TREE_FEATURE_COLS), the per-player sequence arrays are built from these
# columns, and the same names are passed to XGBoost as feature names.
# The first two entries must remain x and y: the LSTM decoder is seeded with the
# first two features of the last observed frame.
TREE_FEATURE_COLS = [
    "x","y","s","a","o","dir",
    "heading_x","heading_y",
    "velocity_x","velocity_y",
    "acceleration_x","acceleration_y",
    "dir_orient_diff",
    "height_inches","weight_lbs","bmi",
    "speed_squared","accel_magnitude",
    "momentum_x","momentum_y","momentum_magnitude","kinetic_energy"
]

# 3. MODEL ARCHITECTURE (LSTM)

In [5]:
class EncoderDecoderLSTM(nn.Module):
    """Sequence-to-sequence LSTM that rolls out future (x, y) positions.

    The encoder consumes a padded batch of feature sequences; its final hidden
    and cell states initialise the decoder. The decoder is seeded with the
    first two features of each sequence's last valid step and is then fed its
    own (detached) prediction at every subsequent step.
    """

    def __init__(self, input_dim, hidden_dim=128, num_layers=2, dropout=0.1):
        super().__init__()
        lstm_kwargs = dict(num_layers=num_layers, batch_first=True, dropout=dropout)
        self.encoder = nn.LSTM(input_dim, hidden_dim, **lstm_kwargs)
        self.decoder = nn.LSTM(2, hidden_dim, **lstm_kwargs)
        self.fc = nn.Linear(hidden_dim, 2)

    def forward(self, enc_X, enc_lens, T_out=10):
        """Encode ``enc_X`` and decode ``T_out`` steps.

        Args:
            enc_X: Padded inputs of shape (B, T, F).
            enc_lens: Per-sequence valid lengths, shape (B,).
            T_out: Number of future steps to generate.

        Returns:
            Tensor of shape (B, T_out, 2).
        """
        packed = nn.utils.rnn.pack_padded_sequence(
            enc_X, enc_lens.cpu().numpy(), batch_first=True, enforce_sorted=False
        )
        _, state = self.encoder(packed)

        # Gather the first two features at each sequence's last valid step
        last_step = (enc_lens.to(torch.long) - 1).clamp(min=0).to(enc_X.device)
        batch_rows = torch.arange(enc_X.size(0), device=enc_X.device)
        step_input = enc_X[batch_rows, last_step, :2].unsqueeze(1)  # (B,1,2)

        rollout = []
        for _ in range(T_out):
            dec_out, state = self.decoder(step_input, state)
            xy = self.fc(dec_out.squeeze(1))
            rollout.append(xy.unsqueeze(1))
            # Next step is conditioned on this prediction, without gradient flow
            step_input = xy.unsqueeze(1).detach()

        return torch.cat(rollout, dim=1)  # (B, T_out, 2)


# 4. UTILITY FUNCTIONS (padding, grouping, loading models)

In [6]:
# ---------------------
# Utils: grouping / padding
# ---------------------
def pad_groups(groups):
    """Zero-pad variable-length sequences into one batch.

    Args:
        groups: Non-empty list of 2-D arrays shaped (T_i, F) sharing the same F.

    Returns:
        (padded, lengths): a float32 tensor of shape (B, max T_i, F) with
        zeros after each sequence's end, and an int64 tensor of the original
        lengths. Both tensors are created on DEVICE.
    """
    seq_lengths = np.array([seq.shape[0] for seq in groups], dtype=np.int64)
    n_feats = groups[0].shape[1]
    padded = np.zeros((len(groups), int(seq_lengths.max()), n_feats), dtype=np.float32)
    for row, (seq, n_steps) in enumerate(zip(groups, seq_lengths)):
        padded[row, :n_steps, :] = seq
    padded_t = torch.tensor(padded, dtype=torch.float32, device=DEVICE)
    lengths_t = torch.tensor(seq_lengths, dtype=torch.int64, device=DEVICE)
    return padded_t, lengths_t

def make_groups_meta(play_df):
    """
    Split one play into per-player feature sequences.

    Features are engineered on a copy of play_df, then rows are grouped by
    nfl_id and ordered by frame_id within each player.

    Returns:
      groups: list of float32 (T_obs, F) arrays, one per player
      metas: list of dicts per player: {game_id, play_id, nfl_id, observed_frame_ids}
    """
    featured = engineer_features(play_df.copy())
    # Keep the configured column order, restricted to columns actually present
    feature_cols = [c for c in TREE_FEATURE_COLS if c in featured.columns]

    groups, metas = [], []
    for nfl_id, player_rows in featured.groupby("nfl_id"):
        ordered = player_rows.sort_values("frame_id")
        groups.append(ordered[feature_cols].values.astype(np.float32))
        metas.append(dict(
            game_id=int(ordered['game_id'].iloc[0]),
            play_id=int(ordered['play_id'].iloc[0]),
            nfl_id=int(nfl_id),
            observed_frame_ids=ordered['frame_id'].values.astype(int),
        ))
    return groups, metas

# ---------------------
# Model loaders
# ---------------------
def load_lstm_model(path=LSTM_MODEL_PATH):
    """Build the encoder-decoder LSTM and load weights from ``path`` if possible.

    The checkpoint may be a raw state dict or a dict wrapping one under the
    'state_dict' key. If the file is missing or loading fails, a warning is
    printed and the model keeps its freshly initialised weights, so the
    pipeline still runs.

    Returns:
        The model, moved to DEVICE and switched to eval mode.
    """
    model = EncoderDecoderLSTM(input_dim=len(TREE_FEATURE_COLS))
    if not os.path.exists(path):
        print("⚠ LSTM model missing at:", path)
    else:
        try:
            # NOTE: torch.load unpickles the file; only use trusted checkpoints
            checkpoint = torch.load(path, map_location=DEVICE)
            wrapped = isinstance(checkpoint, dict) and 'state_dict' in checkpoint
            model.load_state_dict(checkpoint['state_dict'] if wrapped else checkpoint)
            print("✔ LSTM model loaded from", path)
        except Exception as e:
            # Best effort: fall through with the untrained weights
            print("⚠ Failed to load LSTM state_dict:", e)
    model.to(DEVICE)
    model.eval()
    return model

def load_xgb_model(path=XGB_MODEL_PATH):
    """Load the XGBoost residual booster from ``path``.

    Returns:
        An ``xgb.Booster``, or None when the xgboost import failed, no file
        exists at ``path``, or loading raised. A load failure prints a warning
        instead of passing silently, so a skipped residual model is visible in
        the run log.
    """
    if xgb is None or not os.path.exists(path):
        return None
    try:
        booster = xgb.Booster()
        booster.load_model(path)
        return booster
    except Exception as e:
        print("⚠ Failed to load XGBoost model from", path, "-", e)
        return None

def load_lgb_model(path=LGB_MODEL_PATH):
    """Load the LightGBM residual booster from ``path``.

    Returns:
        An ``lgb.Booster``, or None when the lightgbm import failed, no file
        exists at ``path``, or loading raised. A load failure prints a warning
        instead of passing silently, so a skipped residual model is visible in
        the run log.
    """
    if lgb is None or not os.path.exists(path):
        return None
    try:
        return lgb.Booster(model_file=path)
    except Exception as e:
        print("⚠ Failed to load LightGBM model from", path, "-", e)
        return None

def load_cat_model(path=CAT_MODEL_PATH):
    """Load the CatBoost residual regressor from ``path``.

    Returns:
        A ``cb.CatBoostRegressor``, or None when the catboost import failed,
        no file exists at ``path``, or loading raised. A load failure prints a
        warning instead of passing silently, so a skipped residual model is
        visible in the run log.
    """
    if cb is None or not os.path.exists(path):
        return None
    try:
        model = cb.CatBoostRegressor()
        model.load_model(path)
        return model
    except Exception as e:
        print("⚠ Failed to load CatBoost model from", path, "-", e)
        return None



# 5. predict_play() — Multi-frame inference (LSTM generating absolute preds)

In [7]:
def predict_play_array(play_input_df, lstm_model, tree_models=None, T_out_default=10):
    """
    Predicts sequences for all players in play_input_df.

    The LSTM produces absolute (x, y) positions for every player. When at
    least one tree model is supplied, each model predicts a residual from the
    player's last observed feature row; the residuals are blended with
    ENSEMBLE_WEIGHTS and added to the LSTM output. The same feature row is
    used for every predicted frame of a player.

    Args:
        play_input_df: Observed tracking rows for one play. If it has a
            ``num_frames_output`` column, its first value sets the horizon.
        lstm_model: Callable as ``lstm_model(X, lens, T_out=...)`` returning a
            (B, T_out, 2) tensor.
        tree_models: Optional dict with keys 'xgb', 'lgb', 'cat'; values may
            be None. A model that raises during prediction is reported and
            skipped.
        T_out_default: Horizon used when ``num_frames_output`` is absent,
            unparseable or non-positive.

    Returns:
        dict keyed by (game_id, play_id, nfl_id) -> np.array shape (T_out, 2).
        Empty dict when the play contains no players.
    """
    groups, metas = make_groups_meta(play_input_df)
    if len(groups) == 0:
        return {}

    Xt, lens = pad_groups(groups)
    # determine T_out (if play provides num_frames_output)
    try:
        T_out = int(play_input_df.get("num_frames_output", pd.Series([T_out_default])).iloc[0])
        if T_out <= 0:
            T_out = T_out_default
    except Exception:
        T_out = T_out_default

    with torch.no_grad():
        preds_np = lstm_model(Xt, lens, T_out=T_out).cpu().numpy()  # (B, T_out, 2)

    # One key per player, in the same order as the batch rows
    keys = [(m["game_id"], m["play_id"], m["nfl_id"]) for m in metas]
    preds_map = {key: preds_np[i] for i, key in enumerate(keys)}

    # If no tree models, return early
    if tree_models is None or all(m is None for m in tree_models.values()):
        return preds_map

    n_frames = preds_np.shape[1]
    if n_frames == 0:
        return preds_map

    # Each group is already an engineered, frame-ordered feature array, so its
    # final row is the player's last observed feature vector. Reusing it
    # avoids a second feature-engineering pass over the play.
    last_feats = np.stack([g[-1] for g in groups]).astype(np.float32)
    # Player-major layout: rows [i*n_frames, (i+1)*n_frames) belong to player i
    feat_matrix = np.repeat(last_feats, n_frames, axis=0)
    total_residual = np.zeros((feat_matrix.shape[0], 2), dtype=np.float32)

    # predict residuals by model
    for model_name in ['xgb', 'lgb', 'cat']:
        m = tree_models.get(model_name)
        if m is None:
            continue
        try:
            if model_name == 'xgb' and xgb is not None and isinstance(m, xgb.Booster):
                dmat = xgb.DMatrix(feat_matrix, feature_names=TREE_FEATURE_COLS)
                pred_flat = m.predict(dmat)
            else:
                pred_flat = m.predict(feat_matrix)
            pred_flat = np.asarray(pred_flat)
            if pred_flat.ndim == 1:
                # Single-output model: treated as an x residual with zero y
                preds_model = np.vstack([pred_flat, np.zeros_like(pred_flat)]).T
            else:
                preds_model = pred_flat.reshape(-1, 2)
            total_residual += ENSEMBLE_WEIGHTS.get(model_name, 0.0) * preds_model
        except Exception as e:
            print(f"Tree model {model_name} prediction failed: {e}")

    # apply residuals back into preds_map, one contiguous block per player
    residual_by_player = total_residual.reshape(len(keys), n_frames, 2)
    for i, key in enumerate(keys):
        preds_map[key] = preds_np[i] + residual_by_player[i]

    return preds_map

# 6–7. Submission assembly and offline prediction loop

In [8]:
def assemble_predictions_for_sample(sample_df, preds_map):
    """
    Align per-player predictions with the rows of the sample file.

    sample_df: DataFrame with columns ['game_id','play_id','nfl_id','frame_id', ...]
    preds_map: dict (game,play,nfl) -> np.array (T_out,2)

    Within each player the sample rows, taken in ascending frame_id order,
    correspond to predicted time steps t=0,1,...; if the sample asks for more
    frames than were predicted, the last predicted step is reused. Coordinates
    are clipped to the field bounds. Players without predictions (or with an
    empty prediction array) and NaN predictions receive x = y = 0.0.

    Returns a DataFrame with exactly one row per sample row, in the sample's
    order, with columns ['game_id','play_id','nfl_id','frame_id','x','y'].
    An empty sample yields an empty frame with those columns.
    """
    key_cols = ['game_id', 'play_id', 'nfl_id', 'frame_id']
    # Positional index so that group index labels can address the output arrays
    result = sample_df[key_cols].reset_index(drop=True)
    x_out = np.zeros(len(result), dtype=np.float64)
    y_out = np.zeros(len(result), dtype=np.float64)

    for (gid, pid, nid), group in result.groupby(key_cols[:3], sort=False):
        preds_arr = preds_map.get((int(gid), int(pid), int(nid)))
        if preds_arr is None or len(preds_arr) == 0:
            continue  # rows keep the zero fill
        preds_arr = np.asarray(preds_arr)
        # Row positions of this player ordered by frame_id (stable for ties)
        ordered = group['frame_id'].sort_values(kind='mergesort').index.to_numpy()
        # Step t for the t-th frame, capped at the last predicted step
        steps = np.minimum(np.arange(len(ordered)), preds_arr.shape[0] - 1)
        x_out[ordered] = np.clip(preds_arr[steps, 0], 0.0, FIELD_X_MAX)
        y_out[ordered] = np.clip(preds_arr[steps, 1], 0.0, FIELD_Y_MAX)

    # NaN predictions survive clipping; fall back to zero for them
    x_out[np.isnan(x_out)] = 0.0
    y_out[np.isnan(y_out)] = 0.0

    result['x'] = x_out
    result['y'] = y_out
    return result[key_cols + ['x', 'y']]

In [9]:
def offline_predict_and_save(test_input_path, sample_path, submission_path):
    """
    Run the whole offline pipeline and write the submission file.

    Args:
      test_input_path : path to test_input.csv (historical frames)
      sample_path     : path to test.csv (rows Kaggle expects predictions for)
      submission_path : path to write submission.csv

    Returns:
      The submission DataFrame that was written to submission_path.
    """
    print("Loading models...")
    lstm_model = load_lstm_model(LSTM_MODEL_PATH)
    tree_models = {
        'xgb': load_xgb_model(XGB_MODEL_PATH),
        'lgb': load_lgb_model(LGB_MODEL_PATH),
        'cat': load_cat_model(CAT_MODEL_PATH),
    }

    print("Reading test input (history)...")
    test_input = pd.read_csv(test_input_path)
    print(f"  rows: {len(test_input)}")

    print("Reading sample (expected prediction rows)...")
    sample = pd.read_csv(sample_path)
    print(f"  rows: {len(sample)}")

    # Predict one play at a time and collect everything in a single lookup
    plays = test_input.groupby(['game_id','play_id'])
    total_plays = len(plays.groups)
    print(f"Predicting for {total_plays} plays...")

    preds_map_global = {}
    for (gid, pid), play_df in plays:
        try:
            play_preds = predict_play_array(play_df, lstm_model, tree_models=tree_models)
        except Exception as e:
            # A failed play contributes no keys; the assembly step zero-fills its rows
            print(f"Warning: prediction failed for play {(gid,pid)}: {e}")
            continue
        preds_map_global.update(play_preds)

    print("Assembling final submission aligned to sample rows...")
    submission_df = assemble_predictions_for_sample(sample, preds_map_global)

    # Last line of defence against NaNs reaching the submission file
    if submission_df[['x','y']].isna().any().any():
        print("Warning: NaNs still present in final submission — filling zeros.")
        for coord in ('x', 'y'):
            submission_df[coord] = submission_df[coord].fillna(0.0)

    print("Saving submission to", submission_path)
    submission_df.to_csv(submission_path, index=False)
    print("Saved. Rows:", len(submission_df))
    return submission_df

In [10]:
if __name__ == "__main__":
    # Kaggle-mounted inputs and the writable output location
    test_input_path = "/kaggle/input/nfl-big-data-bowl-2026-prediction/test_input.csv"
    sample_path = "/kaggle/input/nfl-big-data-bowl-2026-prediction/test.csv"  # sample rows to fill
    submission_path = "/kaggle/working/submission.csv"

    # Stop at the first required input that is not present
    for p in (test_input_path, sample_path):
        if os.path.exists(p):
            continue
        print(f"ERROR: required file missing: {p}")
        sys.exit(1)

    submission = offline_predict_and_save(test_input_path, sample_path, submission_path)
    print("Done. Submission ready at:", submission_path)

Loading models...
⚠ LSTM model missing at: /kaggle/input/nfl-big-data-bowl-2026-prediction/best_lstm.pth
Reading test input (history)...
  rows: 49753
Reading sample (expected prediction rows)...
  rows: 5837
Predicting for 143 plays...
Assembling final submission aligned to sample rows...
Saving submission to /kaggle/working/submission.csv
Saved. Rows: 5837
Done. Submission ready at: /kaggle/working/submission.csv
