# ejemplo tomado de la sección de comentarios


In [None]:
import pandas as pd
import numpy as np
import torch
import pickle
import joblib
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

### configuración

In [None]:
class Config:
    """Static settings shared by the CatBoost and LSTM inference pipelines."""

    # Input locations for the competition data and the pre-trained artefacts.
    DATA_DIR = Path("/kaggle/input/nfl-big-data-bowl-2026-prediction/")
    CATBOOST_MODEL_PATH = "/kaggle/input/nfl-2026-big-data-bowl/catboost_5fold_models.pkl"
    LSTM_MODEL_DIR = "/kaggle/input/d/mathieuduverne/big-data-bowl-2026-dynamic-specs-nn/output"

    # Blend weights keyed by model family.
    ENSEMBLE_WEIGHTS = dict(catboost=0.5, lstm=0.5)

    # Blend weights keyed by player role, plus a 'default' entry.
    ROLE_SPECIFIC_WEIGHTS = {
        'Passer': dict(catboost=0.6, lstm=0.4),
        'Targeted Receiver': dict(catboost=0.4, lstm=0.6),
        'Defensive Coverage': dict(catboost=0.45, lstm=0.55),
        'default': dict(catboost=0.5, lstm=0.5),
    }

    # Switch between ROLE_SPECIFIC_WEIGHTS and ENSEMBLE_WEIGHTS.
    USE_ROLE_SPECIFIC_WEIGHTS = False

    # Number of LSTM folds to load and the length of each input window.
    LSTM_N_FOLDS = 5
    LSTM_WINDOW_SIZE = 8

    # Coordinate bounds of the field along each axis.
    FIELD_X_MIN = 0.0
    FIELD_X_MAX = 120.0
    FIELD_Y_MIN = 0.0
    FIELD_Y_MAX = 53.3

### catboost pipeline

In [None]:
def load_catboost_models(model_path):
    """Read the pickled CatBoost bundle stored at ``model_path``.

    The pickle is expected to hold a mapping with the keys ``'models_x'``,
    ``'models_y'`` and ``'features'``.

    Returns:
        A ``(models_x, models_y, features)`` tuple taken from that mapping.

    Raises:
        KeyError: if one of the three keys is missing from the bundle.
    """
    # NOTE: unpickling executes code embedded in the file; only load
    # bundles that come from a trusted source.
    with open(model_path, 'rb') as handle:
        bundle = pickle.load(handle)

    models_x = bundle['models_x']
    models_y = bundle['models_y']
    feature_names = bundle['features']
    return models_x, models_y, feature_names

In [None]:
def engineer_catboost_features(df):
    """Return a copy of ``df`` extended with the derived CatBoost features.

    The input frame is not modified. The columns read are ``s``, ``a``,
    ``dir``, ``o``, ``x``, ``y``, ``ball_land_x``, ``ball_land_y``,
    ``num_frames_output``, ``player_role``, ``player_side``,
    ``player_height`` (split on ``'-'``) and ``player_weight``.
    """
    out = df.copy()

    # Heading in radians; its cosine/sine feed both velocity and acceleration.
    heading = np.radians(out['dir'])
    cos_heading = np.cos(heading)
    sin_heading = np.sin(heading)

    out['velocity_x'] = out['s'] * cos_heading
    out['velocity_y'] = out['s'] * sin_heading

    # Offset from the player's position to the ball landing point.
    gap_x = out['ball_land_x'] - out['x']
    gap_y = out['ball_land_y'] - out['y']

    out['dist_to_ball'] = np.sqrt(
        (out['x'] - out['ball_land_x']) ** 2 + (out['y'] - out['ball_land_y']) ** 2
    )
    out['angle_to_ball'] = np.arctan2(gap_y, gap_x)

    # Velocity component along the player -> ball direction.
    out['velocity_toward_ball'] = (
        out['velocity_x'] * np.cos(out['angle_to_ball'])
        + out['velocity_y'] * np.sin(out['angle_to_ball'])
    )

    out['time_to_ball'] = out['num_frames_output'] / 10.0

    # Absolute gap between `o` and `dir`, folded with 360 - gap.
    facing_gap = np.abs(out['o'] - out['dir'])
    out['orientation_diff'] = np.minimum(facing_gap, 360 - facing_gap)

    # Binary indicators for role and side.
    role_flags = (
        ('role_targeted_receiver', 'Targeted Receiver'),
        ('role_defensive_coverage', 'Defensive Coverage'),
        ('role_passer', 'Passer'),
    )
    for flag_name, role_label in role_flags:
        out[flag_name] = (out['player_role'] == role_label).astype(int)
    out['side_offense'] = (out['player_side'] == 'Offense').astype(int)

    # Height arrives as two '-'-separated numbers; the first is scaled by 12.
    feet_inches = out['player_height'].str.split('-', expand=True)
    out['height_inches'] = feet_inches[0].astype(float) * 12 + feet_inches[1].astype(float)
    out['bmi'] = (out['player_weight'] / (out['height_inches'] ** 2)) * 703

    out['acceleration_x'] = out['a'] * cos_heading
    out['acceleration_y'] = out['a'] * sin_heading
    out['distance_to_target_x'] = gap_x
    out['distance_to_target_y'] = gap_y
    out['speed_squared'] = out['s'] ** 2
    out['accel_magnitude'] = np.sqrt(
        out['acceleration_x'] ** 2 + out['acceleration_y'] ** 2
    )
    out['velocity_alignment'] = np.cos(out['angle_to_ball'] - heading)

    # Constant-velocity extrapolation over time_to_ball and its miss distance.
    horizon = out['time_to_ball']
    out['expected_x_at_ball'] = out['x'] + out['velocity_x'] * horizon
    out['expected_y_at_ball'] = out['y'] + out['velocity_y'] * horizon
    miss_x = out['expected_x_at_ball'] - out['ball_land_x']
    miss_y = out['expected_y_at_ball'] - out['ball_land_y']
    out['error_from_ball_x'] = miss_x
    out['error_from_ball_y'] = miss_y
    out['error_from_ball'] = np.sqrt(miss_x ** 2 + miss_y ** 2)

    weight = out['player_weight']
    out['momentum_x'] = weight * out['velocity_x']
    out['momentum_y'] = weight * out['velocity_y']
    out['kinetic_energy'] = 0.5 * weight * out['speed_squared']

    # Gap between `o` and the bearing to the ball (converted to degrees),
    # folded with 360 - gap.
    look_gap = np.abs(out['o'] - np.degrees(out['angle_to_ball']))
    out['angle_diff'] = np.minimum(look_gap, 360 - look_gap)

    out['time_squared'] = out['time_to_ball'] ** 2
    out['dist_squared'] = out['dist_to_ball'] ** 2
    # The +0.1 keeps the denominator away from zero.
    out['weighted_dist_by_time'] = out['dist_to_ball'] / (out['time_to_ball'] + 0.1)

    return out

In [None]:
def add_sequence_features_catboost(df):
    """Append per-player lag, rolling and delta columns to a sorted copy of ``df``.

    Rows are ordered by ``game_id``, ``play_id``, ``nfl_id``, ``frame_id`` and
    every statistic is computed within one (game, play, player) group. Source
    columns that are absent from ``df`` are skipped silently.

    Returns:
        The sorted frame with the additional columns; ``df`` itself is left
        untouched because ``sort_values`` returns a new frame.
    """
    keys = ['game_id', 'play_id', 'nfl_id']
    ordered = df.sort_values(keys + ['frame_id'])

    # Lagged values: previous 1..5 rows within each group.
    lag_sources = ('x', 'y', 'velocity_x', 'velocity_y', 's', 'a')
    for step in (1, 2, 3, 4, 5):
        for name in lag_sources:
            if name not in ordered.columns:
                continue
            ordered[f'{name}_lag{step}'] = ordered.groupby(keys)[name].shift(step)

    # Rolling mean / std; dropping the three group levels restores the
    # original row index so the assignment aligns row by row.
    rolling_sources = ('x', 'y', 'velocity_x', 'velocity_y', 's')
    for span in (3, 5):
        for name in rolling_sources:
            if name not in ordered.columns:
                continue
            means = ordered.groupby(keys)[name].rolling(span, min_periods=1).mean()
            ordered[f'{name}_rolling_mean_{span}'] = means.reset_index(level=[0, 1, 2], drop=True)
            stds = ordered.groupby(keys)[name].rolling(span, min_periods=1).std()
            ordered[f'{name}_rolling_std_{span}'] = stds.reset_index(level=[0, 1, 2], drop=True)

    # Row-to-row change of each velocity component.
    for name in ('velocity_x', 'velocity_y'):
        if name not in ordered.columns:
            continue
        ordered[f'{name}_delta'] = ordered.groupby(keys)[name].diff()

    return ordered

In [None]:
def predict_catboost(models_x, models_y, features, test_input, test_template):
    """Average the CatBoost fold models over every row of ``test_template``.

    Args:
        models_x: models whose ``predict`` output is averaged into ``x``.
        models_y: models whose ``predict`` output is averaged into ``y``.
        features: ordered feature names used to build the design matrix.
        test_input: tracking frames the features are derived from.
        test_template: rows to predict; must carry ``game_id``, ``play_id``,
            ``nfl_id`` and ``frame_id``.

    Returns:
        DataFrame with columns ``id`` (``game_play_nfl_frame``), ``x``, ``y``.
    """
    id_keys = ['game_id', 'play_id', 'nfl_id']

    enriched = engineer_catboost_features(test_input)
    enriched = add_sequence_features_catboost(enriched)

    # One row per player and play. Note that GroupBy.last() keeps the last
    # non-null value of each column rather than the last physical row.
    latest = enriched.groupby(id_keys).last().reset_index()
    if 'frame_id' in latest.columns:
        # Drop it so the template's own frame_id survives the merge unsuffixed.
        latest = latest.drop('frame_id', axis=1)

    merged = test_template.merge(latest, on=id_keys, how='left')

    # Features that could not be derived are supplied as constant zero columns.
    for name in features:
        if name not in merged.columns:
            merged[name] = 0

    design = merged[features].fillna(0).values

    fold_preds_x = [model.predict(design) for model in models_x]
    pred_x = np.mean(fold_preds_x, axis=0)
    fold_preds_y = [model.predict(design) for model in models_y]
    pred_y = np.mean(fold_preds_y, axis=0)

    row_ids = (
        merged['game_id'].astype(str) + '_'
        + merged['play_id'].astype(str) + '_'
        + merged['nfl_id'].astype(str) + '_'
        + merged['frame_id'].astype(str)
    )

    return pd.DataFrame({'id': row_ids, 'x': pred_x, 'y': pred_y})

### LSTM pipeline

In [None]:
class Residual(torch.nn.Module):
    """Skip connection around ``mod``: ``dropout(mod(x)) + proj(x)``.

    ``proj`` is an identity when ``dim_in == dim_out`` and a linear map from
    ``dim_in`` to ``dim_out`` otherwise.
    """

    def __init__(self, mod, dim_in, dim_out, drop_prob=0.0):
        super().__init__()
        self.mod = mod
        if dim_in == dim_out:
            self.proj = torch.nn.Identity()
        else:
            self.proj = torch.nn.Linear(dim_in, dim_out)
        self.dropout = torch.nn.Dropout(drop_prob)

    def forward(self, x):
        branch = self.mod(x)
        shortcut = self.proj(x)
        # Dropout is applied to the wrapped branch only, never to the shortcut.
        return self.dropout(branch) + shortcut

class RNNBlock(torch.nn.Module):
    """Batch-first GRU or LSTM layer that returns only the per-step outputs.

    ``rnn`` selects a GRU when it equals ``"gru"`` (case-insensitive); any
    other value selects an LSTM. ``out_dim`` is the width of the returned
    features (doubled when bidirectional).
    """

    def __init__(self, input_dim, hidden_dim, rnn="gru", num_layers=1, dropout=0.1, bidirectional=False):
        super().__init__()
        if rnn.lower() == "gru":
            cell_type = torch.nn.GRU
        else:
            cell_type = torch.nn.LSTM
        # Dropout is only passed through for stacked layers; a single layer gets 0.
        layer_dropout = dropout if num_layers > 1 else 0
        self.rnn = cell_type(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=layer_dropout,
            bidirectional=bidirectional,
        )
        directions = 2 if bidirectional else 1
        self.out_dim = hidden_dim * directions

    def forward(self, x):
        outputs, _final_state = self.rnn(x)
        return outputs

class Conv1DBlock(torch.nn.Module):
    """LayerNorm -> depthwise conv -> GELU -> pointwise conv -> dropout.

    The input is normalised over its last axis, axes 1 and 2 are swapped for
    the convolutions, and swapped back before returning.
    """

    def __init__(self, dim, kernel_size=3, dilation=1, dropout=0.1):
        super().__init__()
        self.dim = dim
        # Symmetric padding sized from the dilated kernel extent.
        half_span = (kernel_size - 1) * dilation // 2
        self.pre_ln = torch.nn.LayerNorm(dim)
        # groups=dim gives every channel its own filter (depthwise).
        self.dw = torch.nn.Conv1d(
            dim, dim, kernel_size, padding=half_span, dilation=dilation, groups=dim
        )
        # Kernel size 1 mixes channels at each position (pointwise).
        self.pw = torch.nn.Conv1d(dim, dim, 1)
        self.act = torch.nn.GELU()
        self.drop = torch.nn.Dropout(dropout)

    def forward(self, x):
        channels_first = self.pre_ln(x).transpose(1, 2)
        mixed = self.pw(self.act(self.dw(channels_first)))
        return self.drop(mixed).transpose(1, 2)

class TransformerBlock(torch.nn.Module):
    """Pre-norm self-attention followed by a pre-norm feed-forward network.

    Both sub-layers add their output back onto their input. The feed-forward
    hidden width is ``ff_mult * dim``.
    """

    def __init__(self, dim, nhead=4, ff_mult=4, dropout=0.1):
        super().__init__()
        inner_width = ff_mult * dim
        self.ln1 = torch.nn.LayerNorm(dim)
        self.attn = torch.nn.MultiheadAttention(
            dim, num_heads=nhead, dropout=dropout, batch_first=True
        )
        self.ln2 = torch.nn.LayerNorm(dim)
        self.ff = torch.nn.Sequential(
            torch.nn.Linear(dim, inner_width),
            torch.nn.GELU(),
            torch.nn.Dropout(dropout),
            torch.nn.Linear(inner_width, dim),
            torch.nn.Dropout(dropout),
        )

    def forward(self, x, attn_mask=None, key_padding_mask=None):
        normed = self.ln1(x)
        attended, _weights = self.attn(
            normed, normed, normed,
            attn_mask=attn_mask,
            key_padding_mask=key_padding_mask,
        )
        after_attn = x + attended
        return after_attn + self.ff(self.ln2(after_attn))

class SEBlock(torch.nn.Module):
    """Gate the last axis of ``x`` with a sigmoid computed from its mean over axis 1.

    The gating network squeezes ``dim`` down to ``max(1, dim // r)`` units
    and expands back to ``dim``.
    """

    def __init__(self, dim, r=4):
        super().__init__()
        squeezed = max(1, dim // r)
        layers = [
            torch.nn.Linear(dim, squeezed),
            torch.nn.ReLU(),
            torch.nn.Linear(squeezed, dim),
            torch.nn.Sigmoid(),
        ]
        self.net = torch.nn.Sequential(*layers)

    def forward(self, x):
        summary = x.mean(dim=1)
        # Re-insert axis 1 so the gate broadcasts across it.
        gate = self.net(summary).unsqueeze(1)
        return x * gate

class TransformerBlockWrapper(torch.nn.Module):
    """Single-argument adapter: calls the wrapped ``block`` with ``x`` only.

    Any optional arguments of the wrapped block keep their defaults.
    """

    def __init__(self, block):
        super().__init__()
        self.block = block

    def forward(self, x):
        wrapped = self.block
        return wrapped(x)

class FlexibleSeqModel(torch.nn.Module):
    """Sequence model assembled from a list of block specifications.

    Each spec is a mapping with a ``"type"`` of ``"rnn"``, ``"tcn"``,
    ``"transformer"`` or ``"se"`` (case-insensitive) plus optional
    hyper-parameters; every block is wrapped in a ``Residual``. Specs with any
    other type are skipped. The block outputs are pooled over axis 1
    (``"attn"``, ``"mean"``, or anything else for last-step pooling) and fed to
    a two-layer head that emits ``horizon`` values. With
    ``predict_mode == "steps"`` the head output is cumulatively summed.
    """

    def __init__(self, input_dim: int, horizon: int, block_specs: list, dropout: float = 0.2,
                 pooling: str = "attn", predict_mode: str = "steps", attn_pool_heads: int = 4):
        super().__init__()
        self.horizon = horizon
        self.predict_mode = predict_mode
        self.pooling = pooling

        width = input_dim
        stack = []
        for spec in block_specs:
            kind = spec["type"].lower()
            skip_drop = spec.get("res_dropout", 0.0)

            if kind == "rnn":
                # The only block type that may change the feature width.
                core = RNNBlock(
                    input_dim=width,
                    hidden_dim=spec.get("hidden", 128),
                    rnn=spec.get("rnn", "gru"),
                    num_layers=spec.get("layers", 1),
                    dropout=spec.get("dropout", 0.1),
                    bidirectional=spec.get("bidirectional", False),
                )
                stack.append(Residual(core, width, core.out_dim, drop_prob=skip_drop))
                width = core.out_dim
                continue

            if kind == "tcn":
                core = Conv1DBlock(
                    width,
                    kernel_size=spec.get("kernel", 3),
                    dilation=spec.get("dilation", 1),
                    dropout=spec.get("dropout", 0.1),
                )
            elif kind == "transformer":
                core = TransformerBlockWrapper(
                    TransformerBlock(
                        width,
                        nhead=spec.get("nhead", 4),
                        ff_mult=spec.get("ff_mult", 4),
                        dropout=spec.get("dropout", 0.1),
                    )
                )
            elif kind == "se":
                core = SEBlock(width, r=spec.get("r", 4))
            else:
                # Unrecognised block types contribute nothing.
                continue
            stack.append(Residual(core, width, width, drop_prob=skip_drop))

        self.blocks = torch.nn.ModuleList(stack)

        # Every pooling mode normalises first; only "attn" needs extra parameters.
        self.pool_ln = torch.nn.LayerNorm(width)
        if pooling == "attn":
            self.pool_attn = torch.nn.MultiheadAttention(
                width, num_heads=attn_pool_heads, batch_first=True
            )
            # Learned query vector, broadcast over the batch in forward().
            self.pool_vec = torch.nn.Parameter(torch.randn(1, 1, width))

        self.head = torch.nn.Sequential(
            torch.nn.Linear(width, 128),
            torch.nn.GELU(),
            torch.nn.Dropout(dropout),
            torch.nn.Linear(128, horizon),
        )

    def forward(self, x):
        hidden = x
        for layer in self.blocks:
            hidden = layer(hidden)

        if self.pooling == "attn":
            batch, _steps, _width = hidden.shape
            query = self.pool_vec.expand(batch, -1, -1)
            memory = self.pool_ln(hidden)
            pooled, _weights = self.pool_attn(query, memory, memory)
            pooled = pooled.squeeze(1)
        elif self.pooling == "mean":
            pooled = self.pool_ln(hidden).mean(dim=1)
        else:
            # Fallback: normalise the final position only.
            pooled = self.pool_ln(hidden[:, -1, :])

        out = self.head(pooled)
        if self.predict_mode == "steps":
            # Head outputs are accumulated along the horizon axis.
            return torch.cumsum(out, dim=1)
        return out

In [None]:
def load_lstm_models(models_dir, n_folds):
    """Restore the per-fold x/y sequence models and their feature scalers.

    For each fold ``k`` in ``1..n_folds`` this reads ``fold_k/axis_x.pt``,
    ``fold_k/axis_y.pt`` and ``fold_k/lstm_feature_scaler_fold.joblib`` from
    ``models_dir``. ``input_dim`` and ``horizon`` come from the ``'config'``
    entry of the x checkpoint and are applied to both axis models.

    Returns:
        ``(models_x, models_y, scalers)`` — three lists with one entry per
        fold; the models are on the selected device and in eval mode.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # The architecture is hard-coded here rather than read from the
    # checkpoint, so it has to match the saved state dicts.
    block_specs = [
        {"type": "rnn", "rnn": "gru", "hidden": 128, "layers": 1, "dropout": 0.1},
        {"type": "transformer", "nhead": 4, "ff_mult": 4, "dropout": 0.1},
        {"type": "tcn", "kernel": 3, "dilation": 2, "dropout": 0.1},
    ]

    models_x = []
    models_y = []
    scalers = []

    for fold_no in range(1, n_folds + 1):
        fold_dir = Path(models_dir) / f'fold_{fold_no}'

        ckpt_x = torch.load(fold_dir / 'axis_x.pt', map_location=device)
        ckpt_y = torch.load(fold_dir / 'axis_y.pt', map_location=device)

        # Only the x checkpoint's config is consulted.
        cfg_x = ckpt_x.get('config', {})
        model_kwargs = dict(
            input_dim=cfg_x.get('input_dim'),
            horizon=cfg_x.get('horizon'),
            block_specs=block_specs,
            pooling="mean",
            predict_mode="steps",
            dropout=0.2,
        )

        model_x = FlexibleSeqModel(**model_kwargs)
        model_y = FlexibleSeqModel(**model_kwargs)

        model_x.load_state_dict(ckpt_x['state_dict'])
        model_y.load_state_dict(ckpt_y['state_dict'])

        model_x.to(device).eval()
        model_y.to(device).eval()

        models_x.append(model_x)
        models_y.append(model_y)
        scalers.append(joblib.load(fold_dir / 'lstm_feature_scaler_fold.joblib'))

    return models_x, models_y, scalers

In [None]:
def height_to_feet(height_str):
    """Convert a ``"feet-inches"`` string such as ``"6-2"`` to decimal feet.

    Args:
        height_str: text made of exactly two integers separated by ``'-'``.

    Returns:
        ``feet + inches / 12`` as a float, or ``None`` when the value cannot
        be parsed (not a string, wrong number of parts, non-integer parts).
    """
    try:
        ft, inches = map(int, height_str.split('-'))
    except (AttributeError, TypeError, ValueError):
        # AttributeError: value has no .split (None, NaN, numbers).
        # TypeError: .split rejected the separator (e.g. bytes input).
        # ValueError: wrong number of parts or a part that is not an integer.
        # Anything else (KeyboardInterrupt, SystemExit, ...) must propagate.
        return None
    return ft + inches / 12

In [None]:
def prepare_lstm_sequences(input_df, test_template, window_size):
    """Build fixed-length feature windows for the sequence models.

    Works on a copy of ``input_df``: derives per-row kinematic, role, ball and
    player-interaction features, then, for every distinct
    ``(game_id, play_id, nfl_id)`` in ``test_template``, takes the last
    ``window_size`` rows of that player's frames (ordered by ``frame_id``).
    Shorter histories are padded at the front with NaN rows, and NaNs are
    replaced by 0.0.

    Args:
        input_df: tracking rows; the columns read below include ``dir``,
            ``s``, ``a``, ``x``, ``y``, ``player_side``, ``player_role``,
            ``player_weight``, ``player_height``, ``player_birth_date`` and
            the ``game_id`` / ``play_id`` / ``nfl_id`` / ``frame_id`` keys.
        test_template: frame whose ``game_id``, ``play_id``, ``nfl_id``
            columns define which sequences are produced.
        window_size: number of rows per sequence; also the rolling window.

    Returns:
        ``(sequences, sequence_ids)``: a list of arrays with one row per
        window step and one column per entry of ``feature_cols``, and a
        parallel list of dicts with ``game_id``, ``play_id``, ``nfl_id`` and
        the ``frame_id`` of the window's last row. Template keys that have no
        rows in ``input_df`` are skipped, so both lists may be shorter than
        the number of distinct template keys.

    NOTE(review): the feature definitions presumably have to mirror the ones
    used when the models were trained — confirm before changing any of them.
    """
    input_df = input_df.copy()
    input_df['player_height_feet'] = input_df['player_height'].map(height_to_feet)
    
    # Missing directions are treated as 0 degrees.
    dir_rad = np.deg2rad(input_df['dir'].fillna(0))
    delta_t = 0.1
    # Speed is advanced by half a step of acceleration before projecting.
    # Here x uses sin(dir) and y uses cos(dir); engineer_catboost_features in
    # this file uses the opposite pairing (x=cos, y=sin).
    input_df['velocity_x'] = (input_df['s'] + 0.5 * input_df['a'] * delta_t) * np.sin(dir_rad)
    input_df['velocity_y'] = (input_df['s'] + 0.5 * input_df['a'] * delta_t) * np.cos(dir_rad)
    
    input_df['is_offense'] = (input_df['player_side'] == 'Offense').astype(int)
    input_df['is_defense'] = (input_df['player_side'] == 'Defense').astype(int)
    # NOTE(review): the rest of this file spells this role 'Targeted Receiver';
    # verify whether 'Receiver' ever occurs in player_role, otherwise this flag
    # is always 0. Left as-is pending confirmation of how the model was trained.
    input_df['is_receiver'] = (input_df['player_role'] == 'Receiver').astype(int)
    input_df['is_coverage'] = (input_df['player_role'] == 'Defensive Coverage').astype(int)
    input_df['is_passer'] = (input_df['player_role'] == 'Passer').astype(int)
    
    # Weight divided by 2.20462 (presumably lb -> kg — TODO confirm units);
    # missing weights default to 200.0 before the division.
    mass_kg = input_df['player_weight'].fillna(200.0) / 2.20462
    input_df['momentum_x'] = input_df['velocity_x'] * mass_kg
    input_df['momentum_y'] = input_df['velocity_y'] * mass_kg
    
    from datetime import datetime
    # Age is measured against the wall clock at call time, in whole 365-day
    # years, so the value depends on the day the function runs.
    current_date = datetime.now()
    input_df['age'] = input_df['player_birth_date'].apply(
        lambda x: (current_date - datetime.strptime(x, '%Y-%m-%d')).days // 365 if pd.notnull(x) else None
    )
    
    input_df['kinetic_energy'] = 0.5 * mass_kg * (input_df['s'] ** 2)
    input_df['force'] = mass_kg * input_df['a']
    
    # Rolling statistics per player and play. They follow the current row
    # order of input_df; the sort by frame_id only happens further below.
    input_df['rolling_mean_velocity_x'] = input_df.groupby(['game_id', 'play_id', 'nfl_id'])['velocity_x'].transform(
        lambda x: x.rolling(window=window_size, min_periods=1).mean()
    )
    input_df['rolling_std_acceleration'] = input_df.groupby(['game_id', 'play_id', 'nfl_id'])['a'].transform(
        lambda x: x.rolling(window=window_size, min_periods=1).std()
    )
    
    # Ball-relative features are only added when both landing columns exist;
    # feature_cols below references them unconditionally.
    if all(col in input_df.columns for col in ['ball_land_x', 'ball_land_y']):
        ball_dx = input_df['ball_land_x'] - input_df['x']
        ball_dy = input_df['ball_land_y'] - input_df['y']
        input_df['distance_to_ball'] = np.sqrt(ball_dx**2 + ball_dy**2)
        input_df['angle_to_ball'] = np.arctan2(ball_dy, ball_dx)
        # 1e-6 guards against division by zero when the distance is 0.
        input_df['ball_direction_x'] = ball_dx / (input_df['distance_to_ball'] + 1e-6)
        input_df['ball_direction_y'] = ball_dy / (input_df['distance_to_ball'] + 1e-6)
        # Velocity projected onto the (near-)unit vector towards the ball.
        input_df['closing_speed'] = (input_df['velocity_x'] * input_df['ball_direction_x'] +
                                     input_df['velocity_y'] * input_df['ball_direction_y'])
        # Distance divided by a fixed 20.0, and by |closing speed| + 0.1.
        input_df['estimated_time_to_ball'] = input_df['distance_to_ball'] / 20.0
        input_df['projected_time_to_ball'] = input_df['distance_to_ball'] / (np.abs(input_df['closing_speed']) + 0.1)
    
    input_df['heading_x'] = np.sin(dir_rad)
    input_df['heading_y'] = np.cos(dir_rad)
    input_df['acceleration_x'] = input_df['a'] * input_df['heading_x']
    input_df['acceleration_y'] = input_df['a'] * input_df['heading_y']
    input_df['accel_magnitude'] = np.sqrt(input_df['acceleration_x']**2 + input_df['acceleration_y']**2)
    
    # Pairwise interaction statistics, computed per (game, play, frame) over
    # all players present in that frame. One output row per player.
    agg_rows = []
    for (g, p, f), grp in input_df.groupby(['game_id', 'play_id', 'frame_id'], sort=False):
        n = len(grp)
        nfl_ids = grp['nfl_id'].to_numpy()
        if n < 2:
            # A lone player has no pairs: emit all-NaN statistics.
            for nid in nfl_ids:
                agg_rows.append({
                    'game_id': g, 'play_id': p, 'frame_id': f, 'nfl_id': nid,
                    'distance_to_player_mean_offense': np.nan, 'distance_to_player_min_offense': np.nan, 'distance_to_player_max_offense': np.nan,
                    'relative_velocity_magnitude_mean_offense': np.nan, 'relative_velocity_magnitude_min_offense': np.nan, 'relative_velocity_magnitude_max_offense': np.nan,
                    'angle_to_player_mean_offense': np.nan, 'angle_to_player_min_offense': np.nan, 'angle_to_player_max_offense': np.nan,
                    'distance_to_player_mean_defense': np.nan, 'distance_to_player_min_defense': np.nan, 'distance_to_player_max_defense': np.nan,
                    'relative_velocity_magnitude_mean_defense': np.nan, 'relative_velocity_magnitude_min_defense': np.nan, 'relative_velocity_magnitude_max_defense': np.nan,
                    'angle_to_player_mean_defense': np.nan, 'angle_to_player_min_defense': np.nan, 'angle_to_player_max_defense': np.nan,
                })
            continue
        x = grp['x'].to_numpy(dtype=np.float32)
        y = grp['y'].to_numpy(dtype=np.float32)
        vx = grp['velocity_x'].to_numpy(dtype=np.float32)
        vy = grp['velocity_y'].to_numpy(dtype=np.float32)
        is_offense = grp['is_offense'].to_numpy()
        is_defense = grp['is_defense'].to_numpy()
        # dx[i, j] = x[j] - x[i]; negating both gives
        # angle_mat[i, j] = atan2(y[i] - y[j], x[i] - x[j]).
        dx = x[None, :] - x[:, None]
        dy = y[None, :] - y[:, None]
        angle_mat = np.arctan2(-dy, -dx)
        dist = np.sqrt(dx ** 2 + dy ** 2)
        # dvx[i, j] = vx[i] - vx[j]
        dvx = vx[:, None] - vx[None, :]
        dvy = vy[:, None] - vy[None, :]
        rel_speed = np.sqrt(dvx ** 2 + dvy ** 2)
        # Each mask is True where players i and j carry the same flag value,
        # with the diagonal (self-pairs) cleared.
        # NOTE(review): whenever is_defense == 1 - is_offense the two masks are
        # identical, so the *_offense and *_defense statistics coincide —
        # confirm whether that is intended.
        offense_mask = (is_offense[:, None] == is_offense[None, :])
        np.fill_diagonal(offense_mask, False)
        defense_mask = (is_defense[:, None] == is_defense[None, :])
        np.fill_diagonal(defense_mask, False)
        dist_diag_nan = dist.copy()
        np.fill_diagonal(dist_diag_nan, np.nan)
        rel_diag_nan = rel_speed.copy()
        np.fill_diagonal(rel_diag_nan, np.nan)
        angle_diag_nan = angle_mat.copy()
        np.fill_diagonal(angle_diag_nan, np.nan)
        def masked_stats(mat, mask):
            # Row-wise mean/min/max over the entries selected by `mask`;
            # rows with no selected entry are forced to NaN.
            masked = np.where(mask, mat, np.nan)
            cnt = mask.sum(axis=1)
            mean = np.nanmean(masked, axis=1)
            amin = np.nanmin(masked, axis=1)
            amax = np.nanmax(masked, axis=1)
            zero = cnt == 0
            mean[zero] = np.nan; amin[zero] = np.nan; amax[zero] = np.nan
            return mean, amin, amax
        d_mean_o, d_min_o, d_max_o = masked_stats(dist_diag_nan, offense_mask)
        v_mean_o, v_min_o, v_max_o = masked_stats(rel_diag_nan, offense_mask)
        a_mean_o, a_min_o, a_max_o = masked_stats(angle_diag_nan, offense_mask)
        d_mean_d, d_min_d, d_max_d = masked_stats(dist_diag_nan, defense_mask)
        v_mean_d, v_min_d, v_max_d = masked_stats(rel_diag_nan, defense_mask)
        a_mean_d, a_min_d, a_max_d = masked_stats(angle_diag_nan, defense_mask)
        for idx, nid in enumerate(nfl_ids):
            agg_rows.append({
                'game_id': g, 'play_id': p, 'frame_id': f, 'nfl_id': nid,
                'distance_to_player_mean_offense': d_mean_o[idx], 'distance_to_player_min_offense': d_min_o[idx], 'distance_to_player_max_offense': d_max_o[idx],
                'relative_velocity_magnitude_mean_offense': v_mean_o[idx], 'relative_velocity_magnitude_min_offense': v_min_o[idx], 'relative_velocity_magnitude_max_offense': v_max_o[idx],
                'angle_to_player_mean_offense': a_mean_o[idx], 'angle_to_player_min_offense': a_min_o[idx], 'angle_to_player_max_offense': a_max_o[idx],
                'distance_to_player_mean_defense': d_mean_d[idx], 'distance_to_player_min_defense': d_min_d[idx], 'distance_to_player_max_defense': d_max_d[idx],
                'relative_velocity_magnitude_mean_defense': v_mean_d[idx], 'relative_velocity_magnitude_min_defense': v_min_d[idx], 'relative_velocity_magnitude_max_defense': v_max_d[idx],
                'angle_to_player_mean_defense': a_mean_d[idx], 'angle_to_player_min_defense': a_min_d[idx], 'angle_to_player_max_defense': a_max_d[idx],
            })
    interaction_agg = pd.DataFrame(agg_rows)
    input_df = input_df.merge(interaction_agg, on=['game_id', 'play_id', 'frame_id', 'nfl_id'], how='left')
    
    # Order frames within each player and move the player keys into the index
    # (frame_id stays a regular column and is also used as a feature).
    input_df = input_df.sort_values(['game_id', 'play_id', 'nfl_id', 'frame_id'])
    input_df.set_index(['game_id', 'play_id', 'nfl_id'], inplace=True)
    
    # Column order of every sequence array.
    # NOTE(review): the seven ball-related names appear twice in this list, so
    # those columns are emitted twice per step. Presumably this matches the
    # input_dim the checkpoints were trained with — confirm before removing.
    feature_cols = [
        'x','y','s','a','o','dir','frame_id','ball_land_x','ball_land_y',
        'absolute_yardline_number',
        'player_height_feet','player_weight',
        'velocity_x','velocity_y',
        'momentum_x','momentum_y',
        'is_offense','is_defense','is_receiver','is_coverage','is_passer',
        'age','kinetic_energy','force',
        'rolling_mean_velocity_x','rolling_std_acceleration',
        'heading_x','heading_y','acceleration_x','acceleration_y','accel_magnitude',
        'distance_to_ball','angle_to_ball','ball_direction_x','ball_direction_y',
        'closing_speed','estimated_time_to_ball','projected_time_to_ball',
        'distance_to_ball','angle_to_ball','ball_direction_x','ball_direction_y',
        'closing_speed','estimated_time_to_ball','projected_time_to_ball',
        'distance_to_player_mean_offense','distance_to_player_min_offense','distance_to_player_max_offense',
        'relative_velocity_magnitude_mean_offense','relative_velocity_magnitude_min_offense','relative_velocity_magnitude_max_offense',
        'angle_to_player_mean_offense','angle_to_player_min_offense','angle_to_player_max_offense',
        'distance_to_player_mean_defense','distance_to_player_min_defense','distance_to_player_max_defense',
        'relative_velocity_magnitude_mean_defense','relative_velocity_magnitude_min_defense','relative_velocity_magnitude_max_defense',
        'angle_to_player_mean_defense','angle_to_player_min_defense','angle_to_player_max_defense'
    ]
    
    grouped_input = input_df.groupby(level=['game_id', 'play_id', 'nfl_id'])
    target_groups = test_template[['game_id', 'play_id', 'nfl_id']].drop_duplicates()
    
    sequences, sequence_ids = [], []
    
    for _, row in target_groups.iterrows():
        key = (row['game_id'], row['play_id'], row['nfl_id'])
        try:
            group_df = grouped_input.get_group(key)
        except KeyError:
            # No input rows for this player/play: no sequence is produced.
            continue
        
        # Most recent `window_size` frames of this player.
        input_window = group_df.tail(window_size)
        
        if len(input_window) < window_size:
            # Left-pad short histories with all-NaN rows so the real frames
            # stay at the end of the window.
            pad_length = window_size - len(input_window)
            pad_df = pd.DataFrame(np.nan, index=range(pad_length), columns=input_window.columns)
            input_window = pd.concat([pad_df, input_window], ignore_index=True).reset_index(drop=True)
        
        seq = input_window[feature_cols].values
        
        # Padding rows and missing feature values become 0.0.
        if np.isnan(seq.astype(np.float32)).any():
            seq = np.nan_to_num(seq, nan=0.0)
        
        sequences.append(seq)
        
        # Padding is prepended, so the final row is always a real frame.
        last_frame_id = input_window['frame_id'].iloc[-1]
        sequence_ids.append({
            'game_id': key[0],
            'play_id': key[1],
            'nfl_id': key[2],
            'frame_id': last_frame_id
        })
    
    return sequences, sequence_ids