In [1]:
# ============================================================
# NFL Big Data Bowl 2026 — CatBoost + GNN-lite
# ============================================================

import os, warnings, math
import numpy as np
import pandas as pd
from pathlib import Path
from multiprocessing import Pool as MP, cpu_count
from tqdm.auto import tqdm
from sklearn.model_selection import KFold, GroupKFold
from sklearn.metrics import mean_squared_error
from catboost import CatBoostRegressor, Pool as CatPool
import pickle

warnings.filterwarnings("ignore")

class NFLConfig:
    """Settings container for the NFL Big Data Bowl 2026 pipeline.

    Every value is a plain instance attribute assigned in ``__init__``; the
    other pipeline classes receive an instance and read ``config.<NAME>``.
    """

    def __init__(self):
        # Filesystem: read-only competition inputs and the writable output dir.
        self.BASEDIR = Path("/kaggle/input/nfl-big-data-bowl-2026-prediction")
        self.SAVE_DIR = Path("/kaggle/working")

        # Number of weekly training files and number of CV folds.
        self.N_WEEKS, self.N_FOLDS = 18, 5

        # CatBoost hyper-parameters.
        self.ITERATIONS = 15000   # upper bound on boosting rounds
        self.LR = 0.0008          # learning rate
        self.DEPTH = 8            # tree depth
        self.L2 = 3.0             # L2 leaf regularisation
        self.EARLY = 500          # early-stopping patience (rounds)

        self.SEED = 42
        # True whenever CUDA_VISIBLE_DEVICES is set to a non-empty string.
        visible_gpus = os.environ.get("CUDA_VISIBLE_DEVICES", "")
        self.USE_GPU = len(visible_gpus) > 0
        self.USE_GROUP_KFOLD = True

        # GNN-lite neighbourhood settings: neighbours kept per player,
        # search radius, and softmax temperature for the distance weights.
        self.K_NEIGH = 6
        self.RADIUS = 30.0
        self.TAU = 8.0

class DataLoader:
    """Reads the competition CSV files located under ``config.BASEDIR``."""

    def __init__(self, config):
        # Only BASEDIR and N_WEEKS are read from the config in this class.
        self.config = config

    def load_week(self, week_num: int):
        """Read one week of training data.

        Args:
            week_num: Week number; it is zero-padded to two digits in the
                file name (``input_2023_wNN.csv`` / ``output_2023_wNN.csv``).

        Returns:
            ``(input_df, output_df)`` for that week.
        """
        train_dir = self.config.BASEDIR / "train"
        fin = train_dir / f"input_2023_w{week_num:02d}.csv"
        fout = train_dir / f"output_2023_w{week_num:02d}.csv"
        return pd.read_csv(fin), pd.read_csv(fout)

    def load_all_train(self):
        """Read weeks ``1..config.N_WEEKS`` in parallel and concatenate them.

        Returns:
            ``(train_input, train_output)`` with freshly numbered row indexes.
        """
        print("Loading training data...")
        n_weeks = self.config.N_WEEKS
        # One worker per week is the most that can be useful; size the pool
        # from the configured week count (not a literal) and never ask for
        # zero workers, which multiprocessing rejects.
        n_workers = max(1, min(cpu_count(), n_weeks))
        with MP(n_workers) as pool:
            # imap preserves week order, so the concatenation is chronological.
            res = list(tqdm(pool.imap(self.load_week, range(1, n_weeks + 1)), total=n_weeks))
        tr_in = pd.concat([week_in for week_in, _ in res], ignore_index=True)
        tr_out = pd.concat([week_out for _, week_out in res], ignore_index=True)
        print(f"Train input:  {tr_in.shape}")
        print(f"Train output: {tr_out.shape}")
        return tr_in, tr_out

    def load_test_data(self):
        """Read the test tracking input and the prediction template.

        Returns:
            ``(test_input, test_template)`` read from ``test_input.csv`` and
            ``test.csv`` respectively.
        """
        te_in = pd.read_csv(self.config.BASEDIR / "test_input.csv")
        te_tpl = pd.read_csv(self.config.BASEDIR / "test.csv")
        return te_in, te_tpl

class FeatureEngineer:
    """Stateless feature builders applied to the raw tracking frames."""

    @staticmethod
    def to_inches(h):
        """Parse a ``"feet-inches"`` height (e.g. ``"6-2"``) into inches.

        Any value that does not split into exactly two numeric parts
        yields NaN instead of raising.
        """
        try:
            feet, inches = str(h).split("-")
            total = float(feet) * 12.0 + float(inches)
        except Exception:
            return np.nan
        return total

    @staticmethod
    def engineer_advanced_features(df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with per-frame physics/geometry columns added.

        The direction angle is treated as degrees with 0° pointing along +y,
        so the heading unit vector is ``(sin(dir), cos(dir))``. Missing
        ``dir``, ``s``, ``a`` and ``player_weight`` values are treated as 0
        in the derived columns. The caller's frame is not modified.
        """
        out = df.copy()

        # Body measurements.
        out["height_inches"] = out["player_height"].map(FeatureEngineer.to_inches)
        out["bmi"] = (out["player_weight"] / (out["height_inches"]**2)) * 703.0

        # Unit heading vector.
        theta = np.radians(out["dir"].fillna(0.0))
        out["heading_x"] = np.sin(theta)
        out["heading_y"] = np.cos(theta)

        # Speed and acceleration magnitudes projected onto the heading.
        speed = out["s"].fillna(0.0)
        accel = out["a"].fillna(0.0)
        for prefix, magnitude in (("velocity", speed), ("acceleration", accel)):
            out[f"{prefix}_x"] = magnitude * out["heading_x"]
            out[f"{prefix}_y"] = magnitude * out["heading_y"]

        # Vector from the player to the ball landing spot.
        to_ball_x = out["ball_land_x"] - out["x"]
        to_ball_y = out["ball_land_y"] - out["y"]
        ball_dist = np.sqrt(to_ball_x**2 + to_ball_y**2)
        out["dist_to_ball"] = ball_dist
        out["angle_to_ball"] = np.arctan2(to_ball_y, to_ball_x)
        # The epsilon keeps the unit vector finite for a player standing
        # exactly on the landing spot.
        unit_x = to_ball_x / (ball_dist + 1e-6)
        unit_y = to_ball_y / (ball_dist + 1e-6)

        # Velocity and heading components along the player->ball direction.
        out["velocity_toward_ball"] = out["velocity_x"]*unit_x + out["velocity_y"]*unit_y
        out["velocity_alignment"] = out["heading_x"]*unit_x + out["heading_y"]*unit_y

        # Remaining scalar physics quantities.
        out["speed_squared"] = speed**2
        out["accel_magnitude"] = np.sqrt(out["acceleration_x"]**2 + out["acceleration_y"]**2)
        weight = out["player_weight"].fillna(0.0)
        out["momentum_x"] = weight * out["velocity_x"]
        out["momentum_y"] = weight * out["velocity_y"]
        out["kinetic_energy"] = 0.5 * weight * out["speed_squared"]

        # 0/1 indicator columns for role and side.
        role_flags = {
            "role_targeted_receiver": "Targeted Receiver",
            "role_defensive_coverage": "Defensive Coverage",
            "role_passer": "Passer",
        }
        for flag_col, role_name in role_flags.items():
            out[flag_col] = (out["player_role"] == role_name).astype(int)
        out["side_offense"] = (out["player_side"] == "Offense").astype(int)

        return out

    @staticmethod
    def add_sequence_features(df: pd.DataFrame) -> pd.DataFrame:
        """Add lag, rolling and first-difference columns per player track.

        Rows are sorted by (game_id, play_id, nfl_id, frame_id) and the
        sorted frame is returned. Source columns that are absent from ``df``
        are skipped silently. All statistics are computed within one
        (game_id, play_id, nfl_id) group.
        """
        track_keys = ["game_id", "play_id", "nfl_id"]
        ordered = df.sort_values(track_keys + ["frame_id"])

        # Values from 1..5 frames earlier.
        lag_sources = ("x", "y", "velocity_x", "velocity_y", "s", "a")
        for lag in range(1, 6):
            for col in lag_sources:
                if col not in ordered.columns:
                    continue
                ordered[f"{col}_lag{lag}"] = ordered.groupby(track_keys)[col].shift(lag)

        # Trailing-window mean and std (a window may be partially filled).
        rolling_sources = ("x", "y", "velocity_x", "velocity_y", "s")
        for win in (3, 5):
            for col in rolling_sources:
                if col not in ordered.columns:
                    continue
                windowed = ordered.groupby(track_keys)[col].rolling(win, min_periods=1)
                # Dropping the three group levels leaves the original row
                # index, so the assignment aligns row by row.
                ordered[f"{col}_rolling_mean_{win}"] = (
                    windowed.mean().reset_index(level=[0, 1, 2], drop=True)
                )
                ordered[f"{col}_rolling_std_{win}"] = (
                    windowed.std().reset_index(level=[0, 1, 2], drop=True)
                )

        # Frame-to-frame change of the velocity components.
        for col in ("velocity_x", "velocity_y"):
            if col not in ordered.columns:
                continue
            ordered[f"{col}_delta"] = ordered.groupby(track_keys)[col].diff()

        return ordered

class GNNLiteProcessor:
    """Builds "GNN-lite" neighbour summaries for every tracked player."""

    def __init__(self, config):
        # Reads K_NEIGH, RADIUS and TAU from the config.
        self.config = config

    def compute_neighbor_embeddings(self, input_df: pd.DataFrame) -> pd.DataFrame:
        """Summarise each player's neighbours at that player's last frame.

        For every (game_id, play_id, nfl_id) the other players present in the
        same play at the ego's last observed frame are collected, limited to
        ``config.RADIUS`` and to the ``config.K_NEIGH`` nearest (a ``None``
        setting disables that limit), and weighted by
        ``softmax(-dist / config.TAU)``.

        Args:
            input_df: Tracking frames containing game_id, play_id, nfl_id,
                frame_id, x, y, velocity_x, velocity_y and player_side.

        Returns:
            One row per (game_id, play_id, nfl_id) found in ``input_df`` with
            the ``gnn_*`` columns. Players without any usable neighbour are
            still present: their weighted sums and counts are 0 and their
            distance columns hold the radius fallback (30.0 when RADIUS is
            None).
        """
        keys = ["game_id", "play_id", "nfl_id"]
        cols_needed = keys + ["frame_id", "x", "y",
                              "velocity_x", "velocity_y", "player_side"]
        src = input_df[cols_needed].copy()

        # Last observed frame of every player (the "ego" rows).
        last = (src.sort_values(keys + ["frame_id"])
                   .groupby(keys, as_index=False)
                   .tail(1)
                   .rename(columns={"frame_id": "last_frame_id"})
                   .reset_index(drop=True))

        # Join every player of the same play at the ego's last_frame_id.
        tmp = last.merge(
            src.rename(columns={
                "frame_id": "nb_frame_id", "nfl_id": "nfl_id_nb",
                "x": "x_nb", "y": "y_nb",
                "velocity_x": "vx_nb", "velocity_y": "vy_nb",
                "player_side": "player_side_nb",
            }),
            left_on=["game_id", "play_id", "last_frame_id"],
            right_on=["game_id", "play_id", "nb_frame_id"],
            how="left",
        )

        # Drop the ego itself; copy so the assignments below write to an
        # independent frame rather than to a slice of the merge result.
        tmp = tmp[tmp["nfl_id_nb"] != tmp["nfl_id"]].copy()

        # Relative position / velocity of the neighbour w.r.t. the ego.
        tmp["dx"] = tmp["x_nb"] - tmp["x"]
        tmp["dy"] = tmp["y_nb"] - tmp["y"]
        tmp["dvx"] = tmp["vx_nb"] - tmp["velocity_x"]
        tmp["dvy"] = tmp["vy_nb"] - tmp["velocity_y"]
        tmp["dist"] = np.sqrt(tmp["dx"]**2 + tmp["dy"]**2)

        # Keep finite, non-degenerate distances inside the search radius.
        usable = np.isfinite(tmp["dist"]) & (tmp["dist"] > 1e-6)
        if self.config.RADIUS is not None:
            usable &= tmp["dist"] <= self.config.RADIUS
        tmp = tmp[usable].copy()

        # Ally / opponent indicators (exactly 0.0 or 1.0).
        tmp["is_ally"] = (
            tmp["player_side_nb"].fillna("") == tmp["player_side"].fillna("")
        ).astype(np.float32)
        tmp["is_opp"] = (1.0 - tmp["is_ally"]).astype(np.float64)

        # Rank by distance; ties are broken by row order so ranks are unique.
        tmp["rnk"] = tmp.groupby(keys)["dist"].rank(method="first")
        if self.config.K_NEIGH is not None:
            tmp = tmp[tmp["rnk"] <= float(self.config.K_NEIGH)].copy()

        # Attention weights: softmax(-dist / tau) within each ego's group.
        tmp["w"] = np.exp(-tmp["dist"] / float(self.config.TAU))
        sum_w = tmp.groupby(keys)["w"].transform("sum")
        tmp["wn"] = np.where(sum_w > 0, tmp["w"] / sum_w, 0.0)

        tmp["wn_ally"] = tmp["wn"] * tmp["is_ally"]
        tmp["wn_opp"] = tmp["wn"] * tmp["is_opp"]

        # Pre-multiply so that a plain group sum gives the weighted value.
        for col in ["dx", "dy", "dvx", "dvy"]:
            tmp[f"{col}_ally_w"] = tmp[col] * tmp["wn_ally"]
            tmp[f"{col}_opp_w"] = tmp[col] * tmp["wn_opp"]

        tmp["dist_ally"] = np.where(tmp["is_ally"] > 0.5, tmp["dist"], np.nan)
        tmp["dist_opp"] = np.where(tmp["is_ally"] < 0.5, tmp["dist"], np.nan)

        ag = tmp.groupby(keys).agg(
            gnn_ally_dx_mean=("dx_ally_w", "sum"),
            gnn_ally_dy_mean=("dy_ally_w", "sum"),
            gnn_ally_dvx_mean=("dvx_ally_w", "sum"),
            gnn_ally_dvy_mean=("dvy_ally_w", "sum"),
            gnn_opp_dx_mean=("dx_opp_w", "sum"),
            gnn_opp_dy_mean=("dy_opp_w", "sum"),
            gnn_opp_dvx_mean=("dvx_opp_w", "sum"),
            gnn_opp_dvy_mean=("dvy_opp_w", "sum"),
            gnn_ally_cnt=("is_ally", "sum"),
            gnn_opp_cnt=("is_opp", "sum"),
            gnn_ally_dmin=("dist_ally", "min"),
            gnn_ally_dmean=("dist_ally", "mean"),
            gnn_opp_dmin=("dist_opp", "min"),
            gnn_opp_dmean=("dist_opp", "mean"),
        ).reset_index()

        # Distances to the 1st..3rd nearest neighbour regardless of side.
        # One merge per rank guarantees all three columns exist even when no
        # player has that many neighbours.
        for k in (1, 2, 3):
            kth = (tmp.loc[tmp["rnk"] == k, keys + ["dist"]]
                      .rename(columns={"dist": f"gnn_d{k}"}))
            ag = ag.merge(kth, on=keys, how="left")

        # Re-attach players that lost every neighbour to the filters above so
        # they receive the same fallback values as everyone else.
        ag = last[keys].merge(ag, on=keys, how="left")

        # Safe fills: no contribution -> 0, no neighbour -> radius.
        zero_filled = [
            "gnn_ally_dx_mean", "gnn_ally_dy_mean", "gnn_ally_dvx_mean", "gnn_ally_dvy_mean",
            "gnn_opp_dx_mean", "gnn_opp_dy_mean", "gnn_opp_dvx_mean", "gnn_opp_dvy_mean",
            "gnn_ally_cnt", "gnn_opp_cnt",
        ]
        for c in zero_filled:
            ag[c] = ag[c].fillna(0.0)

        far = self.config.RADIUS if self.config.RADIUS is not None else 30.0
        for c in ["gnn_ally_dmin", "gnn_opp_dmin", "gnn_ally_dmean", "gnn_opp_dmean",
                  "gnn_d1", "gnn_d2", "gnn_d3"]:
            ag[c] = ag[c].fillna(far)

        return ag

class TrainingDataBuilder:
    """Assembles model rows, the constant-velocity baseline and the feature list."""

    @staticmethod
    def create_training_rows(input_df: pd.DataFrame, output_df: pd.DataFrame) -> pd.DataFrame:
        """Attach each player's last observed frame to every target frame.

        The output ``x``/``y`` become ``target_x``/``target_y``, an ``id`` of
        the form ``game_play_nfl_frame`` is added, and ``delta_frames`` /
        ``delta_t`` (frames / 10) give the gap to the last observed frame,
        floored at zero.

        NOTE(review): the gap is ``frame_id - last_frame_id``, which assumes
        the output frame numbering continues the input numbering — confirm
        against the data; if output frames restart at 1 the floor hides it.
        """
        track_keys = ["game_id", "play_id", "nfl_id"]

        # Final observed row per player track.
        last_obs = (
            input_df.sort_values(track_keys + ["frame_id"])
                    .groupby(track_keys, as_index=False)
                    .tail(1)
                    .reset_index(drop=True)
                    .rename(columns={"frame_id": "last_frame_id"})
        )

        # Work on a copy so the caller's output frame stays untouched.
        targets = output_df.copy().rename(columns={"x": "target_x", "y": "target_y"})
        id_parts = [targets[c].astype(str) for c in track_keys + ["frame_id"]]
        row_id = id_parts[0]
        for part in id_parts[1:]:
            row_id = row_id + "_" + part
        targets["id"] = row_id

        merged = targets.merge(
            last_obs,
            on=track_keys,
            how="left",
            suffixes=("", "_last"),
        )

        frame_gap = merged["frame_id"] - merged["last_frame_id"]
        merged["delta_frames"] = frame_gap.clip(lower=0).astype(float)
        merged["delta_t"] = merged["delta_frames"] / 10.0
        return merged

    @staticmethod
    def physics_baseline(x_last, y_last, vx_last, vy_last, dt):
        """Constant-velocity extrapolation clipped to [0, 120] x [0, 53.3]."""
        return (
            np.clip(x_last + vx_last * dt, 0.0, 120.0),
            np.clip(y_last + vy_last * dt, 0.0, 53.3),
        )

    @staticmethod
    def build_feature_list(train_df: pd.DataFrame):
        """Return the ordered model feature names that exist in ``train_df``.

        The candidate list is filtered down to columns actually present; any
        further column whose name starts with ``gnn_`` is appended afterwards
        without duplicates.
        """
        lag_sources = ["x", "y", "velocity_x", "velocity_y", "s", "a"]
        rolling_sources = ["x", "y", "velocity_x", "velocity_y", "s"]

        candidates = [
            "x", "y", "s", "a", "o", "dir",
            "velocity_x", "velocity_y",
            "acceleration_x", "acceleration_y",
            "heading_x", "heading_y",
            "player_weight", "height_inches", "bmi",
            "ball_land_x", "ball_land_y",
            "dist_to_ball", "angle_to_ball",
            "velocity_toward_ball", "velocity_alignment",
            "speed_squared", "accel_magnitude", "momentum_x", "momentum_y", "kinetic_energy",
            "role_targeted_receiver", "role_defensive_coverage", "role_passer", "side_offense",
            "delta_frames", "delta_t",
            "frame_id",
            # GNN-lite
            "gnn_ally_dx_mean", "gnn_ally_dy_mean", "gnn_ally_dvx_mean", "gnn_ally_dvy_mean",
            "gnn_opp_dx_mean", "gnn_opp_dy_mean", "gnn_opp_dvx_mean", "gnn_opp_dvy_mean",
            "gnn_ally_cnt", "gnn_opp_cnt",
            "gnn_ally_dmin", "gnn_ally_dmean", "gnn_opp_dmin", "gnn_opp_dmean",
            "gnn_d1", "gnn_d2", "gnn_d3",
        ]
        candidates.extend(f"{c}_lag{lag}" for lag in range(1, 6) for c in lag_sources)
        candidates.extend(
            f"{c}_rolling_{stat}_{win}"
            for win in (3, 5)
            for c in rolling_sources
            for stat in ("mean", "std")
        )
        candidates.extend(["velocity_x_delta", "velocity_y_delta"])

        present = set(train_df.columns)
        selected = [c for c in candidates if c in present]
        extra_gnn = [c for c in train_df.columns if c.startswith("gnn_")]
        # dict.fromkeys de-duplicates while keeping first-seen order.
        return list(dict.fromkeys(selected + extra_gnn))

class CatBoostTrainer:
    """Trains one CatBoost regressor per coordinate for every CV fold."""

    def __init__(self, config):
        # Reads N_FOLDS, USE_GROUP_KFOLD, SEED and the CatBoost
        # hyper-parameters (ITERATIONS, LR, DEPTH, L2, EARLY).
        self.config = config

    def _make_folds(self, X, ids_group):
        """Return the list of (train_idx, valid_idx) pairs for this run."""
        if self.config.USE_GROUP_KFOLD and ids_group is not None:
            print("Using GroupKFold by (game_id,play_id,nfl_id).")
            gkf = GroupKFold(n_splits=self.config.N_FOLDS)
            return list(gkf.split(X, groups=ids_group))
        print("Using plain KFold.")
        kf = KFold(n_splits=self.config.N_FOLDS, shuffle=True, random_state=self.config.SEED)
        return list(kf.split(X))

    def _model_params(self):
        """Return the CatBoost parameters shared by every fold and target."""
        # NOTE(review): config.USE_GPU is not consulted; this parameter set is
        # pinned to CPU. Confirm GPU support of the sampling options below
        # before switching task_type.
        return dict(
            # === Core Parameters ===
            iterations=self.config.ITERATIONS,
            learning_rate=self.config.LR,
            depth=self.config.DEPTH,
            l2_leaf_reg=self.config.L2,
            random_seed=self.config.SEED,
            task_type="CPU",

            # === Objective & Metrics ===
            loss_function="RMSE",
            eval_metric="RMSE",
            early_stopping_rounds=self.config.EARLY,
            use_best_model=True,
            verbose=200,

            # === Regularization & Sampling ===
            rsm=0.9,                      # feature sampling per tree
            min_data_in_leaf=25,          # minimal number of samples per leaf

            # === Tree Building & Estimation ===
            grow_policy='SymmetricTree',
            bootstrap_type='Bayesian',

            # === Feature Handling ===
            one_hot_max_size=2,

            # === Overfitting Detector ===
            od_type='Iter',

            # === Performance ===
            thread_count=-1,              # utilize all CPU cores
        )

    def train_catboost_folds(self, X, yx, yy, ids_group=None,
                            base_x=None, base_y=None, use_residual=False):
        """Cross-validate paired x/y CatBoost regressors.

        Args:
            X: Feature matrix indexable by integer arrays.
            yx, yy: Targets for the x and y models (residuals when
                ``use_residual`` is True).
            ids_group: Optional group labels; used for GroupKFold when
                ``config.USE_GROUP_KFOLD`` is set, otherwise ignored.
            base_x, base_y: Baseline predictions added back to both the
                predictions and the targets before scoring when
                ``use_residual`` is True.
            use_residual: Whether ``yx``/``yy`` are residuals w.r.t. the
                baseline.

        Returns:
            ``(models_x, models_y, fold_rmse)`` — one entry per fold. The
            fold score is ``sqrt(0.5 * (MSE_x + MSE_y))`` on absolute
            coordinates, with predictions clipped to [0, 120] x [0, 53.3].

        Raises:
            ValueError: If ``use_residual`` is True but ``base_x`` or
                ``base_y`` is missing.
        """
        # Validate before any training: a fold can take a long time to fit,
        # so a missing baseline must not surface only after the first fold.
        # (The message says that use_residual=True requires base_x, base_y.)
        if use_residual and (base_x is None or base_y is None):
            raise ValueError("use_residual=True cần base_x, base_y.")

        folds = self._make_folds(X, ids_group)
        params = self._model_params()

        models_x, models_y, fold_rmse = [], [], []

        for i, (tr, va) in enumerate(folds, 1):
            print(f"\nFold {i}/{self.config.N_FOLDS} — train {len(tr):,} | val {len(va):,}")

            Xtr, Xva = X[tr], X[va]
            yx_tr, yx_va = yx[tr], yx[va]
            yy_tr, yy_va = yy[tr], yy[va]

            model_x = CatBoostRegressor(**params)
            model_x.fit(CatPool(Xtr, yx_tr), eval_set=CatPool(Xva, yx_va), verbose=200)

            model_y = CatBoostRegressor(**params)
            model_y.fit(CatPool(Xtr, yy_tr), eval_set=CatPool(Xva, yy_va), verbose=200)

            # Validation predictions (residuals when use_residual is True).
            pred_rx = model_x.predict(Xva)
            pred_ry = model_y.predict(Xva)

            if use_residual:
                bx_va, by_va = base_x[va], base_y[va]
                # residual -> absolute, for predictions and ground truth alike
                px_abs = np.clip(pred_rx + bx_va, 0.0, 120.0)
                py_abs = np.clip(pred_ry + by_va, 0.0, 53.3)
                yx_abs = yx_va + bx_va
                yy_abs = yy_va + by_va
            else:
                px_abs = np.clip(pred_rx, 0.0, 120.0)
                py_abs = np.clip(pred_ry, 0.0, 53.3)
                yx_abs = yx_va
                yy_abs = yy_va

            rmse = math.sqrt(0.5 * (
                mean_squared_error(yx_abs, px_abs) + mean_squared_error(yy_abs, py_abs)
            ))
            print(f"Fold {i} RMSE: {rmse:.5f}")

            models_x.append(model_x)
            models_y.append(model_y)
            fold_rmse.append(rmse)

        print("\nPer-fold RMSE:", [f"{v:.5f}" for v in fold_rmse])
        print(f"Mean ± std: {np.mean(fold_rmse):.5f} ± {np.std(fold_rmse):.5f}")
        return models_x, models_y, fold_rmse

class NFLPredictor:
    """End-to-end driver: load, featurise, train CatBoost folds, predict, save."""

    def __init__(self):
        cfg = NFLConfig()
        self.config = cfg
        self.data_loader = DataLoader(cfg)
        self.feature_engineer = FeatureEngineer()
        self.gnn_processor = GNNLiteProcessor(cfg)
        self.training_builder = TrainingDataBuilder()
        self.trainer = CatBoostTrainer(cfg)

        # The output directory must exist before anything is written to it.
        os.makedirs(cfg.SAVE_DIR, exist_ok=True)

    def prepare_training_data(self):
        """Build the training frame with baseline and residual columns.

        Returns:
            ``(train_df, base_rmse)`` where ``train_df`` carries ``base_x``,
            ``base_y``, ``res_x``, ``res_y`` and ``base_rmse`` is the score of
            the physics baseline alone on all training rows.
        """
        track_keys = ["game_id", "play_id", "nfl_id"]
        fe = self.feature_engineer
        builder = self.training_builder

        frames, targets = self.data_loader.load_all_train()

        print("\nEngineering features on train…")
        # Rebinding the same name lets the previous frame be released.
        frames = fe.engineer_advanced_features(frames)
        frames = fe.add_sequence_features(frames)

        print("Computing neighbor embeddings (train)…")
        neighbor_feats = self.gnn_processor.compute_neighbor_embeddings(frames)

        rows = builder.create_training_rows(frames, targets)
        print("Train rows (pre-merge GNN):", rows.shape)

        rows = rows.merge(neighbor_feats, on=track_keys, how="left")

        # Constant-velocity baseline from the last observed state.
        base_x, base_y = builder.physics_baseline(
            rows["x"].values, rows["y"].values,
            rows["velocity_x"].values, rows["velocity_y"].values,
            rows["delta_t"].values
        )
        mse_x = mean_squared_error(rows["target_x"], base_x)
        mse_y = mean_squared_error(rows["target_y"], base_y)
        base_rmse = math.sqrt(0.5*(mse_x + mse_y))
        print(f"Physics baseline RMSE: {base_rmse:.5f}")

        # The models learn the residual on top of the baseline.
        rows["base_x"] = base_x
        rows["base_y"] = base_y
        rows["res_x"] = rows["target_x"] - rows["base_x"]
        rows["res_y"] = rows["target_y"] - rows["base_y"]

        return rows, base_rmse

    def prepare_test_data(self, feat_cols):
        """Build the test frame containing every column in ``feat_cols``.

        Feature columns that are absent are created as 0.0; infinities and
        missing values inside the feature columns are replaced by 0.0.
        """
        track_keys = ["game_id", "play_id", "nfl_id"]
        fe = self.feature_engineer

        frames, template = self.data_loader.load_test_data()

        frames = fe.engineer_advanced_features(frames)
        frames = fe.add_sequence_features(frames)

        print("Computing neighbor embeddings (test)…")
        neighbor_feats = self.gnn_processor.compute_neighbor_embeddings(frames)

        # Last observed row of every player track.
        last_obs = (
            frames.sort_values(track_keys + ["frame_id"])
                  .groupby(track_keys, as_index=False)
                  .tail(1)
                  .rename(columns={"frame_id": "last_frame_id"})
        )

        te = template.merge(last_obs, on=track_keys, how="left")
        te = te.merge(neighbor_feats, on=track_keys, how="left")

        frame_gap = te["frame_id"] - te["last_frame_id"]
        te["delta_frames"] = frame_gap.clip(lower=0).astype(float)
        te["delta_t"] = te["delta_frames"] / 10.0

        # Guarantee the full feature set, then sanitise it.
        for col in feat_cols:
            if col in te.columns:
                continue
            te[col] = 0.0
        cleaned = te[feat_cols].replace([np.inf, -np.inf], np.nan).fillna(0.0)
        te.loc[:, feat_cols] = cleaned.to_numpy()

        return te

    def train_and_predict(self):
        """Run training, save the models, predict the test set, write the submission.

        Returns:
            ``(submission_df, fold_rmse, base_rmse)``.
        """
        cfg = self.config
        builder = self.training_builder

        train_df, base_rmse = self.prepare_training_data()

        feat_cols = builder.build_feature_list(train_df)
        print(f"Using {len(feat_cols)} features (incl. GNN-lite).")

        # Keep only rows with complete features and residuals; any remaining
        # infinities are zeroed.
        required = feat_cols + ["res_x", "res_y"]
        df_train = train_df.dropna(subset=required).reset_index(drop=True)
        cleaned = df_train[feat_cols].replace([np.inf, -np.inf], np.nan).fillna(0.0)
        df_train.loc[:, feat_cols] = cleaned.to_numpy()

        X = df_train[feat_cols].values.astype(np.float32)
        yx = df_train["res_x"].values.astype(np.float32)   # residual targets
        yy = df_train["res_y"].values.astype(np.float32)
        base_vec_x = df_train["base_x"].values.astype(np.float32)
        base_vec_y = df_train["base_y"].values.astype(np.float32)

        # One CV group per player track.
        if cfg.USE_GROUP_KFOLD:
            groups = (df_train["game_id"].astype(str) + "_" +
                      df_train["play_id"].astype(str) + "_" +
                      df_train["nfl_id"].astype(str)).values
        else:
            groups = None

        models_x, models_y, fold_rmse = self.trainer.train_catboost_folds(
            X, yx, yy, ids_group=groups,
            base_x=base_vec_x, base_y=base_vec_y, use_residual=True
        )

        # Persist the models together with the feature order they expect.
        model_path = cfg.SAVE_DIR/"catboost_models_5fold_gnnlite.pkl"
        bundle = {"models_x": models_x, "models_y": models_y,
                  "features": feat_cols, "cv_rmse": fold_rmse}
        with open(model_path, "wb") as f:
            pickle.dump(bundle, f)
        print("Saved:", model_path)

        te = self.prepare_test_data(feat_cols)
        Xtest = te[feat_cols].values.astype(np.float32)

        # Baseline for the test rows.
        tbx, tby = builder.physics_baseline(
            te["x"].values, te["y"].values,
            te["velocity_x"].values, te["velocity_y"].values,
            te["delta_t"].values
        )

        # Average the fold models, add the baseline back, clip to the field.
        pred_rx = np.mean([m.predict(Xtest) for m in models_x], axis=0)
        pred_ry = np.mean([m.predict(Xtest) for m in models_y], axis=0)
        pred_x = np.clip(pred_rx + tbx, 0.0, 120.0)
        pred_y = np.clip(pred_ry + tby, 0.0, 53.3)

        row_ids = (te["game_id"].astype(str) + "_" +
                   te["play_id"].astype(str) + "_" +
                   te["nfl_id"].astype(str) + "_" +
                   te["frame_id"].astype(str))
        sub = pd.DataFrame({"id": row_ids, "x": pred_x, "y": pred_y})
        submission_path = cfg.SAVE_DIR/"submission.csv"
        sub.to_csv(submission_path, index=False)
        print("Saved submission:", submission_path)

        print("\nCV Mean ± std (absolute, with baseline added back):", f"{np.mean(fold_rmse):.5f} ± {np.std(fold_rmse):.5f}")
        print("Physics baseline (absolute):", f"{base_rmse:.5f}")

        return sub, fold_rmse, base_rmse

def main():
    """Run the whole pipeline and print a closing summary banner."""
    _submission, cv_scores, baseline_rmse = NFLPredictor().train_and_predict()

    cv_mean = np.mean(cv_scores)
    cv_std = np.std(cv_scores)
    banner = "="*50

    print("\n" + banner)
    print("NFL Big Data Bowl 2026 - Training Complete!")
    print(f"Final CV Score: {cv_mean:.5f} ± {cv_std:.5f}")
    # Positive value = the models beat the physics baseline.
    print(f"Baseline Improvement: {baseline_rmse - cv_mean:.5f}")
    print(banner)


# Run the pipeline only when executed as a script / notebook cell.
if __name__ == "__main__":
    main()


Loading training data...


  0%|          | 0/18 [00:00<?, ?it/s]

Train input:  (4880579, 23)
Train output: (562936, 6)

Engineering features on train…
Computing neighbor embeddings (train)…
Train rows (pre-merge GNN): (562936, 102)
Physics baseline RMSE: 4.23701
Using 102 features (incl. GNN-lite).
Using GroupKFold by (game_id,play_id,nfl_id).

Fold 1/5 — train 450,339 | val 112,583
0:	learn: 4.7113405	test: 4.6146535	best: 4.6146535 (0)	total: 324ms	remaining: 1h 20m 56s
200:	learn: 4.1061276	test: 4.0299093	best: 4.0299093 (200)	total: 48.2s	remaining: 59m 10s
400:	learn: 3.5932661	test: 3.5371133	best: 3.5371133 (400)	total: 1m 41s	remaining: 1h 1m 22s
600:	learn: 3.1585142	test: 3.1214944	best: 3.1214944 (600)	total: 2m 28s	remaining: 59m 7s
800:	learn: 2.7888944	test: 2.7705197	best: 2.7705197 (800)	total: 3m 14s	remaining: 57m 26s
1000:	learn: 2.4755276	test: 2.4759066	best: 2.4759066 (1000)	total: 4m	remaining: 55m 58s
1200:	learn: 2.2093372	test: 2.2282186	best: 2.2282186 (1200)	total: 4m 45s	remaining: 54m 43s
1400:	learn: 1.9839367	test: 2