In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GroupKFold
from catboost import CatBoostRegressor
import lightgbm as lgb
from feature_engineering.dataset import build_dataset
from feature_engineering.features import FEATURE_CANDIDATES, CAT_CANDIDATES, SORT_KEYS, DROP_COLS


class Config:
    """Shared experiment settings read by the training cells of this notebook."""

    SEED = 42
    N_FOLDS = 5
    # NOTE(review): presumably the pitch bounds (x, y) used for clipping — confirm.
    CLIP_X = (0, 105)
    CLIP_Y = (0, 68)

    # CatBoost keyword arguments for the base (first-stage) regressors.
    BASE_PARAMS = dict(
        depth=10,
        learning_rate=0.0222,
        l2_leaf_reg=2.80,
        rsm=0.6716,
        bagging_temperature=0.121,
        iterations=6000,
        early_stopping_rounds=200,
    )

    # CatBoost keyword arguments for the residual "fixer" (second-stage) regressors.
    FIXER_PARAMS = dict(
        depth=7,
        learning_rate=0.0339,
        l2_leaf_reg=26.74,
        rsm=0.9103,
        bagging_temperature=0.4276,
        loss_function='MAE',
        early_stopping_rounds=250,
    )

In [None]:
# 0) 공통 설정/평가/앙상블

def metric(true_dx, true_dy, pred_dx, pred_dy):
    """Return the mean Euclidean distance between true and predicted (dx, dy) as a float."""
    err_x = true_dx - pred_dx
    err_y = true_dy - pred_dy
    per_row_error = np.sqrt(err_x**2 + err_y**2)
    return float(np.mean(per_row_error))

def bucket_from_dist(d):
    """Map a distance to its bucket label: "short" (< 20), "mid" (< 35), otherwise "long"."""
    if d < 20:
        return "short"
    if d < 35:
        return "mid"
    # Everything else ends up here, including values for which both comparisons are False.
    return "long"

def weights_from_oof_scores(scores, alpha=0.5, eps=1e-6):
    """Turn error scores into normalised weights proportional to 1 / (score + eps) ** alpha.

    Lower scores receive larger weights; the returned array sums to 1.
    """
    errors = np.array(scores, dtype=float)
    inverse = 1.0 / np.power(errors + eps, alpha)
    return inverse / inverse.sum()

def weighted_sum2(a1, a2, w1, w2):
    """Return the two-term weighted sum w1 * a1 + w2 * a2."""
    first_term = w1 * a1
    second_term = w2 * a2
    return first_term + second_term

In [None]:
from feature_engineering.dataset import build_dataset
# NOTE(review): `df_copy` is not defined in any cell visible in this notebook —
# presumably it comes from an earlier or deleted cell; confirm this cell still
# works after Restart Kernel -> Run All.
X_train, X_test, y_dx, y_dy, groups, test_epi, cat_features = build_dataset(df_copy)

# 1) CatBoost pipeline (Base + LongSwap + Fixer)
#    - the return values are kept in a fixed layout so they are easy to use for voting

def train_long_model(X_train, y_dx, y_dy, cat_features, long_threshold=35.0):
    """Fit a pair of CatBoost regressors (dx, dy) on the long-pass rows only.

    A row counts as "long" when sqrt(y_dx**2 + y_dy**2) >= long_threshold.

    Parameters
    ----------
    X_train : feature frame; rows are selected with a boolean mask.
    y_dx, y_dy : targets, positionally aligned with the rows of X_train.
    cat_features : forwarded to CatBoostRegressor.fit(cat_features=...).
    long_threshold : minimum true distance for a row to be used. The default
        (35.0) reproduces the previously hard-coded cut-off, so existing
        four-argument callers behave as before.

    Returns
    -------
    (model_dx, model_dy) : the two fitted CatBoostRegressor instances.

    Raises
    ------
    ValueError : if no row reaches long_threshold.
    """
    pass_dist = np.sqrt(y_dx**2 + y_dy**2)
    # A plain ndarray mask selects rows by position, independent of any pandas index.
    mask_long = np.asarray(pass_dist >= long_threshold)

    X_long = X_train[mask_long]
    y_dx_long = y_dx[mask_long]
    y_dy_long = y_dy[mask_long]

    print(f"[LONG] samples: {len(X_long)}")

    if len(X_long) == 0:
        raise ValueError(
            f"train_long_model: no rows with distance >= {long_threshold}; nothing to fit"
        )

    LONG_PARAMS = {
        'depth': 7,
        'learning_rate': 0.03,
        'iterations': 1200,
        'loss_function': 'RMSE',
        # NOTE(review): fit() below gets no eval_set, so this setting presumably
        # has nothing to monitor — confirm against the CatBoost docs.
        'early_stopping_rounds': 100,
        # Same seed as the rest of the notebook instead of a second literal 42.
        'random_seed': Config.SEED,
        'verbose': 200
    }

    model_dx = CatBoostRegressor(**LONG_PARAMS)
    model_dy = CatBoostRegressor(**LONG_PARAMS)

    model_dx.fit(X_long, y_dx_long, cat_features=cat_features)
    model_dy.fit(X_long, y_dy_long, cat_features=cat_features)

    return model_dx, model_dy

def train_residual_pipeline_cat(
    X_train, y_dx, y_dy, X_test, groups, cat_features,
    long_threshold=35.0, min_long_samples=50, verbose=200
):
    """Grouped K-fold CatBoost pipeline: base models, long-pass swap, residual fixer.

    For every GroupKFold split (Config.N_FOLDS splits, grouped by `groups`):
      1. fit base dx/dy regressors (Config.BASE_PARAMS) with the fold's
         validation rows as eval_set;
      2. if the fold's training part has at least `min_long_samples` rows whose
         true distance is >= `long_threshold`, fit long-pass models via
         train_long_model and overwrite the base predictions for long rows
         (validation rows are selected by TRUE distance, test rows by the
         base model's PREDICTED distance);
      3. fit fixer dx/dy regressors (Config.FIXER_PARAMS) on the residuals of
         the base models and add their predictions on top.

    Parameters
    ----------
    X_train, X_test : feature frames (indexed with .iloc and boolean masks).
    y_dx, y_dy : targets, indexed with the integer fold indices.
        NOTE(review): presumably numpy arrays — confirm against build_dataset.
    groups : group labels passed to GroupKFold.split.
    cat_features : forwarded to CatBoostRegressor.fit.
    long_threshold : distance at/above which a row is treated as "long".
    min_long_samples : minimum long rows in a fold's training part to fit
        the long-pass models.
    verbose : forwarded to CatBoostRegressor.fit(verbose=...).

    Returns
    -------
    (oof_pred_dx, oof_pred_dy, test_pred_dx, test_pred_dy, fi_df, oof_df)
        Out-of-fold and fold-averaged test predictions (base + fixer), the
        fold-averaged feature importance of the dx base model, and a copy of
        X_train with true/pred columns and `pass_dist` appended.
    """
    gkf = GroupKFold(n_splits=Config.N_FOLDS)

    # Out-of-fold buffers (base and fixer kept separately)
    oof_base_dx = np.zeros(len(X_train), dtype=np.float32)
    oof_base_dy = np.zeros(len(X_train), dtype=np.float32)
    oof_fix_dx  = np.zeros(len(X_train), dtype=np.float32)
    oof_fix_dy  = np.zeros(len(X_train), dtype=np.float32)

    # Test buffers (base and fixer kept separately, averaged over folds)
    test_base_dx = np.zeros(len(X_test), dtype=np.float32)
    test_base_dy = np.zeros(len(X_test), dtype=np.float32)
    test_fix_dx  = np.zeros(len(X_test), dtype=np.float32)
    test_fix_dy  = np.zeros(len(X_test), dtype=np.float32)

    # Feature importance accumulator, one row per training column
    fi_df = pd.DataFrame(index=X_train.columns)
    fi_df["importance"] = 0.0

    print("\n===== [CatBoost] 5-Fold Training (Base + LongSwap + Fixer) =====")

    for fold, (tr_idx, va_idx) in enumerate(gkf.split(X_train, y_dx, groups=groups), 1):
        print(f"\n--- Fold {fold} ---")

        X_tr = X_train.iloc[tr_idx]
        X_va = X_train.iloc[va_idx]

        # 1) Base models
        model_dx = CatBoostRegressor(**Config.BASE_PARAMS)
        model_dy = CatBoostRegressor(**Config.BASE_PARAMS)

        model_dx.fit(
            X_tr, y_dx[tr_idx],
            eval_set=[(X_va, y_dx[va_idx])],
            cat_features=cat_features, verbose=verbose
        )
        model_dy.fit(
            X_tr, y_dy[tr_idx],
            eval_set=[(X_va, y_dy[va_idx])],
            cat_features=cat_features, verbose=verbose
        )

        # 2) Long masks (train/validation both use the TRUE distance)
        tr_dist = np.sqrt(y_dx[tr_idx]**2 + y_dy[tr_idx]**2)
        va_dist = np.sqrt(y_dx[va_idx]**2 + y_dy[va_idx]**2)

        mask_long_tr = (tr_dist >= long_threshold)
        mask_long_va = (va_dist >= long_threshold)

        if mask_long_tr.sum() >= min_long_samples:
            # NOTE(review): rows are pre-filtered here with `long_threshold`, and
            # train_long_model filters again with its own cut-off (35 in its body),
            # so a `long_threshold` below 35 is not fully honoured.
            long_dx_model, long_dy_model = train_long_model(
                X_tr[mask_long_tr],
                y_dx[tr_idx][mask_long_tr],
                y_dy[tr_idx][mask_long_tr],
                cat_features
            )
        else:
            long_dx_model, long_dy_model = None, None
            print(f"[Fold {fold}] long samples 부족({mask_long_tr.sum()}), long swap 스킵")

        # 3) Out-of-fold base predictions
        val_dx = model_dx.predict(X_va).astype(np.float32)
        val_dy = model_dy.predict(X_va).astype(np.float32)

        # 4) Out-of-fold long swap (validation rows are chosen by TRUE distance)
        # NOTE(review): the validation labels decide which rows get swapped,
        # whereas the test swap below can only use predicted distance — the OOF
        # score is therefore likely optimistic relative to test; confirm intent.
        if long_dx_model is not None and mask_long_va.sum() > 0:
            val_dx[mask_long_va] = long_dx_model.predict(X_va[mask_long_va]).astype(np.float32)
            val_dy[mask_long_va] = long_dy_model.predict(X_va[mask_long_va]).astype(np.float32)

        # 5) Store the (possibly swapped) out-of-fold predictions
        oof_base_dx[va_idx] = val_dx
        oof_base_dy[va_idx] = val_dy

        # 6) Test base predictions + test long swap (rows chosen by PREDICTED distance)
        test_dx_fold = model_dx.predict(X_test).astype(np.float32)
        test_dy_fold = model_dy.predict(X_test).astype(np.float32)

        if long_dx_model is not None:
            test_dist_pred = np.sqrt(test_dx_fold**2 + test_dy_fold**2)
            mask_long_te = (test_dist_pred >= long_threshold)
            if mask_long_te.sum() > 0:
                test_dx_fold[mask_long_te] = long_dx_model.predict(X_test[mask_long_te]).astype(np.float32)
                test_dy_fold[mask_long_te] = long_dy_model.predict(X_test[mask_long_te]).astype(np.float32)

        test_base_dx += test_dx_fold / Config.N_FOLDS
        test_base_dy += test_dy_fold / Config.N_FOLDS

        # Feature importance: only the dx base model contributes
        fi_df["importance"] += model_dx.get_feature_importance() / Config.N_FOLDS

        # 7) Fixer (residual) models
        # Training residuals come from the base models' predictions on their own
        # training rows (no long swap applied); validation residuals use the
        # post-swap val_dx / val_dy from steps 3-4.
        train_dx_pred = model_dx.predict(X_tr).astype(np.float32)
        train_dy_pred = model_dy.predict(X_tr).astype(np.float32)

        resid_dx_tr = y_dx[tr_idx] - train_dx_pred
        resid_dy_tr = y_dy[tr_idx] - train_dy_pred

        resid_dx_va = y_dx[va_idx] - val_dx
        resid_dy_va = y_dy[va_idx] - val_dy

        fixer_dx = CatBoostRegressor(**Config.FIXER_PARAMS)
        fixer_dy = CatBoostRegressor(**Config.FIXER_PARAMS)

        fixer_dx.fit(
            X_tr, resid_dx_tr,
            eval_set=[(X_va, resid_dx_va)],
            cat_features=cat_features, verbose=verbose
        )
        fixer_dy.fit(
            X_tr, resid_dy_tr,
            eval_set=[(X_va, resid_dy_va)],
            cat_features=cat_features, verbose=verbose
        )

        oof_fix_dx[va_idx] = fixer_dx.predict(X_va).astype(np.float32)
        oof_fix_dy[va_idx] = fixer_dy.predict(X_va).astype(np.float32)

        test_fix_dx += fixer_dx.predict(X_test).astype(np.float32) / Config.N_FOLDS
        test_fix_dy += fixer_dy.predict(X_test).astype(np.float32) / Config.N_FOLDS

    # Final prediction = base (with long swap) + fixer correction
    oof_pred_dx = oof_base_dx + oof_fix_dx
    oof_pred_dy = oof_base_dy + oof_fix_dy

    test_pred_dx = test_base_dx + test_fix_dx
    test_pred_dy = test_base_dy + test_fix_dy

    # Diagnostic frame: features plus true/predicted targets and true pass distance
    oof_df = X_train.copy()
    oof_df["true_dx"] = y_dx
    oof_df["true_dy"] = y_dy
    oof_df["pred_dx"] = oof_pred_dx
    oof_df["pred_dy"] = oof_pred_dy
    oof_df["pass_dist"] = np.sqrt(oof_df["true_dx"]**2 + oof_df["true_dy"]**2)

    score = metric(y_dx, y_dy, oof_pred_dx, oof_pred_dy)
    print(f"\n[CatBoost] OOF: {score:.4f}m")

    return oof_pred_dx, oof_pred_dy, test_pred_dx, test_pred_dy, fi_df, oof_df

In [None]:
# 1) (LGBM) 인코딩: 범주형은 __NA__로 처리 후 factorize
#    수치형만 -999로 결측 처리

def encode_for_lgbm(X_train, X_test, cat_cols, make_category_dtype=True):
    """Return encoded copies of the train/test frames for LightGBM.

    Categorical columns: missing values become the token "__NA__", then train
    and test are factorized together so one integer code denotes the same
    category in both frames. Numeric columns (everything not in `cat_cols`):
    +/-inf and NaN are replaced by -999.

    Parameters
    ----------
    X_train, X_test : input frames; they are copied, not modified.
    cat_cols : candidate categorical column names; names that are absent from
        X_train are ignored.
    make_category_dtype : when True the integer codes are cast to a pandas
        categorical dtype; otherwise they stay int32.

    Returns
    -------
    (Xtr, Xte) : the encoded copies.
    """
    Xtr = X_train.copy()
    Xte = X_test.copy()
    ntr = len(Xtr)

    cat_cols = [c for c in cat_cols if c in Xtr.columns]

    # Categorical: "__NA__" for missing, then factorize train+test together.
    for c in cat_cols:
        combined = pd.concat([Xtr[c], Xte[c]], axis=0).astype("object").fillna("__NA__")
        codes, uniques = pd.factorize(combined, sort=False)
        codes = codes.astype("int32")

        # Assign positionally (plain arrays) so the result does not depend on
        # the frames' index labels being unique or aligned.
        Xtr[c] = codes[:ntr]
        Xte[c] = codes[ntr:]

        if make_category_dtype:
            # One dtype shared by both frames: casting each frame on its own
            # would give train and test different category sets, making their
            # `.cat.codes` incomparable.
            shared_dtype = pd.CategoricalDtype(
                categories=np.arange(len(uniques), dtype="int32")
            )
            Xtr[c] = Xtr[c].astype(shared_dtype)
            Xte[c] = Xte[c].astype(shared_dtype)

    # Numeric: inf/NaN -> -999
    num_cols = [c for c in Xtr.columns if c not in cat_cols]
    if num_cols:
        Xtr[num_cols] = Xtr[num_cols].replace([np.inf, -np.inf], np.nan).fillna(-999)
        Xte[num_cols] = Xte[num_cols].replace([np.inf, -np.inf], np.nan).fillna(-999)

    return Xtr, Xte

In [None]:
import lightgbm as lgb

# 3) LightGBM: A1 residual Fixer (2-stage) + A2 LongSwap
def _lgbm_fit_early_stop(params, X_fit, y_fit, X_val, y_val, cat_feats,
                         num_boost_round, early_stopping_rounds, verbose_eval):
    """Train one booster on (X_fit, y_fit) with early stopping on (X_val, y_val)."""
    dtrain = lgb.Dataset(X_fit, label=y_fit, categorical_feature=cat_feats, free_raw_data=False)
    dvalid = lgb.Dataset(X_val, label=y_val, categorical_feature=cat_feats, free_raw_data=False)
    return lgb.train(
        params, dtrain, num_boost_round=num_boost_round, valid_sets=[dvalid],
        callbacks=[lgb.early_stopping(early_stopping_rounds, verbose=True),
                   lgb.log_evaluation(period=verbose_eval)]
    )


def _lgbm_predict_best(booster, X):
    """Predict with the booster's best (early-stopped) iteration and cast to float32."""
    return booster.predict(X, num_iteration=booster.best_iteration).astype(np.float32)


def _lgbm_fit_long_pair(long_params, X_long, ydx_long, ydy_long, cat_feats, num_boost_round=1500):
    """Train the dx/dy long-pass boosters for a fixed number of rounds (no validation set)."""
    dlong_dx = lgb.Dataset(X_long, label=ydx_long, categorical_feature=cat_feats, free_raw_data=False)
    dlong_dy = lgb.Dataset(X_long, label=ydy_long, categorical_feature=cat_feats, free_raw_data=False)
    long_dx = lgb.train(long_params, dlong_dx, num_boost_round=num_boost_round, valid_sets=None)
    long_dy = lgb.train(long_params, dlong_dy, num_boost_round=num_boost_round, valid_sets=None)
    return long_dx, long_dy


def run_lgbm_oof_residual_longswap(
    X_train, y_dx, y_dy, X_test, groups, cat_cols,
    long_threshold=35.0, min_long_samples=50,
    make_category_dtype=True,
    base_params=None, fixer_params=None,
    num_boost_round=6000, early_stopping_rounds=200, verbose_eval=200
):
    """Grouped K-fold LightGBM pipeline: base models, residual fixer, long-pass swap.

    Steps
    -----
    1. Encode the frames with encode_for_lgbm.
    2. Per GroupKFold split (Config.N_FOLDS): fit base dx/dy boosters, then
       fixer dx/dy boosters on the base residuals; store out-of-fold and
       fold-averaged test predictions (base + fixer).
    3. LongSwap:
       * OOF - per fold, long-pass boosters are fitted on the fold's TRAINING
         long rows and replace the predictions of the fold's VALIDATION long
         rows (rows chosen by true distance), so the swapped values are real
         out-of-fold predictions.
       * TEST - long-pass boosters fitted on all long training rows replace
         the test predictions whose predicted distance is >= long_threshold.

    Parameters
    ----------
    X_train, X_test : feature frames.
    y_dx, y_dy : targets aligned positionally with X_train.
    groups : group labels for GroupKFold.
    cat_cols : categorical column names (those present after encoding are used).
    long_threshold : distance at/above which a row counts as "long".
    min_long_samples : minimum long training rows needed to fit long-pass boosters.
    make_category_dtype : forwarded to encode_for_lgbm.
    base_params, fixer_params : LightGBM parameter dicts; defaults are built
        below when None.
    num_boost_round, early_stopping_rounds, verbose_eval : training controls
        for the base and fixer boosters.

    Returns
    -------
    (oof_dx, oof_dy, test_dx, test_dy) : float32 arrays.
    """
    Xtr, Xte = encode_for_lgbm(X_train, X_test, cat_cols, make_category_dtype=make_category_dtype)
    gkf = GroupKFold(n_splits=Config.N_FOLDS)
    cat_feats_present = [c for c in cat_cols if c in Xtr.columns]

    # The fold indices are positional, so index the targets as plain arrays.
    y_dx = np.asarray(y_dx)
    y_dy = np.asarray(y_dy)

    # Default parameters
    if base_params is None:
        base_params = dict(
            objective="huber",
            alpha=0.9,
            metric="rmse",
            learning_rate=0.03,
            num_leaves=64,
            feature_fraction=0.8,
            bagging_fraction=0.8,
            bagging_freq=1,
            min_data_in_leaf=50,
            lambda_l2=5.0,
            seed=Config.SEED if hasattr(Config, "SEED") else 42,
            verbosity=-1
        )
    if fixer_params is None:
        fixer_params = dict(
            objective="regression_l1",  # L1 objective for the residual stage
            metric="rmse",
            learning_rate=0.03,
            num_leaves=48,
            feature_fraction=0.8,
            bagging_fraction=0.8,
            bagging_freq=1,
            min_data_in_leaf=80,
            lambda_l2=8.0,
            seed=Config.SEED if hasattr(Config, "SEED") else 42,
            verbosity=-1
        )

    # The long-pass boosters reuse the base parameters with a few overrides.
    long_params = dict(base_params)
    long_params.update({"learning_rate": 0.05, "num_leaves": 96, "min_data_in_leaf": 30})

    true_dist = np.sqrt(y_dx**2 + y_dy**2)

    # OOF / TEST buffers
    n_train, n_test = len(Xtr), len(Xte)
    oof_base_dx = np.zeros(n_train, dtype=np.float32)
    oof_base_dy = np.zeros(n_train, dtype=np.float32)
    oof_fix_dx  = np.zeros(n_train, dtype=np.float32)
    oof_fix_dy  = np.zeros(n_train, dtype=np.float32)

    # Out-of-fold long-pass predictions and the rows they apply to.
    oof_long_dx = np.zeros(n_train, dtype=np.float32)
    oof_long_dy = np.zeros(n_train, dtype=np.float32)
    oof_swapped = np.zeros(n_train, dtype=bool)

    test_base_dx = np.zeros(n_test, dtype=np.float32)
    test_base_dy = np.zeros(n_test, dtype=np.float32)
    test_fix_dx  = np.zeros(n_test, dtype=np.float32)
    test_fix_dy  = np.zeros(n_test, dtype=np.float32)

    print("\n===== [LightGBM] 5-Fold Training (Base + Fixer + LongSwap) =====")

    for fold, (tr_idx, va_idx) in enumerate(gkf.split(Xtr, y_dx, groups=groups), 1):
        print(f"\n[LGBM] Fold {fold}")

        X_tr, X_va = Xtr.iloc[tr_idx], Xtr.iloc[va_idx]
        ydx_tr, ydx_va = y_dx[tr_idx], y_dx[va_idx]
        ydy_tr, ydy_va = y_dy[tr_idx], y_dy[va_idx]

        # ---------- Base dx / dy ----------
        base_dx = _lgbm_fit_early_stop(
            base_params, X_tr, ydx_tr, X_va, ydx_va, cat_feats_present,
            num_boost_round, early_stopping_rounds, verbose_eval
        )
        base_dy = _lgbm_fit_early_stop(
            base_params, X_tr, ydy_tr, X_va, ydy_va, cat_feats_present,
            num_boost_round, early_stopping_rounds, verbose_eval
        )

        # base pred (validation)
        val_base_dx = _lgbm_predict_best(base_dx, X_va)
        val_base_dy = _lgbm_predict_best(base_dy, X_va)
        oof_base_dx[va_idx] = val_base_dx
        oof_base_dy[va_idx] = val_base_dy

        # base pred (test, averaged over folds)
        test_base_dx += _lgbm_predict_best(base_dx, Xte) / Config.N_FOLDS
        test_base_dy += _lgbm_predict_best(base_dy, Xte) / Config.N_FOLDS

        # ---------- Fixer residual targets ----------
        resid_dx_tr = (ydx_tr - _lgbm_predict_best(base_dx, X_tr)).astype(np.float32)
        resid_dy_tr = (ydy_tr - _lgbm_predict_best(base_dy, X_tr)).astype(np.float32)

        resid_dx_va_true = (ydx_va - val_base_dx).astype(np.float32)
        resid_dy_va_true = (ydy_va - val_base_dy).astype(np.float32)

        # ---------- Fixer dx / dy ----------
        fix_dx = _lgbm_fit_early_stop(
            fixer_params, X_tr, resid_dx_tr, X_va, resid_dx_va_true, cat_feats_present,
            num_boost_round, early_stopping_rounds, verbose_eval
        )
        fix_dy = _lgbm_fit_early_stop(
            fixer_params, X_tr, resid_dy_tr, X_va, resid_dy_va_true, cat_feats_present,
            num_boost_round, early_stopping_rounds, verbose_eval
        )

        # fixer pred
        oof_fix_dx[va_idx] = _lgbm_predict_best(fix_dx, X_va)
        oof_fix_dy[va_idx] = _lgbm_predict_best(fix_dy, X_va)

        test_fix_dx += _lgbm_predict_best(fix_dx, Xte) / Config.N_FOLDS
        test_fix_dy += _lgbm_predict_best(fix_dy, Xte) / Config.N_FOLDS

        # ---------- A2) LongSwap, out-of-fold part ----------
        # Fit on this fold's training long rows only, so the values written for
        # the validation long rows are not in-sample predictions.
        long_tr = true_dist[tr_idx] >= long_threshold
        long_va = true_dist[va_idx] >= long_threshold
        n_long_tr = int(long_tr.sum())

        if n_long_tr < min_long_samples:
            print(f"[LGBM] Fold {fold}: only {n_long_tr} long training rows "
                  f"(< {min_long_samples}); OOF long swap skipped")
        elif long_va.any():
            fold_long_dx, fold_long_dy = _lgbm_fit_long_pair(
                long_params, X_tr.iloc[np.flatnonzero(long_tr)],
                ydx_tr[long_tr], ydy_tr[long_tr], cat_feats_present
            )
            X_va_long = X_va.iloc[np.flatnonzero(long_va)]
            va_long_idx = va_idx[long_va]
            oof_long_dx[va_long_idx] = fold_long_dx.predict(X_va_long).astype(np.float32)
            oof_long_dy[va_long_idx] = fold_long_dy.predict(X_va_long).astype(np.float32)
            oof_swapped[va_long_idx] = True

    # base + fixer
    oof_dx = oof_base_dx + oof_fix_dx
    oof_dy = oof_base_dy + oof_fix_dy
    test_dx = test_base_dx + test_fix_dx
    test_dy = test_base_dy + test_fix_dy

    # OOF long swap (rows chosen by true distance, values predicted out-of-fold)
    oof_dx[oof_swapped] = oof_long_dx[oof_swapped]
    oof_dy[oof_swapped] = oof_long_dy[oof_swapped]

    # ---------- A2) LongSwap, test part ----------
    # The test-time long boosters are fitted once on all long training rows.
    mask_long_all = (true_dist >= long_threshold)
    if mask_long_all.sum() >= min_long_samples:
        long_dx, long_dy = _lgbm_fit_long_pair(
            long_params, Xtr.iloc[np.flatnonzero(mask_long_all)],
            y_dx[mask_long_all], y_dy[mask_long_all], cat_feats_present
        )

        # Test rows are routed by predicted distance (no labels available).
        pred_dist_test = np.sqrt(test_dx**2 + test_dy**2)
        mask_long_te = (pred_dist_test >= long_threshold)
        if mask_long_te.any():
            X_te_long = Xte.iloc[np.flatnonzero(mask_long_te)]
            test_dx[mask_long_te] = long_dx.predict(X_te_long).astype(np.float32)
            test_dy[mask_long_te] = long_dy.predict(X_te_long).astype(np.float32)

    oof_score = metric(y_dx, y_dy, oof_dx, oof_dy)
    print(f"\n[LightGBM] OOF: {oof_score:.4f}m")

    return oof_dx, oof_dy, test_dx, test_dy

In [None]:
# bucket별 weight voting
def compute_bucket_weights(y_dx, y_dy, cb_oof_dx, cb_oof_dy, lgb_oof_dx, lgb_oof_dy, alpha=0.5):
    """Derive per-bucket (CatBoost, LightGBM) blend weights from OOF errors.

    Rows are bucketed by true distance via bucket_from_dist. For each of
    "short", "mid" and "long" both models are scored with `metric` on that
    bucket's rows and the scores are converted with weights_from_oof_scores.
    A bucket with no rows gets (0.5, 0.5).

    Returns a dict {bucket_name: (cb_weight, lgb_weight)} of Python floats.
    """
    true_dist = np.sqrt(y_dx**2 + y_dy**2)
    labels = np.array([bucket_from_dist(d) for d in true_dist])

    bucket_weights = {}
    for name in ["short", "mid", "long"]:
        rows = np.where(labels == name)[0]
        if len(rows) > 0:
            cb_err = metric(y_dx[rows], y_dy[rows], cb_oof_dx[rows], cb_oof_dy[rows])
            lgb_err = metric(y_dx[rows], y_dy[rows], lgb_oof_dx[rows], lgb_oof_dy[rows])
            pair = weights_from_oof_scores([cb_err, lgb_err], alpha=alpha)
            bucket_weights[name] = (float(pair[0]), float(pair[1]))
        else:
            # No rows to score: fall back to an even split.
            bucket_weights[name] = (0.5, 0.5)
    return bucket_weights

def apply_bucket_ensemble(dx_a, dy_a, dx_b, dy_b, weights, dist_ref):
    """
    Blend two models' (dx, dy) predictions with bucket-specific weights.

    dx_a/dy_a: predictions of model A, shape (N,)
    dx_b/dy_b: predictions of model B, shape (N,)
    weights:   {"short": (wA, wB), "mid": (...), "long": (...)}
    dist_ref:  distance used to pick each row's bucket, shape (N,)
               (true distance for OOF, predicted distance for TEST)

    Returns (out_dx, out_dy) as float32 arrays; rows whose bucket is never
    visited stay at 0.
    """
    labels = np.array([bucket_from_dist(float(d)) for d in dist_ref])

    blended_dx = np.zeros_like(dx_a, dtype=np.float32)
    blended_dy = np.zeros_like(dy_a, dtype=np.float32)

    for name in ["short", "mid", "long"]:
        rows = np.where(labels == name)[0]
        if len(rows) > 0:
            weight_a, weight_b = weights[name]
            mixed_dx = weight_a * dx_a[rows] + weight_b * dx_b[rows]
            mixed_dy = weight_a * dy_a[rows] + weight_b * dy_b[rows]
            blended_dx[rows] = mixed_dx.astype(np.float32)
            blended_dy[rows] = mixed_dy.astype(np.float32)

    return blended_dx, blended_dy

# 5) Carry cap 후처리

def apply_carry_hard_caps(dx, dy, df_like, caps):
    """Shrink over-long predicted vectors for passes that follow a Carry.

    For every (role, cap) in `caps`, rows where
    df_like["player_role_pass"] == role, df_like["prev1_action_type"] == "Carry"
    and the predicted length exceeds `cap` are rescaled so their length equals
    `cap` (direction unchanged). Predicted lengths are computed once, before
    any rescaling. The inputs are copied; the capped copies are returned.
    """
    capped_dx, capped_dy = dx.copy(), dy.copy()
    pred_dist = np.sqrt(capped_dx**2 + capped_dy**2)

    for role in caps:
        cap = caps[role]
        is_role = df_like["player_role_pass"] == role
        after_carry = df_like["prev1_action_type"] == "Carry"
        too_long = pred_dist > cap
        mask = is_role & after_carry & too_long

        if mask.sum() != 0:
            scale = cap / pred_dist[mask]
            capped_dx[mask] *= scale
            capped_dy[mask] *= scale

    return capped_dx, capped_dy

In [None]:
# NOTE(review): this repeats the build_dataset(df_copy) call made in an earlier
# cell, and `df_copy` is not defined in any visible cell — presumably hidden
# kernel state; confirm the notebook survives Restart Kernel -> Run All.
X_train, X_test, y_dx, y_dy, groups, test_epi, cat_features = build_dataset(df_copy)

def run_catboost_pipeline(
    X_train, y_dx, y_dy, X_test, groups, cat_features,
    long_threshold=35.0, min_long_samples=50, verbose=200
):
    """Run train_residual_pipeline_cat and return its outputs for the ensemble cells.

    Returns (oof_dx, oof_dy, test_dx, test_dy, oof_df, feature_importance).
    The last two items are in the opposite order to the tuple returned by
    train_residual_pipeline_cat.
    """
    pipeline_out = train_residual_pipeline_cat(
        X_train, y_dx, y_dy, X_test, groups, cat_features,
        long_threshold=long_threshold,
        min_long_samples=min_long_samples,
        verbose=verbose,
    )
    oof_dx, oof_dy, test_dx, test_dy, importance, oof_frame = pipeline_out
    return oof_dx, oof_dy, test_dx, test_dy, oof_frame, importance

In [None]:
# --- CatBoost ---
# Runs the full CatBoost pipeline (base + long swap + fixer) on the dataset built above.
# run_catboost_pipeline returns the OOF frame before the feature importance,
# which is the order unpacked here.
cb_oof_dx, cb_oof_dy, cb_test_dx, cb_test_dy, cb_oof_df, cb_fi = run_catboost_pipeline(
    X_train, y_dx, y_dy, X_test, groups, cat_features,
    long_threshold=35.0, min_long_samples=50, verbose=200
)


===== [CatBoost] 5-Fold Training (Base + LongSwap + Fixer) =====

--- Fold 1 ---
0:	learn: 16.0111154	test: 15.9181020	best: 15.9181020 (0)	total: 173ms	remaining: 17m 16s
200:	learn: 10.9461143	test: 11.6283953	best: 11.6283953 (200)	total: 34.4s	remaining: 16m 33s
400:	learn: 10.2273040	test: 11.4526273	best: 11.4521784 (399)	total: 57s	remaining: 13m 16s
600:	learn: 9.5259459	test: 11.3788610	best: 11.3788610 (600)	total: 1m 24s	remaining: 12m 38s
800:	learn: 8.9523175	test: 11.3588203	best: 11.3554834 (773)	total: 1m 50s	remaining: 12m
1000:	learn: 8.3748086	test: 11.3537590	best: 11.3503561 (914)	total: 2m 15s	remaining: 11m 16s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 11.3503561
bestIteration = 914

Shrink model to first 915 iterations.
0:	learn: 14.9576729	test: 14.7510436	best: 14.7510436 (0)	total: 98.4ms	remaining: 9m 50s
200:	learn: 11.7135582	test: 12.4239115	best: 12.4239115 (200)	total: 47.8s	remaining: 22m 59s
400:	learn: 10.9114416	test: 12.24

In [None]:
# --- LightGBM (A1 + A2) ---
# A1 = residual fixer stage, A2 = long swap (see run_lgbm_oof_residual_longswap).
# NOTE(review): min_long_samples is 200 here but 50 in the CatBoost call —
# confirm the difference is intentional.
lgb_oof_dx, lgb_oof_dy, lgb_test_dx, lgb_test_dy = run_lgbm_oof_residual_longswap(
    X_train, y_dx, y_dy, X_test, groups, cat_cols=cat_features,
    long_threshold=35.0, min_long_samples=200,
    make_category_dtype=True,
    base_params=None, fixer_params=None,
    num_boost_round=6000, early_stopping_rounds=200, verbose_eval=200
)


===== [LightGBM] 5-Fold Training (Base + Fixer + LongSwap) =====

[LGBM] Fold 1
Training until validation scores don't improve for 200 rounds
[200]	valid_0's rmse: 14.6073
[400]	valid_0's rmse: 13.6277
[600]	valid_0's rmse: 12.9871
[800]	valid_0's rmse: 12.5689
[1000]	valid_0's rmse: 12.2885
[1200]	valid_0's rmse: 12.0957
[1400]	valid_0's rmse: 11.9552
[1600]	valid_0's rmse: 11.8516
[1800]	valid_0's rmse: 11.7766
[2000]	valid_0's rmse: 11.7279
[2200]	valid_0's rmse: 11.6963
[2400]	valid_0's rmse: 11.6764
[2600]	valid_0's rmse: 11.6608
[2800]	valid_0's rmse: 11.6502
[3000]	valid_0's rmse: 11.6424
[3200]	valid_0's rmse: 11.6346
[3400]	valid_0's rmse: 11.6293
[3600]	valid_0's rmse: 11.6228
[3800]	valid_0's rmse: 11.6187
[4000]	valid_0's rmse: 11.6156
[4200]	valid_0's rmse: 11.6126
[4400]	valid_0's rmse: 11.6087
[4600]	valid_0's rmse: 11.6068
[4800]	valid_0's rmse: 11.6025
[5000]	valid_0's rmse: 11.6003
[5200]	valid_0's rmse: 11.6001
Early stopping, best iteration is:
[5073]	valid_0's rms