In [5]:
import numpy as np
import pandas as pd
import lightgbm as lgb
from catboost import CatBoostRegressor
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

In [3]:
def LGBM_loo(input_x, input_y, params):
    """Run leave-one-out cross-validation with a LightGBM regressor.

    One ``lgb.LGBMRegressor(**params)`` is fitted per fold on all rows except
    the held-out one, and the held-out row's prediction is stored as the
    out-of-fold (OOF) prediction for that row.

    Parameters
    ----------
    input_x : pandas.DataFrame
        Feature matrix. Rows are selected positionally with ``.iloc`` and
        ``.columns`` is used to label the feature importances.
    input_y : pandas.Series
        Target values, selected positionally with ``.iloc``.
    params : dict
        Keyword arguments forwarded unchanged to ``lgb.LGBMRegressor``.

    Returns
    -------
    models : list
        The fitted model of every fold, in fold order.
    imp : pandas.DataFrame
        One row per feature with columns ``col`` (feature name), ``imp``
        (mean gain importance across folds) and ``imp_std`` (its standard
        deviation across folds).
    overall_rmse : float
        RMSE computed once over all OOF predictions.
    oof_preds : numpy.ndarray
        OOF prediction for every row of ``input_x``.
    """
    # Per-fold importance frames are collected here and concatenated once
    # after the loop; concatenating inside the loop re-copies the growing
    # frame on every fold, which is quadratic in the number of samples.
    fold_importances = []
    models = []
    oof_preds = np.zeros(len(input_x))

    loo = LeaveOneOut()
    splits = list(loo.split(input_x, input_y))
    n_split = len(splits)

    for nfold, (idx_tr, idx_va) in enumerate(splits):
        # "\r" keeps the progress indicator on a single console line.
        print(f"Fold {nfold+1}/{n_split}", end="\r")
        X_tr, y_tr = input_x.iloc[idx_tr], input_y.iloc[idx_tr]
        X_va = input_x.iloc[idx_va]

        model = lgb.LGBMRegressor(**params)
        model.fit(
            X_tr, y_tr,
            # The eval set is the training data itself, so it only tracks
            # the training metric; per-iteration logging is turned off.
            eval_set=[(X_tr, y_tr)],
            eval_metric="rmse",
            callbacks=[lgb.log_evaluation(0)]
        )
        models.append(model)

        oof_preds[idx_va] = model.predict(X_va)

        fold_importances.append(pd.DataFrame({
            "col": X_tr.columns,
            "imp": model.booster_.feature_importance(importance_type='gain'),
            "nfold": nfold
        }))

    # === Evaluate once over all folds after the loop has finished ===
    overall_rmse = np.sqrt(mean_squared_error(input_y, oof_preds))

    print("=" * 20, "LOOCV Results", "=" * 20)
    print(f"[LOOCV] RMSE: {overall_rmse:.5f}")

    imp = pd.concat(fold_importances, axis=0, ignore_index=True)
    imp = imp.groupby("col")["imp"].agg(["mean", "std"]).reset_index()
    imp.columns = ["col", "imp", "imp_std"]

    return models, imp, overall_rmse, oof_preds

In [4]:
def run_catboost_loo(input_x, input_y):
    """Run leave-one-out cross-validation with CatBoost, then fit a final model.

    One ``CatBoostRegressor(verbose=0, random_state=42)`` is fitted per fold
    and its prediction for the held-out row is stored as the out-of-fold
    (OOF) prediction. A final model is then fitted on all rows.

    Metrics are computed once on the pooled OOF predictions. Each
    leave-one-out fold holds a single sample, so per-fold metrics are not
    usable: R2 is undefined for one sample, and a one-sample RMSE is just the
    absolute error, so averaging it yields the MAE rather than the RMSE.

    Parameters
    ----------
    input_x : pandas.DataFrame
        Feature matrix. Rows are selected positionally with ``.iloc`` and
        ``.columns`` is used to label the feature importances.
    input_y : pandas.Series
        Target values, selected positionally with ``.iloc``.

    Returns
    -------
    final_model : CatBoostRegressor
        Model fitted on all of ``input_x`` / ``input_y``.
    metric : dict
        ``"R2"``, ``"MAE"`` and ``"RMSE"`` computed over all OOF predictions.
    importance_df : pandas.DataFrame
        Columns ``feature`` and ``importance`` (fold-averaged value of
        ``get_feature_importance()``), sorted by importance descending.
    oof_pred : numpy.ndarray
        OOF prediction for every row of ``input_x``.
    """
    n_samples = len(input_y)
    oof_pred = np.zeros(n_samples)
    importances = np.zeros(input_x.shape[1])

    for train_idx, valid_idx in LeaveOneOut().split(input_x):
        # Fold models are used only for the OOF prediction and importances;
        # they are not kept, so memory does not grow with the sample count.
        model = CatBoostRegressor(verbose=0, random_state=42)
        model.fit(input_x.iloc[train_idx], input_y.iloc[train_idx])

        oof_pred[valid_idx] = model.predict(input_x.iloc[valid_idx])
        importances += model.get_feature_importance()

    # Pooled OOF metrics (see docstring for why per-fold metrics are wrong).
    metric = {
        "R2": r2_score(input_y, oof_pred),
        "MAE": mean_absolute_error(input_y, oof_pred),
        "RMSE": np.sqrt(mean_squared_error(input_y, oof_pred))
    }

    # Leave-one-out yields one fold per sample, so dividing by the sample
    # count averages the importances over folds.
    importance_df = pd.DataFrame({
        "feature": input_x.columns,
        "importance": importances / n_samples
    }).sort_values(by="importance", ascending=False)

    # Train the final model on the full data set.
    final_model = CatBoostRegressor(verbose=0, random_state=42)
    final_model.fit(input_x, input_y)

    return final_model, metric, importance_df, oof_pred
