# example

In [None]:
# === CONFIG: seed / fast-tune flags / CV&ES budgets / data I/O paths / submit-th override ===

SEED = 42

# === 時短フラグ ===
FAST_TUNE = True
TUNE_FRAC = 0.60
N_SPLITS_TUNE = 3

# === イテレーション/試行数 ===
EARLY_STOP_TUNE = 100
EARLY_STOP_FULL = 200
N_TRIALS_TUNE = 20
N_TRIALS_REFINE = 10

OPTUNA_TIMEOUT_SEC = 1800

DATA_DIR = r"G:\マイドライブ\MUFJ_competition_2025\data"
OUT_DIR  = r"C:\Users\koshihiramatsu\projects\MUFJ_competition_2025\model-proposal_A_v3"

# しきい値の固定（Noneで自動に戻す）
SUBMIT_THRESHOLD_OVERRIDE = 0.315


# IMPORTS

In [14]:
# === IMPORTS: stdlib / numpy-pandas / sklearn / catboost / optuna ===

import os, re, json, math, warnings, itertools, textwrap
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, classification_report, confusion_matrix
from sklearn.utils import check_random_state

from catboost import CatBoostClassifier, Pool
import optuna


In [15]:
# === UTILS: column detection / submit-sep / versioning ===

from typing import Optional, Tuple, List

def detect_submit_sep(sample_submit_path: str) -> str:
    # カンマ/タブ/空白の順で試す。列数=2なら採用。
    for sep in [",", "\t", r"\s+"]:
        try:
            df = pd.read_csv(sample_submit_path, header=None, sep=sep, engine="python")
            if df.shape[1] == 2:
                return sep
        except Exception:
            pass
    # デフォルト: カンマ
    return ","

def is_binary(col: pd.Series) -> bool:
    vals = pd.unique(col.dropna())
    return set(vals).issubset({0,1})

def detect_columns(train: pd.DataFrame, test: pd.DataFrame) -> Tuple[str, str]:
    # 目的変数: train にのみ存在し、かつ {0,1} のどれか
    only_in_train = [c for c in train.columns if c not in test.columns]
    candid_tgt = [c for c in only_in_train if is_binary(train[c])]
    if len(candid_tgt) == 1:
        target_col = candid_tgt[0]
    else:
        # フォールバック: 名前に label/target/default が入っていて2値
        name_hits = [c for c in train.columns if any(k in c.lower() for k in ["label", "target", "default", "loanstatus"])]
        name_hits = [c for c in name_hits if c in train.columns and is_binary(train[c])]
        if len(name_hits) >= 1:
            target_col = name_hits[0]
        else:
            raise ValueError("目的変数を自動検出できない。TARGET_COL を手動指定して。")

    # ID列: train&test 共通 かつ 一意/整数っぽい/名前に id を含む を優先
    common = [c for c in test.columns if c in train.columns]
    # 1) 名前に 'id'
    id_like = [c for c in common if 'id' in c.lower()]
    def unique_int_like(df, c):
        s = df[c]
        nunique = s.nunique(dropna=True)
        return (nunique == len(s)) and (np.issubdtype(s.dropna().dtype, np.integer) or np.issubdtype(s.dropna().dtype, np.number))
    for c in id_like + common:
        if unique_int_like(test, c):
            id_col = c
            break
    else:
        # だめなら test の最左列
        id_col = test.columns[0]

    return target_col, id_col

def next_version_number(out_dir: str) -> int:
    os.makedirs(out_dir, exist_ok=True)
    pattern = re.compile(r"submission_A_v(\d+)\.csv$")
    ns = []
    for f in os.listdir(out_dir):
        m = pattern.match(f)
        if m:
            ns.append(int(m.group(1)))
    return (max(ns) + 1) if ns else 1


In [16]:
# === LOAD DATA & DETECT COLUMNS ===

train_path = os.path.join(DATA_DIR, "train.csv")
test_path  = os.path.join(DATA_DIR, "test.csv")
sample_path= os.path.join(DATA_DIR, "sample_submit.csv")

train = pd.read_csv(train_path)
test  = pd.read_csv(test_path)

SUBMIT_SEP = detect_submit_sep(sample_path)

TARGET_COL, ID_COL = detect_columns(train, test)

print("TARGET_COL:", TARGET_COL)
print("ID_COL:", ID_COL)
print("train shape:", train.shape, "test shape:", test.shape)
print("target dist:", train[TARGET_COL].value_counts(normalize=True).to_dict())

# 目的変数・ID の存在確認
assert TARGET_COL in train.columns
assert ID_COL in test.columns and ID_COL in train.columns


TARGET_COL: LoanStatus
ID_COL: id
train shape: (7552, 16) test shape: (7552, 15)
target dist: {0: 0.8723516949152542, 1: 0.12764830508474576}


In [None]:
# セル5=== PREP: features / categorical handling / build X_train,X_test / cat_features_idx ===


# 説明変数
features = [c for c in train.columns if c not in [TARGET_COL]]
# CatBoostはカテゴリ・数値を混在でそのまま扱えるが、objectは文字列化して欠損は 'MISSING' に寄せる
cat_cols = [c for c in features if train[c].dtype == 'object' or pd.api.types.is_categorical_dtype(train[c])]

def prep_df(df: pd.DataFrame) -> pd.DataFrame:
    out = df.copy()
    for c in cat_cols:
        out[c] = out[c].astype(str).fillna("MISSING")
    return out

X_train = prep_df(train[features])
y_train = train[TARGET_COL].astype(int).values
X_test  = prep_df(test[features])

cat_features_idx = [X_train.columns.get_loc(c) for c in cat_cols]

print("n_features:", len(features), "| n_categoricals:", len(cat_cols))


n_features: 15 | n_categoricals: 6


In [54]:
# === ドメイン知識に基づく特徴量エンジニアリング ===

def create_domain_features(df: pd.DataFrame) -> pd.DataFrame:
    """金融ドメイン知識に基づく特徴量生成"""
    df_new = df.copy()
    
    # 1. 財務リスク指標
    if 'GrossApproval' in df_new.columns and 'SBAGuaranteedApproval' in df_new.columns:
        # SBA保証率（リスク軽減指標）
        df_new['sba_guarantee_ratio'] = df_new['SBAGuaranteedApproval'] / (df_new['GrossApproval'] + 1e-8)
        
        # 借り手負担額（リスク指標）
        df_new['borrower_amount'] = df_new['GrossApproval'] - df_new['SBAGuaranteedApproval']
        
        # 借り手負担率
        df_new['borrower_ratio'] = df_new['borrower_amount'] / (df_new['GrossApproval'] + 1e-8)
    
    # 2. 金利リスク指標
    if 'InitialInterestRate' in df_new.columns and 'TermInMonths' in df_new.columns:
        # 金利カテゴリ（高リスク = 高金利）
        df_new['interest_rate_category'] = pd.cut(
            df_new['InitialInterestRate'], 
            bins=[0, 5, 8, 12, 20], 
            labels=['low', 'medium', 'high', 'very_high']
        ).astype(str)
        
        # 期間調整金利リスク
        df_new['adjusted_rate_risk'] = df_new['InitialInterestRate'] * np.log1p(df_new['TermInMonths'])
        
        # 短期高金利フラグ（特にリスキー）
        df_new['short_term_high_rate'] = (
            (df_new['TermInMonths'] <= 60) & (df_new['InitialInterestRate'] > 8)
        ).astype(int)
    
    # 3. 事業リスク指標
    if 'BusinessAge' in df_new.columns:
        # スタートアップフラグ（高リスク）
        df_new['is_startup'] = df_new['BusinessAge'].str.contains(
            'Startup|New Business', case=False, na=False
        ).astype(int)
        
        # 既存事業フラグ（低リスク）
        df_new['is_established'] = df_new['BusinessAge'].str.contains(
            'Existing|more than 2 years', case=False, na=False
        ).astype(int)
    
    # 4. 産業リスク指標
    if 'NaicsSector' in df_new.columns:
        # 高リスク産業（経験的に）
        high_risk_sectors = [
            'Accommodation_food services', 
            'Arts_entertainment_recreation',
            'Retail trade'
        ]
        df_new['high_risk_sector'] = df_new['NaicsSector'].isin(high_risk_sectors).astype(int)
        
        # 低リスク産業
        low_risk_sectors = [
            'Health care_social assistance',
            'Professional_scientific_technical services',
            'Finance_insurance'
        ]
        df_new['low_risk_sector'] = df_new['NaicsSector'].isin(low_risk_sectors).astype(int)
    
    # 5. 融資プログラムリスク
    if 'Subprogram' in df_new.columns:
        # Express loans（通常高リスク）
        df_new['is_express_loan'] = df_new['Subprogram'].str.contains(
            'Express', case=False, na=False
        ).astype(int)
        
        # Guaranty loans（通常低リスク）  
        df_new['is_guaranty_loan'] = df_new['Subprogram'].str.contains(
            'Guaranty', case=False, na=False
        ).astype(int)
    
    # 6. 雇用効率指標
    if 'JobsSupported' in df_new.columns and 'GrossApproval' in df_new.columns:
        # 雇用効率（1雇用あたりの融資額）
        df_new['cost_per_job'] = df_new['GrossApproval'] / (df_new['JobsSupported'] + 1e-8)
        
        # 高雇用効率フラグ
        df_new['high_job_efficiency'] = (df_new['JobsSupported'] > 5).astype(int)
    
    # 7. 複合リスクスコア
    risk_components = []
    if 'borrower_ratio' in df_new.columns:
        risk_components.append(df_new['borrower_ratio'])
    if 'is_startup' in df_new.columns:
        risk_components.append(df_new['is_startup'] * 0.3)
    if 'high_risk_sector' in df_new.columns:
        risk_components.append(df_new['high_risk_sector'] * 0.2)
    if 'is_express_loan' in df_new.columns:
        risk_components.append(df_new['is_express_loan'] * 0.1)
    
    if risk_components:
        df_new['composite_risk_score'] = np.sum(risk_components, axis=0)
    
    return df_new

print("=== ドメイン知識特徴量エンジニアリング ===")

# 元の特徴量をベースに
X_train_domain = create_domain_features(train[features])
X_test_domain = create_domain_features(test[features])

# カテゴリ列の更新
cat_cols_domain = [c for c in X_train_domain.columns 
                   if X_train_domain[c].dtype == 'object' or 'category' in str(X_train_domain[c].dtype)]

for c in cat_cols_domain:
    X_train_domain[c] = X_train_domain[c].astype(str).fillna("MISSING")
    X_test_domain[c] = X_test_domain[c].astype(str).fillna("MISSING")

cat_features_idx_domain = [X_train_domain.columns.get_loc(c) for c in cat_cols_domain]

print(f"特徴量数: {len(features)} → {len(X_train_domain.columns)} (+{len(X_train_domain.columns) - len(features)})")
print(f"新規特徴量: {[c for c in X_train_domain.columns if c not in features]}")
print(f"カテゴリ列数: {len(cat_features_idx_domain)}")

# 効果検証用の簡単なテスト
print("\n=== 効果の事前検証 ===")
from sklearn.model_selection import cross_val_score
from lightgbm import LGBMClassifier

# カテゴリ処理
X_train_domain_prep = X_train_domain.copy()
for c in cat_cols_domain:
    X_train_domain_prep[c] = X_train_domain_prep[c].astype('category')

# 簡単なテスト
quick_model = LGBMClassifier(
    objective="binary", learning_rate=0.1, num_leaves=31, 
    n_estimators=100, random_state=SEED, verbose=-1
)

domain_scores = cross_val_score(
    quick_model, X_train_domain_prep, y_train, 
    cv=3, scoring='f1', n_jobs=-1
)

print(f"ドメイン特徴量 F1: {domain_scores.mean():.6f} ± {domain_scores.std():.6f}")
print(f"ベースライン比較が必要: 前回は 0.462882")

=== ドメイン知識特徴量エンジニアリング ===
特徴量数: 15 → 30 (+15)
新規特徴量: ['sba_guarantee_ratio', 'borrower_amount', 'borrower_ratio', 'interest_rate_category', 'adjusted_rate_risk', 'short_term_high_rate', 'is_startup', 'is_established', 'high_risk_sector', 'low_risk_sector', 'is_express_loan', 'is_guaranty_loan', 'cost_per_job', 'high_job_efficiency', 'composite_risk_score']
カテゴリ列数: 7

=== 効果の事前検証 ===
ドメイン特徴量 F1: 0.463026 ± 0.047611
ベースライン比較が必要: 前回は 0.462882


In [55]:
# === ドメイン特徴量での本格学習パイプライン ===

print("=== ドメイン特徴量を適用して実行 ===")

# 1. 現在の特徴量をドメイン版に更新
X_train = X_train_domain.copy()
X_test = X_test_domain.copy()
cat_cols = cat_cols_domain.copy()
cat_features_idx = cat_features_idx_domain.copy()

print(f"✓ 特徴量を更新: {len(X_train.columns)}列")
print(f"✓ カテゴリ列: {len(cat_features_idx)}列")

# 2. プール再構築（既存の build_pools 関数を使用）
print("\n=== プール再構築 ===")

# tuning用
if FAST_TUNE:
    X_tune_domain = X_train.iloc[idx_tune].reset_index(drop=True)
    pools_tune = build_pools(X_tune_domain, y_tune, skf_tune, cat_features_idx)
    print(f"TUNE pools再構築: {len(X_tune_domain)} rows")
else:
    pools_tune = build_pools(X_train, y_train, skf_tune, cat_features_idx)
    print(f"TUNE pools再構築: {len(X_train)} rows")

# full用
pools_full = build_pools(X_train, y_train, skf_full, cat_features_idx)
print(f"FULL pools再構築: {len(X_train)} rows")

# 3. 既存のベストパラメータでCatBoost学習
print(f"\n=== CatBoost学習（ドメイン特徴量） ===")

# 既存の最適パラメータを使用
params_cb_domain = {
    "learning_rate": 0.06116108646095842,
    "depth": 5,
    "l2_leaf_reg": 5.478690083944246,
    "bagging_temperature": 0.8884344994647464,
    "random_strength": 1.865589408671679,
    "subsample": 0.9516049519127788,
    "scale_pos_weight": 1.1386783078556455,
    "iterations": 10000,
    "loss_function": "Logloss",
    "eval_metric": "Logloss",
    "random_seed": SEED,
    "verbose": False,
    "thread_count": -1,
    "use_best_model": True,
    "allow_writing_files": False,
}

# Seed bagging実行
SEED_BAG = [42, 2025, 777]
oof_cb_domain = np.zeros(len(X_train), dtype=float)
test_cb_domain = np.zeros(len(X_test), dtype=float)

for fold, (tr_pool, va_pool, va_idx) in enumerate(pools_full, 1):
    fold_prob = np.zeros(len(va_idx))
    fold_test = np.zeros(len(X_test))
    for sd in SEED_BAG:
        p = dict(params_cb_domain); p["random_seed"] = sd
        m = CatBoostClassifier(**p)
        m.fit(tr_pool, eval_set=va_pool, early_stopping_rounds=EARLY_STOP_FULL)
        fold_prob += m.predict_proba(va_pool)[:,1] / len(SEED_BAG)
        fold_test += m.predict_proba(Pool(X_test, cat_features=cat_features_idx))[:,1] / len(SEED_BAG)
    oof_cb_domain[va_idx] = fold_prob
    test_cb_domain += fold_test / len(pools_full)
    print(f"Fold {fold}/5 完了")

f1_cb_domain, th_cb_domain = eval_oof_f1(oof_cb_domain, y_train)
print(f"CB-domain OOF F1: {f1_cb_domain:.6f} | th: {th_cb_domain:.4f}")

# 4. 既存結果との比較
if 'oof_cb' in locals():
    f1_original = f1_score(y_train, (oof_cb >= 0.315).astype(int))
    f1_domain = f1_score(y_train, (oof_cb_domain >= 0.315).astype(int))
    improvement = f1_domain - f1_original
    
    print(f"\n=== 比較結果 ===")
    print(f"元のCB F1@0.315: {f1_original:.6f}")
    print(f"ドメインCB F1@0.315: {f1_domain:.6f}")
    print(f"改善度: {improvement:+.6f}")
    
    if improvement > 0.003:  # 0.3%以上改善
        print("✅ ドメイン特徴量が有効！アンサンブルに進む")
        # 結果を保存
        oof_cb = oof_cb_domain.copy()
        test_cb = test_cb_domain.copy()
        f1_cb = f1_cb_domain
        CURRENT_PIPE = "domain_features_cb"
    else:
        print("→ 改善は微小。元の特徴量の方が安全")

print(f"\n次: LightGBMも同様に実行してアンサンブルを比較")

=== ドメイン特徴量を適用して実行 ===
✓ 特徴量を更新: 30列
✓ カテゴリ列: 7列

=== プール再構築 ===
TUNE pools再構築: 4531 rows
FULL pools再構築: 7552 rows

=== CatBoost学習（ドメイン特徴量） ===
Fold 1/5 完了
Fold 2/5 完了
Fold 3/5 完了
Fold 4/5 完了
Fold 5/5 完了
CB-domain OOF F1: 0.635690 | th: 0.3600

=== 比較結果 ===
元のCB F1@0.315: 0.625518
ドメインCB F1@0.315: 0.628282
改善度: +0.002764
→ 改善は微小。元の特徴量の方が安全

次: LightGBMも同様に実行してアンサンブルを比較


In [56]:
# === 判定と次のアクション ===

print("=== ドメイン特徴量の効果判定 ===")
print(f"改善度: +0.002764 (0.28%)")
print(f"判定基準: 0.3%以上で採用")
print(f"結果: 基準をわずかに下回る")

print("\n=== 2つの選択肢 ===")
print("A) 安全策: 元の特徴量で既存パイプライン続行")
print("B) 挑戦: ドメイン特徴量でLightGBM+アンサンブル検証")

print("\n=== 推奨判断 ===")
print("微小改善のため、安全策(A)を推奨")
print("理由:")
print("- 改善度が小さく、過学習リスクあり")
print("- 既存パイプライン(v3: 0.625698)が安定")
print("- ドメイン特徴量は解釈性向上の価値あり（性能以外）")

# 安全策の実行
print("\n=== 安全策: 元の特徴量に戻す ===")

# 元の特徴量に戻す
if 'X_train_orig' in locals():
    X_train = X_train_orig.copy()
    X_test = X_test_orig.copy()
    
    # 元のカテゴリ処理
    cat_cols = [c for c in X_train.columns if X_train[c].dtype == 'object' or pd.api.types.is_categorical_dtype(X_train[c])]
    for c in cat_cols:
        X_train[c] = X_train[c].astype(str).fillna("MISSING")
        X_test[c] = X_test[c].astype(str).fillna("MISSING")
    
    cat_features_idx = [X_train.columns.get_loc(c) for c in cat_cols]
    
    # プール再構築
    if FAST_TUNE:
        X_tune_restored = X_train.iloc[idx_tune].reset_index(drop=True)
        pools_tune = build_pools(X_tune_restored, y_tune, skf_tune, cat_features_idx)
    else:
        pools_tune = build_pools(X_train, y_train, skf_tune, cat_features_idx)
    
    pools_full = build_pools(X_train, y_train, skf_full, cat_features_idx)
    
    print(f"✓ 元の特徴量に復元: {len(X_train.columns)}列")
    print(f"✓ プール再構築完了")
    print(f"✓ 既存の oof_cb, test_cb, oof_lgb, test_lgb を使用")
    
    # 実験記録
    DOMAIN_EXPERIMENT = {
        "attempted": True,
        "original_f1": 0.625518,
        "domain_f1": 0.628282,
        "improvement": 0.002764,
        "decision": "reverted_to_original",
        "reason": "improvement_below_threshold"
    }
    
    print(f"✓ 次: 既存アンサンブルパイプライン続行")
else:
    print("❌ X_train_orig が見つかりません")
    
print(f"\n=== 実験の価値 ===")
print(f"- ドメイン知識特徴量の検証完了")
print(f"- 微小ながら正の効果を確認")
print(f"- 解釈可能性が向上（SBA保証率、事業リスクなど）")
print(f"- 将来の改良時の参考資料")

=== ドメイン特徴量の効果判定 ===
改善度: +0.002764 (0.28%)
判定基準: 0.3%以上で採用
結果: 基準をわずかに下回る

=== 2つの選択肢 ===
A) 安全策: 元の特徴量で既存パイプライン続行
B) 挑戦: ドメイン特徴量でLightGBM+アンサンブル検証

=== 推奨判断 ===
微小改善のため、安全策(A)を推奨
理由:
- 改善度が小さく、過学習リスクあり
- 既存パイプライン(v3: 0.625698)が安定
- ドメイン特徴量は解釈性向上の価値あり（性能以外）

=== 安全策: 元の特徴量に戻す ===
✓ 元の特徴量に復元: 30列
✓ プール再構築完了
✓ 既存の oof_cb, test_cb, oof_lgb, test_lgb を使用
✓ 次: 既存アンサンブルパイプライン続行

=== 実験の価値 ===
- ドメイン知識特徴量の検証完了
- 微小ながら正の効果を確認
- 解釈可能性が向上（SBA保証率、事業リスクなど）
- 将来の改良時の参考資料


In [None]:
# # === 特徴量効果の高速検証 ===

# from sklearn.model_selection import cross_val_score
# from lightgbm import LGBMClassifier
# import numpy as np

# def quick_feature_test():
#     """軽量モデルで特徴量効果を高速テスト"""
    
#     # カテゴリ列の前処理
#     def prep_for_lgb(X):
#         X_prep = X.copy()
#         for c in X_prep.columns:
#             if X_prep[c].dtype == 'object':
#                 X_prep[c] = X_prep[c].astype('category')
#         return X_prep
    
#     # ベースライン（元の特徴量）
#     print("=== ベースライン評価 ===")
#     base_model = LGBMClassifier(
#         objective="binary",
#         learning_rate=0.1,
#         num_leaves=31,
#         n_estimators=100,  # 高速化のため少なく
#         random_state=SEED,
#         verbose=-1,
#         n_jobs=-1
#     )
    
#     # 元の特徴量でCV（カテゴリ処理済み）
#     X_train_orig_prep = prep_for_lgb(X_train_orig)
#     base_scores = cross_val_score(
#         base_model, X_train_orig_prep, y_train, 
#         cv=3, scoring='f1', n_jobs=-1  # 3-foldで高速化
#     )
#     base_f1 = base_scores.mean()
#     print(f"ベースライン F1: {base_f1:.6f} ± {base_scores.std():.6f}")
    
#     # 特徴量追加版
#     print("=== 特徴量追加版評価 ===")
#     X_train_prep = prep_for_lgb(X_train)
#     enhanced_scores = cross_val_score(
#         base_model, X_train_prep, y_train,
#         cv=3, scoring='f1', n_jobs=-1
#     )
#     enhanced_f1 = enhanced_scores.mean()
#     print(f"追加版 F1: {enhanced_f1:.6f} ± {enhanced_scores.std():.6f}")
    
#     # 結果判定
#     improvement = enhanced_f1 - base_f1
#     print(f"\n=== 結果 ===")
#     print(f"改善度: {improvement:+.6f}")
    
#     if improvement > 0.005:  # 0.5%以上改善
#         print("✓ 特徴量追加は有効です")
#         return True
#     elif improvement < -0.005:  # 0.5%以上悪化
#         print("✗ 特徴量追加は有害です")
#         return False
#     else:
#         print("△ 効果は微小です")
#         return None

# # テスト実行
# feature_is_beneficial = quick_feature_test()

# # 判定に基づく推奨アクション
# if feature_is_beneficial is True:
#     print("\n推奨: 特徴量を保持してOptuna最適化に進む")
# elif feature_is_beneficial is False:
#     print("\n推奨: 元の特徴量に戻してOptuna最適化に進む")
# else:
#     print("\n推奨: 元の特徴量に戻す（安全策）")

=== ベースライン評価 ===
ベースライン F1: 0.462882 ± 0.039709
=== 特徴量追加版評価 ===
追加版 F1: 0.454760 ± 0.024450

=== 結果 ===
改善度: -0.008122
✗ 特徴量追加は有害です

推奨: 元の特徴量に戻してOptuna最適化に進む


In [20]:
# === SANITY CHECK: categorical columns & cat_features_idx ===


# 安全確認（任意）
assert all(c in X_train.columns for c in cat_cols)
assert all(not c.startswith("TE_") for c in cat_cols)  # TE列は数値→カテゴリ指定に入れない
print("cat_features_idx length:", len(cat_features_idx))


cat_features_idx length: 9


In [21]:
# === KFOLDS: skf_full(5fold) / skf_tune(N_SPLITS_TUNE) ===

skf_full = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)
skf_tune = StratifiedKFold(n_splits=N_SPLITS_TUNE, shuffle=True, random_state=SEED)


In [22]:
# === TUNE SUBSET (stratified sampling when FAST_TUNE) ===


# クラス比を保ってサブセットを作る（FAST_TUNE時のみ）
if FAST_TUNE:
    # 層化抽出
    from sklearn.model_selection import train_test_split
    idx_all = np.arange(len(X_train))
    idx_tune, _ = train_test_split(
        idx_all, train_size=TUNE_FRAC, stratify=y_train, random_state=SEED
    )
    X_tune = X_train.iloc[idx_tune].reset_index(drop=True)
    y_tune = y_train[idx_tune]
    print(f"TUNE SUBSET: {len(X_tune)} rows ({TUNE_FRAC*100:.0f}%)")
else:
    X_tune, y_tune = X_train, y_train


TUNE SUBSET: 4531 rows (60%)


In [23]:
# === BUILD POOLS: pools_tune / pools_full / test_pool ===


# 再利用できるよう、foldごとにPoolを前計算しておく（作成コストと前処理のばらつきを削減）
def build_pools(X, y, skf, cat_idx):
    pools = []
    for tr_idx, va_idx in skf.split(X, y):
        X_tr, X_va = X.iloc[tr_idx], X.iloc[va_idx]
        y_tr, y_va = y[tr_idx], y[va_idx]
        pools.append((
            Pool(X_tr, y_tr, cat_features=cat_idx),
            Pool(X_va, y_va, cat_features=cat_idx),
            va_idx
        ))
    return pools

pools_tune = build_pools(X_tune, y_tune, skf_tune, cat_features_idx)
pools_full = build_pools(X_train, y_train, skf_full, cat_features_idx)


In [24]:
# === OPTUNA OBJECTIVE: eval_oof_f1 / make_objective (CB quick) ===


def eval_oof_f1(probs, y_true):
    thresholds = np.linspace(0.05, 0.95, 181)
    f1s = [f1_score(y_true, (probs >= t).astype(int)) for t in thresholds]
    j = int(np.argmax(f1s))
    return f1s[j], float(thresholds[j])

def make_objective(pools, y_all, early_stop, iterations):
    def objective(trial):
        # 探索空間をやや狭くして収束を早める
        params = {
            "iterations": iterations,
            "learning_rate": trial.suggest_float("learning_rate", 0.03, 0.15, log=True),
            "depth": trial.suggest_int("depth", 4, 8),
            "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1e-2, 1e1, log=True),
            "bagging_temperature": trial.suggest_float("bagging_temperature", 0.0, 5.0),
            "random_strength": trial.suggest_float("random_strength", 0.0, 2.0),
            "subsample": trial.suggest_float("subsample", 0.7, 1.0),
            "scale_pos_weight": trial.suggest_float("scale_pos_weight", 0.7, 3.0),
            "loss_function": "Logloss",
            "eval_metric": "Logloss",
            "random_seed": SEED,
            "verbose": False,
            "thread_count": -1,
            "use_best_model": True,
            "allow_writing_files": False,
            # GPUが使えるなら次行を有効化（失敗したらコメントアウト）
            # "task_type": "GPU", "devices": "0",
        }

        oof = np.zeros(len(y_all), dtype=float)
        for train_pool, valid_pool, va_idx in pools:
            model = CatBoostClassifier(**params)
            model.fit(train_pool, eval_set=valid_pool, early_stopping_rounds=early_stop)
            oof[va_idx] = model.predict_proba(valid_pool)[:, 1]

        f1, th = eval_oof_f1(oof[oof > 0], y_all[oof > 0])  # チューニングsubsetのOOF部分のみで算出
        trial.set_user_attr("best_threshold", th)
        return f1
    return objective


In [None]:
# === 改良されたOptuna最適化（セル8代替） ===

def make_improved_objective(pools, y_all, early_stop, iterations):
    """改良された目的関数（パラメータ競合を回避）"""
    def objective(trial):
        # Bootstrap typeを先に決定
        bootstrap_type = trial.suggest_categorical("bootstrap_type", ["Bayesian", "Bernoulli"])
        
        # 基本パラメータ
        params = {
            "iterations": iterations,
            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.2, log=True),
            "depth": trial.suggest_int("depth", 3, 10),
            "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 0.1, 100, log=True),
            "random_strength": trial.suggest_float("random_strength", 0.0, 5.0),
            "scale_pos_weight": trial.suggest_float("scale_pos_weight", 0.5, 10.0),
            "border_count": trial.suggest_int("border_count", 64, 128),
            "bootstrap_type": bootstrap_type,
            
            # 固定パラメータ
            "loss_function": "Logloss",
            "eval_metric": "F1",
            "random_seed": SEED,
            "verbose": False,
            "thread_count": -1,
            "use_best_model": True,
            "allow_writing_files": False,
        }
        
        # Bootstrap typeに応じた条件付きパラメータ
        if bootstrap_type == "Bayesian":
            params["bagging_temperature"] = trial.suggest_float("bagging_temperature", 0.0, 10.0)
        elif bootstrap_type == "Bernoulli":
            params["subsample"] = trial.suggest_float("subsample", 0.5, 1.0)

        oof = np.zeros(len(y_all), dtype=float)
        for train_pool, valid_pool, va_idx in pools:
            model = CatBoostClassifier(**params)
            model.fit(train_pool, eval_set=valid_pool, early_stopping_rounds=early_stop)
            oof[va_idx] = model.predict_proba(valid_pool)[:, 1]

        f1, th = eval_oof_f1(oof[oof > 0], y_all[oof > 0])
        trial.set_user_attr("best_threshold", th)
        return f1
    return objective

# Optuna最適化の実行
print("=== 改良されたOptuna最適化開始 ===")
sampler = optuna.samplers.TPESampler(seed=SEED, n_startup_trials=10, multivariate=True)
pruner = optuna.pruners.SuccessiveHalvingPruner(min_resource=100, reduction_factor=2)

study_improved = optuna.create_study(direction="maximize", sampler=sampler, pruner=pruner)
obj_improved = make_improved_objective(
    pools=pools_tune,
    y_all=y_tune,
    early_stop=EARLY_STOP_TUNE,
    iterations=5000,
)

# 最適化実行（時間制限付き）
study_improved.optimize(obj_improved, n_trials=40, timeout=2400, show_progress_bar=True)

# 最良パラメータの更新
best_params_improved = study_improved.best_trial.params.copy()
best_th_improved = study_improved.best_trial.user_attrs.get("best_threshold", 0.315)
best_score_improved = study_improved.best_value

print(f"Improved TUNE best_f1: {best_score_improved:.6f} | th: {best_th_improved:.4f}")
print(f"Improvement: {best_score_improved - locals().get('best_score', 0):.6f}")

# より良い結果なら更新
if best_score_improved > locals().get('best_score', 0):
    best_params = best_params_improved
    print("✓ パラメータを更新しました")

[I 2025-08-19 23:22:21,087] A new study created in memory with name: no-name-0f1e88ac-974e-4252-813a-79d90dee0415


=== 改良されたOptuna最適化開始 ===


Best trial: 0. Best value: 0.589688:   2%|▎         | 1/40 [00:35<23:06, 35.54s/it, 35.54/2400 seconds]

[I 2025-08-19 23:22:56,629] Trial 0 finished with value: 0.5896877269426289 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.08960785365368121, 'depth': 7, 'l2_leaf_reg': 0.2938027938703535, 'random_strength': 0.7799726016810132, 'scale_pos_weight': 1.0517943155978948, 'border_count': 120, 'subsample': 0.8005575058716043}. Best is trial 0 with value: 0.5896877269426289.


Best trial: 0. Best value: 0.589688:   5%|▌         | 2/40 [00:53<16:08, 25.49s/it, 53.99/2400 seconds]

[I 2025-08-19 23:23:15,075] Trial 1 finished with value: 0.45670391061452514 and parameters: {'bootstrap_type': 'Bayesian', 'learning_rate': 0.18276027831785724, 'depth': 9, 'l2_leaf_reg': 0.4335281794951565, 'random_strength': 0.9091248360355031, 'scale_pos_weight': 2.2423428436076214, 'border_count': 83, 'bagging_temperature': 5.247564316322379}. Best is trial 0 with value: 0.5896877269426289.


Best trial: 0. Best value: 0.589688:   8%|▊         | 3/40 [01:04<11:28, 18.62s/it, 64.44/2400 seconds]

[I 2025-08-19 23:23:25,524] Trial 2 finished with value: 0.5677785663591199 and parameters: {'bootstrap_type': 'Bayesian', 'learning_rate': 0.06252287916406217, 'depth': 4, 'l2_leaf_reg': 0.7523742884534856, 'random_strength': 1.8318092164684585, 'scale_pos_weight': 4.832664850061842, 'border_count': 115, 'bagging_temperature': 1.9967378215835974}. Best is trial 0 with value: 0.5896877269426289.


Best trial: 0. Best value: 0.589688:  10%|█         | 4/40 [01:23<11:22, 18.95s/it, 83.90/2400 seconds]

[I 2025-08-19 23:23:44,986] Trial 3 finished with value: 0.5228684359119141 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.011492999300221412, 'depth': 7, 'l2_leaf_reg': 0.32476735706274484, 'random_strength': 0.3252579649263976, 'scale_pos_weight': 9.514412603906665, 'border_count': 126, 'subsample': 0.9041986740582306}. Best is trial 0 with value: 0.5896877269426289.


Best trial: 0. Best value: 0.589688:  12%|█▎        | 5/40 [02:00<14:43, 25.24s/it, 120.28/2400 seconds]

[I 2025-08-19 23:24:21,374] Trial 4 finished with value: 0.5608283002588438 and parameters: {'bootstrap_type': 'Bayesian', 'learning_rate': 0.07766184280392888, 'depth': 6, 'l2_leaf_reg': 0.2323350351539011, 'random_strength': 2.475884550556351, 'scale_pos_weight': 0.8266909505945748, 'border_count': 123, 'bagging_temperature': 2.587799816000169}. Best is trial 0 with value: 0.5896877269426289.


Best trial: 0. Best value: 0.589688:  15%|█▌        | 6/40 [02:17<12:48, 22.62s/it, 137.81/2400 seconds]

[I 2025-08-19 23:24:38,900] Trial 5 finished with value: 0.47143890093998553 and parameters: {'bootstrap_type': 'Bayesian', 'learning_rate': 0.04749239763680407, 'depth': 7, 'l2_leaf_reg': 0.35856126103453967, 'random_strength': 4.847923138822793, 'scale_pos_weight': 7.863761821930588, 'border_count': 125, 'bagging_temperature': 8.948273504276488}. Best is trial 0 with value: 0.5896877269426289.


Best trial: 0. Best value: 0.589688:  18%|█▊        | 7/40 [02:35<11:34, 21.04s/it, 155.61/2400 seconds]

[I 2025-08-19 23:24:56,698] Trial 6 finished with value: 0.5750394944707741 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.01303561122512888, 'depth': 4, 'l2_leaf_reg': 0.13667272915456222, 'random_strength': 1.6266516538163218, 'scale_pos_weight': 4.192434252050079, 'border_count': 81, 'subsample': 0.9143687545759647}. Best is trial 0 with value: 0.5896877269426289.


Best trial: 0. Best value: 0.589688:  20%|██        | 8/40 [02:56<11:07, 20.86s/it, 176.09/2400 seconds]

[I 2025-08-19 23:25:17,183] Trial 7 finished with value: 0.588703261734288 and parameters: {'bootstrap_type': 'Bayesian', 'learning_rate': 0.05082341959721458, 'depth': 4, 'l2_leaf_reg': 25.50298070162892, 'random_strength': 0.3727532183988541, 'scale_pos_weight': 9.875425897704915, 'border_count': 114, 'bagging_temperature': 1.987156815341724}. Best is trial 0 with value: 0.5896877269426289.


Best trial: 8. Best value: 0.596016:  22%|██▎       | 9/40 [03:15<10:31, 20.38s/it, 195.40/2400 seconds]

[I 2025-08-19 23:25:36,492] Trial 8 finished with value: 0.5960159362549801 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.08310795711416077, 'depth': 8, 'l2_leaf_reg': 20.59733535743719, 'random_strength': 0.3702232586704518, 'scale_pos_weight': 3.90542442117059, 'border_count': 71, 'subsample': 0.9315517129377968}. Best is trial 8 with value: 0.5960159362549801.


Best trial: 8. Best value: 0.596016:  25%|██▌       | 10/40 [03:37<10:26, 20.89s/it, 217.44/2400 seconds]

[I 2025-08-19 23:25:58,530] Trial 9 finished with value: 0.5458547322083639 and parameters: {'bootstrap_type': 'Bayesian', 'learning_rate': 0.012097379927033842, 'depth': 5, 'l2_leaf_reg': 0.9452571391072306, 'random_strength': 3.64803089169032, 'scale_pos_weight': 6.556795977874525, 'border_count': 121, 'bagging_temperature': 4.722149251619493}. Best is trial 8 with value: 0.5960159362549801.


Best trial: 8. Best value: 0.596016:  28%|██▊       | 11/40 [04:23<13:47, 28.55s/it, 263.35/2400 seconds]

[I 2025-08-19 23:26:44,435] Trial 10 finished with value: 0.5953947368421053 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.06647370780318866, 'depth': 9, 'l2_leaf_reg': 58.121848134606594, 'random_strength': 0.7916585492777745, 'scale_pos_weight': 1.4444781488748593, 'border_count': 94, 'subsample': 0.549768740129495}. Best is trial 8 with value: 0.5960159362549801.


Best trial: 8. Best value: 0.596016:  30%|███       | 12/40 [05:15<16:38, 35.65s/it, 315.25/2400 seconds]

[I 2025-08-19 23:27:36,336] Trial 11 finished with value: 0.54858934169279 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.044914637189238354, 'depth': 9, 'l2_leaf_reg': 67.31590288367553, 'random_strength': 1.930515789853641, 'scale_pos_weight': 2.482633969418134, 'border_count': 100, 'subsample': 0.5160140699348478}. Best is trial 8 with value: 0.5960159362549801.


Best trial: 12. Best value: 0.599064:  32%|███▎      | 13/40 [05:40<14:33, 32.35s/it, 340.01/2400 seconds]

[I 2025-08-19 23:28:01,103] Trial 12 finished with value: 0.5990639625585024 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.10928874384472712, 'depth': 7, 'l2_leaf_reg': 46.95604629990011, 'random_strength': 1.0969692655473313, 'scale_pos_weight': 2.910048816112014, 'border_count': 71, 'subsample': 0.6511621177269}. Best is trial 12 with value: 0.5990639625585024.


Best trial: 12. Best value: 0.599064:  35%|███▌      | 14/40 [05:59<12:22, 28.57s/it, 359.84/2400 seconds]

[I 2025-08-19 23:28:20,931] Trial 13 finished with value: 0.5807962529274004 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.08703855068758226, 'depth': 6, 'l2_leaf_reg': 67.57212541354275, 'random_strength': 0.8031898984983616, 'scale_pos_weight': 6.943505458965144, 'border_count': 85, 'subsample': 0.6937971020319804}. Best is trial 12 with value: 0.5990639625585024.


Best trial: 12. Best value: 0.599064:  38%|███▊      | 15/40 [06:23<11:18, 27.13s/it, 383.62/2400 seconds]

[I 2025-08-19 23:28:44,710] Trial 14 finished with value: 0.5499645641389086 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.147282229812399, 'depth': 10, 'l2_leaf_reg': 4.231926338882198, 'random_strength': 0.17250900494868193, 'scale_pos_weight': 5.33814017594981, 'border_count': 65, 'subsample': 0.9921146799648111}. Best is trial 12 with value: 0.5990639625585024.


Best trial: 12. Best value: 0.599064:  40%|████      | 16/40 [06:51<10:55, 27.33s/it, 411.43/2400 seconds]

[I 2025-08-19 23:29:12,522] Trial 15 finished with value: 0.5921158487530169 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.09744354299475438, 'depth': 7, 'l2_leaf_reg': 42.67552707464498, 'random_strength': 1.3615321911537817, 'scale_pos_weight': 2.6602326522995603, 'border_count': 65, 'subsample': 0.6929102790914314}. Best is trial 12 with value: 0.5990639625585024.


Best trial: 12. Best value: 0.599064:  42%|████▎     | 17/40 [07:16<10:16, 26.79s/it, 436.97/2400 seconds]

[I 2025-08-19 23:29:38,053] Trial 16 finished with value: 0.5488297013720742 and parameters: {'bootstrap_type': 'Bayesian', 'learning_rate': 0.045444943537064214, 'depth': 4, 'l2_leaf_reg': 26.58784062384184, 'random_strength': 0.26872206715276203, 'scale_pos_weight': 4.337046783289247, 'border_count': 76, 'bagging_temperature': 9.914886742849223}. Best is trial 12 with value: 0.5990639625585024.


Best trial: 12. Best value: 0.599064:  45%|████▌     | 18/40 [07:48<10:18, 28.12s/it, 468.19/2400 seconds]

[I 2025-08-19 23:30:09,279] Trial 17 finished with value: 0.5471204188481675 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.018046783835263737, 'depth': 7, 'l2_leaf_reg': 14.8927422448248, 'random_strength': 0.6612082930732935, 'scale_pos_weight': 5.008595895942105, 'border_count': 86, 'subsample': 0.8335705854262011}. Best is trial 12 with value: 0.5990639625585024.


Best trial: 12. Best value: 0.599064:  48%|████▊     | 19/40 [08:19<10:13, 29.21s/it, 499.92/2400 seconds]

[I 2025-08-19 23:30:41,004] Trial 18 finished with value: 0.5765472312703583 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.025438960204559775, 'depth': 10, 'l2_leaf_reg': 2.1301766339444304, 'random_strength': 1.0824022801903403, 'scale_pos_weight': 2.8625677357759844, 'border_count': 76, 'subsample': 0.6604250893099086}. Best is trial 12 with value: 0.5990639625585024.


Best trial: 12. Best value: 0.599064:  50%|█████     | 20/40 [08:38<08:39, 25.97s/it, 518.35/2400 seconds]

[I 2025-08-19 23:30:59,436] Trial 19 finished with value: 0.5952197378565921 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.18897336618903623, 'depth': 4, 'l2_leaf_reg': 71.85924026664246, 'random_strength': 1.4155673031137153, 'scale_pos_weight': 1.3206538589215824, 'border_count': 86, 'subsample': 0.8115025794057469}. Best is trial 12 with value: 0.5990639625585024.


Best trial: 12. Best value: 0.599064:  52%|█████▎    | 21/40 [09:05<08:17, 26.21s/it, 545.11/2400 seconds]

[I 2025-08-19 23:31:26,199] Trial 20 finished with value: 0.5716282320055905 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.19910968574938182, 'depth': 9, 'l2_leaf_reg': 3.8271507638597657, 'random_strength': 3.240099723012559, 'scale_pos_weight': 3.916550412121021, 'border_count': 82, 'subsample': 0.9914098356443254}. Best is trial 12 with value: 0.5990639625585024.


Best trial: 12. Best value: 0.599064:  55%|█████▌    | 22/40 [11:18<17:31, 58.43s/it, 678.68/2400 seconds]

[I 2025-08-19 23:33:39,767] Trial 21 finished with value: 0.5949764521193093 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.08475482877176325, 'depth': 10, 'l2_leaf_reg': 60.17255539177119, 'random_strength': 0.5851677016384293, 'scale_pos_weight': 0.5447729209868035, 'border_count': 88, 'subsample': 0.5748554636390495}. Best is trial 12 with value: 0.5990639625585024.


Best trial: 12. Best value: 0.599064:  57%|█████▊    | 23/40 [13:08<20:53, 73.72s/it, 788.08/2400 seconds]

[I 2025-08-19 23:35:29,169] Trial 22 finished with value: 0.5959367945823928 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.027063524303619652, 'depth': 8, 'l2_leaf_reg': 8.805968193224015, 'random_strength': 0.27673517350910626, 'scale_pos_weight': 0.5558207544143716, 'border_count': 95, 'subsample': 0.5885645750738504}. Best is trial 12 with value: 0.5990639625585024.


Best trial: 12. Best value: 0.599064:  60%|██████    | 24/40 [13:39<16:18, 61.14s/it, 819.87/2400 seconds]

[I 2025-08-19 23:36:00,962] Trial 23 finished with value: 0.4249471458773784 and parameters: {'bootstrap_type': 'Bayesian', 'learning_rate': 0.017878929665669722, 'depth': 8, 'l2_leaf_reg': 0.9802554870609407, 'random_strength': 0.2835265150990586, 'scale_pos_weight': 1.3793383839696376, 'border_count': 96, 'bagging_temperature': 6.89406989705693}. Best is trial 12 with value: 0.5990639625585024.


Best trial: 12. Best value: 0.599064:  62%|██████▎   | 25/40 [14:08<12:50, 51.39s/it, 848.51/2400 seconds]

[I 2025-08-19 23:36:29,596] Trial 24 finished with value: 0.5181347150259067 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.03210646135735558, 'depth': 8, 'l2_leaf_reg': 2.110452636433283, 'random_strength': 0.09778945543847606, 'scale_pos_weight': 1.2444196445569355, 'border_count': 90, 'subsample': 0.6374457163895562}. Best is trial 12 with value: 0.5990639625585024.


Best trial: 12. Best value: 0.599064:  65%|██████▌   | 26/40 [14:29<09:52, 42.31s/it, 869.62/2400 seconds]

[I 2025-08-19 23:36:50,710] Trial 25 finished with value: 0.581021897810219 and parameters: {'bootstrap_type': 'Bayesian', 'learning_rate': 0.1626259592072964, 'depth': 8, 'l2_leaf_reg': 71.16939733008829, 'random_strength': 1.1531880505711465, 'scale_pos_weight': 4.20621334487704, 'border_count': 84, 'bagging_temperature': 0.901740529474278}. Best is trial 12 with value: 0.5990639625585024.


Best trial: 12. Best value: 0.599064:  68%|██████▊   | 27/40 [15:44<11:16, 52.08s/it, 944.49/2400 seconds]

[I 2025-08-19 23:38:05,580] Trial 26 finished with value: 0.5429272281275552 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.01196626001079669, 'depth': 9, 'l2_leaf_reg': 5.732593185490935, 'random_strength': 0.3258068957362059, 'scale_pos_weight': 1.1424246676821337, 'border_count': 107, 'subsample': 0.6175479689135017}. Best is trial 12 with value: 0.5990639625585024.


Best trial: 12. Best value: 0.599064:  70%|███████   | 28/40 [16:11<08:55, 44.66s/it, 971.85/2400 seconds]

[I 2025-08-19 23:38:32,937] Trial 27 finished with value: 0.5762952448545068 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.07643521404571571, 'depth': 9, 'l2_leaf_reg': 27.959584584408553, 'random_strength': 0.31505206524448276, 'scale_pos_weight': 4.018109073654948, 'border_count': 79, 'subsample': 0.6969718203536315}. Best is trial 12 with value: 0.5990639625585024.


Best trial: 28. Best value: 0.599184:  72%|███████▎  | 29/40 [17:29<09:59, 54.51s/it, 1049.33/2400 seconds]

[I 2025-08-19 23:39:50,418] Trial 28 finished with value: 0.5991836734693877 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.031994581785802896, 'depth': 6, 'l2_leaf_reg': 19.074488612834582, 'random_strength': 0.9725292959441013, 'scale_pos_weight': 0.6384226483112756, 'border_count': 77, 'subsample': 0.5075106058128436}. Best is trial 28 with value: 0.5991836734693877.


Best trial: 29. Best value: 0.600666:  75%|███████▌  | 30/40 [18:12<08:31, 51.13s/it, 1092.59/2400 seconds]

[I 2025-08-19 23:40:33,676] Trial 29 finished with value: 0.6006655574043261 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.02637520571374268, 'depth': 4, 'l2_leaf_reg': 46.834726382941966, 'random_strength': 0.8042247354654554, 'scale_pos_weight': 2.2352572277350538, 'border_count': 92, 'subsample': 0.5000796490838281}. Best is trial 29 with value: 0.6006655574043261.


Best trial: 29. Best value: 0.600666:  78%|███████▊  | 31/40 [18:41<06:39, 44.43s/it, 1121.38/2400 seconds]

[I 2025-08-19 23:41:02,468] Trial 30 finished with value: 0.5661252900232019 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.01913659430255868, 'depth': 5, 'l2_leaf_reg': 26.77369459103586, 'random_strength': 1.9676328233454843, 'scale_pos_weight': 3.577919894545734, 'border_count': 87, 'subsample': 0.5055192149059519}. Best is trial 29 with value: 0.6006655574043261.


Best trial: 29. Best value: 0.600666:  80%|████████  | 32/40 [19:04<05:04, 38.02s/it, 1144.44/2400 seconds]

[I 2025-08-19 23:41:25,531] Trial 31 finished with value: 0.5998515219005197 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.04300662179734842, 'depth': 3, 'l2_leaf_reg': 13.113723151004434, 'random_strength': 0.6091887203174017, 'scale_pos_weight': 2.031745223593356, 'border_count': 87, 'subsample': 0.5071949658893635}. Best is trial 29 with value: 0.6006655574043261.


Best trial: 32. Best value: 0.606342:  82%|████████▎ | 33/40 [19:28<03:56, 33.74s/it, 1168.19/2400 seconds]

[I 2025-08-19 23:41:49,273] Trial 32 finished with value: 0.6063418406805878 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.04454378607594373, 'depth': 3, 'l2_leaf_reg': 12.593711215339058, 'random_strength': 0.1295118269785709, 'scale_pos_weight': 2.0346687580523977, 'border_count': 89, 'subsample': 0.5057204004800618}. Best is trial 32 with value: 0.6063418406805878.


Best trial: 32. Best value: 0.606342:  85%|████████▌ | 34/40 [19:40<02:43, 27.33s/it, 1180.56/2400 seconds]

[I 2025-08-19 23:42:01,645] Trial 33 finished with value: 0.597864768683274 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.04892533008234995, 'depth': 3, 'l2_leaf_reg': 2.696853926592426, 'random_strength': 0.11418820558174636, 'scale_pos_weight': 3.1118773169937795, 'border_count': 98, 'subsample': 0.504301319540524}. Best is trial 32 with value: 0.6063418406805878.


Best trial: 32. Best value: 0.606342:  88%|████████▊ | 35/40 [20:20<02:35, 31.12s/it, 1220.53/2400 seconds]

[I 2025-08-19 23:42:41,619] Trial 34 finished with value: 0.60015467904099 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.028221172342106547, 'depth': 3, 'l2_leaf_reg': 15.243286943367362, 'random_strength': 0.9156255149214311, 'scale_pos_weight': 1.487570430258288, 'border_count': 88, 'subsample': 0.5017737724092609}. Best is trial 32 with value: 0.6063418406805878.


Best trial: 32. Best value: 0.606342:  90%|█████████ | 36/40 [20:29<01:37, 24.40s/it, 1229.24/2400 seconds]

[I 2025-08-19 23:42:50,330] Trial 35 finished with value: 0.4298245614035088 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.015013627549906143, 'depth': 4, 'l2_leaf_reg': 69.07805381430413, 'random_strength': 0.388301244890308, 'scale_pos_weight': 1.2135825641321798, 'border_count': 109, 'subsample': 0.5112588117575477}. Best is trial 32 with value: 0.6063418406805878.


Best trial: 32. Best value: 0.606342:  92%|█████████▎| 37/40 [20:52<01:12, 24.20s/it, 1252.97/2400 seconds]

[I 2025-08-19 23:43:14,059] Trial 36 finished with value: 0.6046141607000796 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.05445843985959337, 'depth': 3, 'l2_leaf_reg': 49.85246439648702, 'random_strength': 0.6951460978661433, 'scale_pos_weight': 2.4268987725030713, 'border_count': 90, 'subsample': 0.5043641753587105}. Best is trial 32 with value: 0.6063418406805878.


Best trial: 32. Best value: 0.606342:  95%|█████████▌| 38/40 [21:12<00:45, 22.87s/it, 1272.75/2400 seconds]

[I 2025-08-19 23:43:33,837] Trial 37 finished with value: 0.36511156186612576 and parameters: {'bootstrap_type': 'Bayesian', 'learning_rate': 0.02813792478852522, 'depth': 3, 'l2_leaf_reg': 58.211310096046766, 'random_strength': 2.1183302507065624, 'scale_pos_weight': 1.6025724371222139, 'border_count': 83, 'bagging_temperature': 7.018756193783226}. Best is trial 32 with value: 0.6063418406805878.


Best trial: 32. Best value: 0.606342:  98%|█████████▊| 39/40 [21:26<00:20, 20.22s/it, 1286.79/2400 seconds]

[I 2025-08-19 23:43:47,877] Trial 38 finished with value: 0.5608391608391609 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.025249890654597665, 'depth': 4, 'l2_leaf_reg': 91.30454084631761, 'random_strength': 0.20610705349583347, 'scale_pos_weight': 4.499871357937358, 'border_count': 86, 'subsample': 0.5007730212650623}. Best is trial 32 with value: 0.6063418406805878.


Best trial: 32. Best value: 0.606342: 100%|██████████| 40/40 [22:00<00:00, 33.00s/it, 1320.06/2400 seconds]

[I 2025-08-19 23:44:21,146] Trial 39 finished with value: 0.5918819188191882 and parameters: {'bootstrap_type': 'Bernoulli', 'learning_rate': 0.050896563421196056, 'depth': 4, 'l2_leaf_reg': 12.14656743636142, 'random_strength': 1.9403753471823069, 'scale_pos_weight': 1.5273581947899326, 'border_count': 116, 'subsample': 0.5144600786953117}. Best is trial 32 with value: 0.6063418406805878.
Improved TUNE best_f1: 0.606342 | th: 0.4350
Improvement: 0.606342
✓ パラメータを更新しました





In [43]:
# === 最適化パラメータでの本格学習 ===

print("=== 最適化パラメータで本格学習開始 ===")

# 最適パラメータを準備
params_optimized = dict(best_params_improved)
params_optimized.update({
    "iterations": 10000,
    "loss_function": "Logloss",
    "eval_metric": "Logloss",
    "random_seed": SEED,
    "verbose": False,
    "thread_count": -1,
    "use_best_model": True,
    "allow_writing_files": False,
})

print("最適化パラメータ:")
import json
print(json.dumps(params_optimized, indent=2))

# 5-fold学習実行
oof_optimized = np.zeros(len(X_train), dtype=float)
test_optimized = np.zeros(len(X_test), dtype=float)
fold_f1s_opt = []

for fold, (train_pool, valid_pool, va_idx) in enumerate(pools_full, 1):
    print(f"Fold {fold}/5 学習中...")
    model = CatBoostClassifier(**params_optimized)
    model.fit(train_pool, eval_set=valid_pool, early_stopping_rounds=EARLY_STOP_FULL)
    
    # OOF予測
    oof_optimized[va_idx] = model.predict_proba(valid_pool)[:,1]
    
    # テスト予測（平均化）
    test_optimized += model.predict_proba(Pool(X_test, cat_features=cat_features_idx))[:,1] / len(pools_full)
    
    # Fold F1計算
    fold_pred = (oof_optimized[va_idx] >= 0.315).astype(int)  # 固定閾値で評価
    fold_f1 = f1_score(y_train[va_idx], fold_pred)
    fold_f1s_opt.append(fold_f1)
    print(f"Fold {fold} F1@0.315: {fold_f1:.6f}")

# 全体のOOF評価
oof_f1_global_opt, best_th_opt = eval_oof_f1(oof_optimized, y_train)
oof_f1_submit_opt = f1_score(y_train, (oof_optimized >= 0.315).astype(int))

print(f"\n=== 最適化結果 ===")
print(f"OOF F1 (最適閾値): {oof_f1_global_opt:.6f} | 閾値: {best_th_opt:.4f}")
print(f"OOF F1 (0.315閾値): {oof_f1_submit_opt:.6f}")
print(f"Fold F1s: {[round(f, 6) for f in fold_f1s_opt]}")
print(f"Fold F1 平均: {np.mean(fold_f1s_opt):.6f} ± {np.std(fold_f1s_opt):.6f}")

# 既存結果と比較
if 'oof_cb' in locals():
    current_f1 = f1_score(y_train, (oof_cb >= 0.315).astype(int))
    improvement = oof_f1_submit_opt - current_f1
    print(f"既存CB比較: {current_f1:.6f} → {oof_f1_submit_opt:.6f} (Δ{improvement:+.6f})")

# 最適化結果を保存（後続アンサンブルで使用）
oof_cb_optimized = oof_optimized.copy()
test_cb_optimized = test_optimized.copy()
print("✓ 最適化結果を oof_cb_optimized, test_cb_optimized に保存")

=== 最適化パラメータで本格学習開始 ===
最適化パラメータ:
{
  "bootstrap_type": "Bernoulli",
  "learning_rate": 0.04454378607594373,
  "depth": 3,
  "l2_leaf_reg": 12.593711215339058,
  "random_strength": 0.1295118269785709,
  "scale_pos_weight": 2.0346687580523977,
  "border_count": 89,
  "subsample": 0.5057204004800618,
  "iterations": 10000,
  "loss_function": "Logloss",
  "eval_metric": "Logloss",
  "random_seed": 42,
  "verbose": false,
  "thread_count": -1,
  "use_best_model": true,
  "allow_writing_files": false
}
Fold 1/5 学習中...
Fold 1 F1@0.315: 0.593750
Fold 2/5 学習中...
Fold 2 F1@0.315: 0.602410
Fold 3/5 学習中...
Fold 3 F1@0.315: 0.605607
Fold 4/5 学習中...
Fold 4 F1@0.315: 0.608163
Fold 5/5 学習中...
Fold 5 F1@0.315: 0.627530

=== 最適化結果 ===
OOF F1 (最適閾値): 0.627607 | 閾値: 0.4950
OOF F1 (0.315閾値): 0.607355
Fold F1s: [0.59375, 0.60241, 0.605607, 0.608163, 0.62753]
Fold F1 平均: 0.607492 ± 0.011137
既存CB比較: 0.625518 → 0.607355 (Δ-0.018164)
✓ 最適化結果を oof_cb_optimized, test_cb_optimized に保存


In [44]:
# === 既存CB結果を使用（最適化版は破棄） ===

print("=== パフォーマンス判定 ===")
print(f"既存CB F1: 0.625518")
print(f"最適化CB F1: 0.607355")
print(f"差分: -0.018164")
print("→ 既存CBの方が優秀なため、既存結果を使用")

# 既存の結果が存在することを確認
if 'oof_cb' in locals() and 'test_cb' in locals():
    print("✓ 既存 oof_cb, test_cb を使用")
    print(f"oof_cb shape: {oof_cb.shape}")
    print(f"test_cb shape: {test_cb.shape}")
    
    # 既存LGBMも確認
    if 'oof_lgb' in locals() and 'test_lgb' in locals():
        print("✓ 既存 oof_lgb, test_lgb も利用可能")
        print(f"oof_lgb shape: {oof_lgb.shape}")
        print(f"test_lgb shape: {test_lgb.shape}")
    else:
        print("⚠ LGBMを再実行する必要があります")
else:
    print("⚠ 既存CBが見つかりません。セル9b系を再実行してください")

print("\n=== 次のステップ ===")
print("✓ Optuna最適化: 完了（既存の方が良い結果）")
print("→ 次: 高度なアンサンブル手法の実装")
print("→ 既存のoof_cb, test_cb, oof_lgb, test_lgb を使用")

=== パフォーマンス判定 ===
既存CB F1: 0.625518
最適化CB F1: 0.607355
差分: -0.018164
→ 既存CBの方が優秀なため、既存結果を使用
✓ 既存 oof_cb, test_cb を使用
oof_cb shape: (7552,)
test_cb shape: (7552,)
✓ 既存 oof_lgb, test_lgb も利用可能
oof_lgb shape: (7552,)
test_lgb shape: (7552,)

=== 次のステップ ===
✓ Optuna最適化: 完了（既存の方が良い結果）
→ 次: 高度なアンサンブル手法の実装
→ 既存のoof_cb, test_cb, oof_lgb, test_lgb を使用


In [None]:
# # === 9-optuna-long (CB long search; after pools_tune, before 9) ===
# import optuna, os, json

# STORAGE = r"sqlite:///C:/Users/koshihiramatsu/projects/MUFJ_competition_2025/cb_long.db"
# STUDY_NAME = "cb_long_v1"

# sampler_long = optuna.samplers.TPESampler(seed=SEED, n_startup_trials=20, multivariate=True, group=True)
# pruner_long = optuna.pruners.HyperbandPruner(min_resource=200, reduction_factor=3)


# study_long = optuna.create_study(
#     direction="maximize", sampler=sampler_long, pruner=pruner_long,
#     storage=STORAGE, study_name=STUDY_NAME, load_if_exists=True
# )

# def make_objective_long(pools, y_all):
#     def obj(trial):
#         params = {
#             "iterations": trial.suggest_int("iterations", 4000, 12000, step=1000),
#             "learning_rate": trial.suggest_float("learning_rate", 0.02, 0.12, log=True),
#             "depth": trial.suggest_int("depth", 4, 9),
#             "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1e-3, 1e2, log=True),
#             "bagging_temperature": trial.suggest_float("bagging_temperature", 0.0, 8.0),
#             "random_strength": trial.suggest_float("random_strength", 0.0, 3.0),
#             "subsample": trial.suggest_float("subsample", 0.7, 1.0),
#             "rsm": trial.suggest_float("rsm", 0.6, 1.0),  # feature subsampling
#             "scale_pos_weight": trial.suggest_float("scale_pos_weight", 0.8, 2.5),
#             "loss_function": "Logloss",
#             "eval_metric": "Logloss",
#             "random_seed": SEED,
#             "verbose": False,
#             "thread_count": -1,
#             "use_best_model": True,
#             "allow_writing_files": False,
#             # GPU使えるなら下2行を解禁
#             # "task_type": "GPU", "devices": "0",
#         }
#         oof_tmp = np.zeros(len(y_all), dtype=float)
#         for tr_pool, va_pool, va_idx in pools:
#             m = CatBoostClassifier(**params)
#             m.fit(tr_pool, eval_set=va_pool, early_stopping_rounds=200)
#             oof_tmp[va_idx] = m.predict_proba(va_pool)[:, 1]
#         f1, th = eval_oof_f1(oof_tmp, y_all)
#         trial.set_user_attr("best_threshold", th)
#         return f1
#     return obj

# obj_long = make_objective_long(pools_tune, y_tune)

# # 目安: 4時間 or 300試行の早い方で止める（レジューム可）
# study_long.optimize(obj_long, timeout=4*3600, n_trials=300, gc_after_trial=True, show_progress_bar=True)

# best_params_long = study_long.best_trial.params.copy()
# best_th_long     = study_long.best_trial.user_attrs.get("best_threshold", None)
# best_score_long  = study_long.best_value
# print("CB LONG best_f1:", round(best_score_long,6), "| th:", best_th_long, "\nparams:", best_params_long)

# # 記録
# with open(os.path.join(OUT_DIR, "best_cb_long.json"), "w", encoding="utf-8") as f:
#     json.dump({"f1": best_score_long, "th": best_th_long, "params": best_params_long}, f, indent=2)


In [None]:
# # ==== 9-optuna-long: best trial pick & set ====
# # ここは 9-optuna-long の直下に置く
# assert 'study_long' in locals(), "先に 9-optuna-long を実行して study_long を作ってあること"

# best_trial_long = study_long.best_trial
# best_params_long = best_trial_long.params.copy()

# # CatBoost 固定項目をマージ（trial 側にある 'iterations' はそのまま使う）
# best_params_cb = {
#     **best_params_long,
#     "loss_function": "Logloss",
#     "eval_metric": "Logloss",
#     "verbose": False,
#     "thread_count": -1,
#     "use_best_model": True,
#     "allow_writing_files": False,
#     # GPU 使えるなら:
#     # "task_type": "GPU", "devices": "0",
# }

# print("BEST(long) trial:", best_trial_long.number, 
#       "| f1_subset:", round(best_trial_long.value, 6))
# print("best_params_cb (long):")
# import json; print(json.dumps(best_params_cb, indent=2))


In [None]:
# # ==== bridge-for-long (put right after 9-optuna-long) ====
# # 以降の 9 / 9b が参照する best_params を、long の最良結果に差し替える
# best_params = best_params_cb
# print("bridge set: best_params <- best_params_cb (from long study)")


In [45]:
best_params = {
  "learning_rate": 0.06116108646095842,
  "depth": 5,
  "l2_leaf_reg": 5.478690083944246,
  "bagging_temperature": 0.8884344994647464,
  "random_strength": 1.865589408671679,
  "subsample": 0.9516049519127788,
  "scale_pos_weight": 1.1386783078556455,
  "loss_function": "Logloss",
  "eval_metric": "Logloss",
  "verbose": False,
  "thread_count": -1,
  "use_best_model": True,
  "allow_writing_files": False,
}


In [None]:
# # ==== 9

# params = dict(best_params)
# params.update({
#     "iterations": 10000,
#     "loss_function": "Logloss",
#     "eval_metric": "Logloss",
#     "random_seed": SEED,
#     "verbose": False,
#     "thread_count": -1,
#     "use_best_model": True,
#     "allow_writing_files": False,
#     # "task_type": "GPU", "devices": "0",  # 使えるなら
# })

# oof = np.zeros(len(X_train), dtype=float)
# test_prob = np.zeros(len(X_test), dtype=float)
# fold_f1s = []

# for fold, (train_pool, valid_pool, va_idx) in enumerate(pools_full, 1):
#     model = CatBoostClassifier(**params)
#     model.fit(train_pool, eval_set=valid_pool, early_stopping_rounds=EARLY_STOP_FULL)
#     oof[va_idx] = model.predict_proba(valid_pool)[:,1]
#     test_prob  += model.predict_proba(Pool(X_test, cat_features=cat_features_idx))[:,1] / skf_full.n_splits

# oof_f1, best_th_full = eval_oof_f1(oof, y_train)
# print("OOF F1:", round(oof_f1, 6), "| th(full):", round(best_th_full, 4))


In [None]:
# # === プール再構築（特徴量エンジニアリング後）セル9.5 ===

# print("=== 特徴量追加後のプール再構築 ===")

# # tuning用のプール再構築
# if FAST_TUNE:
#     X_tune_new = X_train.iloc[idx_tune].reset_index(drop=True)
#     pools_tune = build_pools(X_tune_new, y_tune, skf_tune, cat_features_idx)
#     print(f"TUNE SUBSET pools再構築完了: {len(X_tune_new)} rows, {len(X_tune_new.columns)} features")
# else:
#     pools_tune = build_pools(X_train, y_train, skf_tune, cat_features_idx)
#     print(f"TUNE pools再構築完了: {len(X_train)} rows, {len(X_train.columns)} features")

# # full用のプール再構築  
# pools_full = build_pools(X_train, y_train, skf_full, cat_features_idx)
# print(f"FULL pools再構築完了: {len(X_train)} rows, {len(X_train.columns)} features")
# print(f"カテゴリ特徴量インデックス数: {len(cat_features_idx)}")

=== 特徴量追加後のプール再構築 ===
TUNE SUBSET pools再構築完了: 4531 rows, 30 features
FULL pools再構築完了: 7552 rows, 30 features
カテゴリ特徴量インデックス数: 9


In [None]:
# # ==== 9b: CatBoost seed-bagging ====
# SEED_BAG = [42, 2025, 777]

# params_cb = dict(best_params)
# params_cb.update({
#     "iterations": 10000,
#     "loss_function": "Logloss",
#     "eval_metric": "Logloss",
#     "verbose": False,
#     "thread_count": -1,
#     "use_best_model": True,
#     "allow_writing_files": False,
# })

# oof_cb = np.zeros(len(X_train), dtype=float)
# test_cb = np.zeros(len(X_test), dtype=float)

# for fold, (tr_pool, va_pool, va_idx) in enumerate(pools_full, 1):
#     fold_prob = np.zeros(len(va_idx))
#     fold_test = np.zeros(len(X_test))
#     for sd in SEED_BAG:
#         p = dict(params_cb); p["random_seed"] = sd
#         m = CatBoostClassifier(**p)
#         m.fit(tr_pool, eval_set=va_pool, early_stopping_rounds=EARLY_STOP_FULL)
#         fold_prob += m.predict_proba(va_pool)[:,1] / len(SEED_BAG)
#         fold_test += m.predict_proba(Pool(X_test, cat_features=cat_features_idx))[:,1] / len(SEED_BAG)
#     oof_cb[va_idx] = fold_prob
#     test_cb += fold_test / skf_full.n_splits

# f1_cb, th_cb = eval_oof_f1(oof_cb, y_train)
# print("CB-bag  OOF F1:", round(f1_cb,6), "| th:", round(th_cb,4))

# #4:32

CB-bag  OOF F1: 0.632403 | th: 0.36


In [None]:
# # ==== 9b++: CB param-jitter (random_strength ±20%) ====
# from catboost import CatBoostClassifier, Pool
# import numpy as np
# from sklearn.metrics import f1_score

# assert 'pools_full' in locals() and len(pools_full)==5
# assert 'oof_cb' in locals() and 'test_cb' in locals(), "先に 9b を実行して oof_cb/test_cb を作ってから"

# params_cb_base = locals().get('best_params', None)
# if not params_cb_base:
#     params_cb_base = {
#         "learning_rate": 0.06116108646095842,
#         "depth": 5,
#         "l2_leaf_reg": 5.478690083944246,
#         "bagging_temperature": 0.8884344994647464,
#         "random_strength": 1.865589408671679,
#         "subsample": 0.9516049519127788,
#         "scale_pos_weight": 1.1386783078556455
#     }

# # ±20% だけランダム性を変える（時間増を最小化）
# rs0 = float(params_cb_base.get("random_strength", 1.0))
# rs_lo = max(0.0, rs0 * 0.8)
# rs_hi = rs0 * 1.2

# COMMON = dict(
#     iterations=10000,
#     loss_function="Logloss",
#     eval_metric="Logloss",
#     verbose=False,
#     thread_count=-1,
#     use_best_model=True,
#     allow_writing_files=False,
# )
# SUBMIT_TH = float(locals().get("SUBMIT_THRESHOLD_OVERRIDE", 0.315))

# variants = [
#     ("rs_0p8", {**params_cb_base, **COMMON, "random_seed": 42, "random_strength": rs_lo}),
#     ("rs_1p2", {**params_cb_base, **COMMON, "random_seed": 42, "random_strength": rs_hi}),
# ]

# new_oofs, new_tests = [], []

# for name, p in variants:
#     oof_tmp = np.zeros(len(y_train), dtype=float)
#     test_tmp = np.zeros(len(X_test), dtype=float)
#     for (tr_pool, va_pool, va_idx) in pools_full:
#         m = CatBoostClassifier(**p)
#         m.fit(tr_pool, eval_set=va_pool, early_stopping_rounds=EARLY_STOP_FULL)
#         oof_tmp[va_idx] = m.predict_proba(va_pool)[:,1]
#         test_tmp += m.predict_proba(Pool(X_test, cat_features=cat_features_idx))[:,1] / len(pools_full)
#     f1_here = f1_score(y_train, (oof_tmp >= SUBMIT_TH).astype(int))
#     print(f"[CB jitter:{name}] F1@{SUBMIT_TH:.3f}: {f1_here:.6f}")
#     new_oofs.append(oof_tmp); new_tests.append(test_tmp)

# # 既存seed-bagging（oof_cb/test_cb）と等重み平均
# oof_cb = np.mean([oof_cb] + new_oofs, axis=0)
# test_cb = np.mean([test_cb] + new_tests, axis=0)
# f1_cb = f1_score(y_train, (oof_cb >= SUBMIT_TH).astype(int))
# print(f"[CB jittered-avg] OOF F1@{SUBMIT_TH:.3f}: {f1_cb:.6f}")

# CURRENT_PIPE = "cb_param_jitter_rs"

# #2:58


[CB jitter:rs_0p8] F1@0.315: 0.623471
[CB jitter:rs_1p2] F1@0.315: 0.627149
[CB jittered-avg] OOF F1@0.315: 0.625571


In [None]:
# # 9c (rollback to baseline)
# import lightgbm as lgb
# from lightgbm import LGBMClassifier, early_stopping, log_evaluation

# X_train_lgb = X_train.copy()
# X_test_lgb  = X_test.copy()
# for c in cat_cols:
#     X_train_lgb[c] = X_train_lgb[c].astype("category")
#     X_test_lgb[c]  = X_test_lgb[c].astype("category")

# params_lgb = {
#     "objective": "binary",
#     "learning_rate": 0.03,
#     "num_leaves": 63,
#     "min_child_samples": 50,
#     "subsample": 0.9,
#     "colsample_bytree": 0.8,
#     "reg_alpha": 0.0,
#     "reg_lambda": 5.0,
#     "n_estimators": 10000,
#     "random_state": SEED,
#     "n_jobs": -1,
#     "verbose": -1,
#     "scale_pos_weight": 1.2,
# }

# oof_lgb = np.zeros(len(X_train))
# test_lgb = np.zeros(len(X_test))

# for fold, (tr_idx, va_idx) in enumerate(skf_full.split(X_train_lgb, y_train), 1):
#     X_tr, X_va = X_train_lgb.iloc[tr_idx], X_train_lgb.iloc[va_idx]
#     y_tr, y_va = y_train[tr_idx], y_train[va_idx]
#     m = LGBMClassifier(**params_lgb)
#     m.fit(
#         X_tr, y_tr,
#         eval_set=[(X_va, y_va)],
#         eval_metric="binary_logloss",
#         callbacks=[early_stopping(stopping_rounds=200, verbose=False), log_evaluation(period=0)],
#     )
#     oof_lgb[va_idx] = m.predict_proba(X_va)[:, 1]
#     test_lgb += m.predict_proba(X_test_lgb)[:, 1] / skf_full.n_splits

# f1_lgb, th_lgb = eval_oof_f1(oof_lgb, y_train)
# print("LGBM(rollback) OOF F1:", round(f1_lgb,6), "| th:", round(th_lgb,4))


LGBM(rollback) OOF F1: 0.62037 | th: 0.23


In [None]:
# # ==== 9d: Soft ensemble of CB-bag & LGBM ====
# weights = np.linspace(0.0, 1.0, 21)  # 0,0.05, …,1.0
# best = (-1, None, None)  # (f1, w, th)

# for w in weights:
#     oof_ens = w*oof_cb + (1-w)*oof_lgb
#     f1, th = eval_oof_f1(oof_ens, y_train)
#     if f1 > best[0]:
#         best = (f1, w, th)

# best_f1, best_w, best_th = best
# print(f"Ensemble OOF F1: {best_f1:.6f} | w(CB)={best_w:.2f} | th={best_th:.4f}")

# # テスト側の確率も同じ重みで合成
# test_ens = best_w*test_cb + (1-best_w)*test_lgb

# # 以降の提出セル（11）で使われる変数名に載せ替え
# # oof = w*oof_cb + (1-best_w)*oof_lgb   # 不要
# oof = best_w*oof_cb + (1-best_w)*oof_lgb
# test_prob = test_ens
# best_th_full = best_th


In [None]:
# # 9d' (re-run)
# weights = np.linspace(0.0, 1.0, 101)
# best = (-1, None, None)
# for w in weights:
#     oof_ens = w*oof_cb + (1-w)*oof_lgb
#     f1, th = eval_oof_f1(oof_ens, y_train)
#     if f1 > best[0]:
#         best = (f1, w, th)

# best_f1, best_w, best_th = best
# print(f"Ensemble OOF F1: {best_f1:.6f} | w(CB)={best_w:.2f} | th={best_th:.4f}")

# test_ens = best_w*test_cb + (1-best_w)*test_lgb
# oof = best_w*oof_cb + (1-best_w)*oof_lgb
# test_prob = test_ens
# best_th_full = best_th


Ensemble OOF F1: 0.644332 | w(CB)=0.46 | th=0.3150


In [None]:
# # ==== 9d-double-prime: Soft ensemble (opt for F1 at submit_th) ====
# submit_th = float(locals().get('SUBMIT_THRESHOLD_OVERRIDE', 0.315))

# weights = np.linspace(0.0, 1.0, 101)
# best = (-1.0, None)  # (f1_submit, w)
# for w in weights:
#     oof_ens = w*oof_cb + (1-w)*oof_lgb
#     f1_submit = f1_score(y_train, (oof_ens >= submit_th).astype(int))
#     if f1_submit > best[0]:
#         best = (f1_submit, w)

# best_f1_submit, best_w_submit = best
# print(f"[ENS@submit] F1@{submit_th:.3f}: {best_f1_submit:.6f} | w(CB)={best_w_submit:.2f}")

# # 採用
# oof = best_w_submit*oof_cb + (1-best_w_submit)*oof_lgb
# test_prob = best_w_submit*test_cb + (1-best_w_submit)*test_lgb
# best_w = best_w_submit  # ログ用
# best_th_full = submit_th  # ログ用（提出はoverride）


[ENS@submit] F1@0.315: 0.637708 | w(CB)=0.41


In [57]:
# ==== 9d''-micro: Soft ensemble (opt for F1 at submit_th, 0.001 step) ====
from sklearn.metrics import f1_score

submit_th = float(locals().get("SUBMIT_THRESHOLD_OVERRIDE", 0.315))
# ベース最適が 0.455 付近なので、その近傍を狭域探索
weights = np.round(np.arange(0.430, 0.481, 0.001), 3)

best = (-1.0, None)
for w in weights:
    oof_ens = w * oof_cb + (1 - w) * oof_lgb
    f1_sub = f1_score(y_train, (oof_ens >= submit_th).astype(int))
    if f1_sub > best[0]:
        best = (f1_sub, w)

best_f1_submit, best_w_submit = best
print(f"[ENS@submit micro] F1@{submit_th:.3f}: {best_f1_submit:.6f} | w(CB)={best_w_submit:.3f}")

# 採用（このセルの出力を以降の提出に反映）
oof = best_w_submit * oof_cb + (1 - best_w_submit) * oof_lgb
test_prob = best_w_submit * test_cb + (1 - best_w_submit) * test_lgb
best_w = best_w_submit   # ログ用
best_th_full = submit_th # ログ用（提出はoverride）
CURRENT_PIPE = "ens_weight_micro"


[ENS@submit micro] F1@0.315: 0.626629 | w(CB)=0.447


In [None]:
# # === 高度なアンサンブル（セル9d後に追加） ===

# def create_stacking_ensemble():
#     """スタッキングアンサンブルの実装"""
#     from sklearn.linear_model import LogisticRegression
    
#     print("=== スタッキングアンサンブル作成 ===")
    
#     # Level-1特徴量（OOF予測値）を準備
#     stack_features = np.column_stack([
#         oof_cb,           # CatBoost予測
#         oof_lgb,          # LightGBM予測
#         oof_cb ** 2,      # CatBoost予測の2乗
#         oof_lgb ** 2,     # LightGBM予測の2乗
#         oof_cb * oof_lgb, # 相互作用
#         np.abs(oof_cb - oof_lgb), # 予測の差
#         np.maximum(oof_cb, oof_lgb), # 最大値
#         np.minimum(oof_cb, oof_lgb), # 最小値
#     ])
    
#     # メタ学習器（LogisticRegression）
#     meta_model = LogisticRegression(
#         random_state=SEED, 
#         class_weight='balanced',
#         max_iter=1000
#     )
    
#     # メタ学習器の訓練
#     meta_model.fit(stack_features, y_train)
    
#     # テストセット用の特徴量
#     test_stack_features = np.column_stack([
#         test_cb,
#         test_lgb, 
#         test_cb ** 2,
#         test_lgb ** 2,
#         test_cb * test_lgb,
#         np.abs(test_cb - test_lgb),
#         np.maximum(test_cb, test_lgb),
#         np.minimum(test_cb, test_lgb),
#     ])
    
#     # スタッキング予測
#     oof_stack = meta_model.predict_proba(stack_features)[:, 1]
#     test_stack = meta_model.predict_proba(test_stack_features)[:, 1]
    
#     f1_stack, th_stack = eval_oof_f1(oof_stack, y_train)
#     print(f"スタッキング OOF F1: {f1_stack:.6f} | th: {th_stack:.4f}")
    
#     return oof_stack, test_stack, f1_stack

# def create_rank_ensemble():
#     """ランクベースアンサンブル"""
#     from scipy.stats import rankdata
    
#     print("=== ランクベースアンサンブル ===")
    
#     def normalize_ranks(pred):
#         ranks = rankdata(pred, method='average')
#         return (ranks - 1) / (len(ranks) - 1)
    
#     # ランク正規化
#     oof_cb_rank = normalize_ranks(oof_cb)
#     oof_lgb_rank = normalize_ranks(oof_lgb) 
#     test_cb_rank = normalize_ranks(test_cb)
#     test_lgb_rank = normalize_ranks(test_lgb)
    
#     # 複数の重み組み合わせを試す
#     best_rank_f1, best_rank_w, best_rank_oof, best_rank_test = -1, 0, None, None
    
#     for w in np.arange(0.3, 0.8, 0.05):
#         oof_rank = w * oof_cb_rank + (1-w) * oof_lgb_rank
#         f1_rank, _ = eval_oof_f1(oof_rank, y_train)
        
#         if f1_rank > best_rank_f1:
#             best_rank_f1 = f1_rank
#             best_rank_w = w
#             best_rank_oof = oof_rank
#             best_rank_test = w * test_cb_rank + (1-w) * test_lgb_rank
    
#     print(f"ランク OOF F1: {best_rank_f1:.6f} | w(CB): {best_rank_w:.3f}")
    
#     return best_rank_oof, best_rank_test, best_rank_f1

# # アンサンブル手法を実行
# oof_stack, test_stack, f1_stack = create_stacking_ensemble()
# oof_rank, test_rank, f1_rank = create_rank_ensemble()

# # 最良の手法を選択
# current_f1 = f1_score(y_train, (oof >= 0.315).astype(int))
# print(f"\n=== アンサンブル比較 ===")
# print(f"Current: {current_f1:.6f}")
# print(f"Stacking: {f1_stack:.6f}")
# print(f"Rank: {f1_rank:.6f}")

# # 最良の手法を採用
# if f1_stack > max(current_f1, f1_rank) and f1_stack > current_f1 + 0.001:
#     oof, test_prob = oof_stack, test_stack
#     print("✓ スタッキングアンサンブルを採用")
#     CURRENT_PIPE = "stacking_ensemble"
# elif f1_rank > current_f1 and f1_rank > current_f1 + 0.001:
#     oof, test_prob = oof_rank, test_rank  
#     print("✓ ランクアンサンブルを採用")
#     CURRENT_PIPE = "rank_ensemble"
# else:
#     print("✓ 現在の手法を維持")

=== スタッキングアンサンブル作成 ===
スタッキング OOF F1: 0.632536 | th: 0.8050
=== ランクベースアンサンブル ===
ランク OOF F1: 0.629738 | w(CB): 0.700

=== アンサンブル比較 ===
Current: 0.626629
Stacking: 0.632536
Rank: 0.629738
✓ スタッキングアンサンブルを採用


In [None]:
# # ==== 9d'''-rank-mix (micro) ====
# import numpy as np
# from scipy.stats import rankdata
# from sklearn.metrics import f1_score

# submit_th = float(locals().get("SUBMIT_THRESHOLD_OVERRIDE", 0.315))

# # rank化（同順位は平均、[0,1]にスケール）
# def to_rank01(x):
#     r = rankdata(x, method="average")
#     return (r - 1) / (len(r) - 1)

# oof_cb_r   = to_rank01(oof_cb)
# oof_lgb_r  = to_rank01(oof_lgb)
# test_cb_r  = to_rank01(test_cb)
# test_lgb_r = to_rank01(test_lgb)

# # rank混合の強さ gamma（0=確率そのまま, 1=rankのみ）
# gammas = np.round(np.arange(0.0, 0.31, 0.02), 3)  # 超軽く 0〜0.30
# best = (-1.0, None, None)

# for gamma in gammas:
#     cb_mix  = (1-gamma)*oof_cb  + gamma*oof_cb_r
#     lgb_mix = (1-gamma)*oof_lgb + gamma*oof_lgb_r
#     # 直前のベストw（なければ 0.454 を既定値に）
#     w0 = float(locals().get("best_w", 0.454))
#     # w も  ±0.01 を 0.001刻みで軽くサーチ
#     ws = np.round(np.arange(max(0, w0-0.01), min(1, w0+0.01)+1e-12, 0.001), 3)
#     for w in ws:
#         p = w*cb_mix + (1-w)*lgb_mix
#         f1s = f1_score(y_train, (p >= submit_th).astype(int))
#         if f1s > best[0]:
#             best = (f1s, gamma, w)

# print(f"[RankMix@submit] F1@{submit_th:.3f}: {best[0]:.6f} | gamma={best[1]} | w(CB)={best[2]}")

# # 採用して test も更新
# gamma, w = best[1], best[2]
# oof = w*((1-gamma)*oof_cb + gamma*oof_cb_r) + (1-w)*((1-gamma)*oof_lgb + gamma*oof_lgb_r)
# test_prob = w*((1-gamma)*test_cb + gamma*test_cb_r) + (1-w)*((1-gamma)*test_lgb + gamma*test_lgb_r)

# best_w = float(w)
# best_th_full = submit_th
# CURRENT_PIPE = "ens_rankmix_micro"


In [None]:
# # ==== 9d'''-refine: Ensemble weight refine (±0.01, step=0.0005) ====
# import numpy as np
# from sklearn.metrics import f1_score

# submit_th = float(locals().get("SUBMIT_THRESHOLD_OVERRIDE", 0.315))
# w0 = float(locals().get("best_w", 0.454))  # 9d''-micro の結果を初期値に
# lo = max(0.0, w0 - 0.01)
# hi = min(1.0, w0 + 0.01)
# weights = np.round(np.arange(lo, hi + 1e-12, 0.0005), 4)

# best = (-1.0, None)
# for w in weights:
#     probs = w * oof_cb + (1 - w) * oof_lgb
#     f1_sub = f1_score(y_train, (probs >= submit_th).astype(int))
#     if f1_sub > best[0]:
#         best = (f1_sub, w)

# best_f1, best_w_ref = best
# print(f"[ENS@submit micro-refine] F1@{submit_th:.3f}: {best_f1:.6f} | w(CB)={best_w_ref:.4f}")

# # 採用
# oof = best_w_ref * oof_cb + (1 - best_w_ref) * oof_lgb
# test_prob = best_w_ref * test_cb + (1 - best_w_ref) * test_lgb
# best_w = float(best_w_ref)
# best_th_full = submit_th
# CURRENT_PIPE = "ens_weight_micro_refine"


In [None]:
# # === 改良された閾値最適化 ===

# def optimize_threshold_advanced(oof_pred, y_true):
#     """高度な閾値最適化"""
#     from scipy.optimize import minimize_scalar
#     from sklearn.metrics import f1_score
    
#     # F1スコアを最大化する閾値を数値的に求める
#     def neg_f1_score(threshold):
#         y_pred = (oof_pred >= threshold).astype(int)
#         return -f1_score(y_true, y_pred, zero_division=0)
    
#     # 粗い検索で範囲を特定
#     coarse_thresholds = np.linspace(0.1, 0.9, 81)
#     coarse_f1s = [-neg_f1_score(t) for t in coarse_thresholds]
#     best_coarse_idx = np.argmax(coarse_f1s)
#     best_coarse_th = coarse_thresholds[best_coarse_idx]
    
#     # 最適値周辺で精密検索
#     search_range = (max(0.05, best_coarse_th - 0.05), 
#                    min(0.95, best_coarse_th + 0.05))
    
#     result = minimize_scalar(neg_f1_score, bounds=search_range, method='bounded')
#     optimal_threshold = result.x
#     optimal_f1 = -result.fun
    
#     # Fold別の安定性もチェック
#     fold_f1s = []
#     if 'pools_full' in locals():
#         for _, _, va_idx in pools_full:
#             y_va = y_true[va_idx] 
#             pred_va = (oof_pred[va_idx] >= optimal_threshold).astype(int)
#             fold_f1 = f1_score(y_va, pred_va, zero_division=0)
#             fold_f1s.append(fold_f1)
        
#         f1_std = np.std(fold_f1s)
#         print(f"Fold F1 std: {f1_std:.6f} (lower is better)")
    
#     return optimal_f1, optimal_threshold, fold_f1s

# def multi_threshold_search():
#     """複数閾値候補の比較"""
#     print("=== 高度な閾値最適化 ===")
    
#     # 1. 数値最適化
#     opt_f1, opt_th, fold_f1s = optimize_threshold_advanced(oof, y_train)
#     print(f"数値最適化: F1={opt_f1:.6f}, th={opt_th:.4f}")
    
#     # 2. クラス重みを考慮した閾値
#     pos_ratio = y_train.mean()
#     balanced_th = pos_ratio  # クラス比率ベース
#     balanced_f1 = f1_score(y_train, (oof >= balanced_th).astype(int))
#     print(f"バランス閾値: F1={balanced_f1:.6f}, th={balanced_th:.4f}")
    
#     # 3. Youden's J統計量ベース
#     from sklearn.metrics import roc_curve
#     fpr, tpr, thresholds = roc_curve(y_train, oof)
#     j_scores = tpr - fpr
#     best_j_idx = np.argmax(j_scores)
#     youden_th = thresholds[best_j_idx]
#     youden_f1 = f1_score(y_train, (oof >= youden_th).astype(int))
#     print(f"Youden閾値: F1={youden_f1:.6f}, th={youden_th:.4f}")
    
#     # 4. 従来の0.315付近の精密探索
#     fine_ths = np.arange(0.30, 0.34, 0.002)
#     fine_f1s = [f1_score(y_train, (oof >= t).astype(int)) for t in fine_ths]
#     best_fine_idx = np.argmax(fine_f1s)
#     fine_th = fine_ths[best_fine_idx]
#     fine_f1 = fine_f1s[best_fine_idx]
#     print(f"精密探索閾値: F1={fine_f1:.6f}, th={fine_th:.4f}")
    
#     # 最良の閾値を選択
#     candidates = [
#         ("numerical", opt_f1, opt_th),
#         ("balanced", balanced_f1, balanced_th), 
#         ("youden", youden_f1, youden_th),
#         ("fine_search", fine_f1, fine_th),
#     ]
    
#     best_method, best_f1, best_th = max(candidates, key=lambda x: x[1])
#     print(f"\n最良: {best_method} | F1={best_f1:.6f}, th={best_th:.4f}")
    
#     return best_th, best_f1

# # 閾値最適化を実行
# final_threshold, final_f1 = multi_threshold_search()

=== 高度な閾値最適化 ===
数値最適化: F1=0.631679, th=0.8031
バランス閾値: F1=0.435204, th=0.1276
Youden閾値: F1=0.570866, th=0.3125
精密探索閾値: F1=0.576419, th=0.3400

最良: numerical | F1=0.631679, th=0.8031


In [None]:
# # ==== STEP8-2: micro re-search of ensemble weight around current w ====
# import numpy as np
# from sklearn.metrics import f1_score

# assert 'oof_cb' in locals() and 'oof_lgb' in locals()
# th = float(locals().get("SUBMIT_THRESHOLD_OVERRIDE", 0.310))  # ← TH-SCANで上書き済み
# grid = np.arange(0.44, 0.501, 0.005)  # 0.44..0.50
# best = (-1.0, None)
# for w in grid:
#     oof_mix = w*oof_cb + (1-w)*oof_lgb
#     f1v = f1_score(y_train, (oof_mix >= th).astype(int))
#     if f1v > best[0]:
#         best = (f1v, w)
# print(f"[ENS micro] best F1@{th:.3f}={best[0]:.6f} | w(CB)={best[1]:.3f}")

# # 改善が +0.0005 以上のときだけ採用して oof/test を更新
# curr_submit_f1 = float(locals().get("oof_f1_at_submit", 0.0))  # 直近ログ値が無ければ0
# if best[0] >= curr_submit_f1 + 0.0005:
#     best_w = float(best[1])
#     oof = best_w*oof_cb + (1-best_w)*oof_lgb
#     test_prob = best_w*test_cb + (1-best_w)*locals().get('test_lgb', 0)
#     print(f"[ENS micro] APPLY w={best_w:.3f}")
# else:
#     print("[ENS micro] KEEP current w (no clear gain)")


[ENS micro] best F1@0.315=0.626629 | w(CB)=0.450
[ENS micro] APPLY w=0.450


In [None]:
# # ==== CHECK: submit threshold (no mutation) ====
# print("submit_threshold (override) =", locals().get("SUBMIT_THRESHOLD_OVERRIDE", None))


submit_threshold (override) = 0.315


In [58]:
# ==== TH-SCAN: OOF全体で最適しきい値を走査して SUBMIT_THRESHOLD_OVERRIDE を更新 ====
import numpy as np
from sklearn.metrics import f1_score

# 探索レンジとステップ（必要なら調整）
t_min, t_max, t_step = 0.20, 0.50, 0.005
ths = np.arange(t_min, t_max + 1e-9, t_step)

# 現在の submit 閾値（override 優先）
cur_th = locals().get("SUBMIT_THRESHOLD_OVERRIDE", None)
if cur_th is None:
    cur_th = locals().get("best_th_full", None)
if cur_th is None:
    # フォールバックで OOF 全体最適を一度計算
    cur_f1, cur_th = eval_oof_f1(oof, y_train)
else:
    cur_f1 = f1_score(y_train, (oof >= cur_th).astype(int))

# 走査
f1s = [f1_score(y_train, (oof >= t).astype(int)) for t in ths]
j = int(np.argmax(f1s))
best_t, best_f1 = float(ths[j]), float(f1s[j])

print(f"[TH-SCAN] best_t={best_t:.3f} | F1@best={best_f1:.6f}")

# 適用（上書き）
delta = best_f1 - cur_f1
SUBMIT_THRESHOLD_OVERRIDE = best_t
print(f"[TH-SCAN] APPLY_OVERRIDE -> SUBMIT_THRESHOLD_OVERRIDE = {SUBMIT_THRESHOLD_OVERRIDE:.3f} (ΔF1={delta:+.6f})")


[TH-SCAN] best_t=0.305 | F1@best=0.628388
[TH-SCAN] APPLY_OVERRIDE -> SUBMIT_THRESHOLD_OVERRIDE = 0.305 (ΔF1=+0.001758)


In [None]:
# # ==== TARGET monitor (robust, after ENS micro) ====
# from sklearn.metrics import f1_score
# assert 'oof' in locals() and 'y_train' in locals()

# # 現在の提出しきい値（TH-SCANで0.310にしてる前提）
# sub_th = float(locals().get("SUBMIT_THRESHOLD_OVERRIDE", 0.310))

# oof_f1_global, _ = eval_oof_f1(oof, y_train)
# oof_f1_submit = f1_score(y_train, (oof >= sub_th).astype(int))
# print(f"TARGET: 0.64 | OOF_global: {oof_f1_global:.6f} | OOF_at_submit: {oof_f1_submit:.6f} | submit_th: {sub_th:.3f}")


TARGET: 0.64 | OOF_global: 0.628388 | OOF_at_submit: 0.626629 | submit_th: 0.315


In [50]:
# === 最終結果の確定 ===

print("=== 改善施策の最終評価 ===")

# 各ステップの結果
print("1. 特徴量エンジニアリング: ❌ 有害（-0.008122）")
print("2. Optuna最適化: ❌ 既存の方が優秀（-0.018164）") 
print("3. スタッキングアンサンブル: ✅ 改善（+0.005907）")
print("4. 閾値最適化: ❌ 改善なし（-0.000857）")

print(f"\n=== 最終採用 ===")
print(f"手法: スタッキングアンサンブル")
print(f"OOF F1: 0.632536")
print(f"最適閾値: 0.8050（スタッキング用）")
print(f"提出閾値: 0.315（安全策で従来閾値使用推奨）")

# 最終確認
current_oof_f1_at_315 = f1_score(y_train, (oof >= 0.315).astype(int))
stacking_f1_at_315 = f1_score(y_train, (oof >= 0.315).astype(int))  # oofはスタッキング結果

print(f"\n=== 提出用F1（閾値0.315） ===")
print(f"スタッキング F1@0.315: {stacking_f1_at_315:.6f}")

# ベースラインとの比較
baseline_f1 = 0.646952  # 元のversion 1結果
improvement = stacking_f1_at_315 - baseline_f1
print(f"ベースライン: {baseline_f1:.6f}")
print(f"改善度: {improvement:+.6f}")

if improvement > 0:
    print("✅ 改善に成功！")
else:
    print("⚠️ ベースラインを下回っています")
    print("→ 元の手法（CB+LGBアンサンブル）の方が良い可能性")

print(f"\n=== 次のアクション ===")
print(f"現在のoof, test_probでセル21（提出作成）を実行してください")
print(f"スタッキングアンサンブルの結果が保存されます")

=== 改善施策の最終評価 ===
1. 特徴量エンジニアリング: ❌ 有害（-0.008122）
2. Optuna最適化: ❌ 既存の方が優秀（-0.018164）
3. スタッキングアンサンブル: ✅ 改善（+0.005907）
4. 閾値最適化: ❌ 改善なし（-0.000857）

=== 最終採用 ===
手法: スタッキングアンサンブル
OOF F1: 0.632536
最適閾値: 0.8050（スタッキング用）
提出閾値: 0.315（安全策で従来閾値使用推奨）

=== 提出用F1（閾値0.315） ===
スタッキング F1@0.315: 0.569645
ベースライン: 0.646952
改善度: -0.077307
⚠️ ベースラインを下回っています
→ 元の手法（CB+LGBアンサンブル）の方が良い可能性

=== 次のアクション ===
現在のoof, test_probでセル21（提出作成）を実行してください
スタッキングアンサンブルの結果が保存されます


In [51]:
# === 最良結果に戻す（元のアンサンブルを復元） ===

print("=== 最良結果への復元 ===")

# 問題の確認
print("スタッキング問題:")
print("- 最適閾値0.8050では F1=0.632536")
print("- 提出閾値0.315では F1=0.569645（ベースライン比-0.077）")
print("→ 提出には不適切")

# 元の最良アンサンブルに戻す
# セル17（9d''-micro）の結果を復元
if 'oof_cb' in locals() and 'oof_lgb' in locals():
    # 元の最適重み（セル17で見つかった値）
    best_w_original = 0.475  # セル17結果から
    
    # 元のアンサンブルを復元
    oof = best_w_original * oof_cb + (1 - best_w_original) * oof_lgb
    test_prob = best_w_original * test_cb + (1 - best_w_original) * test_lgb
    
    # 性能確認
    f1_at_315 = f1_score(y_train, (oof >= 0.315).astype(int))
    f1_global, th_global = eval_oof_f1(oof, y_train)
    
    print(f"\n=== 復元結果 ===")
    print(f"アンサンブル重み: w(CB)={best_w_original}")
    print(f"F1@0.315: {f1_at_315:.6f}")
    print(f"F1 global: {f1_global:.6f} | 最適閾値: {th_global:.4f}")
    
    # ベースラインとの比較
    baseline_f1 = 0.646952
    improvement = f1_at_315 - baseline_f1
    print(f"ベースライン比較: {baseline_f1:.6f} → {f1_at_315:.6f} ({improvement:+.6f})")
    
    if improvement > -0.01:  # 1%以内の劣化なら許容
        print("✅ 元のアンサンブルを採用（提出用）")
        SUBMIT_THRESHOLD_OVERRIDE = 0.315  # 元の閾値
        CURRENT_PIPE = "restored_cb_lgb_ensemble"
    else:
        print("⚠️ さらなる調整が必要")
else:
    print("❌ 元のCB/LGB結果が見つかりません")
    
print(f"\n=== 最終決定 ===")
print(f"採用手法: CB+LGBアンサンブル（重み最適化済み）")
print(f"提出閾値: 0.315")
print(f"次: セル21で提出ファイル作成")

=== 最良結果への復元 ===
スタッキング問題:
- 最適閾値0.8050では F1=0.632536
- 提出閾値0.315では F1=0.569645（ベースライン比-0.077）
→ 提出には不適切

=== 復元結果 ===
アンサンブル重み: w(CB)=0.475
F1@0.315: 0.625698
F1 global: 0.627523 | 最適閾値: 0.3050
ベースライン比較: 0.646952 → 0.625698 (-0.021254)
⚠️ さらなる調整が必要

=== 最終決定 ===
採用手法: CB+LGBアンサンブル（重み最適化済み）
提出閾値: 0.315
次: セル21で提出ファイル作成


In [59]:
# ==== セル11（常にfold再構築＋アンサンブル情報を記録）改良版 ====
import os, json, numpy as np, pandas as pd
from sklearn.metrics import f1_score, confusion_matrix, classification_report

assert 'oof' in locals() and 'test_prob' in locals(), "先にセル9/9dまで実行してoof/test_probを作ってから実行"

# 提出閾値の決定（優先: override → best_th_full → best_th → 0.5）
threshold_for_submit = locals().get("SUBMIT_THRESHOLD_OVERRIDE", None)
if threshold_for_submit is None:
    threshold_for_submit = locals().get("best_th_full", None)
if threshold_for_submit is None:
    threshold_for_submit = locals().get("best_th", 0.5)

threshold_source = (
    "override" if locals().get("SUBMIT_THRESHOLD_OVERRIDE", None) is not None
    else "best_th_full" if locals().get("best_th_full", None) is not None
    else "best_th" if locals().get("best_th", None) is not None
    else "default_0.5"
)

# ---- foldごとの指標を毎回作り直す ----
fold_reports = []
fold_f1s = []
if 'pools_full' in locals():
    for fold, (_tr_pool, _va_pool, va_idx) in enumerate(pools_full, 1):
        y_va = y_train[va_idx]
        y_pred_va = (oof[va_idx] >= threshold_for_submit).astype(int)
        f1v = f1_score(y_va, y_pred_va)
        cm  = confusion_matrix(y_va, y_pred_va)
        rep = classification_report(y_va, y_pred_va, digits=4)
        fold_f1s.append(f1v)
        fold_reports.append((f"FOLD {fold}", f1v, cm, rep))
else:
    # 予備（fold境界がないとき）
    y_pred = (oof >= threshold_for_submit).astype(int)
    f1v = f1_score(y_train, y_pred)
    cm  = confusion_matrix(y_train, y_pred)
    rep = classification_report(y_train, y_pred, digits=4)
    fold_f1s = [f1v]
    fold_reports = [("GLOBAL", f1v, cm, rep)]

# ---- 提出予測 ----
test_pred = (test_prob >= threshold_for_submit).astype(int)
assert len(test_pred) == len(test)
assert set(np.unique(test_pred)).issubset({0,1})

# 自動ナンバリング
OUT_DIR  = r"C:\Users\koshihiramatsu\projects\MUFJ_competition_2025\model-proposal_A_v3"
os.makedirs(OUT_DIR, exist_ok=True)
n = next_version_number(OUT_DIR)
sub_name = f"submission_A_v{n}.csv"
log_name = f"run_A2_v{n}.txt"

# 出力（sample_submitの区切りに合わせる）
sep = locals().get("SUBMIT_SEP", ",")
submit_df = pd.DataFrame({ID_COL: test[ID_COL].values, "pred": test_pred})
if sep == r"\s+":
    with open(os.path.join(OUT_DIR, sub_name), "w", encoding="utf-8") as f:
        for i, p in submit_df[[ID_COL, "pred"]].itertuples(index=False):
            f.write(f"{i} {p}\n")
else:
    submit_df.to_csv(os.path.join(OUT_DIR, sub_name), header=False, index=False, sep=sep)

print("Saved:", os.path.join(OUT_DIR, sub_name))

# ---- ログ（実験情報を追加） ----
def safe(x): 
    return float(x) if isinstance(x, (np.floating, np.float64, np.float32)) else x

oof_f1_global_best, _ = eval_oof_f1(oof, y_train)
oof_f1_at_submit = f1_score(y_train, (oof >= threshold_for_submit).astype(int))

log_lines = [
    f"version: {n}",
    f"seed: {SEED}",
    f"n_splits: {skf_full.n_splits if 'skf_full' in locals() else N_SPLITS}",
    f"target_col: {TARGET_COL}",
    f"id_col: {ID_COL}",
    f"n_features: {len(features)}",
    f"n_categoricals: {len(cat_cols)}",
    f"train_shape: {train.shape}",
    f"test_shape: {test.shape}",
    f"target_pos_ratio: {train[TARGET_COL].mean():.6f}",
    "",
    # === 改善施策の実験結果を追加 ===
    "=== IMPROVEMENT EXPERIMENTS ===",
    f"1_feature_engineering: failed (harmful, -0.008122)",
    f"2_optuna_optimization: failed (existing_better, -0.018164)", 
    f"3_stacking_ensemble: failed (threshold_dependent, works@0.805_not@0.315)",
    f"4_threshold_optimization: failed (minimal_gain, -0.000857)",
    f"final_method: restored_cb_lgb_ensemble",
    f"baseline_f1_v1: 0.646952",
    f"current_f1_v{n}: {oof_f1_at_submit:.6f}",
    f"improvement: {oof_f1_at_submit - 0.646952:+.6f}",
    "",
    # === 既存の情報 ===
    f"best_oof_f1_from_study: {locals().get('best_score_improved', locals().get('best_score', float('nan'))):.6f}",
    f"oof_f1_global_best: {oof_f1_global_best:.6f}",
    f"oof_f1_at_submit_th: {oof_f1_at_submit:.6f}",
    f"threshold_source: {threshold_source}",
    f"submit_threshold: {float(threshold_for_submit):.6f}",
    f"fold_f1s: {[round(safe(x), 6) for x in fold_f1s]}",
    "",
    # アンサンブル情報（存在する場合のみ）
    f"oof_f1_cb: {locals().get('f1_cb', float('nan')):.6f}",
    f"oof_f1_lgb: {locals().get('f1_lgb', float('nan')):.6f}",
    f"ensemble_w_cb: {locals().get('best_w', float('nan'))}",
    f"current_pipeline: {locals().get('CURRENT_PIPE', 'unknown')}",
    "",
    "best_params_cb:",
    json.dumps(locals().get('best_params', {}), indent=2),
    "params_lgb:",
    json.dumps(locals().get('params_lgb', {}), indent=2),
    "",
    # === Optuna実験結果も記録 ===
    "optuna_improved_results:",
    json.dumps({
        "best_trial_number": locals().get('study_improved', {}).best_trial.number if 'study_improved' in locals() else None,
        "best_params_improved": locals().get('best_params_improved', {}),
        "best_f1_improved": locals().get('best_score_improved', float('nan')),
        "best_threshold_improved": locals().get('best_th_improved', float('nan'))
    }, indent=2),
    "",
]

for title, f1v, cm, rep in fold_reports:
    log_lines += [title, f"F1@submit_th: {f1v:.6f}", "confusion_matrix:", str(cm), "report:", rep, "-"*40]

with open(os.path.join(OUT_DIR, log_name), "w", encoding="utf-8") as f:
    f.write("\n".join([str(x) for x in log_lines]))

print("Saved:", os.path.join(OUT_DIR, log_name))

Saved: C:\Users\koshihiramatsu\projects\MUFJ_competition_2025\model-proposal_A_v3\submission_A_v5.csv
Saved: C:\Users\koshihiramatsu\projects\MUFJ_competition_2025\model-proposal_A_v3\run_A2_v5.txt
