In [1]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

import json
import os
import sys
from dataclasses import dataclass, field
from pathlib import Path
from typing import Literal

import optuna
import wandb
from dotenv import load_dotenv

sys.path.append(os.path.abspath("../.."))

import src.utils.run_optuna as op
from src.utils.optuna_objective import create_objective

### Config

In [2]:
# Load variables from ../../.env (path is relative to the working directory) into the
# process environment so the os.environ lookups further down can see them.
load_dotenv(dotenv_path="../../.env")


@dataclass
class Config:
    """Settings for one Optuna hyperparameter-search run in this notebook.

    The fields are read further down to build the objective, the sampler and
    the pruner, and to call ``op.run_optuna_search``.
    """

    # Data / CV
    model_name: str = "mlp"  # joined with data_id to form the study name "<model_name>-<data_id>"
    data_id: str = "057"
    n_folds: int = 5
    seed: int = 42  # passed to both the objective and the sampler

    # Optuna
    n_trials: int = 1
    direction: str = "maximize"
    sampler: str = "tpe"  # tpe / random
    pruner: str = "median"  # median / none

    # Initial params
    # "manual" loads the params listed in initial_param_sources; "never" skips loading.
    use_initial: Literal["never", "manual"] = "never"
    initial_param_sources: list[tuple[str, int]] = field(default_factory=list)   # (study_name, n_trial), e.g. ("xgb-001", 1)

    # Storage
    # NOTE(review): absolute, machine-specific SQLite URL -- adjust when running elsewhere.
    storage: str = "sqlite:////home/hanse/kaggle/binary-bank/artifacts/optuna/optuna.db"

    # Option
    opts: dict = field(default_factory=dict)  # forwarded unchanged to create_objective(opts=...)


# Run configuration. The (study_name, n_trial) source registered here is only
# read when cfg.use_initial == "manual".
cfg = Config(initial_param_sources=[("lgbm-057", 16)])

# W&B: project name and API key are taken from the environment.
wandb_project = os.getenv("COMPETITION_NAME")
wandb.login(key=os.getenv("WANDB_API_KEY"))

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/hanse/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mkaitookano[0m ([33mkaitookano-waseda-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

### Build & Run

In [3]:
# --- helper: initial params loader ---
def load_initial_params(
    sources: list[tuple[str, int]],
    base_dir: "str | Path" = "../../artifacts/optuna",
) -> list[dict]:
    """Load saved trial parameters to use as initial points for a search.

    For every ``(study, n_trial)`` pair the file
    ``<base_dir>/<study>/trl<n_trial>.json`` is read and the value stored
    under its ``"params"`` key is collected.

    Args:
        sources: ``(study_name, n_trial)`` pairs identifying the trial files.
        base_dir: Directory containing one sub-directory per study. Defaults
            to the notebook-relative artifacts directory.

    Returns:
        One entry per source, in the same order as ``sources``. An empty
        ``sources`` yields an empty list.

    Raises:
        FileNotFoundError: If a trial file does not exist.
        KeyError: If a trial file has no ``"params"`` key.
        json.JSONDecodeError: If a trial file is not valid JSON.
    """
    root = Path(base_dir)
    loaded = []
    for study, n_trial in sources:
        path = root / study / f"trl{n_trial}.json"
        # Read as UTF-8 explicitly instead of relying on the platform's
        # locale-dependent default encoding.
        with path.open("r", encoding="utf-8") as f:
            loaded.append(json.load(f)["params"])
    return loaded


# sampler / pruner factory
def build_sampler(name, seed):
    """Create the Optuna sampler selected by ``name``.

    Args:
        name: ``"tpe"`` for a ``TPESampler`` (built with ``n_startup_trials=15``)
            or ``"random"`` for a ``RandomSampler``.
        seed: Seed forwarded to the sampler.

    Raises:
        ValueError: If ``name`` is neither ``"tpe"`` nor ``"random"``.
    """
    # Reject unsupported names up front, then build the requested sampler.
    if name not in ("tpe", "random"):
        raise ValueError(f"unknown sampler: {name}")

    samplers = optuna.samplers
    if name == "tpe":
        return samplers.TPESampler(n_startup_trials=15, seed=seed)
    return samplers.RandomSampler(seed=seed)


def build_pruner(name):
    if name == "median":
        return optuna.pruners.MedianPruner(n_startup_trials=10, n_warmup_steps=1000)
    elif name == "none":
        return optuna.pruners.NopPruner()
    else:
        raise ValueError(f"unknown pruner: {name}")


# The same "<model_name>-<data_id>" name labels both the objective and the study.
study_name = f"{cfg.model_name}-{cfg.data_id}"

objective = create_objective(
    cfg.model_name,
    cfg.data_id,
    seed=cfg.seed,
    n_folds=cfg.n_folds,
    wandb_project=wandb_project,
    study_name=study_name,
    opts=cfg.opts,
)

sampler = build_sampler(cfg.sampler, cfg.seed)
pruner = build_pruner(cfg.pruner)

# Saved params are loaded only in "manual" mode; otherwise None is passed on.
initial_params = (
    load_initial_params(cfg.initial_param_sources)
    if cfg.use_initial == "manual"
    else None
)

# Kept as the cell's last expression so its return value is displayed as output.
op.run_optuna_search(
    objective,
    n_trials=cfg.n_trials,
    direction=cfg.direction,
    study_name=study_name,
    storage=cfg.storage,
    sampler=sampler,
    pruner=pruner,
    initial_params=initial_params,
)

[I 2025-11-04 21:18:21,355] Using an existing study with name 'mlp-057' instead of creating a new one.


[initial] none


  0%|          | 0/1 [00:00<?, ?it/s]

Fold Col: 5fold-s42
Free CPU Mem: 14.08 GB
Free GPU Mem: 7.07 GB
Epoch 1: Train LogLoss = 0.13462, Val LogLoss = 0.13369
New best model saved at epoch 1, Logloss: 0.13369
Epoch 2: Train LogLoss = 0.13215, Val LogLoss = 0.13186
New best model saved at epoch 2, Logloss: 0.13186
Epoch 3: Train LogLoss = 0.13085, Val LogLoss = 0.13109
New best model saved at epoch 3, Logloss: 0.13109
Epoch 4: Train LogLoss = 0.12983, Val LogLoss = 0.13078
New best model saved at epoch 4, Logloss: 0.13078
Epoch 5: Train LogLoss = 0.12884, Val LogLoss = 0.13045
New best model saved at epoch 5, Logloss: 0.13045
Epoch 6: Train LogLoss = 0.12802, Val LogLoss = 0.13020
New best model saved at epoch 6, Logloss: 0.13020
Epoch 7: Train LogLoss = 0.12694, Val LogLoss = 0.12968
New best model saved at epoch 7, Logloss: 0.12968
Epoch 8: Train LogLoss = 0.12628, Val LogLoss = 0.12987
Epoch 9: Train LogLoss = 0.12529, Val LogLoss = 0.12963
New best model saved at epoch 9, Logloss: 0.12963
Epoch 10: Train LogLoss = 0.124

0,1
epoch_f1,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇██
meta/f1/lr,██████▇▇▇▇▆▆▅▅▅▄▄▃▃▂▂▁
train/f1/accuracy,▁▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▆▆▇▇▇█
train/f1/auc,▁▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▆▆▇▇▇█
train/f1/log_loss,█▇▇▇▇▆▆▆▆▆▅▅▅▄▄▄▃▃▃▂▂▁
train/f1/mae,█▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▃▃▃▂▂▁
train/f1/r2,▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇█
train/f1/rmse,█▇▇▇▇▆▆▆▆▆▅▅▅▄▄▄▄▃▃▂▂▁
valid/f1/accuracy,▁▃▄▄▅▆▆▆▇▇█▇██▇█▇▇▇▇▅▅
valid/f1/auc,▁▄▅▅▆▆▇▇██████▇▆▆▅▅▃▃▂

0,1
accuracy_f1,0.94552
auc_f1,0.97485
epoch_f1,21
log_loss_f1,0.12939
mae_f1,0.07556
meta/f1/lr,0.00133
r2_f1,0.62854
rmse_f1,0.19852
runtime_f1,3.91748
train/f1/accuracy,0.95354


[I 2025-11-04 21:22:20,991] Trial 6 finished with value: 0.9748478758554103 and parameters: {'num_layers': 2, 'hidden_dim1': 992, 'hidden_dim2': 768, 'hidden_dim3': -1, 'hidden_dim4': -1, 'batch_size': 864, 'lr': 0.0020513382630874496, 'eta_min': 0.00014321698289111514, 'dropout_rate': 0.1, 'activation': 'ReLU'}. Best is trial 0 with value: 0.9758143553433848.
✅ Message sent.


<optuna.study.study.Study at 0x7be5780382f0>