In [3]:
import json
import os
import sys
import re
from dataclasses import dataclass
from pathlib import Path

import pandas as pd
import polars as pl
import wandb
from autogluon.tabular import TabularPredictor
from dotenv import load_dotenv

sys.path.append(os.path.abspath("../.."))

from src.utils.telegram import send_message

### Config

In [4]:
@dataclass
class Config:
    """Run configuration for the AutoGluon training notebook.

    ``study_name`` is derived from ``model_name`` and ``data_id`` on every
    access, so it always reflects the values of the instance it is read from
    (including values passed to the constructor or assigned afterwards).
    """

    data_id: str = "057"
    model_name: str = "ag"

    # AutoGluon
    label: str = "target"
    problem_type: str = "binary"
    eval_metric: str = "roc_auc"
    ag_path: str = "../../artifacts"

    # Forwarded to `predictor.fit(time_limit=...)`: 3600 * 10 = 36000.
    time_limit: int = 3600 * 10
    presets: str = "best_quality"
    auto_stack: bool = True

    @property
    def study_name(self) -> str:
        """Return ``"<model_name>_<data_id>"`` for this instance."""
        return f"{self.model_name}_{self.data_id}"


cfg = Config()
# Load environment variables from the project-level .env file.
load_dotenv(dotenv_path="../../.env")

# Directory holding the feature set selected by `cfg.data_id`.
feature_dir = Path(f"../../artifacts/features/{cfg.data_id}")

# Read the metadata with an explicit encoding so decoding does not depend on
# the platform's default locale.
with open(feature_dir / "meta.json", "r", encoding="utf-8") as f:
    meta = json.load(f)

# Values taken from meta.json; a missing key raises KeyError here.
train_paths = meta["train_paths"]
test_paths = meta["test_paths"]
level = meta["level"]

In [5]:
# === WANDB ===
# Project name and API key are read from the environment
# (presumably supplied through the .env file loaded earlier; either may be None).
wandb_project = os.environ.get("COMPETITION_NAME")
wandb_api_key = os.environ.get("WANDB_API_KEY")
wandb.login(key=wandb_api_key)

# Collect the run settings first so they can be inspected before the run starts.
wandb_init_kwargs = dict(
    project=wandb_project,
    group=cfg.study_name,
    name=cfg.data_id,
    job_type="cv_training",
    tags=[cfg.model_name, level],
    config=dict(
        data_id=cfg.data_id,
        level=level,
        model=cfg.model_name,
    ),
    dir="../../artifacts",
    reinit="finish_previous",
)
run = wandb.init(**wandb_init_kwargs)

[34m[1mwandb[0m: [32m[41mERROR[0m Failed to detect the name of this notebook. You can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/hanse/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mkaitookano[0m ([33mkaitookano-waseda-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [9]:
# Training
# Read zero rows: only the column names are needed at this point.
all_cols = pl.read_parquet(train_paths, n_rows=0).columns

# Non-feature bookkeeping columns that are actually present in the data.
# Stored under its own name so the `meta` dict loaded from meta.json is not
# overwritten by a set of column names.
non_feature_cols = {c for c in ("row_id", "weight") if c in all_cols}

# Matches names such as "5fold" or "5fold-abc"
# (presumably CV fold-assignment columns; confirm against the feature pipeline).
pat = re.compile(r"^\d+fold(?:-[A-Za-z0-9]+)?$")
features = [
    c for c in all_cols
    if c not in non_feature_cols and not pat.fullmatch(c)
]

# Fail fast with a clear message instead of discovering a missing label
# only after the training data has been loaded and handed to AutoGluon.
if cfg.label not in features:
    raise ValueError(
        f"Label column {cfg.label!r} not found in training data columns."
    )

# `features` still contains the label column, which TabularPredictor needs.
train = pl.read_parquet(train_paths, columns=features).to_pandas()

predictor = TabularPredictor(
    label=cfg.label,
    problem_type=cfg.problem_type,
    eval_metric=cfg.eval_metric,
    path=cfg.ag_path,
)
predictor.fit(
    train,
    time_limit=cfg.time_limit,
    presets=cfg.presets,
    auto_stack=cfg.auto_stack,
)

# Notification is sent only if `fit` returns without raising.
send_message("AutoGluon Training Completed!")

Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.4.0
Python Version:     3.10.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP PREEMPT_DYNAMIC Thu Jun  5 18:30:46 UTC 2025
CPU Count:          24
Memory Avail:       15.19 GB / 23.47 GB (64.7%)
Disk Space Avail:   754.04 GB / 1006.85 GB (74.9%)
Presets specified: ['best_quality']
Using hyperparameters preset: hyperparameters='zeroshot'
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to dete

KeyboardInterrupt: 