In [1]:
from pathlib import Path
import sys
import pandas as pd
import numpy as np
from IPython.display import display



NA_VALUES = ["---", "--", "", " ", "NA", "N/A"]
# =========================
# FILE FINDER
# =========================
def find_file(name, start=Path.cwd()):
    for ancestor in [start] + list(start.parents):
        matches = list(ancestor.rglob(name))
        if matches:
            return matches[0]
    return None


def find_files(file_map):
    found = {}
    for key, filename in file_map.items():
        path = find_file(filename)
        if path:
            found[key] = path
        else:
            print(f"[WARNING] File not found: {filename}")
    return found

eda_script_path = find_file("script_eda.py")
if eda_script_path is None:
    raise FileNotFoundError("❌ script_eda.py tidak ditemukan di parent directory")

# tambahkan BASE PROJECT ke sys.path
sys.path.append(str(eda_script_path.parent))

# sekarang bisa import
from script_eda import evaluate_dataset, extract_column_schema,find_internal_duplicate_columns,extract_single_schema,cek_value_data_column








In [2]:
path = find_file("merged_cuaca_ndvi_ispu.csv")

if path is None:
    raise FileNotFoundError("❌ File merged tidak ditemukan")

df = pd.read_csv(path, na_values=NA_VALUES)

df.head()

Unnamed: 0,tanggal,periode_data,stasiun,pm_sepuluh,sulfur_dioksida,karbon_monoksida,ozon,nitrogen_dioksida,max,parameter_pencemar_kritis,...,cloud_cover_min (%),wind_gusts_10m_mean (km/h),wind_speed_10m_mean (km/h),wind_gusts_10m_min (km/h),wind_speed_10m_min (km/h),surface_pressure_max (hPa),surface_pressure_min (hPa),lokasi,lokasi_clean,ndvi
0,2010-01-01,201001,DKI1 (Bunderan HI),60.0,4.0,73.0,27.0,14.0,73.0,CO,...,99.0,21.0,10.5,11.9,6.9,1009.3,1005.1,dki1_bundaranhi,DKI1,0.2023
1,2010-01-02,201001,DKI1 (Bunderan HI),32.0,2.0,16.0,33.0,9.0,33.0,O3,...,91.0,16.5,7.7,9.0,4.4,1009.9,1006.0,dki1_bundaranhi,DKI1,0.2023
2,2010-01-03,201001,DKI1 (Bunderan HI),27.0,2.0,19.0,20.0,9.0,27.0,PM10,...,81.0,18.4,9.4,11.9,6.5,1010.5,1006.5,dki1_bundaranhi,DKI1,0.2023
3,2010-01-04,201001,DKI1 (Bunderan HI),22.0,2.0,16.0,15.0,6.0,22.0,PM10,...,17.0,23.8,13.5,14.4,9.6,1009.1,1005.1,dki1_bundaranhi,DKI1,0.2023
4,2010-01-05,201001,DKI1 (Bunderan HI),25.0,2.0,17.0,15.0,8.0,25.0,PM10,...,99.0,21.6,11.1,10.4,7.8,1009.1,1006.0,dki1_bundaranhi,DKI1,0.2023


In [3]:
from sklearn.preprocessing import LabelEncoder
TARGET = "kategori"

DROP_COLS = [
    # =========================
    # TARGET & ISPU INTERNAL
    # =========================
    TARGET,
    "max",
    "parameter_pencemar_kritis","pm_sepuluh","sulfur_dioksida","karbon_monoksida","ozon","nitrogen_dioksida",

    # =========================
    # IDENTIFIER / NON-FEATURE
    # =========================
    "id",
    "tanggal",
    "periode_data",
    "time",
    "stasiun",
    "lokasi",
    "lokasi_clean",

    # =========================
    # CUACA — DROP MIN & MAX
    # =========================
    "temperature_2m_max (Â°C)",
    "temperature_2m_min (Â°C)",

    "relative_humidity_2m_max (%)",
    "relative_humidity_2m_min (%)",

    "cloud_cover_max (%)",
    "cloud_cover_min (%)",

    "surface_pressure_max (hPa)",
    "surface_pressure_min (hPa)",

    "wind_speed_10m_max (km/h)",
    "wind_speed_10m_min (km/h)",

    "wind_gusts_10m_max (km/h)",
    "wind_gusts_10m_min (km/h)",

    # =========================
    # ARAH ANGIN (DROP TOTAL)
    # =========================
    "wind_direction_10m_dominant (Â°)",
    "winddirection_10m_dominant (Â°)",
]


X = df.drop(columns=[c for c in DROP_COLS if c in df.columns])
y = df[TARGET]

le = LabelEncoder()
y_enc = le.fit_transform(y)


In [4]:
WINDOWS = {
    "W1": (("2010-01-01", "2022-12-31"), ("2023-01-01", "2023-06-30")),
    "W2": (("2010-01-01", "2023-06-30"), ("2023-07-01", "2023-12-31")),
    "W3": (("2010-01-01", "2023-12-31"), ("2024-01-01", "2024-12-31")),
    "W4": (("2010-01-01", "2024-12-31"), ("2025-01-01", "2025-12-31")), 
}


#### Set Up trainning


In [5]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score, confusion_matrix

from lightgbm import LGBMClassifier
from xgboost import XGBClassifier


In [6]:
import os
import random
import numpy as np

SEED = 42

os.environ["PYTHONHASHSEED"] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)


In [7]:
MODELS = {
    "LightGBM": LGBMClassifier(
        objective="multiclass",
        class_weight="balanced",
        n_estimators=500,
        learning_rate=0.05,
        random_state=42
    ),
    "XGBoost": XGBClassifier(
        objective="multi:softprob",
        eval_metric="mlogloss",
        n_estimators=500,
        learning_rate=0.05,
        max_depth=6,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42
    )
}

In [8]:
results = []

for w_name, (train_rng, val_rng) in WINDOWS.items():
    print(f"\n================ {w_name} ================")

    train_mask = (df['tanggal'] >= train_rng[0]) & (df['tanggal'] <= train_rng[1])
    val_mask   = (df['tanggal'] >= val_rng[0])   & (df['tanggal'] <= val_rng[1])

    X_train, X_val = X[train_mask], X[val_mask]
    y_train, y_val = y_enc[train_mask], y_enc[val_mask]

    print(f"Train size: {X_train.shape}, Val size: {X_val.shape}")

    for model_name, model in MODELS.items():
        print(f"\n--- {model_name} ---")

        model.fit(X_train, y_train)
        y_pred = model.predict(X_val)

        macro_f1 = f1_score(y_val, y_pred, average="macro")
        print(f"Macro-F1: {macro_f1:.4f}")

        # Confusion Matrix
        cm = confusion_matrix(y_val, y_pred)
        print("Confusion Matrix:")
        print(cm)

        # collapse check
        if len(np.unique(y_pred)) < len(np.unique(y_val)):
            print("⚠️ WARNING: model collapse ke kelas mayoritas")

        results.append({
            "window": w_name,
            "model": model_name,
            "macro_f1": macro_f1
        })



Train size: (10426, 14), Val size: (893, 14)

--- LightGBM ---
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000721 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2209
[LightGBM] [Info] Number of data points in the train set: 10426, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[WinError 2] The system cannot find the file specified
  File "C:\Users\USER\AppData\Roaming\Python\Python312\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "c:\Program Files\Python312\Lib\subprocess.py", line 548, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Program Files\Python312\Lib\subprocess.py", line 1026, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "c:\Program Files\Python312\Lib\subprocess.py", line 1538, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


Macro-F1: 0.3641
Confusion Matrix:
[[104   0  72  17]
 [  0   0   0   0]
 [116   2 436  89]
 [  0   0  37  20]]

--- XGBoost ---
Macro-F1: 0.4759
Confusion Matrix:
[[ 75 110   8]
 [ 80 529  34]
 [  0  46  11]]

Train size: (11319, 14), Val size: (911, 14)

--- LightGBM ---
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000710 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2219
[LightGBM] [Info] Number of data points in the train set: 11319, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
Macro-F1: 0.2615
Confusion Matrix:
[[  0   0  24  19]
 [  0   0   3   0]
 [ 12   4 413 286]
 [  0   0  50 100]]

--- XGBoost ---
Macro-F1: 0.2847
Confusion Matrix:
[[ 

In [9]:
results_df = pd.DataFrame(results)

summary = (
    results_df
    .groupby("model")["macro_f1"]
    .agg(["mean", "std"])
    .sort_values("mean", ascending=False)
)

print("\n===== MODEL COMPARISON SUMMARY =====")
print(summary)



===== MODEL COMPARISON SUMMARY =====
              mean       std
model                       
XGBoost   0.376852  0.078792
LightGBM  0.334057  0.053222


In [10]:
train_rng, val_rng = WINDOWS["W3"]

train_mask = (df['tanggal'] >= train_rng[0]) & (df['tanggal'] <= train_rng[1])
val_mask   = (df['tanggal'] >= val_rng[0])   & (df['tanggal'] <= val_rng[1])

X_train, X_val = X[train_mask], X[val_mask]
y_train, y_val = y_enc[train_mask], y_enc[val_mask]

print("W3 Train:", X_train.shape)
print("W3 Val  :", X_val.shape)

W3 Train: (12230, 14)
W3 Val  : (1824, 14)


In [11]:
import optuna
from lightgbm import LGBMClassifier
from sklearn.metrics import f1_score
import numpy as np

def objective(trial):
    params = {
        "objective": "multiclass",
        "class_weight": "balanced",
        "n_estimators": 500,

        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.08),
        "num_leaves": trial.suggest_int("num_leaves", 16, 64),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 30, 200),
        "max_depth": trial.suggest_int("max_depth", 3, 10),

        "feature_fraction": trial.suggest_float("feature_fraction", 0.6, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.6, 1.0),

        "random_state": SEED,
        "n_jobs": 1
    }

    model = LGBMClassifier(**params)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_val)

    # Macro-F1 (utama)
    macro_f1 = f1_score(y_val, y_pred, average="macro")

    # Penalti collapse (jika prediksi < jumlah kelas)
    n_pred_class = len(np.unique(y_pred))
    n_true_class = len(np.unique(y_val))

    if n_pred_class < n_true_class:
        macro_f1 -= 0.05  # penalti ringan tapi tegas

    return macro_f1


In [12]:
from tqdm.auto import tqdm

N_TRIALS = 50
pbar = tqdm(total=N_TRIALS, desc="Optuna Tuning (W3)")

def tqdm_callback(study, trial):
    pbar.update(1)

study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=SEED)
)

study.optimize(
    objective,
    n_trials=N_TRIALS,
    callbacks=[tqdm_callback]  
)

pbar.close()  # <-- tutup progress bar

print("Best Macro-F1 (W3):", study.best_value)
print("Best Params:", study.best_params)


Optuna Tuning (W3):   0%|          | 0/50 [00:00<?, ?it/s]

[I 2026-02-02 02:11:38,334] A new study created in memory with name: no-name-2a2392e0-b364-4125-8295-67f84477bda7


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002489 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:11:43,665] Trial 0 finished with value: 0.3390180214108374 and parameters: {'learning_rate': 0.03621780831931538, 'num_leaves': 62, 'min_data_in_leaf': 155, 'max_depth': 7, 'feature_fraction': 0.6624074561769746, 'bagging_fraction': 0.662397808134481}. Best is trial 0 with value: 0.3390180214108374.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002044 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:11:50,479] Trial 1 finished with value: 0.3489249408813621 and parameters: {'learning_rate': 0.014065852851773964, 'num_leaves': 58, 'min_data_in_leaf': 132, 'max_depth': 8, 'feature_fraction': 0.608233797718321, 'bagging_fraction': 0.9879639408647978}. Best is trial 1 with value: 0.3489249408813621.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002131 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:11:54,063] Trial 2 finished with value: 0.3414597839218541 and parameters: {'learning_rate': 0.06827098485602953, 'num_leaves': 26, 'min_data_in_leaf': 61, 'max_depth': 4, 'feature_fraction': 0.7216968971838151, 'bagging_fraction': 0.8099025726528951}. Best is trial 1 with value: 0.3489249408813621.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002075 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:11:57,784] Trial 3 finished with value: 0.34396487330302933 and parameters: {'learning_rate': 0.040236151304948106, 'num_leaves': 30, 'min_data_in_leaf': 134, 'max_depth': 4, 'feature_fraction': 0.7168578594140873, 'bagging_fraction': 0.7465447373174767}. Best is trial 1 with value: 0.3489249408813621.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002069 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:12:04,188] Trial 4 finished with value: 0.35573678569247313 and parameters: {'learning_rate': 0.04192489889519252, 'num_leaves': 54, 'min_data_in_leaf': 64, 'max_depth': 7, 'feature_fraction': 0.836965827544817, 'bagging_fraction': 0.6185801650879991}. Best is trial 4 with value: 0.35573678569247313.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002240 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:12:09,891] Trial 5 finished with value: 0.3590762723840672 and parameters: {'learning_rate': 0.05252813963310069, 'num_leaves': 24, 'min_data_in_leaf': 41, 'max_depth': 10, 'feature_fraction': 0.9862528132298237, 'bagging_fraction': 0.9233589392465844}. Best is trial 5 with value: 0.3590762723840672.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002164 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:12:14,726] Trial 6 finished with value: 0.34949300693825536 and parameters: {'learning_rate': 0.031322963842135954, 'num_leaves': 20, 'min_data_in_leaf': 147, 'max_depth': 6, 'feature_fraction': 0.6488152939379115, 'bagging_fraction': 0.798070764044508}. Best is trial 5 with value: 0.3590762723840672.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002001 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:12:22,157] Trial 7 finished with value: 0.35126577732524134 and parameters: {'learning_rate': 0.012407196478065288, 'num_leaves': 60, 'min_data_in_leaf': 74, 'max_depth': 8, 'feature_fraction': 0.7246844304357644, 'bagging_fraction': 0.8080272084711243}. Best is trial 5 with value: 0.3590762723840672.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002147 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:12:28,328] Trial 8 finished with value: 0.3453967422066809 and parameters: {'learning_rate': 0.04826971955402958, 'num_leaves': 25, 'min_data_in_leaf': 195, 'max_depth': 9, 'feature_fraction': 0.9757995766256756, 'bagging_fraction': 0.9579309401710595}. Best is trial 5 with value: 0.3590762723840672.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002087 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:12:31,894] Trial 9 finished with value: 0.3477200235467198 and parameters: {'learning_rate': 0.05185299851677597, 'num_leaves': 61, 'min_data_in_leaf': 45, 'max_depth': 4, 'feature_fraction': 0.6180909155642152, 'bagging_fraction': 0.7301321323053057}. Best is trial 5 with value: 0.3590762723840672.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002203 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:12:38,856] Trial 10 finished with value: 0.35000965668876494 and parameters: {'learning_rate': 0.07685109125031989, 'num_leaves': 39, 'min_data_in_leaf': 93, 'max_depth': 10, 'feature_fraction': 0.9935584941681304, 'bagging_fraction': 0.9129509079547271}. Best is trial 5 with value: 0.3590762723840672.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002347 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:12:44,531] Trial 11 finished with value: 0.36313320831409357 and parameters: {'learning_rate': 0.05856777319215126, 'num_leaves': 47, 'min_data_in_leaf': 36, 'max_depth': 6, 'feature_fraction': 0.887352152856701, 'bagging_fraction': 0.6034744143986677}. Best is trial 11 with value: 0.36313320831409357.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002264 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:12:50,277] Trial 12 finished with value: 0.3713629003987841 and parameters: {'learning_rate': 0.059390958672122365, 'num_leaves': 46, 'min_data_in_leaf': 33, 'max_depth': 6, 'feature_fraction': 0.8945402084262539, 'bagging_fraction': 0.89164389641972}. Best is trial 12 with value: 0.3713629003987841.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002086 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:12:55,317] Trial 13 finished with value: 0.3447708006033101 and parameters: {'learning_rate': 0.06333081544873845, 'num_leaves': 47, 'min_data_in_leaf': 97, 'max_depth': 6, 'feature_fraction': 0.8740371923759868, 'bagging_fraction': 0.873261651112228}. Best is trial 12 with value: 0.3713629003987841.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002102 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:13:00,125] Trial 14 finished with value: 0.361859877347447 and parameters: {'learning_rate': 0.06091930634226341, 'num_leaves': 41, 'min_data_in_leaf': 30, 'max_depth': 5, 'feature_fraction': 0.869541921340321, 'bagging_fraction': 0.6011809691592997}. Best is trial 12 with value: 0.3713629003987841.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002082 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:13:03,482] Trial 15 finished with value: 0.3462976964427916 and parameters: {'learning_rate': 0.07995537070490863, 'num_leaves': 48, 'min_data_in_leaf': 89, 'max_depth': 3, 'feature_fraction': 0.9191979476316068, 'bagging_fraction': 0.8609089845160692}. Best is trial 12 with value: 0.3713629003987841.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001969 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:13:07,842] Trial 16 finished with value: 0.3542322179315243 and parameters: {'learning_rate': 0.0688842341728468, 'num_leaves': 35, 'min_data_in_leaf': 51, 'max_depth': 5, 'feature_fraction': 0.7806289735128839, 'bagging_fraction': 0.7022762860616638}. Best is trial 12 with value: 0.3713629003987841.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002136 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:13:12,606] Trial 17 finished with value: 0.34729046619622694 and parameters: {'learning_rate': 0.05711642450050793, 'num_leaves': 49, 'min_data_in_leaf': 112, 'max_depth': 5, 'feature_fraction': 0.9146511487079994, 'bagging_fraction': 0.8703838816433833}. Best is trial 12 with value: 0.3713629003987841.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002000 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:13:19,442] Trial 18 finished with value: 0.35133305732305414 and parameters: {'learning_rate': 0.030135591111163366, 'num_leaves': 43, 'min_data_in_leaf': 76, 'max_depth': 8, 'feature_fraction': 0.8034813455384235, 'bagging_fraction': 0.767571017153968}. Best is trial 12 with value: 0.3713629003987841.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002256 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:13:25,436] Trial 19 finished with value: 0.3709587950579078 and parameters: {'learning_rate': 0.07037999681368615, 'num_leaves': 53, 'min_data_in_leaf': 30, 'max_depth': 6, 'feature_fraction': 0.9362148766913249, 'bagging_fraction': 0.6687011707534343}. Best is trial 12 with value: 0.3713629003987841.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002041 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:13:30,906] Trial 20 finished with value: 0.3433355808656325 and parameters: {'learning_rate': 0.07271344011802469, 'num_leaves': 54, 'min_data_in_leaf': 183, 'max_depth': 7, 'feature_fraction': 0.9430469442806572, 'bagging_fraction': 0.6717501999416441}. Best is trial 12 with value: 0.3713629003987841.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002085 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:13:36,436] Trial 21 finished with value: 0.35697605609056043 and parameters: {'learning_rate': 0.06337887592267143, 'num_leaves': 52, 'min_data_in_leaf': 33, 'max_depth': 6, 'feature_fraction': 0.8837894480561455, 'bagging_fraction': 0.6417956426079291}. Best is trial 12 with value: 0.3713629003987841.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002296 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:13:42,045] Trial 22 finished with value: 0.35588601685711596 and parameters: {'learning_rate': 0.05903532954238931, 'num_leaves': 36, 'min_data_in_leaf': 53, 'max_depth': 6, 'feature_fraction': 0.9429124515981293, 'bagging_fraction': 0.688138241684752}. Best is trial 12 with value: 0.3713629003987841.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002014 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:13:46,617] Trial 23 finished with value: 0.37109685124678 and parameters: {'learning_rate': 0.06942106746304014, 'num_leaves': 44, 'min_data_in_leaf': 31, 'max_depth': 5, 'feature_fraction': 0.835546219804745, 'bagging_fraction': 0.6273029413898896}. Best is trial 12 with value: 0.3713629003987841.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002017 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:13:50,851] Trial 24 finished with value: 0.3533382600076811 and parameters: {'learning_rate': 0.07066355413270363, 'num_leaves': 44, 'min_data_in_leaf': 70, 'max_depth': 5, 'feature_fraction': 0.8168307923002049, 'bagging_fraction': 0.7137440747408894}. Best is trial 12 with value: 0.3713629003987841.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002001 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:13:53,844] Trial 25 finished with value: 0.34066660692598866 and parameters: {'learning_rate': 0.07369367284196848, 'num_leaves': 35, 'min_data_in_leaf': 55, 'max_depth': 3, 'feature_fraction': 0.8422859548397726, 'bagging_fraction': 0.6477791205694369}. Best is trial 12 with value: 0.3713629003987841.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002282 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:13:58,536] Trial 26 finished with value: 0.3581486586177405 and parameters: {'learning_rate': 0.06561186812818379, 'num_leaves': 51, 'min_data_in_leaf': 31, 'max_depth': 5, 'feature_fraction': 0.7537100981242278, 'bagging_fraction': 0.838817870015776}. Best is trial 12 with value: 0.3713629003987841.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002265 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:14:05,331] Trial 27 finished with value: 0.35368305741432154 and parameters: {'learning_rate': 0.052511347666443074, 'num_leaves': 57, 'min_data_in_leaf': 82, 'max_depth': 7, 'feature_fraction': 0.9542343643015132, 'bagging_fraction': 0.9044793283943926}. Best is trial 12 with value: 0.3713629003987841.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002076 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:14:09,119] Trial 28 finished with value: 0.35252421512398635 and parameters: {'learning_rate': 0.0761820593777039, 'num_leaves': 39, 'min_data_in_leaf': 45, 'max_depth': 4, 'feature_fraction': 0.8388334232298026, 'bagging_fraction': 0.768849665682507}. Best is trial 12 with value: 0.3713629003987841.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003097 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:14:14,678] Trial 29 finished with value: 0.34099971214765123 and parameters: {'learning_rate': 0.06651117172747392, 'num_leaves': 64, 'min_data_in_leaf': 169, 'max_depth': 7, 'feature_fraction': 0.9137899187460268, 'bagging_fraction': 0.6400057166850222}. Best is trial 12 with value: 0.3713629003987841.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002142 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:14:19,469] Trial 30 finished with value: 0.33982177579194234 and parameters: {'learning_rate': 0.07995054339586412, 'num_leaves': 44, 'min_data_in_leaf': 114, 'max_depth': 6, 'feature_fraction': 0.784470702307805, 'bagging_fraction': 0.6841255838754161}. Best is trial 12 with value: 0.3713629003987841.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002031 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:14:24,984] Trial 31 finished with value: 0.3664542227779188 and parameters: {'learning_rate': 0.05763187219398967, 'num_leaves': 46, 'min_data_in_leaf': 41, 'max_depth': 6, 'feature_fraction': 0.8914312677259315, 'bagging_fraction': 0.6095066118173634}. Best is trial 12 with value: 0.3713629003987841.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002403 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:14:29,702] Trial 32 finished with value: 0.3566351206954642 and parameters: {'learning_rate': 0.047850109674110027, 'num_leaves': 56, 'min_data_in_leaf': 43, 'max_depth': 5, 'feature_fraction': 0.8993217298284331, 'bagging_fraction': 0.6617725383097897}. Best is trial 12 with value: 0.3713629003987841.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002001 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:14:34,974] Trial 33 finished with value: 0.35998464670261493 and parameters: {'learning_rate': 0.0548960553830966, 'num_leaves': 51, 'min_data_in_leaf': 53, 'max_depth': 6, 'feature_fraction': 0.8565700419170825, 'bagging_fraction': 0.6245805946190478}. Best is trial 12 with value: 0.3713629003987841.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002055 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:14:38,933] Trial 34 finished with value: 0.3530590953343943 and parameters: {'learning_rate': 0.06240584092807286, 'num_leaves': 45, 'min_data_in_leaf': 65, 'max_depth': 4, 'feature_fraction': 0.9332208075758166, 'bagging_fraction': 0.9963910655147282}. Best is trial 12 with value: 0.3713629003987841.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002154 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:14:46,109] Trial 35 finished with value: 0.3580554845972036 and parameters: {'learning_rate': 0.0188565882559865, 'num_leaves': 41, 'min_data_in_leaf': 59, 'max_depth': 8, 'feature_fraction': 0.8508648414368964, 'bagging_fraction': 0.6315100787900213}. Best is trial 12 with value: 0.3713629003987841.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002548 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:14:51,908] Trial 36 finished with value: 0.3732906153070922 and parameters: {'learning_rate': 0.06878372315657516, 'num_leaves': 31, 'min_data_in_leaf': 41, 'max_depth': 7, 'feature_fraction': 0.9706719587305236, 'bagging_fraction': 0.96434250343296}. Best is trial 36 with value: 0.3732906153070922.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002074 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:14:57,544] Trial 37 finished with value: 0.3761781305451972 and parameters: {'learning_rate': 0.07097685181824992, 'num_leaves': 31, 'min_data_in_leaf': 30, 'max_depth': 7, 'feature_fraction': 0.9640572374098972, 'bagging_fraction': 0.932509127045582}. Best is trial 37 with value: 0.3761781305451972.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000659 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:15:03,018] Trial 38 finished with value: 0.3581564946710696 and parameters: {'learning_rate': 0.06710469873284601, 'num_leaves': 29, 'min_data_in_leaf': 105, 'max_depth': 7, 'feature_fraction': 0.9753793918716039, 'bagging_fraction': 0.9569309709055783}. Best is trial 37 with value: 0.3761781305451972.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002312 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:15:09,163] Trial 39 finished with value: 0.35074866971560775 and parameters: {'learning_rate': 0.0753771683341607, 'num_leaves': 31, 'min_data_in_leaf': 128, 'max_depth': 9, 'feature_fraction': 0.9621938769746063, 'bagging_fraction': 0.961537632398352}. Best is trial 37 with value: 0.3761781305451972.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002145 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:15:16,005] Trial 40 finished with value: 0.36253935536478743 and parameters: {'learning_rate': 0.040510506734259766, 'num_leaves': 31, 'min_data_in_leaf': 40, 'max_depth': 8, 'feature_fraction': 0.9678381778758314, 'bagging_fraction': 0.9327089975549921}. Best is trial 37 with value: 0.3761781305451972.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000794 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:15:21,133] Trial 41 finished with value: 0.3645011305140447 and parameters: {'learning_rate': 0.07107210552964799, 'num_leaves': 21, 'min_data_in_leaf': 30, 'max_depth': 7, 'feature_fraction': 0.9925703928370292, 'bagging_fraction': 0.9792244048441668}. Best is trial 37 with value: 0.3761781305451972.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002075 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:15:26,752] Trial 42 finished with value: 0.3562385427015021 and parameters: {'learning_rate': 0.0714058901388002, 'num_leaves': 28, 'min_data_in_leaf': 48, 'max_depth': 7, 'feature_fraction': 0.9235972927782095, 'bagging_fraction': 0.8914058771564305}. Best is trial 37 with value: 0.3761781305451972.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002165 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:15:33,266] Trial 43 finished with value: 0.3580143662233012 and parameters: {'learning_rate': 0.0658101932471836, 'num_leaves': 37, 'min_data_in_leaf': 60, 'max_depth': 8, 'feature_fraction': 0.9500503254848667, 'bagging_fraction': 0.9338633474460745}. Best is trial 37 with value: 0.3761781305451972.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002141 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:15:37,816] Trial 44 finished with value: 0.36267087793262764 and parameters: {'learning_rate': 0.0685304567866557, 'num_leaves': 16, 'min_data_in_leaf': 38, 'max_depth': 9, 'feature_fraction': 0.9982921387433126, 'bagging_fraction': 0.9478571082882833}. Best is trial 37 with value: 0.3761781305451972.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002088 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:15:43,924] Trial 45 finished with value: 0.36644179296399837 and parameters: {'learning_rate': 0.07684470191868224, 'num_leaves': 33, 'min_data_in_leaf': 38, 'max_depth': 7, 'feature_fraction': 0.9082416762393283, 'bagging_fraction': 0.9746784810368919}. Best is trial 37 with value: 0.3761781305451972.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002036 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:15:48,730] Trial 46 finished with value: 0.3607113676205698 and parameters: {'learning_rate': 0.06165093827971011, 'num_leaves': 27, 'min_data_in_leaf': 47, 'max_depth': 6, 'feature_fraction': 0.8210128334807875, 'bagging_fraction': 0.8276155831677042}. Best is trial 37 with value: 0.3761781305451972.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002011 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:15:53,244] Trial 47 finished with value: 0.3539519351926811 and parameters: {'learning_rate': 0.07385738451901791, 'num_leaves': 33, 'min_data_in_leaf': 65, 'max_depth': 5, 'feature_fraction': 0.8652521330588858, 'bagging_fraction': 0.9028402592721789}. Best is trial 37 with value: 0.3761781305451972.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003260 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:15:58,736] Trial 48 finished with value: 0.37020365830136853 and parameters: {'learning_rate': 0.03535397518440899, 'num_leaves': 24, 'min_data_in_leaf': 31, 'max_depth': 6, 'feature_fraction': 0.9784030315999569, 'bagging_fraction': 0.7863983334338038}. Best is trial 37 with value: 0.3761781305451972.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002021 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2247
[LightGBM] [Info] Number of data points in the train set: 12230, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


[I 2026-02-02 02:16:04,266] Trial 49 finished with value: 0.3414028850047807 and parameters: {'learning_rate': 0.06431451700368489, 'num_leaves': 42, 'min_data_in_leaf': 158, 'max_depth': 7, 'feature_fraction': 0.925035335637314, 'bagging_fraction': 0.9214111554971592}. Best is trial 37 with value: 0.3761781305451972.


Best Macro-F1 (W3): 0.3761781305451972
Best Params: {'learning_rate': 0.07097685181824992, 'num_leaves': 31, 'min_data_in_leaf': 30, 'max_depth': 7, 'feature_fraction': 0.9640572374098972, 'bagging_fraction': 0.932509127045582}


In [13]:
final_train_mask = (
    (df['tanggal'] >= "2010-01-01") &
    (df['tanggal'] <= "2024-12-31")
)

X_final = X[final_train_mask]
y_final = y_enc[final_train_mask]

print("Final training size:", X_final.shape)


Final training size: (14054, 14)


In [14]:
best_params = study.best_params

final_model = LGBMClassifier(
    **best_params,
    objective="multiclass",
    class_weight="balanced",
    n_estimators=500,
    random_state=SEED,
    n_jobs=1
)

final_model.fit(X_final, y_final)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002371 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2252
[LightGBM] [Info] Number of data points in the train set: 14054, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438


In [15]:
from sklearn.metrics import f1_score

f1_scores = {}

for w_name, (train_rng, val_rng) in WINDOWS.items():

    train_mask = (df['tanggal'] >= train_rng[0]) & (df['tanggal'] <= train_rng[1])
    val_mask   = (df['tanggal'] >= val_rng[0])   & (df['tanggal'] <= val_rng[1])

    X_train, X_val = X[train_mask], X[val_mask]
    y_train, y_val = y_enc[train_mask], y_enc[val_mask]

    model = LGBMClassifier(
        **best_params,
        objective="multiclass",
        class_weight="balanced",
        n_estimators=500,
        random_state=SEED,
        n_jobs=1
    )

    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)

    f1 = f1_score(y_val, y_pred, average="macro")
    f1_scores[w_name] = f1

    print(f"{w_name} Macro-F1: {f1:.4f}")

print("\nAverage Macro-F1:", np.mean(list(f1_scores.values())))


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001829 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2209
[LightGBM] [Info] Number of data points in the train set: 10426, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
W1 Macro-F1: 0.3745
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002081 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2219
[LightGBM] [Info] Number of data points in the train set: 11319, number of used features: 14
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start training from score -1.609438
[LightGBM] [Info] Start 

In [16]:
import joblib

joblib.dump(final_model, "lgbm_ispu_model.pkl")
joblib.dump(le, "label_encoder.pkl")

print("✅ Model dan LabelEncoder berhasil disimpan")


✅ Model dan LabelEncoder berhasil disimpan


In [17]:
path = find_file("sample_submission.csv")

if path is None:
    raise FileNotFoundError("❌ File merged tidak ditemukan")

df = pd.read_csv(path, na_values=NA_VALUES)

df.head()

Unnamed: 0,id,category
0,2025-09-01_DKI1,
1,2025-09-01_DKI2,
2,2025-09-01_DKI3,
3,2025-09-01_DKI4,
4,2025-09-01_DKI5,


In [19]:
test_df = pd.read_csv(path)
X_test = test_df[X.columns]
y_test_pred = final_model.predict(X_test)
test_df[TARGET] = le.inverse_transform(y_test_pred)
test_df[["id", TARGET]].to_csv("submission3_noispu.csv", index=False)


KeyError: "None of [Index(['temperature_2m_max (°C)', 'temperature_2m_min (°C)',\n       'precipitation_sum (mm)', 'precipitation_hours (h)',\n       'wind_direction_10m_dominant (°)', 'shortwave_radiation_sum (MJ/m²)',\n       'temperature_2m_mean (°C)', 'relative_humidity_2m_mean (%)',\n       'cloud_cover_mean (%)', 'surface_pressure_mean (hPa)',\n       'winddirection_10m_dominant (°)', 'wind_gusts_10m_mean (km/h)',\n       'wind_speed_10m_mean (km/h)', 'ndvi'],\n      dtype='object')] are in the [columns]"