In [1]:
# train_random_forest_optuna.py – cleaned version (no date strings in X)
"""Daily‑level RandomForest + Optuna (multi‑output)

* Raw `lifelog_date` is **dropped** before the ColumnTransformer.
* Only numeric sensor features + `days_since_start` + one‑hot `subject_id` pass to model.
"""
from __future__ import annotations

import joblib, optuna, pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

AGG_STRATEGY = "mixed"  # sum | mean | mixed | both
TEST_SIZE = 0.30
N_TRIALS = 100
RANDOM_STATE = 42

def load_data():
    merged = pd.read_csv("/Users/jhlee/Desktop/고려대학교/4-1/데이터과학/프로젝트/ETRI_lifelog_dataset/merge_df.csv", parse_dates=["timestamp"])
    train_y = pd.read_csv("/Users/jhlee/Desktop/고려대학교/4-1/데이터과학/프로젝트/ETRI_lifelog_dataset/ch2025_metrics_train.csv", parse_dates=["lifelog_date"])
    submit  = pd.read_csv("/Users/jhlee/Desktop/고려대학교/4-1/데이터과학/프로젝트/ETRI_lifelog_dataset/ch2025_submission_sample.csv", parse_dates=["lifelog_date"])
    return merged, train_y, submit

def aggregate_features(df_raw: pd.DataFrame, strategy="mixed") -> pd.DataFrame:
    df = df_raw.copy()
    df["lifelog_date"] = df["timestamp"].dt.floor("D")
    id_cols = ["subject_id", "lifelog_date"]
    feat_cols = [c for c in df.columns if c not in id_cols + ["timestamp"]]
    if strategy == "sum":
        agg_rules = {c: "sum" for c in feat_cols}
    elif strategy == "mean":
        agg_rules = {c: "mean" for c in feat_cols}
    elif strategy == "mixed":
        sum_like = ["time", "distance", "calorie", "count", "usage"]
        agg_rules = {c: ("sum" if any(k in c.lower() for k in sum_like) else "mean") for c in feat_cols}
    elif strategy == "both":
        sum_df = df.groupby(id_cols)[feat_cols].sum().add_suffix("_sum")
        mean_df = df.groupby(id_cols)[feat_cols].mean().add_suffix("_mean")
        return sum_df.join(mean_df).reset_index()
    else:
        raise ValueError("Invalid AGG_STRATEGY")
    return df.groupby(id_cols).agg(agg_rules).reset_index()

def build_preprocessor(cat_cols, num_cols):
    return ColumnTransformer([
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
        ("num", "passthrough", num_cols),
    ], remainder="drop")

def build_model(trial, pre):
    rf = RandomForestClassifier(
        n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),
        max_depth=trial.suggest_int("max_depth", 6, 30),
        min_samples_split=trial.suggest_int("min_samples_split", 2, 10),
        min_samples_leaf=trial.suggest_int("min_samples_leaf", 1, 8),
        max_features=trial.suggest_categorical("max_features", ["sqrt", "log2"]),
        bootstrap=trial.suggest_categorical("bootstrap", [True, False]),
        random_state=RANDOM_STATE, n_jobs=-1)
    return Pipeline([("pre", pre), ("clf", MultiOutputClassifier(rf))])

def macro_metrics(y_true, y_pred):
    acc = accuracy_score(y_true.values.ravel(), y_pred.ravel())
    f1 = f1_score(y_true.values.ravel(), y_pred.ravel(), average="macro")
    return acc, f1

def objective(trial, X_tr, X_val, y_tr, y_val, pre):
    model = build_model(trial, pre)
    model.fit(X_tr, y_tr)
    preds = model.predict(X_val)
    acc, f1 = macro_metrics(y_val, preds)
    trial.set_user_attr("accuracy", acc)
    trial.set_user_attr("macro_f1", f1)
    return (acc + f1) / 2

def main():
    merged, train_y, submit = load_data()
    feats = aggregate_features(merged, AGG_STRATEGY)
    data = train_y.merge(feats, on=["subject_id", "lifelog_date"], how="inner")
    data["days_since_start"] = (data["lifelog_date"] - data["lifelog_date"].min()).dt.days
    target_cols = ["Q1", "Q2", "Q3", "S1", "S2", "S3"]

    # ---------------- explicit feature filtering ----------------
    base_features = [c for c in data.columns if c not in target_cols + ["lifelog_date"]]
    cat_cols = ["subject_id"]
    num_cols = [c for c in base_features if c != "subject_id" and pd.api.types.is_numeric_dtype(data[c])]
    feature_cols = cat_cols + num_cols  # guarantee lifelog_date absent

    pre = build_preprocessor(cat_cols, num_cols)
    X = data[feature_cols]
    y = data[target_cols]
    X_tr, X_val, y_tr, y_val = train_test_split(X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=y["Q1"])

    study = optuna.create_study(direction="maximize", study_name="RF_multioutput")
    study.optimize(lambda t: objective(t, X_tr, X_val, y_tr, y_val, pre), n_trials=N_TRIALS, show_progress_bar=True)

    print("Best accuracy:", study.best_trial.user_attrs["accuracy"])
    print("Best macro‑F1:", study.best_trial.user_attrs["macro_f1"])

    best_model = build_model(optuna.trial.FixedTrial(study.best_params), pre)
    best_model.fit(X, y)
    joblib.dump(best_model, "rf_multioutput_best.pkl")

    # -------- submission set
    sub = submit.merge(feats, on=["subject_id", "lifelog_date"], how="left")
    sub["days_since_start"] = (sub["lifelog_date"] - data["lifelog_date"].min()).dt.days
    X_sub = sub[feature_cols]
    submit[target_cols] = best_model.predict(X_sub)
    submit.to_csv("ch2025_submission_rf.csv", index=False)
    print("Saved → ch2025_submission_rf.csv")

if __name__ == "__main__":
    main()


[I 2025-05-20 16:04:55,308] A new study created in memory with name: RF_multioutput


  0%|          | 0/100 [00:00<?, ?it/s]

  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:04:56,233] Trial 0 finished with value: 0.5426800866240364 and parameters: {'n_estimators': 200, 'max_depth': 8, 'min_samples_split': 6, 'min_samples_leaf': 5, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 0 with value: 0.5426800866240364.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:00,186] Trial 1 finished with value: 0.5414366813652476 and parameters: {'n_estimators': 700, 'max_depth': 13, 'min_samples_split': 3, 'min_samples_leaf': 2, 'max_features': 'log2', 'bootstrap': True}. Best is trial 0 with value: 0.5426800866240364.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:00,998] Trial 2 finished with value: 0.5309127629773449 and parameters: {'n_estimators': 200, 'max_depth': 17, 'min_samples_split': 3, 'min_samples_leaf': 7, 'max_features': 'log2', 'bootstrap': False}. Best is trial 0 with value: 0.5426800866240364.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:03,698] Trial 3 finished with value: 0.5237128637574135 and parameters: {'n_estimators': 500, 'max_depth': 25, 'min_samples_split': 4, 'min_samples_leaf': 5, 'max_features': 'sqrt', 'bootstrap': True}. Best is trial 0 with value: 0.5426800866240364.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:08,109] Trial 4 finished with value: 0.5290534745761559 and parameters: {'n_estimators': 800, 'max_depth': 7, 'min_samples_split': 8, 'min_samples_leaf': 6, 'max_features': 'log2', 'bootstrap': True}. Best is trial 0 with value: 0.5426800866240364.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:10,482] Trial 5 finished with value: 0.5343777561336247 and parameters: {'n_estimators': 600, 'max_depth': 16, 'min_samples_split': 2, 'min_samples_leaf': 5, 'max_features': 'log2', 'bootstrap': False}. Best is trial 0 with value: 0.5426800866240364.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:12,200] Trial 6 finished with value: 0.5384544424422046 and parameters: {'n_estimators': 300, 'max_depth': 20, 'min_samples_split': 4, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'bootstrap': True}. Best is trial 0 with value: 0.5426800866240364.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:15,625] Trial 7 finished with value: 0.5259825225842975 and parameters: {'n_estimators': 600, 'max_depth': 9, 'min_samples_split': 6, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'bootstrap': True}. Best is trial 0 with value: 0.5426800866240364.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:18,716] Trial 8 finished with value: 0.5416291532690246 and parameters: {'n_estimators': 800, 'max_depth': 13, 'min_samples_split': 7, 'min_samples_leaf': 6, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 0 with value: 0.5426800866240364.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:21,942] Trial 9 finished with value: 0.5206338996763754 and parameters: {'n_estimators': 600, 'max_depth': 22, 'min_samples_split': 2, 'min_samples_leaf': 6, 'max_features': 'sqrt', 'bootstrap': True}. Best is trial 0 with value: 0.5426800866240364.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:23,700] Trial 10 finished with value: 0.5438917655013099 and parameters: {'n_estimators': 400, 'max_depth': 30, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 10 with value: 0.5438917655013099.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:25,296] Trial 11 finished with value: 0.5438917655013099 and parameters: {'n_estimators': 400, 'max_depth': 30, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 10 with value: 0.5438917655013099.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:26,896] Trial 12 finished with value: 0.5438917655013099 and parameters: {'n_estimators': 400, 'max_depth': 30, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 10 with value: 0.5438917655013099.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:28,501] Trial 13 finished with value: 0.5438917655013099 and parameters: {'n_estimators': 400, 'max_depth': 30, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 10 with value: 0.5438917655013099.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:30,090] Trial 14 finished with value: 0.546376121463078 and parameters: {'n_estimators': 400, 'max_depth': 26, 'min_samples_split': 9, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 14 with value: 0.546376121463078.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:31,378] Trial 15 finished with value: 0.5504255413413081 and parameters: {'n_estimators': 300, 'max_depth': 25, 'min_samples_split': 8, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:32,591] Trial 16 finished with value: 0.5504255413413081 and parameters: {'n_estimators': 300, 'max_depth': 25, 'min_samples_split': 8, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:33,837] Trial 17 finished with value: 0.5504255413413081 and parameters: {'n_estimators': 300, 'max_depth': 25, 'min_samples_split': 8, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:35,041] Trial 18 finished with value: 0.5392865750871967 and parameters: {'n_estimators': 300, 'max_depth': 22, 'min_samples_split': 7, 'min_samples_leaf': 8, 'max_features': 'log2', 'bootstrap': False}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:35,939] Trial 19 finished with value: 0.5448711885142581 and parameters: {'n_estimators': 200, 'max_depth': 27, 'min_samples_split': 6, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:37,907] Trial 20 finished with value: 0.5375535877024396 and parameters: {'n_estimators': 500, 'max_depth': 22, 'min_samples_split': 8, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:39,169] Trial 21 finished with value: 0.5504255413413081 and parameters: {'n_estimators': 300, 'max_depth': 24, 'min_samples_split': 8, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:40,465] Trial 22 finished with value: 0.5504255413413081 and parameters: {'n_estimators': 300, 'max_depth': 27, 'min_samples_split': 7, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:41,679] Trial 23 finished with value: 0.5442588169376588 and parameters: {'n_estimators': 300, 'max_depth': 19, 'min_samples_split': 9, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:42,547] Trial 24 finished with value: 0.535405564006503 and parameters: {'n_estimators': 200, 'max_depth': 24, 'min_samples_split': 9, 'min_samples_leaf': 5, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:43,823] Trial 25 finished with value: 0.5390084182543199 and parameters: {'n_estimators': 300, 'max_depth': 28, 'min_samples_split': 8, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:44,697] Trial 26 finished with value: 0.5448711885142581 and parameters: {'n_estimators': 200, 'max_depth': 21, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'bootstrap': False}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:46,630] Trial 27 finished with value: 0.5370133777936564 and parameters: {'n_estimators': 500, 'max_depth': 24, 'min_samples_split': 7, 'min_samples_leaf': 6, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:48,188] Trial 28 finished with value: 0.5377060991521085 and parameters: {'n_estimators': 400, 'max_depth': 28, 'min_samples_split': 9, 'min_samples_leaf': 5, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:49,109] Trial 29 finished with value: 0.5309127629773449 and parameters: {'n_estimators': 200, 'max_depth': 15, 'min_samples_split': 6, 'min_samples_leaf': 7, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:50,302] Trial 30 finished with value: 0.5504255413413081 and parameters: {'n_estimators': 300, 'max_depth': 23, 'min_samples_split': 8, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:51,534] Trial 31 finished with value: 0.5504255413413081 and parameters: {'n_estimators': 300, 'max_depth': 25, 'min_samples_split': 8, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:52,809] Trial 32 finished with value: 0.5504255413413081 and parameters: {'n_estimators': 300, 'max_depth': 26, 'min_samples_split': 7, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:53,657] Trial 33 finished with value: 0.549552452353653 and parameters: {'n_estimators': 200, 'max_depth': 19, 'min_samples_split': 9, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:55,843] Trial 34 finished with value: 0.5281408317618508 and parameters: {'n_estimators': 400, 'max_depth': 25, 'min_samples_split': 8, 'min_samples_leaf': 5, 'max_features': 'log2', 'bootstrap': True}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:57,888] Trial 35 finished with value: 0.5400066342977141 and parameters: {'n_estimators': 500, 'max_depth': 28, 'min_samples_split': 5, 'min_samples_leaf': 5, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:05:59,144] Trial 36 finished with value: 0.5390084182543199 and parameters: {'n_estimators': 300, 'max_depth': 23, 'min_samples_split': 8, 'min_samples_leaf': 2, 'max_features': 'log2', 'bootstrap': False}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:00,353] Trial 37 finished with value: 0.5238110774577551 and parameters: {'n_estimators': 200, 'max_depth': 24, 'min_samples_split': 7, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': True}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:03,151] Trial 38 finished with value: 0.5469108431809074 and parameters: {'n_estimators': 700, 'max_depth': 20, 'min_samples_split': 9, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:04,889] Trial 39 finished with value: 0.5280097666290134 and parameters: {'n_estimators': 300, 'max_depth': 17, 'min_samples_split': 8, 'min_samples_leaf': 3, 'max_features': 'log2', 'bootstrap': True}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:05,741] Trial 40 finished with value: 0.535405564006503 and parameters: {'n_estimators': 200, 'max_depth': 27, 'min_samples_split': 5, 'min_samples_leaf': 5, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:06,958] Trial 41 finished with value: 0.5504255413413081 and parameters: {'n_estimators': 300, 'max_depth': 26, 'min_samples_split': 7, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:08,247] Trial 42 finished with value: 0.5504255413413081 and parameters: {'n_estimators': 300, 'max_depth': 28, 'min_samples_split': 7, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:09,795] Trial 43 finished with value: 0.5377060991521085 and parameters: {'n_estimators': 400, 'max_depth': 23, 'min_samples_split': 8, 'min_samples_leaf': 5, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:11,034] Trial 44 finished with value: 0.5479318313532309 and parameters: {'n_estimators': 300, 'max_depth': 10, 'min_samples_split': 7, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:13,289] Trial 45 finished with value: 0.5236104194318006 and parameters: {'n_estimators': 400, 'max_depth': 27, 'min_samples_split': 6, 'min_samples_leaf': 6, 'max_features': 'sqrt', 'bootstrap': True}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:14,099] Trial 46 finished with value: 0.546376121463078 and parameters: {'n_estimators': 200, 'max_depth': 25, 'min_samples_split': 9, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 15 with value: 0.5504255413413081.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:15,335] Trial 47 finished with value: 0.5532244510735702 and parameters: {'n_estimators': 300, 'max_depth': 29, 'min_samples_split': 6, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 47 with value: 0.5532244510735702.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:16,858] Trial 48 finished with value: 0.5453174474180796 and parameters: {'n_estimators': 400, 'max_depth': 29, 'min_samples_split': 4, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 47 with value: 0.5532244510735702.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:19,490] Trial 49 finished with value: 0.5465569431944458 and parameters: {'n_estimators': 700, 'max_depth': 21, 'min_samples_split': 6, 'min_samples_leaf': 3, 'max_features': 'log2', 'bootstrap': False}. Best is trial 47 with value: 0.5532244510735702.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:22,634] Trial 50 finished with value: 0.526658670750034 and parameters: {'n_estimators': 600, 'max_depth': 29, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'bootstrap': True}. Best is trial 47 with value: 0.5532244510735702.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:23,842] Trial 51 finished with value: 0.5504255413413081 and parameters: {'n_estimators': 300, 'max_depth': 26, 'min_samples_split': 7, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 47 with value: 0.5532244510735702.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:25,058] Trial 52 finished with value: 0.5377370057588636 and parameters: {'n_estimators': 300, 'max_depth': 27, 'min_samples_split': 8, 'min_samples_leaf': 5, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 47 with value: 0.5532244510735702.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:26,290] Trial 53 finished with value: 0.546376121463078 and parameters: {'n_estimators': 300, 'max_depth': 29, 'min_samples_split': 7, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 47 with value: 0.5532244510735702.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:27,872] Trial 54 finished with value: 0.5466524195885251 and parameters: {'n_estimators': 400, 'max_depth': 25, 'min_samples_split': 8, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 47 with value: 0.5532244510735702.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:29,076] Trial 55 finished with value: 0.5423376163048295 and parameters: {'n_estimators': 300, 'max_depth': 26, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 47 with value: 0.5532244510735702.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:29,878] Trial 56 finished with value: 0.5317010128371533 and parameters: {'n_estimators': 200, 'max_depth': 6, 'min_samples_split': 9, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 47 with value: 0.5532244510735702.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:31,407] Trial 57 finished with value: 0.5449491741913814 and parameters: {'n_estimators': 400, 'max_depth': 24, 'min_samples_split': 7, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 47 with value: 0.5532244510735702.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:32,215] Trial 58 finished with value: 0.535405564006503 and parameters: {'n_estimators': 200, 'max_depth': 29, 'min_samples_split': 8, 'min_samples_leaf': 5, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 47 with value: 0.5532244510735702.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:33,726] Trial 59 finished with value: 0.5438917655013099 and parameters: {'n_estimators': 400, 'max_depth': 30, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 47 with value: 0.5532244510735702.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:34,964] Trial 60 finished with value: 0.5504255413413081 and parameters: {'n_estimators': 300, 'max_depth': 21, 'min_samples_split': 7, 'min_samples_leaf': 4, 'max_features': 'log2', 'bootstrap': False}. Best is trial 47 with value: 0.5532244510735702.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:36,153] Trial 61 finished with value: 0.5504255413413081 and parameters: {'n_estimators': 300, 'max_depth': 23, 'min_samples_split': 8, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 47 with value: 0.5532244510735702.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:37,404] Trial 62 finished with value: 0.547616266600027 and parameters: {'n_estimators': 300, 'max_depth': 23, 'min_samples_split': 9, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 47 with value: 0.5532244510735702.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:38,674] Trial 63 finished with value: 0.5504255413413081 and parameters: {'n_estimators': 300, 'max_depth': 22, 'min_samples_split': 8, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 47 with value: 0.5532244510735702.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:39,878] Trial 64 finished with value: 0.5377370057588636 and parameters: {'n_estimators': 300, 'max_depth': 24, 'min_samples_split': 9, 'min_samples_leaf': 5, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 47 with value: 0.5532244510735702.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:40,692] Trial 65 finished with value: 0.5453174474180796 and parameters: {'n_estimators': 200, 'max_depth': 27, 'min_samples_split': 6, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 47 with value: 0.5532244510735702.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:41,889] Trial 66 finished with value: 0.5504255413413081 and parameters: {'n_estimators': 300, 'max_depth': 25, 'min_samples_split': 8, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 47 with value: 0.5532244510735702.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:43,440] Trial 67 finished with value: 0.5377060991521085 and parameters: {'n_estimators': 400, 'max_depth': 28, 'min_samples_split': 8, 'min_samples_leaf': 5, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 47 with value: 0.5532244510735702.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:44,559] Trial 68 finished with value: 0.5238110774577551 and parameters: {'n_estimators': 200, 'max_depth': 26, 'min_samples_split': 7, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': True}. Best is trial 47 with value: 0.5532244510735702.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:45,767] Trial 69 finished with value: 0.5327866006427289 and parameters: {'n_estimators': 300, 'max_depth': 22, 'min_samples_split': 9, 'min_samples_leaf': 6, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 47 with value: 0.5532244510735702.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:46,975] Trial 70 finished with value: 0.5537309346212612 and parameters: {'n_estimators': 300, 'max_depth': 24, 'min_samples_split': 8, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:48,204] Trial 71 finished with value: 0.5537309346212612 and parameters: {'n_estimators': 300, 'max_depth': 24, 'min_samples_split': 8, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:49,410] Trial 72 finished with value: 0.5390084182543199 and parameters: {'n_estimators': 300, 'max_depth': 24, 'min_samples_split': 8, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:50,662] Trial 73 finished with value: 0.5537309346212612 and parameters: {'n_estimators': 300, 'max_depth': 25, 'min_samples_split': 8, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:52,231] Trial 74 finished with value: 0.5453174474180796 and parameters: {'n_estimators': 400, 'max_depth': 20, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:53,470] Trial 75 finished with value: 0.5440766045548654 and parameters: {'n_estimators': 300, 'max_depth': 25, 'min_samples_split': 9, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:54,288] Trial 76 finished with value: 0.5409168955827097 and parameters: {'n_estimators': 200, 'max_depth': 22, 'min_samples_split': 8, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:55,482] Trial 77 finished with value: 0.5394775707384403 and parameters: {'n_estimators': 300, 'max_depth': 18, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_features': 'log2', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:56,744] Trial 78 finished with value: 0.5338495514147688 and parameters: {'n_estimators': 300, 'max_depth': 13, 'min_samples_split': 8, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:06:58,723] Trial 79 finished with value: 0.5410831398083951 and parameters: {'n_estimators': 500, 'max_depth': 24, 'min_samples_split': 3, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:00,910] Trial 80 finished with value: 0.5338467341896301 and parameters: {'n_estimators': 400, 'max_depth': 21, 'min_samples_split': 9, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': True}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:02,140] Trial 81 finished with value: 0.5504255413413081 and parameters: {'n_estimators': 300, 'max_depth': 26, 'min_samples_split': 7, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:05,240] Trial 82 finished with value: 0.5426475351102458 and parameters: {'n_estimators': 800, 'max_depth': 27, 'min_samples_split': 8, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:06,516] Trial 83 finished with value: 0.5392865750871967 and parameters: {'n_estimators': 300, 'max_depth': 25, 'min_samples_split': 7, 'min_samples_leaf': 8, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:07,712] Trial 84 finished with value: 0.5504255413413081 and parameters: {'n_estimators': 300, 'max_depth': 28, 'min_samples_split': 8, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:08,903] Trial 85 finished with value: 0.546376121463078 and parameters: {'n_estimators': 300, 'max_depth': 26, 'min_samples_split': 7, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:09,713] Trial 86 finished with value: 0.5424289878805418 and parameters: {'n_estimators': 200, 'max_depth': 11, 'min_samples_split': 8, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:10,940] Trial 87 finished with value: 0.5532244510735702 and parameters: {'n_estimators': 300, 'max_depth': 23, 'min_samples_split': 6, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:12,500] Trial 88 finished with value: 0.5392970031996401 and parameters: {'n_estimators': 400, 'max_depth': 23, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_features': 'log2', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:13,710] Trial 89 finished with value: 0.5532244510735702 and parameters: {'n_estimators': 300, 'max_depth': 24, 'min_samples_split': 5, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:14,928] Trial 90 finished with value: 0.5532244510735702 and parameters: {'n_estimators': 300, 'max_depth': 23, 'min_samples_split': 5, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:16,146] Trial 91 finished with value: 0.5532244510735702 and parameters: {'n_estimators': 300, 'max_depth': 23, 'min_samples_split': 5, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:17,341] Trial 92 finished with value: 0.5532244510735702 and parameters: {'n_estimators': 300, 'max_depth': 23, 'min_samples_split': 5, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:18,544] Trial 93 finished with value: 0.5532244510735702 and parameters: {'n_estimators': 300, 'max_depth': 23, 'min_samples_split': 5, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:19,732] Trial 94 finished with value: 0.5532244510735702 and parameters: {'n_estimators': 300, 'max_depth': 20, 'min_samples_split': 5, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:20,908] Trial 95 finished with value: 0.5532244510735702 and parameters: {'n_estimators': 300, 'max_depth': 23, 'min_samples_split': 4, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:22,095] Trial 96 finished with value: 0.5449282042173608 and parameters: {'n_estimators': 300, 'max_depth': 22, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:23,273] Trial 97 finished with value: 0.5532244510735702 and parameters: {'n_estimators': 300, 'max_depth': 23, 'min_samples_split': 5, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:24,074] Trial 98 finished with value: 0.5442662423847721 and parameters: {'n_estimators': 200, 'max_depth': 21, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:25,294] Trial 99 finished with value: 0.5532244510735702 and parameters: {'n_estimators': 300, 'max_depth': 24, 'min_samples_split': 5, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 70 with value: 0.5537309346212612.
Best accuracy: 0.616
Best macro‑F1: 0.4914618692425224


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


Saved → ch2025_submission_rf.csv


In [2]:
# train_random_forest_optuna.py – cleaned version (no date strings in X)
"""Daily‑level RandomForest + Optuna (multi‑output)

* Raw `lifelog_date` is **dropped** before the ColumnTransformer.
* Only numeric sensor features + `days_since_start` + one‑hot `subject_id` pass to model.
"""
from __future__ import annotations

import joblib, optuna, pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

AGG_STRATEGY = "mixed"  # sum | mean | mixed | both
TEST_SIZE = 0.30
N_TRIALS = 100
RANDOM_STATE = 42

def load_data():
    merged = pd.read_csv("/Users/jhlee/Desktop/고려대학교/4-1/데이터과학/프로젝트/ETRI_lifelog_dataset/merge_df.csv", parse_dates=["timestamp"])
    train_y = pd.read_csv("/Users/jhlee/Desktop/고려대학교/4-1/데이터과학/프로젝트/ETRI_lifelog_dataset/ch2025_metrics_train.csv", parse_dates=["lifelog_date"])
    submit  = pd.read_csv("/Users/jhlee/Desktop/고려대학교/4-1/데이터과학/프로젝트/ETRI_lifelog_dataset/ch2025_submission_sample.csv", parse_dates=["lifelog_date"])
    return merged, train_y, submit

def aggregate_features(df_raw: pd.DataFrame, strategy="mixed") -> pd.DataFrame:
    df = df_raw.copy()
    df["lifelog_date"] = df["timestamp"].dt.floor("D")
    id_cols = ["subject_id", "lifelog_date"]
    feat_cols = [c for c in df.columns if c not in id_cols + ["timestamp"]]
    if strategy == "sum":
        agg_rules = {c: "sum" for c in feat_cols}
    elif strategy == "mean":
        agg_rules = {c: "mean" for c in feat_cols}
    elif strategy == "mixed":
        sum_like = ["time", "distance", "calorie", "count", "usage"]
        agg_rules = {c: ("sum" if any(k in c.lower() for k in sum_like) else "mean") for c in feat_cols}
    elif strategy == "both":
        sum_df = df.groupby(id_cols)[feat_cols].sum().add_suffix("_sum")
        mean_df = df.groupby(id_cols)[feat_cols].mean().add_suffix("_mean")
        return sum_df.join(mean_df).reset_index()
    else:
        raise ValueError("Invalid AGG_STRATEGY")
    return df.groupby(id_cols).agg(agg_rules).reset_index()

def build_preprocessor(cat_cols, num_cols):
    return ColumnTransformer([
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
        ("num", "passthrough", num_cols),
    ], remainder="drop")

def build_model(trial, pre):
    rf = RandomForestClassifier(
        criterion = "entropy",
        n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),
        max_depth=trial.suggest_int("max_depth", 6, 30),
        min_samples_split=trial.suggest_int("min_samples_split", 2, 10),
        min_samples_leaf=trial.suggest_int("min_samples_leaf", 1, 8),
        max_features=trial.suggest_categorical("max_features", ["sqrt", "log2"]),
        bootstrap=trial.suggest_categorical("bootstrap", [True, False]),
        random_state=RANDOM_STATE, n_jobs=-1)
    return Pipeline([("pre", pre), ("clf", MultiOutputClassifier(rf))])

def macro_metrics(y_true, y_pred):
    acc = accuracy_score(y_true.values.ravel(), y_pred.ravel())
    f1 = f1_score(y_true.values.ravel(), y_pred.ravel(), average="macro")
    return acc, f1

def objective(trial, X_tr, X_val, y_tr, y_val, pre):
    model = build_model(trial, pre)
    model.fit(X_tr, y_tr)
    preds = model.predict(X_val)
    acc, f1 = macro_metrics(y_val, preds)
    trial.set_user_attr("accuracy", acc)
    trial.set_user_attr("macro_f1", f1)
    return (acc + f1) / 2

def main():
    merged, train_y, submit = load_data()
    feats = aggregate_features(merged, AGG_STRATEGY)
    data = train_y.merge(feats, on=["subject_id", "lifelog_date"], how="inner")
    data["days_since_start"] = (data["lifelog_date"] - data["lifelog_date"].min()).dt.days
    target_cols = ["Q1", "Q2", "Q3", "S1", "S2", "S3"]

    # ---------------- explicit feature filtering ----------------
    base_features = [c for c in data.columns if c not in target_cols + ["lifelog_date"]]
    cat_cols = ["subject_id"]
    num_cols = [c for c in base_features if c != "subject_id" and pd.api.types.is_numeric_dtype(data[c])]
    feature_cols = cat_cols + num_cols  # guarantee lifelog_date absent

    pre = build_preprocessor(cat_cols, num_cols)
    X = data[feature_cols]
    y = data[target_cols]
    X_tr, X_val, y_tr, y_val = train_test_split(X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=y["Q1"])

    study = optuna.create_study(direction="maximize", study_name="RF_multioutput")
    study.optimize(lambda t: objective(t, X_tr, X_val, y_tr, y_val, pre), n_trials=N_TRIALS, show_progress_bar=True)

    print("Best accuracy:", study.best_trial.user_attrs["accuracy"])
    print("Best macro‑F1:", study.best_trial.user_attrs["macro_f1"])

    best_model = build_model(optuna.trial.FixedTrial(study.best_params), pre)
    best_model.fit(X, y)
    joblib.dump(best_model, "rf_multioutput_best.pkl")

    # -------- submission set
    sub = submit.merge(feats, on=["subject_id", "lifelog_date"], how="left")
    sub["days_since_start"] = (sub["lifelog_date"] - data["lifelog_date"].min()).dt.days
    X_sub = sub[feature_cols]
    submit[target_cols] = best_model.predict(X_sub)
    submit.to_csv("ch2025_submission_rf_entropy.csv", index=False)
    print("Saved → ch2025_submission_rf_entropy.csv")

if __name__ == "__main__":
    main()


[I 2025-05-20 16:07:27,058] A new study created in memory with name: RF_multioutput


  0%|          | 0/100 [00:00<?, ?it/s]

  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:29,839] Trial 0 finished with value: 0.5275231409894543 and parameters: {'n_estimators': 500, 'max_depth': 11, 'min_samples_split': 6, 'min_samples_leaf': 8, 'max_features': 'log2', 'bootstrap': True}. Best is trial 0 with value: 0.5275231409894543.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:31,518] Trial 1 finished with value: 0.525866993140208 and parameters: {'n_estimators': 300, 'max_depth': 21, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': True}. Best is trial 0 with value: 0.5275231409894543.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:32,631] Trial 2 finished with value: 0.5257067185517852 and parameters: {'n_estimators': 200, 'max_depth': 28, 'min_samples_split': 3, 'min_samples_leaf': 8, 'max_features': 'log2', 'bootstrap': True}. Best is trial 0 with value: 0.5275231409894543.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:34,277] Trial 3 finished with value: 0.5269936318537718 and parameters: {'n_estimators': 300, 'max_depth': 28, 'min_samples_split': 2, 'min_samples_leaf': 8, 'max_features': 'log2', 'bootstrap': True}. Best is trial 0 with value: 0.5275231409894543.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:36,475] Trial 4 finished with value: 0.5398919531973925 and parameters: {'n_estimators': 400, 'max_depth': 21, 'min_samples_split': 7, 'min_samples_leaf': 5, 'max_features': 'log2', 'bootstrap': True}. Best is trial 4 with value: 0.5398919531973925.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:38,720] Trial 5 finished with value: 0.5444325009253095 and parameters: {'n_estimators': 600, 'max_depth': 7, 'min_samples_split': 10, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 5 with value: 0.5444325009253095.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:40,983] Trial 6 finished with value: 0.5430179979175963 and parameters: {'n_estimators': 600, 'max_depth': 6, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'log2', 'bootstrap': False}. Best is trial 5 with value: 0.5444325009253095.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:42,668] Trial 7 finished with value: 0.5269936318537718 and parameters: {'n_estimators': 300, 'max_depth': 28, 'min_samples_split': 7, 'min_samples_leaf': 8, 'max_features': 'log2', 'bootstrap': True}. Best is trial 5 with value: 0.5444325009253095.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:44,663] Trial 8 finished with value: 0.5347054900559723 and parameters: {'n_estimators': 500, 'max_depth': 10, 'min_samples_split': 7, 'min_samples_leaf': 5, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 5 with value: 0.5444325009253095.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:45,480] Trial 9 finished with value: 0.5338683436407013 and parameters: {'n_estimators': 200, 'max_depth': 29, 'min_samples_split': 7, 'min_samples_leaf': 8, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 5 with value: 0.5444325009253095.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:48,474] Trial 10 finished with value: 0.5225213623582516 and parameters: {'n_estimators': 800, 'max_depth': 15, 'min_samples_split': 10, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 5 with value: 0.5444325009253095.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:51,019] Trial 11 finished with value: 0.5370308870018329 and parameters: {'n_estimators': 700, 'max_depth': 6, 'min_samples_split': 4, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 5 with value: 0.5444325009253095.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:53,585] Trial 12 finished with value: 0.541721995400861 and parameters: {'n_estimators': 700, 'max_depth': 6, 'min_samples_split': 5, 'min_samples_leaf': 3, 'max_features': 'log2', 'bootstrap': False}. Best is trial 5 with value: 0.5444325009253095.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:55,850] Trial 13 finished with value: 0.5468748477192407 and parameters: {'n_estimators': 600, 'max_depth': 11, 'min_samples_split': 9, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 13 with value: 0.5468748477192407.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:07:58,155] Trial 14 finished with value: 0.543734049536609 and parameters: {'n_estimators': 600, 'max_depth': 12, 'min_samples_split': 9, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 13 with value: 0.5468748477192407.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:08:00,404] Trial 15 finished with value: 0.5342208359326263 and parameters: {'n_estimators': 600, 'max_depth': 16, 'min_samples_split': 9, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 13 with value: 0.5468748477192407.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:08:03,350] Trial 16 finished with value: 0.5447692725294592 and parameters: {'n_estimators': 800, 'max_depth': 9, 'min_samples_split': 9, 'min_samples_leaf': 6, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 13 with value: 0.5468748477192407.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:08:06,314] Trial 17 finished with value: 0.5373642463511703 and parameters: {'n_estimators': 800, 'max_depth': 14, 'min_samples_split': 8, 'min_samples_leaf': 6, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 13 with value: 0.5468748477192407.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:08:08,925] Trial 18 finished with value: 0.5340032981521654 and parameters: {'n_estimators': 700, 'max_depth': 19, 'min_samples_split': 9, 'min_samples_leaf': 6, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 13 with value: 0.5468748477192407.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:08:11,908] Trial 19 finished with value: 0.5447692725294592 and parameters: {'n_estimators': 800, 'max_depth': 9, 'min_samples_split': 8, 'min_samples_leaf': 6, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 13 with value: 0.5468748477192407.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:08:14,538] Trial 20 finished with value: 0.5336181637052079 and parameters: {'n_estimators': 700, 'max_depth': 12, 'min_samples_split': 8, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 13 with value: 0.5468748477192407.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:08:17,497] Trial 21 finished with value: 0.5447692725294592 and parameters: {'n_estimators': 800, 'max_depth': 9, 'min_samples_split': 8, 'min_samples_leaf': 6, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 13 with value: 0.5468748477192407.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:08:20,441] Trial 22 finished with value: 0.5336600386777075 and parameters: {'n_estimators': 800, 'max_depth': 9, 'min_samples_split': 9, 'min_samples_leaf': 7, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 13 with value: 0.5468748477192407.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:08:23,091] Trial 23 finished with value: 0.5391331566823027 and parameters: {'n_estimators': 700, 'max_depth': 13, 'min_samples_split': 8, 'min_samples_leaf': 7, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 13 with value: 0.5468748477192407.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:08:26,070] Trial 24 finished with value: 0.5295879188190624 and parameters: {'n_estimators': 800, 'max_depth': 17, 'min_samples_split': 9, 'min_samples_leaf': 5, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 13 with value: 0.5468748477192407.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:08:27,578] Trial 25 finished with value: 0.5394676346732231 and parameters: {'n_estimators': 400, 'max_depth': 8, 'min_samples_split': 6, 'min_samples_leaf': 7, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 13 with value: 0.5468748477192407.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:08:29,501] Trial 26 finished with value: 0.5414366813652476 and parameters: {'n_estimators': 500, 'max_depth': 10, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 13 with value: 0.5468748477192407.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:08:32,125] Trial 27 finished with value: 0.5535414110586525 and parameters: {'n_estimators': 700, 'max_depth': 14, 'min_samples_split': 8, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:08:34,761] Trial 28 finished with value: 0.5391278931059552 and parameters: {'n_estimators': 700, 'max_depth': 18, 'min_samples_split': 9, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:08:37,970] Trial 29 finished with value: 0.5384486071890794 and parameters: {'n_estimators': 600, 'max_depth': 11, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': True}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:08:39,900] Trial 30 finished with value: 0.5378246279268176 and parameters: {'n_estimators': 500, 'max_depth': 25, 'min_samples_split': 7, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:08:42,860] Trial 31 finished with value: 0.5373642463511703 and parameters: {'n_estimators': 800, 'max_depth': 14, 'min_samples_split': 8, 'min_samples_leaf': 6, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:08:45,497] Trial 32 finished with value: 0.538263696644161 and parameters: {'n_estimators': 700, 'max_depth': 11, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:08:48,435] Trial 33 finished with value: 0.5338683436407013 and parameters: {'n_estimators': 800, 'max_depth': 8, 'min_samples_split': 8, 'min_samples_leaf': 5, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:08:52,151] Trial 34 finished with value: 0.5357989417989417 and parameters: {'n_estimators': 700, 'max_depth': 12, 'min_samples_split': 9, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': True}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:08:55,273] Trial 35 finished with value: 0.5366395195677897 and parameters: {'n_estimators': 600, 'max_depth': 14, 'min_samples_split': 8, 'min_samples_leaf': 3, 'max_features': 'log2', 'bootstrap': True}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:08:58,247] Trial 36 finished with value: 0.5391357223023889 and parameters: {'n_estimators': 800, 'max_depth': 8, 'min_samples_split': 10, 'min_samples_leaf': 7, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:00,888] Trial 37 finished with value: 0.5369808870323309 and parameters: {'n_estimators': 700, 'max_depth': 10, 'min_samples_split': 5, 'min_samples_leaf': 5, 'max_features': 'log2', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:04,046] Trial 38 finished with value: 0.5404337021003688 and parameters: {'n_estimators': 600, 'max_depth': 20, 'min_samples_split': 2, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': True}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:07,188] Trial 39 finished with value: 0.5373642463511703 and parameters: {'n_estimators': 800, 'max_depth': 24, 'min_samples_split': 7, 'min_samples_leaf': 6, 'max_features': 'log2', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:08,874] Trial 40 finished with value: 0.5458179310490325 and parameters: {'n_estimators': 400, 'max_depth': 13, 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:10,546] Trial 41 finished with value: 0.5378887490143214 and parameters: {'n_estimators': 400, 'max_depth': 16, 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:11,823] Trial 42 finished with value: 0.5348682724190358 and parameters: {'n_estimators': 300, 'max_depth': 13, 'min_samples_split': 9, 'min_samples_leaf': 6, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:13,443] Trial 43 finished with value: 0.5399529741702177 and parameters: {'n_estimators': 400, 'max_depth': 9, 'min_samples_split': 10, 'min_samples_leaf': 5, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:14,289] Trial 44 finished with value: 0.5528655582300779 and parameters: {'n_estimators': 200, 'max_depth': 7, 'min_samples_split': 9, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:15,149] Trial 45 finished with value: 0.5359493557108108 and parameters: {'n_estimators': 200, 'max_depth': 7, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:16,280] Trial 46 finished with value: 0.5322413941356875 and parameters: {'n_estimators': 200, 'max_depth': 7, 'min_samples_split': 9, 'min_samples_leaf': 3, 'max_features': 'log2', 'bootstrap': True}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:17,515] Trial 47 finished with value: 0.5343480080647814 and parameters: {'n_estimators': 300, 'max_depth': 15, 'min_samples_split': 10, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:19,568] Trial 48 finished with value: 0.5466828711157252 and parameters: {'n_estimators': 500, 'max_depth': 11, 'min_samples_split': 9, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:21,681] Trial 49 finished with value: 0.5399901477832512 and parameters: {'n_estimators': 500, 'max_depth': 13, 'min_samples_split': 9, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:23,664] Trial 50 finished with value: 0.542275020267741 and parameters: {'n_estimators': 500, 'max_depth': 11, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:25,323] Trial 51 finished with value: 0.5464882036091019 and parameters: {'n_estimators': 400, 'max_depth': 11, 'min_samples_split': 9, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:26,957] Trial 52 finished with value: 0.5439213663185032 and parameters: {'n_estimators': 400, 'max_depth': 15, 'min_samples_split': 9, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:28,552] Trial 53 finished with value: 0.5357294271763725 and parameters: {'n_estimators': 400, 'max_depth': 11, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:29,797] Trial 54 finished with value: 0.5366027104644815 and parameters: {'n_estimators': 300, 'max_depth': 12, 'min_samples_split': 9, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:31,768] Trial 55 finished with value: 0.5354357575231893 and parameters: {'n_estimators': 500, 'max_depth': 16, 'min_samples_split': 8, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:33,055] Trial 56 finished with value: 0.5375070988800229 and parameters: {'n_estimators': 300, 'max_depth': 10, 'min_samples_split': 10, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:33,970] Trial 57 finished with value: 0.5383942130402101 and parameters: {'n_estimators': 200, 'max_depth': 13, 'min_samples_split': 9, 'min_samples_leaf': 3, 'max_features': 'log2', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:36,269] Trial 58 finished with value: 0.5430179979175963 and parameters: {'n_estimators': 600, 'max_depth': 6, 'min_samples_split': 8, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:38,221] Trial 59 finished with value: 0.5391278931059552 and parameters: {'n_estimators': 500, 'max_depth': 17, 'min_samples_split': 9, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:41,404] Trial 60 finished with value: 0.539378813302211 and parameters: {'n_estimators': 600, 'max_depth': 12, 'min_samples_split': 7, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': True}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:42,958] Trial 61 finished with value: 0.5533167110388403 and parameters: {'n_estimators': 400, 'max_depth': 7, 'min_samples_split': 9, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:44,564] Trial 62 finished with value: 0.5427779658835526 and parameters: {'n_estimators': 400, 'max_depth': 7, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:46,168] Trial 63 finished with value: 0.5421209965177477 and parameters: {'n_estimators': 400, 'max_depth': 8, 'min_samples_split': 9, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:47,427] Trial 64 finished with value: 0.5352993575153724 and parameters: {'n_estimators': 300, 'max_depth': 10, 'min_samples_split': 8, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:48,997] Trial 65 finished with value: 0.5401717747103121 and parameters: {'n_estimators': 400, 'max_depth': 6, 'min_samples_split': 9, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:50,985] Trial 66 finished with value: 0.5331345092875147 and parameters: {'n_estimators': 500, 'max_depth': 14, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:52,540] Trial 67 finished with value: 0.5473460092044109 and parameters: {'n_estimators': 400, 'max_depth': 10, 'min_samples_split': 9, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:54,460] Trial 68 finished with value: 0.5452668863316543 and parameters: {'n_estimators': 500, 'max_depth': 10, 'min_samples_split': 8, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 27 with value: 0.5535414110586525.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:55,648] Trial 69 finished with value: 0.5541482943678325 and parameters: {'n_estimators': 300, 'max_depth': 8, 'min_samples_split': 7, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 69 with value: 0.5541482943678325.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:56,854] Trial 70 finished with value: 0.5444313858025294 and parameters: {'n_estimators': 300, 'max_depth': 8, 'min_samples_split': 7, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 69 with value: 0.5541482943678325.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:57,627] Trial 71 finished with value: 0.5412409307124105 and parameters: {'n_estimators': 200, 'max_depth': 7, 'min_samples_split': 8, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 69 with value: 0.5541482943678325.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:09:58,833] Trial 72 finished with value: 0.5497017842836242 and parameters: {'n_estimators': 300, 'max_depth': 9, 'min_samples_split': 8, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 69 with value: 0.5541482943678325.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:10:00,083] Trial 73 finished with value: 0.5479843016148748 and parameters: {'n_estimators': 300, 'max_depth': 9, 'min_samples_split': 8, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 69 with value: 0.5541482943678325.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:10:01,426] Trial 74 finished with value: 0.5566203534804415 and parameters: {'n_estimators': 300, 'max_depth': 9, 'min_samples_split': 7, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 74 with value: 0.5566203534804415.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:10:02,692] Trial 75 finished with value: 0.5566203534804415 and parameters: {'n_estimators': 300, 'max_depth': 9, 'min_samples_split': 7, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 74 with value: 0.5566203534804415.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:10:03,902] Trial 76 finished with value: 0.5279023147449757 and parameters: {'n_estimators': 300, 'max_depth': 8, 'min_samples_split': 6, 'min_samples_leaf': 1, 'max_features': 'log2', 'bootstrap': False}. Best is trial 74 with value: 0.5566203534804415.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:10:05,612] Trial 77 finished with value: 0.5405218944906445 and parameters: {'n_estimators': 300, 'max_depth': 7, 'min_samples_split': 7, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'bootstrap': True}. Best is trial 74 with value: 0.5566203534804415.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:10:06,850] Trial 78 finished with value: 0.5566203534804415 and parameters: {'n_estimators': 300, 'max_depth': 9, 'min_samples_split': 7, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 74 with value: 0.5566203534804415.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:10:07,678] Trial 79 finished with value: 0.5341405697599679 and parameters: {'n_estimators': 200, 'max_depth': 6, 'min_samples_split': 7, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 74 with value: 0.5566203534804415.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:10:08,551] Trial 80 finished with value: 0.5392551397989206 and parameters: {'n_estimators': 200, 'max_depth': 9, 'min_samples_split': 6, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 74 with value: 0.5566203534804415.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:10:09,812] Trial 81 finished with value: 0.5566203534804415 and parameters: {'n_estimators': 300, 'max_depth': 9, 'min_samples_split': 7, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 74 with value: 0.5566203534804415.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:10:11,034] Trial 82 finished with value: 0.5444313858025294 and parameters: {'n_estimators': 300, 'max_depth': 8, 'min_samples_split': 7, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 74 with value: 0.5566203534804415.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:10:12,320] Trial 83 finished with value: 0.5484281879760855 and parameters: {'n_estimators': 300, 'max_depth': 9, 'min_samples_split': 6, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 74 with value: 0.5566203534804415.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:10:13,627] Trial 84 finished with value: 0.5348680678012543 and parameters: {'n_estimators': 300, 'max_depth': 7, 'min_samples_split': 7, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 74 with value: 0.5566203534804415.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:10:14,467] Trial 85 finished with value: 0.5406339244524638 and parameters: {'n_estimators': 200, 'max_depth': 8, 'min_samples_split': 7, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 74 with value: 0.5566203534804415.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:10:15,659] Trial 86 finished with value: 0.542300469483568 and parameters: {'n_estimators': 300, 'max_depth': 6, 'min_samples_split': 7, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 74 with value: 0.5566203534804415.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:10:16,931] Trial 87 finished with value: 0.5484281879760855 and parameters: {'n_estimators': 300, 'max_depth': 9, 'min_samples_split': 6, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 74 with value: 0.5566203534804415.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:10:17,740] Trial 88 finished with value: 0.5369040196677095 and parameters: {'n_estimators': 200, 'max_depth': 24, 'min_samples_split': 7, 'min_samples_leaf': 2, 'max_features': 'log2', 'bootstrap': False}. Best is trial 74 with value: 0.5566203534804415.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:10:19,406] Trial 89 finished with value: 0.5367872124465087 and parameters: {'n_estimators': 300, 'max_depth': 7, 'min_samples_split': 8, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'bootstrap': True}. Best is trial 74 with value: 0.5566203534804415.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:10:20,670] Trial 90 finished with value: 0.5483989853014358 and parameters: {'n_estimators': 300, 'max_depth': 9, 'min_samples_split': 7, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 74 with value: 0.5566203534804415.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:10:21,920] Trial 91 finished with value: 0.5439774988009856 and parameters: {'n_estimators': 300, 'max_depth': 9, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 74 with value: 0.5566203534804415.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:10:23,137] Trial 92 finished with value: 0.5279023147449757 and parameters: {'n_estimators': 300, 'max_depth': 8, 'min_samples_split': 6, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 74 with value: 0.5566203534804415.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:10:24,391] Trial 93 finished with value: 0.5439774988009856 and parameters: {'n_estimators': 300, 'max_depth': 9, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 74 with value: 0.5566203534804415.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:10:25,224] Trial 94 finished with value: 0.5385765067731692 and parameters: {'n_estimators': 200, 'max_depth': 10, 'min_samples_split': 6, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 74 with value: 0.5566203534804415.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:10:26,415] Trial 95 finished with value: 0.542300469483568 and parameters: {'n_estimators': 300, 'max_depth': 6, 'min_samples_split': 7, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 74 with value: 0.5566203534804415.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:10:27,637] Trial 96 finished with value: 0.5328840722365378 and parameters: {'n_estimators': 300, 'max_depth': 22, 'min_samples_split': 6, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 74 with value: 0.5566203534804415.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:10:28,448] Trial 97 finished with value: 0.559435756463983 and parameters: {'n_estimators': 200, 'max_depth': 8, 'min_samples_split': 7, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 97 with value: 0.559435756463983.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:10:29,239] Trial 98 finished with value: 0.5412409307124105 and parameters: {'n_estimators': 200, 'max_depth': 7, 'min_samples_split': 8, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 97 with value: 0.559435756463983.


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


[I 2025-05-20 16:10:30,091] Trial 99 finished with value: 0.559435756463983 and parameters: {'n_estimators': 200, 'max_depth': 8, 'min_samples_split': 7, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': False}. Best is trial 97 with value: 0.559435756463983.
Best accuracy: 0.6266666666666667
Best macro‑F1: 0.4922048462612993


  n_estimators=trial.suggest_int("n_estimators", 200, 800, 100),


Saved → ch2025_submission_rf_entropy.csv
