In [2]:
# LightGBM + Optuna 튜닝 (DB 충돌 방지 및 자동 예외처리 포함)
# 대상: 타워램프 상태 분류

import os
import pandas as pd
import lightgbm as lgb
import optuna
import joblib
import json
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from lightgbm import early_stopping, log_evaluation

# Best-effort cleanup of the SQLite study file left by an earlier run.
# If the file cannot be removed because of a PermissionError, report it
# and carry on; the existing file is then left in place.
db_file = "optuna_lgbm.db"
if os.path.exists(db_file):
    try:
        os.remove(db_file)
    except PermissionError:
        print("⚠️ optuna_lgbm.db 삭제 실패 - 이미 사용 중일 수 있음. 기존 DB로 이어서 실행합니다.")

# Load the feature table. The target is the 'label' column; the model inputs
# are every column except 'label', 'image_name' and 'label_name'.
df = pd.read_csv('led_features_parallel_preprocessing.csv')
y = df['label']
X = df.drop(columns=['label', 'image_name', 'label_name'])

# Stratified 80/20 train/validation split with a fixed seed.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Create the study in the SQLite storage, or reattach to the study of the
# same name if it is already stored there.
study = optuna.create_study(
    storage='sqlite:///optuna_lgbm.db',
    study_name='lgbm_study',
    direction='minimize',
    load_if_exists=True,
)

# Objective 함수
def objective(trial):
    """Train one LightGBM candidate and return its validation error rate.

    Hyperparameters are sampled from ``trial``; the model is fitted on
    ``X_train``/``y_train`` with early stopping (patience 50) monitored on
    ``X_val``/``y_val``. Every 50th trial a checkpoint of the best
    parameters and score seen so far is written to disk.

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        ``1.0 - accuracy`` on the validation split (the study minimizes it).
    """
    params = {
        'n_estimators': 1000,
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'num_leaves': trial.suggest_int('num_leaves', 16, 128),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'min_child_samples': trial.suggest_int('min_child_samples', 10, 100),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 5.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.0, 5.0),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.6, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.6, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 10),
        'objective': 'multiclass',
        'num_class': len(set(y)),
        'random_state': 42
    }

    model = lgb.LGBMClassifier(**params)
    model.fit(
        X_train, y_train,
        eval_set=[(X_val, y_val)],
        callbacks=[early_stopping(50), log_evaluation(0)]
    )
    preds = model.predict(X_val)
    error = 1.0 - accuracy_score(y_val, preds)

    # Periodic checkpoint.
    if trial.number % 50 == 0:
        # The running trial is not finished yet, so the study's "best" view
        # does not include it, and the lookup raises ValueError when no
        # trial has completed at all (e.g. trial 0 on a fresh database).
        try:
            best_so_far = study.best_trial
            best_error, best_params = best_so_far.value, best_so_far.params
        except ValueError:
            best_error, best_params = None, None

        # Fold the current trial into the checkpoint if it is the new best.
        if best_error is None or error < best_error:
            best_error, best_params = error, trial.params

        with open("optuna_best_params_temp.json", "w") as f:
            json.dump(best_params, f)
        with open("optuna_best_score_temp.txt", "w") as f:
            f.write(f"Best Accuracy: {1.0 - best_error:.5f}")
        # NOTE(review): this stores the model of the *current* trial, which
        # is not necessarily the best one despite the file name.
        joblib.dump(model, "best_lgbm_model_temp.pkl")

    return error

# Run the hyperparameter search.
study.optimize(objective, n_trials=1000)

# Persist the best sampled hyperparameters and the matching accuracy.
best_params = study.best_params
with open("optuna_best_params.json", "w") as f:
    json.dump(best_params, f)

with open("optuna_best_score.txt", "w") as f:
    f.write(f"Best Accuracy: {1.0 - study.best_value:.5f}")

# study.best_params only holds the values drawn through trial.suggest_*,
# so the fixed settings used inside objective() have to be re-applied here.
# Without them the final model would silently fall back to LightGBM's
# default number of trees instead of the tuned configuration.
fixed_params = {
    'n_estimators': 1000,
    'objective': 'multiclass',
    'num_class': len(set(y)),
    'random_state': 42,
}

# Re-fit the winning configuration exactly as in objective() to recover the
# early-stopped number of boosting rounds.
probe_model = lgb.LGBMClassifier(**fixed_params, **best_params)
probe_model.fit(
    X_train, y_train,
    eval_set=[(X_val, y_val)],
    callbacks=[early_stopping(50), log_evaluation(0)]
)
n_rounds = probe_model.best_iteration_ or fixed_params['n_estimators']

# Train the final model on all data with that round count; no validation
# split is left for early stopping at this point.
final_model = lgb.LGBMClassifier(
    **{**fixed_params, **best_params, 'n_estimators': n_rounds}
)
final_model.fit(X, y)
joblib.dump(final_model, "best_lgbm_model.pkl")

print("✅ 튜닝 완료. 최종 모델 및 파라미터 저장됨.")


[I 2025-07-24 16:20:27,101] Using an existing study with name 'lgbm_study' instead of creating a new one.


⚠️ optuna_lgbm.db 삭제 실패 - 이미 사용 중일 수 있음. 기존 DB로 이어서 실행합니다.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003695 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 28999
[LightGBM] [Info] Number of data points in the train set: 2448, number of used features: 137
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[327]	valid_0's multi_logloss: 0.013084


[I 2025-07-24 16:20:28,225] Trial 1 finished with value: 0.0016339869281045694 and parameters: {'learning_rate': 0.11944499080144996, 'num_leaves': 87, 'max_depth': 12, 'min_child_samples': 32, 'reg_alpha': 2.7383277976560634, 'reg_lambda': 3.22392438119969, 'feature_fraction': 0.7235074989405689, 'bagging_fraction': 0.9901651349320219, 'bagging_freq': 1}. Best is trial 1 with value: 0.0016339869281045694.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002374 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 28988
[LightGBM] [Info] Number of data points in the train set: 2448, number of used features: 136
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
Training until validation scores don't improve for 50 rounds


[I 2025-07-24 16:20:29,933] Trial 2 finished with value: 0.0032679738562091387 and parameters: {'learning_rate': 0.036880690842836336, 'num_leaves': 57, 'max_depth': 4, 'min_child_samples': 66, 'reg_alpha': 3.745352690406199, 'reg_lambda': 4.102656027554244, 'feature_fraction': 0.7686483707409743, 'bagging_fraction': 0.8136123985370212, 'bagging_freq': 6}. Best is trial 1 with value: 0.0016339869281045694.


Early stopping, best iteration is:
[294]	valid_0's multi_logloss: 0.0185915
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002436 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 28999
[LightGBM] [Info] Number of data points in the train set: 2448, number of used features: 137
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[626]	valid_0's multi_logloss: 0.0126815


[I 2025-07-24 16:20:30,463] Trial 3 finished with value: 0.0016339869281045694 and parameters: {'learning_rate': 0.2281837480412347, 'num_leaves': 31, 'max_depth': 10, 'min_child_samples': 24, 'reg_alpha': 2.387033551377998, 'reg_lambda': 4.674950618061745, 'feature_fraction': 0.755923529588095, 'bagging_fraction': 0.7013492072630161, 'bagging_freq': 4}. Best is trial 1 with value: 0.0016339869281045694.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003902 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 28988
[LightGBM] [Info] Number of data points in the train set: 2448, number of used features: 136
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
Training until validation scores don't improve for 50 rounds


[I 2025-07-24 16:20:30,824] Trial 4 finished with value: 0.0 and parameters: {'learning_rate': 0.0913868172100159, 'num_leaves': 110, 'max_depth': 12, 'min_child_samples': 50, 'reg_alpha': 1.0389485429687317, 'reg_lambda': 1.4800132604020955, 'feature_fraction': 0.8152253766995383, 'bagging_fraction': 0.7194923638619213, 'bagging_freq': 8}. Best is trial 4 with value: 0.0.


Early stopping, best iteration is:
[200]	valid_0's multi_logloss: 0.00674194
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002199 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 28988
[LightGBM] [Info] Number of data points in the train set: 2448, number of used features: 136
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[33]	valid_0's multi_logloss: 0.0240046


[I 2025-07-24 16:20:30,999] Trial 5 finished with value: 0.0032679738562091387 and parameters: {'learning_rate': 0.28859873316378337, 'num_leaves': 37, 'max_depth': 12, 'min_child_samples': 55, 'reg_alpha': 4.937670942963528, 'reg_lambda': 0.34469970222118396, 'feature_fraction': 0.6039332875941618, 'bagging_fraction': 0.8543959583922334, 'bagging_freq': 3}. Best is trial 4 with value: 0.0.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002325 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 29006
[LightGBM] [Info] Number of data points in the train set: 2448, number of used features: 138
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
Training until validation scores don't improve for 50 rounds


[I 2025-07-24 16:20:31,640] Trial 6 finished with value: 0.0016339869281045694 and parameters: {'learning_rate': 0.08528690921782744, 'num_leaves': 20, 'max_depth': 10, 'min_child_samples': 20, 'reg_alpha': 2.227400152658382, 'reg_lambda': 2.256890677767178, 'feature_fraction': 0.6552889682449342, 'bagging_fraction': 0.7979135404744695, 'bagging_freq': 2}. Best is trial 4 with value: 0.0.


Did not meet early stopping. Best iteration is:
[996]	valid_0's multi_logloss: 0.0113152
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002228 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 28976
[LightGBM] [Info] Number of data points in the train set: 2448, number of used features: 135
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[176]	valid_0's multi_logloss: 0.00534779


[I 2025-07-24 16:20:33,256] Trial 7 finished with value: 0.0 and parameters: {'learning_rate': 0.17413436969495052, 'num_leaves': 43, 'max_depth': 3, 'min_child_samples': 71, 'reg_alpha': 0.8129878643718896, 'reg_lambda': 1.849128572731928, 'feature_fraction': 0.7727113276380717, 'bagging_fraction': 0.9388650162717086, 'bagging_freq': 10}. Best is trial 4 with value: 0.0.
[I 2025-07-24 16:20:34,000] Trial 8 finished with value: 0.0032679738562091387 and parameters: {'learning_rate': 0.21886728201236436, 'num_leaves': 112, 'max_depth': 4, 'min_child_samples': 92, 'reg_alpha': 3.954017703784583, 'reg_lambda': 0.33059694991900535, 'feature_fraction': 0.6138537729870089, 'bagging_fraction': 0.7458240450391953, 'bagging_freq': 10}. Best is trial 4 with value: 0.0.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002103 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 28966
[LightGBM] [Info] Number of data points in the train set: 2448, number of used features: 134
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[151]	valid_0's multi_logloss: 0.019345
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002203 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 28988
[LightGBM] [Info] Number of data points in the train set: 2448, number of used features: 136
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [In

[I 2025-07-24 16:20:34,314] Trial 9 finished with value: 0.0032679738562091387 and parameters: {'learning_rate': 0.15402382810435417, 'num_leaves': 112, 'max_depth': 5, 'min_child_samples': 43, 'reg_alpha': 2.859511262026204, 'reg_lambda': 1.1870730310433215, 'feature_fraction': 0.7145801853793572, 'bagging_fraction': 0.6338231004531835, 'bagging_freq': 6}. Best is trial 4 with value: 0.0.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002241 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 28999
[LightGBM] [Info] Number of data points in the train set: 2448, number of used features: 137
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
Training until validation scores don't improve for 50 rounds


[I 2025-07-24 16:20:34,788] Trial 10 finished with value: 0.0 and parameters: {'learning_rate': 0.19922934044451565, 'num_leaves': 33, 'max_depth': 11, 'min_child_samples': 36, 'reg_alpha': 0.12284392093300289, 'reg_lambda': 2.756396520070772, 'feature_fraction': 0.6190547661411184, 'bagging_fraction': 0.9289406271507257, 'bagging_freq': 6}. Best is trial 4 with value: 0.0.


Early stopping, best iteration is:
[470]	valid_0's multi_logloss: 0.00193373
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002417 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 29010
[LightGBM] [Info] Number of data points in the train set: 2448, number of used features: 139
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[730]	valid_0's multi_logloss: 0.00890722


[I 2025-07-24 16:20:36,663] Trial 11 finished with value: 0.0016339869281045694 and parameters: {'learning_rate': 0.01923262055832871, 'num_leaves': 127, 'max_depth': 7, 'min_child_samples': 10, 'reg_alpha': 1.1289950853734814, 'reg_lambda': 1.3317111996208328, 'feature_fraction': 0.9155288711704122, 'bagging_fraction': 0.6109217832936789, 'bagging_freq': 8}. Best is trial 4 with value: 0.0.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001868 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 28976
[LightGBM] [Info] Number of data points in the train set: 2448, number of used features: 135
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[312]	valid_0's multi_logloss: 0.00647902


[W 2025-07-24 16:20:38,561] Trial 12 failed with parameters: {'learning_rate': 0.07915532114006915, 'num_leaves': 74, 'max_depth': 7, 'min_child_samples': 74, 'reg_alpha': 0.8878638053021504, 'reg_lambda': 1.8860359791093333, 'feature_fraction': 0.8713693035422282, 'bagging_fraction': 0.9081478832059269, 'bagging_freq': 10} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "C:\Users\tori\AppData\Roaming\Python\Python310\site-packages\optuna\study\_optimize.py", line 201, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\tori\AppData\Local\Temp\ipykernel_14876\780909580.py", line 56, in objective
    model.fit(
  File "C:\Users\tori\AppData\Roaming\Python\Python310\site-packages\lightgbm\sklearn.py", line 1560, in fit
    super().fit(
  File "C:\Users\tori\AppData\Roaming\Python\Python310\site-packages\lightgbm\sklearn.py", line 1049, in fit
    self._Booster = train(
  File "C:\Users\tori\AppData\Roaming\Python\Python310\site-

KeyboardInterrupt: 