# In [None]:
# LightGBM + Optuna hyperparameter tuning notebook
# Target: tower lamp state classification (led_features_20250723_172434.csv)

import pandas as pd
import lightgbm as lgb
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from optuna.visualization import plot_optimization_history, plot_param_importances

# Load the extracted feature table
df = pd.read_csv('led_features_20250723_172434.csv')

# The target is the 'label' column; 'image_name' and 'label_name' are dropped
# together with it so that only the remaining columns are used as features
y = df['label']
X = df.drop(['label', 'image_name', 'label_name'], axis=1)

# Hold out 20% of the rows for validation, stratified on the label, fixed seed
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Optuna objective function
def objective(trial):
    """Train one LightGBM multiclass model and return its validation error.

    Hyperparameters are sampled from ``trial``; the model is fitted on the
    module-level ``X_train`` / ``y_train`` and evaluated on ``X_val`` /
    ``y_val``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        ``1.0 - accuracy`` on the validation split (lower is better), so the
        study that drives this function must minimize.
    """
    param = {
        'objective': 'multiclass',
        # NOTE(review): assumes labels are encoded as 0..K-1 — confirm against the CSV
        'num_class': len(y.unique()),
        'metric': 'multi_logloss',
        'verbosity': -1,
        'boosting_type': 'gbdt',
        'n_jobs': -1,
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'num_leaves': trial.suggest_int('num_leaves', 16, 128),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 10, 100),
        'lambda_l1': trial.suggest_float('lambda_l1', 0.0, 5.0),
        'lambda_l2': trial.suggest_float('lambda_l2', 0.0, 5.0),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.6, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.6, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 10),
    }

    dtrain = lgb.Dataset(X_train, label=y_train)
    # Tie the validation set to the training set explicitly
    dval = lgb.Dataset(X_val, label=y_val, reference=dtrain)

    # The `early_stopping_rounds` / `verbose_eval` keyword arguments of
    # lgb.train() were removed in LightGBM 4.0; early stopping is requested
    # through a callback instead. verbose=False keeps the callback quiet.
    model = lgb.train(
        param,
        dtrain,
        num_boost_round=1000,
        valid_sets=[dval],
        callbacks=[lgb.early_stopping(stopping_rounds=50, verbose=False)],
    )

    # Predict with the iteration selected by early stopping; the result is one
    # probability column per class, so argmax over axis 1 gives the label index
    preds = model.predict(X_val, num_iteration=model.best_iteration)
    preds_labels = preds.argmax(axis=1)
    acc = accuracy_score(y_val, preds_labels)
    return 1.0 - acc  # the study minimizes, so return (1 - accuracy)

# Run the hyperparameter search
# The sampler is seeded so that repeated runs propose the same sequence of
# trials, matching the fixed random_state used for the train/validation split.
# NOTE(review): multi-threaded training may still introduce small run-to-run
# differences in the scores — confirm if exact reproducibility is required.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=50)

# Report the best trial; the objective returns (1 - accuracy), so invert it
print("Best parameters:", study.best_params)
print("Best accuracy:", 1.0 - study.best_value)

# Visualize the optimization history and the parameter importances
plot_optimization_history(study).show()
plot_param_importances(study).show()
