<a href="https://colab.research.google.com/github/ljs7463/AnalysisProject/blob/master/%EB%8D%B0%EC%9D%B4%EC%BD%98/analytics/stacking.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import lib & load dataset

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import os

# 시각화 폰트 설정
if os.name =='posix':
    plt.rc("font", family = "AppleGothic")

else:
    plt.rc("font", family = "Malgun Gothic")

# 경고문자 무시
warnings.filterwarnings(action='ignore')


# 데이터 로드
df_train = pd.read_csv('train.csv')
df_test = pd.read_csv('test.csv')
df_sub = pd.read_csv('sample_submission.csv')
df_info = pd.read_csv('data_info.csv')

# preprocessing & split dataset

In [2]:
## Label Encoding

## train데이터
df_train['preferred_difficulty_level'] = pd.factorize(df_train['preferred_difficulty_level'])[0]
df_train['subscription_type'] = pd.factorize(df_train['subscription_type'])[0]

## test데이터
df_test['preferred_difficulty_level'] = pd.factorize(df_test['preferred_difficulty_level'])[0]
df_test['subscription_type'] = pd.factorize(df_test['subscription_type'])[0]

# scaler
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
for i in [['subscription_duration','recent_login_time','average_time_per_learning_session','monthly_active_learning_days','total_completed_courses','recent_learning_achievement','abandoned_learning_sessions','community_engagement_level','customer_inquiry_history','payment_pattern']]:
  df_train[i] = scaler.fit_transform(df_train[i])
for i in [['subscription_duration','recent_login_time','average_time_per_learning_session','monthly_active_learning_days','total_completed_courses','recent_learning_achievement','abandoned_learning_sessions','community_engagement_level','customer_inquiry_history','payment_pattern']]:
  df_test[i] = scaler.transform(df_test[i])

# Delete user_id
df_train = df_train.drop(columns = 'user_id')

# split target
x = df_train[list(df_train.columns[:-1])]
y = df_train['target']

# Delete user_id
df_test = df_test.drop(columns = 'user_id')
# split target
new_x = df_test


# import lib for modeling

In [3]:
!pip install catboost
!pip install optuna



In [6]:
import optuna
from lightgbm import LGBMClassifier,early_stopping
from catboost import CatBoostClassifier
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import make_scorer, f1_score
from sklearn.linear_model import LogisticRegression
import optuna
import numpy as np
from catboost import CatBoostClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

## xgboost

In [None]:
def objective(trial, x, y):
  # Optuna가 탐색할 하이퍼파라미터의 범위 설정
  params = {
        'max_depth': trial.suggest_int('max_depth', 3, 9),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.1),
        'n_estimators': trial.suggest_int('n_estimators',1000,1001),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'gamma': trial.suggest_loguniform('gamma', 1e-8, 1.0),
        'subsample': trial.suggest_loguniform('subsample', 0.01, 1.0),
        'colsample_bytree': trial.suggest_loguniform('colsample_bytree', 0.01, 1.0),
        'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-8, 1.0),
        'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-8, 1.0),
        'eval_metric': 'logloss',
        'use_label_encoder': False
    }

  # k-겹 교차 검증 설정
  kf = KFold(
      n_splits =5,
      shuffle = True,
      random_state = 42)
  f1_scores = []

  for train_index, test_index in kf.split(x):
    x_train, x_test = x.iloc[train_index], x.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # xgBoost 모델생성
    model = XGBClassifier(**params, n_jobs = -1)
    model.fit(x_train, y_train, eval_set = [(x_test, y_test)], early_stopping_rounds = 100)

    # 예측 및 F 점수 계산
    preds = model.predict(x_test)
    f1 = f1_score(y_test, preds, average = 'macro')
    f1_scores.append(f1)

  return np.mean(f1_scores)

# Optuna  스터디 생성 및 최적화 실행
study = optuna.create_study(direction = 'maximize')
# lambda 함수를 사용하여 x와 y를 objective 함수에 전달
study.optimize(lambda trial: objective(trial, x, y), n_trials = 20)

# Optuna 스터디에서 최적의 하이퍼파라미터 가져오기
best_params = study.best_trial.params

# 최적의 하이퍼파라미터를 사용하여 CatBoost 모델 초기화
lgbm_model = LGBMClassifier(**best_params, verbose=0)

# Optuna 스터디에서 최적의 하이퍼파라미터 가져오기
best_params = study.best_trial.params

# 최적의 하이퍼파라미터를 사용하여 CatBoost 모델 초기화
xgb_model = XGBClassifier(**best_params, verbose=0)

[I 2023-12-06 11:11:26,646] A new study created in memory with name: no-name-f4b5a99b-15da-4a3e-8718-211faa902921


[0]	validation_0-logloss:0.66361
[1]	validation_0-logloss:0.66359
[2]	validation_0-logloss:0.66359
[3]	validation_0-logloss:0.66359
[4]	validation_0-logloss:0.66368
[5]	validation_0-logloss:0.66372
[6]	validation_0-logloss:0.66373
[7]	validation_0-logloss:0.66373
[8]	validation_0-logloss:0.66372
[9]	validation_0-logloss:0.66372
[10]	validation_0-logloss:0.66374
[11]	validation_0-logloss:0.66347
[12]	validation_0-logloss:0.66347
[13]	validation_0-logloss:0.66351
[14]	validation_0-logloss:0.66354
[15]	validation_0-logloss:0.66355
[16]	validation_0-logloss:0.66350
[17]	validation_0-logloss:0.66354
[18]	validation_0-logloss:0.66353
[19]	validation_0-logloss:0.66356
[20]	validation_0-logloss:0.66358
[21]	validation_0-logloss:0.66361
[22]	validation_0-logloss:0.66361
[23]	validation_0-logloss:0.66323
[24]	validation_0-logloss:0.66328
[25]	validation_0-logloss:0.66332
[26]	validation_0-logloss:0.66333
[27]	validation_0-logloss:0.66332
[28]	validation_0-logloss:0.66334
[29]	validation_0-loglos

[I 2023-12-06 11:12:12,270] Trial 0 finished with value: 0.38732106074736666 and parameters: {'max_depth': 6, 'learning_rate': 0.014139421184407896, 'n_estimators': 1000, 'min_child_weight': 5, 'gamma': 0.36653145567644285, 'subsample': 0.1682278935453077, 'colsample_bytree': 0.06591832654567586, 'reg_alpha': 0.00864089415937793, 'reg_lambda': 1.3782657926659546e-07}. Best is trial 0 with value: 0.38732106074736666.


[0]	validation_0-logloss:0.66406
[1]	validation_0-logloss:0.66194
[2]	validation_0-logloss:0.65977
[3]	validation_0-logloss:0.65829
[4]	validation_0-logloss:0.65936
[5]	validation_0-logloss:0.65915
[6]	validation_0-logloss:0.65856
[7]	validation_0-logloss:0.65788
[8]	validation_0-logloss:0.65936
[9]	validation_0-logloss:0.66070
[10]	validation_0-logloss:0.66030
[11]	validation_0-logloss:0.65929
[12]	validation_0-logloss:0.65868
[13]	validation_0-logloss:0.65887
[14]	validation_0-logloss:0.65955
[15]	validation_0-logloss:0.66067
[16]	validation_0-logloss:0.66125
[17]	validation_0-logloss:0.66062
[18]	validation_0-logloss:0.66260
[19]	validation_0-logloss:0.66311
[20]	validation_0-logloss:0.66290
[21]	validation_0-logloss:0.66332
[22]	validation_0-logloss:0.66293
[23]	validation_0-logloss:0.66354
[24]	validation_0-logloss:0.66357
[25]	validation_0-logloss:0.66485
[26]	validation_0-logloss:0.66423
[27]	validation_0-logloss:0.66314
[28]	validation_0-logloss:0.66409
[29]	validation_0-loglos

[I 2023-12-06 11:12:20,438] Trial 1 finished with value: 0.40349904222014166 and parameters: {'max_depth': 8, 'learning_rate': 0.08206921580533459, 'n_estimators': 1000, 'min_child_weight': 4, 'gamma': 0.001103230950741864, 'subsample': 0.09532445707782787, 'colsample_bytree': 0.5269621440523503, 'reg_alpha': 5.620612560629103e-07, 'reg_lambda': 0.030904045401884607}. Best is trial 1 with value: 0.40349904222014166.


[0]	validation_0-logloss:0.66362
[1]	validation_0-logloss:0.66338
[2]	validation_0-logloss:0.66343
[3]	validation_0-logloss:0.66313
[4]	validation_0-logloss:0.66297
[5]	validation_0-logloss:0.66294
[6]	validation_0-logloss:0.66294
[7]	validation_0-logloss:0.66292
[8]	validation_0-logloss:0.66291
[9]	validation_0-logloss:0.66297
[10]	validation_0-logloss:0.66270
[11]	validation_0-logloss:0.66244
[12]	validation_0-logloss:0.66238
[13]	validation_0-logloss:0.66243
[14]	validation_0-logloss:0.66237
[15]	validation_0-logloss:0.66236
[16]	validation_0-logloss:0.66233
[17]	validation_0-logloss:0.66202
[18]	validation_0-logloss:0.66208
[19]	validation_0-logloss:0.66212
[20]	validation_0-logloss:0.66216
[21]	validation_0-logloss:0.66179
[22]	validation_0-logloss:0.66183
[23]	validation_0-logloss:0.66155
[24]	validation_0-logloss:0.66135
[25]	validation_0-logloss:0.66135
[26]	validation_0-logloss:0.66136
[27]	validation_0-logloss:0.66086
[28]	validation_0-logloss:0.66086
[29]	validation_0-loglos

[I 2023-12-06 11:13:03,112] Trial 2 finished with value: 0.39851554990526794 and parameters: {'max_depth': 6, 'learning_rate': 0.012610879518962507, 'n_estimators': 1000, 'min_child_weight': 3, 'gamma': 8.789953301994004e-08, 'subsample': 0.07680733969938973, 'colsample_bytree': 0.22336367939865343, 'reg_alpha': 3.756419438059108e-06, 'reg_lambda': 0.0007772069198655107}. Best is trial 1 with value: 0.40349904222014166.


[0]	validation_0-logloss:0.66371
[1]	validation_0-logloss:0.66356
[2]	validation_0-logloss:0.66351
[3]	validation_0-logloss:0.66358
[4]	validation_0-logloss:0.66367
[5]	validation_0-logloss:0.66371
[6]	validation_0-logloss:0.66383
[7]	validation_0-logloss:0.66380
[8]	validation_0-logloss:0.66376
[9]	validation_0-logloss:0.66370
[10]	validation_0-logloss:0.66374
[11]	validation_0-logloss:0.66355
[12]	validation_0-logloss:0.66359
[13]	validation_0-logloss:0.66342
[14]	validation_0-logloss:0.66335
[15]	validation_0-logloss:0.66327
[16]	validation_0-logloss:0.66322
[17]	validation_0-logloss:0.66323
[18]	validation_0-logloss:0.66329
[19]	validation_0-logloss:0.66315
[20]	validation_0-logloss:0.66327
[21]	validation_0-logloss:0.66326
[22]	validation_0-logloss:0.66341
[23]	validation_0-logloss:0.66320
[24]	validation_0-logloss:0.66314
[25]	validation_0-logloss:0.66309
[26]	validation_0-logloss:0.66310
[27]	validation_0-logloss:0.66317
[28]	validation_0-logloss:0.66340
[29]	validation_0-loglos

[I 2023-12-06 11:13:24,940] Trial 3 finished with value: 0.3836154163965412 and parameters: {'max_depth': 3, 'learning_rate': 0.010616725975568847, 'n_estimators': 1001, 'min_child_weight': 1, 'gamma': 1.1915294504052386e-07, 'subsample': 0.012896001090643063, 'colsample_bytree': 0.1664392703905146, 'reg_alpha': 0.15008053104418037, 'reg_lambda': 7.071032647030073e-08}. Best is trial 1 with value: 0.40349904222014166.


[0]	validation_0-logloss:0.66364
[1]	validation_0-logloss:0.66333
[2]	validation_0-logloss:0.66305
[3]	validation_0-logloss:0.66274
[4]	validation_0-logloss:0.66276
[5]	validation_0-logloss:0.66279
[6]	validation_0-logloss:0.66281
[7]	validation_0-logloss:0.66280
[8]	validation_0-logloss:0.66281
[9]	validation_0-logloss:0.66287
[10]	validation_0-logloss:0.66260
[11]	validation_0-logloss:0.66233
[12]	validation_0-logloss:0.66233
[13]	validation_0-logloss:0.66232
[14]	validation_0-logloss:0.66234
[15]	validation_0-logloss:0.66233
[16]	validation_0-logloss:0.66240
[17]	validation_0-logloss:0.66215
[18]	validation_0-logloss:0.66214
[19]	validation_0-logloss:0.66213
[20]	validation_0-logloss:0.66213
[21]	validation_0-logloss:0.66189
[22]	validation_0-logloss:0.66187
[23]	validation_0-logloss:0.66158
[24]	validation_0-logloss:0.66130
[25]	validation_0-logloss:0.66133
[26]	validation_0-logloss:0.66135
[27]	validation_0-logloss:0.66109
[28]	validation_0-logloss:0.66110
[29]	validation_0-loglos

[I 2023-12-06 11:14:25,355] Trial 4 finished with value: 0.38567290422101175 and parameters: {'max_depth': 5, 'learning_rate': 0.010156881716181235, 'n_estimators': 1001, 'min_child_weight': 6, 'gamma': 0.011572410310538492, 'subsample': 0.39924203725895663, 'colsample_bytree': 0.30612949155139146, 'reg_alpha': 0.0007294409786687396, 'reg_lambda': 0.14133284380059716}. Best is trial 1 with value: 0.40349904222014166.


[0]	validation_0-logloss:0.66367
[1]	validation_0-logloss:0.66359
[2]	validation_0-logloss:0.66359
[3]	validation_0-logloss:0.66374
[4]	validation_0-logloss:0.66374
[5]	validation_0-logloss:0.66404
[6]	validation_0-logloss:0.66409
[7]	validation_0-logloss:0.66408
[8]	validation_0-logloss:0.66407
[9]	validation_0-logloss:0.66409
[10]	validation_0-logloss:0.66420
[11]	validation_0-logloss:0.66228
[12]	validation_0-logloss:0.66230
[13]	validation_0-logloss:0.66252
[14]	validation_0-logloss:0.66278
[15]	validation_0-logloss:0.66277
[16]	validation_0-logloss:0.66287
[17]	validation_0-logloss:0.66325
[18]	validation_0-logloss:0.66318
[19]	validation_0-logloss:0.66317
[20]	validation_0-logloss:0.66304
[21]	validation_0-logloss:0.66322
[22]	validation_0-logloss:0.66315
[23]	validation_0-logloss:0.66132
[24]	validation_0-logloss:0.66147
[25]	validation_0-logloss:0.66157
[26]	validation_0-logloss:0.66168
[27]	validation_0-logloss:0.66166
[28]	validation_0-logloss:0.66168
[29]	validation_0-loglos

[I 2023-12-06 11:14:49,117] Trial 5 finished with value: 0.389097138122806 and parameters: {'max_depth': 4, 'learning_rate': 0.0766875579334636, 'n_estimators': 1001, 'min_child_weight': 8, 'gamma': 0.00016859361983974296, 'subsample': 0.18108470256375184, 'colsample_bytree': 0.09203471747092679, 'reg_alpha': 0.008537555265473873, 'reg_lambda': 0.001306789621416929}. Best is trial 1 with value: 0.40349904222014166.


[0]	validation_0-logloss:0.66362
[1]	validation_0-logloss:0.66361
[2]	validation_0-logloss:0.66363
[3]	validation_0-logloss:0.66373
[4]	validation_0-logloss:0.66389
[5]	validation_0-logloss:0.66395
[6]	validation_0-logloss:0.66396
[7]	validation_0-logloss:0.66395
[8]	validation_0-logloss:0.66394
[9]	validation_0-logloss:0.66394
[10]	validation_0-logloss:0.66398
[11]	validation_0-logloss:0.66347
[12]	validation_0-logloss:0.66349
[13]	validation_0-logloss:0.66355
[14]	validation_0-logloss:0.66363
[15]	validation_0-logloss:0.66364
[16]	validation_0-logloss:0.66362
[17]	validation_0-logloss:0.66368
[18]	validation_0-logloss:0.66368
[19]	validation_0-logloss:0.66371
[20]	validation_0-logloss:0.66363
[21]	validation_0-logloss:0.66363
[22]	validation_0-logloss:0.66360
[23]	validation_0-logloss:0.66303
[24]	validation_0-logloss:0.66312
[25]	validation_0-logloss:0.66319
[26]	validation_0-logloss:0.66316
[27]	validation_0-logloss:0.66315
[28]	validation_0-logloss:0.66320
[29]	validation_0-loglos

## light Gbm

In [None]:
def objective(trial, x, y):
  # Optuna가 탐색할 하이퍼파라미터의 범위 설정
  params = {
        'objective': 'binary',
        'num_leaves': trial.suggest_int('num_leaves', 100, 500, step=1, log=True),
        'max_depth': trial.suggest_int('max_depth', 1, 10, step=1, log=False),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1, log=True),
        'n_estimators': trial.suggest_int('n_estimators', 900, 1000, step=1, log=True),
        'class_weight': trial.suggest_categorical('class_weight', ['balanced', None]),
        'min_child_samples': trial.suggest_int('min_child_samples', 10, 50, step=10, log=False),
        'subsample': trial.suggest_uniform('subsample', 0.7, 1.0),
        'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.7, 1.0),
        'reg_alpha': trial.suggest_uniform('reg_alpha', 0.0, 1.0),
        'reg_lambda': trial.suggest_uniform('reg_lambda', 0.0, 10.0),
        'random_state': 0
  }

  # k-겹 교차 검증 설정
  kf = KFold(
      n_splits =10,
      shuffle = True,
      random_state = 42)
  f1_scores = []

  for train_index, test_index in kf.split(x):
    x_train, x_test = x.iloc[train_index], x.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # xgBoost 모델생성
    model = LGBMClassifier(**params, n_jobs = -1)
    model.fit(x_train, y_train, eval_set = [(x_test, y_test)],callbacks=[early_stopping(stopping_rounds=100)], eval_metric= 'logloss')

    # 예측 및 F 점수 계산
    preds = model.predict(x_test)
    f1 = f1_score(y_test, preds, average = 'macro')
    f1_scores.append(f1)

  return np.mean(f1_scores)

# Optuna  스터디 생성 및 최적화 실행
study = optuna.create_study(direction = 'maximize')
# lambda 함수를 사용하여 x와 y를 objective 함수에 전달
study.optimize(lambda trial: objective(trial, x, y), n_trials = 40)

# Optuna 스터디에서 최적의 하이퍼파라미터 가져오기
best_params = study.best_trial.params

# 최적의 하이퍼파라미터를 사용하여 CatBoost 모델 초기화
lgbm_model = LGBMClassifier(**best_params, verbose=0)


# logistic regression

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score


# 목적 함수 정의
def obj_log(trial,x,y):
    C = trial.suggest_float('C', 1e-4, 1e4, log=True)
    penalty = trial.suggest_categorical('penalty', ['l1', 'l2'])

    # 솔버 선택
    if penalty == 'l1':
        solver = 'liblinear'
    else:
        solver = trial.suggest_categorical('solver', ['lbfgs', 'newton-cg', 'sag'])

    model = LogisticRegression(C=C, penalty=penalty, solver=solver, max_iter=1000)
    score = cross_val_score(model, x, y, n_jobs=-1, cv=3, scoring='f1_macro')
    f1_macro = np.mean(score)

    return f1_macro

# Optuna 최적화 실행
study = optuna.create_study(direction='maximize')
# lambda 함수를 사용하여 x와 y를 objective 함수에 전달
study.optimize(lambda trial: obj_log(trial, x, y), n_trials = 60)

# 최적 파라미터 출력
print("Best trial:")
trial = study.best_trial
print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

# 최적 파라미터로 모델 학습
best_params = study.best_trial.params
logi_model = LogisticRegression(**best_params, max_iter=1000)


## knn

In [None]:
# 목적 함수 정의
def obj_knn(trial,x,y):
    n_neighbors = trial.suggest_int('n_neighbors', 1, 30)
    metric = trial.suggest_categorical('metric', ['euclidean', 'manhattan', 'minkowski'])
    weights = trial.suggest_categorical('weights', ['uniform', 'distance'])

    model = KNeighborsClassifier(n_neighbors=n_neighbors, metric=metric, weights=weights)
    score = cross_val_score(model, x, y, n_jobs=-1, cv=3, scoring='f1_macro')
    f1_macro = np.mean(score)

    return f1_macro

# Optuna 최적화 실행
study = optuna.create_study(direction='maximize')
# lambda 함수를 사용하여 x와 y를 objective 함수에 전달
study.optimize(lambda trial: obj_knn(trial, x, y), n_trials = 60)


# 최적 파라미터 출력
print("Best trial:")
trial = study.best_trial
print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

# 최적 파라미터로 모델 학습
best_params = study.best_trial.params
knn_model = KNeighborsClassifier(**best_params)

In [None]:
import optuna
import numpy as np
from catboost import CatBoostClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score

# 사용할 모델 선정
# 1. lightgbm
# 2. catboost
# 3. logistic
# 4. SVM
# 5. KNN
# 6. 나이브베이즈



# catboost 하이퍼파라미터
cat_params= {
    'iterations': trial.suggest_int('iterations', 800, 1000),
    'depth': trial.suggest_int('depth', 4, 10),
    'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.5),
    'random_strength': trial.suggest_int('random_strength', 0, 100),
    'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 1.0),
    'od_type': trial.suggest_categorical('od_type', ['IncToDec', 'Iter'])
}


# LightGbm 하이퍼파라미터
params = {
      'objective': 'binary',
      'num_leaves': trial.suggest_int('num_leaves', 100, 500, step=1, log=True),
      'max_depth': trial.suggest_int('max_depth', 1, 10, step=1, log=False),
      'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1, log=True),
      'n_estimators': trial.suggest_int('n_estimators', 900, 1000, step=1, log=True),
      'class_weight': trial.suggest_categorical('class_weight', ['balanced', None]),
      'min_child_samples': trial.suggest_int('min_child_samples', 10, 50, step=10, log=False),
      'subsample': trial.suggest_uniform('subsample', 0.7, 1.0),
      'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.7, 1.0),
      'reg_alpha': trial.suggest_uniform('reg_alpha', 0.0, 1.0),
      'reg_lambda': trial.suggest_uniform('reg_lambda', 0.0, 10.0),
      'random_state': 0
}

# xgboost 하이퍼파라미터
params = {
      "objective":'binary:logistic',
      'max_depth': trial.suggest_int('max_depth', 3, 9),
      'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.1),
      'n_estimators': trial.suggest_int('n_estimators',1000,1001),
      'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
      'gamma': trial.suggest_loguniform('gamma', 1e-8, 1.0),
      'subsample': trial.suggest_loguniform('subsample', 0.01, 1.0),
      'colsample_bytree': trial.suggest_loguniform('colsample_bytree', 0.01, 1.0),
      'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-8, 1.0),
      'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-8, 1.0),
      'eval_metric': 'logloss',
      'use_label_encoder': False
  }






In [None]:
# 목적 함수 정의
def obj_log(trial):
    # 하이퍼파라미터 범위 설정
    C = trial.suggest_float('C', 1e-4, 1e4, log=True)
    penalty = trial.suggest_categorical('penalty', ['l1', 'l2'])

    # 모델 생성 및 검증
    model = LogisticRegression(C=C, penalty=penalty, solver='liblinear')
    score = cross_val_score(model, X, y, n_jobs=-1, cv=3)
    accuracy = np.mean(score)

    return accuracy

# Optuna 최적화 실행
study = optuna.create_study(direction='maximize')
study.optimize(obj_log, n_trials=100)