In [None]:
!pip install optuna

In [None]:
from google.colab import drive
# Mount Google Drive so the dataset under MyDrive is reachable from the Colab VM.
GDRIVE_MOUNT_POINT = '/content/gdrive/'
drive.mount(GDRIVE_MOUNT_POINT)

In [None]:
import numpy as np
import pandas as pd

# Read the darknet traffic log from the mounted Drive into a DataFrame.
DARKNET_LOG_CSV = '/content/gdrive/MyDrive/CIC-Darknet/darknet_log.csv'
traffic_df = pd.read_csv(DARKNET_LOG_CSV)

# Show (rows, columns) as the cell output.
traffic_df.shape

In [None]:
# Zero-based positions of the columns to discard from the frame as loaded.
drop_columns = [
    2, 5, 8, 9, 10, 12, 13, 14, 15, 18,
    19, 23, 24, 25, 28, 32, 33, 34, 35, 37,
    40, 41, 42, 44, 45, 46, 47, 49, 50, 51,
    52, 53, 54, 55, 56, 57, 58, 59, 60, 63,
    64, 65, 66, 69, 70, 71, 72, 73, 74, 76,
]

# Translate the positions into column labels, then remove them in place.
# NOTE(review): the positions are resolved against the *current* frame, so
# re-running this cell without reloading the CSV applies them to the already
# reduced frame.
labels_to_drop = traffic_df.columns[drop_columns]
traffic_df.drop(columns=labels_to_drop, inplace=True)

traffic_df.head()

In [None]:
from sklearn.model_selection import train_test_split

# Separate the label column ('target') from the feature columns.
y_traffic_df = traffic_df['target']
X_traffic_df = traffic_df.drop(columns='target')

# First cut: 60 % for training, 40 % held out, stratified on the label.
# The held-out part is temporarily bound to X_test / y_test.
X_train, X_test, y_train, y_test = train_test_split(
    X_traffic_df,
    y_traffic_df,
    test_size=0.4,
    stratify=y_traffic_df,
    random_state=11,
)

# Report the shapes of both parts.
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

In [None]:
# Second cut: halve the held-out data into validation and test sets,
# again stratified on the label.
# NOTE(review): X_test / y_test are both the input and an output of this call,
# so running the cell a second time splits the already-halved test set again.
X_vali, X_test, y_vali, y_test = train_test_split(
    X_test,
    y_test,
    test_size=0.5,
    stratify=y_test,
    random_state=11,
)

# Report the shapes of both parts.
print(X_vali.shape, y_vali.shape)
print(X_test.shape, y_test.shape)

In [None]:
import optuna
from lightgbm import LGBMClassifier

def objective(trial):
    """Optuna objective: fit one LightGBM classifier and return its validation accuracy.

    A hyperparameter set is sampled from ``trial``, the model is trained on the
    notebook-level ``X_train`` / ``y_train`` with early stopping monitored on
    ``X_vali`` / ``y_vali``, and the accuracy of its predictions on that same
    validation split is returned.

    Args:
        trial: The ``optuna.trial.Trial`` used to sample hyperparameters.

    Returns:
        Validation accuracy of the fitted model (the study maximises it).
    """
    # Function-scope imports: the notebook imports sklearn.metrics only in a
    # later cell, and only LGBMClassifier is imported from lightgbm.
    from lightgbm import early_stopping
    from sklearn.metrics import accuracy_score

    # Only LightGBM parameters are listed. The XGBoost-only keys that used to
    # be here (booster, tree_method, predictor, gpu_id, eval_metric,
    # colsample_bylevel, gamma, use_label_encoder) are not LightGBM parameters.
    # For GPU training, LightGBM's own switch is 'device_type': 'gpu'
    # (needs a GPU-enabled LightGBM build); the default trains on CPU.
    params = {
        'objective': 'multiclass',
        'num_class': 3,
        'verbosity': -1,  # silence LightGBM's own logging during the search
        'class_weight': trial.suggest_categorical('class_weight', ['balanced', None]),
        'max_depth': trial.suggest_int('max_depth', 4, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.0001, 0.99),
        'n_estimators': trial.suggest_int('n_estimators', 1000, 2000, step=100),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'colsample_bynode': trial.suggest_float('colsample_bynode', 0.5, 1.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-2, 1, log=True),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-2, 1, log=True),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0, step=0.05),
        # LightGBM applies `subsample` only when `subsample_freq` is non-zero.
        # It is suggested (not hard-coded) so that it is part of
        # study.best_params and therefore reaches any refit built from them.
        'subsample_freq': trial.suggest_int('subsample_freq', 1, 5),
        'min_child_weight': trial.suggest_int('min_child_weight', 2, 15),
        # LightGBM's counterpart of XGBoost's `gamma` (minimum gain to split).
        'min_split_gain': trial.suggest_float('min_split_gain', 0.1, 1.0, log=True),
    }

    model = LGBMClassifier(**params, random_state=11)

    # Early stopping is passed as a callback; the `early_stopping_rounds` and
    # `verbose` keyword arguments of fit() no longer exist in LightGBM 4.
    model.fit(
        X_train,
        y_train,
        eval_set=[(X_vali, y_vali)],
        callbacks=[early_stopping(stopping_rounds=50, verbose=False)],
    )

    # predict() already yields class labels, so no rounding step is needed.
    pred_labels = model.predict(X_vali)
    return accuracy_score(y_vali, pred_labels)

In [None]:
import sklearn.datasets
import sklearn.metrics
import psutil
import time

# In-cell import: only LGBMClassifier is imported from lightgbm elsewhere.
from lightgbm import early_stopping

# Wall-clock timestamp taken just before the search starts.
train_start = time.time()

# Seed the sampler so the sequence of suggested hyperparameters is repeatable.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=11),
)
study.optimize(objective, n_trials=300, show_progress_bar=True)

print("Number of finished trials: ", len(study.trials))
print("Best trial:")

trial = study.best_trial

print("  Accuracy: {}".format(trial.value))
print("  Best hyperparameters: ")

for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

# Refit with the best hyperparameters under the same training protocol as the
# trials: same seed, and early stopping on the validation split, so the model
# evaluated below is the one the best trial's score describes.
# `use_label_encoder` (an XGBoost-only argument) and `boost_from_average`
# (never set during the search) are intentionally not passed.
clf = LGBMClassifier(**study.best_params, random_state=11)
clf.fit(
    X_train,
    y_train,
    eval_set=[(X_vali, y_vali)],
    callbacks=[early_stopping(stopping_rounds=50, verbose=False)],
)

# Final, untouched-test-set evaluation.
preds = clf.predict(X_test)
accuracy = sklearn.metrics.accuracy_score(y_test, preds)

print("Accuracy: {}".format(accuracy))