# Optuna - Hyperparameter Tuning

In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import metrics
from xgboost import XGBClassifier 



In [4]:
import optuna
from optuna.storages import RDBStorage
from optuna_dashboard import wsgi

# Keep Optuna studies in a SQLite file (db.sqlite3) via the RDB storage backend.
storage = RDBStorage(url="sqlite:///db.sqlite3")

# Build the optuna-dashboard WSGI application on top of that storage.
application = wsgi(storage=storage)

In [5]:
# Read the dataset and recode the target labels: -1 becomes 0, 1 stays 1.
data = pd.read_csv('../kdd2004.csv').assign(
    target=lambda frame: frame['target'].replace({-1:0, 1:1})
)

# Preview the first rows.
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,65,66,67,68,69,70,71,72,73,target
0,52.0,32.69,0.3,2.5,20.0,1256.8,-0.89,0.33,11.0,-55.0,...,1595.1,-1.64,2.83,-2.0,-50.0,445.2,-0.35,0.26,0.76,0
1,58.0,33.33,0.0,16.5,9.5,608.1,0.5,0.07,20.5,-52.5,...,762.9,0.29,0.82,-3.0,-35.0,140.3,1.16,0.39,0.73,0
2,77.0,27.27,-0.91,6.0,58.5,1623.6,-1.4,0.02,-6.5,-48.0,...,1491.8,0.32,-1.29,0.0,-34.0,658.2,-0.76,0.26,0.24,0
3,41.0,27.91,-0.35,3.0,46.0,1921.6,-1.36,-0.47,-32.0,-51.5,...,2047.7,-0.98,1.53,0.0,-49.0,554.2,-0.83,0.39,0.73,0
4,50.0,28.0,-1.32,-9.0,12.0,464.8,0.88,0.19,8.0,-51.5,...,479.5,0.68,-0.59,2.0,-36.0,-6.9,2.02,0.14,-0.23,0


In [6]:
# Hold out 30% of the rows as a test set, stratified on the target column.
X_train, X_test, y_train, y_test = train_test_split(
    data.drop(columns=['target']),  # every column except the target
    data['target'],                 # the labels only
    stratify=data['target'],
    test_size=0.3,
    random_state=0,
)

# Show the shapes of the two feature matrices.
X_train.shape, X_test.shape

((102025, 74), (43726, 74))

## XGBoost Classifier

In [7]:
# Hyperparameters for the baseline XGBoost model.
params = dict(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=6,
    scale_pos_weight=10,
)

# Fit the baseline classifier on the training split.
xgb = XGBClassifier(n_jobs=-1, random_state=42, **params)
xgb.fit(X_train, y_train)

# Keep column 1 of predict_proba (the positive-class probability) per split.
y_train_xgb, y_test_xgb = (
    xgb.predict_proba(split)[:, 1] for split in (X_train, X_test)
)

In [8]:
# Report the baseline model's precision on each split. The first line is
# computed on the training split, so it is labelled "train" (it previously
# said "test", which produced two indistinguishable output lines).
print(f"Precision XGBoost train: {metrics.precision_score(y_train, xgb.predict(X_train)):0.2%}")
print(f"Precision XGBoost test: {metrics.precision_score(y_test, xgb.predict(X_test)):0.2%}")

Precision XGBoost test: 98.05%
Precision XGBoost test: 91.99%


In [11]:
def objective(trial):
    """Optuna objective: mean 5-fold cross-validated F1 of an XGBoost model.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample ``n_estimators``, ``learning_rate``,
        ``max_depth`` and ``scale_pos_weight``.

    Returns
    -------
    float
        Mean F1 score over the 5 cross-validation folds, evaluated on
        ``X_train`` / ``y_train``. Higher is better, so a study optimizing
        this objective should maximize it.
    """
    # Search space sampled for this trial.
    n_estimators = trial.suggest_int('n_estimators', 100, 500)
    learning_rate = trial.suggest_float('learning_rate', 0.01, 0.11)
    max_depth = trial.suggest_int('max_depth', 2, 10)
    scale_pos_weight = trial.suggest_int('scale_pos_weight', 1, 30)

    model = XGBClassifier(n_estimators=n_estimators,
                          learning_rate=learning_rate,
                          max_depth=max_depth,
                          scale_pos_weight=scale_pos_weight,
                          n_jobs=-1,
                          random_state=42)

    fold_scores = cross_val_score(model, X_train, y_train, cv=5, scoring='f1')

    # cross_val_score returns one score per fold. A single-objective study
    # needs one scalar per trial, so average the folds.
    return float(fold_scores.mean())

In [15]:
# The objective is an F1 score (higher is better). create_study minimizes by
# default, so the direction has to be set explicitly.
# NOTE: create_study raises if a study with this name already exists in the
# storage. Passing load_if_exists=True would resume it instead, but an
# existing study keeps the direction it was created with.
study = optuna.create_study(
    storage="sqlite:///db.sqlite3",  # Specify the storage URL here.
    study_name="KDD_Cup_2004_3",
    direction="maximize",
)
study.optimize(objective, n_trials=3)
print(f"Best value: {study.best_value} (params: {study.best_params})")

[I 2024-03-05 20:56:46,489] A new study created in RDB with name: KDD_Cup_2004_3




[W 2024-03-05 20:56:54,421] Trial 0 failed with parameters: {'n_estimators': 405, 'learning_rate': 0.09167523547426291, 'max_depth': 7, 'scale_pos_weight': 27} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/Users/oliverdreger/opt/anaconda3/envs/ml2023/lib/python3.8/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/var/folders/pc/t6w9tgr112v1xdsjl9vggh6c0000gn/T/ipykernel_74398/2910149791.py", line 15, in objective
    score = cross_val_score(model, X_train, y_train, cv=5, scoring='f1', n_jobs=-1)
  File "/Users/oliverdreger/opt/anaconda3/envs/ml2023/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 515, in cross_val_score
    cv_results = cross_validate(
  File "/Users/oliverdreger/opt/anaconda3/envs/ml2023/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 266, in cross_validate
    results = parallel(
  File "/Users/oliverdreger/o

KeyboardInterrupt: 