# Imports and Setup

In [50]:
import optuna
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
import numpy as np

# Load dataset and preprocess

For test the code, I used the iris dataset. Replace with our data when it is ready.

In [43]:
data = load_breast_cancer()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define optimizers

In [38]:
# Logistic Regression Optimization
def objective_logreg(trial):
    """Logistic Regression Optimization"""
    # Define l1_ration if penalty is 'elasticnet'.
    penalty = trial.suggest_categorical('penalty', ['l1', 'l2', None, 'elasticnet'])
    if penalty == 'elasticnet':
        l1_ratio = trial.suggest_float('l1_ratio', 0.0, 1.0, step=0.25)
    else:
        l1_ratio = None

    param = {
        'C': trial.suggest_categorical('C', [0.5, 1.0, 1.5]),
        'penalty': penalty,
        'max_iter': trial.suggest_int('max_iter', 50, 100, 150),
        'l1_ratio': l1_ratio
    }

    clf = LogisticRegression(**param, solver='saga', random_state=42)
    pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('clf', clf)
    ])
    score = cross_val_score(pipeline, X_train, y_train, cv=5, scoring='accuracy').mean()
    return score

In [47]:
# SVM Optimization
def objective_svm(trial):
    param = {
        'C': trial.suggest_categorical('C', [0.5, 1.0, 1.5]),
        'kernel': trial.suggest_categorical('kernel', ['linear', 'poly', 'rbf', 'sigmoid']),
        'degree': trial.suggest_int('degree', 2, 5, step=1),
        'gamma': trial.suggest_categorical('gamma', ['scale', 'auto'])
    }
    clf = SVC(**param, random_state=42)
    pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('clf', clf)
    ])
    score = cross_val_score(pipeline, X_train, y_train, cv=5, scoring='accuracy').mean()
    return score

In [53]:
# XGBoost Optimization
def objective_xgb(trial):
    param = {
        'n_estimators': trial.suggest_categorical('n_estimators', [5, 10, 25, 50, 100]),
        'learning_rate': trial.suggest_categorical('learning_rate', [0.001, 0.01, 0.1, 1.0]),
        'booster': trial.suggest_categorical('booster', ['gbtree', 'gblinear', 'dart'])
    }

    if param["booster"] in ["gbtree", "dart"]:
        # maximum depth of the tree, signifies complexity of the tree.
        param["max_depth"] = trial.suggest_int("max_depth", 3, 9, step=2)
        # minimum child weight, larger the term more conservative the tree.
        param["min_child_weight"] = trial.suggest_int("min_child_weight", 2, 10)
        param["eta"] = trial.suggest_float("eta", 1e-8, 1.0, log=True)
        # defines how selective algorithm is.
        param["gamma"] = trial.suggest_float("gamma", 1e-8, 1.0, log=True)
        param["grow_policy"] = trial.suggest_categorical("grow_policy", ["depthwise", "lossguide"])

    if param["booster"] == "dart":
        param["sample_type"] = trial.suggest_categorical("sample_type", ["uniform", "weighted"])
        param["normalize_type"] = trial.suggest_categorical("normalize_type", ["tree", "forest"])
        param["rate_drop"] = trial.suggest_float("rate_drop", 1e-8, 1.0, log=True)
        param["skip_drop"] = trial.suggest_float("skip_drop", 1e-8, 1.0, log=True)

    clf = XGBClassifier(**param, use_label_encoder=False, objective='binary:logistic', random_state=42)
    clf.fit(X_train, y_train, verbose=False)
    preds = clf.predict(X_test)
    preds_labels = np.rint(preds)
    score = accuracy_score(y_test, preds_labels)
    return score

# Run trails and find optimization

## Logistic Regression

In [44]:
study_logreg = optuna.create_study(direction='maximize')
study_logreg.optimize(objective_logreg, n_trials=50)

[I 2025-01-11 20:22:57,681] A new study created in memory with name: no-name-82f9cdec-02a8-4701-a2a9-cf9652334acd
  'max_iter': trial.suggest_int('max_iter', 50, 100, 150),
[I 2025-01-11 20:22:57,776] Trial 0 finished with value: 0.9736263736263737 and parameters: {'penalty': 'l2', 'C': 1.5, 'max_iter': 50}. Best is trial 0 with value: 0.9736263736263737.
  'max_iter': trial.suggest_int('max_iter', 50, 100, 150),
[I 2025-01-11 20:22:57,874] Trial 1 finished with value: 0.9736263736263737 and parameters: {'penalty': 'l1', 'C': 0.5, 'max_iter': 50}. Best is trial 0 with value: 0.9736263736263737.
  'max_iter': trial.suggest_int('max_iter', 50, 100, 150),
[I 2025-01-11 20:22:57,967] Trial 2 finished with value: 0.9736263736263737 and parameters: {'penalty': 'l2', 'C': 1.5, 'max_iter': 50}. Best is trial 0 with value: 0.9736263736263737.
  'max_iter': trial.suggest_int('max_iter', 50, 100, 150),
[I 2025-01-11 20:22:58,059] Trial 3 finished with value: 0.9736263736263737 and parameters: {'p

Best parameters for Logistic Regression: {'penalty': None, 'C': 1.5, 'max_iter': 50}


In [45]:
print("Best parameters for Logistic Regression:", study_logreg.best_params)

Best parameters for Logistic Regression: {'penalty': None, 'C': 1.5, 'max_iter': 50}


## SVM

In [48]:
study_svm = optuna.create_study(direction='maximize')
study_svm.optimize(objective_svm, n_trials=50)

[I 2025-01-11 20:27:45,020] A new study created in memory with name: no-name-259e2b59-f434-474a-81e1-1873ee4e2535
[I 2025-01-11 20:27:45,069] Trial 0 finished with value: 0.9736263736263737 and parameters: {'C': 1.0, 'kernel': 'rbf', 'degree': 5, 'gamma': 'auto'}. Best is trial 0 with value: 0.9736263736263737.
[I 2025-01-11 20:27:45,113] Trial 1 finished with value: 0.9758241758241759 and parameters: {'C': 1.5, 'kernel': 'rbf', 'degree': 4, 'gamma': 'auto'}. Best is trial 1 with value: 0.9758241758241759.
[I 2025-01-11 20:27:45,154] Trial 2 finished with value: 0.9692307692307693 and parameters: {'C': 1.5, 'kernel': 'linear', 'degree': 2, 'gamma': 'auto'}. Best is trial 1 with value: 0.9758241758241759.
[I 2025-01-11 20:27:45,199] Trial 3 finished with value: 0.9736263736263737 and parameters: {'C': 1.0, 'kernel': 'rbf', 'degree': 3, 'gamma': 'auto'}. Best is trial 1 with value: 0.9758241758241759.
[I 2025-01-11 20:27:45,247] Trial 4 finished with value: 0.9692307692307693 and paramet

In [49]:
print("Best parameters for SVM:", study_svm.best_params)

Best parameters for SVM: {'C': 1.5, 'kernel': 'rbf', 'degree': 4, 'gamma': 'auto'}


## XGboost

In [54]:
study_xgb = optuna.create_study(direction='maximize')
study_xgb.optimize(objective_xgb, n_trials=50)

[I 2025-01-11 20:37:08,230] A new study created in memory with name: no-name-da828f77-59c8-4fff-8a6a-fb26401cb2d1
Parameters: { "use_label_encoder" } are not used.

[I 2025-01-11 20:37:10,613] Trial 0 finished with value: 0.9736842105263158 and parameters: {'n_estimators': 100, 'learning_rate': 0.1, 'booster': 'dart', 'max_depth': 9, 'min_child_weight': 10, 'eta': 4.207008942642503e-08, 'gamma': 0.0011319596186514975, 'grow_policy': 'lossguide', 'sample_type': 'weighted', 'normalize_type': 'forest', 'rate_drop': 1.9193282672923407e-07, 'skip_drop': 1.3001259631241898e-06}. Best is trial 0 with value: 0.9736842105263158.
Parameters: { "use_label_encoder" } are not used.

[I 2025-01-11 20:37:10,851] Trial 1 finished with value: 0.956140350877193 and parameters: {'n_estimators': 25, 'learning_rate': 1.0, 'booster': 'dart', 'max_depth': 9, 'min_child_weight': 8, 'eta': 0.0015297363455298346, 'gamma': 3.818152506548063e-06, 'grow_policy': 'depthwise', 'sample_type': 'uniform', 'normalize_ty

In [55]:
print("Best parameters for XGBoost:", study_xgb.best_params)

Best parameters for XGBoost: {'n_estimators': 100, 'learning_rate': 0.1, 'booster': 'dart', 'max_depth': 9, 'min_child_weight': 10, 'eta': 4.207008942642503e-08, 'gamma': 0.0011319596186514975, 'grow_policy': 'lossguide', 'sample_type': 'weighted', 'normalize_type': 'forest', 'rate_drop': 1.9193282672923407e-07, 'skip_drop': 1.3001259631241898e-06}
