In [3]:
import optuna
import xgboost as xgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Feature scaling (optional, depends on the classifier and data)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define an objective function for Optuna to minimize (i.e., tune the hyperparameters)
def objective(trial):
    # Define hyperparameters to tune
    param = {
        'objective': 'multi:softprob',
        'num_class': 3,
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
        'n_estimators': trial.suggest_int('n_estimators', 50, 300),
        'subsample': trial.suggest_uniform('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.6, 1.0),
        'gamma': trial.suggest_uniform('gamma', 0, 0.5),
    }

    # Train the model
    xgb_clf = xgb.XGBClassifier(**param, use_label_encoder=False)
    xgb_clf.fit(X_train, y_train)

    # Predict and calculate accuracy on the test set
    y_pred = xgb_clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy

# Create an Optuna study and optimize the objective function
study = optuna.create_study(direction='maximize')  # We aim to maximize accuracy
study.optimize(objective, n_trials=50)

# Print the best parameters and the best accuracy
print(f"Best trial: {study.best_trial.value}")
print(f"Best hyperparameters: {study.best_trial.params}")

# Retrain the model with the best hyperparameters
best_params = study.best_trial.params
xgb_clf_optimized = xgb.XGBClassifier(**best_params, objective='multi:softprob', num_class=3, use_label_encoder=False)
xgb_clf_optimized.fit(X_train, y_train)

# Evaluate the optimized model
y_pred_optimized = xgb_clf_optimized.predict(X_test)
accuracy_optimized = accuracy_score(y_test, y_pred_optimized)
print(f"Optimized Model Accuracy: {accuracy_optimized:.2f}")


  from .autonotebook import tqdm as notebook_tqdm
[I 2024-09-04 16:42:18,276] A new study created in memory with name: no-name-a4711bc0-c7f8-4561-be2f-87f1cec4b0c7
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.6, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.6, 1.0),
  'gamma': trial.suggest_uniform('gamma', 0, 0.5),
Parameters: { "use_label_encoder" } are not used.

[I 2024-09-04 16:42:18,343] Trial 0 finished with value: 1.0 and parameters: {'max_depth': 8, 'learning_rate': 0.07989025277308612, 'n_estimators': 239, 'subsample': 0.7829640061253554, 'colsample_bytree': 0.6757049395491639, 'gamma': 0.25831882964898184}. Best is trial 0 with value: 1.0.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.6, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.6, 1.0),
  'gamma': trial.suggest_u

Best trial: 1.0
Best hyperparameters: {'max_depth': 8, 'learning_rate': 0.07989025277308612, 'n_estimators': 239, 'subsample': 0.7829640061253554, 'colsample_bytree': 0.6757049395491639, 'gamma': 0.25831882964898184}
Optimized Model Accuracy: 1.00
