In [9]:
pip install optuna

Collecting optuna
  Downloading optuna-4.4.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.4.0-py3-none-any.whl (395 kB)
Downloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.9.0 optuna-4.4.0
Note: you may need to restart the kernel to use updated packages.


In [17]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
import optuna
from sklearn.model_selection import train_test_split,GridSearchCV,RandomizedSearchCV

In [2]:
data= load_breast_cancer()
x,y= data.data, data.target

In [3]:
# split data
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)

In [4]:
#standardize data
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)


In [6]:
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((455, 30), (114, 30), (455,), (114,))

In [8]:
#train model
model = XGBClassifier( eval_metric='logloss',random_state=42)
model.fit(x_train, y_train)

#predict
y_pred = model.predict(x_test)
#evaluate
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.96


In [13]:
# DEFINE OBJECTIVE FUNCTION for OPTUNA
def objective(trial):
    # Define hyperparameters to tune
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 500, step=50),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma':trial.suggest_float('gamma',0,5),
        'reg_alpha': trial.suggest_float('reg_alpha', 0, 1),
        'reg_lambda': trial.suggest_float('reg_lambda', 0, 1)
    }
    
    # Train model
    model = XGBClassifier(**params,eval_metric='logloss', random_state=42)
    model.fit(x_train, y_train)
    
    # Evaluate model on validation
    accuracy = accuracy_score(y_test, model.predict(x_test))
    
    return accuracy

In [14]:
#create optuna study
study=optuna.create_study(direction='maximize')
#optimize hyperparameters
study.optimize(objective, n_trials=100)

[I 2025-07-17 21:19:54,173] A new study created in memory with name: no-name-484185c1-8f46-4721-ab14-ff0e0a133be8
[I 2025-07-17 21:19:54,361] Trial 0 finished with value: 0.956140350877193 and parameters: {'n_estimators': 100, 'max_depth': 6, 'learning_rate': 0.043564405589547626, 'subsample': 0.7728591495881039, 'colsample_bytree': 0.7953311321121503, 'gamma': 4.347502971246753, 'reg_alpha': 0.17110927697309064, 'reg_lambda': 0.2313596764295045}. Best is trial 0 with value: 0.956140350877193.
[I 2025-07-17 21:19:54,571] Trial 1 finished with value: 0.956140350877193 and parameters: {'n_estimators': 400, 'max_depth': 4, 'learning_rate': 0.16804380017935347, 'subsample': 0.7313848406784691, 'colsample_bytree': 0.8327347510452855, 'gamma': 2.7256667248297033, 'reg_alpha': 0.4607292069640574, 'reg_lambda': 0.45646571995268637}. Best is trial 0 with value: 0.956140350877193.
[I 2025-07-17 21:19:54,728] Trial 2 finished with value: 0.9736842105263158 and parameters: {'n_estimators': 350, 'm

In [15]:
print("Best hyperparameters:", study.best_params)

Best hyperparameters: {'n_estimators': 350, 'max_depth': 9, 'learning_rate': 0.23006590198191568, 'subsample': 0.5419011674699025, 'colsample_bytree': 0.5440397828435227, 'gamma': 4.508457171063477, 'reg_alpha': 0.06330467656582428, 'reg_lambda': 0.7571054431168931}


In [16]:
print('best accuracy:', study.best_value)

best accuracy: 0.9824561403508771


In [18]:
param_grid={
    'n_estimators':[100,200,300],
    'max_depth':[3,5,10],
    'learning_rate':[0.01,0.1,0.2],
    'subsample':[0.5,0.8,1.0],
}

In [19]:
#initialize grid search
grid_search=GridSearchCV(estimator=XGBClassifier(eval_metric='logloss',random_state=42),
                         param_grid=param_grid,
                         scoring='accuracy',
                         cv=5,
                         n_jobs=-1)

In [20]:
#perform grid search
grid_search.fit(x_train,y_train)
#best parameters
print(grid_search.best_estimator_)
y_pred=grid_search.predict(x_test)
print('accuracy_score',accuracy_score(y_test,y_pred))

XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric='logloss',
              feature_types=None, feature_weights=None, gamma=None,
              grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=0.1, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=3, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=200, n_jobs=None,
              num_parallel_tree=None, ...)
accuracy_score 0.9736842105263158


In [21]:
print('best hyperparameters:', grid_search.best_params_)
print('best accuracy:', grid_search.best_score_)

best hyperparameters: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 200, 'subsample': 0.5}
best accuracy: 0.9736263736263737


In [23]:
param_dist={
    'n_estimators':[50,100,200,300,400],
    'max_depth':[3,5,7,9],
    'learning_rate':[0.01,0.05,0.1,0.2],
    'subsample':[0.5,0.7,0.8,0.9,1.0],
    'colsample_bytree':[0.5,0.7,0.8,0.9,1.0]
}

In [24]:
#train model
random_search=RandomizedSearchCV(estimator=XGBClassifier(eval_metric='logloss',random_state=42),
                                    param_distributions=param_dist,
                                    n_iter=100,
                                    scoring='accuracy',
                                    cv=5,
                                    n_jobs=-1,
                                    random_state=42)

In [25]:
#perforn randomized search
best_random_model=random_search.fit(x_train,y_train)
y_pred_random=best_random_model.predict(x_test)
print('accuracy_score with random search:', accuracy_score(y_test,y_pred_random))

accuracy_score with random search: 0.9736842105263158
