In [1]:
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

# Load dataset
data = load_iris()
X, y = data.data, data.target

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [2]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# Initialize the XGBoost model
model = XGBClassifier()

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)


Accuracy: 1.0
Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


In [3]:
from sklearn.model_selection import GridSearchCV

# Define the hyperparameter grid
param_grid = {
    'n_estimators': [50, 100, 150],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 4, 5],
    'subsample': [0.7, 1.0]
}

# Initialize the GridSearchCV
grid_search = GridSearchCV(estimator=XGBClassifier(), param_grid=param_grid, cv=3, scoring='accuracy')

# Perform the grid search
grid_search.fit(X_train, y_train)

# Get the best parameters and score
best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Best Parameters:", best_params)
print("Best Cross-Validation Score:", best_score)


Best Parameters: {'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.7}
Best Cross-Validation Score: 0.9500000000000001


In [4]:
from sklearn.model_selection import RandomizedSearchCV

# Define the hyperparameter space
param_distributions = {
    'n_estimators': [50, 100, 150, 200],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'max_depth': [3, 4, 5, 6],
    'gamma': [0, 0.1, 0.2],
    'subsample': [0.7, 0.8, 1.0]
}

# Initialize RandomizedSearchCV
random_search = RandomizedSearchCV(estimator=XGBClassifier(), param_distributions=param_distributions, n_iter=10, cv=3, scoring='accuracy', random_state=42)

# Perform the random search
random_search.fit(X_train, y_train)

# Get the best parameters and score
best_params_random = random_search.best_params_
best_score_random = random_search.best_score_

print("Best Parameters (Random Search):", best_params_random)
print("Best Cross-Validation Score (Random Search):", best_score_random)


Best Parameters (Random Search): {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 4, 'learning_rate': 0.01, 'gamma': 0.2}
Best Cross-Validation Score (Random Search): 0.9583333333333334


In [6]:
pip install optuna

Collecting optuna
  Downloading optuna-4.0.0-py3-none-any.whl.metadata (16 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.2-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.5-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.0.0-py3-none-any.whl (362 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m362.8/362.8 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.13.2-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.0/233.0 kB[0m [31m14.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Downloading Mako-1.3.5-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Ma

In [7]:
import optuna

# Define the objective function for Optuna
def objective(trial):
    param = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 200),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.2),
        'max_depth': trial.suggest_int('max_depth', 3, 6),
        'subsample': trial.suggest_float('subsample', 0.7, 1.0),
        'gamma': trial.suggest_float('gamma', 0, 0.3)
    }
    model = XGBClassifier(**param)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    accuracy = accuracy_score(y_test, preds)
    return accuracy

# Perform the optimization
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)

# Get the best trial results
best_trial = study.best_trial
print("Best Trial Parameters:", best_trial.params)
print("Best Accuracy:", best_trial.value)


[I 2024-09-16 04:39:13,972] A new study created in memory with name: no-name-be4a0209-d350-47d8-ae74-6eec5e219ba0
[I 2024-09-16 04:39:14,028] Trial 0 finished with value: 1.0 and parameters: {'n_estimators': 168, 'learning_rate': 0.11791040938037373, 'max_depth': 6, 'subsample': 0.860428103481157, 'gamma': 0.2836070044594087}. Best is trial 0 with value: 1.0.
[I 2024-09-16 04:39:14,080] Trial 1 finished with value: 1.0 and parameters: {'n_estimators': 167, 'learning_rate': 0.14832308544251288, 'max_depth': 3, 'subsample': 0.7282296345476242, 'gamma': 0.27305159361958475}. Best is trial 0 with value: 1.0.
[I 2024-09-16 04:39:14,131] Trial 2 finished with value: 1.0 and parameters: {'n_estimators': 119, 'learning_rate': 0.03760380490121611, 'max_depth': 4, 'subsample': 0.8545612320915632, 'gamma': 0.2848371452735529}. Best is trial 0 with value: 1.0.
[I 2024-09-16 04:39:14,426] Trial 3 finished with value: 1.0 and parameters: {'n_estimators': 165, 'learning_rate': 0.0911033118896294, 'ma

Best Trial Parameters: {'n_estimators': 168, 'learning_rate': 0.11791040938037373, 'max_depth': 6, 'subsample': 0.860428103481157, 'gamma': 0.2836070044594087}
Best Accuracy: 1.0
