In [3]:
!pip3 install lightgbm

Collecting lightgbm
  Using cached lightgbm-3.3.5-py3-none-win_amd64.whl (1.0 MB)
Installing collected packages: lightgbm
Successfully installed lightgbm-3.3.5




In [1]:
from hyperopt import fmin, tpe, hp, Trials
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

iris = datasets.load_breast_cancer()
X = iris.data
y = iris.target

def objective(params):
    clf = RandomForestClassifier(**params)
    return -cross_val_score(clf, X, y, cv=5).mean()

space = {
    'n_estimators': hp.choice('n_estimators', range(1, 100)),
    'max_depth': hp.choice('max_depth', range(1, 10)),
    'criterion': hp.choice('criterion', ['gini', 'entropy'])
}

trials = Trials()
best = fmin(objective, space, algo=tpe.suggest, max_evals=100, trials=trials)

print(best)


100%|██████████| 100/100 [00:32<00:00,  3.03trial/s, best loss: -0.9736376339077782]
{'criterion': 1, 'max_depth': 7, 'n_estimators': 39}


In [4]:
import numpy as np
from hyperopt import fmin, tpe, atpe, hp, Trials
from sklearn import datasets
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from functools import partial

# Load breast cancer dataset
data = datasets.load_breast_cancer()
X, y = data.data, data.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Define the search space for hyperparameters
space = {
    'n_estimators': hp.choice('n_estimators', range(10, 200)),
    'learning_rate': hp.loguniform('learning_rate', -5, 0),
    'max_depth': hp.choice('max_depth', range(1, 10)),
    'min_samples_split': hp.choice('min_samples_split', range(2, 30)),
    'min_samples_leaf': hp.choice('min_samples_leaf', range(1, 30)),
}

# Define the objective function for optimization
def objective(params, X, y, cv=5):
    model = GradientBoostingClassifier(**params)
    score = cross_val_score(model, X, y, cv=cv, scoring='accuracy').mean()
    return -score

# Create partial function with X_train and y_train
objective_func = partial(objective, X=X_train, y=y_train)

# Optimize using TPE
trials_tpe = Trials()
best_tpe = fmin(
    fn=objective_func,
    space=space,
    algo=tpe.suggest,
    max_evals=50,
    trials=trials_tpe,
    verbose=1,
)

# Optimize using ATPE
trials_atpe = Trials()
best_atpe = fmin(
    fn=objective_func,
    space=space,
    algo=atpe.suggest,
    max_evals=50,
    trials=trials_atpe,
    verbose=1,
)

# Train and evaluate the best models with TPE and ATPE
model_tpe = GradientBoostingClassifier(**best_tpe)
model_tpe.fit(X_train, y_train)
tpe_preds = model_tpe.predict(X_test)
tpe_accuracy = accuracy_score(y_test, tpe_preds)

model_atpe = GradientBoostingClassifier(**best_atpe)
model_atpe.fit(X_train, y_train)
atpe_preds = model_atpe.predict(X_test)
atpe_accuracy = accuracy_score(y_test, atpe_preds)

# Print the best hyperparameters and accuracy scores
print("Best TPE hyperparameters: ", best_tpe)
print("Best ATPE hyperparameters: ", best_atpe)
print("TPE accuracy: ", tpe_accuracy)
print("ATPE accuracy: ", atpe_accuracy)


100%|██████████| 50/50 [00:57<00:00,  1.14s/trial, best loss: -0.9697784810126582]
100%|██████████| 50/50 [01:08<00:00,  1.38s/trial, best loss: -0.9698101265822784]
Best TPE hyperparameters:  {'learning_rate': 0.9435620612826735, 'max_depth': 4, 'min_samples_leaf': 20, 'min_samples_split': 11, 'n_estimators': 104}
Best ATPE hyperparameters:  {'learning_rate': 0.5808903041046102, 'max_depth': 3, 'min_samples_leaf': 19, 'min_samples_split': 26, 'n_estimators': 73}
TPE accuracy:  0.9649122807017544
ATPE accuracy:  0.9707602339181286
