In [21]:
import pandas as pd
import lightgbm as lgb
from sklearn.datasets import load_breast_cancer
from hyperband import HyperbandSearchCV
from sklearn.model_selection import train_test_split

In [27]:
# import data
X, y = load_breast_cancer(return_X_y=True)
X_train, X_val, y_train, y_val = train_test_split(X,y,test_size=0.2,
                                                  stratify=y, random_state=42)

In [32]:
# Set paramters for the models
band_params = {'boosting_type': 'gbdt',
 'class_weight': None,
 'colsample_bytree': 0.9,
 'importance_type': 'split',
 'learning_rate': 0.01,
 'max_depth': 20,
 'min_child_samples': 25,
 'min_child_weight': 0,
 'min_split_gain': 0,
 'n_estimators': 4000,
 'n_jobs': -1,
 'num_leaves': 200,
 'objective': 'binary',
 'random_state': None,
 'reg_alpha': 0,
 'reg_lambda': 0,
 'silent': True,
 'subsample': 0.8,
 'subsample_for_bin': 200000,
 'subsample_freq': 1,
 'metric': 'auc',
 'max_bin': 100,
 'verbose': -1,
 'scale_pos_weight': 1}
# Set the tuning parameters for the model
param_dict =  {
    'learning_rate': [.001,0.01,0.1],
    'max_depth': [5,15, 20, 30, 50],
    'num_leaves': [50,150,200,250],
    'min_child_samples': [35,40,60,80],
    'subsample': [0.7,0.8,0.9]
}

In [33]:
search = HyperbandSearchCV(lgb.LGBMClassifier(**band_params),param_dict,cv=3,
                           resource_param='n_estimators',verbose=100,
                           max_iter=200,min_iter=50,
                           scoring='roc_auc')

In [34]:
fit_params={'early_stopping_rounds':10,'eval_set':[(X_val,y_val)]}

In [35]:
search.fit(X_train,y_train,**fit_params)

Starting bracket 1 (out of 2) of hyperband
Starting successive halving iteration 1 out of 2. Fitting 3 configurations, with resource_param n_estimators set to 66, and keeping the best 1 configurations.
Fitting 3 folds for each of 3 candidates, totalling 9 fits
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[CV] subsample=0.7, num_leaves=150, min_child_samples=80, max_depth=50, learning_rate=0.01, n_estimators=66 
[1]	valid_0's auc: 0.90873
Training until validation scores don't improve for 10 rounds.
[2]	valid_0's auc: 0.926422
[3]	valid_0's auc: 0.959821
[4]	valid_0's auc: 0.961806
[5]	valid_0's auc: 0.961144
[6]	valid_0's auc: 0.961806
[7]	valid_0's auc: 0.961144
[8]	valid_0's auc: 0.973545
[9]	valid_0's auc: 0.974206
[10]	valid_0's auc: 0.975529
[11]	valid_0's auc: 0.975529
[12]	valid_0's auc: 0.975364
[13]	valid_0's auc: 0.97371
[14]	valid_0's auc: 0.974206
[15]	valid_0's auc: 0.974206
[16]	valid_0's auc: 0.973876
[17]	valid_0's auc: 0.973545
[18]	

[CV]  subsample=0.9, num_leaves=250, min_child_samples=80, max_depth=15, learning_rate=0.001, n_estimators=66, score=0.971, total=   0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.1s finished
Starting successive halving iteration 2 out of 2. Fitting 1 configurations, with resource_param n_estimators set to 200
Fitting 3 folds for each of 1 candidates, totalling 3 fits
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[CV] subsample=0.9, num_leaves=250, min_child_samples=40, max_depth=30, learning_rate=0.001, n_estimators=200 
[1]	valid_0's auc: 0.965112
Training until validation scores don't improve for 10 rounds.
[2]	valid_0's auc: 0.983796
[3]	valid_0's auc: 0.988922
[4]	valid_0's auc: 0.989749
[5]	valid_0's auc: 0.987269
[6]	valid_0's auc: 0.98793
[7]	valid_0's auc: 0.988261
[8]	valid_0's auc: 0.988426
[9]	valid_0's auc: 0.987765
[10]	valid_0's auc: 0.987765

[3]	valid_0's auc: 0.982474
[4]	valid_0's auc: 0.986276
[5]	valid_0's auc: 0.98578
[6]	valid_0's auc: 0.98578
[7]	valid_0's auc: 0.985119
[8]	valid_0's auc: 0.98545
[9]	valid_0's auc: 0.984127
[10]	valid_0's auc: 0.983631
[11]	valid_0's auc: 0.983962
[12]	valid_0's auc: 0.984623
[13]	valid_0's auc: 0.985615
[14]	valid_0's auc: 0.986607
[15]	valid_0's auc: 0.986607
[16]	valid_0's auc: 0.985946
[17]	valid_0's auc: 0.985946
[18]	valid_0's auc: 0.986276
[19]	valid_0's auc: 0.987103
[20]	valid_0's auc: 0.987103
[21]	valid_0's auc: 0.986772
[22]	valid_0's auc: 0.986772
[23]	valid_0's auc: 0.98578
[24]	valid_0's auc: 0.98578
[25]	valid_0's auc: 0.986772
[26]	valid_0's auc: 0.986772
[27]	valid_0's auc: 0.986772
[28]	valid_0's auc: 0.986772
[29]	valid_0's auc: 0.986442
Early stopping, best iteration is:
[19]	valid_0's auc: 0.987103
[CV]  subsample=0.8, num_leaves=250, min_child_samples=40, max_depth=50, learning_rate=0.001, n_estimators=200, score=0.966, total=   0.0s
[Parallel(n_jobs=1)]: Done

HyperbandSearchCV(cv=3, error_score='raise',
                  estimator=LGBMClassifier(boosting_type='gbdt',
                                           class_weight=None,
                                           colsample_bytree=0.9,
                                           importance_type='split',
                                           learning_rate=0.01, max_bin=100,
                                           max_depth=20, metric='auc',
                                           min_child_samples=25,
                                           min_child_weight=0, min_split_gain=0,
                                           n_estimators=4000, n_jobs=-1,
                                           num_leaves=200, objective='binary',
                                           random_state=None, reg_...
                  eta=3, iid=True, max_iter=200, min_iter=50, n_jobs=1,
                  param_distributions={'learning_rate': [0.001, 0.01, 0.1],
                                

In [36]:
search.best_params_

{'subsample': 0.7,
 'num_leaves': 250,
 'min_child_samples': 40,
 'max_depth': 50,
 'learning_rate': 0.1,
 'n_estimators': 200}

In [37]:
search.best_score_

0.9862087912087912