In [1]:
import sys
import time
from wideboost.wrappers import wxgb
import numpy as np
import xgboost as xgb

import tensorflow_datasets as tfds
from matplotlib import pyplot as plt

# Load the MNIST train and test splits as (image, label) pairs, plus the
# dataset metadata object.
(ds_train, ds_test), ds_info = tfds.load(
    'mnist',
    split=['train', 'test'],
    with_info=True,
    as_supervised=True,
    shuffle_files=False,
)

# Materialise each split as one big batch. Each loop keeps the LAST batch it
# sees; presumably a 60000-element batch holds the whole split, so the loop
# body runs once (TODO confirm split sizes via ds_info).
for a in ds_train.batch(60000):
    pass

for b in ds_test.batch(60000):
    pass

In [2]:
# Element 0 of each batch holds the images and element 1 the labels.
# Images are flattened to one row of 28*28 values per example.
xtrain, ytrain = a[0].numpy().reshape((-1, 28 * 28)), a[1].numpy()
xtest, ytest = b[0].numpy().reshape((-1, 28 * 28)), b[1].numpy()

In [3]:
def _onehot(Y):
    b = np.zeros((Y.shape[0], Y.max().astype(int)+1))
    b[np.arange(Y.shape[0]),Y.astype(int).flatten()] = 1
    return b

In [4]:
def get_data(seed, n_train=3_000, n_valid=2_000, n_test=1_000,
             n_features=200, train_frac=0.7):
    """Draw a seeded random subsample of rows and feature columns.

    Reads the module-level ``xtrain``/``ytrain``/``xtest``/``ytest`` arrays.
    The rows of ``xtrain`` are first split into a training pool
    (``train_frac`` of the rows) and a validation pool (the remaining rows),
    so the training and validation samples never share a row. The same
    randomly chosen feature columns are used for all three feature matrices.

    Side effect: reseeds NumPy's global random generator with ``seed``.

    Args:
        seed: value passed to ``np.random.seed``.
        n_train: rows sampled from the training pool.
        n_valid: rows sampled from the validation pool.
        n_test: rows sampled from ``xtest``.
        n_features: number of columns sampled from ``xtrain``.
        train_frac: fraction of ``xtrain`` rows assigned to the training pool.

    Returns:
        Tuple ``(x_train, x_valid, x_test, y_train, y_valid, y_test)`` where
        the ``y_*`` entries are one-hot encoded by ``_onehot``.
    """
    np.random.seed(seed)
    n_rows = xtrain.shape[0]

    # The order of the random draws below is kept fixed so that a given seed
    # always yields the same sample.
    train_idx = np.random.choice(range(n_rows), round(n_rows * train_frac), replace=False)
    valid_idx = np.array(list(set(range(n_rows)) - set(train_idx)))
    train_idx = np.random.choice(train_idx, n_train, replace=False)
    valid_idx = np.random.choice(valid_idx, n_valid, replace=False)

    test_idx = np.random.choice(range(xtest.shape[0]), n_test, replace=False)

    features = np.random.choice(range(xtrain.shape[1]), n_features, replace=False)

    # NOTE(review): each label subset is encoded independently by _onehot;
    # confirm all three subsets end up with the same number of columns.
    y_train = _onehot(ytrain[train_idx])
    y_valid = _onehot(ytrain[valid_idx])
    y_test = _onehot(ytest[test_idx])

    # idx[:, None] broadcasts against `features` to select a rows-by-columns block.
    return (
        xtrain[train_idx[:, None], features],
        xtrain[valid_idx[:, None], features],
        xtest[test_idx[:, None], features], y_train, y_valid, y_test
    )

In [6]:
from hyperopt import fmin, tpe, hp, STATUS_OK, STATUS_FAIL, space_eval
import os

iters = 30
results = []

# The search space does not depend on the resample, so it is built once.
# Single-option hp.choice entries pin a setting while keeping it in the
# sampled parameter dict.
spc = {
    'btype': hp.choice('btype',['I']),
    'extra_dims': hp.choice('extra_dims',[0]),
    'objective': hp.choice('objective',['manybinary:logistic']),
    'eval_metric':hp.choice('eval_metric',[['many_logloss']]),
    'eta': hp.loguniform('eta', -7, 0),
    'beta_eta': hp.choice('beta_eta',[0]),
    'max_depth' : hp.choice('max_depth',range(1,11)),
    'subsample': hp.uniform('subsample', 0.5, 1),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1),
    'colsample_bylevel': hp.uniform('colsample_bylevel', 0.5, 1),
    'min_child_weight': hp.loguniform('min_child_weight', -16, 5),
    'alpha': hp.choice('alpha', [0, hp.loguniform('alpha_positive', -16, 2)]),
    'lambda': hp.choice('lambda', [0, hp.loguniform('lambda_positive', -16, 2)]),
    'gamma': hp.choice('gamma', [0, hp.loguniform('gamma_positive', -16, 2)])
}

# Presumably read by fmin to seed the search (TODO confirm); the logged
# output shows every resample starting from the same first proposal.
os.environ['HYPEROPT_FMIN_SEED'] = '101010'


def objective(param):
    """Fit one model for a sampled configuration and report its validation loss.

    Reads the current resample (``x_train``, ``y_train``, ``watchlist``) from
    module scope and updates the module-level trackers ``best_val``,
    ``xgb_test_val``, ``xgb_param`` and ``nrounds`` whenever this trial beats
    the best validation loss seen so far in the current resample.

    Args:
        param: hyperparameter dict sampled from ``spc``; modified in place to
            add ``output_dim`` and ``nthread``.

    Returns:
        A hyperopt result dict. ``loss`` is the lowest non-NaN validation
        ``many_logloss`` of the run. If the run recorded no non-NaN
        validation loss at all, the trial is reported as ``STATUS_FAIL``.
    """
    global best_val
    global xgb_test_val
    global xgb_param
    global nrounds
    num_round = 500
    param['output_dim'] = 10
    param['nthread'] = 2
    print(param)

    xgb_results = {}
    wxgb.fit(
        x_train, y_train,
        param, num_round, watchlist,
        evals_result=xgb_results,
        early_stopping_rounds=20,
        verbose_eval=200
    )

    valid_losses = np.asarray(xgb_results['valid']['many_logloss'], dtype=float)

    # Diverging runs log NaN losses. np.argmin would return the index of the
    # first NaN, so the best round is located with the NaN-ignoring variant,
    # and a run with nothing usable is reported as failed.
    if np.isnan(valid_losses).all():
        return {'status': STATUS_FAIL}

    best_round = int(np.nanargmin(valid_losses))
    trial_loss = float(valid_losses[best_round])

    if trial_loss < best_val:
        print("NEW BEST VALUE!")
        best_val = trial_loss
        # Test loss is read at the round chosen on the validation set.
        xgb_test_val = xgb_results['test']['many_logloss'][best_round]
        xgb_param = param
        nrounds = best_round + 1

    return {'loss': trial_loss, 'status': STATUS_OK }


for est in range(iters):

    t0 = time.time()
    x_train, x_valid, x_test, y_train, y_valid, y_test = get_data(1000 + est)
    # NOTE(review): 'valid' is listed last, presumably because xgboost-style
    # early stopping monitors the last evaluation set; confirm that wxgb.fit
    # forwards the watchlist unchanged.
    watchlist = [
        ((x_train, y_train),'train'),
        ((x_test, y_test), 'test'),
        ((x_valid, y_valid), 'valid')
    ]

    # Reset the per-resample trackers that objective() updates.
    best_val = 1000.0
    xgb_test_val = 1000.0
    xgb_param = {}
    nrounds = 0

    best = fmin(objective,
        space=spc,
        algo=tpe.suggest,
        max_evals=25)
    t1 = time.time()
    results.append({
        'iter': est,
        'valid_logloss': best_val,
        'test_logloss': xgb_test_val,
        'num_rounds': nrounds,
        'time_diff': t1 - t0,
        'winning_param': xgb_param.copy()
    })

{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.7489980469009814, 'colsample_bytree': 0.9420146676089547, 'eta': 0.0070706936561268224, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 0, 'max_depth': 5, 'min_child_weight': 2.1041667775099838e-05, 'objective': 'manybinary:logistic', 'subsample': 0.6262451727083667, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.68707	test-many_logloss:0.68727	valid-many_logloss:0.68724
[200]	train-many_logloss:0.18431	test-many_logloss:0.20818	valid-many_logloss:0.20662
[400]	train-many_logloss:0.07420	test-many_logloss:0.11057	valid-many_logloss:0.10850
[499]	train-many_logloss:0.05209	test-many_logloss:0.09254	valid-many_logloss:0.09044
NEW BEST VALUE!                                       
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.8357246683355697, 'colsample_bytree': 0.7491187395188089, 'eta': 0.605518780660455, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, '

  return -np.mean(np.log(p))

  return -np.mean(np.log(p))

  p = 1/(1 + np.exp(-logits))

  P = 1 / (1 + np.exp(-logits))



[23]	train-many_logloss:nan	test-many_logloss:nan	valid-many_logloss:nan
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.7061150549075759, 'colsample_bytree': 0.5843009197915733, 'eta': 0.002630107568123181, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 6.947115616562489, 'lambda': 0, 'max_depth': 10, 'min_child_weight': 41.36978142254861, 'objective': 'manybinary:logistic', 'subsample': 0.7453409993563878, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.69120	test-many_logloss:0.69121	valid-many_logloss:0.69122
[200]	train-many_logloss:0.43210	test-many_logloss:0.43422	valid-many_logloss:0.43461
[400]	train-many_logloss:0.30839	test-many_logloss:0.31178	valid-many_logloss:0.31227
[499]	train-many_logloss:0.27036	test-many_logloss:0.27420	valid-many_logloss:0.27475
{'alpha': 1.055992173502114e-05, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.8017715093779021, 'colsample_bytree': 0.7625111741024486, 'eta': 0.007606220882567551, 'eval_metric

[0]	train-many_logloss:0.43751	test-many_logloss:0.43986	valid-many_logloss:0.44247
[123]	train-many_logloss:0.05417	test-many_logloss:0.09020	valid-many_logloss:0.08868
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.8025654416649992, 'colsample_bytree': 0.8748052006921443, 'eta': 0.009393489875683971, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 0, 'max_depth': 2, 'min_child_weight': 0.013234942416650719, 'objective': 'manybinary:logistic', 'subsample': 0.8694541664166509, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.68619	test-many_logloss:0.68622	valid-many_logloss:0.68622
[200]	train-many_logloss:0.21970	test-many_logloss:0.22572	valid-many_logloss:0.22511
[400]	train-many_logloss:0.14107	test-many_logloss:0.15176	valid-many_logloss:0.15049
[499]	train-many_logloss:0.12216	test-many_logloss:0.13483	valid-many_logloss:0.13362
{'alpha': 3.1952805148613077, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.8947364949908566, '

[0]	train-many_logloss:0.69241	test-many_logloss:0.69242	valid-many_logloss:0.69242
[200]	train-many_logloss:0.56880	test-many_logloss:0.56994	valid-many_logloss:0.56973
[400]	train-many_logloss:0.47978	test-many_logloss:0.48179	valid-many_logloss:0.48157
[499]	train-many_logloss:0.44448	test-many_logloss:0.44692	valid-many_logloss:0.44671
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.787125082065971, 'colsample_bytree': 0.5243474561426513, 'eta': 0.8099003407203538, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 7.475715249737038e-05, 'lambda': 0.0003639646509775927, 'max_depth': 7, 'min_child_weight': 0.09759627970810976, 'objective': 'manybinary:logistic', 'subsample': 0.553442319017114, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.24943	test-many_logloss:0.28692	valid-many_logloss:0.28708
[22]	train-many_logloss:nan	test-many_logloss:nan	valid-many_logloss:nan
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.70611505490757

[172]	train-many_logloss:0.01001	test-many_logloss:0.06525	valid-many_logloss:0.06307
NEW BEST VALUE!                                                       
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.822829897275416, 'colsample_bytree': 0.8661025998484644, 'eta': 0.011434936316016709, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 2.091554134375915, 'max_depth': 8, 'min_child_weight': 1.6723514459697203e-05, 'objective': 'manybinary:logistic', 'subsample': 0.6325684178842439, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.68344	test-many_logloss:0.68383	valid-many_logloss:0.68382
[200]	train-many_logloss:0.09578	test-many_logloss:0.13004	valid-many_logloss:0.13112
[400]	train-many_logloss:0.02978	test-many_logloss:0.07501	valid-many_logloss:0.07698
[499]	train-many_logloss:0.02005	test-many_logloss:0.06808	valid-many_logloss:0.07031
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.9439090074967231, 'colsample_byt

[0]	train-many_logloss:0.39794	test-many_logloss:0.39931	valid-many_logloss:0.40014
[26]	train-many_logloss:0.25457	test-many_logloss:0.25696	valid-many_logloss:0.25656
{'alpha': 3.7815423151500604e-05, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.5547269545129563, 'colsample_bytree': 0.9843635186756493, 'eta': 0.5949933323999959, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0.00013347887526644314, 'lambda': 0, 'max_depth': 9, 'min_child_weight': 7.283920920982167e-06, 'objective': 'manybinary:logistic', 'subsample': 0.9927469091208938, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.28203	test-many_logloss:0.33490	valid-many_logloss:0.32972
[27]	train-many_logloss:0.00000	test-many_logloss:0.14267	valid-many_logloss:0.13660
{'alpha': 0.003403374940376167, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.6367989155272455, 'colsample_bytree': 0.6974194337846622, 'eta': 0.3029677824305006, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 1.581

100%|██████████| 25/25 [18:12<00:00, 43.70s/trial, best loss: 0.060273]
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.7489980469009814, 'colsample_bytree': 0.9420146676089547, 'eta': 0.0070706936561268224, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 0, 'max_depth': 5, 'min_child_weight': 2.1041667775099838e-05, 'objective': 'manybinary:logistic', 'subsample': 0.6262451727083667, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.68702	test-many_logloss:0.68710	valid-many_logloss:0.68720
[200]	train-many_logloss:0.17873	test-many_logloss:0.19630	valid-many_logloss:0.20185
[400]	train-many_logloss:0.06800	test-many_logloss:0.09722	valid-many_logloss:0.10419
[499]	train-many_logloss:0.04594	test-many_logloss:0.07936	valid-many_logloss:0.08656
NEW BEST VALUE!                                       
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.8357246683355697, 'colsample_bytree': 0.7491187395188089, 'eta': 0.605518780

[0]	train-many_logloss:0.66930	test-many_logloss:0.66938	valid-many_logloss:0.66954
[200]	train-many_logloss:0.05464	test-many_logloss:0.07607	valid-many_logloss:0.08364
[400]	train-many_logloss:0.02103	test-many_logloss:0.05610	valid-many_logloss:0.06224
[499]	train-many_logloss:0.01419	test-many_logloss:0.05310	valid-many_logloss:0.05933
NEW BEST VALUE!                                                        
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.7211698432094364, 'colsample_bytree': 0.712394975511369, 'eta': 0.008477424619483462, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 0, 'max_depth': 3, 'min_child_weight': 32.756265617720345, 'objective': 'manybinary:logistic', 'subsample': 0.6995489417449295, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.68678	test-many_logloss:0.68679	valid-many_logloss:0.68683
[200]	train-many_logloss:0.21817	test-many_logloss:0.21732	valid-many_logloss:0.22412
[400]	train-many_logloss:0.13696

[0]	train-many_logloss:0.58617	test-many_logloss:0.58735	valid-many_logloss:0.58749
[200]	train-many_logloss:0.02315	test-many_logloss:0.05552	valid-many_logloss:0.06279
[400]	train-many_logloss:0.01945	test-many_logloss:0.05398	valid-many_logloss:0.06132
[485]	train-many_logloss:0.01891	test-many_logloss:0.05393	valid-many_logloss:0.06123
{'alpha': 0.12561116777862372, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.9256829275325673, 'colsample_bytree': 0.9319504355895726, 'eta': 0.1636754464108664, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 4.5397484807711255e-05, 'max_depth': 3, 'min_child_weight': 0.4396216781804687, 'objective': 'manybinary:logistic', 'subsample': 0.797982966956712, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.57227	test-many_logloss:0.57261	valid-many_logloss:0.57389
[149]	train-many_logloss:0.00496	test-many_logloss:0.04984	valid-many_logloss:0.05794
100%|██████████| 25/25 [18:46<00:00, 45.07s/trial, best loss: 0.057

[0]	train-many_logloss:0.69056	test-many_logloss:0.69056	valid-many_logloss:0.69058
[200]	train-many_logloss:0.40042	test-many_logloss:0.40163	valid-many_logloss:0.40180
[400]	train-many_logloss:0.29894	test-many_logloss:0.30064	valid-many_logloss:0.30140
[499]	train-many_logloss:0.27195	test-many_logloss:0.27386	valid-many_logloss:0.27489
{'alpha': 8.111376998577662e-05, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.8729133438901646, 'colsample_bytree': 0.5205123603280772, 'eta': 0.0309650866085196, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 7.910525070154376e-06, 'max_depth': 3, 'min_child_weight': 7.405155036744976e-05, 'objective': 'manybinary:logistic', 'subsample': 0.6231629412057431, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.66918	test-many_logloss:0.66944	valid-many_logloss:0.66960
[200]	train-many_logloss:0.05669	test-many_logloss:0.08254	valid-many_logloss:0.08829
[400]	train-many_logloss:0.02246	test-many_logloss:0.06149	val

[200]	train-many_logloss:0.29546	test-many_logloss:0.29569	valid-many_logloss:0.29772
[400]	train-many_logloss:0.22098	test-many_logloss:0.22158	valid-many_logloss:0.22521
[499]	train-many_logloss:0.20108	test-many_logloss:0.20190	valid-many_logloss:0.20614
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.98982155285091, 'colsample_bytree': 0.8137063678634311, 'eta': 0.021437825449384364, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 0, 'max_depth': 5, 'min_child_weight': 2.5600171535202e-07, 'objective': 'manybinary:logistic', 'subsample': 0.7174114308249333, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.67470	test-many_logloss:0.67511	valid-many_logloss:0.67535
[200]	train-many_logloss:0.03293	test-many_logloss:0.07192	valid-many_logloss:0.07975
[400]	train-many_logloss:0.00667	test-many_logloss:0.05791	valid-many_logloss:0.06461
[491]	train-many_logloss:0.00346	test-many_logloss:0.05780	valid-many_logloss:0.06372
NEW BEST VALUE! 

[0]	train-many_logloss:0.65549	test-many_logloss:0.65565	valid-many_logloss:0.65603
[200]	train-many_logloss:0.03897	test-many_logloss:0.06595	valid-many_logloss:0.07265
[400]	train-many_logloss:0.02341	test-many_logloss:0.05822	valid-many_logloss:0.06364
[499]	train-many_logloss:0.02089	test-many_logloss:0.05706	valid-many_logloss:0.06244
{'alpha': 3.002743880356697, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.880750740505001, 'colsample_bytree': 0.6378931895802679, 'eta': 0.07394609656696631, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 2.007018003013685e-07, 'max_depth': 4, 'min_child_weight': 0.00020723760317545867, 'objective': 'manybinary:logistic', 'subsample': 0.7957161829611085, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.63594	test-many_logloss:0.63640	valid-many_logloss:0.63693
[200]	train-many_logloss:0.02323	test-many_logloss:0.05871	valid-many_logloss:0.06383
[400]	train-many_logloss:0.01659	test-many_logloss:0.05588	valid-

[0]	train-many_logloss:0.69117	test-many_logloss:0.69119	valid-many_logloss:0.69119
[200]	train-many_logloss:0.42712	test-many_logloss:0.42848	valid-many_logloss:0.43065
[400]	train-many_logloss:0.30030	test-many_logloss:0.30265	valid-many_logloss:0.30630
[499]	train-many_logloss:0.26087	test-many_logloss:0.26369	valid-many_logloss:0.26799
{'alpha': 1.055992173502114e-05, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.8017715093779021, 'colsample_bytree': 0.7625111741024486, 'eta': 0.007606220882567551, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0.001053204847233351, 'lambda': 0.23316250775856961, 'max_depth': 1, 'min_child_weight': 1.9088058756735313e-07, 'objective': 'manybinary:logistic', 'subsample': 0.5239057927863935, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.68793	test-many_logloss:0.68792	valid-many_logloss:0.68793
[200]	train-many_logloss:0.29542	test-many_logloss:0.29774	valid-many_logloss:0.29863
[400]	train-many_logloss:0.21832	test-many

[400]	train-many_logloss:0.47947	test-many_logloss:0.48088	valid-many_logloss:0.48106
[499]	train-many_logloss:0.44416	test-many_logloss:0.44593	valid-many_logloss:0.44610
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.787125082065971, 'colsample_bytree': 0.5243474561426513, 'eta': 0.8099003407203538, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 7.475715249737038e-05, 'lambda': 0.0003639646509775927, 'max_depth': 7, 'min_child_weight': 0.09759627970810976, 'objective': 'manybinary:logistic', 'subsample': 0.553442319017114, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.24218	test-many_logloss:0.28833	valid-many_logloss:0.28218
[23]	train-many_logloss:nan	test-many_logloss:nan	valid-many_logloss:nan
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.7061150549075759, 'colsample_bytree': 0.5843009197915733, 'eta': 0.002630107568123181, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 6.947115616562489, 'lambda': 0, 'max_

[0]	train-many_logloss:0.68513	test-many_logloss:0.68567	valid-many_logloss:0.68575
[200]	train-many_logloss:0.10924	test-many_logloss:0.15254	valid-many_logloss:0.15350
[400]	train-many_logloss:0.02822	test-many_logloss:0.07855	valid-many_logloss:0.08020
[499]	train-many_logloss:0.01779	test-many_logloss:0.06943	valid-many_logloss:0.07161
{'alpha': 5.769734865019702e-05, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.947651214492775, 'colsample_bytree': 0.8633457836387857, 'eta': 0.3977029463511548, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0.003128466805888169, 'lambda': 0, 'max_depth': 6, 'min_child_weight': 23.81766245704533, 'objective': 'manybinary:logistic', 'subsample': 0.8289219023831419, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.43574	test-many_logloss:0.43901	valid-many_logloss:0.43826
[151]	train-many_logloss:0.04856	test-many_logloss:0.08542	valid-many_logloss:0.08504
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.802

[0]	train-many_logloss:0.68346	test-many_logloss:0.68386	valid-many_logloss:0.68383
[200]	train-many_logloss:0.09718	test-many_logloss:0.13128	valid-many_logloss:0.13182
[400]	train-many_logloss:0.03013	test-many_logloss:0.07749	valid-many_logloss:0.07626
[499]	train-many_logloss:0.02024	test-many_logloss:0.07103	valid-many_logloss:0.06947
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.9439090074967231, 'colsample_bytree': 0.5525925639927433, 'eta': 0.0009915520889927426, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 0.004073992634408555, 'max_depth': 2, 'min_child_weight': 1.3633143522854873e-06, 'objective': 'manybinary:logistic', 'subsample': 0.9311207654760087, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.69241	test-many_logloss:0.69242	valid-many_logloss:0.69242
[200]	train-many_logloss:0.56999	test-many_logloss:0.57068	valid-many_logloss:0.57074
[400]	train-many_logloss:0.48171	test-many_logloss:0.48308	valid-many_logloss:0

[28]	train-many_logloss:0.00000	test-many_logloss:0.16149	valid-many_logloss:0.15099
{'alpha': 0.003403374940376167, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.6367989155272455, 'colsample_bytree': 0.6974194337846622, 'eta': 0.3029677824305006, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 1.5812851156161036, 'lambda': 1.2783614674881826e-06, 'max_depth': 5, 'min_child_weight': 2.558034001077105, 'objective': 'manybinary:logistic', 'subsample': 0.7481581307597385, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.47404	test-many_logloss:0.48018	valid-many_logloss:0.48122
[135]	train-many_logloss:0.01030	test-many_logloss:0.06979	valid-many_logloss:0.07173
NEW BEST VALUE!                                                       
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.822829897275416, 'colsample_bytree': 0.8661025998484644, 'eta': 0.011434936316016709, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 2.091554134

[0]	train-many_logloss:0.69211	test-many_logloss:0.69211	valid-many_logloss:0.69211
[200]	train-many_logloss:0.53403	test-many_logloss:0.53452	valid-many_logloss:0.53454
[400]	train-many_logloss:0.43814	test-many_logloss:0.43905	valid-many_logloss:0.43909
[499]	train-many_logloss:0.40437	test-many_logloss:0.40552	valid-many_logloss:0.40559
{'alpha': 0.0038647140584965353, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.7781226592895327, 'colsample_bytree': 0.6398164878102375, 'eta': 0.0041851001124752684, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 5.825640644469816e-06, 'lambda': 0, 'max_depth': 8, 'min_child_weight': 0.00537473915613328, 'objective': 'manybinary:logistic', 'subsample': 0.5391816076002862, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.68933	test-many_logloss:0.68958	valid-many_logloss:0.68960
[200]	train-many_logloss:0.26857	test-many_logloss:0.29868	valid-many_logloss:0.29833
[400]	train-many_logloss:0.12143	test-many_logloss:0.16455	va

[0]	train-many_logloss:0.40041	test-many_logloss:0.40091	valid-many_logloss:0.40208
[31]	train-many_logloss:0.25540	test-many_logloss:0.25422	valid-many_logloss:0.26038
{'alpha': 3.7815423151500604e-05, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.5547269545129563, 'colsample_bytree': 0.9843635186756493, 'eta': 0.5949933323999959, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0.00013347887526644314, 'lambda': 0, 'max_depth': 9, 'min_child_weight': 7.283920920982167e-06, 'objective': 'manybinary:logistic', 'subsample': 0.9927469091208938, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.28171	test-many_logloss:0.32704	valid-many_logloss:0.32962
[25]	train-many_logloss:0.00000	test-many_logloss:0.13548	valid-many_logloss:0.15570
{'alpha': 0.003403374940376167, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.6367989155272455, 'colsample_bytree': 0.6974194337846622, 'eta': 0.3029677824305006, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 1.581

[0]	train-many_logloss:0.58844	test-many_logloss:0.59068	valid-many_logloss:0.59110
[145]	train-many_logloss:0.00298	test-many_logloss:0.05522	valid-many_logloss:0.06657
NEW BEST VALUE!                                                        
100%|██████████| 25/25 [18:31<00:00, 44.45s/trial, best loss: 0.066373]
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.7489980469009814, 'colsample_bytree': 0.9420146676089547, 'eta': 0.0070706936561268224, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 0, 'max_depth': 5, 'min_child_weight': 2.1041667775099838e-05, 'objective': 'manybinary:logistic', 'subsample': 0.6262451727083667, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.68705	test-many_logloss:0.68727	valid-many_logloss:0.68729
[200]	train-many_logloss:0.17960	test-many_logloss:0.20334	valid-many_logloss:0.20397
[400]	train-many_logloss:0.06897	test-many_logloss:0.10608	valid-many_logloss:0.10586
[499]	train-many_logloss:0.04704	test-ma

[0]	train-many_logloss:0.66934	test-many_logloss:0.66966	valid-many_logloss:0.66969
[200]	train-many_logloss:0.05696	test-many_logloss:0.08852	valid-many_logloss:0.08610
[400]	train-many_logloss:0.02264	test-many_logloss:0.06918	valid-many_logloss:0.06430
[499]	train-many_logloss:0.01524	test-many_logloss:0.06560	valid-many_logloss:0.06077
NEW BEST VALUE!                                                        
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.7211698432094364, 'colsample_bytree': 0.712394975511369, 'eta': 0.008477424619483462, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 0, 'max_depth': 3, 'min_child_weight': 32.756265617720345, 'objective': 'manybinary:logistic', 'subsample': 0.6995489417449295, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.68684	test-many_logloss:0.68692	valid-many_logloss:0.68692
[200]	train-many_logloss:0.21654	test-many_logloss:0.22250	valid-many_logloss:0.22301
[400]	train-many_logloss:0.13704

[0]	train-many_logloss:0.58696	test-many_logloss:0.58841	valid-many_logloss:0.58860
[200]	train-many_logloss:0.02364	test-many_logloss:0.06915	valid-many_logloss:0.06438
[400]	train-many_logloss:0.01994	test-many_logloss:0.06792	valid-many_logloss:0.06303
[425]	train-many_logloss:0.01974	test-many_logloss:0.06789	valid-many_logloss:0.06299
{'alpha': 0.12561116777862372, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.9256829275325673, 'colsample_bytree': 0.9319504355895726, 'eta': 0.1636754464108664, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 4.5397484807711255e-05, 'max_depth': 3, 'min_child_weight': 0.4396216781804687, 'objective': 'manybinary:logistic', 'subsample': 0.797982966956712, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.57304	test-many_logloss:0.57486	valid-many_logloss:0.57524
[191]	train-many_logloss:0.00286	test-many_logloss:0.06591	valid-many_logloss:0.05885
100%|██████████| 25/25 [18:47<00:00, 45.09s/trial, best loss: 0.056

[0]	train-many_logloss:0.69055	test-many_logloss:0.69056	valid-many_logloss:0.69056
[200]	train-many_logloss:0.40071	test-many_logloss:0.40206	valid-many_logloss:0.40172
[400]	train-many_logloss:0.30011	test-many_logloss:0.30231	valid-many_logloss:0.30213
[499]	train-many_logloss:0.27341	test-many_logloss:0.27603	valid-many_logloss:0.27599
{'alpha': 8.111376998577662e-05, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.8729133438901646, 'colsample_bytree': 0.5205123603280772, 'eta': 0.0309650866085196, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 7.910525070154376e-06, 'max_depth': 3, 'min_child_weight': 7.405155036744976e-05, 'objective': 'manybinary:logistic', 'subsample': 0.6231629412057431, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.66903	test-many_logloss:0.66940	valid-many_logloss:0.66933
[200]	train-many_logloss:0.05647	test-many_logloss:0.08979	valid-many_logloss:0.09066
[400]	train-many_logloss:0.02222	test-many_logloss:0.07078	val

[400]	train-many_logloss:0.21932	test-many_logloss:0.22529	valid-many_logloss:0.22542
[499]	train-many_logloss:0.19874	test-many_logloss:0.20571	valid-many_logloss:0.20607
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.98982155285091, 'colsample_bytree': 0.8137063678634311, 'eta': 0.021437825449384364, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 0, 'max_depth': 5, 'min_child_weight': 2.5600171535202e-07, 'objective': 'manybinary:logistic', 'subsample': 0.7174114308249333, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.67476	test-many_logloss:0.67552	valid-many_logloss:0.67543
[200]	train-many_logloss:0.03073	test-many_logloss:0.07865	valid-many_logloss:0.07870
[400]	train-many_logloss:0.00560	test-many_logloss:0.06399	valid-many_logloss:0.06605
[473]	train-many_logloss:0.00316	test-many_logloss:0.06325	valid-many_logloss:0.06610
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.6189065258301154, 'colsample_bytree':

[0]	train-many_logloss:0.59182	test-many_logloss:0.59446	valid-many_logloss:0.59435
[200]	train-many_logloss:0.01440	test-many_logloss:0.06130	valid-many_logloss:0.06234
[245]	train-many_logloss:0.01354	test-many_logloss:0.06109	valid-many_logloss:0.06222
NEW BEST VALUE!                                                        
{'alpha': 2.3522100034707503, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.9024251204562953, 'colsample_bytree': 0.6405095119643568, 'eta': 0.09698958656804434, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 2.007018003013685e-07, 'max_depth': 4, 'min_child_weight': 0.9707761203227625, 'objective': 'manybinary:logistic', 'subsample': 0.8060144940453603, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.61788	test-many_logloss:0.61949	valid-many_logloss:0.61971
[200]	train-many_logloss:0.01528	test-many_logloss:0.06149	valid-many_logloss:0.06283
[358]	train-many_logloss:0.01216	test-many_logloss:0.06050	valid-many_logloss:0.0

[0]	train-many_logloss:0.69119	test-many_logloss:0.69121	valid-many_logloss:0.69121
[200]	train-many_logloss:0.42862	test-many_logloss:0.43118	valid-many_logloss:0.43079
[400]	train-many_logloss:0.30265	test-many_logloss:0.30707	valid-many_logloss:0.30662
[499]	train-many_logloss:0.26370	test-many_logloss:0.26888	valid-many_logloss:0.26843
{'alpha': 1.055992173502114e-05, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.8017715093779021, 'colsample_bytree': 0.7625111741024486, 'eta': 0.007606220882567551, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0.001053204847233351, 'lambda': 0.23316250775856961, 'max_depth': 1, 'min_child_weight': 1.9088058756735313e-07, 'objective': 'manybinary:logistic', 'subsample': 0.5239057927863935, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.68789	test-many_logloss:0.68793	valid-many_logloss:0.68791
[200]	train-many_logloss:0.29275	test-many_logloss:0.29674	valid-many_logloss:0.29644
[400]	train-many_logloss:0.21645	test-many

[0]	train-many_logloss:0.69242	test-many_logloss:0.69242	valid-many_logloss:0.69242
[200]	train-many_logloss:0.56972	test-many_logloss:0.57039	valid-many_logloss:0.57060
[400]	train-many_logloss:0.48116	test-many_logloss:0.48250	valid-many_logloss:0.48291
[499]	train-many_logloss:0.44613	test-many_logloss:0.44773	valid-many_logloss:0.44827
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.787125082065971, 'colsample_bytree': 0.5243474561426513, 'eta': 0.8099003407203538, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 7.475715249737038e-05, 'lambda': 0.0003639646509775927, 'max_depth': 7, 'min_child_weight': 0.09759627970810976, 'objective': 'manybinary:logistic', 'subsample': 0.553442319017114, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.24828	test-many_logloss:0.28792	valid-many_logloss:0.28690
[23]	train-many_logloss:nan	test-many_logloss:nan	valid-many_logloss:nan
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.70611505490757

NEW BEST VALUE!                                                       
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.822829897275416, 'colsample_bytree': 0.8661025998484644, 'eta': 0.011434936316016709, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 2.091554134375915, 'max_depth': 8, 'min_child_weight': 1.6723514459697203e-05, 'objective': 'manybinary:logistic', 'subsample': 0.6325684178842439, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.68347	test-many_logloss:0.68390	valid-many_logloss:0.68381
[200]	train-many_logloss:0.09293	test-many_logloss:0.12996	valid-many_logloss:0.12823
[400]	train-many_logloss:0.02812	test-many_logloss:0.07674	valid-many_logloss:0.07557
[499]	train-many_logloss:0.01872	test-many_logloss:0.07031	valid-many_logloss:0.06932
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.9439090074967231, 'colsample_bytree': 0.5525925639927433, 'eta': 0.0009915520889927426, 'eval_metric': ('many_logloss'

[0]	train-many_logloss:0.68931	test-many_logloss:0.68953	valid-many_logloss:0.68952
[200]	train-many_logloss:0.26665	test-many_logloss:0.29659	valid-many_logloss:0.29475
[400]	train-many_logloss:0.11963	test-many_logloss:0.16197	valid-many_logloss:0.15959
[499]	train-many_logloss:0.08247	test-many_logloss:0.12867	valid-many_logloss:0.12625
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.980487461508577, 'colsample_bytree': 0.872601457169579, 'eta': 0.46040749038190176, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 0, 'max_depth': 5, 'min_child_weight': 0.0003681699610575285, 'objective': 'manybinary:logistic', 'subsample': 0.7369430758094139, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.38123	test-many_logloss:0.40013	valid-many_logloss:0.39623
[38]	train-many_logloss:0.00014	test-many_logloss:0.09880	valid-many_logloss:0.09025
{'alpha': 4.07913722402031e-06, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.7818800515438491, 'c

[0]	train-many_logloss:0.27936	test-many_logloss:0.32580	valid-many_logloss:0.32604
[27]	train-many_logloss:0.00000	test-many_logloss:0.14708	valid-many_logloss:0.14116
{'alpha': 0.003403374940376167, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.6367989155272455, 'colsample_bytree': 0.6974194337846622, 'eta': 0.3029677824305006, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 1.5812851156161036, 'lambda': 1.2783614674881826e-06, 'max_depth': 5, 'min_child_weight': 2.558034001077105, 'objective': 'manybinary:logistic', 'subsample': 0.7481581307597385, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.47423	test-many_logloss:0.48072	valid-many_logloss:0.47987
[78]	train-many_logloss:0.01110	test-many_logloss:0.06450	valid-many_logloss:0.06441
NEW BEST VALUE!                                                       
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.822829897275416, 'colsample_bytree': 0.8661025998484644, 'eta': 0.011434936316016709, '

[200]	train-many_logloss:0.18178	test-many_logloss:0.20819	valid-many_logloss:0.20773
[400]	train-many_logloss:0.07204	test-many_logloss:0.11259	valid-many_logloss:0.11182
[499]	train-many_logloss:0.05007	test-many_logloss:0.09569	valid-many_logloss:0.09448
NEW BEST VALUE!                                       
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.8357246683355697, 'colsample_bytree': 0.7491187395188089, 'eta': 0.605518780660455, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 0, 'max_depth': 5, 'min_child_weight': 118.844312919608, 'objective': 'manybinary:logistic', 'subsample': 0.6525233714163394, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.40071	test-many_logloss:0.40214	valid-many_logloss:0.40269
[65]	train-many_logloss:0.25880	test-many_logloss:0.26452	valid-many_logloss:0.26307
{'alpha': 3.7815423151500604e-05, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.5547269545129563, 'colsample_bytree': 0.984363518675

{'alpha': 0.12561116777862372, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.9256829275325673, 'colsample_bytree': 0.9319504355895726, 'eta': 0.1636754464108664, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 4.5397484807711255e-05, 'max_depth': 3, 'min_child_weight': 0.4396216781804687, 'objective': 'manybinary:logistic', 'subsample': 0.797982966956712, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.57480	test-many_logloss:0.57686	valid-many_logloss:0.57609
[200]	train-many_logloss:0.00375	test-many_logloss:0.06996	valid-many_logloss:0.06659
[205]	train-many_logloss:0.00353	test-many_logloss:0.07015	valid-many_logloss:0.06680
100%|██████████| 25/25 [18:45<00:00, 45.02s/trial, best loss: 0.065055]
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.7489980469009814, 'colsample_bytree': 0.9420146676089547, 'eta': 0.0070706936561268224, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 0, 'max_depth': 5, 'min_c

NEW BEST VALUE!                                                        
{'alpha': 1.200782835459218, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.8687918622527812, 'colsample_bytree': 0.6034148520201185, 'eta': 0.19484734134839077, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 1.635393836955409e-05, 'max_depth': 4, 'min_child_weight': 0.0005216471023949306, 'objective': 'manybinary:logistic', 'subsample': 0.7985877624194806, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.54857	test-many_logloss:0.55184	valid-many_logloss:0.55133
[138]	train-many_logloss:0.00644	test-many_logloss:0.06049	valid-many_logloss:0.05811
NEW BEST VALUE!                                                        
{'alpha': 6.612121512010405, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.8905064534577106, 'colsample_bytree': 0.6187259854298783, 'eta': 0.14667519751803157, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 3.994005971611469e-05, 

[0]	train-many_logloss:0.65490	test-many_logloss:0.65556	valid-many_logloss:0.65566
[200]	train-many_logloss:0.03508	test-many_logloss:0.06850	valid-many_logloss:0.06835
[400]	train-many_logloss:0.02133	test-many_logloss:0.06077	valid-many_logloss:0.06082
[499]	train-many_logloss:0.01925	test-many_logloss:0.05972	valid-many_logloss:0.05977
{'alpha': 0.7524380514093831, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.8739806827265371, 'colsample_bytree': 0.8185559155653019, 'eta': 0.14690197928967186, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 2.4773030076015884e-05, 'max_depth': 3, 'min_child_weight': 0.0001947667427690561, 'objective': 'manybinary:logistic', 'subsample': 0.8066047339236166, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.58409	test-many_logloss:0.58631	valid-many_logloss:0.58582
[200]	train-many_logloss:0.00609	test-many_logloss:0.05568	valid-many_logloss:0.05471
[246]	train-many_logloss:0.00487	test-many_logloss:0.05568	vali

[400]	train-many_logloss:0.30665	test-many_logloss:0.30778	valid-many_logloss:0.31100
[499]	train-many_logloss:0.26780	test-many_logloss:0.26900	valid-many_logloss:0.27299
{'alpha': 1.055992173502114e-05, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.8017715093779021, 'colsample_bytree': 0.7625111741024486, 'eta': 0.007606220882567551, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0.001053204847233351, 'lambda': 0.23316250775856961, 'max_depth': 1, 'min_child_weight': 1.9088058756735313e-07, 'objective': 'manybinary:logistic', 'subsample': 0.5239057927863935, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.68790	test-many_logloss:0.68794	valid-many_logloss:0.68790
[200]	train-many_logloss:0.29609	test-many_logloss:0.29704	valid-many_logloss:0.29894
[400]	train-many_logloss:0.21950	test-many_logloss:0.21946	valid-many_logloss:0.22442
[499]	train-many_logloss:0.19859	test-many_logloss:0.19857	valid-many_logloss:0.20439
{'alpha': 0, 'beta_eta': 0, 'btype': 'I'

[0]	train-many_logloss:0.68608	test-many_logloss:0.68614	valid-many_logloss:0.68614
[200]	train-many_logloss:0.21318	test-many_logloss:0.21619	valid-many_logloss:0.22137
[400]	train-many_logloss:0.13456	test-many_logloss:0.13971	valid-many_logloss:0.14771
[499]	train-many_logloss:0.11582	test-many_logloss:0.12243	valid-many_logloss:0.13109
{'alpha': 2.4983490924580396, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.8788092996859548, 'colsample_bytree': 0.6517822231671051, 'eta': 0.13400382263629143, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0.07670281823746909, 'lambda': 2.8818405246231986e-07, 'max_depth': 4, 'min_child_weight': 1.4538773712870752, 'objective': 'manybinary:logistic', 'subsample': 0.8087991627856118, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.59168	test-many_logloss:0.59299	valid-many_logloss:0.59369
[200]	train-many_logloss:0.01486	test-many_logloss:0.05573	valid-many_logloss:0.06468
[290]	train-many_logloss:0.01333	test-many_loglo

[0]	train-many_logloss:0.68508	test-many_logloss:0.68563	valid-many_logloss:0.68569
[200]	train-many_logloss:0.10921	test-many_logloss:0.15093	valid-many_logloss:0.15378
[400]	train-many_logloss:0.02835	test-many_logloss:0.07762	valid-many_logloss:0.08022
[499]	train-many_logloss:0.01776	test-many_logloss:0.06891	valid-many_logloss:0.07134
{'alpha': 5.769734865019702e-05, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.947651214492775, 'colsample_bytree': 0.8633457836387857, 'eta': 0.3977029463511548, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0.003128466805888169, 'lambda': 0, 'max_depth': 6, 'min_child_weight': 23.81766245704533, 'objective': 'manybinary:logistic', 'subsample': 0.8289219023831419, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.43607	test-many_logloss:0.43719	valid-many_logloss:0.44021
[112]	train-many_logloss:0.04979	test-many_logloss:0.08025	valid-many_logloss:0.08568
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.802

[0]	train-many_logloss:0.68344	test-many_logloss:0.68370	valid-many_logloss:0.68380
[200]	train-many_logloss:0.09509	test-many_logloss:0.12635	valid-many_logloss:0.13200
[400]	train-many_logloss:0.02966	test-many_logloss:0.07375	valid-many_logloss:0.07921
[499]	train-many_logloss:0.01998	test-many_logloss:0.06741	valid-many_logloss:0.07281
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.9439090074967231, 'colsample_bytree': 0.5525925639927433, 'eta': 0.0009915520889927426, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 0.004073992634408555, 'max_depth': 2, 'min_child_weight': 1.3633143522854873e-06, 'objective': 'manybinary:logistic', 'subsample': 0.9311207654760087, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.69242	test-many_logloss:0.69242	valid-many_logloss:0.69242
[200]	train-many_logloss:0.56883	test-many_logloss:0.56937	valid-many_logloss:0.56972
[400]	train-many_logloss:0.47976	test-many_logloss:0.48059	valid-many_logloss:0

[0]	train-many_logloss:0.28132	test-many_logloss:0.32896	valid-many_logloss:0.32921
[26]	train-many_logloss:0.00000	test-many_logloss:0.14144	valid-many_logloss:0.16019
{'alpha': 0.003403374940376167, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.6367989155272455, 'colsample_bytree': 0.6974194337846622, 'eta': 0.3029677824305006, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 1.5812851156161036, 'lambda': 1.2783614674881826e-06, 'max_depth': 5, 'min_child_weight': 2.558034001077105, 'objective': 'manybinary:logistic', 'subsample': 0.7481581307597385, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.47573	test-many_logloss:0.48106	valid-many_logloss:0.48141
[64]	train-many_logloss:0.01192	test-many_logloss:0.06231	valid-many_logloss:0.07246
NEW BEST VALUE!                                                      
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.822829897275416, 'colsample_bytree': 0.8661025998484644, 'eta': 0.011434936316016709, 'e

[0]	train-many_logloss:0.69211	test-many_logloss:0.69211	valid-many_logloss:0.69211
[200]	train-many_logloss:0.53409	test-many_logloss:0.53402	valid-many_logloss:0.53438
[400]	train-many_logloss:0.43831	test-many_logloss:0.43844	valid-many_logloss:0.43895
[499]	train-many_logloss:0.40463	test-many_logloss:0.40487	valid-many_logloss:0.40544
{'alpha': 0.0038647140584965353, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.7781226592895327, 'colsample_bytree': 0.6398164878102375, 'eta': 0.0041851001124752684, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 5.825640644469816e-06, 'lambda': 0, 'max_depth': 8, 'min_child_weight': 0.00537473915613328, 'objective': 'manybinary:logistic', 'subsample': 0.5391816076002862, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.68935	test-many_logloss:0.68954	valid-many_logloss:0.68957
[200]	train-many_logloss:0.26930	test-many_logloss:0.29722	valid-many_logloss:0.29920
[400]	train-many_logloss:0.12175	test-many_logloss:0.16190	va

[0]	train-many_logloss:0.68680	test-many_logloss:0.68688	valid-many_logloss:0.68687
[200]	train-many_logloss:0.21311	test-many_logloss:0.22076	valid-many_logloss:0.22050
[400]	train-many_logloss:0.13316	test-many_logloss:0.14452	valid-many_logloss:0.14438
[499]	train-many_logloss:0.12012	test-many_logloss:0.13318	valid-many_logloss:0.13267
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.7987531851832483, 'colsample_bytree': 0.7299740823920091, 'eta': 0.06030778497723384, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 0, 'max_depth': 8, 'min_child_weight': 0.00023882505207060682, 'objective': 'manybinary:logistic', 'subsample': 0.8750385697829414, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.63789	test-many_logloss:0.64248	valid-many_logloss:0.64210
[101]	train-many_logloss:0.00208	test-many_logloss:0.08150	valid-many_logloss:0.07515
{'alpha': 1.3107376407541217e-06, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.73327329479180

[0]	train-many_logloss:0.56788	test-many_logloss:0.57178	valid-many_logloss:0.57068
[125]	train-many_logloss:0.00184	test-many_logloss:0.06355	valid-many_logloss:0.05831
100%|██████████| 25/25 [18:20<00:00, 44.03s/trial, best loss: 0.056673]
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.7489980469009814, 'colsample_bytree': 0.9420146676089547, 'eta': 0.0070706936561268224, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 0, 'max_depth': 5, 'min_child_weight': 2.1041667775099838e-05, 'objective': 'manybinary:logistic', 'subsample': 0.6262451727083667, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.68706	test-many_logloss:0.68727	valid-many_logloss:0.68728
[200]	train-many_logloss:0.18007	test-many_logloss:0.20476	valid-many_logloss:0.20597
[400]	train-many_logloss:0.06981	test-many_logloss:0.10559	valid-many_logloss:0.10826
[499]	train-many_logloss:0.04766	test-many_logloss:0.08745	valid-many_logloss:0.09033
NEW BEST VALUE!           

[0]	train-many_logloss:0.66927	test-many_logloss:0.66962	valid-many_logloss:0.66964
[200]	train-many_logloss:0.05695	test-many_logloss:0.08467	valid-many_logloss:0.08715
[400]	train-many_logloss:0.02295	test-many_logloss:0.06536	valid-many_logloss:0.06571
[499]	train-many_logloss:0.01566	test-many_logloss:0.06299	valid-many_logloss:0.06241
NEW BEST VALUE!                                                        
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.7211698432094364, 'colsample_bytree': 0.712394975511369, 'eta': 0.008477424619483462, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 0, 'max_depth': 3, 'min_child_weight': 32.756265617720345, 'objective': 'manybinary:logistic', 'subsample': 0.6995489417449295, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.68682	test-many_logloss:0.68687	valid-many_logloss:0.68689
[200]	train-many_logloss:0.21840	test-many_logloss:0.22471	valid-many_logloss:0.22798
[400]	train-many_logloss:0.13743

[400]	train-many_logloss:0.00561	test-many_logloss:0.06751	valid-many_logloss:0.06568
[470]	train-many_logloss:0.00333	test-many_logloss:0.06800	valid-many_logloss:0.06556
NEW BEST VALUE!                                                       
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.6189065258301154, 'colsample_bytree': 0.7882001782331782, 'eta': 0.003770077706590942, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0.0006381725365621713, 'lambda': 0, 'max_depth': 1, 'min_child_weight': 0.05537987461653371, 'objective': 'manybinary:logistic', 'subsample': 0.6537419349296045, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.69057	test-many_logloss:0.69057	valid-many_logloss:0.69057
[200]	train-many_logloss:0.40183	test-many_logloss:0.40355	valid-many_logloss:0.40340
[400]	train-many_logloss:0.30077	test-many_logloss:0.30320	valid-many_logloss:0.30350
[499]	train-many_logloss:0.27398	test-many_logloss:0.27668	valid-many_logloss:0.27716
{'alpha':

[0]	train-many_logloss:0.63591	test-many_logloss:0.63732	valid-many_logloss:0.63714
[200]	train-many_logloss:0.02169	test-many_logloss:0.06736	valid-many_logloss:0.06583
[400]	train-many_logloss:0.01604	test-many_logloss:0.06520	valid-many_logloss:0.06335
[499]	train-many_logloss:0.01527	test-many_logloss:0.06496	valid-many_logloss:0.06312
NEW BEST VALUE!                                                        
{'alpha': 1.200782835459218, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.8687918622527812, 'colsample_bytree': 0.6034148520201185, 'eta': 0.19484734134839077, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 1.635393836955409e-05, 'max_depth': 4, 'min_child_weight': 0.0005216471023949306, 'objective': 'manybinary:logistic', 'subsample': 0.7985877624194806, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.54837	test-many_logloss:0.55120	valid-many_logloss:0.55157
[139]	train-many_logloss:0.00663	test-many_logloss:0.06298	valid-many_logloss:0

[0]	train-many_logloss:0.68792	test-many_logloss:0.68793	valid-many_logloss:0.68795
[200]	train-many_logloss:0.29484	test-many_logloss:0.29578	valid-many_logloss:0.29790
[400]	train-many_logloss:0.21848	test-many_logloss:0.22030	valid-many_logloss:0.22325
[499]	train-many_logloss:0.19801	test-many_logloss:0.20041	valid-many_logloss:0.20344
{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.98982155285091, 'colsample_bytree': 0.8137063678634311, 'eta': 0.021437825449384364, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 0, 'max_depth': 5, 'min_child_weight': 2.5600171535202e-07, 'objective': 'manybinary:logistic', 'subsample': 0.7174114308249333, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.67468	test-many_logloss:0.67528	valid-many_logloss:0.67542
[200]	train-many_logloss:0.03248	test-many_logloss:0.07832	valid-many_logloss:0.07956
[400]	train-many_logloss:0.00660	test-many_logloss:0.06477	valid-many_logloss:0.06455
[498]	train-many_l

{'alpha': 3.1952805148613077, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.8947364949908566, 'colsample_bytree': 0.6489201195603208, 'eta': 0.04857283281713467, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 5.599926914440914e-07, 'max_depth': 4, 'min_child_weight': 1.7794291782520545e-07, 'objective': 'manybinary:logistic', 'subsample': 0.5893278373512121, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.65547	test-many_logloss:0.65585	valid-many_logloss:0.65615
[200]	train-many_logloss:0.03912	test-many_logloss:0.07298	valid-many_logloss:0.07270
[400]	train-many_logloss:0.02356	test-many_logloss:0.06506	valid-many_logloss:0.06420
[499]	train-many_logloss:0.02098	test-many_logloss:0.06416	valid-many_logloss:0.06324
NEW BEST VALUE!                                                        
{'alpha': 3.002743880356697, 'beta_eta': 0, 'btype': 'I', 'colsample_bylevel': 0.880750740505001, 'colsample_bytree': 0.6378931895802679, 'eta': 0.07394609656696

In [68]:
import pandas as pd

# Turn the records accumulated in `results` into a single table and leave it
# as the cell's last expression so the notebook renders it.
summary_table = pd.DataFrame(data=results)
summary_table

Unnamed: 0,iter,valid_logloss,test_logloss,num_rounds,time_diff,winning_param
0,0,0.060741,0.061365,500,1184.938146,"{'alpha': 0, 'beta_eta': 0, 'btype': 'I', 'col..."
1,1,0.062533,0.054095,110,1097.437254,"{'alpha': 0.1617477004218055, 'beta_eta': 0, '..."
2,2,0.057733,0.056778,233,1135.469312,"{'alpha': 0.23062196782499855, 'beta_eta': 0, ..."
3,3,0.060273,0.058737,106,1092.539088,"{'alpha': 1.200782835459218, 'beta_eta': 0, 'b..."
4,4,0.057297,0.050025,216,1126.722315,"{'alpha': 0.7524380514093831, 'beta_eta': 0, '..."
5,5,0.061852,0.05635,119,1109.750677,"{'alpha': 0.19028592334432648, 'beta_eta': 0, ..."
6,6,0.057963,0.053345,157,1137.014389,"{'alpha': 0.19028592334432648, 'beta_eta': 0, ..."
7,7,0.061134,0.060308,125,1100.057296,"{'alpha': 1.200782835459218, 'beta_eta': 0, 'b..."
8,8,0.056773,0.05362,141,1208.929127,"{'alpha': 0.19028592334432648, 'beta_eta': 0, ..."
9,9,0.059144,0.05998,208,1104.502578,"{'alpha': 0.7524380514093831, 'beta_eta': 0, '..."
