In [1]:
import sys
import time
from wideboost.wrappers import wxgb
import numpy as np
import xgboost as xgb

import tensorflow_datasets as tfds
from matplotlib import pyplot as plt

# Load the MNIST train and test splits as (input, label) pairs
# (as_supervised=True) together with the dataset metadata (with_info=True).
(ds_train, ds_test), ds_info = tfds.load(
    'mnist',
    split=['train', 'test'],
    shuffle_files=False,
    as_supervised=True,
    with_info=True,
)

# Materialise each split as ONE in-memory batch. The batch size is read from
# the dataset metadata so the single batch always covers the whole split,
# instead of relying on a hard-coded size and keeping "whatever batch came
# last" from a loop.
a = next(iter(ds_train.batch(ds_info.splits['train'].num_examples)))
b = next(iter(ds_test.batch(ds_info.splits['test'].num_examples)))

In [2]:
# Convert the batched tensors to NumPy. The first element of each pair is
# reshaped so that every example becomes one row of 28*28 = 784 values; the
# second element (the labels) is converted as-is.
xtrain, ytrain = a[0].numpy().reshape((-1, 28 * 28)), a[1].numpy()
xtest, ytest = b[0].numpy().reshape((-1, 28 * 28)), b[1].numpy()

In [3]:
def _onehot(Y, num_classes=None):
    """One-hot encode integer class labels.

    Parameters
    ----------
    Y : array-like
        Class labels with one label per row, e.g. shape ``(n,)`` or ``(n, 1)``.
        Values are cast to ``int`` before being used as column indices.
    num_classes : int, optional
        Number of columns in the result. When omitted, the width is
        ``max(label) + 1``, which depends on the labels actually present in
        ``Y``; pass it explicitly when several label subsets must share the
        same width.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(Y.shape[0], num_classes)`` with a single 1 per
        row in the column given by that row's label.

    Raises
    ------
    ValueError
        If ``Y`` is empty and ``num_classes`` is not given, or if any label
        falls outside ``[0, num_classes)``.
    """
    Y = np.asarray(Y)
    labels = Y.astype(int).flatten()
    n_rows = Y.shape[0]

    if num_classes is None:
        if labels.size == 0:
            raise ValueError(
                "cannot infer the number of classes from empty labels; "
                "pass num_classes explicitly"
            )
        num_classes = int(labels.max()) + 1
    else:
        num_classes = int(num_classes)

    # A negative label would otherwise index from the end of the row and
    # silently mark the wrong class.
    if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
        raise ValueError(
            "labels must lie in [0, %d); got range [%d, %d]"
            % (num_classes, labels.min(), labels.max())
        )

    encoded = np.zeros((n_rows, num_classes))
    encoded[np.arange(n_rows), labels] = 1
    return encoded

In [4]:
def get_data(seed, n_train=3_000, n_valid=2_000, n_test=1_000,
             n_features=200, train_frac=0.7):
    """Draw a seeded random subsample of the module-level MNIST arrays.

    Reads the globals ``xtrain``, ``ytrain``, ``xtest`` and ``ytest`` and
    reseeds NumPy's global random state with ``seed``.

    Parameters
    ----------
    seed : int
        Seed passed to ``np.random.seed``; fixes every draw below.
    n_train, n_valid, n_test : int
        Number of rows sampled (without replacement) for each split.
    n_features : int
        Number of feature columns sampled (without replacement); the same
        columns are used for all three splits.
    train_frac : float
        Fraction of ``xtrain`` rows set aside as the pool from which training
        rows are drawn; validation rows are drawn from the remaining rows, so
        the two never overlap.

    Returns
    -------
    tuple
        ``(x_train, x_valid, x_test, y_train, y_valid, y_test)`` where the
        ``y_*`` arrays are one-hot encoded and share the same number of
        columns.
    """
    np.random.seed(seed)
    n_rows = xtrain.shape[0]

    # The order of the np.random calls below determines which rows/columns a
    # given seed selects -- do not reorder them.
    train_pool = np.random.choice(range(n_rows), round(n_rows * train_frac), replace=False)
    # Rows not in the training pool; element order is that of set iteration.
    valid_pool = np.array(list(set(range(n_rows)) - set(train_pool)))
    train_idx = np.random.choice(train_pool, n_train, replace=False)
    valid_idx = np.random.choice(valid_pool, n_valid, replace=False)

    test_idx = np.random.choice(range(xtest.shape[0]), n_test, replace=False)

    features = np.random.choice(range(xtrain.shape[1]), n_features, replace=False)

    # _onehot sizes its output from the largest label it is given, so a
    # subsample that happens to miss the top class would come back narrower.
    # Pad every target to the width implied by the full label arrays.
    num_classes = int(max(ytrain.max(), ytest.max())) + 1

    def _encode(labels):
        encoded = _onehot(labels)
        missing = num_classes - encoded.shape[1]
        if missing > 0:
            encoded = np.pad(encoded, ((0, 0), (0, missing)), mode='constant')
        return encoded

    y_train = _encode(ytrain[train_idx])
    y_valid = _encode(ytrain[valid_idx])
    y_test = _encode(ytest[test_idx])

    # idx[:, None] against the 1-D feature index selects the rows x columns
    # sub-matrix.
    return (
        xtrain[train_idx[:, None], features],
        xtrain[valid_idx[:, None], features],
        xtest[test_idx[:, None], features],
        y_train, y_valid, y_test,
    )

In [5]:
from hyperopt import fmin, tpe, hp, STATUS_OK, space_eval
import os

# ---- Experiment settings -------------------------------------------------
iters = 30                    # number of independent data resamples
MAX_EVALS = 25                # hyperopt trials per resample
NUM_ROUND = 500               # upper bound on boosting rounds per trial
EARLY_STOPPING_ROUNDS = 20
OUTPUT_DIM = 10
METRIC = 'many_logloss'

results = []

# The search space does not depend on the resample, so it is built once.
spc = {
    'btype': hp.choice('btype',['I', 'In', 'R', 'Rn']),
    'extra_dims': hp.choice('extra_dims',[0,1,2,3,4,5,6,7]),
    'objective': hp.choice('objective',['manybinary:logistic']),
    'eval_metric':hp.choice('eval_metric',[['many_logloss']]),
    'eta': hp.loguniform('eta', -7, 0),
    'beta_eta': hp.loguniform('beta_eta', -7, 0),
    'max_depth' : hp.choice('max_depth',range(1,11)),
    'subsample': hp.uniform('subsample', 0.5, 1),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1),
    'colsample_bylevel': hp.uniform('colsample_bylevel', 0.5, 1),
    'min_child_weight': hp.loguniform('min_child_weight', -16, 5),
    'alpha': hp.choice('alpha', [0, hp.loguniform('alpha_positive', -16, 2)]),
    'lambda': hp.choice('lambda', [0, hp.loguniform('lambda_positive', -16, 2)]),
    'gamma': hp.choice('gamma', [0, hp.loguniform('gamma_positive', -16, 2)])
}

# NOTE(review): presumably read by hyperopt's fmin to seed the search when no
# rstate is passed -- confirm against the installed hyperopt version.
os.environ['HYPEROPT_FMIN_SEED'] = '101010'


def objective(param):
    """Fit one wideboost model for a hyperopt trial and report its loss.

    Reads the module-level ``x_train``, ``y_train`` and ``watchlist`` of the
    current resample, and updates the module-level best-so-far trackers
    (``best_val``, ``xgb_test_val``, ``nrounds``, ``xgb_param``) whenever this
    trial beats the best validation loss seen in the current resample.

    Parameters
    ----------
    param : dict
        Hyperparameters sampled from ``spc``; ``output_dim`` and ``nthread``
        are added in place before fitting.

    Returns
    -------
    dict
        ``{'loss': <best validation loss of this trial>, 'status': STATUS_OK}``.
    """
    global best_val
    global xgb_test_val
    global xgb_param
    global nrounds

    param['output_dim'] = OUTPUT_DIM
    param['nthread'] = 2
    print(param)

    xgb_results = {}
    wxgb.fit(
        x_train, y_train,
        param, NUM_ROUND, watchlist,
        evals_result=xgb_results,
        early_stopping_rounds=EARLY_STOPPING_ROUNDS,
        verbose_eval=200
    )

    # Diverging runs log nan/inf losses. np.argmin would return the index of
    # the first NaN, while builtin min() skips a NaN that is not first, so the
    # "best loss" and the "best round" could refer to different rounds. Map
    # NaN to +inf and derive the loss, the round and the test loss from one
    # single index.
    valid_curve = np.asarray(xgb_results['valid'][METRIC], dtype=float)
    valid_curve = np.where(np.isnan(valid_curve), np.inf, valid_curve)
    best_round = int(np.argmin(valid_curve))
    trial_loss = float(valid_curve[best_round])

    if trial_loss < best_val:
        print("NEW BEST VALUE!")
        best_val = trial_loss
        xgb_test_val = xgb_results['test'][METRIC][best_round]
        nrounds = best_round + 1
        # Snapshot the dict so later mutation of `param` cannot alter it.
        xgb_param = dict(param)

    return {'loss': trial_loss, 'status': STATUS_OK }


for est in range(iters):

    t0 = time.time()
    x_train, x_valid, x_test, y_train, y_valid, y_test = get_data(1000 + est)
    # NOTE(review): 'valid' is kept last; presumably (as with xgboost.train)
    # early stopping monitors the last entry -- confirm against wxgb.fit.
    watchlist = [
        ((x_train, y_train),'train'),
        ((x_test, y_test), 'test'),
        ((x_valid, y_valid), 'valid')
    ]

    # Reset the best-so-far trackers for this resample; 1000.0 is a sentinel
    # larger than any loss a useful trial produces.
    best_val = 1000.0
    xgb_test_val = 1000.0
    nrounds = 0
    xgb_param = {}

    best = fmin(objective,
        space=spc,
        algo=tpe.suggest,
        max_evals=MAX_EVALS)
    t1 = time.time()
    results.append({
        'iter': est,
        'valid_logloss': best_val,
        'test_logloss': xgb_test_val,
        'num_rounds': nrounds,
        'time_diff': t1 - t0,
        'winning_param': xgb_param.copy()
    })

{'alpha': 8.813057946902575e-07, 'beta_eta': 0.0016692834513794938, 'btype': 'R', 'colsample_bylevel': 0.7489980469009814, 'colsample_bytree': 0.9420146676089547, 'eta': 0.0070706936561268224, 'eval_metric': ('many_logloss',), 'extra_dims': 2, 'gamma': 0, 'lambda': 0, 'max_depth': 5, 'min_child_weight': 2.1041667775099838e-05, 'objective': 'manybinary:logistic', 'subsample': 0.6262451727083667, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.65380	test-many_logloss:0.65374	valid-many_logloss:0.65379
[200]	train-many_logloss:0.10360	test-many_logloss:0.12882	valid-many_logloss:0.12791
[400]	train-many_logloss:0.03629	test-many_logloss:0.07274	valid-many_logloss:0.07304
[499]	train-many_logloss:0.02289	test-many_logloss:0.06268	valid-many_logloss:0.06328
NEW BEST VALUE!                                       
{'alpha': 0, 'beta_eta': 0.07375799600401638, 'btype': 'I', 'colsample_bylevel': 0.8357246683355697, 'colsample_bytree': 0.7491187395188089, 'eta': 0.605518780660455, 'eval_

  return -np.mean(np.log(p))

  p = 1/(1 + np.exp(-logits))

  return -np.mean(np.log(p))



{'alpha': 0, 'beta_eta': 0.012588600896345226, 'btype': 'Rn', 'colsample_bylevel': 0.5547269545129563, 'colsample_bytree': 0.9843635186756493, 'eta': 0.5949933323999959, 'eval_metric': ('many_logloss',), 'extra_dims': 4, 'gamma': 0.00013347887526644314, 'lambda': 0, 'max_depth': 9, 'min_child_weight': 7.283920920982167e-06, 'objective': 'manybinary:logistic', 'subsample': 0.9927469091208938, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.94147	test-many_logloss:0.97079	valid-many_logloss:0.96448
  8%|▊         | 2/25 [02:10<21:34, 56.28s/trial, best loss: 0.063283]

  P = 1 / (1 + np.exp(-logits))



[19]	train-many_logloss:inf	test-many_logloss:inf	valid-many_logloss:inf
{'alpha': 1.0366863221923632, 'beta_eta': 0.13640103803528691, 'btype': 'Rn', 'colsample_bylevel': 0.6367989155272455, 'colsample_bytree': 0.6974194337846622, 'eta': 0.3029677824305006, 'eval_metric': ('many_logloss',), 'extra_dims': 6, 'gamma': 1.5812851156161036, 'lambda': 1.2783614674881826e-06, 'max_depth': 5, 'min_child_weight': 2.558034001077105, 'objective': 'manybinary:logistic', 'subsample': 0.7481581307597385, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.55526	test-many_logloss:0.55322	valid-many_logloss:0.55533
[20]	train-many_logloss:nan	test-many_logloss:nan	valid-many_logloss:nan
{'alpha': 0.0015268888255807838, 'beta_eta': 0.20179405533498707, 'btype': 'R', 'colsample_bylevel': 0.822829897275416, 'colsample_bytree': 0.8661025998484644, 'eta': 0.011434936316016709, 'eval_metric': ('many_logloss',), 'extra_dims': 1, 'gamma': 0, 'lambda': 2.091554134375915, 'max_depth': 8, 'min_child_weight

[0]	train-many_logloss:0.38356	test-many_logloss:0.39847	valid-many_logloss:0.39799
[21]	train-many_logloss:inf	test-many_logloss:inf	valid-many_logloss:inf
{'alpha': 0, 'beta_eta': 0.004046103790871514, 'btype': 'I', 'colsample_bylevel': 0.7818800515438491, 'colsample_bytree': 0.7724637498474807, 'eta': 0.008563159803242014, 'eval_metric': ('many_logloss',), 'extra_dims': 4, 'gamma': 1.138875246733091, 'lambda': 0.004929740945608459, 'max_depth': 8, 'min_child_weight': 1.2189098145853323e-06, 'objective': 'manybinary:logistic', 'subsample': 0.92406704328224, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.66861	test-many_logloss:0.66929	valid-many_logloss:0.66929
[200]	train-many_logloss:0.02989	test-many_logloss:0.08598	valid-many_logloss:0.08441
[400]	train-many_logloss:0.00846	test-many_logloss:0.06844	valid-many_logloss:0.06731
[498]	train-many_logloss:0.00602	test-many_logloss:0.06796	valid-many_logloss:0.06689
{'alpha': 0, 'beta_eta': 0.008940815077581921, 'btype': 'R',

[0]	train-many_logloss:0.63633	test-many_logloss:0.63629	valid-many_logloss:0.63626
[52]	train-many_logloss:0.01783	test-many_logloss:0.10965	valid-many_logloss:0.12495
{'alpha': 7.049352314660277e-05, 'beta_eta': 0.12137127268126893, 'btype': 'Rn', 'colsample_bylevel': 0.9439090074967231, 'colsample_bytree': 0.5525925639927433, 'eta': 0.0009915520889927426, 'eval_metric': ('many_logloss',), 'extra_dims': 3, 'gamma': 0, 'lambda': 0.004073992634408555, 'max_depth': 2, 'min_child_weight': 1.3633143522854873e-06, 'objective': 'manybinary:logistic', 'subsample': 0.9311207654760087, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.68735	test-many_logloss:0.68734	valid-many_logloss:0.68734
[61]	train-many_logloss:0.16321	test-many_logloss:0.17985	valid-many_logloss:0.17341
{'alpha': 8.581958439353927e-05, 'beta_eta': 0.4437471588909133, 'btype': 'R', 'colsample_bylevel': 0.787125082065971, 'colsample_bytree': 0.5243474561426513, 'eta': 0.8099003407203538, 'eval_metric': ('many_loglos

[469]	train-many_logloss:0.00580	test-many_logloss:0.06051	valid-many_logloss:0.06952
{'alpha': 0, 'beta_eta': 0.008940815077581921, 'btype': 'R', 'colsample_bylevel': 0.947651214492775, 'colsample_bytree': 0.8633457836387857, 'eta': 0.3977029463511548, 'eval_metric': ('many_logloss',), 'extra_dims': 7, 'gamma': 0.003128466805888169, 'lambda': 0, 'max_depth': 6, 'min_child_weight': 23.81766245704533, 'objective': 'manybinary:logistic', 'subsample': 0.8289219023831419, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.78526	test-many_logloss:0.79068	valid-many_logloss:0.79058
[19]	train-many_logloss:nan	test-many_logloss:nan	valid-many_logloss:nan
{'alpha': 0, 'beta_eta': 0.7189262806817021, 'btype': 'Rn', 'colsample_bylevel': 0.8025654416649992, 'colsample_bytree': 0.8748052006921443, 'eta': 0.009393489875683971, 'eval_metric': ('many_logloss',), 'extra_dims': 3, 'gamma': 0, 'lambda': 0, 'max_depth': 2, 'min_child_weight': 0.013234942416650719, 'objective': 'manybinary:logistic'

[0]	train-many_logloss:0.68675	test-many_logloss:0.68676	valid-many_logloss:0.68675
[63]	train-many_logloss:0.16497	test-many_logloss:0.17519	valid-many_logloss:0.17782
{'alpha': 8.581958439353927e-05, 'beta_eta': 0.4437471588909133, 'btype': 'R', 'colsample_bylevel': 0.787125082065971, 'colsample_bytree': 0.5243474561426513, 'eta': 0.8099003407203538, 'eval_metric': ('many_logloss',), 'extra_dims': 3, 'gamma': 7.475715249737038e-05, 'lambda': 0.0003639646509775927, 'max_depth': 7, 'min_child_weight': 0.09759627970810976, 'objective': 'manybinary:logistic', 'subsample': 0.553442319017114, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:1.23065	test-many_logloss:1.24387	valid-many_logloss:1.24909
[20]	train-many_logloss:inf	test-many_logloss:inf	valid-many_logloss:inf
{'alpha': 0, 'beta_eta': 0.1292065517955496, 'btype': 'Rn', 'colsample_bylevel': 0.7061150549075759, 'colsample_bytree': 0.5843009197915733, 'eta': 0.002630107568123181, 'eval_metric': ('many_logloss',), 'extra_dims

[0]	train-many_logloss:0.81905	test-many_logloss:0.82068	valid-many_logloss:0.82202
[20]	train-many_logloss:nan	test-many_logloss:nan	valid-many_logloss:nan
{'alpha': 0, 'beta_eta': 0.7189262806817021, 'btype': 'Rn', 'colsample_bylevel': 0.8025654416649992, 'colsample_bytree': 0.8748052006921443, 'eta': 0.009393489875683971, 'eval_metric': ('many_logloss',), 'extra_dims': 3, 'gamma': 0, 'lambda': 0, 'max_depth': 2, 'min_child_weight': 0.013234942416650719, 'objective': 'manybinary:logistic', 'subsample': 0.8694541664166509, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.63477	test-many_logloss:0.63487	valid-many_logloss:0.63469
[26]	train-many_logloss:inf	test-many_logloss:inf	valid-many_logloss:inf
{'alpha': 1.2866021697680973e-07, 'beta_eta': 0.0016060730652605972, 'btype': 'In', 'colsample_bylevel': 0.6691714474136173, 'colsample_bytree': 0.9253629229741183, 'eta': 0.09251058420349038, 'eval_metric': ('many_logloss',), 'extra_dims': 2, 'gamma': 0, 'lambda': 0, 'max_depth':

[20]	train-many_logloss:inf	test-many_logloss:inf	valid-many_logloss:nan
{'alpha': 0, 'beta_eta': 0.1292065517955496, 'btype': 'Rn', 'colsample_bylevel': 0.7061150549075759, 'colsample_bytree': 0.5843009197915733, 'eta': 0.002630107568123181, 'eval_metric': ('many_logloss',), 'extra_dims': 2, 'gamma': 6.947115616562489, 'lambda': 0, 'max_depth': 10, 'min_child_weight': 41.36978142254861, 'objective': 'manybinary:logistic', 'subsample': 0.7453409993563878, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.67789	test-many_logloss:0.67787	valid-many_logloss:0.67788
[69]	train-many_logloss:0.10284	test-many_logloss:0.14121	valid-many_logloss:0.15305
{'alpha': 0, 'beta_eta': 0.11363289588177274, 'btype': 'Rn', 'colsample_bylevel': 0.8017715093779021, 'colsample_bytree': 0.7625111741024486, 'eta': 0.007606220882567551, 'eval_metric': ('many_logloss',), 'extra_dims': 4, 'gamma': 0.001053204847233351, 'lambda': 0.23316250775856961, 'max_depth': 1, 'min_child_weight': 1.9088058756735313e

{'alpha': 2.5240365004235283e-07, 'beta_eta': 0.0012215190837876378, 'btype': 'In', 'colsample_bylevel': 0.8788092996859548, 'colsample_bytree': 0.9767086318631194, 'eta': 0.14876123484960702, 'eval_metric': ('many_logloss',), 'extra_dims': 4, 'gamma': 0.055243004681850574, 'lambda': 0.008679746735189764, 'max_depth': 4, 'min_child_weight': 1.7599154275713062e-06, 'objective': 'manybinary:logistic', 'subsample': 0.8017393719767765, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.40105	test-many_logloss:0.40497	valid-many_logloss:0.40428
[91]	train-many_logloss:0.00159	test-many_logloss:0.05841	valid-many_logloss:0.06086
NEW BEST VALUE!                                                        
{'alpha': 1.3010046524364964e-07, 'beta_eta': 0.0009424848537938038, 'btype': 'In', 'colsample_bylevel': 0.8884275174529376, 'colsample_bytree': 0.9893656719086061, 'eta': 0.13615993293609235, 'eval_metric': ('many_logloss',), 'extra_dims': 2, 'gamma': 0, 'lambda': 4.7840334774841065, 'max_

[0]	train-many_logloss:0.67804	test-many_logloss:0.67808	valid-many_logloss:0.67807
[68]	train-many_logloss:0.10492	test-many_logloss:0.13878	valid-many_logloss:0.15422
{'alpha': 0, 'beta_eta': 0.11363289588177274, 'btype': 'Rn', 'colsample_bylevel': 0.8017715093779021, 'colsample_bytree': 0.7625111741024486, 'eta': 0.007606220882567551, 'eval_metric': ('many_logloss',), 'extra_dims': 4, 'gamma': 0.001053204847233351, 'lambda': 0.23316250775856961, 'max_depth': 1, 'min_child_weight': 1.9088058756735313e-07, 'objective': 'manybinary:logistic', 'subsample': 0.5239057927863935, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.64346	test-many_logloss:0.64350	valid-many_logloss:0.64349
[71]	train-many_logloss:0.16605	test-many_logloss:0.16553	valid-many_logloss:0.17703
{'alpha': 0, 'beta_eta': 0.2023070079269739, 'btype': 'I', 'colsample_bylevel': 0.98982155285091, 'colsample_bytree': 0.8137063678634311, 'eta': 0.021437825449384364, 'eval_metric': ('many_logloss',), 'extra_dims': 2,

[134]	train-many_logloss:0.00308	test-many_logloss:0.04998	valid-many_logloss:0.05925
{'alpha': 3.966043373903589e-07, 'beta_eta': 0.001004441246142552, 'btype': 'R', 'colsample_bylevel': 0.8733004876379642, 'colsample_bytree': 0.9704007541709373, 'eta': 0.09181110165955197, 'eval_metric': ('many_logloss',), 'extra_dims': 2, 'gamma': 0, 'lambda': 0, 'max_depth': 9, 'min_child_weight': 0.0005649624368618833, 'objective': 'manybinary:logistic', 'subsample': 0.9947066851224226, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.35632	test-many_logloss:0.35816	valid-many_logloss:0.35892
[64]	train-many_logloss:0.00511	test-many_logloss:0.10425	valid-many_logloss:0.12331
{'alpha': 5.784092011332092, 'beta_eta': 0.027216134331891087, 'btype': 'R', 'colsample_bylevel': 0.5693867059258523, 'colsample_bytree': 0.6575318179678347, 'eta': 0.07670577225189641, 'eval_metric': ('many_logloss',), 'extra_dims': 6, 'gamma': 0, 'lambda': 0, 'max_depth': 8, 'min_child_weight': 3.698366596197951e-05

{'alpha': 0, 'beta_eta': 0.2023070079269739, 'btype': 'I', 'colsample_bylevel': 0.98982155285091, 'colsample_bytree': 0.8137063678634311, 'eta': 0.021437825449384364, 'eval_metric': ('many_logloss',), 'extra_dims': 2, 'gamma': 0, 'lambda': 0, 'max_depth': 5, 'min_child_weight': 2.5600171535202e-07, 'objective': 'manybinary:logistic', 'subsample': 0.7174114308249333, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.65390	test-many_logloss:0.65464	valid-many_logloss:0.65469
[38]	train-many_logloss:0.01982	test-many_logloss:0.13258	valid-many_logloss:0.14361
{'alpha': 0, 'beta_eta': 0.675621539900281, 'btype': 'I', 'colsample_bylevel': 0.6189065258301154, 'colsample_bytree': 0.7882001782331782, 'eta': 0.003770077706590942, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0.0006381725365621713, 'lambda': 0, 'max_depth': 1, 'min_child_weight': 0.05537987461653371, 'objective': 'manybinary:logistic', 'subsample': 0.6537419349296045, 'output_dim': 10, 'nthread': 2}
[0]	trai

{'alpha': 5.784092011332092, 'beta_eta': 0.027216134331891087, 'btype': 'R', 'colsample_bylevel': 0.5693867059258523, 'colsample_bytree': 0.6575318179678347, 'eta': 0.07670577225189641, 'eval_metric': ('many_logloss',), 'extra_dims': 6, 'gamma': 0, 'lambda': 0, 'max_depth': 8, 'min_child_weight': 3.698366596197951e-05, 'objective': 'manybinary:logistic', 'subsample': 0.8155041171706532, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.35278	test-many_logloss:0.35298	valid-many_logloss:0.35276
[19]	train-many_logloss:inf	test-many_logloss:inf	valid-many_logloss:inf
{'alpha': 2.126746953167188e-06, 'beta_eta': 0.02123098842309264, 'btype': 'In', 'colsample_bylevel': 0.8623508697792003, 'colsample_bytree': 0.9403604335918812, 'eta': 0.029011416869536316, 'eval_metric': ('many_logloss',), 'extra_dims': 5, 'gamma': 0, 'lambda': 0, 'max_depth': 6, 'min_child_weight': 0.0001611272474738016, 'objective': 'manybinary:logistic', 'subsample': 0.9022280609486286, 'output_dim': 10, 'nthread

[0]	train-many_logloss:0.69055	test-many_logloss:0.69055	valid-many_logloss:0.69056
[24]	train-many_logloss:nan	test-many_logloss:nan	valid-many_logloss:nan
{'alpha': 0, 'beta_eta': 0.06489049340354942, 'btype': 'I', 'colsample_bylevel': 0.8729133438901646, 'colsample_bytree': 0.5205123603280772, 'eta': 0.0309650866085196, 'eval_metric': ('many_logloss',), 'extra_dims': 5, 'gamma': 0, 'lambda': 7.910525070154376e-06, 'max_depth': 3, 'min_child_weight': 7.405155036744976e-05, 'objective': 'manybinary:logistic', 'subsample': 0.6231629412057431, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.59919	test-many_logloss:0.59972	valid-many_logloss:0.59985
[112]	train-many_logloss:0.01430	test-many_logloss:0.06901	valid-many_logloss:0.07688
{'alpha': 0.014845224338582359, 'beta_eta': 0.03770137496335071, 'btype': 'Rn', 'colsample_bylevel': 0.7211698432094364, 'colsample_bytree': 0.712394975511369, 'eta': 0.008477424619483462, 'eval_metric': ('many_logloss',), 'extra_dims': 1, 'gamma': 

[0]	train-many_logloss:0.48789	test-many_logloss:0.48895	valid-many_logloss:0.48939
[176]	train-many_logloss:0.00149	test-many_logloss:0.05384	valid-many_logloss:0.05835
NEW BEST VALUE!                                                        
{'alpha': 5.483912491057741e-06, 'beta_eta': 0.0025370596635607544, 'btype': 'In', 'colsample_bylevel': 0.5048473961823136, 'colsample_bytree': 0.9510772187101697, 'eta': 0.05635840587385212, 'eval_metric': ('many_logloss',), 'extra_dims': 5, 'gamma': 0, 'lambda': 4.5397484807711255e-05, 'max_depth': 7, 'min_child_weight': 6.3162501873757385e-06, 'objective': 'manybinary:logistic', 'subsample': 0.5960145588730401, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.52570	test-many_logloss:0.52776	valid-many_logloss:0.52853
[107]	train-many_logloss:0.00135	test-many_logloss:0.06087	valid-many_logloss:0.06927
100%|██████████| 25/25 [11:59<00:00, 28.76s/trial, best loss: 0.057557]
{'alpha': 8.813057946902575e-07, 'beta_eta': 0.0016692834513794938

[0]	train-many_logloss:0.60177	test-many_logloss:0.60207	valid-many_logloss:0.60192
[107]	train-many_logloss:0.01384	test-many_logloss:0.07764	valid-many_logloss:0.07991
{'alpha': 0.014845224338582359, 'beta_eta': 0.03770137496335071, 'btype': 'Rn', 'colsample_bylevel': 0.7211698432094364, 'colsample_bytree': 0.712394975511369, 'eta': 0.008477424619483462, 'eval_metric': ('many_logloss',), 'extra_dims': 1, 'gamma': 0, 'lambda': 0, 'max_depth': 3, 'min_child_weight': 32.756265617720345, 'objective': 'manybinary:logistic', 'subsample': 0.6995489417449295, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.64989	test-many_logloss:0.64993	valid-many_logloss:0.64992
[198]	train-many_logloss:0.05178	test-many_logloss:0.09452	valid-many_logloss:0.09747
{'alpha': 2.7324514268482383, 'beta_eta': 0.08036958137175818, 'btype': 'R', 'colsample_bylevel': 0.7987531851832483, 'colsample_bytree': 0.7299740823920091, 'eta': 0.06030778497723384, 'eval_metric': ('many_logloss',), 'extra_dims': 3, '

[62]	train-many_logloss:0.00302	test-many_logloss:0.06096	valid-many_logloss:0.06149
100%|██████████| 25/25 [11:35<00:00, 27.81s/trial, best loss: 0.056916]
{'alpha': 8.813057946902575e-07, 'beta_eta': 0.0016692834513794938, 'btype': 'R', 'colsample_bylevel': 0.7489980469009814, 'colsample_bytree': 0.9420146676089547, 'eta': 0.0070706936561268224, 'eval_metric': ('many_logloss',), 'extra_dims': 2, 'gamma': 0, 'lambda': 0, 'max_depth': 5, 'min_child_weight': 2.1041667775099838e-05, 'objective': 'manybinary:logistic', 'subsample': 0.6262451727083667, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.65105	test-many_logloss:0.65113	valid-many_logloss:0.65105
[200]	train-many_logloss:0.09859	test-many_logloss:0.12139	valid-many_logloss:0.12068
[400]	train-many_logloss:0.03197	test-many_logloss:0.06496	valid-many_logloss:0.06531
[499]	train-many_logloss:0.01917	test-many_logloss:0.05496	valid-many_logloss:0.05571
NEW BEST VALUE!                                       
{'alpha': 0, 'be

{'alpha': 2.7324514268482383, 'beta_eta': 0.08036958137175818, 'btype': 'R', 'colsample_bylevel': 0.7987531851832483, 'colsample_bytree': 0.7299740823920091, 'eta': 0.06030778497723384, 'eval_metric': ('many_logloss',), 'extra_dims': 3, 'gamma': 0, 'lambda': 0, 'max_depth': 8, 'min_child_weight': 0.00023882505207060682, 'objective': 'manybinary:logistic', 'subsample': 0.8750385697829414, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.42114	test-many_logloss:0.42201	valid-many_logloss:0.42128
[19]	train-many_logloss:inf	test-many_logloss:inf	valid-many_logloss:inf
{'alpha': 0, 'beta_eta': 0.009060610135766442, 'btype': 'I', 'colsample_bylevel': 0.7332732947918047, 'colsample_bytree': 0.5030735772414907, 'eta': 0.0015245875835189877, 'eval_metric': ('many_logloss',), 'extra_dims': 7, 'gamma': 9.066632495478617e-06, 'lambda': 0, 'max_depth': 3, 'min_child_weight': 135.18110003620356, 'objective': 'manybinary:logistic', 'subsample': 0.5593100367488764, 'output_dim': 10, 'nthread'

[200]	train-many_logloss:0.10375	test-many_logloss:0.13136	valid-many_logloss:0.13065
[400]	train-many_logloss:0.03468	test-many_logloss:0.07376	valid-many_logloss:0.07346
[499]	train-many_logloss:0.02112	test-many_logloss:0.06326	valid-many_logloss:0.06317
NEW BEST VALUE!                                       
{'alpha': 0, 'beta_eta': 0.07375799600401638, 'btype': 'I', 'colsample_bylevel': 0.8357246683355697, 'colsample_bytree': 0.7491187395188089, 'eta': 0.605518780660455, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 0, 'max_depth': 5, 'min_child_weight': 118.844312919608, 'objective': 'manybinary:logistic', 'subsample': 0.6525233714163394, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.40121	test-many_logloss:0.40136	valid-many_logloss:0.40165
[141]	train-many_logloss:0.05420	test-many_logloss:0.12067	valid-many_logloss:0.12509
{'alpha': 0, 'beta_eta': 0.012588600896345226, 'btype': 'Rn', 'colsample_bylevel': 0.5547269545129563, 'colsample_bytre

NEW BEST VALUE!                                                        
{'alpha': 0, 'beta_eta': 0.009060610135766442, 'btype': 'I', 'colsample_bylevel': 0.7332732947918047, 'colsample_bytree': 0.5030735772414907, 'eta': 0.0015245875835189877, 'eval_metric': ('many_logloss',), 'extra_dims': 7, 'gamma': 9.066632495478617e-06, 'lambda': 0, 'max_depth': 3, 'min_child_weight': 135.18110003620356, 'objective': 'manybinary:logistic', 'subsample': 0.5593100367488764, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.68678	test-many_logloss:0.68678	valid-many_logloss:0.68678
[200]	train-many_logloss:0.16601	test-many_logloss:0.17621	valid-many_logloss:0.17339
[400]	train-many_logloss:0.08842	test-many_logloss:0.10627	valid-many_logloss:0.10378
[499]	train-many_logloss:0.07326	test-many_logloss:0.09584	valid-many_logloss:0.09344
{'alpha': 9.442657203270879e-05, 'beta_eta': 0.17396370377116593, 'btype': 'I', 'colsample_bylevel': 0.7781226592895327, 'colsample_bytree': 0.6398164878102375, 

[0]	train-many_logloss:0.39748	test-many_logloss:0.39817	valid-many_logloss:0.39926
[97]	train-many_logloss:nan	test-many_logloss:nan	valid-many_logloss:nan
{'alpha': 0, 'beta_eta': 0.012588600896345226, 'btype': 'Rn', 'colsample_bylevel': 0.5547269545129563, 'colsample_bytree': 0.9843635186756493, 'eta': 0.5949933323999959, 'eval_metric': ('many_logloss',), 'extra_dims': 4, 'gamma': 0.00013347887526644314, 'lambda': 0, 'max_depth': 9, 'min_child_weight': 7.283920920982167e-06, 'objective': 'manybinary:logistic', 'subsample': 0.9927469091208938, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.82005	test-many_logloss:0.84353	valid-many_logloss:0.83827
[19]	train-many_logloss:inf	test-many_logloss:inf	valid-many_logloss:inf
{'alpha': 1.0366863221923632, 'beta_eta': 0.13640103803528691, 'btype': 'Rn', 'colsample_bylevel': 0.6367989155272455, 'colsample_bytree': 0.6974194337846622, 'eta': 0.3029677824305006, 'eval_metric': ('many_logloss',), 'extra_dims': 6, 'gamma': 1.58128511561

[499]	train-many_logloss:0.06907	test-many_logloss:0.09451	valid-many_logloss:0.09179
{'alpha': 9.442657203270879e-05, 'beta_eta': 0.17396370377116593, 'btype': 'I', 'colsample_bylevel': 0.7781226592895327, 'colsample_bytree': 0.6398164878102375, 'eta': 0.0041851001124752684, 'eval_metric': ('many_logloss',), 'extra_dims': 7, 'gamma': 5.825640644469816e-06, 'lambda': 0, 'max_depth': 8, 'min_child_weight': 0.00537473915613328, 'objective': 'manybinary:logistic', 'subsample': 0.5391816076002862, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.67570	test-many_logloss:0.67607	valid-many_logloss:0.67605
[42]	train-many_logloss:0.01363	test-many_logloss:0.13245	valid-many_logloss:0.13963
{'alpha': 0, 'beta_eta': 0.7112779064038146, 'btype': 'I', 'colsample_bylevel': 0.980487461508577, 'colsample_bytree': 0.872601457169579, 'eta': 0.46040749038190176, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 0, 'max_depth': 5, 'min_child_weight': 0.0003681699610575285,

{'alpha': 1.0366863221923632, 'beta_eta': 0.13640103803528691, 'btype': 'Rn', 'colsample_bylevel': 0.6367989155272455, 'colsample_bytree': 0.6974194337846622, 'eta': 0.3029677824305006, 'eval_metric': ('many_logloss',), 'extra_dims': 6, 'gamma': 1.5812851156161036, 'lambda': 1.2783614674881826e-06, 'max_depth': 5, 'min_child_weight': 2.558034001077105, 'objective': 'manybinary:logistic', 'subsample': 0.7481581307597385, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.57556	test-many_logloss:0.56930	valid-many_logloss:0.57454
[19]	train-many_logloss:nan	test-many_logloss:inf	valid-many_logloss:inf
{'alpha': 0.0015268888255807838, 'beta_eta': 0.20179405533498707, 'btype': 'R', 'colsample_bylevel': 0.822829897275416, 'colsample_bytree': 0.8661025998484644, 'eta': 0.011434936316016709, 'eval_metric': ('many_logloss',), 'extra_dims': 1, 'gamma': 0, 'lambda': 2.091554134375915, 'max_depth': 8, 'min_child_weight': 1.6723514459697203e-05, 'objective': 'manybinary:logistic', 'subsample

{'alpha': 0, 'beta_eta': 0.7112779064038146, 'btype': 'I', 'colsample_bylevel': 0.980487461508577, 'colsample_bytree': 0.872601457169579, 'eta': 0.46040749038190176, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 0, 'max_depth': 5, 'min_child_weight': 0.0003681699610575285, 'objective': 'manybinary:logistic', 'subsample': 0.7369430758094139, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.38321	test-many_logloss:0.39448	valid-many_logloss:0.39775
[21]	train-many_logloss:inf	test-many_logloss:inf	valid-many_logloss:inf
{'alpha': 0, 'beta_eta': 0.004046103790871514, 'btype': 'I', 'colsample_bylevel': 0.7818800515438491, 'colsample_bytree': 0.7724637498474807, 'eta': 0.008563159803242014, 'eval_metric': ('many_logloss',), 'extra_dims': 4, 'gamma': 1.138875246733091, 'lambda': 0.004929740945608459, 'max_depth': 8, 'min_child_weight': 1.2189098145853323e-06, 'objective': 'manybinary:logistic', 'subsample': 0.92406704328224, 'output_dim': 10, 'nthread': 2}


[0]	train-many_logloss:0.63036	test-many_logloss:0.63047	valid-many_logloss:0.63051
[20]	train-many_logloss:inf	test-many_logloss:inf	valid-many_logloss:inf
{'alpha': 7.049352314660277e-05, 'beta_eta': 0.12137127268126893, 'btype': 'Rn', 'colsample_bylevel': 0.9439090074967231, 'colsample_bytree': 0.5525925639927433, 'eta': 0.0009915520889927426, 'eval_metric': ('many_logloss',), 'extra_dims': 3, 'gamma': 0, 'lambda': 0.004073992634408555, 'max_depth': 2, 'min_child_weight': 1.3633143522854873e-06, 'objective': 'manybinary:logistic', 'subsample': 0.9311207654760087, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.68683	test-many_logloss:0.68682	valid-many_logloss:0.68683
[61]	train-many_logloss:0.17084	test-many_logloss:0.18234	valid-many_logloss:0.17489
{'alpha': 8.581958439353927e-05, 'beta_eta': 0.4437471588909133, 'btype': 'R', 'colsample_bylevel': 0.787125082065971, 'colsample_bytree': 0.5243474561426513, 'eta': 0.8099003407203538, 'eval_metric': ('many_logloss',), 'extra

[400]	train-many_logloss:0.00780	test-many_logloss:0.07021	valid-many_logloss:0.06887
[460]	train-many_logloss:0.00627	test-many_logloss:0.07024	valid-many_logloss:0.06870
{'alpha': 0, 'beta_eta': 0.008940815077581921, 'btype': 'R', 'colsample_bylevel': 0.947651214492775, 'colsample_bytree': 0.8633457836387857, 'eta': 0.3977029463511548, 'eval_metric': ('many_logloss',), 'extra_dims': 7, 'gamma': 0.003128466805888169, 'lambda': 0, 'max_depth': 6, 'min_child_weight': 23.81766245704533, 'objective': 'manybinary:logistic', 'subsample': 0.8289219023831419, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.79929	test-many_logloss:0.79655	valid-many_logloss:0.80036
[19]	train-many_logloss:nan	test-many_logloss:nan	valid-many_logloss:nan
{'alpha': 0, 'beta_eta': 0.7189262806817021, 'btype': 'Rn', 'colsample_bylevel': 0.8025654416649992, 'colsample_bytree': 0.8748052006921443, 'eta': 0.009393489875683971, 'eval_metric': ('many_logloss',), 'extra_dims': 3, 'gamma': 0, 'lambda': 0, 'max_d

[0]	train-many_logloss:0.68719	test-many_logloss:0.68719	valid-many_logloss:0.68722
[60]	train-many_logloss:0.16372	test-many_logloss:0.17011	valid-many_logloss:0.17394
{'alpha': 8.581958439353927e-05, 'beta_eta': 0.4437471588909133, 'btype': 'R', 'colsample_bylevel': 0.787125082065971, 'colsample_bytree': 0.5243474561426513, 'eta': 0.8099003407203538, 'eval_metric': ('many_logloss',), 'extra_dims': 3, 'gamma': 7.475715249737038e-05, 'lambda': 0.0003639646509775927, 'max_depth': 7, 'min_child_weight': 0.09759627970810976, 'objective': 'manybinary:logistic', 'subsample': 0.553442319017114, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:1.21004	test-many_logloss:1.21940	valid-many_logloss:1.21537
[20]	train-many_logloss:inf	test-many_logloss:inf	valid-many_logloss:inf
{'alpha': 0, 'beta_eta': 0.1292065517955496, 'btype': 'Rn', 'colsample_bylevel': 0.7061150549075759, 'colsample_bytree': 0.5843009197915733, 'eta': 0.002630107568123181, 'eval_metric': ('many_logloss',), 'extra_dims

[0]	train-many_logloss:0.75820	test-many_logloss:0.75810	valid-many_logloss:0.76221
[21]	train-many_logloss:nan	test-many_logloss:nan	valid-many_logloss:nan
{'alpha': 0, 'beta_eta': 0.7189262806817021, 'btype': 'Rn', 'colsample_bylevel': 0.8025654416649992, 'colsample_bytree': 0.8748052006921443, 'eta': 0.009393489875683971, 'eval_metric': ('many_logloss',), 'extra_dims': 3, 'gamma': 0, 'lambda': 0, 'max_depth': 2, 'min_child_weight': 0.013234942416650719, 'objective': 'manybinary:logistic', 'subsample': 0.8694541664166509, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.63598	test-many_logloss:0.63605	valid-many_logloss:0.63609
[25]	train-many_logloss:nan	test-many_logloss:nan	valid-many_logloss:nan
{'alpha': 1.2866021697680973e-07, 'beta_eta': 0.0016060730652605972, 'btype': 'In', 'colsample_bylevel': 0.6691714474136173, 'colsample_bytree': 0.9253629229741183, 'eta': 0.09251058420349038, 'eval_metric': ('many_logloss',), 'extra_dims': 2, 'gamma': 0, 'lambda': 0, 'max_depth':

{'alpha': 0, 'beta_eta': 0.1292065517955496, 'btype': 'Rn', 'colsample_bylevel': 0.7061150549075759, 'colsample_bytree': 0.5843009197915733, 'eta': 0.002630107568123181, 'eval_metric': ('many_logloss',), 'extra_dims': 2, 'gamma': 6.947115616562489, 'lambda': 0, 'max_depth': 10, 'min_child_weight': 41.36978142254861, 'objective': 'manybinary:logistic', 'subsample': 0.7453409993563878, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.67723	test-many_logloss:0.67723	valid-many_logloss:0.67723
[70]	train-many_logloss:0.11067	test-many_logloss:0.15108	valid-many_logloss:0.16206
{'alpha': 0, 'beta_eta': 0.11363289588177274, 'btype': 'Rn', 'colsample_bylevel': 0.8017715093779021, 'colsample_bytree': 0.7625111741024486, 'eta': 0.007606220882567551, 'eval_metric': ('many_logloss',), 'extra_dims': 4, 'gamma': 0.001053204847233351, 'lambda': 0.23316250775856961, 'max_depth': 1, 'min_child_weight': 1.9088058756735313e-07, 'objective': 'manybinary:logistic', 'subsample': 0.5239057927863935,

{'alpha': 1.2866021697680973e-07, 'beta_eta': 0.0016060730652605972, 'btype': 'In', 'colsample_bylevel': 0.6691714474136173, 'colsample_bytree': 0.9253629229741183, 'eta': 0.09251058420349038, 'eval_metric': ('many_logloss',), 'extra_dims': 2, 'gamma': 0, 'lambda': 0, 'max_depth': 4, 'min_child_weight': 0.0006473488594456236, 'objective': 'manybinary:logistic', 'subsample': 0.8009418233768887, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.53217	test-many_logloss:0.53400	valid-many_logloss:0.53379
[138]	train-many_logloss:0.00277	test-many_logloss:0.05955	valid-many_logloss:0.06328
{'alpha': 3.966043373903589e-07, 'beta_eta': 0.001004441246142552, 'btype': 'R', 'colsample_bylevel': 0.8733004876379642, 'colsample_bytree': 0.9704007541709373, 'eta': 0.09181110165955197, 'eval_metric': ('many_logloss',), 'extra_dims': 2, 'gamma': 0, 'lambda': 0, 'max_depth': 9, 'min_child_weight': 0.0005649624368618833, 'objective': 'manybinary:logistic', 'subsample': 0.9947066851224226, 'output

[0]	train-many_logloss:0.64458	test-many_logloss:0.64455	valid-many_logloss:0.64450
[67]	train-many_logloss:0.15970	test-many_logloss:0.16936	valid-many_logloss:0.17087
{'alpha': 0, 'beta_eta': 0.2023070079269739, 'btype': 'I', 'colsample_bylevel': 0.98982155285091, 'colsample_bytree': 0.8137063678634311, 'eta': 0.021437825449384364, 'eval_metric': ('many_logloss',), 'extra_dims': 2, 'gamma': 0, 'lambda': 0, 'max_depth': 5, 'min_child_weight': 2.5600171535202e-07, 'objective': 'manybinary:logistic', 'subsample': 0.7174114308249333, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.65524	test-many_logloss:0.65597	valid-many_logloss:0.65586
[41]	train-many_logloss:0.01786	test-many_logloss:0.13657	valid-many_logloss:0.12871
{'alpha': 0, 'beta_eta': 0.675621539900281, 'btype': 'I', 'colsample_bylevel': 0.6189065258301154, 'colsample_bytree': 0.7882001782331782, 'eta': 0.003770077706590942, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0.0006381725365621713, 'lambda': 

[0]	train-many_logloss:0.36419	test-many_logloss:0.36565	valid-many_logloss:0.36490
[63]	train-many_logloss:0.00302	test-many_logloss:0.10945	valid-many_logloss:0.11223
{'alpha': 5.784092011332092, 'beta_eta': 0.027216134331891087, 'btype': 'R', 'colsample_bylevel': 0.5693867059258523, 'colsample_bytree': 0.6575318179678347, 'eta': 0.07670577225189641, 'eval_metric': ('many_logloss',), 'extra_dims': 6, 'gamma': 0, 'lambda': 0, 'max_depth': 8, 'min_child_weight': 3.698366596197951e-05, 'objective': 'manybinary:logistic', 'subsample': 0.8155041171706532, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.35962	test-many_logloss:0.35947	valid-many_logloss:0.35915
[19]	train-many_logloss:inf	test-many_logloss:inf	valid-many_logloss:inf
{'alpha': 2.126746953167188e-06, 'beta_eta': 0.02123098842309264, 'btype': 'In', 'colsample_bylevel': 0.8623508697792003, 'colsample_bytree': 0.9403604335918812, 'eta': 0.029011416869536316, 'eval_metric': ('many_logloss',), 'extra_dims': 5, 'gamma': 0

[38]	train-many_logloss:0.02046	test-many_logloss:0.11844	valid-many_logloss:0.14218
{'alpha': 0, 'beta_eta': 0.675621539900281, 'btype': 'I', 'colsample_bylevel': 0.6189065258301154, 'colsample_bytree': 0.7882001782331782, 'eta': 0.003770077706590942, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0.0006381725365621713, 'lambda': 0, 'max_depth': 1, 'min_child_weight': 0.05537987461653371, 'objective': 'manybinary:logistic', 'subsample': 0.6537419349296045, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.69055	test-many_logloss:0.69057	valid-many_logloss:0.69056
[24]	train-many_logloss:0.23846	test-many_logloss:0.24478	valid-many_logloss:0.24483
{'alpha': 0, 'beta_eta': 0.06489049340354942, 'btype': 'I', 'colsample_bylevel': 0.8729133438901646, 'colsample_bytree': 0.5205123603280772, 'eta': 0.0309650866085196, 'eval_metric': ('many_logloss',), 'extra_dims': 5, 'gamma': 0, 'lambda': 7.910525070154376e-06, 'max_depth': 3, 'min_child_weight': 7.405155036744976e-05, '

NEW BEST VALUE!                                                        
{'alpha': 1.4664754681880375e-06, 'beta_eta': 0.002000002635921915, 'btype': 'In', 'colsample_bylevel': 0.6603963754053725, 'colsample_bytree': 0.9265334879663432, 'eta': 0.07115224376188432, 'eval_metric': ('many_logloss',), 'extra_dims': 6, 'gamma': 0, 'lambda': 1.2098069395080887e-07, 'max_depth': 4, 'min_child_weight': 4.486306030624562e-05, 'objective': 'manybinary:logistic', 'subsample': 0.6755268295322712, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.47720	test-many_logloss:0.47874	valid-many_logloss:0.47872
[153]	train-many_logloss:0.00213	test-many_logloss:0.05164	valid-many_logloss:0.05745
NEW BEST VALUE!                                                        
{'alpha': 2.559875417296009e-06, 'beta_eta': 0.0032775937398758018, 'btype': 'In', 'colsample_bylevel': 0.502178458314058, 'colsample_bytree': 0.9386828416019393, 'eta': 0.09830479795145422, 'eval_metric': ('many_logloss',), 'extra_dims'

[24]	train-many_logloss:0.22964	test-many_logloss:0.24519	valid-many_logloss:0.24078
{'alpha': 0, 'beta_eta': 0.06489049340354942, 'btype': 'I', 'colsample_bylevel': 0.8729133438901646, 'colsample_bytree': 0.5205123603280772, 'eta': 0.0309650866085196, 'eval_metric': ('many_logloss',), 'extra_dims': 5, 'gamma': 0, 'lambda': 7.910525070154376e-06, 'max_depth': 3, 'min_child_weight': 7.405155036744976e-05, 'objective': 'manybinary:logistic', 'subsample': 0.6231629412057431, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.60527	test-many_logloss:0.60609	valid-many_logloss:0.60558
[100]	train-many_logloss:0.01554	test-many_logloss:0.07612	valid-many_logloss:0.07975
{'alpha': 0.014845224338582359, 'beta_eta': 0.03770137496335071, 'btype': 'Rn', 'colsample_bylevel': 0.7211698432094364, 'colsample_bytree': 0.712394975511369, 'eta': 0.008477424619483462, 'eval_metric': ('many_logloss',), 'extra_dims': 1, 'gamma': 0, 'lambda': 0, 'max_depth': 3, 'min_child_weight': 32.756265617720345, 

{'alpha': 0.12285362381316108, 'beta_eta': 0.0032518959545431397, 'btype': 'R', 'colsample_bylevel': 0.6766284350099434, 'colsample_bytree': 0.8251017236629836, 'eta': 0.1895635060238536, 'eval_metric': ('many_logloss',), 'extra_dims': 3, 'gamma': 0, 'lambda': 0, 'max_depth': 7, 'min_child_weight': 5.941002139647163e-06, 'objective': 'manybinary:logistic', 'subsample': 0.5960145588730401, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.33905	test-many_logloss:0.34128	valid-many_logloss:0.34273
[123]	train-many_logloss:0.00062	test-many_logloss:0.07125	valid-many_logloss:0.07070
100%|██████████| 25/25 [11:12<00:00, 26.89s/trial, best loss: 0.054402]
{'alpha': 8.813057946902575e-07, 'beta_eta': 0.0016692834513794938, 'btype': 'R', 'colsample_bylevel': 0.7489980469009814, 'colsample_bytree': 0.9420146676089547, 'eta': 0.0070706936561268224, 'eval_metric': ('many_logloss',), 'extra_dims': 2, 'gamma': 0, 'lambda': 0, 'max_depth': 5, 'min_child_weight': 2.1041667775099838e-05, 'obje

[0]	train-many_logloss:0.64998	test-many_logloss:0.65002	valid-many_logloss:0.65004
[200]	train-many_logloss:0.04677	test-many_logloss:0.08554	valid-many_logloss:0.08879
[204]	train-many_logloss:0.04592	test-many_logloss:0.08565	valid-many_logloss:0.08895
{'alpha': 2.7324514268482383, 'beta_eta': 0.08036958137175818, 'btype': 'R', 'colsample_bylevel': 0.7987531851832483, 'colsample_bytree': 0.7299740823920091, 'eta': 0.06030778497723384, 'eval_metric': ('many_logloss',), 'extra_dims': 3, 'gamma': 0, 'lambda': 0, 'max_depth': 8, 'min_child_weight': 0.00023882505207060682, 'objective': 'manybinary:logistic', 'subsample': 0.8750385697829414, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.41785	test-many_logloss:0.41796	valid-many_logloss:0.41839
[65]	train-many_logloss:0.00064	test-many_logloss:0.05915	valid-many_logloss:0.05466
NEW BEST VALUE!                                                        
{'alpha': 0, 'beta_eta': 0.009060610135766442, 'btype': 'I', 'colsample_bylevel'

[0]	train-many_logloss:0.65694	test-many_logloss:0.65705	valid-many_logloss:0.65693
[200]	train-many_logloss:0.10916	test-many_logloss:0.13798	valid-many_logloss:0.13774
[400]	train-many_logloss:0.03857	test-many_logloss:0.07997	valid-many_logloss:0.07999
[499]	train-many_logloss:0.02423	test-many_logloss:0.06964	valid-many_logloss:0.06963
NEW BEST VALUE!                                       
{'alpha': 0, 'beta_eta': 0.07375799600401638, 'btype': 'I', 'colsample_bylevel': 0.8357246683355697, 'colsample_bytree': 0.7491187395188089, 'eta': 0.605518780660455, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 0, 'max_depth': 5, 'min_child_weight': 118.844312919608, 'objective': 'manybinary:logistic', 'subsample': 0.6525233714163394, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.40071	test-many_logloss:0.40214	valid-many_logloss:0.40269
[125]	train-many_logloss:0.06285	test-many_logloss:0.13230	valid-many_logloss:0.13131
{'alpha': 0, 'beta_eta': 0.01258860

NEW BEST VALUE!                                                       
{'alpha': 0, 'beta_eta': 0.009060610135766442, 'btype': 'I', 'colsample_bylevel': 0.7332732947918047, 'colsample_bytree': 0.5030735772414907, 'eta': 0.0015245875835189877, 'eval_metric': ('many_logloss',), 'extra_dims': 7, 'gamma': 9.066632495478617e-06, 'lambda': 0, 'max_depth': 3, 'min_child_weight': 135.18110003620356, 'objective': 'manybinary:logistic', 'subsample': 0.5593100367488764, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.68666	test-many_logloss:0.68668	valid-many_logloss:0.68668
[200]	train-many_logloss:0.16995	test-many_logloss:0.17990	valid-many_logloss:0.17995
[400]	train-many_logloss:0.09353	test-many_logloss:0.11130	valid-many_logloss:0.11193
[499]	train-many_logloss:0.07882	test-many_logloss:0.10140	valid-many_logloss:0.10224
{'alpha': 9.442657203270879e-05, 'beta_eta': 0.17396370377116593, 'btype': 'I', 'colsample_bylevel': 0.7781226592895327, 'colsample_bytree': 0.6398164878102375, '

[0]	train-many_logloss:0.39693	test-many_logloss:0.39825	valid-many_logloss:0.39862
[77]	train-many_logloss:0.09120	test-many_logloss:0.12475	valid-many_logloss:0.12733
{'alpha': 0, 'beta_eta': 0.012588600896345226, 'btype': 'Rn', 'colsample_bylevel': 0.5547269545129563, 'colsample_bytree': 0.9843635186756493, 'eta': 0.5949933323999959, 'eval_metric': ('many_logloss',), 'extra_dims': 4, 'gamma': 0.00013347887526644314, 'lambda': 0, 'max_depth': 9, 'min_child_weight': 7.283920920982167e-06, 'objective': 'manybinary:logistic', 'subsample': 0.9927469091208938, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.92190	test-many_logloss:0.95127	valid-many_logloss:0.93386
[20]	train-many_logloss:inf	test-many_logloss:inf	valid-many_logloss:inf
{'alpha': 1.0366863221923632, 'beta_eta': 0.13640103803528691, 'btype': 'Rn', 'colsample_bylevel': 0.6367989155272455, 'colsample_bytree': 0.6974194337846622, 'eta': 0.3029677824305006, 'eval_metric': ('many_logloss',), 'extra_dims': 6, 'gamma': 1

[499]	train-many_logloss:0.06573	test-many_logloss:0.08807	valid-many_logloss:0.08996
{'alpha': 9.442657203270879e-05, 'beta_eta': 0.17396370377116593, 'btype': 'I', 'colsample_bylevel': 0.7781226592895327, 'colsample_bytree': 0.6398164878102375, 'eta': 0.0041851001124752684, 'eval_metric': ('many_logloss',), 'extra_dims': 7, 'gamma': 5.825640644469816e-06, 'lambda': 0, 'max_depth': 8, 'min_child_weight': 0.00537473915613328, 'objective': 'manybinary:logistic', 'subsample': 0.5391816076002862, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.67469	test-many_logloss:0.67500	valid-many_logloss:0.67497
[42]	train-many_logloss:0.01377	test-many_logloss:0.13287	valid-many_logloss:0.12830
{'alpha': 0, 'beta_eta': 0.7112779064038146, 'btype': 'I', 'colsample_bylevel': 0.980487461508577, 'colsample_bytree': 0.872601457169579, 'eta': 0.46040749038190176, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 0, 'max_depth': 5, 'min_child_weight': 0.0003681699610575285,

[0]	train-many_logloss:0.97197	test-many_logloss:0.99095	valid-many_logloss:1.00118
[19]	train-many_logloss:inf	test-many_logloss:inf	valid-many_logloss:inf
{'alpha': 1.0366863221923632, 'beta_eta': 0.13640103803528691, 'btype': 'Rn', 'colsample_bylevel': 0.6367989155272455, 'colsample_bytree': 0.6974194337846622, 'eta': 0.3029677824305006, 'eval_metric': ('many_logloss',), 'extra_dims': 6, 'gamma': 1.5812851156161036, 'lambda': 1.2783614674881826e-06, 'max_depth': 5, 'min_child_weight': 2.558034001077105, 'objective': 'manybinary:logistic', 'subsample': 0.7481581307597385, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.57597	test-many_logloss:0.58148	valid-many_logloss:0.57950
[20]	train-many_logloss:nan	test-many_logloss:inf	valid-many_logloss:inf
{'alpha': 0.0015268888255807838, 'beta_eta': 0.20179405533498707, 'btype': 'R', 'colsample_bylevel': 0.822829897275416, 'colsample_bytree': 0.8661025998484644, 'eta': 0.011434936316016709, 'eval_metric': ('many_logloss',), 'extra_

[0]	train-many_logloss:0.67588	test-many_logloss:0.67609	valid-many_logloss:0.67615
[42]	train-many_logloss:0.01293	test-many_logloss:0.11668	valid-many_logloss:0.12061
{'alpha': 0, 'beta_eta': 0.7112779064038146, 'btype': 'I', 'colsample_bylevel': 0.980487461508577, 'colsample_bytree': 0.872601457169579, 'eta': 0.46040749038190176, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 0, 'max_depth': 5, 'min_child_weight': 0.0003681699610575285, 'objective': 'manybinary:logistic', 'subsample': 0.7369430758094139, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.38030	test-many_logloss:0.39597	valid-many_logloss:0.39447
[21]	train-many_logloss:inf	test-many_logloss:nan	valid-many_logloss:inf
{'alpha': 0, 'beta_eta': 0.004046103790871514, 'btype': 'I', 'colsample_bylevel': 0.7818800515438491, 'colsample_bytree': 0.7724637498474807, 'eta': 0.008563159803242014, 'eval_metric': ('many_logloss',), 'extra_dims': 4, 'gamma': 1.138875246733091, 'lambda': 0.0049297409

[19]	train-many_logloss:inf	test-many_logloss:inf	valid-many_logloss:inf
{'alpha': 0.0015268888255807838, 'beta_eta': 0.20179405533498707, 'btype': 'R', 'colsample_bylevel': 0.822829897275416, 'colsample_bytree': 0.8661025998484644, 'eta': 0.011434936316016709, 'eval_metric': ('many_logloss',), 'extra_dims': 1, 'gamma': 0, 'lambda': 2.091554134375915, 'max_depth': 8, 'min_child_weight': 1.6723514459697203e-05, 'objective': 'manybinary:logistic', 'subsample': 0.6325684178842439, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.63816	test-many_logloss:0.63794	valid-many_logloss:0.63801
[20]	train-many_logloss:inf	test-many_logloss:inf	valid-many_logloss:inf
{'alpha': 7.049352314660277e-05, 'beta_eta': 0.12137127268126893, 'btype': 'Rn', 'colsample_bylevel': 0.9439090074967231, 'colsample_bytree': 0.5525925639927433, 'eta': 0.0009915520889927426, 'eval_metric': ('many_logloss',), 'extra_dims': 3, 'gamma': 0, 'lambda': 0.004073992634408555, 'max_depth': 2, 'min_child_weight': 1.363

[0]	train-many_logloss:0.66751	test-many_logloss:0.66813	valid-many_logloss:0.66814
[200]	train-many_logloss:0.02910	test-many_logloss:0.07797	valid-many_logloss:0.08819
[400]	train-many_logloss:0.00803	test-many_logloss:0.06028	valid-many_logloss:0.07311
[468]	train-many_logloss:0.00623	test-many_logloss:0.05964	valid-many_logloss:0.07304
{'alpha': 0, 'beta_eta': 0.008940815077581921, 'btype': 'R', 'colsample_bylevel': 0.947651214492775, 'colsample_bytree': 0.8633457836387857, 'eta': 0.3977029463511548, 'eval_metric': ('many_logloss',), 'extra_dims': 7, 'gamma': 0.003128466805888169, 'lambda': 0, 'max_depth': 6, 'min_child_weight': 23.81766245704533, 'objective': 'manybinary:logistic', 'subsample': 0.8289219023831419, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.79768	test-many_logloss:0.79587	valid-many_logloss:0.79706
[20]	train-many_logloss:nan	test-many_logloss:nan	valid-many_logloss:nan
{'alpha': 0, 'beta_eta': 0.7189262806817021, 'btype': 'Rn', 'colsample_bylevel': 0

[0]	train-many_logloss:0.68689	test-many_logloss:0.68687	valid-many_logloss:0.68688
[63]	train-many_logloss:0.16942	test-many_logloss:0.17074	valid-many_logloss:0.17304
{'alpha': 8.581958439353927e-05, 'beta_eta': 0.4437471588909133, 'btype': 'R', 'colsample_bylevel': 0.787125082065971, 'colsample_bytree': 0.5243474561426513, 'eta': 0.8099003407203538, 'eval_metric': ('many_logloss',), 'extra_dims': 3, 'gamma': 7.475715249737038e-05, 'lambda': 0.0003639646509775927, 'max_depth': 7, 'min_child_weight': 0.09759627970810976, 'objective': 'manybinary:logistic', 'subsample': 0.553442319017114, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:1.21286	test-many_logloss:1.22346	valid-many_logloss:1.22254
[19]	train-many_logloss:inf	test-many_logloss:inf	valid-many_logloss:inf
{'alpha': 0, 'beta_eta': 0.1292065517955496, 'btype': 'Rn', 'colsample_bylevel': 0.7061150549075759, 'colsample_bytree': 0.5843009197915733, 'eta': 0.002630107568123181, 'eval_metric': ('many_logloss',), 'extra_dims

[20]	train-many_logloss:nan	test-many_logloss:nan	valid-many_logloss:nan
{'alpha': 0, 'beta_eta': 0.7189262806817021, 'btype': 'Rn', 'colsample_bylevel': 0.8025654416649992, 'colsample_bytree': 0.8748052006921443, 'eta': 0.009393489875683971, 'eval_metric': ('many_logloss',), 'extra_dims': 3, 'gamma': 0, 'lambda': 0, 'max_depth': 2, 'min_child_weight': 0.013234942416650719, 'objective': 'manybinary:logistic', 'subsample': 0.8694541664166509, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.63715	test-many_logloss:0.63695	valid-many_logloss:0.63699
[24]	train-many_logloss:nan	test-many_logloss:nan	valid-many_logloss:nan
{'alpha': 2.5240365004235283e-07, 'beta_eta': 0.0012215190837876378, 'btype': 'In', 'colsample_bylevel': 0.8788092996859548, 'colsample_bytree': 0.9767086318631194, 'eta': 0.14876123484960702, 'eval_metric': ('many_logloss',), 'extra_dims': 4, 'gamma': 0.055243004681850574, 'lambda': 0.008679746735189764, 'max_depth': 4, 'min_child_weight': 1.7599154275713062e-06

[0]	train-many_logloss:1.28956	test-many_logloss:1.28824	valid-many_logloss:1.29760
[19]	train-many_logloss:inf	test-many_logloss:inf	valid-many_logloss:inf
{'alpha': 0, 'beta_eta': 0.1292065517955496, 'btype': 'Rn', 'colsample_bylevel': 0.7061150549075759, 'colsample_bytree': 0.5843009197915733, 'eta': 0.002630107568123181, 'eval_metric': ('many_logloss',), 'extra_dims': 2, 'gamma': 6.947115616562489, 'lambda': 0, 'max_depth': 10, 'min_child_weight': 41.36978142254861, 'objective': 'manybinary:logistic', 'subsample': 0.7453409993563878, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.67775	test-many_logloss:0.67778	valid-many_logloss:0.67776
[68]	train-many_logloss:0.09737	test-many_logloss:0.13997	valid-many_logloss:0.14747
{'alpha': 0, 'beta_eta': 0.11363289588177274, 'btype': 'Rn', 'colsample_bylevel': 0.8017715093779021, 'colsample_bytree': 0.7625111741024486, 'eta': 0.007606220882567551, 'eval_metric': ('many_logloss',), 'extra_dims': 4, 'gamma': 0.001053204847233351, 'l

{'alpha': 2.5240365004235283e-07, 'beta_eta': 0.0012215190837876378, 'btype': 'In', 'colsample_bylevel': 0.8788092996859548, 'colsample_bytree': 0.9767086318631194, 'eta': 0.14876123484960702, 'eval_metric': ('many_logloss',), 'extra_dims': 4, 'gamma': 0.055243004681850574, 'lambda': 0.008679746735189764, 'max_depth': 4, 'min_child_weight': 1.7599154275713062e-06, 'objective': 'manybinary:logistic', 'subsample': 0.8017393719767765, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.39100	test-many_logloss:0.39284	valid-many_logloss:0.39379
[85]	train-many_logloss:0.00264	test-many_logloss:0.05950	valid-many_logloss:0.06784
{'alpha': 1.3010046524364964e-07, 'beta_eta': 0.0009424848537938038, 'btype': 'In', 'colsample_bylevel': 0.8884275174529376, 'colsample_bytree': 0.9893656719086061, 'eta': 0.13615993293609235, 'eval_metric': ('many_logloss',), 'extra_dims': 2, 'gamma': 0, 'lambda': 4.7840334774841065, 'max_depth': 4, 'min_child_weight': 0.0008214702178563566, 'objective': 'many

[68]	train-many_logloss:0.09951	test-many_logloss:0.14639	valid-many_logloss:0.15491
{'alpha': 0, 'beta_eta': 0.11363289588177274, 'btype': 'Rn', 'colsample_bylevel': 0.8017715093779021, 'colsample_bytree': 0.7625111741024486, 'eta': 0.007606220882567551, 'eval_metric': ('many_logloss',), 'extra_dims': 4, 'gamma': 0.001053204847233351, 'lambda': 0.23316250775856961, 'max_depth': 1, 'min_child_weight': 1.9088058756735313e-07, 'objective': 'manybinary:logistic', 'subsample': 0.5239057927863935, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.64208	test-many_logloss:0.64203	valid-many_logloss:0.64206
[73]	train-many_logloss:0.16791	test-many_logloss:0.17029	valid-many_logloss:0.18034
{'alpha': 0, 'beta_eta': 0.2023070079269739, 'btype': 'I', 'colsample_bylevel': 0.98982155285091, 'colsample_bytree': 0.8137063678634311, 'eta': 0.021437825449384364, 'eval_metric': ('many_logloss',), 'extra_dims': 2, 'gamma': 0, 'lambda': 0, 'max_depth': 5, 'min_child_weight': 2.5600171535202e-07, '

[136]	train-many_logloss:0.00358	test-many_logloss:0.06019	valid-many_logloss:0.06734
{'alpha': 3.966043373903589e-07, 'beta_eta': 0.001004441246142552, 'btype': 'R', 'colsample_bylevel': 0.8733004876379642, 'colsample_bytree': 0.9704007541709373, 'eta': 0.09181110165955197, 'eval_metric': ('many_logloss',), 'extra_dims': 2, 'gamma': 0, 'lambda': 0, 'max_depth': 9, 'min_child_weight': 0.0005649624368618833, 'objective': 'manybinary:logistic', 'subsample': 0.9947066851224226, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.36577	test-many_logloss:0.36867	valid-many_logloss:0.36877
[59]	train-many_logloss:0.00484	test-many_logloss:0.11882	valid-many_logloss:0.12944
{'alpha': 5.784092011332092, 'beta_eta': 0.027216134331891087, 'btype': 'R', 'colsample_bylevel': 0.5693867059258523, 'colsample_bytree': 0.6575318179678347, 'eta': 0.07670577225189641, 'eval_metric': ('many_logloss',), 'extra_dims': 6, 'gamma': 0, 'lambda': 0, 'max_depth': 8, 'min_child_weight': 3.698366596197951e-05

{'alpha': 0, 'beta_eta': 0.2023070079269739, 'btype': 'I', 'colsample_bylevel': 0.98982155285091, 'colsample_bytree': 0.8137063678634311, 'eta': 0.021437825449384364, 'eval_metric': ('many_logloss',), 'extra_dims': 2, 'gamma': 0, 'lambda': 0, 'max_depth': 5, 'min_child_weight': 2.5600171535202e-07, 'objective': 'manybinary:logistic', 'subsample': 0.7174114308249333, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.65521	test-many_logloss:0.65582	valid-many_logloss:0.65574
[40]	train-many_logloss:0.01717	test-many_logloss:0.14393	valid-many_logloss:0.12943
{'alpha': 0, 'beta_eta': 0.675621539900281, 'btype': 'I', 'colsample_bylevel': 0.6189065258301154, 'colsample_bytree': 0.7882001782331782, 'eta': 0.003770077706590942, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0.0006381725365621713, 'lambda': 0, 'max_depth': 1, 'min_child_weight': 0.05537987461653371, 'objective': 'manybinary:logistic', 'subsample': 0.6537419349296045, 'output_dim': 10, 'nthread': 2}
[0]	trai

[0]	train-many_logloss:0.35037	test-many_logloss:0.34959	valid-many_logloss:0.35017
[19]	train-many_logloss:inf	test-many_logloss:inf	valid-many_logloss:inf
{'alpha': 2.126746953167188e-06, 'beta_eta': 0.02123098842309264, 'btype': 'In', 'colsample_bylevel': 0.8623508697792003, 'colsample_bytree': 0.9403604335918812, 'eta': 0.029011416869536316, 'eval_metric': ('many_logloss',), 'extra_dims': 5, 'gamma': 0, 'lambda': 0, 'max_depth': 6, 'min_child_weight': 0.0001611272474738016, 'objective': 'manybinary:logistic', 'subsample': 0.9022280609486286, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.59675	test-many_logloss:0.59823	valid-many_logloss:0.59824
[123]	train-many_logloss:0.00160	test-many_logloss:0.06696	valid-many_logloss:0.06320
{'alpha': 0.12285362381316108, 'beta_eta': 0.0032518959545431397, 'btype': 'R', 'colsample_bylevel': 0.6766284350099434, 'colsample_bytree': 0.8251017236629836, 'eta': 0.1895635060238536, 'eval_metric': ('many_logloss',), 'extra_dims': 3, 'gamma'

{'alpha': 0, 'beta_eta': 0.06489049340354942, 'btype': 'I', 'colsample_bylevel': 0.8729133438901646, 'colsample_bytree': 0.5205123603280772, 'eta': 0.0309650866085196, 'eval_metric': ('many_logloss',), 'extra_dims': 5, 'gamma': 0, 'lambda': 7.910525070154376e-06, 'max_depth': 3, 'min_child_weight': 7.405155036744976e-05, 'objective': 'manybinary:logistic', 'subsample': 0.6231629412057431, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.59881	test-many_logloss:0.59815	valid-many_logloss:0.59919
[107]	train-many_logloss:0.01610	test-many_logloss:0.07529	valid-many_logloss:0.08393
{'alpha': 0.014845224338582359, 'beta_eta': 0.03770137496335071, 'btype': 'Rn', 'colsample_bylevel': 0.7211698432094364, 'colsample_bytree': 0.712394975511369, 'eta': 0.008477424619483462, 'eval_metric': ('many_logloss',), 'extra_dims': 1, 'gamma': 0, 'lambda': 0, 'max_depth': 3, 'min_child_weight': 32.756265617720345, 'objective': 'manybinary:logistic', 'subsample': 0.6995489417449295, 'output_dim': 10

{'alpha': 0.12285362381316108, 'beta_eta': 0.0032518959545431397, 'btype': 'R', 'colsample_bylevel': 0.6766284350099434, 'colsample_bytree': 0.8251017236629836, 'eta': 0.1895635060238536, 'eval_metric': ('many_logloss',), 'extra_dims': 3, 'gamma': 0, 'lambda': 0, 'max_depth': 7, 'min_child_weight': 5.941002139647163e-06, 'objective': 'manybinary:logistic', 'subsample': 0.5960145588730401, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.33375	test-many_logloss:0.33517	valid-many_logloss:0.33446
[101]	train-many_logloss:0.00089	test-many_logloss:0.06676	valid-many_logloss:0.07073
100%|██████████| 25/25 [11:34<00:00, 27.80s/trial, best loss: 0.055482]
{'alpha': 8.813057946902575e-07, 'beta_eta': 0.0016692834513794938, 'btype': 'R', 'colsample_bylevel': 0.7489980469009814, 'colsample_bytree': 0.9420146676089547, 'eta': 0.0070706936561268224, 'eval_metric': ('many_logloss',), 'extra_dims': 2, 'gamma': 0, 'lambda': 0, 'max_depth': 5, 'min_child_weight': 2.1041667775099838e-05, 'obje

[0]	train-many_logloss:0.65305	test-many_logloss:0.65297	valid-many_logloss:0.65313
[200]	train-many_logloss:0.04954	test-many_logloss:0.10246	valid-many_logloss:0.09532
[203]	train-many_logloss:0.04886	test-many_logloss:0.10262	valid-many_logloss:0.09546
{'alpha': 2.7324514268482383, 'beta_eta': 0.08036958137175818, 'btype': 'R', 'colsample_bylevel': 0.7987531851832483, 'colsample_bytree': 0.7299740823920091, 'eta': 0.06030778497723384, 'eval_metric': ('many_logloss',), 'extra_dims': 3, 'gamma': 0, 'lambda': 0, 'max_depth': 8, 'min_child_weight': 0.00023882505207060682, 'objective': 'manybinary:logistic', 'subsample': 0.8750385697829414, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.42196	test-many_logloss:0.42207	valid-many_logloss:0.42207
[65]	train-many_logloss:0.00069	test-many_logloss:0.06019	valid-many_logloss:0.05699
NEW BEST VALUE!                                                        
{'alpha': 0, 'beta_eta': 0.009060610135766442, 'btype': 'I', 'colsample_bylevel'

[0]	train-many_logloss:0.65198	test-many_logloss:0.65196	valid-many_logloss:0.65212
[200]	train-many_logloss:0.10164	test-many_logloss:0.12623	valid-many_logloss:0.12873
[400]	train-many_logloss:0.03322	test-many_logloss:0.06992	valid-many_logloss:0.07124
[499]	train-many_logloss:0.02014	test-many_logloss:0.06035	valid-many_logloss:0.06149
NEW BEST VALUE!                                       
{'alpha': 0, 'beta_eta': 0.07375799600401638, 'btype': 'I', 'colsample_bylevel': 0.8357246683355697, 'colsample_bytree': 0.7491187395188089, 'eta': 0.605518780660455, 'eval_metric': ('many_logloss',), 'extra_dims': 0, 'gamma': 0, 'lambda': 0, 'max_depth': 5, 'min_child_weight': 118.844312919608, 'objective': 'manybinary:logistic', 'subsample': 0.6525233714163394, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.39745	test-many_logloss:0.39657	valid-many_logloss:0.39779
[79]	train-many_logloss:0.09116	test-many_logloss:0.13035	valid-many_logloss:0.13504
{'alpha': 0, 'beta_eta': 0.012588600

[66]	train-many_logloss:0.00065	test-many_logloss:0.05543	valid-many_logloss:0.05546
NEW BEST VALUE!                                                        
{'alpha': 0, 'beta_eta': 0.009060610135766442, 'btype': 'I', 'colsample_bylevel': 0.7332732947918047, 'colsample_bytree': 0.5030735772414907, 'eta': 0.0015245875835189877, 'eval_metric': ('many_logloss',), 'extra_dims': 7, 'gamma': 9.066632495478617e-06, 'lambda': 0, 'max_depth': 3, 'min_child_weight': 135.18110003620356, 'objective': 'manybinary:logistic', 'subsample': 0.5593100367488764, 'output_dim': 10, 'nthread': 2}
[0]	train-many_logloss:0.68699	test-many_logloss:0.68698	valid-many_logloss:0.68699
[200]	train-many_logloss:0.16255	test-many_logloss:0.16991	valid-many_logloss:0.17175
[400]	train-many_logloss:0.08542	test-many_logloss:0.10051	valid-many_logloss:0.10075
[499]	train-many_logloss:0.07086	test-many_logloss:0.09094	valid-many_logloss:0.09025
{'alpha': 9.442657203270879e-05, 'beta_eta': 0.17396370377116593, 'btype': '

In [7]:
import pandas as pd
# Persist the collected benchmark results: each entry of `results` is
# wrapped in a Series and transposed into a single-row frame, the rows are
# stacked in order, and the table is written to CSV without the index column.
# NOTE(review): presumably each entry is a dict of per-run metrics — confirm
# against the cell that fills `results`.
trial_rows = [pd.Series(record).to_frame().T for record in results]
results_table = pd.concat(trial_rows, axis=0)
results_table.to_csv('./mnist_wxgb_results_paper_2.csv', index=False)