In [None]:
%matplotlib inline
import matplotlib

In [None]:
import discopt
import pandas as pd
import numpy as np
import copy
import sklearn
import sklearn.datasets
import xgboost as xgb
import scipy
from scipy import stats

In [None]:
# Thread count; obj_func passes this to XGBClassifier as `nthread`.
n_jobs = 3
# Evaluation budget; passed to hyperopt.fmin as `max_evals`.
nrep = 20

# Prepare data 

In [None]:
# Seed NumPy's global RNG so the shuffle below yields the same split each run.
np.random.seed(0)

# Digits dataset: feature matrix `x` and class labels `y`.
digits = sklearn.datasets.load_digits()
x, y = digits.data, digits.target

# Shuffle the sample indices; the first `ntrain` go to training and every
# remaining sample is held out for validation.
ntrain = 100
order = np.random.permutation(y.size)
tr, val = order[:ntrain], order[ntrain:]

ytr, xtr = y[tr], x[tr, :]
yval, xval = y[val], x[val, :]


# HYPEROPT (TPE) 

In [None]:
import hyperopt

In [None]:
# Search space handed to hyperopt.fmin. Several entries are re-parameterised
# and are converted back to plain XGBoost arguments by xgb_parse_params:
#   * 'lr_trees_ratio' becomes learning_rate = lr_trees_ratio / n_estimators
#   * 'log_<name>' entries are exponentiated and stored under '<name>'
#   * 'max_depth' and 'n_estimators' are cast to int
space_hyperopt = {
    'max_depth': hyperopt.hp.quniform('max_depth', 1, 20, 1),
    'lr_trees_ratio': hyperopt.hp.uniform('lr_trees_ratio', 2, 20),
    'n_estimators': hyperopt.hp.quniform('n_estimators', 50, 300, 1),
    # Sampled uniformly in log space over [log(0.01), log(10)].
    'log_gamma': hyperopt.hp.uniform('log_gamma', np.log(0.01), np.log(10)),
    'log_reg_lambda': hyperopt.hp.uniform('log_reg_lambda', np.log(0.01), np.log(10)),
    # Sampling fractions, each drawn uniformly from [0.2, 1].
    'subsample': hyperopt.hp.uniform('subsample', 0.2, 1),
    'colsample_bylevel': hyperopt.hp.uniform('colsample_bylevel', 0.2, 1),
    'colsample_bytree': hyperopt.hp.uniform('colsample_bytree', 0.2, 1),
}

In [None]:
def clf_acc(clf, xtr, ytr, xte, yte):
    """Fit ``clf`` on the training data and return its accuracy on the test data.

    ``clf`` must provide ``fit(X, y)`` and ``predict(X)``. The result is the
    mean of the element-wise comparison ``yte == clf.predict(xte)``, so the
    comparison has to produce an object with a ``.mean()`` method
    (e.g. NumPy arrays).
    """
    clf.fit(xtr, ytr)
    hits = yte == clf.predict(xte)
    return hits.mean()

In [None]:
def obj_func(hyperparams):
    """Hyperopt objective: negative validation accuracy of an XGBoost classifier.

    ``hyperparams`` is a point from the search space. It is first converted
    to XGBoost keyword arguments by ``xgb_parse_params``; a classifier built
    from those arguments is then fitted on the training split and scored on
    the validation split. The accuracy is negated so that minimising the
    returned value maximises accuracy.

    Uses the notebook globals ``xtr``, ``ytr``, ``xval``, ``yval`` and
    ``n_jobs``. Prints the objective value and the parsed arguments on
    every call.
    """
    hyperparams = xgb_parse_params(hyperparams)
    clf = xgb.sklearn.XGBClassifier(nthread=n_jobs, **hyperparams)
    obj = -clf_acc(clf, xtr, ytr, xval, yval)
    # One pre-formatted string prints the same text under Python 2 and 3;
    # the original print statement is a SyntaxError on Python 3.
    print('obj %s arguments %s \n' % (obj, hyperparams))
    return obj

In [None]:
def xgb_parse_params(params):
    """Translate search-space hyperparameters into plain XGBoost arguments.

    The input dict is deep-copied and never modified. On the copy:

    * ``max_depth`` and ``n_estimators`` are cast to ``int``;
    * when both ``lr_trees_ratio`` and ``n_estimators`` are present,
      ``lr_trees_ratio`` is replaced by
      ``learning_rate = lr_trees_ratio / n_estimators`` (computed from the
      value of ``n_estimators`` before it is cast to ``int``);
    * every key of the form ``log_<name>`` is replaced by ``<name>`` holding
      ``exp(value)``.

    All other entries are passed through unchanged.

    Parameters
    ----------
    params : dict
        Hyperparameters keyed by string names.

    Returns
    -------
    dict
        A new dict with the transformed arguments.
    """
    params = copy.deepcopy(params)
    if 'max_depth' in params:
        params['max_depth'] = int(params['max_depth'])

    if ('lr_trees_ratio' in params) and ('n_estimators' in params):
        # float() forces true division even when both values are ints
        # (plain `/` truncates under Python 2).
        ratio = float(params.pop('lr_trees_ratio'))
        params['learning_rate'] = ratio / params['n_estimators']

    if 'n_estimators' in params:
        params['n_estimators'] = int(params['n_estimators'])

    # Iterate over a snapshot of the keys: inserting and deleting entries
    # while iterating the dict itself can skip keys or raise RuntimeError.
    for name in list(params):
        if name.startswith('log_'):
            params[name[4:]] = np.exp(params.pop(name))
    return params


In [None]:
# Search algorithm: Tree-structured Parzen Estimator.
# (For plain random search use `hyperopt.rand.suggest` instead.)
algo = hyperopt.tpe.suggest

# `trials` keeps a record of every evaluation performed during the search.
trials = hyperopt.Trials()

best = hyperopt.fmin(
    fn=obj_func,
    space=space_hyperopt,
    algo=algo,
    max_evals=nrep,
    trials=trials,
)

In [None]:
# Display the result returned by hyperopt.fmin.
# NOTE(review): presumably expressed in search-space terms (e.g. 'log_gamma',
# 'lr_trees_ratio') rather than parsed XGBoost arguments — confirm.
best

In [None]:
# More detailed results are stored in the `trials` object.

In [None]:
# Show the first entry recorded in `trials` during the search.
list(trials)[0]