In [1]:
%matplotlib inline
import matplotlib

In [2]:
%load_ext autoreload
%autoreload 2

In [18]:
import copy
import json
import os
import subprocess

import numpy as np
import pandas as pd
import scipy
from scipy import stats
import sklearn
import sklearn.datasets
import xgboost as xgb

import discopt

In [11]:
# Default for obj_func's `n_jobs` argument, which is passed to XGBClassifier as `nthread`.
n_jobs = 3
# Evaluation budget: used as hyperopt's max_evals and as the repetition/trial count of both discopt searches.
nrep = 20

# HYPEROPT (TPE) 

In [25]:
from hyperopt import hp, Trials, fmin, tpe, rand

In [5]:
# Seed NumPy's global RNG so the permutation below (and hence the split) is repeatable.
np.random.seed(0)
digits = sklearn.datasets.load_digits()
x, y = digits.data, digits.target

# Shuffle the sample indices: the first `ntrain` become the training set,
# all remaining samples form the validation set.
ntrain = 100
order = np.random.permutation(y.size)
tr, val = order[:ntrain], order[ntrain:]
ytr, xtr = y[tr], x[tr, :]
yval, xval = y[val], x[val, :]


In [6]:
def obj_func(kwargs, verbose=1, n_jobs=n_jobs):
    """Objective to minimise: negated validation accuracy of an XGBoost classifier.

    Reads the notebook globals ``xtr``, ``ytr``, ``xval`` and ``yval``.

    Parameters
    ----------
    kwargs : dict
        Hyperparameters in the search-space parameterisation; they are converted
        to XGBClassifier keyword arguments by ``xgb_parse_params``.
    verbose : int
        When greater than 0, the objective and the parsed arguments are printed.
    n_jobs : int
        Forwarded to the classifier as ``nthread``. The default is the value the
        global ``n_jobs`` had when this ``def`` was executed.

    Returns
    -------
    The negated value returned by ``clf_acc``.
    """
    kwargs = xgb_parse_params(kwargs)
    clf = xgb.sklearn.XGBClassifier(nthread=n_jobs, **kwargs)
    obj = -clf_acc(clf, xtr, ytr, xval, yval)
    if verbose > 0:
        # Single-argument print() emits the same text under Python 2 and 3,
        # unlike the print statement, which is a SyntaxError on Python 3.
        print('obj %s arguments %s \n' % (obj, kwargs))
    return obj

In [7]:
def xgb_parse_params(kwargs):
    """Convert search-space parameters into regular XGBoost keyword arguments.

    The input is deep-copied, so the caller's dict is never modified.

    Transformations applied to the copy:
      * ``max_depth`` and ``n_estimators`` are cast to ``int``;
      * when both ``lr_trees_ratio`` and ``n_estimators`` are present,
        ``learning_rate = lr_trees_ratio / n_estimators`` is added and
        ``lr_trees_ratio`` is removed;
      * every key ``log_<name>`` is replaced by ``<name>`` holding
        ``np.exp`` of the original value.

    Parameters
    ----------
    kwargs : dict
        Parameters in the transformed (search-space) parameterisation.

    Returns
    -------
    dict
        A new dict with the transformations above applied.
    """
    kwargs = copy.deepcopy(kwargs)
    if 'max_depth' in kwargs:
        kwargs['max_depth'] = int(kwargs['max_depth'])

    if ('lr_trees_ratio' in kwargs) and ('n_estimators' in kwargs):
        # float() keeps this a true division on Python 2 even if both values are ints.
        kwargs['learning_rate'] = kwargs['lr_trees_ratio'] / float(kwargs['n_estimators'])
        del kwargs['lr_trees_ratio']

    if 'n_estimators' in kwargs:
        kwargs['n_estimators'] = int(kwargs['n_estimators'])

    # Iterate over a snapshot of the keys: inserting and deleting entries while
    # iterating the dict itself can skip keys or raise RuntimeError.
    for name in list(kwargs):
        if name.startswith('log_'):
            kwargs[name[4:]] = np.exp(kwargs.pop(name))
    return kwargs


In [8]:
def clf_acc(clf, xtr, ytr, xte, yte):
    """Fit ``clf`` on ``(xtr, ytr)`` and return its accuracy on ``(xte, yte)``.

    ``clf`` must provide ``fit(x, y)`` and ``predict(x)``. The accuracy is the
    mean of the element-wise comparison between ``yte`` and the predictions.
    """
    clf.fit(xtr, ytr)
    hits = (yte == clf.predict(xte))
    return hits.mean()

In [9]:
# hyperopt search space, expressed in the transformed parameterisation that
# xgb_parse_params converts back into XGBClassifier arguments:
#   - `log_*` entries are sampled on a natural-log scale and exponentiated there;
#   - `lr_trees_ratio` is divided by `n_estimators` to obtain `learning_rate`;
#   - `max_depth` and `n_estimators` are cast to int.
# NOTE(review): `lr_trees_ratio` spans 2-20 here but 2-10 in `space_scipy` -- confirm which range is intended.
space_hyperopt = {
    'max_depth': hp.quniform('max_depth', 1, 20, 1),
    'lr_trees_ratio': hp.uniform('lr_trees_ratio', 2, 20),
    'n_estimators': hp.quniform('n_estimators', 50, 300, 1),
    'log_gamma': hp.uniform('log_gamma', np.log(0.01), np.log(10)),
    'log_reg_lambda': hp.uniform('log_reg_lambda', np.log(0.01), np.log(10)),
    'subsample': hp.uniform('subsample', 0.2, 1),
    'colsample_bylevel': hp.uniform('colsample_bylevel', 0.2, 1),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.2, 1),
}

In [12]:
# Suggestion algorithm handed to fmin. For a random-search baseline use
# `algo = rand.suggest` instead (`rand` is imported from hyperopt above).
algo = tpe.suggest
trials = Trials()  # collects a record of every evaluated trial
best = fmin(fn=obj_func,
            space=space_hyperopt,
            algo=algo,
            max_evals=nrep,
            trials=trials)

obj -0.79375368297 arguments {'colsample_bytree': 0.767954185201517, 'colsample_bylevel': 0.930354896394116, 'learning_rate': 0.02789603819107345, 'n_estimators': 296, 'subsample': 0.8807481591535247, 'reg_lambda': 0.03114935405843873, 'max_depth': 9, 'gamma': 2.5561603759756042} 

obj -0.0989982321744 arguments {'colsample_bytree': 0.4537860199326206, 'colsample_bylevel': 0.24410877634464248, 'learning_rate': 0.04958852728552931, 'n_estimators': 169, 'subsample': 0.38269881752701906, 'reg_lambda': 7.9649080267403747, 'max_depth': 13, 'gamma': 7.9919787755197538} 

obj -0.556865055981 arguments {'colsample_bytree': 0.9617368721966661, 'colsample_bylevel': 0.9491109644705069, 'learning_rate': 0.18332030665705265, 'n_estimators': 71, 'subsample': 0.42657760293063957, 'reg_lambda': 0.63550126282659991, 'max_depth': 9, 'gamma': 8.4928896306139876} 

obj -0.853270477313 arguments {'colsample_bytree': 0.47466199805808956, 'colsample_bylevel': 0.297236129539653, 'learning_rate': 0.07483999318

In [13]:
# Show the first recorded trial to see what hyperopt stores per evaluation.
list(trials)[0]

{'book_time': datetime.datetime(2017, 4, 2, 21, 21, 58, 800000),
 'exp_key': None,
 'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
  'idxs': {'colsample_bylevel': [0],
   'colsample_bytree': [0],
   'log_gamma': [0],
   'log_reg_lambda': [0],
   'lr_trees_ratio': [0],
   'max_depth': [0],
   'n_estimators': [0],
   'subsample': [0]},
  'tid': 0,
  'vals': {'colsample_bylevel': [0.930354896394116],
   'colsample_bytree': [0.767954185201517],
   'log_gamma': [0.9385062794512926],
   'log_reg_lambda': [-3.468961770450397],
   'lr_trees_ratio': [8.257227304557741],
   'max_depth': [9.0],
   'n_estimators': [296.0],
   'subsample': [0.8807481591535247]},
  'workdir': None},
 'owner': None,
 'refresh_time': datetime.datetime(2017, 4, 2, 21, 21, 59, 502000),
 'result': {'loss': -0.793753682969947, 'status': 'ok'},
 'spec': None,
 'state': 2,
 'tid': 0,
 'version': 0}

In [14]:
# Best point returned by fmin, still in the search-space parameterisation (log_* and lr_trees_ratio keys).
best

{'colsample_bylevel': 0.257303663847359,
 'colsample_bytree': 0.463944476707143,
 'log_gamma': -3.194003573182015,
 'log_reg_lambda': -2.6195411341278074,
 'lr_trees_ratio': 18.30262046898361,
 'max_depth': 2.0,
 'n_estimators': 175.0,
 'subsample': 0.7302026885582977}

In [15]:
# Re-evaluate the objective at the best point; obj_func maps it through xgb_parse_params first.
obj_func(best)

obj -0.862109605186 arguments {'colsample_bytree': 0.463944476707143, 'colsample_bylevel': 0.257303663847359, 'learning_rate': 0.10458640267990633, 'max_depth': 2, 'n_estimators': 175, 'subsample': 0.7302026885582977, 'reg_lambda': 0.072836277242366035, 'gamma': 0.041007365864507192} 



-0.86210960518562163

# SPEARMINT (GP)

In [26]:
import spearmint

In [None]:
# NOTE(review): utils_xgb_main is imported but not referenced anywhere in this cell -- confirm it is still needed.
import utils_xgb_main
exp_name = 'test_xgb'

# Machine-specific absolute paths; they must be adjusted before running on another machine.
JOB_DIR = '/home/kc/Konrad/python/hyperparams_opt/src/spearmint_data/xgb_spearmint/'
# Despite the *_DIR suffix, the next two are file paths: CONFIG_BASE_DIR is read by
# get_config_base and CONFIG_DIR is written by prepare_config.
CONFIG_BASE_DIR = os.path.join(JOB_DIR, 'config_base_xgb')
CONFIG_DIR = os.path.join(JOB_DIR,'config.json')
# Script launched by run_spearmint_job with JOB_DIR as its argument.
SPEARMINT_MAIN_DIR = '/home/kc/Konrad/python/hyperparams_opt/src/Spearmint/spearmint/main.py'

def get_config_base():
    """Load and return the base configuration stored as JSON at ``CONFIG_BASE_DIR``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    parsing finishes (or fails) instead of being left to the garbage collector.
    """
    with open(CONFIG_BASE_DIR) as f:
        return json.load(f)

def min_sample_required(train_data_shape):
    """Return lower bounds ``(cs_min, ss_min)`` derived from the training-data shape.

    Parameters
    ----------
    train_data_shape : pair ``(nrow, ncol)``

    Returns
    -------
    cs_min : float
        ``(1 / ncol) ** 0.5`` plus a small epsilon; used by
        ``prepare_data_dependent_fields`` as the minimum of both colsample variables.
    ss_min : float
        ``1 / nrow`` plus the same epsilon; used there as the minimum of ``subsample``.
    """
    # presumably these bounds ensure at least one column / one row gets sampled -- TODO confirm
    eps = 1e-7
    nrow, ncol = train_data_shape
    cs_min = (1. / ncol) ** 0.5 + eps
    ss_min = 1. / nrow + eps
    return cs_min, ss_min

def prepare_data_dependent_fields(config):
    """Set the data-dependent ``min`` entries of ``config['variables']`` in place.

    The bounds come from ``min_sample_required`` applied to the shape of the
    global training matrix ``xtr``. Nothing is returned; ``config`` is mutated.
    """
    cs_min, ss_min = min_sample_required(xtr.shape)
    variables = config['variables']
    # Both column-sampling variables share the same lower bound.
    for name in ('colsample_bylevel', 'colsample_bytree'):
        variables[name]['min'] = cs_min
    variables['subsample']['min'] = ss_min

def prepare_config(exp_name, sample_based_on_data):
    """Build the Spearmint configuration and write it to ``CONFIG_DIR`` as JSON.

    Parameters
    ----------
    exp_name : str
        Stored under the ``'experiment-name'`` key of the configuration.
    sample_based_on_data : bool
        When true, the data-dependent minimum bounds are filled in via
        ``prepare_data_dependent_fields`` before writing.
    """
    conf = get_config_base()
    # Single-argument print() emits the same text under Python 2 and 3.
    print('config %s' % (conf,))
    if sample_based_on_data:
        prepare_data_dependent_fields(conf)
    conf['experiment-name'] = exp_name
    with open(CONFIG_DIR, 'w') as f:
        json.dump(conf, f, sort_keys=True, indent=4, separators=(',', ' : '))

def run_spearmint_job(name, sample_based_on_data):
    """Write the experiment configuration, then run Spearmint's main script on ``JOB_DIR``.

    Parameters
    ----------
    name : str
        Experiment name passed on to ``prepare_config``.
    sample_based_on_data : bool
        Passed on to ``prepare_config``.

    The exit status of the child process is not inspected or returned.
    """
    prepare_config(name, sample_based_on_data)
    # Pass an argument list rather than a shell string: no shell is spawned and
    # paths containing spaces or shell metacharacters are handed over verbatim.
    argv = ['python', SPEARMINT_MAIN_DIR, JOB_DIR]
    print('executing shell command: {}'.format(' '.join(argv)))
    subprocess.call(argv)

# DISCOPT 

In [16]:
def scipy_uniform(min_val, max_val):
    """Return a frozen scipy uniform distribution on ``[min_val, max_val]``.

    scipy parameterises the uniform distribution by ``loc`` and ``scale``
    (start and width), so the width is computed from the two bounds here.
    """
    width = max_val - min_val
    return scipy.stats.uniform(min_val, width)

In [19]:
# Search space for discopt.search_min, built from frozen scipy distributions and using
# the same transformed parameter names that xgb_parse_params understands.
# stats.randint excludes its upper bound, so randint(1, 21) and randint(50, 301)
# cover the integers 1-20 and 50-300.
# NOTE(review): `lr_trees_ratio` spans 2-10 here but 2-20 in `space_hyperopt` -- confirm which range is intended.
space_scipy = {
    'max_depth': stats.randint(1, 21),
    'lr_trees_ratio': scipy_uniform(2,10),
    'n_estimators': stats.randint(50, 301),
    'log_gamma': scipy_uniform(np.log(0.01), np.log(10)),
    'log_reg_lambda': scipy_uniform(np.log(0.01), np.log(10)),
    'subsample': scipy_uniform(0.2, 1),
    'colsample_bylevel': scipy_uniform(0.2, 1),
    'colsample_bytree': scipy_uniform(0.2, 1),
}

In [20]:
# Minimise obj_func over space_scipy with discopt, passing the shared budget `nrep`.
scores = discopt.search_min(obj_func, space=space_scipy, nrep=nrep)


obj -0.864466705952 arguments {'colsample_bytree': 0.38172527358010955, 'colsample_bylevel': 0.36400913083401787, 'learning_rate': 0.024422556641555116, 'n_estimators': 285, 'subsample': 0.99685701680814987, 'reg_lambda': 2.4342746716749226, 'max_depth': 6, 'gamma': 0.15222382181376989} 

obj -0.771950500884 arguments {'colsample_bytree': 0.41355664146756332, 'colsample_bylevel': 0.94947574931738554, 'learning_rate': 0.062213784571623743, 'n_estimators': 52, 'subsample': 0.28018674993929849, 'reg_lambda': 1.0777674618889217, 'max_depth': 12, 'gamma': 0.10347708256090966} 

obj -0.838538597525 arguments {'colsample_bytree': 0.91764598549793264, 'colsample_bylevel': 0.49848470718296212, 'learning_rate': 0.025705953476262259, 'n_estimators': 215, 'subsample': 0.89369163327493029, 'reg_lambda': 6.5101819005769173, 'max_depth': 20, 'gamma': 0.12573025846585489} 

obj -0.733647613435 arguments {'colsample_bytree': 0.65170176799403046, 'colsample_bylevel': 0.4803096135334306, 'learning_rate':

In [21]:
# Table of evaluations: one row per sampled parameter set, with the objective in the `obj` column.
scores

Unnamed: 0,lr_trees_ratio,colsample_bytree,colsample_bylevel,log_gamma,n_estimators,subsample,log_reg_lambda,max_depth,obj
0,6.960429,0.381725,0.364009,-1.882403,285.0,0.996857,0.889649,6.0,-0.864467
1,3.235117,0.413557,0.949476,-2.268405,52.0,0.280187,0.074892,12.0,-0.771951
2,5.52678,0.917646,0.498485,-2.073616,215.0,0.893692,1.873367,20.0,-0.838539
3,6.419523,0.651702,0.48031,1.380114,249.0,0.435413,-0.314485,11.0,-0.733648
4,3.822966,0.300649,0.575995,0.193842,137.0,0.548283,-3.929522,11.0,-0.84561
5,5.169782,0.715379,0.523819,-1.959042,220.0,0.223905,-4.035996,4.0,-0.807307
6,5.27886,0.23668,0.257979,-0.284938,102.0,0.840608,-3.936477,11.0,-0.823807
7,8.169169,0.229624,0.577184,-4.334897,295.0,0.499872,-2.376293,15.0,-0.850324
8,9.276463,0.528463,0.706026,-4.293839,141.0,0.989094,-2.219192,17.0,-0.856806
9,9.739801,0.281766,0.958699,-3.51063,153.0,0.596326,-3.849348,18.0,-0.851503


# DISCOPT SHORTCUT FOR XGB 

In [22]:
def train_and_valid(clf):
    """Train ``clf`` and return the objective that the search will minimise.

    This is the user-supplied hook for ``discopt.search_params_for_xgb``: put the
    training and validation procedure here (the validation scheme is up to you)
    and return a value where lower is better, e.g. negative accuracy / AUC / F1.

    ``clf`` is expected to be an ``xgboost.sklearn.XGBClassifier`` instance.
    This implementation fits on the global ``(xtr, ytr)`` split and returns the
    negated accuracy on the global ``(xval, yval)`` split.
    """
    clf.fit(xtr, ytr)
    hits = clf.predict(xval) == yval
    return -hits.mean()  # negative accuracy

In [23]:
# Run discopt's XGBoost shortcut with train_and_valid as the objective and `nrep` trials.
# NOTE: this rebinds `scores`, replacing the result of the earlier discopt.search_min call.
best_params, scores = discopt.search_params_for_xgb(train_valid_func=train_and_valid, train_data_shape=xtr.shape, ntrials=nrep)

best objective yet -0.842663523866
best objective yet -0.842663523866
best objective yet -0.868591632292
best objective yet -0.868591632292
best objective yet -0.868591632292
best objective yet -0.868591632292
best objective yet -0.868591632292
best objective yet -0.868591632292
best objective yet -0.868591632292
best objective yet -0.868591632292
best objective yet -0.868591632292
best objective yet -0.868591632292
best objective yet -0.868591632292
best objective yet -0.868591632292
best objective yet -0.868591632292
best objective yet -0.868591632292
best objective yet -0.868591632292
best objective yet -0.868591632292
best objective yet -0.868591632292
best objective yet -0.868591632292


In [24]:
# Best parameters found by search_params_for_xgb, reported under XGBoost's own names (gamma, learning_rate, ...).
best_params

{'colsample_bylevel': 0.81110456790293606,
 'colsample_bytree': 0.36200464550488803,
 'gamma': 2.758957718883623,
 'learning_rate': 0.034653970569318703,
 'max_depth': 18,
 'n_estimators': 104,
 'reg_lambda': 6.5650051277478179,
 'subsample': 0.12992248318790817}