# Задача, на которой будут тестироваться оптимизаторы:

**Задача ["Выход из он-лайн игры"](http://mlbootcamp.ru/round/10/sandbox/) <- данные оттуда же**

In [2]:
import pandas as pd
import numpy as np
import logging
import os
import inspect


import gc
from copy import deepcopy

from xgboost import XGBClassifier
from sklearn.model_selection import StratifiedKFold, cross_val_score

from smac.configspace import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter, \
    UniformFloatHyperparameter, UniformIntegerHyperparameter

from smac.tae.execute_func import ExecuteTAFuncDict
from smac.scenario.scenario import Scenario
from smac.facade.smac_facade import SMAC

# Seed NumPy's global random number generator so NumPy-based randomness in
# this notebook is repeatable between runs.
np.random.seed(1)



In [3]:
# Load x_train.csv (semicolon-separated); it is used below as the feature matrix.
data = pd.read_csv('x_train.csv', sep = ';')

In [4]:
# Work on an independent deep copy so the loaded `data` frame stays untouched.
X_train = data.copy(deep=True)

# Target variable: the "target" column of y_train.csv.
y_train = pd.read_csv("y_train.csv")["target"]

In [5]:
# Baseline classifier built with no explicit hyperparameters (library defaults).
xgb = XGBClassifier()

In [6]:
# Shared splitter for cross-validation: 5 stratified, shuffled folds with a fixed seed.
kf = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 241) # for cross-validation

In [7]:
# Baseline quality: mean "neg_log_loss" score of the default model over the
# folds of `kf` (the score is the negated log-loss, hence the negative value).
np.mean(cross_val_score(xgb, X_train, y_train, scoring="neg_log_loss", cv = kf))

-0.38203842464869414

### 1. Тип суррогата: древовидные парзеновские оценщики

In [8]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials

In [8]:
%%time
#пр-во пар-ов, на котором будет происходить оптимизация
space = {
             'n_estimators' : hp.choice('n_estimators', range(100, 1000)),
             'learning_rate' : hp.uniform('learning_rate', 0.025, 1.0,),
             'max_depth' : hp.choice('max_depth', range(1, 20)), 
             'min_child_weight' : hp.choice('min_child_weight', range(1, 6)),
             'subsample' : hp.uniform('subsample', 0.5, 1.),
             'gamma' : hp.uniform('gamma', 0.5, 1.),
             'colsample_bytree' : hp.uniform('colsample_bytree', 0.5, 1),
#             "reg_lambda": hp.uniform("reg_lambda", 0.01, 20.0),
#            "seed": hp.choice("seed", range(1, 701, 100))             
        }

#Переменная, хранящая лучшее значение log-loss
best = 10000000000000 #здесь может быть любое достаточно большое положит число

i = 0
#оптимизируемая функция
def f(params):
    global best #используется зн-е глобальной переменной best
    global i
    
    #классификатор
    xgb = XGBClassifier(**params)
    
    #ср абсолютное зн-е log-loss на кросс-валидации
    log_loss_mean = abs(np.mean(cross_val_score(xgb, X_train, y_train, scoring="neg_log_loss", cv = kf)))
    
    if log_loss_mean < best:
        best = log_loss_mean
        print('new best:', best, params)
    
    i+=1
    print(i)
    print("-----------------------------------------")
    return {'loss': log_loss_mean, 'status': STATUS_OK} 

#генератор испытаний
trials = Trials()

#ищем гиперпар-ры для xgboost, доставляющие мин функции f(params)
best = fmin(f, space, algo=tpe.suggest, max_evals=50, trials=trials)


new best: 0.388904560638 {'learning_rate': 0.6734661495636179, 'max_depth': 1, 'subsample': 0.6563538429180416, 'n_estimators': 667, 'min_child_weight': 4, 'gamma': 0.5829501037191249, 'colsample_bytree': 0.7529749029119877}
1
-----------------------------------------
new best: 0.385458594157 {'learning_rate': 0.657389223410132, 'max_depth': 1, 'subsample': 0.5750125372935442, 'n_estimators': 117, 'min_child_weight': 2, 'gamma': 0.6610742775274993, 'colsample_bytree': 0.6983639344471162}
2
-----------------------------------------
3
-----------------------------------------
4
-----------------------------------------
5
-----------------------------------------
6
-----------------------------------------
7
-----------------------------------------
8
-----------------------------------------
9
-----------------------------------------
10
-----------------------------------------
11
-----------------------------------------
12
-----------------------------------------
13
-----------------

In [9]:
#лучшие гиперпараметры, найденные hyperopt
# Best hyperparameters found by hyperopt.
params = dict(
    learning_rate=0.1295815997700141,
    max_depth=1,
    subsample=0.5328168746127077,
    n_estimators=693,
    min_child_weight=3,
    gamma=0.5699342507112768,
    colsample_bytree=0.7925019468068861,
)

In [10]:
# Classifier configured with the hyperparameters stored in `params`.
xgb = XGBClassifier(**params)

In [11]:
# Mean cross-validated log-loss of the tuned model; the "neg_log_loss" score
# is multiplied by -1 to report it as a positive loss.
-1*np.mean(cross_val_score(xgb, X_train, y_train, scoring="neg_log_loss", cv = kf))

0.38371602040847941

### 2. Тип суррогата: SMAC (Sequential Model-based Algorithm Configuration)

In [12]:
def xgb_from_cfg(cfg, seed):
    """Objective function handed to SMAC.

    ``cfg`` is indexed by hyperparameter name ("num_trees", "learning_rate",
    "max_depth", "min_child_weight", "subsample", "gamma",
    "colsample_bytree"); ``seed`` is forwarded to XGBClassifier as ``seed``.

    Returns the mean "neg_log_loss" cross-validation score multiplied by -1,
    computed on the module-level ``X_train`` / ``y_train`` with a freshly
    built 5-fold stratified splitter.
    """
    model_kwargs = {
        "n_estimators": cfg["num_trees"],
        "learning_rate": cfg["learning_rate"],
        "max_depth": cfg["max_depth"],
        "min_child_weight": cfg["min_child_weight"],
        "subsample": cfg["subsample"],
        "gamma": cfg["gamma"],
        "colsample_bytree": cfg["colsample_bytree"],
        "seed": seed,
    }
    model = XGBClassifier(**model_kwargs)

    # splitter for cross-validation
    folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=241)
    fold_scores = cross_val_score(model, X_train, y_train, scoring="neg_log_loss", cv=folds)

    # the scorer yields negated losses, so flip the sign back
    return -np.mean(fold_scores)

In [13]:
# Configure logging at INFO level and announce the start of the SMAC example.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("XGB-example")
startup_message = (
    "Running XGB example for SMAC. If you experience "
    "difficulties, try to decrease the memory-limit."
)
logger.info(startup_message)

# Empty configuration space; the hyperparameters and their ranges are
# defined and registered on it in the following cells.
cs = ConfigurationSpace()

INFO:XGB-example:Running XGB example for SMAC. If you experience difficulties, try to decrease the memory-limit.


In [14]:
#defining of hyperparams space
# Hyperparameter definitions for the SMAC search space, each given as
# (name, lower bound, upper bound, default).
# NOTE(review): the echoed configuration space reports these integer ranges
# as inclusive (e.g. max_depth in [1, 20]), while the hyperopt space uses
# range(1, 20), i.e. 1..19 -- confirm whether the two spaces should match.
num_trees = UniformIntegerHyperparameter("num_trees", 100, 1000, default=100)
learning_rate = UniformFloatHyperparameter("learning_rate", 0.025, 1., default=0.025)
max_depth = UniformIntegerHyperparameter("max_depth", 1, 20, default=1)
min_child_weight = UniformIntegerHyperparameter("min_child_weight", 1, 6, default=1)
subsample = UniformFloatHyperparameter("subsample", 0.5, 1., default=0.5)
gamma = UniformFloatHyperparameter("gamma", 0.5, 1., default=0.5)
colsample_bytree = UniformFloatHyperparameter("colsample_bytree", 0.5, 1., default=0.5)
# Not passed to cs.add_hyperparameters in the next cell. The statement must
# not end with a comma: that would bind a 1-tuple instead of the hyperparameter.
reg_lambda = UniformFloatHyperparameter("reg_lambda", 0.01, 20.0, default=0.01)

In [15]:
# Register the seven tuned hyperparameters on the configuration space.
cs.add_hyperparameters([
    num_trees,
    learning_rate,
    max_depth,
    min_child_weight,
    subsample,
    gamma,
    colsample_bytree,
])

[num_trees, Type: UniformInteger, Range: [100, 1000], Default: 100,
 learning_rate, Type: UniformFloat, Range: [0.025, 1.0], Default: 0.025000000000000001,
 max_depth, Type: UniformInteger, Range: [1, 20], Default: 1,
 min_child_weight, Type: UniformInteger, Range: [1, 6], Default: 1,
 subsample, Type: UniformFloat, Range: [0.5, 1.0], Default: 0.5,
 gamma, Type: UniformFloat, Range: [0.5, 1.0], Default: 0.5,
 colsample_bytree, Type: UniformFloat, Range: [0.5, 1.0], Default: 0.5]

In [16]:
# SMAC scenario oject
# SMAC scenario object
scenario_options = {
    "run_obj": "quality",      # optimise solution quality (the alternative is runtime)
    "runcount-limit": 50,      # maximum number of function evaluations
    "cs": cs,                  # configuration space
    "deterministic": "true",
    "memory_limit": 2000,      # adapt this to a reasonable value for your hardware
}
scenario = Scenario(scenario_options)

INFO:smac.scenario.scenario.Scenario:Output to smac3-output_2017-09-15_17:55:24_(191889)


In [17]:
# To optimize, we pass the function to the SMAC-object
# To optimize, we pass the objective function to the SMAC object.
# `rng` must be an actual random state: np.random.seed(0) returns None, so
# passing its result would hand SMAC no generator at all.
smac = SMAC(scenario=scenario, rng=np.random.RandomState(0),
            tae_runner=xgb_from_cfg)

In [18]:
# Example call of the function with default values
# It returns: Status, Cost, Runtime, Additional Infos
# Example call of the objective with the default configuration.
# run() returns (status, cost, runtime, additional info); index 1 is the cost.
tae_runner = smac.get_tae_runner()
default_cfg = cs.get_default_configuration()
default_run = tae_runner.run(default_cfg, 1)
def_value = default_run[1]
print("Value for default configuration: %.2f" % (def_value))


Value for default configuration: 0.41


In [19]:
%%time
# Start optimization
try:
    incumbent = smac.optimize()
finally:
    # Runs whether or not optimize() raised: take the incumbent directly from
    # the solver, replacing the value assigned above.
    incumbent = smac.solver.incumbent

# Re-evaluate the incumbent once; element 1 of the run() result is used as its value.
inc_value = smac.get_tae_runner().run(incumbent, 1)[1]
print("Optimized Value: %.2f" % (inc_value))

INFO:smac.intensification.intensification.Intensifier:Challenger (0.3986) is better than incumbent (0.4080) on 1 runs.
INFO:smac.intensification.intensification.Intensifier:Changes in incumbent:
INFO:smac.intensification.intensification.Intensifier:  colsample_bytree : 0.5 -> 0.6699848504576715
INFO:smac.intensification.intensification.Intensifier:  gamma : 0.5 -> 0.8663036117676315
INFO:smac.intensification.intensification.Intensifier:  learning_rate : 0.025 -> 0.8496658980498933
INFO:smac.intensification.intensification.Intensifier:  max_depth : 1 -> 2
INFO:smac.intensification.intensification.Intensifier:  min_child_weight : 1 -> 2
INFO:smac.intensification.intensification.Intensifier:  num_trees : 100 -> 118
INFO:smac.intensification.intensification.Intensifier:  subsample : 0.5 -> 0.6552715554923843
INFO:smac.intensification.intensification.Intensifier:Updated estimated cost of incumbent on 1 runs: 0.3986
INFO:smac.intensification.intensification.Intensifier:Challenger (0.3848) is

Optimized Value: 0.38
CPU times: user 4.1 s, sys: 492 ms, total: 4.59 s
Wall time: 21min 54s


In [12]:
#лучшие гиперпараметры, найденные SMAC 
# Best hyperparameters found by SMAC
smac_best_params = {
    "colsample_bytree": 0.597294316367996,
    "gamma": 0.872345979468649,
    "learning_rate": 0.10797678097278995,
    "max_depth": 1,
    "min_child_weight": 1,
    "n_estimators": 711,
    "subsample": 0.6636531251821975,
}
xgb = XGBClassifier(**smac_best_params)

In [13]:
# Mean "neg_log_loss" cross-validation score of the SMAC-tuned model
# (negated log-loss, so the value is negative).
np.mean(cross_val_score(xgb, X_train, y_train, scoring="neg_log_loss", cv = kf))

-0.38351816420657026

### 3. Тип суррогата: гауссовский процесс

In [14]:
from bayes_opt import BayesianOptimization

In [18]:
def xgb_for_bayes(
                    max_depth,
                    learning_rate,
                    n_estimators,
                    gamma,
                    subsample,
                    colsample_bytree,
                    min_child_weight):
    """Objective function maximised by BayesianOptimization.

    ``max_depth``, ``n_estimators`` and ``min_child_weight`` are converted
    with ``int()`` before being passed to XGBClassifier; the remaining
    arguments are forwarded as given, and the model seed is fixed to 0.

    Returns the mean "neg_log_loss" cross-validation score on the
    module-level ``X_train`` / ``y_train`` using the shared splitter ``kf``.
    """
    # the optimizer proposes real-valued points; these three must be integers
    depth = int(max_depth)
    tree_count = int(n_estimators)
    child_weight = int(min_child_weight)

    model = XGBClassifier(
        max_depth=depth,
        learning_rate=learning_rate,
        n_estimators=tree_count,
        gamma=gamma,
        min_child_weight=child_weight,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        seed=0,
    )

    fold_scores = cross_val_score(model, X_train, y_train, scoring="neg_log_loss", cv=kf)
    return np.mean(fold_scores)

In [19]:
%%time
num_iter = 50 #число итераций
init_points = 10 #число начальных точек

#defining of hyperparams space
xgb_BO = BayesianOptimization(xgb_for_bayes, 
                              {
                                "max_depth": (1, 20),
                                "learning_rate": (0.025, 1.0),
                                "n_estimators": (100, 1000),
                                "gamma": (0.5, 1.),
                                "subsample": (0.5, 1.),
                                "colsample_bytree": (0.5, 1.),
                                "min_child_weight": (1,6)
                              }
                             )

xgb_BO.maximize(init_points=init_points, n_iter=num_iter)

[31mInitialization[0m
[94m--------------------------------------------------------------------------------------------------------------------------------------------------[0m
 Step |   Time |      Value |   colsample_bytree |     gamma |   learning_rate |   max_depth |   min_child_weight |   n_estimators |   subsample | 
    1 | 00m24s | [35m  -0.70771[0m | [32m            0.5099[0m | [32m   0.6633[0m | [32m         0.9058[0m | [32m    17.7828[0m | [32m            1.5737[0m | [32m      255.1065[0m | [32m     0.7929[0m | 
    2 | 00m14s | [35m  -0.41558[0m | [32m            0.5131[0m | [32m   0.7635[0m | [32m         0.1590[0m | [32m    12.8498[0m | [32m            5.7474[0m | [32m      223.4222[0m | [32m     0.9848[0m | 
    3 | 01m21s |   -0.49916 |             0.5142 |    0.9430 |          0.1608 |     15.2679 |             3.2496 |       939.3359 |      0.7805 | 
    4 | 00m34s |   -0.98691 |             0.6231 |    0.6786 |          0.8122 |    

# Таким образом, по времени вычислений и качеству оптимизации оптимизаторы заняли следующие места:

## 1. [SMAC](https://github.com/automl/SMAC3)
## 2. [TPE estimator](https://github.com/hyperopt/hyperopt)
## 3. [Gaussian process](https://github.com/fmfn/BayesianOptimization)