## **SMAC**
SMAC (sequential model-based algorithm configuration) is a versatile tool for optimizing algorithm parameters (or the parameters of some other process we can run automatically, or a function we can evaluate, such as a simulation).

SMAC has helped us speed up both local search and tree search algorithms by orders of magnitude on certain instance distributions. Recently, we have also found it to be very effective for the hyperparameter optimization of machine learning algorithms, scaling better to high dimensions and discrete input dimensions than other algorithms. Finally, the predictive models SMAC is based on can also capture and exploit important information about the model domain, such as which input variables are most important. 

In [240]:
# !curl https://raw.githubusercontent.com/automl/smac3/master/requirements.txt | xargs -n 1 -L 1 pip install
!pip install smac --user



In [0]:
import pandas as pd
from sklearn.datasets import load_boston
boston = load_boston()

In [0]:
#convert the data in to pandas dataframe
dfx = pd.DataFrame(boston.data, columns = boston.feature_names)

In [0]:
dfy = pd.DataFrame(boston.target, columns = ['target'])

In [244]:
dfcombine = dfx.join(dfy)
#let us view and examine the head of the combined dataframe
dfcombine.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [0]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(dfx,dfy,test_size = 0.2,random_state = 42)


# Using the SMAC-HPO interface to tune a random forest



In [246]:
import logging

import numpy as np
from ConfigSpace.hyperparameters import CategoricalHyperparameter, \
    UniformFloatHyperparameter, UniformIntegerHyperparameter

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import make_scorer
from sklearn.model_selection import cross_val_score

from smac.configspace import ConfigurationSpace
from smac.facade.smac_hpo_facade import SMAC4HPO
from smac.scenario.scenario import Scenario




def rf_from_cfg(cfg, seed):
    """
        Creates a random forest regressor from sklearn and fits the given data on it.
        This is the function-call we try to optimize. Chosen values are stored in
        the configuration (cfg).

        Parameters:
        -----------
        cfg: Configuration
            configuration chosen by smac
        seed: int or RandomState
            used to initialize the rf's random generator

        Returns:
        -----------
        np.mean(rmses): float
            mean of root mean square errors of random-forest test predictions
            per cv-fold
    """
    rfr = RandomForestRegressor(
        n_estimators=cfg["num_trees"],
        criterion=cfg["criterion"],
        min_samples_split=cfg["min_samples_to_split"],
        min_samples_leaf=cfg["min_samples_in_leaf"],
        min_weight_fraction_leaf=cfg["min_weight_frac_leaf"],
        max_features=cfg["max_features"],
        max_leaf_nodes=cfg["max_leaf_nodes"],
        bootstrap=cfg["do_bootstrapping"],
        random_state=seed)

    def rmse(y, y_pred):
        return np.sqrt(np.mean((y_pred - y) ** 2))

    # Creating root mean square error for sklearns crossvalidation
    rmse_scorer = make_scorer(rmse, greater_is_better=False)
    score = cross_val_score(rfr, x_train.values, y_train.values.ravel(), cv=10, scoring=rmse_scorer)
    return -1 * np.mean(score)  # Because cross_validation sign-flips the score


logger = logging.getLogger("RF-example")
logging.basicConfig(level=logging.INFO)
# logging.basicConfig(level=logging.DEBUG)  # Enable to show debug-output
logger.info("Running random forest example for SMAC. If you experience "
            "difficulties, try to decrease the memory-limit.")

# Build Configuration Space which defines all parameters and their ranges.
# To illustrate different parameter types,
# we use continuous, integer and categorical parameters.
cs = ConfigurationSpace()

# We can add single hyperparameters:
do_bootstrapping = CategoricalHyperparameter(
    "do_bootstrapping", ["true", "false"], default_value="true")
cs.add_hyperparameter(do_bootstrapping)

# Or we can add multiple hyperparameters at once:
num_trees = UniformIntegerHyperparameter("num_trees", 10, 100, default_value=10)
max_features = UniformIntegerHyperparameter("max_features", 1, boston.data.shape[1], default_value=1)
min_weight_frac_leaf = UniformFloatHyperparameter("min_weight_frac_leaf", 0.0, 0.5, default_value=0.0)
criterion = CategoricalHyperparameter("criterion", ["mse", "mae"], default_value="mse")
min_samples_to_split = UniformIntegerHyperparameter("min_samples_to_split", 2, 20, default_value=2)
min_samples_in_leaf = UniformIntegerHyperparameter("min_samples_in_leaf", 1, 20, default_value=1)
max_leaf_nodes = UniformIntegerHyperparameter("max_leaf_nodes", 10, 1000, default_value=100)

cs.add_hyperparameters([num_trees, min_weight_frac_leaf, criterion,
                        max_features, min_samples_to_split, min_samples_in_leaf, max_leaf_nodes])

# SMAC scenario object
scenario = Scenario({"run_obj": "quality",  # we optimize quality (alternative runtime)
                     "runcount-limit": 10,  # max. number of function evaluations; for this example set to a low number
                     "cs": cs,  # configuration space
                     "deterministic": "True",
                     "memory_limit": 3072  # adapt this to reasonable value for your hardware
                     })

# To optimize, we pass the function to the SMAC-object
smac = SMAC4HPO(scenario=scenario, rng=np.random.RandomState(42),
              tae_runner=rf_from_cfg)

# Example call of the function with default values
# It returns: Status, Cost, Runtime, Additional Infos
def_value = smac.get_tae_runner().run(cs.get_default_configuration(), 1)[1]
print("Value for default configuration: %.2f" % def_value)

# Start optimization
try:
    incumbent = smac.optimize()
finally:
    incumbent = smac.solver.incumbent

inc_value = smac.get_tae_runner().run(incumbent, 1)[1]
print("Optimized Value: %.2f" % inc_value)

INFO:RF-example:Running random forest example for SMAC. If you experience difficulties, try to decrease the memory-limit.
INFO:smac.utils.io.cmd_reader.CMDReader:Output to smac3-output_2020-06-06_14:35:26_443277
INFO:smac.facade.smac_hpo_facade.SMAC4HPO:Optimizing a deterministic scenario for quality without a tuner timeout - will make SMAC deterministic and only evaluate one configuration per iteration!
INFO:smac.initial_design.sobol_design.SobolDesign:Running initial design for 2 configurations
INFO:smac.facade.smac_hpo_facade.SMAC4HPO:<class 'smac.facade.smac_hpo_facade.SMAC4HPO'>
INFO:smac.optimizer.smbo.SMBO:Running initial design
INFO:smac.intensification.intensification.Intensifier:First run, no incumbent provided; challenger is assumed to be the incumbent


Value for default configuration: 4.59


INFO:smac.intensification.intensification.Intensifier:Updated estimated cost of incumbent on 1 runs: 5.8904
INFO:smac.intensification.intensification.Intensifier:Updated estimated cost of incumbent on 1 runs: 5.8904
INFO:smac.intensification.intensification.Intensifier:Wallclock time limit for intensification reached (used: 2.284095 sec, available: 0.000010 sec)
INFO:smac.intensification.intensification.Intensifier:Updated estimated cost of incumbent on 1 runs: 5.8904
INFO:smac.intensification.intensification.Intensifier:Wallclock time limit for intensification reached (used: 1.713006 sec, available: 0.000010 sec)
INFO:smac.intensification.intensification.Intensifier:Updated estimated cost of incumbent on 1 runs: 5.8904
INFO:smac.intensification.intensification.Intensifier:Wallclock time limit for intensification reached (used: 2.815118 sec, available: 0.000010 sec)
INFO:smac.intensification.intensification.Intensifier:Updated estimated cost of incumbent on 1 runs: 5.8904
INFO:smac.int

Optimized Value: 4.14


In [0]:
rfr = RandomForestRegressor(criterion='mse',
   bootstrap= False,
   max_features = 7,
   max_leaf_nodes=467,
   min_samples_leaf=13,
   min_samples_split =18,
   min_weight_fraction_leaf =0.04146821760969338,
   n_estimators = 55)

In [229]:
rfr.fit(x_train, y_train.values.ravel())

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features=1, max_leaf_nodes=844,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=8,
                      min_weight_fraction_leaf=0.43483383881295706,
                      n_estimators=1000, n_jobs=None, oob_score=False,
                      random_state=None, verbose=0, warm_start=False)

In [0]:
y_pred = rfr.predict(x_test)

In [248]:
y_pred

array([24.8598213 , 25.71230439, 20.58931232, 23.73480972, 20.12984345,
       22.60559932, 22.18915707, 20.38111159, 20.58524856, 20.64820308,
       22.19773579, 22.63664864, 20.07376295, 22.63891291, 22.9140518 ,
       21.09836638, 21.34106128, 20.07376295, 25.71230439, 20.11123376,
       25.01736366, 25.71230439, 21.37536028, 23.09433414, 20.07376295,
       20.07376295, 23.77941419, 20.67696433, 22.70207966, 21.31837146,
       23.13051249, 24.79409433, 20.71443513, 20.39224627, 20.07376295,
       20.78306093, 25.58027386, 23.88973657, 23.12834601, 23.58152793,
       21.03731359, 25.67201794, 25.74692454, 23.63198314, 25.70619132,
       20.11123376, 21.37536028, 24.26303876, 20.52592706, 25.20382392,
       22.78469054, 25.74442465, 21.41283108, 23.23255419, 24.74601899,
       21.19749477, 20.11973801, 25.10556525, 25.53474198, 23.4668153 ,
       25.00300641, 25.71230439, 25.37730832, 21.79676233, 25.12673823,
       22.48507883, 20.74429033, 24.90387158, 25.71230439, 20.08

In [0]:
import numpy as np
def rmse(y, y_pred):
        return np.sqrt(np.mean((y_pred.reshape(-1,1)- y_test.values) ** 2))

In [233]:
rmse(y_test, y_pred)




7.437427541884284