In [1]:
import warnings

# Silence every warning for the rest of the notebook session.
# NOTE(review): a blanket 'ignore' filter also hides deprecation warnings
# raised by the libraries used in later cells; consider narrowing it.
warnings.filterwarnings('ignore')

In [2]:
# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and removed
# in 1.2, so this cell only runs on an older scikit-learn; confirm the pinned
# version or plan a dataset replacement.
from sklearn.datasets import load_boston as load_dataset

# Load the dataset object; its `.data` and `.target` attributes are what the
# train/test split cell reads.
dataset = load_dataset()

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples as a test set. The fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    dataset.data, dataset.target, test_size=0.3, random_state=0)

In [4]:
from sklearn.metrics import r2_score

In [5]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor

# NOTE: this starting configuration is deliberately weak, so that the
# BTBSession demo has plenty of room to show an improvement.
default_hyperparams = dict(
    n_estimators=2,
    max_features='log2',
    min_samples_split=2,
    min_samples_leaf=2,
)

# Baseline: train a random forest with the default hyperparameters and
# report its R^2 on the held-out test split (displayed as the cell output).
rf = RandomForestRegressor(**default_hyperparams, random_state=0)
pred = rf.fit(X_train, y_train).predict(X_test)

r2_score(y_test, pred)

0.7149946643194653

In [6]:
# Both tree ensembles are tuned over the same search space, so the space is
# written once and evaluated afresh for each model name; every entry gets its
# own independent dicts and lists.
tunables = {
    model_name: {
        'n_estimators': {'type': 'int', 'default': 2, 'range': [1, 1000]},
        'max_features': {
            'type': 'str',
            'default': 'log2',
            'range': [None, 'auto', 'log2', 'sqrt'],
        },
        'min_samples_split': {'type': 'int', 'default': 2, 'range': [2, 20]},
        'min_samples_leaf': {'type': 'int', 'default': 2, 'range': [1, 20]},
    }
    for model_name in ('random_forest', 'extra_trees')
}

In [23]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer

# Registry mapping a model name to the estimator class that implements it;
# `build_model` looks names up here.
models = dict(
    random_forest=RandomForestRegressor,
    extra_trees=ExtraTreesRegressor,
)

def build_model(name, hyperparameters):
    """Create a new estimator for one of the registered model types.

    Args:
        name: Key into the module-level ``models`` registry.
        hyperparameters: Mapping of keyword arguments forwarded to the
            estimator constructor.

    Returns:
        A new instance of the registered class, constructed with
        ``random_state=0`` plus the given hyperparameters.

    Raises:
        KeyError: If ``name`` is not a key of ``models``.
    """
    return models[name](random_state=0, **hyperparameters)

def score_model(name, hyperparameters):
    """Score one model configuration by cross-validation on the training split.

    The estimator is built with ``build_model`` and evaluated with
    ``cross_val_score`` on the notebook-level ``X_train`` / ``y_train``,
    using an R^2 scorer and the library's default cross-validation splitting.

    Args:
        name: Model name understood by ``build_model``.
        hyperparameters: Mapping of hyperparameters passed to ``build_model``.

    Returns:
        The mean of the per-fold scores.
    """
    estimator = build_model(name, hyperparameters)
    fold_scores = cross_val_score(
        estimator, X_train, y_train, scoring=make_scorer(r2_score))
    return fold_scores.mean()

In [21]:
# Time a single cross-validated scoring of the random forest with the
# default hyperparameters.
%timeit score_model('random_forest', default_hyperparams)

5.83 ms ± 46.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [22]:
# Time a single cross-validated scoring of the extra-trees model with the
# default hyperparameters.
%timeit score_model('extra_trees', default_hyperparams)

4.98 ms ± 23.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [53]:
from btb.session import BTBSession

# Create the tuning session from the per-model search spaces in `tunables`
# and the `score_model` scoring function, with verbose output enabled.
session = BTBSession(tunables, score_model, verbose=True)

In [58]:
# Run a single tuning iteration as a quick check; the value returned by
# `run` is displayed as the cell output.
session.run(iterations=1)

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

0.557176



{'id': '7f46cf82c5c897b35dbcc813b7ae3c1c',
 'name': 'extra_trees',
 'config': {'n_estimators': 494,
  'max_features': None,
  'min_samples_split': 2,
  'min_samples_leaf': 12},
 'score': 0.7957158607230291}

In [None]:
# Run the main tuning loop and keep the value returned by `run`.
# NOTE(review): this cell has no execution count and its recorded progress
# output stops after a few dozen values, while a later cell reports 101
# proposals. It looks like this 1000-iteration run was interrupted and the
# displayed `best_proposal` came from an earlier, shorter run; confirm the
# intended iteration count before relying on Restart & Run All.
best_proposal = session.run(iterations=1000)

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

1.238846
0.508359
0.837465
0.116272
0.207443
1.292001
0.359855
0.974083
0.505017
0.90993
1.190068
1.442615
0.996558
0.252241
1.137804
1.383644
1.382382
0.966254
0.646919
0.296913
1.109508
0.681838
0.804582
0.491484
0.939832
0.461509
0.339053
0.145221
0.079674
1.749239
1.234079
0.231933


In [26]:
import time
import datetime

In [28]:
# Record a wall-clock start time for the elapsed-time cell that follows.
# NOTE(review): in top-to-bottom order this runs after the tuning cells, so
# the measured interval would not cover them; confirm the intended placement.
start = datetime.datetime.now()

In [30]:
# Seconds elapsed since `start` was recorded.
(datetime.datetime.now() - start).total_seconds()

28.406693

In [13]:
# Display the proposal stored by the tuning run (id, model name, config and
# score, as shown in the recorded output).
best_proposal

{'id': '818c14d6401323e048f0df3afc83ce23',
 'name': 'extra_trees',
 'config': {'n_estimators': 633,
  'max_features': 'log2',
  'min_samples_split': 2,
  'min_samples_leaf': 1},
 'score': 0.8681580067039918}

In [14]:
# Number of entries in the session's `proposals` collection; presumably one
# per configuration evaluated so far (verify against the BTB docs).
len(session.proposals)

101

In [15]:
# Rebuild an estimator from the model name and hyperparameter config stored
# in the selected proposal.
best_model = build_model(best_proposal['name'], best_proposal['config'])

In [16]:
# Fit the selected configuration on the full training split and report its
# R^2 on the held-out test split (displayed as the cell output).
pred = best_model.fit(X_train, y_train).predict(X_test)

r2_score(y_test, pred)

0.8003128949624385