In [None]:
"""
SHERPA is a Python library for hyperparameter tuning of machine learning models.
Copyright (C) 2018  Lars Hertel, Peter Sadowski, and Julian Collado.

This file is part of SHERPA.

SHERPA is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

SHERPA is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with SHERPA.  If not, see <http://www.gnu.org/licenses/>.
"""
from __future__ import print_function
import sherpa
import sherpa.algorithms.bayesian_optimization as bayesian_optimization
import time
import mxnet as mx
from mxnet import gluon, autograd
from mxnet.gluon import nn
from mxnet.gluon.data.vision import datasets, transforms

In [None]:
# Load the Fashion-MNIST training split and unpack its first sample so the
# untransformed image and label can be inspected.
mnist_train = datasets.FashionMNIST(train=True)
X, y = mnist_train[0]
# Left as a bare expression so the notebook shows it as the cell's result.
(
    'X shape: ', X.shape,
    'X dtype', X.dtype,
    'y:', y,
)

In [None]:
# Preprocessing pipeline: ToTensor followed by Normalize(0.13, 0.31).
transformer = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(0.13, 0.31),
])
# transform_first applies the pipeline to the first element of each sample
# (the `X` part of an `(X, y)` pair). Note that this rebinds `mnist_train`.
mnist_train = mnist_train.transform_first(transformer)

In [None]:
# Mini-batch size; later cells reuse this name for the validation loader
# and inside the training loop.
batch_size = 256

# Shuffled loader over the training set, with 4 workers.
train_data = gluon.data.DataLoader(
    dataset=mnist_train,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)

In [None]:
# Held-out split (train=False), passed through the same `transformer` as the
# training data. The loader is not shuffled and uses 3 workers.
mnist_valid = datasets.FashionMNIST(train=False)
valid_data = gluon.data.DataLoader(
    dataset=mnist_valid.transform_first(transformer),
    batch_size=batch_size,
    num_workers=3,
)

In [None]:
# Search space handed to SHERPA:
#   lr        - continuous in [0.005, 0.1], log scale
#   dropout   - continuous in [0, 0.5]
#   num_units - discrete with range [72, 128]
parameters = [
    sherpa.Continuous(name='lr', range=[0.005, 0.1], scale='log'),
    sherpa.Continuous(name='dropout', range=[0., 0.5]),
    sherpa.Discrete(name='num_units', range=[72, 128]),
]

In [None]:

# Bayesian optimisation through SHERPA's GPyOpt wrapper, configured with the
# 'GP_MCMC' model type, the 'EI_MCMC' acquisition type and max_concurrent=1.
algorithm = bayesian_optimization.GPyOpt(
    model_type='GP_MCMC',
    acquisition_type='EI_MCMC',
    max_concurrent=1,
)

In [None]:
# The study ties the search space to the optimisation algorithm.
# lower_is_better=False because the objective reported by the training loop
# is based on validation accuracy, where larger is better.
study = sherpa.Study(
    algorithm=algorithm,
    parameters=parameters,
    lower_is_better=False,
)

In [None]:
def acc(output, label):
    """Return the fraction of rows in `output` whose argmax equals `label`.

    `output` is reduced with ``argmax(axis=1)`` and `label` is cast to
    'float32' before the element-wise comparison; the mean of the comparison
    result is returned via ``asscalar()``.
    """
    predictions = output.argmax(axis=1)
    targets = label.astype('float32')
    hits = predictions == targets
    return hits.mean().asscalar()

In [None]:
# Hyperparameter search: each trial proposed by the study trains a freshly
# initialised CNN for 10 epochs and reports its validation accuracy back to
# the study after every epoch.
for trial in study:
    # Build the network from this trial's `dropout` and `num_units` values.
    net = nn.Sequential()
    net.add(nn.Conv2D(channels=6, kernel_size=5, activation='relu'),
            nn.MaxPool2D(pool_size=2, strides=2),
            nn.Conv2D(channels=16, kernel_size=3, activation='relu'),
            nn.MaxPool2D(pool_size=2, strides=2),
            nn.Dropout(trial.parameters['dropout']),
            nn.Flatten(),
            nn.Dense(trial.parameters['num_units'], activation="relu"),
            nn.Dense(10))
    net.initialize(init=mx.init.Xavier())
    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
    # Plain SGD with the learning rate proposed for this trial.
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': trial.parameters['lr']})
    for epoch in range(10):
        # Running sums over batches; divided by the batch count below.
        train_loss, train_acc, valid_acc = 0., 0., 0.
        for data, label in train_data:
            with autograd.record():
                output = net(data)
                loss = softmax_cross_entropy(output, label)
            loss.backward()
            # Normalise the gradient by the number of samples actually in
            # this batch: the final batch of an epoch can be smaller than
            # `batch_size`, and stepping with the nominal size would
            # under-scale that update.
            trainer.step(data.shape[0])
            train_loss += loss.mean().asscalar()
            train_acc += acc(output, label)
        # validation accuracy
        for data, label in valid_data:
            valid_acc += acc(net(data), label)
        # Average of the per-batch accuracies. This same value is printed
        # and reported, so the study's objective is an accuracy in [0, 1]
        # rather than a sum that scales with the number of batches.
        mean_valid_acc = valid_acc / len(valid_data)

        print("Epoch %d: loss %.3f, train acc %.3f, validation acc %.3f" % (
                epoch, train_loss/len(train_data), train_acc/len(train_data),
                mean_valid_acc))
        study.add_observation(trial=trial,
                              iteration=epoch+1,
                              objective=mean_valid_acc)
    # Mark the trial as finished once all epochs have been reported.
    study.finalize(trial=trial)

In [None]:
# Print the best result recorded by the study.
best_result = study.get_best_result()
print(best_result)