In [1]:
"""
SHERPA is a Python library for hyperparameter tuning of machine learning models.
Copyright (C) 2018  Lars Hertel, Peter Sadowski, and Julian Collado.

This file is part of SHERPA.

SHERPA is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

SHERPA is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with SHERPA.  If not, see <http://www.gnu.org/licenses/>.
"""
from __future__ import print_function
import sherpa
from sherpa.algorithms import Genetic
import time
from keras.datasets import mnist
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim
import torch.nn.functional as F

Using TensorFlow backend.


In [2]:
# Global experiment settings.
# NOTE: `epochs` and `batch_size` are assigned again in the training-setup
# cell further down (to 15 and 64) before they are first used, so the values
# here only take effect if that later cell is changed.
num_classes = 10  # number of target classes (digits)
epochs = 15       # training epochs per trial
batch_size = 32   # mini-batch size

In [3]:
# Fetch the MNIST train/test split through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every image into a 784-element float32 vector and divide by 255
# (presumably mapping 8-bit pixel intensities into [0, 1]).
x_train = x_train.reshape(60000, 784).astype('float32') / 255
x_test = x_test.reshape(10000, 784).astype('float32') / 255

# Report how many samples ended up in each split.
for split_name, split in (('train', x_train), ('test', x_test)):
    print(split.shape[0], '{} samples'.format(split_name))


60000 train samples
10000 test samples


In [21]:
# Hyperparameter search space explored by the SHERPA study.
parameters = [
    # Width of every hidden layer of the MLP.
    sherpa.Discrete('hidden_size', [16, 512]),
    # Total number of linear layers (the last one is the output layer).
    sherpa.Discrete('n_layers', [1, 10]),
    # Non-linearity applied after every layer except the last.
    sherpa.Choice('activation', [F.relu, F.tanh, F.sigmoid]),
    # Adam learning rate. The range spans two orders of magnitude, so it is
    # sampled on a log scale; uniform sampling would place ~90% of the draws
    # in [1e-3, 1e-2] and barely explore the lower decade.
    sherpa.Continuous('lr', [1e-4, 1e-2], scale='log'),
    # Dropout probability applied to the input of every layer.
    # NOTE(review): values close to 1.0 zero out almost all activations;
    # consider tightening the upper bound.
    sherpa.Continuous('dropout', [0.0, 1.0]),
]
              

In [25]:
# Search strategy: SHERPA's Genetic algorithm, limited to 100 trials.
algorithm = Genetic(max_num_trials=100)

In [26]:
class MLP(nn.Module):
    """Multi-layer perceptron with a configurable depth, width and activation.

    The network is a stack of ``n_layers`` ``nn.Linear`` layers registered as
    attributes ``dense_0`` ... ``dense_{n_layers-1}``. Dropout is applied to
    the input of every layer (including the first), and the activation is
    applied to the output of every layer except the last, so the network
    returns raw, un-normalised scores.

    Args:
        in_size: Number of input features.
        out_size: Number of output units.
        n_layers: Total number of linear layers; must be at least 1.
        hidden_size: Width of every hidden layer.
        act: Callable applied element-wise after each non-final layer.
        dropout: Dropout probability passed to ``nn.Dropout``.

    Raises:
        ValueError: If ``n_layers`` is smaller than 1. Without at least one
            layer the module would have no parameters and would return its
            input unchanged instead of ``out_size`` outputs.
    """

    def __init__(self,in_size,out_size,n_layers,hidden_size,act,dropout):
        super(MLP,self).__init__()
        if n_layers < 1:
            raise ValueError(
                'n_layers must be at least 1, got {}'.format(n_layers))
        self.n_layers = n_layers
        self.act = act
        last = n_layers - 1
        for i in range(n_layers):
            # The first layer consumes the raw input; the last one produces
            # the outputs; everything in between is hidden_size wide.
            layer_in_size = in_size if i == 0 else hidden_size
            layer_out_size = out_size if i == last else hidden_size
            # Assigning a Module as an attribute registers it (and its
            # parameters) with this module.
            setattr(self, 'dense_{}'.format(i),
                    nn.Linear(layer_in_size, layer_out_size))

        self.dropout = nn.Dropout(dropout)

    def forward(self,x):
        """Run ``x`` through the layer stack and return the final layer's output."""
        out = x
        last = self.n_layers - 1
        for i in range(self.n_layers):
            layer = getattr(self, 'dense_{}'.format(i))
            out = layer(self.dropout(out))
            # No activation after the final layer: callers get raw scores.
            if i != last:
                out = self.act(out)
        return out

In [27]:
# Training configuration used by the study loop (these assignments replace
# the values set in the configuration cell near the top of the notebook).
epochs = 15
batch_size = 64

# Mini-batch iterator over the training set. shuffle=True re-orders the
# samples every epoch; without it, drop_last=True would discard the very
# same trailing partial batch each epoch, so those samples would never be
# trained on and every epoch would see identical batches.
train_data = DataLoader(
    TensorDataset(torch.from_numpy(x_train),
                  torch.from_numpy(y_train).type(torch.long)),
    batch_size=batch_size,
    shuffle=True,
    drop_last=True)

# CrossEntropyLoss takes raw scores and integer class labels, hence the
# conversion of the label arrays to torch.long.
criterion = nn.CrossEntropyLoss()

# Held-out tensors evaluated in a single forward pass after each trial.
x_test_tensor = torch.from_numpy(x_test)
y_test_tensor = torch.from_numpy(y_test).type(torch.long)

In [None]:
# Run the hyperparameter search: every trial trains a fresh MLP with the
# suggested hyperparameters and reports its accuracy as the objective
# (higher is better).
# NOTE(review): if the dashboard process fails to start in this environment,
# sherpa.Study accepts disable_dashboard=True — confirm against the
# installed SHERPA version.
study = sherpa.Study(parameters=parameters,
                     algorithm=algorithm,
                     lower_is_better=False)
for trial in study:
    print("Trial {}:\t{}".format(trial.id, trial.parameters))
    mlp = MLP(x_train.shape[1], num_classes,
              trial.parameters['n_layers'],
              trial.parameters['hidden_size'],
              trial.parameters['activation'],
              trial.parameters['dropout'])
    optimizer = optim.Adam(mlp.parameters(), lr=trial.parameters['lr'])

    # Training phase (dropout active).
    mlp.train()
    for _ in range(epochs):
        for x_batch, y_batch in train_data:
            optimizer.zero_grad()
            loss = criterion(mlp(x_batch), y_batch)
            loss.backward()
            optimizer.step()

    # Evaluation phase: dropout disabled, and no autograd graph is recorded
    # for the full-dataset forward pass since no gradients are needed.
    # NOTE(review): the objective is computed on the test split, so the
    # reported best accuracy is tuned on that same data; a separate
    # validation split would give an unbiased estimate.
    mlp.eval()
    with torch.no_grad():
        predictions = mlp(x_test_tensor).argmax(dim=1)
    val_acc = (predictions == y_test_tensor).type(torch.float32).mean().item()
    print(val_acc)

    # One observation per trial, recorded at the final epoch.
    study.add_observation(trial=trial,
                          iteration=epochs,
                          objective=val_acc)
    study.finalize(trial=trial)

INFO:sherpa.core:
-------------------------------------------------------
SHERPA Dashboard running. Access via
http://127.0.1.1:8880 if on a cluster or
http://localhost:8880 if running locally.
-------------------------------------------------------
Process Process-10:
Traceback (most recent call last):
  File "/home/afuster/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/afuster/anaconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/afuster/anaconda3/lib/python3.6/site-packages/flask/app.py", line 938, in run
    cli.show_server_banner(self.env, self.debug, self.name, False)
  File "/home/afuster/anaconda3/lib/python3.6/site-packages/flask/cli.py", line 629, in show_server_banner
    click.echo(message)
  File "/home/afuster/anaconda3/lib/python3.6/site-packages/click/utils.py", line 259, in echo
    file.write(message)
io.UnsupportedOperation: not writab

Trial 1:	{'hidden_size': 460, 'n_layers': 1, 'activation': <function tanh at 0x7f1ffe86b840>, 'lr': 0.00021695391812050625, 'dropout': 0.39973204847122445}
0.9197999835014343
Trial 2:	{'hidden_size': 115, 'n_layers': 6, 'activation': <function tanh at 0x7f1ffe86b840>, 'lr': 0.009140637249645395, 'dropout': 0.7670655675929657}
0.06909999996423721
Trial 3:	{'hidden_size': 166, 'n_layers': 5, 'activation': <function tanh at 0x7f1ffe86b840>, 'lr': 0.00034159143102930386, 'dropout': 0.3487156777277136}
0.9642000198364258
Trial 4:	{'hidden_size': 78, 'n_layers': 7, 'activation': <function sigmoid at 0x7f1ffe86b950>, 'lr': 0.007737311617395133, 'dropout': 0.710135204033945}
0.10279999673366547
Trial 5:	{'hidden_size': 429, 'n_layers': 3, 'activation': <function sigmoid at 0x7f1ffe86b950>, 'lr': 0.008554026409167272, 'dropout': 0.5679616916002217}
0.9617999792098999
Trial 6:	{'hidden_size': 120, 'n_layers': 2, 'activation': <function relu at 0x7f1ffe869268>, 'lr': 0.00838299992450946, 'dropout

0.9739999771118164
Trial 48:	{'hidden_size': 407, 'n_layers': 3, 'activation': <function sigmoid at 0x7f1ffe86b950>, 'lr': 0.009493107893987409, 'dropout': 0.13462578033866046}
0.9775999784469604
Trial 49:	{'hidden_size': 448, 'n_layers': 3, 'activation': <function sigmoid at 0x7f1ffe86b950>, 'lr': 0.008367673307662504, 'dropout': 0.13462578033866046}
0.9803000092506409
Trial 50:	{'hidden_size': 407, 'n_layers': 2, 'activation': <function sigmoid at 0x7f1ffe86b950>, 'lr': 0.008283454143537825, 'dropout': 0.2511163133899563}
0.9768000245094299
Trial 51:	{'hidden_size': 264, 'n_layers': 2, 'activation': <function sigmoid at 0x7f1ffe86b950>, 'lr': 0.008283454143537825, 'dropout': 0.13462578033866046}
0.9776999950408936
Trial 52:	{'hidden_size': 407, 'n_layers': 2, 'activation': <function sigmoid at 0x7f1ffe86b950>, 'lr': 0.009493107893987409, 'dropout': 0.13462578033866046}
0.9797000288963318
Trial 53:	{'hidden_size': 407, 'n_layers': 2, 'activation': <function sigmoid at 0x7f1ffe86b950>,

In [19]:
# Print the best result recorded by the study so far.
print(study.get_best_result())

{'Trial-ID': 14, 'Iteration': 15, 'activation': <function relu at 0x7f1ffe869268>, 'hidden_size': 74, 'n_layers': 3, 'Objective': 0.9758999943733215}
