<a href="https://colab.research.google.com/github/marcinwolter/MachineLearning2020/blob/main/mnist_mlp_minimal_optuna.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
"""
Optuna example that optimizes a neural network classifier configuration for the
MNIST dataset using Keras.
In this example, we optimize the validation accuracy of MNIST classification using
Keras. We optimize the filter and kernel size, kernel stride and layer activation.
"""
! pip install optuna

import warnings

from keras.backend import clear_session
from keras.datasets import mnist
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.optimizers import RMSprop

import optuna

Collecting optuna
[?25l  Downloading https://files.pythonhosted.org/packages/87/10/06b58f4120f26b603d905a594650440ea1fd74476b8b360dbf01e111469b/optuna-2.3.0.tar.gz (258kB)
[K     |█▎                              | 10kB 24.0MB/s eta 0:00:01[K     |██▌                             | 20kB 17.2MB/s eta 0:00:01[K     |███▉                            | 30kB 15.9MB/s eta 0:00:01[K     |█████                           | 40kB 15.3MB/s eta 0:00:01[K     |██████▍                         | 51kB 12.4MB/s eta 0:00:01[K     |███████▋                        | 61kB 12.5MB/s eta 0:00:01[K     |████████▉                       | 71kB 12.5MB/s eta 0:00:01[K     |██████████▏                     | 81kB 13.3MB/s eta 0:00:01[K     |███████████▍                    | 92kB 14.0MB/s eta 0:00:01[K     |████████████▊                   | 102kB 14.0MB/s eta 0:00:01[K     |██████████████                  | 112kB 14.0MB/s eta 0:00:01[K     |███████████████▏                | 122kB 14.0MB/s eta 0:0

In [2]:

N_TRAIN_EXAMPLES = 3000
N_VALID_EXAMPLES = 1000
BATCHSIZE = 128
CLASSES = 10
EPOCHS = 20


In [3]:
def objective(trial):
    # Clear clutter from previous Keras session graphs.
    clear_session()

    (x_train, y_train), (x_valid, y_valid) = mnist.load_data()

    x_train = x_train.reshape(60000, 784)[:N_TRAIN_EXAMPLES].astype("float32") / 255
    x_valid = x_valid.reshape(10000, 784)[:N_VALID_EXAMPLES].astype("float32") / 255
    y_train = y_train[:N_TRAIN_EXAMPLES]
    y_valid = y_valid[:N_VALID_EXAMPLES]
    

    units = int(trial.suggest_discrete_uniform("units1", 32,1024,1))
    dropout = trial.suggest_uniform('dropout1', .00,.50)
    act = trial.suggest_categorical('activation', ['relu','sigmoid','linear'])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    epochs = int(trial.suggest_discrete_uniform('epochs', 10,50,1))

    model = Sequential()
    model.add(Dense(units, activation='relu', input_shape=(784,)))
    model.add(Dropout(dropout))
    model.add(Dense(units, activation='relu'))
    model.add(Dropout(dropout))
    model.add(Dense(units, activation='relu'))
    model.add(Dropout(dropout))

    model.add(Dense(CLASSES, activation="softmax"))

    # We compile our model with a sampled learning rate.
    model.compile(
        loss="sparse_categorical_crossentropy", optimizer=RMSprop(lr=lr), metrics=["accuracy"]
    )

    model.fit(
        x_train,
        y_train,
        validation_data=(x_valid, y_valid),
        shuffle=True,
        batch_size=BATCHSIZE,
        epochs=epochs,
        verbose=False,
    )

    # Evaluate the model accuracy on the validation set.
    score = model.evaluate(x_valid, y_valid, verbose=0)
    return score[1]


# Find the best parameters

In [4]:


study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100, timeout=600)


[32m[I 2020-12-05 21:20:38,304][0m A new study created in memory with name: no-name-a5fab272-7b7f-4c8a-a87d-f711ff01544d[0m


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


[32m[I 2020-12-05 21:20:50,700][0m Trial 0 finished with value: 0.9150000214576721 and parameters: {'units1': 960.0, 'dropout1': 0.14810802964670045, 'activation': 'sigmoid', 'lr': 0.00025055236818330055, 'epochs': 46.0}. Best is trial 0 with value: 0.9150000214576721.[0m
[32m[I 2020-12-05 21:20:54,161][0m Trial 1 finished with value: 0.8730000257492065 and parameters: {'units1': 449.0, 'dropout1': 0.02350613957868164, 'activation': 'linear', 'lr': 1.9864881070528688e-05, 'epochs': 30.0}. Best is trial 0 with value: 0.9150000214576721.[0m
[32m[I 2020-12-05 21:20:56,171][0m Trial 2 finished with value: 0.8899999856948853 and parameters: {'units1': 90.0, 'dropout1': 0.24222611364128904, 'activation': 'sigmoid', 'lr': 0.00128435226274464, 'epochs': 12.0}. Best is trial 0 with value: 0.9150000214576721.[0m
[32m[I 2020-12-05 21:20:58,628][0m Trial 3 finished with value: 0.8119999766349792 and parameters: {'units1': 706.0, 'dropout1': 0.25006974001942256, 'activation': 'linear', '

In [5]:

print("Number of finished trials: {}".format(len(study.trials)))

print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

Number of finished trials: 100
Best trial:
  Value: 0.9380000233650208
  Params: 
    units1: 253.0
    dropout1: 0.2670154265928311
    activation: linear
    lr: 0.004066744279868047
    epochs: 32.0


## Visualizing the Optimization History

In [6]:
from optuna.visualization import plot_optimization_history

plot_optimization_history(study)

## Visualizing High-dimensional Parameter Relationships

In [7]:
from optuna.visualization import plot_parallel_coordinate

plot_parallel_coordinate(study)

## Visualizing Parameter Relationships

In [8]:
from optuna.visualization import plot_contour

plot_contour(study)

### Selecting Parameters to Visualize

In [9]:
plot_contour(study, params=['units1', 'dropout1', 'lr','epochs'])

## Visualizing Individual Parameters

In [10]:
from optuna.visualization import plot_slice

plot_slice(study)

### Selecting Parameters to Visualize

In [11]:
plot_slice(study, params=['units1', 'dropout1'])

## Visualizing Parameter Importances

In [12]:
from optuna.visualization import plot_param_importances

plot_param_importances(study)