In [1]:
"""
The MIT License (MIT)
Copyright (c) 2021 NVIDIA
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""


'\nThe MIT License (MIT)\nCopyright (c) 2021 NVIDIA\nPermission is hereby granted, free of charge, to any person obtaining a copy of\nthis software and associated documentation files (the "Software"), to deal in\nthe Software without restriction, including without limitation the rights to\nuse, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of\nthe Software, and to permit persons to whom the Software is furnished to do so,\nsubject to the following conditions:\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\nTHE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS\nFOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR\nCOPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER\nIN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OU

This code example demonstrates how to use Neural Architecture Search (NAS) to find a suitable architecture for CIFAR-10 classification. We implement random search and hill climbing. More context for this code example can be found in the section "Programming Example: Searching for an architecture for CIFAR-10 classification" in Chapter 17 in the book Learning Deep Learning by Magnus Ekman (ISBN: 9780137470358).

We start with initialization code and loading the dataset in the code snippet below. We define some variables that are part of defining the search space, such as what types of layers can be used and what kind of parameters and values are valid for each type of layer.


In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Lambda
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import MaxPooling2D
import numpy as np
import logging
import copy
tf.get_logger().setLevel(logging.ERROR)

# Search budget and training schedule.
MAX_MODEL_SIZE = 500_000       # Candidates with more weights than this are rejected.
CANDIDATE_EVALUATIONS = 100    # Iterations run by each search loop.
EVAL_EPOCHS = 3                # Epochs used to score a candidate during search.
FINAL_EPOCHS = 20              # Epochs used to train the final, best model.

# Layer kinds available to the search. Index 0 is the dense (top) layer type;
# indices 1 and 2 are the non-dense (bottom) layer types.
layer_types = ['DENSE', 'CONV2D', 'MAXPOOL2D']

# Legal values for every tunable layer parameter.
param_values = {
    'size': [16, 64, 256, 1024, 4096],
    'activation': ['relu', 'tanh', 'elu'],
    'kernel_size': [(1, 1), (2, 2), (3, 3), (4, 4)],
    'stride': [(1, 1), (2, 2), (3, 3), (4, 4)],
    'dropout': [0.0, 0.4, 0.7, 0.9],
}

# Which parameters apply to which layer type.
layer_params = {
    'DENSE': ['size', 'activation', 'dropout'],
    'CONV2D': ['size', 'activation', 'kernel_size', 'stride', 'dropout'],
    'MAXPOOL2D': ['kernel_size', 'stride', 'dropout'],
}

# Load dataset.
cifar_dataset = keras.datasets.cifar10
(train_images, train_labels), (test_images, test_labels) = (
    cifar_dataset.load_data())

# Standardize dataset. Both splits are scaled with the statistics of the
# training images only.
mean, stddev = np.mean(train_images), np.std(train_images)
train_images = (train_images - mean) / stddev
test_images = (test_images - mean) / stddev

# Change labels to one-hot (10 classes).
train_labels = to_categorical(train_labels, num_classes=10)
test_labels = to_categorical(test_labels, num_classes=10)


The next step is to build some infrastructure for automatically generating models. To keep things simple, we impose significant restrictions on the search space. To start with, we allow only sequential models. In addition, given our knowledge of the application (image classification), we impose a rigid structure on the network. We view the network as a combination of a bottom subnetwork and a top subnetwork. The bottom part consists of a combination of convolutional and maxpooling layers, and the top part consists of fully connected layers. In addition, we allow dropout layers after any layer, and we also add a flatten layer between the bottom and the top to ensure that we end up with a valid TensorFlow model.

The methods in the code snippet below are used to generate a random model within this constrained search space. There is also a method that computes the size of the resulting model in terms of the number of trainable parameters. Note that these methods do not have anything to do with TensorFlow; they operate on our own representation of a network, which is created before invoking the DL framework.


In [3]:
# Methods to create a model definition.
def generate_random_layer(layer_type):
    """Build a layer definition of the given type with random parameters.

    Args:
        layer_type: Key into ``layer_params`` naming the kind of layer.

    Returns:
        A dict with a ``'layer_type'`` entry plus one entry for every
        parameter listed in ``layer_params[layer_type]``. Each parameter is
        set to an element of ``param_values[parameter]`` selected with
        ``np.random.randint``.
    """
    layer = {'layer_type': layer_type}
    for name in layer_params[layer_type]:
        choices = param_values[name]
        pick = np.random.randint(0, len(choices))
        layer[name] = choices[pick]
    return layer

def generate_model_definition():
    """Generate a random model definition inside the constrained search space.

    The model has between 2 and 8 layers. The leading layers (at least one)
    are non-dense, each randomly CONV2D or MAXPOOL2D; every remaining layer
    (at least one) is DENSE.

    Returns:
        A list of layer dicts as produced by ``generate_random_layer``.
    """
    layer_count = np.random.randint(2, 9)
    # Upper bound is exclusive, so at least one layer is left for the dense part.
    non_dense_count = np.random.randint(1, layer_count)
    layers = []
    for position in range(layer_count):
        if position >= non_dense_count:
            layer_type = 'DENSE'
        else:
            layer_type = layer_types[np.random.randint(1, 3)]
        layers.append(generate_random_layer(layer_type))
    return layers

def compute_weight_count(layers):
    """Return the number of trainable parameters of a model definition.

    The count is derived from our own layer dicts, without building a
    TensorFlow model. It assumes a 32x32x3 input and includes the final
    10-unit dense output layer that ``create_model`` appends. Convolution and
    pooling layers are assumed to use ``'same'`` padding (as ``add_layer``
    does), so each spatial dimension becomes ``ceil(dim / stride)``. Dropout
    contributes no weights and is ignored.

    Args:
        layers: List of layer dicts with a ``'layer_type'`` of ``'DENSE'``,
            ``'CONV2D'`` or ``'MAXPOOL2D'`` and the parameters of that type.
            Non-dense layers are expected to precede all dense layers.

    Returns:
        The total weight count (weights plus biases) as a Python ``int``.

    Raises:
        ValueError: If a layer has an unrecognized ``'layer_type'``.
    """
    def _ceil_div(numerator, denominator):
        # Integer ceiling division keeps every dimension an exact int, so the
        # resulting count is an int rather than a float.
        return -(-numerator // denominator)

    last_shape = (32, 32, 3)
    total_weights = 0
    for layer in layers:
        layer_type = layer['layer_type']
        if layer_type == 'DENSE':
            size = layer['size']
            # One weight per (flattened) input plus one bias, per unit.
            weights = size * (int(np.prod(last_shape)) + 1)
            last_shape = (size,)
        elif layer_type in ('CONV2D', 'MAXPOOL2D'):
            stride = layer['stride']
            out_height = _ceil_div(last_shape[0], stride[0])
            out_width = _ceil_div(last_shape[1], stride[1])
            if layer_type == 'CONV2D':
                size = layer['size']
                kernel_size = layer['kernel_size']
                # One kernel per input channel plus one bias, per filter.
                weights = size * (int(np.prod(kernel_size)) *
                                  last_shape[2] + 1)
                last_shape = (out_height, out_width, size)
            else:
                # Pooling has no weights; it only shrinks the feature map.
                weights = 0
                last_shape = (out_height, out_width, last_shape[2])
        else:
            raise ValueError('Unknown layer type: %r' % (layer_type,))
        total_weights += weights
    # Final 10-way output layer.
    total_weights += (int(np.prod(last_shape)) + 1) * 10
    return int(total_weights)


The next set of methods takes the model definition created in the previous code snippet and creates and evaluates a corresponding TensorFlow model for a small number of epochs. This is all shown in the next code snippet. The method that evaluates the model imposes a size restriction. If the requested model has too many parameters, the method simply returns an accuracy of 0.0. The search algorithm that invokes the method will need to check for this and, if needed, generate a smaller model.


In [4]:
# Methods to create and evaluate model based on model definition.
def add_layer(model, params, prior_type):
    """Append the Keras layer(s) described by one layer definition.

    Args:
        model: Model exposing ``add``; it is modified in place.
        params: Layer dict with ``'layer_type'``, ``'dropout'`` and the
            parameters belonging to that layer type.
        prior_type: Layer type of the previously added layer. A ``Flatten``
            layer is inserted before a dense layer whenever the previous
            layer was not dense.
    """
    kind = params['layer_type']
    if kind == 'DENSE':
        if prior_type != 'DENSE':
            model.add(Flatten())
        units = params['size']
        activation = params['activation']
        model.add(Dense(units, activation=activation))
    elif kind == 'CONV2D':
        filters = params['size']
        activation = params['activation']
        kernel = params['kernel_size']
        strides = params['stride']
        model.add(Conv2D(filters, kernel, activation=activation,
                         strides=strides, padding='same'))
    elif kind == 'MAXPOOL2D':
        pool = params['kernel_size']
        strides = params['stride']
        model.add(MaxPooling2D(pool_size=pool, strides=strides,
                               padding='same'))
    # Dropout may follow any layer; a rate of 0.0 means "no dropout layer".
    rate = params['dropout']
    if rate > 0.0:
        model.add(Dropout(rate))

def create_model(layers):
    """Build and compile a Keras model from a model definition.

    The Keras session is cleared first. The model starts with an identity
    ``Lambda`` layer whose only job is to declare the 32x32x3 input shape,
    followed by the layers from ``layers`` (added via ``add_layer``) and a
    final 10-unit softmax output layer.

    Args:
        layers: List of layer dicts, each with a ``'layer_type'`` entry.

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        Adam optimizer, accuracy metric).
    """
    tf.keras.backend.clear_session()
    model = Sequential()
    model.add(Lambda(lambda x: x, input_shape=(32, 32, 3)))
    # 'LAMBDA' is a placeholder type for the dummy input layer above.
    previous_type = 'LAMBDA'
    for spec in layers:
        add_layer(model, spec, previous_type)
        previous_type = spec['layer_type']
    model.add(Dense(10, activation='softmax'))
    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model

def create_and_evaluate_model(model_definition):
    """Train a candidate briefly and return its validation accuracy.

    Args:
        model_definition: List of layer dicts describing the candidate.

    Returns:
        ``0.0`` without any training if the candidate has more than
        ``MAX_MODEL_SIZE`` weights; callers use this value to detect rejected
        models. Otherwise the ``val_accuracy`` of the last of ``EVAL_EPOCHS``
        epochs, measured on ``test_images``/``test_labels``.
    """
    weight_count = compute_weight_count(model_definition)
    if weight_count > MAX_MODEL_SIZE:
        return 0.0
    model = create_model(model_definition)
    fit_options = dict(
        validation_data=(test_images, test_labels),
        epochs=EVAL_EPOCHS,
        batch_size=64,
        verbose=2,
        shuffle=False,
    )
    history = model.fit(train_images, train_labels, **fit_options)
    acc = history.history['val_accuracy'][-1]
    print('Size: ', weight_count)
    print('Accuracy: %5.2f' %acc)
    return acc


We now have all the building blocks to implement pure random search. This is shown in the code snippet below. It consists of an outer for loop that runs for a fixed number of iterations. Each iteration randomly generates and evaluates a model. There is an inner loop to handle the case when the generated model is too big. The inner loop simply repeatedly generates random models until one is generated that adheres to the size restriction.


In [5]:
# Pure random search.
np.random.seed(7)
val_accuracy = 0.0
for i in range(CANDIDATE_EVALUATIONS):
    print(f'\nCurrent Iteration: {i + 1} of {CANDIDATE_EVALUATIONS}')
    # Keep drawing candidates until one is accepted; an accuracy of 0.0 is
    # how create_and_evaluate_model reports an oversized model.
    valid_model = False
    while not valid_model:
        model_definition = generate_model_definition()
        acc = create_and_evaluate_model(model_definition)
        valid_model = acc > 0.0
    # Remember the best candidate seen so far.
    if acc > val_accuracy:
        best_model, val_accuracy = model_definition, acc
    print('Random search, best accuracy: %5.2f' %val_accuracy)


2021-10-21 17:53:05.133591: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2021-10-21 17:53:05.134020: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Metal device set to: Apple M1

systemMemory: 8.00 GB
maxCacheSize: 2.67 GB



2021-10-21 17:53:06.150648: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
2021-10-21 17:53:06.155171: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2021-10-21 17:53:06.336740: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 1/3


2021-10-21 17:53:16.061900: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


782/782 - 11s - loss: 3.1052 - accuracy: 0.1091 - val_loss: 2.1582 - val_accuracy: 0.1802
Epoch 2/3
782/782 - 11s - loss: 2.3526 - accuracy: 0.1324 - val_loss: 2.1013 - val_accuracy: 0.1830
Epoch 3/3
782/782 - 11s - loss: 2.3049 - accuracy: 0.1454 - val_loss: 2.0770 - val_accuracy: 0.1994
Size:  306570.0
Accuracy:  0.20
Random search, best accuracy:  0.20
Epoch 1/3


2021-10-21 17:53:40.168487: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-10-21 17:53:55.732628: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


782/782 - 17s - loss: 2.4532 - accuracy: 0.1010 - val_loss: 2.3318 - val_accuracy: 0.1002
Epoch 2/3
782/782 - 15s - loss: 2.4027 - accuracy: 0.1013 - val_loss: 2.3261 - val_accuracy: 0.1000
Epoch 3/3
782/782 - 15s - loss: 2.4128 - accuracy: 0.0999 - val_loss: 2.3296 - val_accuracy: 0.1000
Size:  154650.0
Accuracy:  0.10
Random search, best accuracy:  0.20
Epoch 1/3


2021-10-21 17:54:29.159199: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-10-21 17:54:39.459875: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


782/782 - 11s - loss: 2.4859 - accuracy: 0.1058 - val_loss: 2.2904 - val_accuracy: 0.1114
Epoch 2/3
782/782 - 10s - loss: 2.2955 - accuracy: 0.1225 - val_loss: 2.2655 - val_accuracy: 0.1080
Epoch 3/3
782/782 - 10s - loss: 2.2887 - accuracy: 0.1259 - val_loss: 2.2608 - val_accuracy: 0.1118
Size:  310746.0
Accuracy:  0.11
Random search, best accuracy:  0.20
Epoch 1/3


2021-10-21 17:55:01.711296: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-10-21 17:55:10.462582: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


782/782 - 10s - loss: 2.3239 - accuracy: 0.1064 - val_loss: 2.2430 - val_accuracy: 0.1600
Epoch 2/3
782/782 - 8s - loss: 2.2919 - accuracy: 0.1199 - val_loss: 2.2325 - val_accuracy: 0.1598
Epoch 3/3
782/782 - 8s - loss: 2.2791 - accuracy: 0.1295 - val_loss: 2.2348 - val_accuracy: 0.1291
Size:  50026.0
Accuracy:  0.13
Random search, best accuracy:  0.20
Epoch 1/3


2021-10-21 17:55:28.470952: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-10-21 17:55:35.722685: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


782/782 - 8s - loss: 2.5130 - accuracy: 0.1007 - val_loss: 2.3010 - val_accuracy: 0.1225
Epoch 2/3
782/782 - 7s - loss: 2.4562 - accuracy: 0.1039 - val_loss: 2.3016 - val_accuracy: 0.1006
Epoch 3/3
782/782 - 7s - loss: 2.3747 - accuracy: 0.1052 - val_loss: 2.3049 - val_accuracy: 0.1001
Size:  121882.0
Accuracy:  0.10
Random search, best accuracy:  0.20
Epoch 1/3


2021-10-21 17:55:51.761290: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-10-21 17:55:57.480200: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


782/782 - 7s - loss: 1.9851 - accuracy: 0.2910 - val_loss: 1.7457 - val_accuracy: 0.3727
Epoch 2/3
782/782 - 6s - loss: 1.8499 - accuracy: 0.3369 - val_loss: 1.7037 - val_accuracy: 0.3908
Epoch 3/3
782/782 - 6s - loss: 1.8115 - accuracy: 0.3527 - val_loss: 1.6682 - val_accuracy: 0.4058
Size:  438986.0
Accuracy:  0.41
Random search, best accuracy:  0.41
Epoch 1/3


2021-10-21 17:56:12.315382: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-10-21 17:56:17.963353: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


782/782 - 7s - loss: 2.5258 - accuracy: 0.1283 - val_loss: 2.2430 - val_accuracy: 0.1779
Epoch 2/3
782/782 - 6s - loss: 2.4336 - accuracy: 0.1342 - val_loss: 2.2219 - val_accuracy: 0.1637
Epoch 3/3
782/782 - 6s - loss: 2.4009 - accuracy: 0.1379 - val_loss: 2.2402 - val_accuracy: 0.1498
Size:  155658.0
Accuracy:  0.15
Random search, best accuracy:  0.41
Epoch 1/3


2021-10-21 17:56:31.244214: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-10-21 17:56:38.501649: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


782/782 - 8s - loss: 2.1193 - accuracy: 0.2342 - val_loss: 1.8775 - val_accuracy: 0.3371
Epoch 2/3
782/782 - 8s - loss: 1.9610 - accuracy: 0.2927 - val_loss: 1.7945 - val_accuracy: 0.3710
Epoch 3/3
782/782 - 8s - loss: 1.9178 - accuracy: 0.3104 - val_loss: 1.7577 - val_accuracy: 0.3818
Size:  214474.0
Accuracy:  0.38
Random search, best accuracy:  0.41
Epoch 1/3


2021-10-21 17:56:57.005379: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-10-21 17:57:02.202410: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


782/782 - 7s - loss: 4.8168 - accuracy: 0.1026 - val_loss: 2.3115 - val_accuracy: 0.0982
Epoch 2/3
782/782 - 5s - loss: 2.3725 - accuracy: 0.1049 - val_loss: 2.2907 - val_accuracy: 0.1231
Epoch 3/3
782/782 - 5s - loss: 2.3157 - accuracy: 0.1102 - val_loss: 2.2830 - val_accuracy: 0.1351
Size:  954.0
Accuracy:  0.14
Random search, best accuracy:  0.41
Epoch 1/3


2021-10-21 17:57:15.364103: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-10-21 17:57:31.100390: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


782/782 - 17s - loss: 2.2200 - accuracy: 0.1518 - val_loss: 2.0427 - val_accuracy: 0.2211
Epoch 2/3
782/782 - 15s - loss: 2.1181 - accuracy: 0.1874 - val_loss: 1.9932 - val_accuracy: 0.2299
Epoch 3/3
782/782 - 16s - loss: 2.0866 - accuracy: 0.1934 - val_loss: 1.9712 - val_accuracy: 0.2389
Size:  435866.0
Accuracy:  0.24
Random search, best accuracy:  0.41
Epoch 1/3


2021-10-21 17:58:05.513101: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-10-21 17:58:19.988400: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


782/782 - 16s - loss: 2.1493 - accuracy: 0.1966 - val_loss: 2.0479 - val_accuracy: 0.2186
Epoch 2/3
782/782 - 14s - loss: 2.0834 - accuracy: 0.2224 - val_loss: 2.0249 - val_accuracy: 0.2283
Epoch 3/3
782/782 - 14s - loss: 2.0609 - accuracy: 0.2324 - val_loss: 1.9988 - val_accuracy: 0.2423
Size:  375946.0
Accuracy:  0.24
Random search, best accuracy:  0.41
Epoch 1/3


2021-10-21 17:58:51.313625: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-10-21 17:59:13.568197: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


782/782 - 24s - loss: 2.2716 - accuracy: 0.1267 - val_loss: 2.1686 - val_accuracy: 0.1750
Epoch 2/3
782/782 - 23s - loss: 2.2392 - accuracy: 0.1379 - val_loss: 2.1389 - val_accuracy: 0.1759
Epoch 3/3
782/782 - 23s - loss: 2.2258 - accuracy: 0.1417 - val_loss: 2.1341 - val_accuracy: 0.1834
Size:  257066.0
Accuracy:  0.18
Random search, best accuracy:  0.41
Epoch 1/3


2021-10-21 18:00:02.251215: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-10-21 18:00:13.694846: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


782/782 - 13s - loss: 2.6767 - accuracy: 0.1022 - val_loss: 2.3036 - val_accuracy: 0.1000
Epoch 2/3
782/782 - 12s - loss: 2.3319 - accuracy: 0.0988 - val_loss: 2.3030 - val_accuracy: 0.1000
Epoch 3/3
782/782 - 12s - loss: 2.3146 - accuracy: 0.1015 - val_loss: 2.3027 - val_accuracy: 0.1000
Size:  206986.0
Accuracy:  0.10
Random search, best accuracy:  0.41
Epoch 1/3


2021-10-21 18:00:39.976095: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-10-21 18:00:48.436129: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


782/782 - 10s - loss: 2.3568 - accuracy: 0.1006 - val_loss: 2.2956 - val_accuracy: 0.1080
Epoch 2/3
782/782 - 8s - loss: 2.3208 - accuracy: 0.1009 - val_loss: 2.3019 - val_accuracy: 0.1156
Epoch 3/3
782/782 - 8s - loss: 2.3104 - accuracy: 0.1054 - val_loss: 2.3036 - val_accuracy: 0.1082
Size:  320266.0
Accuracy:  0.11
Random search, best accuracy:  0.41
Epoch 1/3


2021-10-21 18:01:07.528362: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-10-21 18:01:13.077661: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


782/782 - 7s - loss: 2.4093 - accuracy: 0.1494 - val_loss: 2.0052 - val_accuracy: 0.2667
Epoch 2/3
782/782 - 6s - loss: 2.0997 - accuracy: 0.2168 - val_loss: 1.9396 - val_accuracy: 0.2917
Epoch 3/3
782/782 - 6s - loss: 2.0585 - accuracy: 0.2370 - val_loss: 1.9106 - val_accuracy: 0.3104
Size:  12746.0
Accuracy:  0.31
Random search, best accuracy:  0.41
Epoch 1/3


2021-10-21 18:01:28.395337: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-10-21 18:01:39.857373: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


782/782 - 13s - loss: 2.6268 - accuracy: 0.0988 - val_loss: 2.3031 - val_accuracy: 0.1241
Epoch 2/3
782/782 - 12s - loss: 2.3173 - accuracy: 0.1008 - val_loss: 2.3001 - val_accuracy: 0.1307
Epoch 3/3
782/782 - 12s - loss: 2.3111 - accuracy: 0.1007 - val_loss: 2.2909 - val_accuracy: 0.1354
Size:  414042.0
Accuracy:  0.14
Random search, best accuracy:  0.41
Epoch 1/3


2021-10-21 18:02:07.463392: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-10-21 18:02:16.939418: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


782/782 - 11s - loss: 1.9162 - accuracy: 0.3001 - val_loss: 1.7194 - val_accuracy: 0.3840
Epoch 2/3
782/782 - 10s - loss: 1.7961 - accuracy: 0.3542 - val_loss: 1.6530 - val_accuracy: 0.4079
Epoch 3/3
782/782 - 10s - loss: 1.7436 - accuracy: 0.3749 - val_loss: 1.6047 - val_accuracy: 0.4290
Size:  261466.0
Accuracy:  0.43
Random search, best accuracy:  0.43
Epoch 1/3


2021-10-21 18:02:39.968789: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-10-21 18:02:49.528588: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


782/782 - 11s - loss: 2.0304 - accuracy: 0.2649 - val_loss: 1.8072 - val_accuracy: 0.3437
Epoch 2/3
782/782 - 10s - loss: 1.9009 - accuracy: 0.3090 - val_loss: 1.7318 - val_accuracy: 0.3747
Epoch 3/3
782/782 - 10s - loss: 1.8378 - accuracy: 0.3364 - val_loss: 1.6840 - val_accuracy: 0.3927
Size:  360586.0
Accuracy:  0.39
Random search, best accuracy:  0.43
Epoch 1/3


2021-10-21 18:03:12.638286: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-10-21 18:03:19.711445: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


782/782 - 8s - loss: 39.8069 - accuracy: 0.0955 - val_loss: 2.3026 - val_accuracy: 0.0994
Epoch 2/3
782/782 - 7s - loss: 9.9752 - accuracy: 0.0970 - val_loss: 2.3026 - val_accuracy: 0.1002
Epoch 3/3
782/782 - 7s - loss: 4.7888 - accuracy: 0.0967 - val_loss: 2.3026 - val_accuracy: 0.1001
Size:  5306.0
Accuracy:  0.10
Random search, best accuracy:  0.43
Epoch 1/3


2021-10-21 18:03:35.927394: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2021-10-21 18:03:45.111608: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


782/782 - 11s - loss: 2.2424 - accuracy: 0.1455 - val_loss: 2.0723 - val_accuracy: 0.2276
Epoch 2/3
782/782 - 10s - loss: 2.1193 - accuracy: 0.1885 - val_loss: 2.0009 - val_accuracy: 0.2495
Epoch 3/3
782/782 - 10s - loss: 2.0868 - accuracy: 0.2052 - val_loss: 1.9881 - val_accuracy: 0.2381
Size:  185114.0
Accuracy:  0.24
Random search, best accuracy:  0.43
Epoch 1/3


2021-10-21 18:04:07.041842: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


The next step is to implement the hill climbing algorithm (see book for details). This is done in the next code snippet. We create a helper method that randomly adjusts one of the parameters slightly to move an existing model into a neighboring model in the allowed search space. The first for loop determines the index of the boundary between the bottom (non-dense) and top (dense) layers. The next step is to determine whether to increase or decrease the capacity of the model. This is followed by determining whether to add/remove a layer or tweak parameters of an existing layer. Much of the logic is there to ensure that the modified model still stays within the boundaries of what is a legal model.

The actual hill climbing algorithm is implemented at the bottom of the code snippet. It assumes an initial model and gradually tweaks it in the direction that improves prediction accuracy. The implemented version of the algorithm is known as stochastic hill climbing. A parameter is modified at random, and if the resulting model is better than the previously best-known model, the change is kept. Otherwise, it is reverted, and another tweak is tried. The given implementation assumes that the hill climbing algorithm is run after doing random search, so there is a promising model to start from.


In [None]:
# Helper method for hill climbing and evolutionary algorithm.
def tweak_model(model_definition):
    """Randomly modify ``model_definition`` in place into a neighboring model.

    One layer position is picked at random, together with a direction
    (``delta`` of +1 or -1). Then, with equal probability, either a layer is
    added/removed at that position or one parameter of the picked layer is
    moved one step through its list of legal values (wrapping around at the
    ends).

    Args:
        model_definition: List of layer dicts; mutated in place.

    Returns:
        None.
    """
    layer_num = np.random.randint(0, len(model_definition))
    last_layer = len(model_definition) - 1
    # Index of the first dense layer, i.e. the bottom/top boundary. If no
    # dense layer is present this ends up as the last index.
    first_dense = next(
        (idx for idx, entry in enumerate(model_definition)
         if entry['layer_type'] == 'DENSE'),
        last_layer)
    delta = 1 if np.random.randint(0, 2) == 1 else -1
    change_structure = np.random.randint(0, 2) == 1

    if not change_structure:
        # Tweak parameter.
        target = model_definition[layer_num]
        candidates = layer_params[target['layer_type']]
        param = candidates[np.random.randint(0, len(candidates))]
        current_val = target[param]
        values = param_values[param]
        position = values.index(current_val)
        target[param] = values[(position + delta) % len(values)]
        return

    # Add/remove layer.
    if len(model_definition) < 3:
        delta = 1 # Layer removal not allowed
    if delta == 1:
        # Add layer: non-dense below the boundary, dense at or above it.
        if layer_num < first_dense:
            layer_type = layer_types[np.random.randint(1, 3)]
        else:
            layer_type = 'DENSE'
        model_definition.insert(layer_num, generate_random_layer(layer_type))
        return

    # Remove layer.
    if layer_num == 0 and first_dense == 1:
        layer_num += 1 # Require >= 1 non-dense layer
    if layer_num == first_dense and layer_num == last_layer:
        layer_num -= 1 # Require >= 1 dense layer
    del model_definition[layer_num]

# Hill climbing, starting from best model from random search.
# Work on a private copy: tweak_model() mutates its argument in place, so
# starting from `best_model` itself would let the very first tweak alter the
# best-known model even when that tweak is rejected or oversized.
model_definition = copy.deepcopy(best_model)

for i in range(CANDIDATE_EVALUATIONS):
    valid_model = False
    while not valid_model:
        # Snapshot the current model so the tweak can be undone.
        old_model_definition = copy.deepcopy(model_definition)
        tweak_model(model_definition)
        acc = create_and_evaluate_model(model_definition)
        if acc > 0.0:
            valid_model = True
        else:
            # Accuracy 0.0 marks an oversized model: undo and try another tweak.
            model_definition = old_model_definition
    if acc > val_accuracy:
        # Improvement: keep the tweak and record the new best model.
        best_model = copy.deepcopy(model_definition)
        val_accuracy = acc
    else:
        # No improvement: revert to the model from before the tweak.
        model_definition = old_model_definition
    print('Hill climbing, best accuracy: %5.2f' %val_accuracy)


For both the random search algorithm and the hill climbing algorithm, our evaluation strategy was to evaluate each solution for only three epochs. We made the assumption that the resulting validation error would be a good indicator of how well the model would perform after more training. To get a more accurate evaluation of how well the best model actually performs, our final code snippet evaluates the best model for 20 epochs.


In [None]:
# Evaluate final model for larger number of epochs.
model = create_model(best_model)
model.summary()
# Re-applies the same loss/optimizer/metrics that create_model() already set.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
history = model.fit(
    train_images,
    train_labels,
    validation_data=(test_images, test_labels),
    epochs=FINAL_EPOCHS,
    batch_size=64,
    verbose=2,
    shuffle=True,
)
