In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import GPyOpt

# Record the library versions this notebook was run with, one per line
print(np.__version__, tf.__version__, GPyOpt.__version__, sep = "\n")

In [None]:
# Fetch the Fashion-MNIST train/test splits through the Keras dataset helper
fashion_mnist = tf.keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

# Display names used for axis labels; position k names class label k
class_names = [
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot',
]

In [None]:
# Quick look at the splits: image array shapes, label counts and the raw training labels
print(
    train_images.shape,
    len(train_labels),
    train_labels,
    test_images.shape,
    len(test_labels),
    sep = "\n",
)

In [None]:
# Scale pixel values to [0,1]
# The arrays are overwritten in place, so the division is guarded: once the
# values are already within [0, 1], re-running this cell leaves them untouched
# instead of dividing by 255 a second time.
if train_images.max() > 1.0:
    train_images = train_images / 255.0
if test_images.max() > 1.0:
    test_images = test_images / 255.0

In [None]:
# Generate network structure
def generate_network_structure(n_layers, dropout_rate, n_filters, kernel_size, strides, activation, learning_rate = 1e-3):
    """Build and compile a convolutional classifier for 28x28x1 inputs.

    The body consists of ``n_layers`` slots. Slot ``i`` holds a Dropout layer
    when ``i`` is a non-zero multiple of 3; every other slot holds a Conv2D
    layer followed by BatchNormalization. The body is followed by Flatten,
    one more Dropout and a 10-unit softmax Dense layer.

    Args:
        n_layers: Number of slots in the body (Dropout slots count as well).
        dropout_rate: Rate passed to every Dropout layer.
        n_filters: Number of filters of each Conv2D layer.
        kernel_size: Side length of the square convolution kernel.
        strides: Stride applied along both spatial dimensions.
        activation: Activation used by the Conv2D layers.
        learning_rate: Learning rate of the Adam optimizer (default 1e-3).

    Returns:
        The compiled ``tf.keras.Sequential`` model, using sparse categorical
        cross-entropy as loss and accuracy as metric.
    """
    # Arguments shared by every convolution; only the first one also declares the input shape
    conv_kwargs = dict(filters = n_filters,
                       activation = activation,
                       kernel_size = (kernel_size, kernel_size),
                       strides = (strides, strides),
                       padding = 'same')

    layers = []
    for i in range(n_layers):
        if i != 0 and i % 3 == 0:
            # Every third slot (except the first) is a Dropout instead of a convolution
            layers.append(tf.keras.layers.Dropout(dropout_rate))
            continue

        if i == 0:
            layers.append(tf.keras.layers.Conv2D(input_shape = (28, 28, 1), **conv_kwargs))
        else:
            layers.append(tf.keras.layers.Conv2D(**conv_kwargs))
        layers.append(tf.keras.layers.BatchNormalization())

    layers.append(tf.keras.layers.Flatten())
    layers.append(tf.keras.layers.Dropout(dropout_rate))
    layers.append(tf.keras.layers.Dense(10, activation = 'softmax'))

    model = tf.keras.Sequential(layers)
    opt = tf.keras.optimizers.Adam(learning_rate = learning_rate)
    # The last layer already applies softmax, so the loss receives probabilities,
    # not logits; from_logits = True would apply softmax a second time.
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = False)
    model.compile(optimizer = opt, loss = loss, metrics = ['accuracy'])

    return model

In [None]:
# Candidate values per hyperparameter. Discrete ones list every option,
# the continuous one gives its (lower, upper) bounds.
# (The learning rate is currently not part of the search.)
kernel_size = (3, 5, 7)
dropout_rate = (0, 0.99)
n_layers = tuple(range(1, 7))
n_filters = (16, 32, 64)

# Search space in the list-of-dicts form handed to GPyOpt; the order of the
# entries is the column order of the points passed to the objective.
domain = [
    {'name': name, 'type': kind, 'domain': values}
    for name, kind, values in (
        ('n_layers', 'discrete', n_layers),
        ('dropout_rate', 'continuous', dropout_rate),
        ('n_filters', 'discrete', n_filters),
        ('kernel_size', 'discrete', kernel_size),
    )
]

# Every trained candidate is collected here as a (model, test_acc) tuple
saved_models = []

## Objective for the search: the accuracy of a freshly trained model, measured
## on the test split. GPyOpt passes the candidate point as a 2D ndarray; the
## function only reads its first row, which is fine in this setting.
def objective_function(x):
    """Train one candidate network and return its accuracy on the test split.

    Args:
        x: 2D point; row 0 holds n_layers, dropout_rate, n_filters and
           kernel_size, in that order. Only row 0 is read.

    Returns:
        The accuracy reported by ``model.evaluate`` on the test images.

    Side effects:
        Appends ``(model, test_acc)`` to ``saved_models`` and saves the model
        under 'Saved Models/model_<test_acc>'.
    """
    depth, rate, filters, kernel = x[0, 0], x[0, 1], x[0, 2], x[0, 3]

    # The discrete settings arrive as floats and are cast back to int;
    # stride 1 and 'relu' are fixed for every candidate.
    model = generate_network_structure(int(depth), rate, int(filters), int(kernel), 1, 'relu')
    model.fit(train_images, train_labels, epochs = 10)

    # evaluate() yields (loss, accuracy); only the accuracy is kept
    test_acc = model.evaluate(test_images, test_labels, verbose = 2)[1]

    saved_models.append((model, test_acc))
    model.save('Saved Models/model_' + str(test_acc))
    return test_acc

# GPyOpt minimises its objective by default. objective_function returns an
# accuracy (higher is better), so maximize = True is required: GPyOpt then
# optimises the negated objective and the search moves towards higher accuracy.
opt = GPyOpt.methods.BayesianOptimization(f = objective_function,    # function to optimize
                                          domain = domain,           # box-constraints of the problem
                                          acquisition_type = 'MPI',  # Select acquisition function MPI, EI, LCB
                                          maximize = True)           # the objective is an accuracy
# NOTE(review): exploration_weight looks like the LCB acquisition's parameter; MPI appears to be
# tuned through `acquisition_jitter` instead - confirm that this line has any effect.
opt.acquisition.exploration_weight = .1

opt.run_optimization(max_iter = 10)

# With maximize = True, opt.Y holds the negated accuracies, so the smallest
# entry belongs to the most accurate candidate.
x_best = opt.X[np.argmin(opt.Y)]
print(f"The best parameters obtained:\n - n_layers = {str(x_best[0])}\n - dropout_rate = {str(x_best[1])}\n - n_filters = {str(x_best[2])}\n - kernel_size = {str(x_best[3])}")

In [None]:
# Save model
# Each saved_models entry is (model, test_acc); choose the model by its own
# recorded accuracy so the selection does not depend on the sign convention
# of the optimizer's opt.Y values.
best_model = max(saved_models, key = lambda entry: entry[1])[0]
best_model.save('Saved Models')

In [None]:
# Load model
# Rebinds best_model to the copy read back from the 'Saved Models' path,
# so the cells below operate on the persisted model.
best_model = tf.keras.models.load_model('Saved Models')

In [None]:
# Evaluate model
# evaluate() is unpacked into (loss, accuracy) on the test split; only the accuracy is printed.
test_loss, test_acc = best_model.evaluate(test_images,  test_labels, verbose=2)
print('\nTest accuracy:', test_acc)

In [None]:
# Make predictions
# Run the model on the whole test set; predictions[0] is the output for the first test image.
predictions = best_model.predict(test_images)
print("Probability distribution:\n" + str(predictions[0]))
# Index of the largest score, i.e. the predicted class for the first test image
print("Best guess index:\n" + str(np.argmax(predictions[0])))

In [None]:
# Plot all predictions
def plot_image(i, predictions_array, true_label, img):
    """Draw image ``i`` on the current axes with a caption describing the prediction.

    Args:
        i: Index of the sample within ``true_label`` and ``img``.
        predictions_array: Class scores for this single sample.
        true_label: Sequence of true labels, indexed by ``i``.
        img: Sequence of images, indexed by ``i``.

    The caption reads "<predicted class> <top score in percent>% (<true class>)"
    and is blue when the prediction matches the true label, red otherwise.
    """
    actual = true_label[i]
    picture = img[i]

    # Bare image: no grid and no ticks
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    plt.imshow(picture, cmap=plt.cm.binary)

    guess = np.argmax(predictions_array)
    color = 'blue' if guess == actual else 'red'

    confidence = 100*np.max(predictions_array)
    caption = "{} {:2.0f}% ({})".format(class_names[guess], confidence, class_names[actual])
    plt.xlabel(caption, color=color)

def plot_value_array(i, predictions_array, true_label):
    """Draw the ten class scores of one sample as a bar chart on the current axes.

    Args:
        i: Index of the sample within ``true_label``.
        predictions_array: Class scores for this single sample (ten bars are drawn).
        true_label: Sequence of true labels, indexed by ``i``.

    All bars are grey except the predicted class (red) and the true class
    (blue); when both coincide the bar ends up blue.
    """
    actual = true_label[i]

    plt.grid(False)
    plt.xticks(range(10))
    plt.yticks([])

    bars = plt.bar(range(10), predictions_array, color="#777777")
    plt.ylim([0, 1])

    guess = np.argmax(predictions_array)
    # Red is applied before blue so that a correct prediction is shown in blue
    for index, shade in ((guess, 'red'), (actual, 'blue')):
        bars[index].set_color(shade)

In [None]:
# Show the first num_rows*num_cols test images, each next to the bar chart of its class scores.
# Correct predictions are captioned in blue, incorrect ones in red.
num_rows = 5
num_cols = 3
num_images = num_rows*num_cols
plt.figure(figsize=(4*num_cols, 2*num_rows))
for i in range(num_images):
    # Every sample takes two neighbouring grid cells: the image first, then its score bars
    plt.subplot(num_rows, 2*num_cols, 2*i+1)
    plot_image(i, predictions[i], test_labels, test_images)
    plt.subplot(num_rows, 2*num_cols, 2*i+2)
    plot_value_array(i, predictions[i], test_labels)
plt.tight_layout()
plt.show()

In [None]:
# Take the test image at index 1 and put it in a batch of its own
# (expand_dims adds a leading axis of size 1).
img = np.expand_dims(test_images[1], 0)

# Run the model on that one-image batch
predictions_single = best_model.predict(img)
print(predictions_single)

# Bar chart of the scores (true class blue, predicted class red), with class names on the x axis
plot_value_array(1, predictions_single[0], test_labels)
_ = plt.xticks(range(10), class_names, rotation=45)
plt.show()

# The predicted class index
np.argmax(predictions_single[0])