# Install and Import all the necessary packages and libraries

In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import fashion_mnist, mnist, cifar10, cifar100
import numpy as np
import time
import math
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.python.keras import backend as K
import matplotlib.pyplot as plt
import pandas as pd
import gc
import multiprocessing
from multiprocessing import Process, Queue

# Definition of programmer's parameters (Controller's block)

In [2]:
#@markdown ##Dataset
topic_prefix = "cifar10" # @param ["cifar10", "cifar100", "mnist", "fashion_mnist"] {type:"string"}
if topic_prefix == "cifar10":
  dataset = cifar10.load_data()
elif topic_prefix == "cifar100":
  dataset = cifar100.load_data()
elif topic_prefix == "mnist":
  dataset = mnist.load_data()
elif topic_prefix == "fashion_mnist":
  dataset = fashion_mnist.load_data()

#@markdown ##Synopses-based Training Optimization Configuration
# Percentage of the dataset that will be used for training
sample_size_low = 0.8 #@param {type:"slider", min:0, max:0.99, step:0.05}
sample_size_step = 0.15 #@param {type:"number"}
sample_size_high = 1 #@param {type:"slider", min:0, max:1, step:0.05}

# Percentage of the dataset that will be used for testing
perc_test = 1 #@param {type:"number"}

# The ID of the method that will be employed during sampling
# 0: Simple reservoir sampling in our initial training dataset
# 1: Reservoir sampling in each class based on the number of samples (per class)
sampling_method_id = 1 #@param {type:"integer"}

#@markdown ##NN Architecture Configuration

total_num_of_layers = 5 #@param {type:"integer"}

set_of_layers = ['conv', 'pool', 'dense']

# Number of epochs for the training process
# One Epoch is when an ENTIRE (training) dataset is passed forward and backward through the neural network only once.
# NOTE: An epoch is comprised of one or more batches.
num_of_epochs_low = 1 #@param {type:"slider", min:1, max:30, step:1}
num_of_epochs_step = 4 #@param {type:"integer"}
num_of_epochs_high = 10 #@param {type:"slider", min:1, max:30, step:1}

# The value for the learning rate for the training process (it is inserted to the Adam optimizer)
# During the optimization, the algorithm needs to take a series of tiny steps to descend the error mountain in order to minimize the error.
# The direction of the step is determined by the gradient, while the step size is determined by the learning rate.
lr_low = 1e-3 #@param {type:"number"}
lr_high = 1e-3 #@param {type:"number"}
lr_list = np.geomspace(lr_low, lr_high, num = int(np.log10(lr_high) - np.log10(lr_low)) + 1).tolist()

# The size of the batch for the training process
# Total number of training examples present in a single batch.
# or, number of samples processed before the model is updated.
size_of_batch_low = 64 #@param {type:"integer"}
size_of_batch_step = 64 #@param {type:"integer"}
size_of_batch_high = 64 #@param {type:"integer"}

#@markdown ##Tradeoff score Configuration
# Maximum training time above which we cancel the experiment (in seconds)
# It is used in the score (accuracy--speed tradeoff) formula
theta_parameter = 10 #@param {type:"number"}

# Weight for the accuracy of the model. Max value: 0.99
# There is also the weight of training speed of the model (1 - lamda_acc)
# It is used in the score (accuracy--speed tradeoff) formula
lamda_acc = 0.5 #@param {type:"number"}


#@markdown ##Other Configuration(s)

CONV_NEURONS_CONST = 32 #@param {type:"integer"}
CONV_NEURONS_BOUND = 256 #@param {type:"integer"}
DENSE_NEURONS_CONST = 128 #@param {type:"integer"}
DENSE_NEURONS_BOUND = 32 #@param {type:"integer"}


Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 0us/step


# Load our image dataset

In [3]:
(train_images_all, train_labels_all), (test_images_all, test_labels_all) = dataset

# Get unique labels in our training dataset
unique_class_labels = np.unique(train_labels_all)

# Normalize pixel values in the trainning and the test datasets to be between 0 and 1
# TODO: parameter of normalization minmax scalar
train_images_all, test_images_all = train_images_all / 255.0, test_images_all / 255.0

if len(train_images_all.shape) == 3:
  # Make sure images have shape (28, 28, 1)
  train_images_all = np.expand_dims(train_images_all, -1)
  test_images_all = np.expand_dims(test_images_all, -1)
  print("x_train shape:", train_images_all.shape)
  print(train_images_all.shape[0], "train samples")
  print(test_images_all.shape[0], "test samples")

# Synopsis block of code

Optional: Check that our sampling method works appropriately

In [4]:
# A function that prints the occurence of each class in a list
def print_times_per_label(lst, labels_all):
  # Get unique labels in our training dataset
  unique_labels = np.unique(labels_all)
  for i in range(0, len(unique_labels)):
    print("Class", unique_labels[i], "has", lst.count(i), "samples in our dataset...")

**Reservoir Sampling Function**:
*Randomized algorithms for randomly choosing k samples from a list of n items, where n is either a very large or unknown number. Typically n is large enough that the list doesn’t fit into main memory.*

Algorithm implemented:


1.   Create an array reservoir[0,...,k-1] and copy first k items of stream[ ] to it.
2.   Now one by one consider all items from (k+1)th item to nth item.


> Steps

*   Generate a random number, denoted as j, from 0 to i where i is the index of the current item in stream[].
*   If j is in range 0 to k-1, replace reservoir[j] with stream[i]

In [5]:
# Select k items from a stream of items-data
import random

# A function to randomly select k items from stream[0..n-1].
def reservoir_sampling(stream, n, k):
  i = 0     # index for elements in stream[]

  # reservoir[] is the output array.
  # Initialize it with first k elements from stream[]
  reservoir = [0] * k

  for i in range(k):
    reservoir[i] = stream[i]

  # Iterate from the (k+1)th element to Nth element
  while(i < n):
    # Pick a random index from 0 to i.
    j = random.randrange(i+1)

    # If the randomly picked
    # index is smaller than k,
    # then replace the element
    # present at the index
    # with new element from stream
    if(j < k):
      reservoir[j] = stream[i]
    i+=1

  return reservoir

In [6]:
# A function that finds the size of each reservoir for every class depending on its occurence in the initial dataset
# and returns the unique labels that exist in our dataset along with the corresponding percentage
def reservoir_size_per_class(init_labels):

  # Get unique labels and their counts (how many times they appear) in our training dataset
  unique_labels, counts = np.unique(init_labels, return_counts = True)

  # Transform to list
  unique_labels_lst = unique_labels.tolist()
  counts_lst = counts.tolist()

  perc_per_class = []
  for i in range(len(unique_labels_lst)):
    perc_per_class.append(counts_lst[i]/len(init_labels))

  # print(perc_per_class)

  return perc_per_class, unique_labels_lst

"Pre-Processing Part (or Filtering)": Call the sampling mehod. Get samples from the training and the testing datasets.

In [7]:
def sampling_method(sampling_method_id, train_images_all, train_labels_all, sample_size, test_images_all, test_labels_all, perc_test):
  print("Percentage of filtering in our training dataset was set:")
  print(sample_size)
  if sampling_method_id == 0:
    # Simple reservoir sampling over the whole training dataset
    # Total size of the stream (or training dataset)
    n_train = len(train_images_all)

    # Number of samples that will be drawn
    k_train = int(n_train * sample_size)

    # Use the indexes of dataset in order to decide which samples will be drawn
    idx_tmp_train_list = list(range(0, n_train))

    # Find the indexes in order to construct the dataset that will be used during the training process
    idx_train = reservoir_sampling(idx_tmp_train_list, n_train, k_train)
  else:
    # Reservoir sampling in each class based on the number of samples (per class) that exist in the initial dataset
    # Find the size of each reservoir for every class depending on its occurence in the initial training dataset
    class_perc, unique_ids = reservoir_size_per_class(train_labels_all)

    # Stores the indexes (from all classes) in order to construct the dataset that will be used during the training process
    idx_train = []

    # Run for every single class the reservoir sampling seperately
    for i in range(0, len(unique_ids)):
      # Find the locations of each sample belonging to our class of interest
      tmp = np.where(train_labels_all == unique_ids[i])
      idx_of_class = tmp[0].tolist()

      # Run the reservoir sampling for the class of interest
      sampled_idx_of_class = reservoir_sampling(idx_of_class, len(idx_of_class), int(len(train_images_all) * sample_size * class_perc[i]))

      # Store the (sampled) samples from this class
      for j in range(0, len(sampled_idx_of_class)):
        idx_train.append(sampled_idx_of_class[j])

  # Store the corresponding images and labels from training dataset based on the sampled indexes
  train_images_lst = []
  for i in idx_train:
    train_images_lst.append(train_images_all[i])

  train_labels_lst = []
  for i in idx_train:
    train_labels_lst.append(train_labels_all[i])

  # Check the occurence of each class in the final training dataset
  print_times_per_label(train_labels_lst, train_labels_all)

  # Total size of the stream (or testing dataset)
  n_test = len(test_images_all)

  # Number of samples that will be drawn
  k_test = int(n_test * perc_test)

  # Use the indexes of dataset in order to decide which samples will be drawn
  idx_tmp_test_list = list(range(0, n_test))

  # Find the indexes in order to construct the dataset that will be used during the testing process
  idx_test = reservoir_sampling(idx_tmp_test_list, n_test, k_test)

  # Store the corresponding images and labels from testing dataset based on the sampled indexes
  test_images_lst = []
  for i in idx_test:
    test_images_lst.append(test_images_all[i])

  test_labels_lst = []
  for i in idx_test:
    test_labels_lst.append(test_labels_all[i])

  # Tranfsorm the lists that we stored our samples into arrays
  train_images = np.asarray(train_images_lst)
  train_labels = np.asarray(train_labels_lst)
  test_images = np.asarray(test_images_lst)
  test_labels = np.asarray(test_labels_lst)

  # Verify that the desired filtering was performed in both datasets
  print("Training dataset before sampling:")
  print(train_images_all.shape)
  print(train_labels_all.shape)
  print("Training dataset after sampling:")
  print(train_images.shape)
  print(train_labels.shape)

  print("Testing dataset before sampling:")
  print(test_images_all.shape)
  print(test_labels_all.shape)
  print("Testing dataset after sampling:")
  print(test_images.shape)
  print(test_labels.shape)

  return train_images, train_labels, test_images, test_labels

# CNN's architecture builder

In [8]:
def recreate_model(layers_lst, dataset_shape, CONV_NEURONS_CONST, DENSE_NEURONS_CONST, CONV_NEURONS_BOUND, DENSE_NEURONS_BOUND):
  # Initialize a sequential model
  model = models.Sequential()

  # Define the number of neurons for conv and dense layers
  conv_tmp2 = CONV_NEURONS_CONST
  dense_tmp2 = DENSE_NEURONS_CONST

  # Recreate the model
  for count, layer in enumerate(layers_lst):
    # First layer has to be a convolutional one
    if layer == 'conv' and count == 0:
      model.add(layers.Conv2D(int(conv_tmp2), (3, 3), activation='relu', input_shape = dataset_shape))
      conv_tmp2 = conv_tmp2 * 2
    # For the other layers
    else:
      if layer == 'conv':
        # Add a conv layer by doubling its neurons if they do not violate our user-defined bound
        if conv_tmp2 <= CONV_NEURONS_BOUND:
          model.add(layers.Conv2D(int(conv_tmp2), (3, 3), activation='relu'))
          conv_tmp2 = conv_tmp2 * 2
        else:
          model.add(layers.Conv2D(int(CONV_NEURONS_BOUND), (3, 3), activation='relu'))
          conv_tmp2 = CONV_NEURONS_BOUND
      elif layer == 'pool':
        # Add a pool layer
        model.add(layers.MaxPooling2D((2, 2), strides=(2,2), padding='same'))
      else:
        # If the next to-be-added-layer is dense and no other dense layer has been added so far, then add a flatten layer first...
        if dense_tmp2 == DENSE_NEURONS_CONST:
          model.add(layers.Flatten())
        # Add a dense layer by reducing (* 0.5) its neurons if they do not violate our user-defined bound
        if dense_tmp2 >= DENSE_NEURONS_BOUND:
          model.add(layers.Dense(int(dense_tmp2), activation='relu'))
          dense_tmp2 = dense_tmp2 / 2
        else:
          model.add(layers.Dense(int(DENSE_NEURONS_BOUND), activation='relu'))
          dense_tmp2 = DENSE_NEURONS_BOUND

  return model, conv_tmp2, dense_tmp2

In [9]:
def my_evaluate_first_phase(q, train_images, train_labels, test_images, test_labels, epochs, lr, size_of_batch):
  # Initialize a sequential network
  model = models.Sequential()

  # Define the neurons of conv and dense layers based on user's input
  conv_tmp = CONV_NEURONS_CONST
  dense_tmp = DENSE_NEURONS_CONST

  # Add the (first) conv layer
  model.add(layers.Conv2D(int(CONV_NEURONS_CONST), (3, 3), activation='relu', input_shape = dataset_shape))
  conv_tmp = conv_tmp * 2

  # Add manually a flatten and a dense layer in order to evaluate the network
  model.add(layers.Flatten())
  model.add(layers.Dense(len(unique_class_labels), activation='softmax'))

  model.compile(optimizer=Adam(lr), loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy'])
  model.summary()

  start = time.time()

  blackbox = model.fit(x=train_images,
                      y=train_labels,
                      epochs=epochs,
                      batch_size=size_of_batch
                      )
  stop = time.time()

  # Compute the training speed of this CNN architecture
  tr_time = stop - start

  # Compute the accuracy of our training model in the testing dataset
  test_loss, test_acc = model.evaluate(test_images,  test_labels, verbose=2)

  # Compute the metric that captures the accuracy--speed tradeoff
  # tradeOff_metric = lamda_acc * test_acc - (1 - lamda_acc) * math.tanh(tr_time/theta_parameter - 1)

  # inverse additive penalty
  # tradeOff_metric = test_acc / (1 + 0.5 * min(1, tr_time/theta_parameter))

  # log-sigmoid dominance
  tradeOff_metric = 1 / (1 + np.exp(-0.5 * (test_acc - (1 - min(1, tr_time/theta_parameter)))))

  del model

  print("========================== EDW EINAI TO MULTI1111111111111111.......")
  print("Accuracy (on the testing dataset): {0:.2%}".format(test_acc))
  print(f"Training time: ", tr_time)
  print(tradeOff_metric)

  q.put([test_acc, tr_time, tradeOff_metric])

In [10]:
def my_evaluate_rest_phase(q, train_images, train_labels, test_images, test_labels, layer, current_df, dataset_shape, CONV_NEURONS_CONST, DENSE_NEURONS_CONST, CONV_NEURONS_BOUND, DENSE_NEURONS_BOUND):
  error_flag = -1

  # Recreate the network that consist of the best layers that we found in each of the previous steps/iterations
  model, conv_tmp, dense_tmp = recreate_model(current_df['LayerType'], dataset_shape, CONV_NEURONS_CONST, DENSE_NEURONS_CONST, CONV_NEURONS_BOUND, DENSE_NEURONS_BOUND)

  try:
    # If the to-be-added-layer is conv
    if layer == 'conv':
      # Add a conv layer by doubling its neurons if they do not violate our user-defined bound
      if conv_tmp <= CONV_NEURONS_BOUND:
        model.add(layers.Conv2D(int(conv_tmp), (3, 3), activation='relu'))
        conv_tmp = conv_tmp * 2
      else:
        model.add(layers.Conv2D(int(CONV_NEURONS_BOUND), (3, 3), activation='relu'))
    # If the to-be-added-layer is pool
    elif layer == 'pool':
      model.add(layers.MaxPooling2D((2, 2), strides=(2,2), padding='same'))
    # If the to-be-added-layer is dense
    else:
      # If the next to-be-added-layer is dense and no other dense layer has been added so far, then add a flatten layer first...
      if not isinstance(model.layers[-1], tf.keras.layers.Dense):
        model.add(layers.Flatten())
      # Add a dense layer by reducing (* 0.5) its neurons if they do not violate our user-defined bound
      if dense_tmp >= DENSE_NEURONS_BOUND:
        model.add(layers.Dense(int(dense_tmp), activation='relu'))
        dense_tmp = dense_tmp / 2
      else:
        model.add(layers.Dense(int(DENSE_NEURONS_BOUND), activation='relu'))


    # Check if the last layer is a Dense layer
    last_layer = model.layers[-1]

    # Check if the last layer of the network is dense
    # If it is just add the last dense layer for the classification
    # Otherwise you should first add a flatten layer
    if isinstance(last_layer, tf.keras.layers.Dense):
        model.add(layers.Dense(len(unique_class_labels), activation='softmax'))
    else:
        model.add(layers.Flatten())
        model.add(layers.Dense(len(unique_class_labels), activation='softmax'))

  except ValueError:
        print("No valid input...:(")
        error_flag = 1

  if error_flag == -1:
    model.compile(optimizer=Adam(current_df['LearningRate']), loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy'])
    model.summary()

    start = time.time()

    blackbox = model.fit(x=train_images,
                        y=train_labels,
                        epochs=current_df['Epochs'],
                        batch_size=current_df['BatchSize']
                        )
    stop = time.time()

    # Compute the training speed of this CNN architecture
    tr_time = stop - start

    # Compute the accuracy of our training model in the testing dataset
    test_loss, test_acc = model.evaluate(test_images,  test_labels, verbose=2)

    # Compute the metric that captures the accuracy--speed tradeoff
    # tradeOff_metric = lamda_acc * test_acc - (1 - lamda_acc) * math.tanh(tr_time/theta_parameter - 1)

    # inverse additive penalty
    # tradeOff_metric = test_acc / (1 + 0.5 * min(1, tr_time/theta_parameter))

    # log-sigmoid dominance
    tradeOff_metric = 1 / (1 + np.exp(-0.5 * (test_acc - (1 - min(1, tr_time/theta_parameter)))))

    # Delete the Keras model with these hyper-parameters from memory.
    del model

    print("========================== EDW EINAI TO MULTI2222222222.......")
    print("Accuracy (on the testing dataset): {0:.2%}".format(test_acc))
    print(f"Training time: ", tr_time)
    print(tradeOff_metric)

    q.put([test_acc, tr_time, tradeOff_metric])
  else:
    q.put([0, 1000000000, 0])

In [11]:
def my_evaluate(q, train_images, train_labels, test_images, test_labels, lst_layers, dataset_shape, CONV_NEURONS_CONST, DENSE_NEURONS_CONST, CONV_NEURONS_BOUND, DENSE_NEURONS_BOUND, my_epochs, my_lr, my_batch):

  error_flag = -1

  if lst_layers[0] == 'conv':

    try:
      # Recreate the network that consist of the best layers that we found in each of the previous steps/iterations
      model, conv_tmp, dense_tmp = recreate_model(lst_layers, dataset_shape, CONV_NEURONS_CONST, DENSE_NEURONS_CONST, CONV_NEURONS_BOUND, DENSE_NEURONS_BOUND)

      # Check if the last layer is a Dense layer
      last_layer = model.layers[-1]

      # Check if the last layer of the network is dense
      # If it is just add the last dense layer for the classification
      # Otherwise you should first add a flatten layer
      if isinstance(last_layer, tf.keras.layers.Dense):
          model.add(layers.Dense(len(unique_class_labels), activation='softmax'))
      else:
          model.add(layers.Flatten())
          model.add(layers.Dense(len(unique_class_labels), activation='softmax'))

    except ValueError:
          print("No valid input...:(")
          error_flag = 1

    if error_flag == -1:
      model.compile(optimizer=Adam(my_lr), loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy'])
      model.summary()

      start = time.time()

      blackbox = model.fit(x=train_images,
                          y=train_labels,
                          epochs=my_epochs,
                          batch_size=my_batch
                          )
      stop = time.time()

      # Compute the training speed of this CNN architecture
      tr_time = stop - start

      # Compute the accuracy of our training model in the testing dataset
      test_loss, test_acc = model.evaluate(test_images,  test_labels, verbose=2)

      # Compute the metric that captures the accuracy--speed tradeoff
      # tradeOff_metric = lamda_acc * test_acc - (1 - lamda_acc) * math.tanh(tr_time/theta_parameter - 1)

      # inverse additive penalty
      # tradeOff_metric = test_acc / (1 + 0.5 * min(1, tr_time/theta_parameter))

      # log-sigmoid dominance
      tradeOff_metric = 1 / (1 + np.exp(-0.5 * (test_acc - (1 - min(1, tr_time/theta_parameter)))))

      # Delete the Keras model with these hyper-parameters from memory.
      del model

      print("========================== EDW EINAI TO MULTI2222222222.......")
      print("Accuracy (on the testing dataset): {0:.2%}".format(test_acc))
      print(f"Training time: ", tr_time)
      print(tradeOff_metric)

      q.put([test_acc, tr_time, tradeOff_metric])
    else:
      q.put([0, 1000000000, 0])
  else:
    q.put([0, 1000000000, 0])

In [12]:
import itertools

combinations = list(itertools.product(set_of_layers, repeat= total_num_of_layers))

In [13]:
start_program = time.time()

# Get the shape of the input dataset
dataset_shape = train_images_all.shape[1:]

# Store the best type of layer of each step
best_score = -1000
best_sampling = -1
best_epochs = -1
best_layers = -1
best_acc = -1
best_train = -1

# The first layer in this set of datasets has to be a convolutional one!
# So our "search space" is all combinations of epochs and sampling rates based on user's input
for sample_size in np.arange(sample_size_low, sample_size_high + 0.01, sample_size_step):
  for epochs in range(num_of_epochs_low, num_of_epochs_high + 1, num_of_epochs_step):
    for combo in combinations:
      for lr in lr_list:
        for size_of_batch in range(size_of_batch_low, size_of_batch_high + 1, size_of_batch_step):

          print(combo)

          # Perform the sampling
          train_images, train_labels, test_images, test_labels = sampling_method(sampling_method_id, train_images_all, train_labels_all, sample_size, test_images_all, test_labels_all, perc_test)
          q = Queue()
          process_eval = multiprocessing.Process(target=my_evaluate, args=(q, train_images, train_labels, test_images, test_labels, combo, dataset_shape, CONV_NEURONS_CONST, DENSE_NEURONS_CONST, CONV_NEURONS_BOUND, DENSE_NEURONS_BOUND, epochs, lr, size_of_batch))
          process_eval.start()
          test_acc, tr_time, tradeOff_metric = q.get()
          process_eval.join()

          # Print the results.
          print()
          print("Accuracy (on the testing dataset): {0:.2%}".format(test_acc))
          print(f"Training time: ", tr_time)
          print(tradeOff_metric)
          print()

          # Delete the dfs.
          del train_images
          del train_labels
          del test_images
          del test_labels

          # Clear the Keras session, otherwise it will keep adding new
          # models to the same TensorFlow graph each time we create
          # a model with a different set of hyper-parameters.
          K.clear_session()
          tf.compat.v1.reset_default_graph()

          if tradeOff_metric > best_score:
            best_score = tradeOff_metric
            best_sampling = sample_size
            best_epochs = epochs
            best_layers = combo
            best_acc = test_acc
            best_train = tr_time



Output hidden; open in https://colab.research.google.com to view.

In [14]:
stop_program = time.time()

# Results

In [15]:
print(best_score)
print(best_sampling)
print(best_epochs)
print(best_layers)
print(best_acc)
print(best_train)

0.5929200577106841
0.9500000000000001
5
('conv', 'conv', 'pool', 'conv', 'conv')
0.7520999908447266
23.256213426589966


In [16]:
# Compute the time
program_time = stop_program - start_program
program_time

6709.429879188538