# Genetic Algorithm for Tuning CNN Hyperparameters

- Neural Network Architecture: CNN (built with Keras);
- Dataset: MNIST;
- Hyperparameters: Hidden Layers Size, Optimizers and Learning Rate.

### 1. Libraries

In [1]:
# Import Keras
import keras

# Import Mnist dataset
from keras.datasets import mnist

# Network Model  
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.callbacks import EarlyStopping, ModelCheckpoint

#Import the network optimizers
from keras.optimizers import Adam, RMSprop, Adagrad, Adadelta, Adamax, SGD

#Import utilities
import numpy as np
from keras.utils.np_utils import to_categorical
from operator import add
from functools import reduce
from keras import backend as K
import random

Using TensorFlow backend.


### 2. Define Fixed Hyperparameters 

Initially, we will define three hyperparameters with fixed values (number of classes, batch size, and number of epochs), together with the training callbacks.
- Number of Classes: The total number of classes into which the network can classify an image.
- Batch Size: The number of samples processed before the model is updated.
- Epochs: The number of complete passes through the training dataset.
- Callbacks: A callback is an object that can perform actions at various stages of training. With EarlyStopping, training stops when the monitored metric (in our case, the validation accuracy) has stopped improving.

In [0]:
n_classes = 10    # number of target classes; used for the one-hot labels and the output layer size
batch_size = 128  # samples per gradient update, passed to model.fit
n_epochs = 2  # full passes over the training set, passed to model.fit
# Stop training once 'val_accuracy' has failed to improve for `patience` epochs.
# NOTE: with n_epochs = 2 a patience of 5 can never be reached, so early stopping
# only takes effect if n_epochs is raised above the patience.
callbacks = [EarlyStopping(monitor='val_accuracy', patience=5)]

### 3. Preparing the Data

The second step is to prepare the training and test data. To make the network easier to train, we will normalize the image pixel values from [0, 255] to [0.0, 1.0]. Also, since Keras requires a channel dimension, we will reshape the image arrays from (n_samples, 28, 28) to (n_samples, 28, 28, 1).

In [0]:
# Load MNIST; the second split returned by Keras serves as our validation set
(X_train, y_train), (X_val, y_val) = mnist.load_data()

# The convolutional layers expect a trailing channel axis, so the image arrays
# are reshaped to (n_samples, height, width, n_channels). For our 28x28
# grayscale images this is (n_samples, 28, 28, 1).
image_height, image_width = X_train.shape[1:3]
n_channels = 1  # grayscale images have a single channel
input_shape = (image_height, image_width, n_channels)

X_train = X_train.reshape((X_train.shape[0],) + input_shape)
X_val = X_val.reshape((X_val.shape[0],) + input_shape)

# Scale the pixel values from [0, 255] down to [0.0, 1.0]
X_train = X_train.astype('float32') / 255
X_val = X_val.astype('float32') / 255

# The categorical cross-entropy loss compares predictions against one-hot
# targets, so the integer class labels are converted to binary class matrices.
y_train = to_categorical(y_train, n_classes)
y_val = to_categorical(y_val, n_classes)

### 4. Building the Model

For building the CNN model, since our CNN will be a linear stack of layers, we will work with Keras's Sequential class. Our CNN will be composed of:
- Up to 3 Convolutional Layers (the second and third are skipped when their size hyperparameter is 0);
- 1 MaxPooling Layer after each Convolutional Layer;
- 1 Dropout Layer;
- 1 Flatten Layer;
- 1 Fully Connected Layer;
- Output Layer (Softmax);

In our work, we will use a Genetic Algorithm to search for the best values of the following hyperparameters: Hidden Layers Size, Optimizer, Learning Rate and Dropout rate.

In [0]:
def create_model(parameters, n_classes, input_shape):
    """Build and compile a CNN from a dictionary of hyperparameters.

    Args:
        parameters: dict with the keys 'optimizer' (one of 'Adam', 'rmsprop',
            'Adagrad', 'Adadelta', 'Adamax', 'SGD'), 'learning_rate',
            'layer_1', 'layer_2', 'layer_3' (exponents: a layer gets
            ``2**value`` filters) and 'Dropout' (dropout rate).
        n_classes: number of units in the softmax output layer.
        input_shape: shape of a single input image, passed to the first
            convolutional layer.

    Returns:
        The compiled Keras ``Sequential`` model.

    Raises:
        KeyError: if a required hyperparameter is missing from `parameters`.
        ValueError: if `parameters['optimizer']` is not a supported name.
    """
    print(parameters)

    # Map the optimizer names used in the search space to their Keras classes
    optimizer_classes = {
        'Adam': Adam,
        'rmsprop': RMSprop,
        'Adagrad': Adagrad,
        'Adadelta': Adadelta,
        'Adamax': Adamax,
        'SGD': SGD,
    }

    # Read the hyperparameters
    optimizer = parameters['optimizer']
    learning_rate = parameters['learning_rate']
    layer1 = parameters['layer_1']
    layer2 = parameters['layer_2']
    layer3 = parameters['layer_3']
    dropout = parameters['Dropout']

    # Fail early with a clear message instead of hitting an unbound `opt`
    # after the whole model has already been built.
    if optimizer not in optimizer_classes:
        raise ValueError(
            'Unknown optimizer {!r}; expected one of {}'.format(
                optimizer, sorted(optimizer_classes)))

    # Create the model
    model = Sequential()

    # The first convolutional block is always present
    model.add(Conv2D(filters=2**layer1, kernel_size=(3, 3), activation='relu',
                     padding='same', input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # The second and third blocks are optional: a size of 0 disables the block
    for layer_size in (layer2, layer3):
        if layer_size > 0:
            model.add(Conv2D(filters=2**layer_size, kernel_size=(3, 3),
                             activation='relu', padding='same'))
            model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Dropout(dropout))
    model.add(Flatten())
    # Densely connected layer
    model.add(Dense(128, activation='relu'))
    # Output layer
    model.add(Dense(n_classes, activation='softmax'))

    # Instantiate the optimizer chosen by the GA with the chosen learning rate
    opt = optimizer_classes[optimizer](learning_rate)

    model.compile(loss=keras.losses.categorical_crossentropy, optimizer=opt,
                  metrics=['accuracy'])

    return model

We need a `Network` class that we can use to create a network with random hyperparameters and to train it. Moreover, it should store the accuracy reached by the network so that it can be retrieved later.

In [0]:
class Network():
    """A candidate hyperparameter configuration and the accuracy it reached."""

    def __init__(self, parameter_space=None):
        """Start with no hyperparameters chosen and an accuracy of zero.

        Args:
            parameter_space: dict mapping each hyperparameter name to the
                list of values it may take.
        """
        self.accuracy = 0.
        self.parameter_space = parameter_space
        self.network_parameters = {}

    def set_random_parameters(self):
        """Pick a random value for every hyperparameter in the search space."""
        for name, choices in self.parameter_space.items():
            self.network_parameters[name] = random.choice(choices)

    def create_network(self, network):
        """Use the given hyperparameter dict as this network's configuration."""
        self.network_parameters = network

    def train(self):
        """Build and fit the model, then store its best validation accuracy."""
        model = create_model(self.network_parameters, n_classes, input_shape)
        fit_options = dict(
            batch_size=batch_size,
            epochs=n_epochs,
            verbose=0,
            validation_data=(X_val, y_val),
            callbacks=callbacks,
        )
        history = model.fit(X_train, y_train, **fit_options)
        # Keep the highest validation accuracy seen over all epochs
        val_accuracies = history.history['val_accuracy']
        self.accuracy = max(val_accuracies)

### 5. Genetic Algorithm

In [0]:
class Genetic_Algorithm():
    """Genetic algorithm that evolves a population of `Network` individuals.

    Args:
        parameter_space: dict mapping each hyperparameter name to the list of
            values it may take.
        retain: fraction of the fittest individuals kept as parents.
        random_select: probability of also keeping each non-retained
            individual, to preserve diversity.
        mutate_prob: probability that a newly bred child is mutated.
    """

    def __init__(self, parameter_space, retain=0.3, random_select=0.1, mutate_prob=0.25):
        self.mutate_prob = mutate_prob
        self.random_select = random_select
        self.retain = retain
        self.parameter_space = parameter_space

    # Create Initial Population
    def create_population(self, count):
        """Return `count` networks with randomly drawn hyperparameters."""
        population = []
        for _ in range(count):
            network = Network(self.parameter_space)
            network.set_random_parameters()
            population.append(network)
        return population

    # Get fitness from each individual. The fitness function is the accuracy of the network
    @staticmethod
    def get_fitness(network):
        """Return the fitness of `network`, i.e. its stored accuracy.

        Declared static so that it can be called both on the class and on an
        instance (``self.get_fitness(network)``).
        """
        return network.accuracy

    def get_grade(self, population):
        """Return the mean fitness of `population` (0.0 when it is empty)."""
        if not population:
            return 0.0
        total = sum(self.get_fitness(network) for network in population)
        return float(total) / len(population)

    # Crossover to generate a new population
    def breed(self, mother, father):
        """Create two children by uniform crossover of two parents.

        Each hyperparameter of a child is copied at random from either parent;
        the child is then mutated with probability `mutate_prob`.

        Returns:
            A list with the two child networks.
        """
        children = []
        for _ in range(2):
            child = {}
            for param in self.parameter_space:
                child[param] = random.choice(
                    [mother.network_parameters[param],
                     father.network_parameters[param]]
                )
            network = Network(self.parameter_space)
            network.create_network(child)
            if self.mutate_prob > random.random():
                network = self.mutate(network)
            children.append(network)
        return children

    # Mutate individuals to create a more diversified population
    def mutate(self, network):
        """Re-draw one randomly chosen hyperparameter of `network` in place.

        Returns:
            The same `network` object, for convenience.
        """
        mutation = random.choice(list(self.parameter_space))
        network.network_parameters[mutation] = random.choice(
            self.parameter_space[mutation])
        return network

    # Evolve the population to generate a new population
    def evolve(self, pop):
        """Return the next generation, with the same size as `pop`.

        The fittest individuals (and a few random others) survive as parents;
        the remaining slots are filled with children bred from random pairs
        of distinct parents.
        """
        # Rank the individuals from fittest to least fit
        graded = sorted(pop, key=self.get_fitness, reverse=True)

        # Crossover needs two distinct parents; smaller populations cannot evolve
        if len(graded) < 2:
            return graded

        # Always keep at least two parents: with fewer, no pair of distinct
        # parents exists and breeding could never fill the population.
        retain_length = max(2, int(len(graded) * self.retain))
        parents = graded[:retain_length]

        # Randomly keep some weaker individuals to preserve diversity
        for individual in graded[retain_length:]:
            if self.random_select > random.random():
                parents.append(individual)

        desired_length = len(pop) - len(parents)
        children = []

        while len(children) < desired_length:
            # random.sample guarantees two different parents
            mother, father = random.sample(parents, 2)
            children.extend(self.breed(mother, father))

        # breed() yields two children at a time, so drop any surplus child
        parents.extend(children[:desired_length])

        return parents

We will retrieve the average accuracy across a population:

In [0]:
def get_population_accuracy(population):
    """Return the mean of the `accuracy` attribute over all networks in `population`."""
    # Fold the accuracies left to right, starting from 0
    accuracy_sum = reduce(add, (member.accuracy for member in population), 0)
    return accuracy_sum / len(population)

### 6. Set Parameters 

In [0]:
# Size of the genetic search. Both values are deliberately tiny for a quick
# demonstration; a population of 2 leaves little room for selection and
# crossover, so increase them for a meaningful search.
n_generations = 2  # number of train/evaluate rounds in the main loop
population_size = 2  # number of networks created for the initial population

We now set the hyperparameter space that we want to explore: Hidden Layers Size, Optimizers, Dropout rate and Learning Rate.

In [0]:
# Search space of the genetic algorithm: every key is a hyperparameter read by
# create_model and every list holds the candidate values for it.
parameter_space = {
    # Optimizer names; they must match the names checked in create_model
    'optimizer': ['Adam', 'rmsprop', 'Adagrad', 'Adadelta', 'Adamax', 'SGD'],
    # Exponent e: the first convolutional layer gets 2**e filters (always present)
    'layer_1': [0, 1, 2, 3, 4, 5, 6, 7],
    # Exponent e: 2**e filters; a value of 0 leaves the second conv block out
    'layer_2': [0, 1, 2, 3, 4, 5, 6, 7],
    # Exponent e: 2**e filters; a value of 0 leaves the third conv block out
    'layer_3': [0, 1, 2, 3, 4, 5, 6, 7],
    # Rate passed to the Dropout layer placed before Flatten
    'Dropout': [0, 0.25, 0.5],
    # Learning rate passed to the chosen optimizer
    'learning_rate': [0.1, 0.01, 0.001, 0.0001]
}

We will create the initial population:

In [0]:
# Set up the genetic algorithm over the search space, keeping the default
# retain, random_select and mutate_prob settings
GA = Genetic_Algorithm(parameter_space)
# Initial generation: population_size networks with randomly drawn hyperparameters
population = GA.create_population(population_size)

In [0]:
# Run the genetic search: train and score each generation, then breed the next
for i in range(n_generations):
    print('Generation {}'.format(i))

    # Train every individual so that its accuracy reflects its hyperparameters
    for network in population:
        network.train()

    average_accuracy = get_population_accuracy(population)
    print('Average accuracy: {:.2f}'.format(average_accuracy))

    # Evolve every generation except the last one. The result must replace
    # `population`; otherwise the next iteration would simply retrain the
    # same individuals and the search would never progress.
    if i < n_generations - 1:
        population = GA.evolve(population)

Generation 0
{'optimizer': 'rmsprop', 'layer_1': 4, 'layer_2': 1, 'layer_3': 1, 'Dropout': 0.5, 'learning_rate': 0.0001}
