In [183]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import mnist

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

import random
from random import choice

from copy import deepcopy

import math

In [234]:
ACTIVATIONS = ['relu', 'sigmoid', 'softmax', 'tanh', 'elu']

class Individual:

    def __init__(self, config=None):
        """
        An individual is in the form of a 11 values vector
        [n0, n1, n2, n3, n4, n5, n6, n7, n8, n9] where
        [n0-n4] are the amount of neurons per layer
        [n5-n9] are the activation function on each layer
        n10 is the activation function on the last layer
        
        If any neuron in n0-n4 is 0, it's like having less layers
        """
        
        
        self.config = self.__generate_random() if config == None else config
        self.nn = self.__build_nn()
        self._initial_w = self.nn.get_weights()

    
    def __build_nn(self):
        
        model = Sequential()
        
        for idx, neurons in enumerate(self.config[:5]):
            if idx == 0:
                model.add(Dense(neurons, activation=self.config[5], input_shape=(784,)))
            
            if neurons != 0:
                model.add(Dense(neurons, activation=self.config[5]))
            
        model.add(Dense(10, activation=self.config[-1]))
        
        model.compile(optimizer='adam', loss='categorical_crossentropy',metrics=['accuracy'])
        
        return model
        
        
    def __generate_random(self):

        return [
            random.randint(1, 64),
            random.randint(0, 64),
            random.randint(0, 64),
            random.randint(0, 64),
            random.randint(0, 64),
            choice(ACTIVATIONS),
            choice(ACTIVATIONS),
        ]
    
    def fitness(self, X_train, y_train, X_test, y_test):
        self.nn.set_weights(self._initial_w)

        history = self.nn.fit(X_train, 
                              y_train, 
                              epochs=4, 
                              batch_size=256, 
                              verbose=0,
                              validation_data=(X_test, y_test))

        acc = history.history['accuracy'][-1]
        val_acc = history.history['val_accuracy'][-1]
        return acc
        

class AutoML:
    def __init__(self):
        
        # AutML hyperparameters
        self.SIZE = 10
        self.RETAIN = 0.5
        self.GENERATIONS = 10
        self.MUTATION_RATE = 0.10
        
        self._retain_int = int(self.SIZE * self.RETAIN)
   
        self.pop = self.__generate_pop(self.SIZE)
        self.best_individual = None
        self.best_score = -math.inf

    
    def fit(self, X, y):
        
        split_idx = int(len(X)*0.8)
        
        X_train = X[:split_idx]
        y_train = y[:split_idx]
        X_val = X[split_idx:]
        y_val = y[split_idx:]
        
        for GENERATION in range(self.GENERATIONS):
            # calculate score for each one
            scored = [ (p, p.fitness(X, y)) for p in self.pop ]

            # sort the population
            sorted_pop = sorted(scored, key=lambda p: p[1], reverse=True)
            
            avg_fitness = sum([p[1] for p in sorted_pop])/float(self.SIZE)
            
            print(f'Average fitness {avg_fitness} on iteration {GENERATION}')

            best = sorted_pop[0]

            if best[1] > self.best_score:
                self.best_score = best[1]
                self.best_individual = best[0]

            sorted_pop = [v[0] for v in sorted_pop[:self._retain_int]]

            while len(sorted_pop) < self.SIZE:

                idx_p1 = random.randint(0, self._retain_int-1)
                idx_p2 = random.randint(0, self._retain_int-1)
                while idx_p2 == idx_p1:
                    idx_p2 = random.randint(0, self._retain_int-1)

                new = self.recombine(sorted_pop[idx_p1], sorted_pop[idx_p2])

                sorted_pop.append(new)

            assert len(sorted_pop) == self.SIZE

            self.pop = []
            for i in sorted_pop:
                if random.random() < self.MUTATION_RATE:
                    self.pop.append(self.mutate(i))
                else:
                    self.pop.append(i)
        
        return self.best_individual, self.best_score
    
    
    def recombine(self, p1, p2):
        p1 = p1.config
        p2 = p2.config
        
        split_idx = random.randint(0, len(p1))
        
        child = deepcopy(p1[:split_idx])
        child += deepcopy(p2[split_idx:])
        
        return Individual(child)
        
    def mutate(self, p):
        p = p.config
        
        idx = random.randint(0, len(p)-1) # selects on property to change
        cur_value = p[idx]
        
        while p[idx] == cur_value:      # ensure we are changing
            if idx < 5:
                p[idx] = random.randint(0, 64)
            else:
                p[idx] = choice(ACTIVATIONS)
                
        return Individual(p)
    
    def __generate_pop(self, size):
        pop = []
        for _ in range(size):
            pop.append(self.__generate_individual())
            
        return pop
            

    def __generate_individual(self):
        return Individual()

In [235]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [236]:
# Normalize the images.
X_train = (X_train / 255.) - 0.5
X_test = (X_test / 255.) - 0.5

# Flatten the images.
X_train = X_train.reshape((-1, 784))
X_test = X_test.reshape((-1, 784))

y_train = keras.utils.to_categorical(y_train)
y_test = keras.utils.to_categorical(y_test)

In [237]:
a = AutoML()

In [238]:
bi, bscore = a.fit(X_train, y_train)

Average fitness 0.15213666781783103 on iteration 0
Average fitness 0.17387166991829872 on iteration 1
Average fitness 0.4282566629350185 on iteration 2
Average fitness 0.5792799927294254 on iteration 3
Average fitness 0.7538616612553597 on iteration 4
Average fitness 0.9392933249473572 on iteration 5
Average fitness 0.9392933309078216 on iteration 6
Average fitness 0.8574966669082642 on iteration 7
Average fitness 0.9416049957275391 on iteration 8
Average fitness 0.9440533339977264 on iteration 9


In [239]:
bscore

0.9498166441917419

In [243]:
a.pop[1].config

[26, 63, 25, 20, 53, 'tanh', 'softmax']

In [244]:
model = Sequential()

model.add(Dense(64, activation='relu', input_shape=(784,)))


model.add(Dense(64, activation='relu'))

model.add(Dense(10, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy',metrics=['accuracy'])

In [246]:
history = model.fit(X_train, y_train, epochs=4, batch_size=256)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [252]:
model = Sequential()

model.add(Dense(26, activation='relu', input_shape=(784,)))
model.add(Dense(63, activation='relu'))

model.add(Dense(25, activation='relu'))
model.add(Dense(20, activation='relu'))
model.add(Dense(53, activation='relu'))

model.add(Dense(10, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [253]:
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=4, batch_size=256)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
