In [54]:
from tensorflow.keras.datasets import boston_housing
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import mnist
from tensorflow.keras.datasets import boston_housing

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

from sklearn.preprocessing import StandardScaler

import random
from random import choice

from copy import deepcopy

import math

In [55]:
(X_train, y_train), (X_test, y_test) = boston_housing.load_data()

In [56]:
SEED = 42 # for reproducibility

random.seed(SEED) 
tf.random.set_seed(SEED)

ACTIVATIONS = ['relu', 'sigmoid', 'softmax', 'tanh', 'elu', 'linear']
OPTIMIZERS = ['sgd' , 'rmsprop', 'adam', 'adadelta', 'adagrad', 'adamax']
CLASS_LOSSES = ['categorical_crossentropy', 'binary_crossentropy', 'sparse_categorical_crossentropy']
REGR_LOSSES = ['mean_squared_error', 'mean_absolute_error']
BATCH_SIZES = [8, 16, 32, 64, 128, 256]

class Individual:

    def __init__(self, input_shape, output_shape, losses, config=None):
        """
        An individual is in the form of a 11 values vector
        [n0, n1, n2, n3, n4, n5, n6, n7, n8, n9, n10] where
        [n0-n4] are the amount of neurons per layer
        [n5-n6] activation functions on hidden and output layer
        [n7] amount of layers to be considered
        [n8] optimizer to be used
        [n9] loss to be used
        [n10] batch size
        
        If any neuron_in_layer in n0-n4 is 0, it's like having less layers
        """
        self.input_shape = input_shape
        self.output_shape = output_shape
        self.losses = losses
        
        self.config = self.__generate_random() if config == None else config
        self.nn = self.__build_nn()
        self._initial_w = self.nn.get_weights()

    
    def __build_nn(self):
        
        model = Sequential()
        
        for idx, neurons in enumerate(self.config[:5]):
            if idx == 0:
                model.add(Dense(neurons, activation=self.config[5], input_shape=(self.input_shape,)))
            
            elif neurons != 0 and idx < self.config[7]:
                model.add(Dense(neurons, activation=self.config[5]))
            
        model.add(Dense(self.output_shape, activation=self.config[6]))
        
        model.compile(optimizer=self.config[8], loss=self.config[9], metrics=['accuracy'])
        
        return model
        
        
    def __generate_random(self):

        return [
            random.randint(8, 64),
            random.randint(8, 64),
            random.randint(8, 64),
            random.randint(8, 64),
            random.randint(8, 64),
            choice(ACTIVATIONS),
            choice(ACTIVATIONS),
            random.randint(1, 5),
            choice(OPTIMIZERS),
            choice(self.losses),
            choice(BATCH_SIZES)
        ]
    
    def fitness(self, X_train, y_train, X_test, y_test, epochs):
        self.nn.set_weights(self._initial_w)
        
        try:
            history = self.nn.fit(X_train, 
                                  y_train, 
                                  epochs=epochs, 
                                  batch_size=self.config[10], 
                                  verbose=0,
                                  validation_data=(X_test, y_test))

            acc = history.history['accuracy'][-1]
            val_acc = history.history['val_accuracy'][-1]
            loss = history.history['loss'][-1]
            val_loss = history.history['val_loss'][-1]
            
        except: # If we fail to train with given loss function, this prob doesn't fit well :)
            return 0
        
        # We want to avoid overffit so we balance train and test accuracy (or loss)
        
        # If in classification problem ... use accuracy directly...
        if self.losses == CLASS_LOSSES:
            return 0.7 * val_acc + 0.3 * acc
        # Use the loss  (but do np.sqrt on squared error, bcs we want to be fair between them)
        elif self.config[9] == 'mean_squared_error':
            return -np.sqrt(val_loss)
        elif self.config[9] == 'mean_absolute_error':
            return -val_loss
        
        # sanity check
        else:
            print('Houston, we have a problem.')

class AutoML:
    def __init__(self, generations=15, size=10, retain=0.5, mutation_rate=0.10):
        
        # AutML hyperparameters
        self.SIZE = size
        self.RETAIN = retain
        self.GENERATIONS = generations
        self.MUTATION_RATE = mutation_rate
        self.BETTER_EVAL = int(0.7*self.GENERATIONS)
        self.MAX_EPOCHS = 20
        
        self._retain_int = int(self.SIZE * self.RETAIN)
   
        self.best_individual = None
        self.best_score = -math.inf

    
    def fit(self, X, y):
        """
        This method will run a genetic algorithm to find a neural network for your data
        returns: The keras model
        """
        try:
            self.input_shape = X.shape[1]
        except IndexError:
            self.input_shape = 1
        
        try:
            self.output_shape = y.shape[1]
        except:
            self.output_shape = 1
        
        self.fit_history = []
        
        split_idx = int(len(X)*0.8)
        
        X_train = X[:split_idx]
        y_train = y[:split_idx]
        X_val = X[split_idx:]
        y_val = y[split_idx:]
        
        # allowing for loss discovery without knowing if it was classification or regresion was too much trouble
        # so we do some old fashioned (simple) analysis
        if len(np.unique(y)) == 2:
            print(f'Evaluating problem as a Classification problem')
            self.losses = CLASS_LOSSES
        else:
            print(f'Evaluating problem as a Regression problem')
            self.losses = REGR_LOSSES
        
        self.pop = self.__generate_pop(self.SIZE)
        
        epochs = 4
        
        for GENERATION in range(self.GENERATIONS):
                        
            # after a few iterations we have some good nets usually
            # then we start increasing the amount of epochs we train, to get more accurate results
            if GENERATION > self.BETTER_EVAL:
                epochs = min(int(1.5 * epochs), self.MAX_EPOCHS)
            
            print(f'Evaluating generation {GENERATION} with {epochs} epochs')
            
            
            # calculate score for each one
            scored = [ (p, p.fitness(X_train, y_train, X_val, y_val, epochs)) for p in self.pop ]

            # sort the population
            sorted_pop = sorted(scored, key=lambda p: p[1], reverse=True)
            
            avg_fitness = sum([p[1] for p in sorted_pop])/float(self.SIZE)
            self.fit_history.append(avg_fitness)
            print(f'Average fitness {avg_fitness} on iteration {GENERATION}')

            best = sorted_pop[0]

            if best[1] > self.best_score:
                self.best_score = best[1]
                self.best_individual = best[0]

            sorted_pop = [v[0] for v in sorted_pop[:self._retain_int]]

            # keep only uniques
            new_sorted_pop = []
            for pop in sorted_pop:
                if pop not in new_sorted_pop:
                    new_sorted_pop.append(pop)
            
            sorted_pop = new_sorted_pop

            # If we had too many copies...
            if len(sorted_pop) < 3:
                # add some mutations :)
                
                for _ in range(4-len(sorted_pop)):
                    sorted_pop.append(self.mutate(sorted_pop[0]))
            
            while len(sorted_pop) < self.SIZE:

                new = sorted_pop[0]
                while new.config in [i.config for i in sorted_pop]:
                    
                    idx_p1 = random.randint(0, len(sorted_pop)-1)
                    idx_p2 = random.randint(0, len(sorted_pop)-1)
                    while idx_p2 == idx_p1:
                        idx_p2 = random.randint(0, len(sorted_pop)-1)

                    new = self.recombine(sorted_pop[idx_p1], sorted_pop[idx_p2])
                    
                    # If breeded already in, then try some child mutation...
                    if new.config in [i.config for i in sorted_pop]:
                        new = self.mutate(new)

                sorted_pop.append(new)

            assert len(sorted_pop) == self.SIZE

            self.pop = []
            for i in sorted_pop:
                if random.random() < self.MUTATION_RATE:
                    self.pop.append(self.mutate(i))
                else:
                    self.pop.append(i)
                    
            print('New generation is ready')
        
        return self.best_individual.nn
    
    
    def recombine(self, p1, p2):
        p1 = p1.config
        p2 = p2.config
        
        split_idx = random.randint(0, len(p1))
        
        child = deepcopy(p1[:split_idx])
        child += deepcopy(p2[split_idx:])
        
        return Individual(self.input_shape, self.output_shape, self.losses, child)
        
    def mutate(self, p):
        p = p.config
        
        idx = random.randint(0, len(p)-1) # selects one property to change
        cur_value = p[idx]
        
        while p[idx] == cur_value:      # ensure we are changing
            
            if idx == 0:
                p[idx] = random.randint(1, 64)
            elif idx > 0 and idx < 5:
                p[idx] = random.randint(0, 64)
            elif idx == 5 or idx == 6:
                p[idx] = choice(ACTIVATIONS)
            elif idx == 7:
                p[idx] = random.randint(1, 5)
            elif idx == 8:
                p[idx] = choice(OPTIMIZERS)
            elif idx == 9:
                p[idx] = choice(self.losses)
            elif idx == 10:
                p[idx] = choice(BATCH_SIZES)
                
        return Individual(self.input_shape, self.output_shape, self.losses, p)
    
    def __generate_pop(self, size):
        pop = []
        for _ in range(size):
            pop.append(self.__generate_individual())
            
        return pop
            

    def __generate_individual(self):
        return Individual(self.input_shape, self.output_shape, self.losses)

In [57]:
def get_boston():
    (X_train, y_train), (X_test, y_test) = boston_housing.load_data()

    scaler = StandardScaler()
    scaler.fit(X_train)
    
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)
    
    return (X_train, y_train), (X_test, y_test)

In [58]:
# Load the dataset
(X_train, y_train), (X_test, y_test) = get_boston()

# Created a AutoML class
a = AutoML()

# Just let the magic happen :)
model = a.fit(X_train, y_train)

Evaluating problem as a Regression problem
Evaluating generation 0 with 4 epochs
Average fitness -21.95597129429627 on iteration 0
New generation is ready
Evaluating generation 1 with 4 epochs
Average fitness -20.386927632149686 on iteration 1
New generation is ready
Evaluating generation 2 with 4 epochs
Average fitness -19.431851387023926 on iteration 2
New generation is ready
Evaluating generation 3 with 4 epochs
Average fitness -19.22057695388794 on iteration 3
New generation is ready
Evaluating generation 4 with 4 epochs
Average fitness -17.27517948150635 on iteration 4
New generation is ready
Evaluating generation 5 with 4 epochs
Average fitness -13.240061950683593 on iteration 5
New generation is ready
Evaluating generation 6 with 4 epochs
Average fitness -15.151275300979615 on iteration 6
New generation is ready
Evaluating generation 7 with 4 epochs
Average fitness -15.394983853093487 on iteration 7
New generation is ready
Evaluating generation 8 with 4 epochs
Average fitness -1

In [50]:
y_pred = model.predict(X_test)

In [52]:
mae = tf.keras.losses.MeanAbsoluteError()
mae(y_test, y_pred).numpy()

9.729578