# README

Ce projet est essentiellement une classe d'AutoML qui se concentre sur la découverte d'une architecture de réseau de neurones.

L'objectif principal est ici de rendre le ML aussi simple que possible : il s'agit d'une seule classe avec une seule méthode `fit`. Il suffit d'entrer vos données, et cette méthode retournera un modèle Keras entraîné.

Il tente de traiter tous les problèmes qu'un ingénieur en apprentissage automatique devrait lui aussi traiter :

- Hyperparameter tuning
- Under/Over-Fitting

Il augmente également le nombre d'époques d'entraînement au fur et à mesure que les générations s'améliorent ; de cette façon, nous obtenons de meilleurs résultats.

## Running this Jupyter notebook

Afin de faire fonctionner ce notebook Jupyter, vous devez installer [TensorFlow](https://www.tensorflow.org/), qui fournit [keras](https://keras.io/).

In [22]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import mnist

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

import random
from random import choice

from copy import deepcopy

import math

In [32]:
# Seed Python's `random` module for reproducibility.
random.seed(0)

# Gene pools: the candidate values sampled for each network's genome.
ACTIVATIONS = [
    'relu',
    'sigmoid',
    'softmax',
    'tanh',
    'elu',
]
OPTIMIZERS = [
    'sgd',
    'rmsprop',
    'adam',
    'adadelta',
    'adagrad',
    'adamax',
]
LOSSES = [
    'binary_crossentropy',
    'categorical_crossentropy',
    'sparse_categorical_crossentropy',
    'mean_squared_error',
    'mean_absolute_error',
]

class Individual:
    """One candidate of the genetic search: a genome and the Keras model built from it.

    The genome (``config``) is a list of 10 values:

    * ``config[0:5]`` -- number of neurons of each of the (up to) five hidden
      layers. The first layer is always built; a value of 0 in positions 1-4
      skips that layer.
    * ``config[5]`` -- activation function shared by every hidden layer.
    * ``config[6]`` -- activation function of the output layer.
    * ``config[7]`` -- maximum number of hidden layers to build (1 to 5).
    * ``config[8]`` -- optimizer name passed to ``compile``.
    * ``config[9]`` -- loss name passed to ``compile``.
    """

    def __init__(self, input_shape, output_shape, config=None):
        """
        Build (and compile) the network described by ``config``.

        input_shape: number of input features fed to the first Dense layer
        output_shape: number of units of the output layer
        config: genome as described on the class; a random one is generated
                when it is None
        """
        self.input_shape = input_shape
        self.output_shape = output_shape

        # Store a private copy so later in-place edits of the caller's list
        # cannot desynchronise this genome from the network built from it.
        self.config = self.__generate_random() if config is None else list(config)
        self.nn = self.__build_nn()
        # Snapshot of the untrained weights, restored before every evaluation.
        self._initial_w = self.nn.get_weights()

    def __build_nn(self):
        """Translate the genome into a compiled Sequential model."""
        layer_sizes = self.config[:5]
        hidden_activation, output_activation, depth, optimizer, loss = self.config[5:10]

        model = Sequential()

        for idx, neurons in enumerate(layer_sizes):
            if idx == 0:
                # The first hidden layer is always present and declares the input width.
                model.add(Dense(neurons, activation=hidden_activation, input_shape=(self.input_shape,)))

            elif neurons != 0 and idx < depth:
                # Further layers exist only if they have neurons and fit within the depth gene.
                model.add(Dense(neurons, activation=hidden_activation))

        model.add(Dense(self.output_shape, activation=output_activation))

        model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

        return model

    def __generate_random(self):
        """Return a random genome (see the class docstring for the layout)."""
        return [
            random.randint(8, 64),
            random.randint(8, 64),
            random.randint(8, 64),
            random.randint(8, 64),
            random.randint(8, 64),
            choice(ACTIVATIONS),
            choice(ACTIVATIONS),
            random.randint(1, 5),
            choice(OPTIMIZERS),
            choice(LOSSES)
        ]

    def fitness(self, X_train, y_train, X_test, y_test, epochs):
        """
        Train the network from its initial weights and score it.

        returns: 0.7 * validation accuracy + 0.3 * training accuracy of the
                 last epoch, or 0 when training raises (e.g. the loss in the
                 genome does not fit the data).
        """
        # Always restart from the same untrained weights so scores are comparable.
        self.nn.set_weights(self._initial_w)

        try:
            history = self.nn.fit(X_train,
                                  y_train,
                                  epochs=epochs,
                                  batch_size=256,
                                  verbose=0,
                                  validation_data=(X_test, y_test))

            acc = history.history['accuracy'][-1]
            val_acc = history.history['val_accuracy'][-1]

        # A genome whose loss/activation cannot be trained on this data is
        # simply unfit. Only ordinary errors are absorbed: KeyboardInterrupt
        # and SystemExit must still be able to stop the search.
        except Exception:
            return 0

        # We want to avoid overfitting, so we balance train and validation accuracy
        return 0.7 * val_acc + 0.3 * acc
        

class AutoML:
    """Genetic search for a small dense Keras network that fits the given data.

    Each generation scores every individual, keeps the best fraction, refills
    the population with children (crossover, plus mutation when a child is a
    duplicate) and finally mutates some members at random.
    """

    def __init__(self, generations=10, size=10, retain=0.5, mutation_rate=0.10):
        """
        generations: number of generations to run
        size: number of individuals in the population
        retain: fraction of the population kept as parents each generation
        mutation_rate: probability that a member is mutated before the next generation
        """
        # AutoML hyperparameters
        self.SIZE = size
        self.RETAIN = retain
        self.GENERATIONS = generations
        self.MUTATION_RATE = mutation_rate
        # Generation index after which training epochs start growing.
        self.BETTER_EVAL = int(0.7*self.GENERATIONS)
        self.MAX_EPOCHS = 16

        self._retain_int = int(self.SIZE * self.RETAIN)

        self.best_individual = None
        self.best_score = -math.inf

    def fit(self, X, y):
        """
        This method will run a genetic algorithm to find a neural network for your data

        X: 2-D array of samples (its second dimension is the input width)
        y: 2-D array of targets (its second dimension is the output width)
        returns: The keras model of the best individual seen during the search
        """
        self.input_shape = X.shape[1]
        self.output_shape = y.shape[1]

        self.pop = self.__generate_pop(self.SIZE)

        self.fit_history = []

        # The first 80% of the samples train the networks, the rest validates them.
        split_idx = int(len(X)*0.8)
        X_train, y_train = X[:split_idx], y[:split_idx]
        X_val, y_val = X[split_idx:], y[split_idx:]

        epochs = 4

        for generation in range(self.GENERATIONS):

            # after a few iterations we have some good nets usually
            # then we start increasing the amount of epochs we train, to get more accurate results
            if generation > self.BETTER_EVAL:
                epochs = min(epochs + epochs // 2, self.MAX_EPOCHS)

            # calculate score for each one, best first
            scored = [(p, p.fitness(X_train, y_train, X_val, y_val, epochs)) for p in self.pop]
            ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)

            avg_fitness = sum(score for _, score in ranked)/float(self.SIZE)
            self.fit_history.append(avg_fitness)
            print(f'Average fitness {avg_fitness} on iteration {generation}')

            top_individual, top_score = ranked[0]
            if top_score > self.best_score:
                self.best_score = top_score
                self.best_individual = top_individual

            # Keep the best fraction (at least one) and drop genome duplicates.
            # Individuals are compared by config: comparing the objects
            # themselves would only test identity and never remove anything.
            kept = max(1, self._retain_int)
            survivors = self._unique_by_config(ind for ind, _ in ranked[:kept])

            # If we had too many copies, add some mutations of the best one.
            # Never grow beyond the population size (matters when SIZE < 4).
            if len(survivors) < 3:
                target = min(4, self.SIZE)
                while len(survivors) < target:
                    survivors.append(self.mutate(survivors[0]))

            # Refill the population with children whose genome is not present yet.
            while len(survivors) < self.SIZE:
                survivors.append(self._breed_child(survivors))

            assert len(survivors) == self.SIZE

            # Random mutation pass before the next generation.
            self.pop = [
                self.mutate(ind) if random.random() < self.MUTATION_RATE else ind
                for ind in survivors
            ]

        return self.best_individual.nn

    @staticmethod
    def _unique_by_config(individuals):
        """Return the individuals in order, keeping the first of each distinct config."""
        seen = set()
        unique = []
        for ind in individuals:
            key = tuple(ind.config)
            if key not in seen:
                seen.add(key)
                unique.append(ind)
        return unique

    def _breed_child(self, parents):
        """Return a child of two distinct parents whose config is not already in ``parents``."""
        taken = [p.config for p in parents]
        while True:
            # random.sample guarantees two different parents (and raises
            # instead of spinning forever if fewer than two are available).
            idx_p1, idx_p2 = random.sample(range(len(parents)), 2)
            child = self.recombine(parents[idx_p1], parents[idx_p2])

            # If the bred genome is already in, then try some child mutation...
            if child.config in taken:
                child = self.mutate(child)

            if child.config not in taken:
                return child

    def recombine(self, p1, p2):
        """Single-point crossover: genes before a random split come from p1, the rest from p2."""
        genes1 = p1.config
        genes2 = p2.config

        split_idx = random.randint(0, len(genes1))

        # Slicing already yields new lists, so neither parent genome is shared.
        child = genes1[:split_idx] + genes2[split_idx:]

        return Individual(self.input_shape, self.output_shape, child)

    def mutate(self, p):
        """Return a NEW individual whose genome differs from ``p`` in exactly one gene.

        ``p`` itself is left untouched, so its config keeps describing its network.
        """
        return Individual(self.input_shape, self.output_shape, self._mutate_config(p.config))

    @staticmethod
    def _mutate_config(config):
        """Return a copy of ``config`` with one randomly chosen gene changed."""
        mutated = list(config)  # work on a copy: never edit the parent's genome in place

        idx = random.randint(0, len(mutated)-1)  # selects one property to change
        cur_value = mutated[idx]

        while mutated[idx] == cur_value:      # ensure we are changing

            if idx == 0:
                # The first layer is always built, so it needs at least one neuron.
                mutated[idx] = random.randint(1, 64)
            elif idx < 5:
                # 0 neurons means "skip this layer".
                mutated[idx] = random.randint(0, 64)
            elif idx == 5 or idx == 6:
                mutated[idx] = choice(ACTIVATIONS)
            elif idx == 7:
                mutated[idx] = random.randint(1, 5)
            elif idx == 8:
                mutated[idx] = choice(OPTIMIZERS)
            elif idx == 9:
                mutated[idx] = choice(LOSSES)

        return mutated

    def __generate_pop(self, size):
        """Return ``size`` freshly generated random individuals."""
        return [self.__generate_individual() for _ in range(size)]

    def __generate_individual(self):
        """Return one random individual sized for the current data."""
        return Individual(self.input_shape, self.output_shape)

In [33]:
def get_mnist():
    """Load MNIST and return ((X_train, y_train), (X_test, y_test)).

    Images are divided by 255, shifted by -0.5 and flattened to rows of 784
    values; labels are one-hot encoded with ``keras.utils.to_categorical``.
    """
    (train_images, train_labels), (test_images, test_labels) = mnist.load_data()

    def prepare(images):
        # Normalize first, then flatten each image into a 784-value row.
        normalized = (images / 255.) - 0.5
        return normalized.reshape((-1, 784))

    train_set = (prepare(train_images), keras.utils.to_categorical(train_labels))
    test_set = (prepare(test_images), keras.utils.to_categorical(test_labels))

    return train_set, test_set

In [None]:
# Load the dataset (flattened, normalized images and one-hot labels)
(X_train, y_train), (X_test, y_test) = get_mnist()

# Create an AutoML instance with its default hyperparameters
a = AutoML()

# Run the genetic search; fit() returns the Keras model of the best individual found
model = a.fit(X_train, y_train)

In [28]:
# Predict class scores, then turn each row into a one-hot vector of its top class.
y_pred = model.predict(X_test)
# Pin the one-hot width to the label width: without num_classes, to_categorical
# infers it from the largest predicted class, which yields a narrower matrix
# than y_test whenever the highest class is never predicted.
y_pred = keras.utils.to_categorical(np.argmax(y_pred, axis=1), num_classes=y_test.shape[1])

In [29]:
# Count the test samples whose one-hot prediction equals the one-hot label.
correct = sum(
    1
    for predicted_row, expected_row in zip(y_pred, y_test)
    if (predicted_row == expected_row).all()
)

print(f'Accuracy on test set {correct/float(len(y_pred))}')

Accuracy on test set 0.9567


In [31]:
a.best_individual.config

[58, 50, 27, 38, 36, 'relu', 'softmax', 5, 'rmsprop', 'binary_crossentropy']

### First test on a classification problem

The code above runs AutoML on a classification problem. Note that we don't need to specify that it is a classification problem, or anything else. We just provide the data and the genetic algorithm figures out which kind of problem is to be solved.

It's interesting to see that (at least with `random.seed(15)`) we find some expected results: the loss function is some kind of cross-entropy, the last activation function is a softmax, and we do not need too many layers (as this overfits the model).