### Introdução

Uma rede neural possui muitos hiperparâmetros e o principal desafio é escolher um conjunto de hiperparâmetros que faça a rede ter uma boa acurácia no conjunto de teste. A minha idéia foi usar um algoritmo genético para selecionar um conjunto de hiperparâmetros adequados.

Para contruir as redes neurais foi usada a biblioteca keras e para construir o algoritmo genético foi usada a biblioteca deap.

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split

In [3]:
from deap import base, creator, tools
from deap.algorithms import eaSimple

In [4]:
from scipy import stats
import time

In [5]:
diam = pd.read_csv('diamonds.csv')
diam.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [6]:
xnumenc = StandardScaler()
xcatenc = OneHotEncoder(sparse=False)
yenc = StandardScaler()

In [7]:
xnum = xnumenc.fit_transform(diam[['price']])
xcat = xcatenc.fit_transform(diam[['cut','color','clarity']])
x = np.concatenate([xnum, xcat], axis=1)
y = yenc.fit_transform(diam.drop(['cut','color','clarity','price'], axis=1))

Foi usada como função de ligação a função Leaky Relu com parâmetro igual a 0.01

In [8]:
def nn(camadas):
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.9)
    _camadas = []
    for n in camadas:
        if n == 'batchnorm':
            _camadas.append(layers.BatchNormalization())
        elif n == 'dropout':
            _camadas.append(layers.Dropout(0.1))
        else:
            _camadas.append(layers.Dense(n, activation=layers.LeakyReLU(0.01)))
    _camadas.append(layers.Dense(1, activation='linear'))
    
    model = keras.Sequential(_camadas)

    model.compile(loss='mse', optimizer=keras.optimizers.Adam(learning_rate=0.01), metrics=['mae'])

    model.fit(x_train, y_train, epochs=20, verbose=0)

    return model.evaluate(x_test, y_test, verbose=0)[0]

In [9]:
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin)

In [10]:
def create_individual():
    N = 0
    while N == 0:
        N = stats.geom.rvs(p=0.25)+1 #stats.binom.rvs(p=0.3, n=10)
    prob = 0.5
    aux = [c if c > 1 else 1 for c in stats.binom.rvs(p=0.3, n=12, size=N)]
    camadas = []
    for c in aux:
        camadas.append(c)
        if np.random.random() < 0.2:
            camadas.append('batchnorm')
        if np.random.random() < 0.2:
            camadas.append('dropout')
    return camadas

In [11]:
toolbox = base.Toolbox()
toolbox.register("create_list", create_individual)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.create_list)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

In [15]:
def gerar_individuo_especifico(lista):
    lista = list(lista)
    def aux(lista):
        for i in lista:
            yield i
    return tools.initIterate(list, aux)

In [16]:
def mut_sinal(individual, indpb):
    if isinstance(individual, float):
        #return (toolbox.individual(),)
        return (gerar_individuo_especifico(individual),)
    for i, value in enumerate(np.sign(stats.norm.rvs(size=len(individual)))):
        if np.random.random() < indpb:
            try:
                if not isinstance(individual[i],str):
                    if individual[i] > 1:
                        individual[i] += value
                else:
                    individual.pop(i)
            except IndexError:
                break
    if np.random.random() < indpb and len(individual) > 1:
        posicao = np.random.randint(0, len(individual))
        individual.pop(posicao)
    if np.random.random() < indpb:
        posicao = np.random.randint(0, len(individual)+1)
        individual.insert(posicao, int(np.array(individual).mean()))
    return (individual,)

In [17]:
def evaluate(individual):
    ft = nn(individual)
    return ft,

toolbox.register("mate", tools.cxOnePoint)
toolbox.register("mutate", mut_sinal, 0.5)
toolbox.register("select", tools.selTournament, tournsize=4)
toolbox.register("evaluate", evaluate)

In [18]:
pop = toolbox.population(50)
hof = tools.HallOfFame(3)
statistics = tools.Statistics(lambda ind: ind.fitness.values)
statistics.register("avg", np.mean)
statistics.register("std", np.std)
statistics.register("min", np.min)
statistics.register("max", np.max)

result = eaSimple(pop, toolbox, cxpb=1, mutpb=0.25, ngen=10, stats=statistics, halloffame=hof, verbose=True)

gen	nevals	avg    	std    	min     	max    
0  	50    	3.44412	20.2149	0.520095	144.947


TypeError: 'float' object is not iterable

In [26]:
print(hof)

[[4, 2], [6, 4], [5, 4]]


In [27]:
[ind.fitness.values for ind in hof]

[(0.5145150423049927,), (0.5152993202209473,), (0.5156368017196655,)]

In [28]:
from inspect import getsource

In [29]:
print(getsource(tools.selTournament))

def selTournament(individuals, k, tournsize, fit_attr="fitness"):
    """Select the best individual among *tournsize* randomly chosen
    individuals, *k* times. The list returned contains
    references to the input *individuals*.

    :param individuals: A list of individuals to select from.
    :param k: The number of individuals to select.
    :param tournsize: The number of individuals participating in each tournament.
    :param fit_attr: The attribute of individuals to use as selection criterion
    :returns: A list of selected individuals.

    This function uses the :func:`~random.choice` function from the python base
    :mod:`random` module.
    """
    chosen = []
    for i in range(k):
        aspirants = selRandom(individuals, tournsize)
        chosen.append(max(aspirants, key=attrgetter(fit_attr)))
    return chosen

