<a href="https://colab.research.google.com/github/kmbecker19/EC_Final_Project/blob/main/6560_Final_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
""" Evolutionary Deep Neural Net:

This project is an evolutionary algorithm used to build and optimize the architecture
of a deep neural network. The neural net in question is tested on the Fashion-MNIST
data set, although changes to the code can be made to accommodate other
data sets."""

!pip install deap

# TensorFlow and tf.keras
import tensorflow as tf

from tensorflow.keras.layers import (Dense, Dropout, Flatten, Activation)
from tensorflow.keras.activations import (relu, tanh, sigmoid, softmax)

# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
from deap import tools, creator, base, algorithms
import random, numpy, copy, keras

# --- Evolutionary-algorithm and training hyper-parameters -------------------
EPOCHS = 2  # training epochs used each time a network is (re)compiled and fitted
CXPB = 0.0  # crossover probability; only checked against MUTPB, no crossover is implemented
MUTPB =1.0  # probability that an offspring is produced by mutation
NGEN = 50  # number of generations to run
MU = 1  # population size kept after selection (also the hall-of-fame size)
LAMBDA= 4  # number of offspring produced per generation
MAX_NODES = 128  # upper bound used when drawing a random Dense layer width
# Candidate activation functions for randomly created/modified hidden layers.
activations = [sigmoid, relu, tanh, softmax]

# Load the Fashion-MNIST dataset that ships with Keras.
fashion_mnist = tf.keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

# Human-readable names for the 10 label indices (not used elsewhere in this cell).
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

# Bare expression: has no effect here because it is not the last statement of the cell.
train_images.shape
# Rescale pixel values by 1/255 (presumably from 0-255 to 0-1 -- confirm against the dataset).
train_images = train_images / 255.0
test_images = test_images / 255.0

def compileAndTrain(ind):
  """Compile the Keras model held in ``ind[0]`` and fit it on the training data.

  The model is compiled with the Adam optimizer, sparse categorical
  cross-entropy computed on logits, and an accuracy metric. It is then
  trained for ``EPOCHS`` epochs on the module-level ``train_images`` and
  ``train_labels``. The model is modified in place; nothing is returned.
  """
  network = ind[0]
  loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
  network.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])
  network.fit(train_images, train_labels, epochs=EPOCHS)

def evaluate(ind):
  """Return the fitness of an individual as a one-element tuple ``(accuracy,)``.

  The model in ``ind[0]`` is evaluated on the module-level ``test_images``
  and ``test_labels``; the loss is discarded and only the accuracy is kept.
  The tuple form is what DEAP expects for a fitness value.
  """
  metrics = ind[0].evaluate(test_images, test_labels, verbose=2)
  _loss, accuracy = metrics
  return (accuracy,)

def mutAddLayer(ind):
  """Mutation: insert a new random Dense layer just before the output layer.

  If the network already has 5 or more layers it is not grown further;
  instead one of ``mutChangeActivation`` / ``mutChangeLayerSize`` is applied
  with equal probability.

  Otherwise a deep copy of ``ind`` is made, its output layer is popped, a
  Dense layer with a random width in ``[1, MAX_NODES]`` and a random
  activation from ``activations`` is appended, a fresh 10-unit output layer
  is added, and the child is recompiled and retrained.

  Args:
    ind: individual (list-like) whose first element is a Keras Sequential model.

  Returns:
    A one-element tuple ``(child,)`` as expected from DEAP mutation operators.
  """
  if len(ind[0].layers) >= 5:
    # Network is at its maximum depth: mutate it in place of growing it.
    if random.random() < 0.5:
      return mutChangeActivation(ind)
    return mutChangeLayerSize(ind)

  child = copy.deepcopy(ind)
  child_net = child[0]
  # random.randint is inclusive on both ends, so draw from 1..MAX_NODES:
  # a lower bound of 0 would request a zero-unit layer and an upper bound of
  # MAX_NODES + 1 would exceed the configured maximum.
  nodes = random.randint(1, MAX_NODES)
  child_net.pop()  # remove the current output layer
  child_net.add(tf.keras.layers.Dense(nodes,
                activation=random.choice(activations)))
  child_net.add(tf.keras.layers.Dense(10))  # re-add the output layer
  compileAndTrain(child)
  return (child,)

def mutChangeActivation(ind):
    """Mutation: change the activation function of one random hidden layer.

    A new Sequential model is built with the same hidden-layer widths as the
    parent; one randomly chosen hidden layer gets an activation drawn from
    ``activations`` (which may equal the one it already had). The rebuilt
    model starts from fresh weights and is recompiled and retrained.

    If the parent has no hidden layer (2 layers or fewer), the call is
    delegated to ``mutAddLayer`` instead.

    Args:
        ind: individual (list-like) whose first element is a Keras Sequential model.

    Returns:
        A one-element tuple ``(child,)``.
    """
    ind_net = ind[0]
    # Check the guard before copying: deep-copying a model is expensive and
    # the copy would be thrown away when we delegate.
    if len(ind_net.layers) <= 2:
        return mutAddLayer(ind)

    child = copy.deepcopy(ind)
    maxdepth = len(ind_net.layers) - 2  # index of the last hidden layer
    depth = random.randint(1, maxdepth)
    activation = random.choice(activations)

    model = tf.keras.Sequential([Flatten(input_shape=(28,28))])
    # Layers 1 .. len-2 are the hidden layers (0 is Flatten, last is the output).
    for i in range(1, len(ind_net.layers) - 1):
        layer = ind_net.layers[i]
        if i == depth:
            model.add(Dense(layer.units, activation))
        else:
            model.add(Dense(layer.units, layer.activation))
    model.add(Dense(10))

    child[0] = model  # replace the copied parent model with the rebuilt one
    compileAndTrain(child)
    return (child,)


def mutRemoveLayer(ind):
  """Mutation: remove one randomly chosen hidden layer.

  A new Sequential model is built from the parent's hidden layers (same
  widths and activations) with one random hidden layer left out, followed by
  a fresh 10-unit output layer. The rebuilt model starts from fresh weights
  and is recompiled and retrained.

  If the parent has no hidden layer (2 layers or fewer), the call is
  delegated to ``mutAddLayer`` instead.

  Args:
    ind: individual (list-like) whose first element is a Keras Sequential model.

  Returns:
    A one-element tuple ``(child,)``.
  """
  ind_net = ind[0]
  # Check the guard before copying so no model is deep-copied needlessly.
  if len(ind_net.layers) <= 2:
    # Network is too small to shrink, so add a dense layer instead.
    return mutAddLayer(ind)

  child = copy.deepcopy(ind)
  maxdepth = len(ind_net.layers) - 2  # index of the last hidden layer
  depth = random.randint(1, maxdepth)  # index of the layer to drop

  model = tf.keras.Sequential([Flatten(input_shape=(28,28))])
  for i in range(1, len(ind_net.layers) - 1):
    if i != depth:
      layer = ind_net.layers[i]
      model.add(Dense(layer.units, layer.activation))
  model.add(Dense(10))

  child[0] = model  # replace the copied parent model with the rebuilt one
  compileAndTrain(child)
  return (child,)
      

def mutAddDropout(ind):
    """Mutation: insert a Dropout layer just before the output layer.

    The individual is deep-copied, its output layer is popped, a Dropout
    layer with a rate drawn uniformly from ``[0, 0.5]`` is appended, a fresh
    10-unit output layer is added, and the child is recompiled and retrained.

    If the parent has no hidden layer (2 layers or fewer), the call is
    delegated to ``mutAddLayer`` instead.

    NOTE(review): this operator is not registered in ``createEA``. The
    rebuild-based mutations in this file read ``.units`` from every layer
    between Flatten and the output, which a Dropout layer does not appear to
    provide -- confirm before enabling this operator alongside them.

    Args:
        ind: individual (list-like) whose first element is a Keras Sequential model.

    Returns:
        A one-element tuple ``(child,)``.
    """
    # Check the guard on the parent before paying for a deep copy.
    if len(ind[0].layers) <= 2:
        return mutAddLayer(ind)

    child = copy.deepcopy(ind)
    child_net = child[0]
    drop = random.uniform(0, 0.5)
    child_net.pop()  # remove the current output layer
    child_net.add(Dropout(drop))
    child_net.add(Dense(10))  # re-add the output layer

    compileAndTrain(child)
    return (child,)

def mutChangeLayerSize(ind):
  """Mutation: change the width of one randomly chosen hidden layer.

  A new Sequential model is built with the parent's hidden layers; one random
  hidden layer gets a new width drawn from ``[1, MAX_NODES]`` while keeping
  its activation. The rebuilt model starts from fresh weights and is
  recompiled and retrained.

  If the parent has no hidden layer (2 layers or fewer), the call is
  delegated to ``mutAddLayer`` instead.

  Args:
    ind: individual (list-like) whose first element is a Keras Sequential model.

  Returns:
    A one-element tuple ``(child,)``.
  """
  ind_net = ind[0]
  # Check the guard before copying so no model is deep-copied needlessly.
  if len(ind_net.layers) <= 2:
    # Network is too small to resize, so add a dense layer instead.
    return mutAddLayer(ind)

  child = copy.deepcopy(ind)
  maxdepth = len(ind_net.layers) - 2  # index of the last hidden layer
  depth = random.randint(1, maxdepth)
  # Lower bound is 1: random.randint is inclusive and a 0-unit layer is not usable.
  nodes = random.randint(1, MAX_NODES)

  model = tf.keras.Sequential([Flatten(input_shape=(28,28))])
  for i in range(1, len(ind_net.layers) - 1):
    layer = ind_net.layers[i]
    if i == depth:
      model.add(Dense(nodes, layer.activation))
    else:
      model.add(Dense(layer.units, layer.activation))
  model.add(Dense(10))

  child[0] = model  # replace the copied parent model with the rebuilt one
  compileAndTrain(child)
  return (child,)

def cloneAndMutate(population, toolbox, lambda_, cxpb, mutpb):
    """Produce ``lambda_`` offspring by mutation or plain reproduction.

    For each offspring slot, with probability ``mutpb`` a random parent is
    cloned and mutated; otherwise a random parent is appended as-is (the same
    object, not a copy). The mutation operator is chosen round-robin from
    ``toolbox.mutate1`` .. ``toolbox.mutate4`` based on the slot index, so all
    four operators are used once ``lambda_`` is at least 4.

    Args:
        population: list of individuals to draw parents from.
        toolbox: DEAP toolbox providing ``clone`` and ``mutate1``..``mutate4``.
        lambda_: number of offspring to produce.
        cxpb: crossover probability; only used in the sanity check, since no
            crossover is performed here.
        mutpb: probability of producing an offspring by mutation.

    Returns:
        List of ``lambda_`` offspring; mutated ones have their fitness invalidated.
    """
    assert (cxpb + mutpb) <= 1.0, (
        "The sum of the crossover and mutation probabilities must be smaller "
        "or equal to 1.0.")

    num_mutators = 4  # toolbox.mutate1 .. toolbox.mutate4

    offspring = []
    for i in range(lambda_):
        op_choice = random.random()
        if op_choice < mutpb:  # Apply mutation
            ind = toolbox.clone(random.choice(population))
            # Cycle through all four operators. The previous ``i % 3`` dispatch
            # could never reach the fourth one.
            mutate = getattr(toolbox, "mutate%d" % (i % num_mutators + 1))
            ind, = mutate(ind)
            # Invalidate the fitness so the child is re-evaluated.
            del ind.fitness.values
            offspring.append(ind)
        else:                           # Apply reproduction
            offspring.append(random.choice(population))

    return offspring


def _evaluateInvalid(individuals, toolbox):
    """Evaluate every individual with an invalid fitness; return how many were evaluated."""
    invalid_ind = [ind for ind in individuals if not ind.fitness.valid]
    fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
    for ind, fit in zip(invalid_ind, fitnesses):
        ind.fitness.values = fit
    return len(invalid_ind)


def muPlusLambda(population, toolbox, mu, lambda_, cxpb, mutpb, ngen,
                   stats=None, halloffame=None, verbose=__debug__):
    """Run a (mu + lambda) evolutionary loop using ``cloneAndMutate`` for variation.

    Each generation produces ``lambda_`` offspring from the current
    population, evaluates those with an invalid fitness, and selects ``mu``
    individuals from parents plus offspring via ``toolbox.select``.

    Args:
        population: list of individuals; updated in place each generation.
        toolbox: DEAP toolbox providing ``map``, ``evaluate``, ``select``,
            ``clone`` and the mutation operators used by ``cloneAndMutate``.
        mu: number of individuals selected for the next generation.
        lambda_: number of offspring produced per generation.
        cxpb: crossover probability, forwarded to ``cloneAndMutate``.
        mutpb: mutation probability, forwarded to ``cloneAndMutate``.
        ngen: number of generations to run.
        stats: optional ``tools.Statistics`` compiled on the population each generation.
        halloffame: optional hall of fame, updated with the initial population
            and with each generation's offspring.
        verbose: when true, print the logbook stream after each generation.

    Returns:
        Tuple ``(population, logbook)``.
    """
    logbook = tools.Logbook()
    logbook.header = ['gen', 'nevals'] + (stats.fields if stats else [])

    # Evaluate the initial individuals that have no valid fitness yet.
    nevals = _evaluateInvalid(population, toolbox)

    if halloffame is not None:
        halloffame.update(population)

    record = stats.compile(population) if stats is not None else {}
    logbook.record(gen=0, nevals=nevals, **record)
    if verbose:
        print(logbook.stream)

    # Begin the generational process
    for gen in range(1, ngen + 1):
        # Vary the population
        offspring = cloneAndMutate(population, toolbox, lambda_, cxpb, mutpb)

        # Evaluate the offspring with an invalid fitness
        nevals = _evaluateInvalid(offspring, toolbox)

        # Update the hall of fame with the generated individuals
        if halloffame is not None:
            halloffame.update(offspring)

        # Select the next generation population from parents + offspring
        population[:] = toolbox.select(population + offspring, mu)

        # Update the statistics with the new population
        record = stats.compile(population) if stats is not None else {}
        logbook.record(gen=gen, nevals=nevals, **record)
        if verbose:
            print(logbook.stream)
    return population, logbook
  

def createEA():
  """Set up the DEAP toolbox, run the evolutionary search and report the best network.

  Builds an initial population of ``MU`` minimal networks, registers the
  mutation, evaluation and selection operators, runs ``muPlusLambda`` for
  ``NGEN`` generations, then prints the summary of the best individual in the
  hall of fame, trains it for 10 more epochs and prints its test accuracy.
  Returns nothing.
  """
  def initIndividual(icls):
    """Create an individual wrapping a trained Flatten -> Dense(10) network."""
    model = tf.keras.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        # Output layer only; hidden layers are added later by mutation.
        tf.keras.layers.Dense(10),
    ])
    individual = icls([model])
    # Compile and train with the same settings used for mutated children.
    compileAndTrain(individual)
    return individual

  # creator.create defines classes on the ``creator`` module; skip it when
  # they already exist so re-running this function does not redefine them.
  if not hasattr(creator, "FitnessMax"):
    creator.create("FitnessMax", base.Fitness, weights=(1.0,))
  if not hasattr(creator, "Individual"):
    creator.create("Individual", list, fitness=creator.FitnessMax)
  toolbox = base.Toolbox()

  toolbox.register("individual", initIndividual, creator.Individual)
  toolbox.register("population", tools.initRepeat, list, toolbox.individual)
  pop = toolbox.population(n=MU)
  hof = tools.HallOfFame(MU)

  toolbox.register("mutate1", mutAddLayer)
  toolbox.register("mutate2", mutChangeActivation)
  toolbox.register("mutate3", mutChangeLayerSize)
  toolbox.register("mutate4", mutRemoveLayer)
  toolbox.register("evaluate", evaluate)
  # NOTE(review): tournament selection does not guarantee that the best of
  # parents + offspring survives; confirm this is intended for (mu + lambda).
  toolbox.register("select", tools.selTournament, tournsize=4)

  # Registering descriptive statistics
  stats = tools.Statistics(lambda ind: ind.fitness.values)
  stats.register("min", numpy.min)
  stats.register("avg", numpy.mean)
  stats.register("max", numpy.max)
  stats.register("std", numpy.std)

  muPlusLambda(pop, toolbox, mu=MU, lambda_=LAMBDA, cxpb=CXPB, mutpb=MUTPB,
               ngen=NGEN, stats=stats, halloffame=hof, verbose=True)
  best_ind = hof[0]
  # summary() prints the table itself and returns None, so wrapping it in
  # print() would emit a stray "None" line.
  best_ind[0].summary()
  best_ind[0].fit(train_images, train_labels, epochs=10)
  print (evaluate(best_ind))

# Run the evolutionary search; this trains LAMBDA networks per generation for NGEN generations.
createEA()


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Epoch 1/2




Epoch 2/2
313/313 - 1s - loss: 0.4817 - accuracy: 0.8325 - 515ms/epoch - 2ms/step
gen	nevals	min   	avg   	max   	std
0  	1     	0.8325	0.8325	0.8325	0  
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
313/313 - 1s - loss: 0.3942 - accuracy: 0.8571 - 568ms/epoch - 2ms/step
313/313 - 1s - loss: 0.4331 - accuracy: 0.8455 - 522ms/epoch - 2ms/step
313/313 - 1s - loss: 0.3863 - accuracy: 0.8630 - 609ms/epoch - 2ms/step
313/313 - 1s - loss: 0.4423 - accuracy: 0.8424 - 516ms/epoch - 2ms/step
1  	4     	0.863 	0.863 	0.863 	0  
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
313/313 - 1s - loss: 0.3776 - accuracy: 0.8669 - 1s/epoch - 4ms/step
313/313 - 1s - loss: 0.3999 - accuracy: 0.8566 - 610ms/epoch - 2ms/step
313/313 - 1s - loss: 0.3951 - accuracy: 0.8595 - 566ms/epoch - 2ms/step
313/313 - 1s - loss: 0.3770 - accuracy: 0.8638 - 615ms/epoch - 2ms/step
2  	4     	0.8638	0.8638	0.8638	0  
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
E

_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense_3 (Dense)             (None, 54)                42390     
                                                                 
 dense_15 (Dense)            (None, 91)                5005      
                                                                 
 dense_27 (Dense)            (None, 84)                7728      
                                                                 
 dense_28 (Dense)            (None, 10)                850       
                                                                 
=================================================================
Total params: 55,973
Trainable params: 55,973
Non-trainable params: 0
_________________________________________________________________


313/313 - 1s - loss: 0.3290 - accuracy: 0.8880 - 646ms/epoch - 2ms/step


(0.8880000114440918,)

_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 flatten_193 (Flatten)       (None, 784)               0         
                                                                 
 dense_786 (Dense)           (None, 82)                64370     
                                                                 
 dense_796 (Dense)           (None, 90)                7470      
                                                                 
 dense_806 (Dense)           (None, 39)                3549      
                                                                 
 dense_807 (Dense)           (None, 10)                400       
                                                                 
=================================================================
Total params: 75,789
Trainable params: 75,789
Non-trainable params: 0
_________________________________________________________________
None


(0.8776000142097473,)

_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense_3 (Dense)             (None, 103)               80855     
                                                                 
 dense_15 (Dense)            (None, 118)               12272     
                                                                 
 dense_25 (Dense)            (None, 41)                4879      
                                                                 
 dense_26 (Dense)            (None, 10)                420       
                                                                 
=================================================================
Total params: 98,426
Trainable params: 98,426
Non-trainable params: 0
_________________________________________________________________




(0.8944000005722046,)

_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 flatten_163 (Flatten)       (None, 784)               0         
                                                                 
 dense_659 (Dense)           (None, 110)               86350     
                                                                 
 dense_660 (Dense)           (None, 75)                8325      
                                                                 
 dense_661 (Dense)           (None, 97)                7372      
                                                                 
 dense_662 (Dense)           (None, 10)                980       
                                                                 
=================================================================
Total params: 103,027
Trainable params: 103,027
Non-trainable params: 0
_________________________________________________________________

(0.8799999952316284,)

KYLE'S FRAMEWORK:

representation: trained neural net as individuals

Mutation: 100%mutation probability,

- add a dense layer, random nodes within range

    copy parent 

    pop copy.output

    add dense_layer

    readd copy.output

    train model
    
    return accuracy 


- change activation function to another one

    copy parent 

    pop copy.output

    change activation of last layer

    readd copy.output

    train model
    
    return accuracy 
  

- remove a layer (not valid for empty individual)

    copy parent 

    pop copy.output

    pop hidden layer

    readd copy.output

    train model
    
    return accuracy 

-  add dropout (random percentage within a certain range)

- mu + lambda selection strategy 