In [1]:
from __future__ import print_function
import os
import neat

import pandas as pd
import numpy as np
import random

import torch
import torch.nn as nn
import torch.optim as optim


from explaneat.core.backprop import NeatNet
from explaneat.core import backprop
from explaneat.core.backproppop import BackpropPopulation
from explaneat.visualization import visualize
from explaneat.core.experiment import ExperimentReporter
from explaneat.core.utility import one_hot_encode


from sklearn import datasets
from sklearn import metrics
from sklearn.preprocessing import StandardScaler

from copy import deepcopy

import time
from datetime import datetime

# Iris Experiment

This experiment (a) tests the experimental environment and (b) evaluates the efficacy of the ExplaNEAT algorithm. Speed is a critical factor, as is the stability of results across population sizes. Total run time will also be measured.

First, we need to set a random seed and a total stopping point in the number of generations

In [2]:
# Seed every RNG this notebook imports, not just Python's `random`, so that a
# Restart & Run All is as repeatable as possible. `random.seed` is kept last
# because it returns None, which keeps the cell free of a stray output repr.
SEED = 4242
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

In [3]:
def one_hot_encode(vals):
    """One-hot encode a sequence of integer class labels.

    NOTE(review): this definition shadows the `one_hot_encode` imported from
    `explaneat.core.utility` at the top of the notebook; presumably only one of
    the two is meant to be used - confirm and drop the other.

    Args:
        vals: 1-D sequence (list or numpy array) of integer labels. The
            largest label determines the width of the encoding.

    Returns:
        A float numpy array of shape (len(vals), max(vals) + 1) in which row i
        holds a 1.0 in column vals[i] and 0.0 elsewhere. An empty input yields
        an empty array of shape (0, 0) instead of raising.
    """
    labels = np.asarray(vals)
    if labels.size == 0:
        # max() of an empty sequence is undefined; there is nothing to encode.
        return np.zeros((0, 0))

    n_classes = int(labels.max()) + 1
    encoded = np.zeros((labels.shape[0], n_classes))
    # Fancy indexing sets exactly one cell per row, replacing the Python loop.
    encoded[np.arange(labels.shape[0]), labels] = 1.
    return encoded


## Dataset

We are going to work with the Iris dataset, which will be loaded from `sklearn`. We want to characterise the efficacy of the algorithm with regard to a mostly untransformed dataset, so we will only normalise the features.

In [4]:
# Load Iris, keep only the first two feature columns, and standardise them.
iris = datasets.load_iris()
xs_raw, ys = iris.data[:, :2], iris.target

# The fitted scaler is kept around so the same transform can be re-applied.
scaler = StandardScaler().fit(xs_raw)
xs = scaler.transform(xs_raw)

# One-hot view of the integer class labels.
ys_onehot = one_hot_encode(ys)

Let's have a look at the data we are working with

In [5]:
xs[:5]

array([[-0.90068117,  1.01900435],
       [-1.14301691, -0.13197948],
       [-1.38535265,  0.32841405],
       [-1.50652052,  0.09821729],
       [-1.02184904,  1.24920112]])

In [6]:
ys[:5]

array([0, 0, 0, 0, 0])

In [7]:
ys_onehot[:5]

array([[1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.]])

## Performance metric

The NEAT implementation that ExplaNEAT extends uses a single function call for evaluating fitness. Although this might be reworked so that ExplaNEAT gets consistency between the genome evaluation and the backprop loss function, that can be reviewed later.

This uses `CrossEntropyLoss` from `PyTorch`; a genome's fitness is the reciprocal of that loss, so a lower loss means a higher fitness.

In [8]:
def eval_genomes(genomes, config):
    """Score every genome by the reciprocal of its cross-entropy loss.

    Each genome is turned into a feed-forward network, run over every sample
    of the notebook-level `xs`, and compared against the notebook-level `ys`.
    The result is written to `genome.fitness`; nothing is returned.

    Args:
        genomes: iterable of (genome_id, genome) pairs.
        config: the NEAT config used to build each network.
    """
    criterion = nn.CrossEntropyLoss()
    for _, genome in genomes:
        network = neat.nn.FeedForwardNetwork.create(genome, config)
        outputs = [network.activate(sample) for sample in xs]
        cross_entropy = criterion(torch.tensor(outputs), torch.tensor(ys))
        # Lower loss => higher fitness.
        genome.fitness = float(1. / cross_entropy)

## Base configuration

We are going to create the base configuration from an external configuration file. We will adjust it per experiment later, but this defines the defaults across all runs.

In [9]:
# Defaults shared by every run; the file path is relative to the notebook's
# working directory.
config_path = "./config-iris"
base_config = neat.Config(
    neat.DefaultGenome,
    neat.DefaultReproduction,
    neat.DefaultSpeciesSet,
    neat.DefaultStagnation,
    config_path,
)

In [10]:
# Display the default population size read from the config file.
base_config.pop_size

2

We also want to put a hard limit on how long this can go on for.

In [11]:
# Upper bound on the number of generations; passed to `p.run(...)` for every run.
maxNGenerations = 20

We will create a method to manage the instantiation of a population on the basis of a specific config.

In [12]:
def instantiate_population(config, xs, ys, saveLocation):
    """Build a BackpropPopulation for one run and attach its reporters.

    The run directory is created if needed and a copy of `config` is saved
    into it as `config.conf`.

    Reporters are registered in a fixed order: StdOutReporter,
    StatisticsReporter, Checkpointer, BackpropReporter, ExperimentReporter.
    Code elsewhere in this notebook looks reporters up by position in
    `p.reporters.reporters`, so do not reorder these calls.

    Args:
        config: NEAT config for this run (also saved to disk).
        xs: input features handed to BackpropPopulation.
        ys: target labels handed to BackpropPopulation.
        saveLocation: directory for this run's config and checkpoints; works
            with or without a trailing path separator.

    Returns:
        The configured BackpropPopulation.
    """
    # exist_ok=True replaces the exists()/makedirs() pair, which could race
    # with another process creating the same directory.
    os.makedirs(saveLocation, exist_ok=True)

    config.save(os.path.join(saveLocation, 'config.conf'))

    # Create the population, which is the top-level object for a NEAT run.
    p = BackpropPopulation(config,
                           xs,
                           ys,
                           criterion=nn.CrossEntropyLoss())

    # Add a stdout reporter to show progress in the terminal.
    p.add_reporter(neat.StdOutReporter(True))
    stats = neat.StatisticsReporter()
    p.add_reporter(stats)
    # Build the prefix with os.path.join so checkpoints always land inside
    # saveLocation; plain string concatenation only did so when saveLocation
    # ended with a path separator.
    checkpoint_prefix = os.path.join(str(saveLocation), "checkpoint-")
    p.add_reporter(neat.Checkpointer(5, filename_prefix=checkpoint_prefix))
    bpReporter = backprop.BackpropReporter(True)
    p.add_reporter(bpReporter)
    p.add_reporter(ExperimentReporter(saveLocation))

    return p

# Experiment 1: Vary population size

The first experiment is going to examine the difference in run time across different population sizes.

In [13]:
# Population sizes to sweep; each one overrides `pop_size` on a copy of the base config.
population_points = [2, 5, 10, 25, 50, 100]

In [None]:
# Default population size of the base config; the experiment loop overrides
# `pop_size` on a deep copy, so this value itself is not modified by the runs.
base_config.pop_size

2

In [None]:
# Output directory per run; the two placeholders are filled with
# (pop_size, iteration_no). Relative to the notebook's working directory.
saveLocationTemplate = './../../data/experiments/iris/experiment-population-{}-{}/'

## Start the experiment

In [None]:
# Experiment driver: for every population size in `population_points`, run 5
# independent repetitions. Each repetition gets its own deep-copied config and
# its own output directory, and writes a final checkpoint plus a results CSV.
for pop_size in population_points:
    for iteration_no in range(5):
        
        start_time = datetime.now()
        
        print("################################################")
        print("################################################")
        print("Starting population {} iteration {}".format(pop_size, iteration_no))
        print("Started at {}".format(start_time.strftime("%m/%d/%Y, %H:%M:%S")))
        print("################################################")
        print("################################################")
        
        
        # Deep copy so that overriding pop_size never mutates the shared base_config.
        config = deepcopy(base_config)
        config.pop_size = pop_size
        
        saveLocation = saveLocationTemplate.format(pop_size, iteration_no)
        
        p = instantiate_population(config, xs, ys, saveLocation)
        # Run for at most maxNGenerations generations, using eval_genomes as
        # the fitness callback.
        winner = p.run(eval_genomes, maxNGenerations)
        
        g = p.best_genome

        
        # Taken right after the run returns, so the printed window excludes the
        # checkpoint / CSV writing done below.
        end_time = datetime.now()
        
        # NOTE(review): index 2 is presumably the neat.Checkpointer registered
        # third in instantiate_population - this depends on registration order
        # and on BackpropPopulation not pre-registering reporters; confirm.
        p.reporters.reporters[2].save_checkpoint(p.config, p.population, p.species, str(p.generation) + "-final")  
        
        winner_net = neat.nn.FeedForwardNetwork.create(winner, config)

        # One row per sample: [feature 0, feature 1, true label, network output].
        results = []
        for xi, xo in zip(xs, ys):
            output = winner_net.activate(xi)
            results.append([xi[0], xi[1], xo, output])

        # No column names are assigned, so the CSV uses pandas' default
        # integer column labels.
        df = pd.DataFrame(results)
        df.to_csv(os.path.join(saveLocation, 'results.csv'))
        
        # NOTE(review): index 3 is presumably the BackpropReporter registered
        # fourth in instantiate_population - same ordering caveat as above.
        ancestry = p.reporters.reporters[3].trace_ancestry_of_species(g.key, p.reproduction.ancestors) 

        # `ancestry` and `ancestors` are only consumed by the commented-out
        # video call below.
        ancestors = {
            k: v['genome'] for k, v in p.reporters.reporters[3].ancestry.items()
        }
    
#         visualize.create_ancestry_video(p.config, 
#                                         g, 
#                                         ancestry, 
#                                         ancestors, 
#                                         p.reporters.reporters[1], 
#                                         pathname=saveLocation)
        print("################################################")
        print("################################################")
        print("Have finished population {} iteration {}".format(pop_size, iteration_no))
        print("Started at {}".format(start_time.strftime("%m/%d/%Y, %H:%M:%S")))
        print("The time is {}".format(end_time.strftime("%m/%d/%Y, %H:%M:%S")))
        print("################################################")
        print("################################################")
    

################################################
################################################
Starting population 2 iteration 0
Started at 06/30/2019, 05:30:42
################################################
################################################

 ****** Running generation 0 ****** 

mean improvement: 0.0
best improvement: tensor(0., grad_fn=<SubBackward0>)
best loss: tensor(0.9079, grad_fn=<DivBackward0>)
Population's average fitness: 0.99736 stdev: 0.10412
Best fitness: 1.10148 - size: (3, 6) - species 1 - id 1
ending generation %s
Average adjusted fitness: 0.104
Mean genetic distance 1.783, standard deviation 1.261
Population of 2 members in 1 species:
   ID   age  size  fitness  adj fit  stag
     1    0     2      1.1    0.104     0
Total extinctions: 0
Generation time: 8.422 sec

 ****** Running generation 1 ****** 

mean improvement: 0.0
best improvement: tensor(0., grad_fn=<SubBackward0>)
best loss: tensor(0.9079, grad_fn=<DivBackward0>)
Population's average fit

mean improvement: 0.0
best improvement: tensor(0., grad_fn=<SubBackward0>)
best loss: tensor(0.9079, grad_fn=<DivBackward0>)
Population's average fitness: 0.99736 stdev: 0.10412
Best fitness: 1.10148 - size: (3, 6) - species 1 - id 1
ending generation %s
Average adjusted fitness: 0.104
Mean genetic distance 1.783, standard deviation 1.261
Population of 2 members in 1 species:
   ID   age  size  fitness  adj fit  stag
     1   13     2      1.1    0.104    13
Total extinctions: 0
Generation time: 8.321 sec (8.326 average)

 ****** Running generation 14 ****** 

mean improvement: 0.0
best improvement: tensor(0., grad_fn=<SubBackward0>)
best loss: tensor(0.9079, grad_fn=<DivBackward0>)
Population's average fitness: 0.99736 stdev: 0.10412
Best fitness: 1.10148 - size: (3, 6) - species 1 - id 1
ending generation %s
Average adjusted fitness: 0.104
Mean genetic distance 1.783, standard deviation 1.261
Population of 2 members in 1 species:
   ID   age  size  fitness  adj fit  stag
     1   14 

mean improvement: 0.0
best improvement: tensor(0., grad_fn=<SubBackward0>)
best loss: tensor(1.2985, grad_fn=<DivBackward0>)
Population's average fitness: 0.75284 stdev: 0.01729
Best fitness: 0.77013 - size: (3, 6) - species 2 - id 2
ending generation %s
Average adjusted fitness: 0.017
Mean genetic distance 2.171, standard deviation 1.980
Population of 4 members in 2 species:
   ID   age  size  fitness  adj fit  stag
     1    0     2      0.7    0.000     0
     2    0     2      0.8    0.035     0
Total extinctions: 0
Generation time: 8.369 sec

 ****** Running generation 1 ****** 

mean improvement: 0.0
best improvement: tensor(0., grad_fn=<SubBackward0>)
best loss: tensor(1.2338, grad_fn=<DivBackward0>)
Population's average fitness: 0.76294 stdev: 0.03088
Best fitness: 0.81050 - size: (4, 6) - species 2 - id 4


 SPECIES TOPOLOGY IMPROVEMENT


{'genome': <neat.genome.DefaultGenome object at 0x7f791e9efbe0>, 'fitness': 0.8104996085166931, 'firstDerivatives': [0.0, 0.0403694510459899

mean improvement: 0.0
best improvement: tensor(0., grad_fn=<SubBackward0>)
best loss: tensor(1.2338, grad_fn=<DivBackward0>)
Population's average fitness: 0.76294 stdev: 0.03088
Best fitness: 0.81050 - size: (4, 6) - species 2 - id 4
ending generation %s
Average adjusted fitness: 0.027
Mean genetic distance 2.171, standard deviation 1.980
Population of 4 members in 2 species:
   ID   age  size  fitness  adj fit  stag
     1    9     2      0.7    0.000     9
     2    9     2      0.8    0.055     8
Total extinctions: 0
Generation time: 17.106 sec (16.265 average)
Saving checkpoint to ./../../data/experiments/iris/experiment-population-2-1/checkpoint-9

 ****** Running generation 10 ****** 

mean improvement: 0.0
best improvement: tensor(0., grad_fn=<SubBackward0>)
best loss: tensor(1.2338, grad_fn=<DivBackward0>)
Population's average fitness: 0.76294 stdev: 0.03088
Best fitness: 0.81050 - size: (4, 6) - species 2 - id 4
ending generation %s
Average adjusted fitness: 0.027
Mean genetic

mean improvement: 0.0
best improvement: tensor(0., grad_fn=<SubBackward0>)
best loss: tensor(1.2080, grad_fn=<DivBackward0>)
Population's average fitness: 0.79560 stdev: 0.03224
Best fitness: 0.82784 - size: (3, 6) - species 1 - id 1
ending generation %s
Average adjusted fitness: 0.032
Mean genetic distance 1.952, standard deviation 1.538
Population of 4 members in 2 species:
   ID   age  size  fitness  adj fit  stag
     1    0     2      0.8    0.064     0
     2    0     2      0.8    0.000     0
Total extinctions: 0
Generation time: 8.354 sec

 ****** Running generation 1 ****** 

mean improvement: 0.0
best improvement: tensor(0., grad_fn=<SubBackward0>)
best loss: tensor(1.1155, grad_fn=<DivBackward0>)
Population's average fitness: 0.81399 stdev: 0.05396
Best fitness: 0.89647 - size: (4, 6) - species 1 - id 4


 SPECIES TOPOLOGY IMPROVEMENT


{'genome': <neat.genome.DefaultGenome object at 0x7f791697ac88>, 'fitness': 0.896469235420227, 'firstDerivatives': [0.0, 0.0686292052268982]

mean improvement: 0.0
best improvement: tensor(0., grad_fn=<SubBackward0>)
best loss: tensor(1.1155, grad_fn=<DivBackward0>)
Population's average fitness: 0.81399 stdev: 0.05396
Best fitness: 0.89647 - size: (4, 6) - species 1 - id 4
ending generation %s
Average adjusted fitness: 0.051
Mean genetic distance 1.952, standard deviation 1.538
Population of 4 members in 2 species:
   ID   age  size  fitness  adj fit  stag
     1   10     2      0.9    0.099     9
     2   10     2      0.8    0.002     9
Total extinctions: 0
Generation time: 16.736 sec (16.740 average)

 ****** Running generation 11 ****** 

mean improvement: 0.0
best improvement: tensor(0., grad_fn=<SubBackward0>)
best loss: tensor(1.1155, grad_fn=<DivBackward0>)
Population's average fitness: 0.81399 stdev: 0.05396
Best fitness: 0.89647 - size: (4, 6) - species 1 - id 4
ending generation %s
Average adjusted fitness: 0.051
Mean genetic distance 1.952, standard deviation 1.538
Population of 4 members in 2 species:
   ID   ag

mean improvement: 0.0
best improvement: tensor(0., grad_fn=<SubBackward0>)
best loss: tensor(1.1009, grad_fn=<DivBackward0>)
Population's average fitness: 0.86680 stdev: 0.04153
Best fitness: 0.90833 - size: (3, 6) - species 2 - id 2
ending generation %s
Average adjusted fitness: 0.042
Mean genetic distance 1.801, standard deviation 1.747
Population of 4 members in 2 species:
   ID   age  size  fitness  adj fit  stag
     1    0     2      0.8    0.000     0
     2    0     2      0.9    0.083     0
Total extinctions: 0
Generation time: 8.348 sec

 ****** Running generation 1 ****** 

mean improvement: 0.0
best improvement: tensor(0., grad_fn=<SubBackward0>)
best loss: tensor(1.1009, grad_fn=<DivBackward0>)
Population's average fitness: 0.86647 stdev: 0.04186
Best fitness: 0.90833 - size: (3, 6) - species 2 - id 2
ending generation %s
Average adjusted fitness: 0.043
Mean genetic distance 1.801, standard deviation 1.747
Population of 4 members in 2 species:
   ID   age  size  fitness  a

mean improvement: 0.0
best improvement: tensor(0., grad_fn=<SubBackward0>)
best loss: tensor(1.1009, grad_fn=<DivBackward0>)
Population's average fitness: 0.86647 stdev: 0.04186
Best fitness: 0.90833 - size: (3, 6) - species 2 - id 2
ending generation %s
Average adjusted fitness: 0.043
Mean genetic distance 1.801, standard deviation 1.747
Population of 4 members in 2 species:
   ID   age  size  fitness  adj fit  stag
     1   13     2      0.8    0.001    13
     2   13     2      0.9    0.084    13
Total extinctions: 0
Generation time: 16.535 sec (16.522 average)

 ****** Running generation 14 ****** 

mean improvement: 0.0
best improvement: tensor(0., grad_fn=<SubBackward0>)
best loss: tensor(1.1009, grad_fn=<DivBackward0>)
Population's average fitness: 0.86647 stdev: 0.04186
Best fitness: 0.90833 - size: (3, 6) - species 2 - id 2
ending generation %s
Average adjusted fitness: 0.043
Mean genetic distance 1.801, standard deviation 1.747
Population of 4 members in 2 species:
   ID   ag

mean improvement: 0.0
best improvement: tensor(0., grad_fn=<SubBackward0>)
best loss: tensor(1.0705, grad_fn=<DivBackward0>)
Population's average fitness: 0.83693 stdev: 0.09726
Best fitness: 0.93418 - size: (3, 6) - species 1 - id 1
ending generation %s
Average adjusted fitness: 0.097
Mean genetic distance 1.885, standard deviation 1.776
Population of 4 members in 2 species:
   ID   age  size  fitness  adj fit  stag
     1    0     2      0.9    0.195     0
     2    0     2      0.7    0.000     0
Total extinctions: 0
Generation time: 8.412 sec

 ****** Running generation 1 ****** 

mean improvement: 0.0
best improvement: tensor(0., grad_fn=<SubBackward0>)
best loss: tensor(1.0705, grad_fn=<DivBackward0>)
Population's average fitness: 0.83746 stdev: 0.08305
Best fitness: 0.93418 - size: (3, 6) - species 1 - id 1
ending generation %s
Average adjusted fitness: 0.098
Mean genetic distance 1.885, standard deviation 1.776
Population of 4 members in 2 species:
   ID   age  size  fitness  a

mean improvement: 0.0
best improvement: tensor(0., grad_fn=<SubBackward0>)
best loss: tensor(1.0705, grad_fn=<DivBackward0>)
Population's average fitness: 0.83746 stdev: 0.08305
Best fitness: 0.93418 - size: (3, 6) - species 1 - id 1
ending generation %s
Average adjusted fitness: 0.098
Mean genetic distance 1.885, standard deviation 1.776
Population of 4 members in 2 species:
   ID   age  size  fitness  adj fit  stag
     1   13     2      0.9    0.179    13
     2   13     2      0.8    0.016    12
Total extinctions: 0
Generation time: 16.406 sec (16.386 average)

 ****** Running generation 14 ****** 

mean improvement: 0.0
best improvement: tensor(0., grad_fn=<SubBackward0>)
best loss: tensor(1.0705, grad_fn=<DivBackward0>)
Population's average fitness: 0.83746 stdev: 0.08305
Best fitness: 0.93418 - size: (3, 6) - species 1 - id 1
ending generation %s
Average adjusted fitness: 0.098
Mean genetic distance 1.885, standard deviation 1.776
Population of 4 members in 2 species:
   ID   ag

mean improvement: 0.0
best improvement: tensor(0., grad_fn=<SubBackward0>)
best loss: tensor(1.1605, grad_fn=<DivBackward0>)
Population's average fitness: 0.80587 stdev: 0.04926
Best fitness: 0.86166 - size: (3, 6) - species 1 - id 3
ending generation %s
Average adjusted fitness: 0.064
Mean genetic distance 2.455, standard deviation 2.001
Population of 5 members in 2 species:
   ID   age  size  fitness  adj fit  stag
     1    0     3      0.9    0.080     0
     2    0     2      0.8    0.048     0
Total extinctions: 0
Generation time: 21.001 sec

 ****** Running generation 1 ****** 

mean improvement: 0.0
best improvement: tensor(0., grad_fn=<SubBackward0>)
best loss: tensor(1.1605, grad_fn=<DivBackward0>)
Population's average fitness: 0.82800 stdev: 0.03913
Best fitness: 0.86166 - size: (3, 6) - species 1 - id 3
ending generation %s
Average adjusted fitness: 0.040
Mean genetic distance 2.612, standard deviation 1.650
Population of 5 members in 2 species:
   ID   age  size  fitness  

mean improvement: 0.0
best improvement: tensor(0., grad_fn=<SubBackward0>)
best loss: tensor(1.1157, grad_fn=<DivBackward0>)
Population's average fitness: 0.83572 stdev: 0.06087
Best fitness: 0.92475 - size: (4, 4) - species 1 - id 11
ending generation %s
Average adjusted fitness: 0.046
Mean genetic distance 1.609, standard deviation 1.342
Population of 5 members in 2 species:
   ID   age  size  fitness  adj fit  stag
     1    6     3      0.9    0.093     1
     2    6     2      0.8    0.000     6
Total extinctions: 0
Generation time: 18.820 sec (19.791 average)

 ****** Running generation 7 ****** 

mean improvement: 0.0
best improvement: tensor(0., grad_fn=<SubBackward0>)
best loss: tensor(1.1157, grad_fn=<DivBackward0>)
Population's average fitness: 0.83928 stdev: 0.05925
Best fitness: 0.92475 - size: (4, 4) - species 1 - id 11
ending generation %s
Average adjusted fitness: 0.049
Mean genetic distance 1.608, standard deviation 1.339
Population of 5 members in 2 species:
   ID   a

mean improvement: 0.0
best improvement: tensor(0., grad_fn=<SubBackward0>)
best loss: tensor(1.0855, grad_fn=<DivBackward0>)
Population's average fitness: 0.86969 stdev: 0.08672
Best fitness: 0.97193 - size: (3, 1) - species 1 - id 17
ending generation %s
Average adjusted fitness: 0.075
Mean genetic distance 1.838, standard deviation 1.325
Population of 5 members in 2 species:
   ID   age  size  fitness  adj fit  stag
     1   17     3      1.0    0.149     6
     2   17     2      0.8    0.000    17
Total extinctions: 0
Generation time: 15.844 sec (16.827 average)

 ****** Running generation 18 ****** 

mean improvement: 0.0
best improvement: tensor(0., grad_fn=<SubBackward0>)
best loss: tensor(1.0855, grad_fn=<DivBackward0>)
Population's average fitness: 0.87101 stdev: 0.08637
Best fitness: 0.97193 - size: (3, 1) - species 1 - id 17
ending generation %s
Average adjusted fitness: 0.076
Mean genetic distance 1.622, standard deviation 1.385
Population of 5 members in 2 species:
   ID   

mean improvement: 0.0
best improvement: tensor(0., grad_fn=<SubBackward0>)
best loss: tensor(1.0888, grad_fn=<DivBackward0>)
Population's average fitness: 0.82602 stdev: 0.06334
Best fitness: 0.91844 - size: (3, 6) - species 1 - id 4
ending generation %s
Average adjusted fitness: 0.111
Mean genetic distance 3.057, standard deviation 1.849
Population of 5 members in 3 species:
   ID   age  size  fitness  adj fit  stag
     1    0     2      0.9    0.104     0
     2    0     2      0.8    0.119     0
     3    0     1       --       --     0
Total extinctions: 0
Generation time: 18.708 sec

 ****** Running generation 1 ****** 

mean improvement: 0.0
best improvement: tensor(0., grad_fn=<SubBackward0>)
best loss: tensor(1.0888, grad_fn=<DivBackward0>)
Population's average fitness: 0.86499 stdev: 0.04257
Best fitness: 0.91844 - size: (3, 6) - species 1 - id 4
ending generation %s
Average adjusted fitness: 0.049
Mean genetic distance 2.721, standard deviation 1.941
Population of 6 members 