In [17]:
import gzip
import logging
import os
import random
from copy import deepcopy
from datetime import datetime
try:
    import cPickle as pickle  # pylint: disable=import-error
except ImportError:
    import pickle  # pylint: disable=import-error

import neat
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim

from explaneat.data.uci import UCI_WRANGLER
from explaneat.experimenter.experiment import GenericExperiment
from explaneat.evaluators.evaluators import binary_cross_entropy
from explaneat.core.backprop import NeatNet
from explaneat.core import backprop
from explaneat.core.backproppop import BackpropPopulation
from explaneat.visualization import visualize
from explaneat.core.experiment import ExperimentReporter
from explaneat.core.utility import one_hot_encode

In [2]:
# Load the experiment definition. Per the log output of this cell, constructing
# GenericExperiment validates the configuration schema and creates the
# experiment's folder structure on disk.
experiment_config_file = './experiment_config.json'
# NOTE(review): confirm_path_creation=False presumably skips a confirmation
# step before the folders are created - confirm against GenericExperiment.
experiment = GenericExperiment(experiment_config_file, confirm_path_creation=False)
# Logger shared by the remaining cells of this notebook.
logger = experiment.logger

2022-04-28 09:45:03,448 - experimenter - INFO - Validating configuration schema
2022-04-28 09:45:03,449 - experimenter - INFO - Schema validation passed
2022-04-28 09:45:03,449 - experimenter - INFO - Starting to create folder structures
2022-04-28 09:45:03,449 - experimenter - INFO - Experiment folder name is test_experiment_220428T094503_3f8ba638
2022-04-28 09:45:03,450 - experimenter - INFO - Experiment root path is /Users/mike/dev-mtm/phd-neat-experiments/data/experiments/tests/test_experiment_220428T094503_3f8ba638
2022-04-28 09:45:03,451 - experimenter - INFO - Creating the root path
2022-04-28 09:45:03,451 - experimenter - INFO - Root path created
2022-04-28 09:45:03,452 - experimenter - INFO - Creating results
2022-04-28 09:45:03,453 - experimenter - INFO - Creating results/interim
2022-04-28 09:45:03,454 - experimenter - INFO - Creating results/final
2022-04-28 09:45:03,454 - experimenter - INFO - Creating configurations
2022-04-28 09:45:03,455 - experimenter - INFO - Creating

In [3]:

# CUDA is deliberately disabled for this experiment. The switch is explicit so
# the override is visible instead of silently clobbering the availability check;
# set FORCE_CPU to False to use a GPU when one is available.
FORCE_CPU = True
USE_CUDA = (not FORCE_CPU) and torch.cuda.is_available()
# NOTE(review): "cuda:1" addresses the second GPU - confirm this is intended
# before enabling CUDA on a single-GPU machine.
device = torch.device("cuda:1" if USE_CUDA else "cpu")
logger.info("Using device: {}".format(device))


2022-04-28 09:45:03,475 - experimenter - INFO - Using device: cpu


# BC Experiment

This experiment (a) tests the experimental environment and (b) evaluates the efficacy of the ExplaNEAT algorithm. Speed is a critical factor, as is the stability of results with respect to population size. Total run time will also be measured.

First, we need to set a random seed and a total stopping point in the number of generations

In [4]:
# Seed every random number generator in use: Python's `random` and torch.
# Seeding only `random` would leave any torch-side randomness unrepeatable.
random.seed(experiment.config["random_seed"])
logger.info("random.seed set to {}".format(experiment.config["random_seed"]))
torch.manual_seed(experiment.config["random_seed"])
logger.info("torch.manual_seed set to {}".format(experiment.config["random_seed"]))

2022-04-28 09:45:03,489 - experimenter - INFO - random.seed set to 42


## Dataset

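We are going to work with the UCI dataset configured for this experiment (286 samples, per the log below), which is loaded with `UCI_WRANGLER` from the raw data location given in the experiment configuration. We want to characterise the efficacy of the algorithm with regards to a mostly untransformed dataset, so we will only normalise the features.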

In [5]:
# The raw data location and its meta entry both live under the 'data' section
# of the experiment configuration.
data_settings = experiment.config['data']
data_wrangler = UCI_WRANGLER(
    data_settings['raw_location'],
    data_settings['raw_data_meta'],
)

2022-04-28 09:45:03,497 - experimenter.uci_wrangler - INFO - Loading meta file
2022-04-28 09:45:03,498 - experimenter.uci_wrangler - INFO - Loading raw data file
2022-04-28 09:45:03,506 - experimenter.uci_wrangler - INFO - Preprocessing data
2022-04-28 09:45:03,507 - experimenter.uci_wrangler - INFO - ys shape is (286,)
2022-04-28 09:45:03,508 - experimenter.uci_wrangler - INFO - recasting ys to (n,1)
2022-04-28 09:45:03,508 - experimenter.uci_wrangler - INFO - Finished preprocessing data


In [6]:
# Split once using the ratio and seed from the experiment configuration,
# then move the resulting train/test data to the selected device.
split_ratio = experiment.config["train_test_ratio"]
split_seed = experiment.config["random_seed"]
data_wrangler.create_train_test_split(split_ratio, split_seed)
data_wrangler.send_train_test_to_device(device)

2022-04-28 09:45:03,519 - experimenter.uci_wrangler - INFO - Creating train test split
2022-04-28 09:45:03,521 - experimenter.uci_wrangler - INFO - split created
2022-04-28 09:45:03,521 - experimenter.uci_wrangler - INFO - sending train test to device cpu
2022-04-28 09:45:03,522 - experimenter.uci_wrangler - INFO - train test are on device cpu


## Performance metric

The NEAT implementation that ExplaNEAT extends uses a single function call for evaluating fitness. This might be reworked for ExplaNEAT so that the genome evaluation and the backprop loss function are consistent, but that can be reviewed later.

This uses binary cross-entropy: `BCELoss` from `PyTorch` as the backprop criterion and `binary_cross_entropy` as the fitness function.

## Base configuration

We are going to create the base configuration from an external configuration file. We will adjust it per experiment later, but it defines the defaults across all runs.

In [7]:
# Defaults shared by every run: the stock NEAT component types combined with
# the parameters read from the external config file.
config_path = "./config-bchard"
base_config = neat.Config(
    genome_type=neat.DefaultGenome,
    reproduction_type=neat.DefaultReproduction,
    species_set_type=neat.DefaultSpeciesSet,
    stagnation_type=neat.DefaultStagnation,
    filename=config_path,
)


In [8]:
# Register the very file base_config was loaded from; reusing config_path
# keeps the two from drifting apart if the path is ever changed.
experiment.register_config_file(config_path, "neat_config")

2022-04-28 09:45:03,547 - experimenter - INFO - Saving experiment configuration
2022-04-28 09:45:03,548 - experimenter - INFO - Saving other config files


We also want to put a hard limit on how long this can go on for.

We will create a method to manage the instantiation of a population on the basis of a specific config.

In [9]:
def instantiate_population(config, xs, ys, saveLocation):
    """Create a ``BackpropPopulation`` for one run and attach its reporters.

    The configuration is first written to ``config.conf`` inside
    ``saveLocation``; the directory is created when it does not exist yet.

    Args:
        config: NEAT configuration object; its ``save(path)`` method is called.
        xs: Training inputs, handed unchanged to ``BackpropPopulation``.
        ys: Training targets, handed unchanged to ``BackpropPopulation``.
        saveLocation: Directory for the saved config and the checkpoints; it
            is also passed to ``ExperimentReporter``.

    Returns:
        The new ``BackpropPopulation`` with five reporters attached, in this
        order: StdOutReporter, StatisticsReporter, Checkpointer,
        BackpropReporter, ExperimentReporter.
    """
    # exist_ok=True replaces the exists()/makedirs() pair, which could fail if
    # the directory appeared between the check and the creation.
    os.makedirs(saveLocation, exist_ok=True)

    config.save(os.path.join(saveLocation, 'config.conf'))

    # Create the population, which is the top-level object for a NEAT run.
    p = BackpropPopulation(config,
                           xs,
                           ys,
                           criterion=nn.BCELoss())

    # Join the prefix as a path so checkpoints land inside saveLocation even
    # when it is given without a trailing separator.
    checkpoint_prefix = os.path.join(saveLocation, "checkpoint-")

    # NOTE: keep this order. The experiment loop in this notebook addresses
    # the Checkpointer and the BackpropReporter by their position in the list.
    p.add_reporter(neat.StdOutReporter(True))  # progress in the terminal
    p.add_reporter(neat.StatisticsReporter())
    p.add_reporter(neat.Checkpointer(5, filename_prefix=checkpoint_prefix))
    p.add_reporter(backprop.BackpropReporter(True))
    p.add_reporter(ExperimentReporter(saveLocation))

    return p

# Experiment 1: Vary population size

The first experiment examines how run time differs across population sizes.

In [10]:
# Epoch counts to compare; the experiment loop passes each value to p.run as nEpochs.
epoch_points = [10, 25, 50, 100, 150]
# Alternative single-point setting, kept for reference:
# epoch_points = [10]

In [11]:
# Display the population size read from the NEAT config file; the experiment
# loop does not override it.
base_config.pop_size

50

In [12]:
# Output directory template for a single run; the experiment loop fills the two
# placeholders with (epochs, iteration_no).
saveLocationTemplate = './../../data/experiments/bchard/experiment-test-{}-{}/'

## Start the experiment

In [13]:
# Sanity check: number of dimensions of the training inputs
# (presumably samples x features - TODO confirm).
len(data_wrangler.X_train.shape)

2

In [14]:
# Sanity check: dtype of the labels held by the data wrangler.
data_wrangler.ys.dtype

dtype('float64')

In [15]:
def _find_reporter(population, reporter_type):
    """Return the first reporter attached to ``population`` of ``reporter_type``.

    Raises:
        LookupError: if no attached reporter is an instance of that type.
    """
    for reporter in population.reporters.reporters:
        if isinstance(reporter, reporter_type):
            return reporter
    raise LookupError("no {} attached to the population".format(reporter_type.__name__))


# Run 20 repeats for every entry of epoch_points. Each repeat gets its own seed
# (base seed + running counter), its own output directory, and saves the
# winner's test-set outputs, the full population state and the train/test data.
my_random_seed = experiment.config["random_seed"]
for epochs in epoch_points:
    for iteration_no in range(20):
        my_random_seed += 1
        random.seed(my_random_seed)
        # Seed torch too, so any torch-side randomness is repeatable per run.
        torch.manual_seed(my_random_seed)
        start_time = datetime.now()

        logger.info("################################################")
        logger.info("################################################")
        logger.info("Starting epochs {} iteration {}".format(epochs, iteration_no))
        logger.info("Started at {}".format(start_time.strftime("%m/%d/%Y, %H:%M:%S")))
        logger.info("################################################")
        logger.info("################################################")

        # Work on a copy so base_config stays untouched between runs; the
        # population size is left at the value from the config file.
        config = deepcopy(base_config)

        saveLocation = saveLocationTemplate.format(epochs, iteration_no)

        p = instantiate_population(config, data_wrangler.X_train, data_wrangler.y_train, saveLocation)
        # Run for up to max_n_generations generations; `epochs` is passed on as nEpochs.
        winner = p.run(binary_cross_entropy, experiment.config["max_n_generations"], nEpochs=epochs)

        g = p.best_genome

        end_time = datetime.now()
        # Record the wall-clock duration of the run.
        logger.info("Finished epochs {} iteration {} after {}".format(
            epochs, iteration_no, end_time - start_time))

        # Reporters are looked up by type, not by their index in the reporter
        # list, so changing which reporters are attached cannot silently pick
        # the wrong object.
        checkpointer = _find_reporter(p, neat.Checkpointer)
        checkpointer.save_checkpoint(p.config, p.population, p.species, str(p.generation) + "-final")

        winner_net = neat.nn.FeedForwardNetwork.create(winner, config)

        # One row per test sample: [input, expected output, network output].
        results = [
            [xi, xo, winner_net.activate(xi)]
            for xi, xo in zip(data_wrangler.X_test, data_wrangler.y_test)
        ]

        df = pd.DataFrame(results)
        df.to_csv(os.path.join(saveLocation, 'results.csv'))

        bp_reporter = _find_reporter(p, backprop.BackpropReporter)
        ancestry = bp_reporter.trace_ancestry_of_species(g.key, p.reproduction.ancestors)

        ancestors = {
            k: v['genome'] for k, v in bp_reporter.ancestry.items()
        }

        ## Save all of these to disc
        filename = 'fullStatus.xplnt'
        logger.info("Saving checkpoint to {0}".format(filename))

        # The state of `random` is stored alongside the population.
        with gzip.open(os.path.join(saveLocation, filename), 'wb', compresslevel=5) as f:
            data = (p, g, ancestry, ancestors, random.getstate())
            pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)

        # Store the exact split next to the results so each run directory is self-contained.
        with gzip.open(os.path.join(saveLocation, 'train_test_data.pkl'), 'wb', compresslevel=5) as f:
            train_test_data = (data_wrangler.X_train, data_wrangler.X_test,
                               data_wrangler.y_train, data_wrangler.y_test)
            pickle.dump(train_test_data, f, protocol=pickle.HIGHEST_PROTOCOL)
    

2022-04-28 09:45:03,657 - experimenter - INFO - ################################################
2022-04-28 09:45:03,658 - experimenter - INFO - ################################################
2022-04-28 09:45:03,659 - experimenter - INFO - Starting epochs 10 iteration 0
2022-04-28 09:45:03,659 - experimenter - INFO - Started at 04/28/2022, 09:45:03
2022-04-28 09:45:03,659 - experimenter - INFO - ################################################
2022-04-28 09:45:03,659 - experimenter - INFO - ################################################
2022-04-28 09:45:03,666 - experimenter.backproppop - INFO - about to start backprop with 10 epochs
2022-04-28 09:45:03,773 - experimenter.backproppop - INFO - mean improvement: -0.009926275630361352
2022-04-28 09:45:03,773 - experimenter.backproppop - INFO - best improvement: -0.01937369413189549
2022-04-28 09:45:03,774 - experimenter.backproppop - INFO - best loss: 0.4639141054307522
2022-04-28 09:45:03,774 - experimenter.evaluators - INFO - Xs dty

The function - generationStart - has just started at 1651095903.665639

 ****** Running generation 0 ****** 

The function - generationStart - took 0.00048828125 seconds to complete
The function - pre_backprop - has just started at 1651095903.666142
The function - pre_backprop - took 5.91278076171875e-05 seconds to complete
The function - backprop - has just started at 1651095903.666214
The function - backprop - took 0.10841894149780273 seconds to complete
The function - post_backprop - has just started at 1651095903.774654
The function - post_backprop - took 1.9073486328125e-05 seconds to complete
The function - evaluate fitness - has just started at 1651095903.774721
The function - evaluate fitness - took 0.016906023025512695 seconds to complete
The function - post evaluate - has just started at 1651095903.791688
Population's average fitness: 0.76231 stdev: 0.19642
Best fitness: 1.37543 - size: (1, 9) - species 3 - id 29
Key: 29
Fitness: 1.3754269789988238
Nodes:
	0 DefaultNodeGene(k

NameError: name 'xs' is not defined