Commit
Merge branch 'develop' into main

markcoletti committed Mar 10, 2022
2 parents 38c70e8 + ebc7198, commit d292c07

Showing 7 changed files with 260 additions and 44 deletions.
48 changes: 34 additions & 14 deletions README.md
@@ -12,6 +12,11 @@ perform better for those sets.
* Python 3.7 or 3.8
* [LEAP](https://github.com/AureumChaos/LEAP)

## Installation

1. Activate your conda or virtual environment
2. `cd` into the top-level `gremlin` directory
3. `pip install .`

## Configuration
Gremlin is essentially a thin convenience wrapper around [LEAP]
@@ -25,21 +30,24 @@ contain information that can be exploited to tune training data.
Example Gremlin configuration YAML:

```yaml
pop_size: 25
algorithm: async # or bygen

async: # parameters for asynchronous steady-state EA
  init_pop_size: ${pop_size}
  max_births: 2000
  ind_file: inds.csv # optional file for writing individuals as they are evaluated
  ind_file_probe: probe.log_ind # optional functor or function for writing ind_file

pop_file: pop.csv # where we will write out each generation in CSV format
problem: problem.QLearnerBalanceProblem("${env:GREMLIN_QLEARNER_CARTPOLE_MODEL_FPATH}")
representation: representation.BalanceRepresentation()

imports:
  - probe # need to import our probe.py so that LEAP sees our probe pipeline operator

pipeline: # isotropic means we mutate all genes with the given stds
  - ops.random_selection
  - ops.clone
  - mutate_gaussian(expected_num_mutations='isotropic', std=[0.1, 0.001, 0.01, 0.001], hard_bounds=representation.BalanceRepresentation.genome_bounds)
  - ops.pool(size=1)
```
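The `${pop_size}` and `${env:...}` references in the YAML above are variable interpolations: a value defined once at the top level (or taken from the environment) is substituted wherever it is referenced. The sketch below illustrates those substitution semantics with a toy resolver; it is not Gremlin's actual config machinery, and the `resolve` function and sample dict are made up for illustration.

```python
import os
import re

def resolve(config):
    """Recursively substitute ${key} and ${env:VAR} references (toy sketch)."""
    def subst(value):
        if isinstance(value, str):
            def repl(match):
                ref = match.group(1)
                if ref.startswith("env:"):
                    # ${env:VAR} pulls from the process environment
                    return os.environ.get(ref[4:], "")
                # ${key} pulls from a top-level config value
                return str(config[ref])
            return re.sub(r"\$\{([\w:]+)\}", repl, value)
        if isinstance(value, dict):
            return {k: subst(v) for k, v in value.items()}
        if isinstance(value, list):
            return [subst(v) for v in value]
        return value
    return {k: subst(v) for k, v in config.items()}

cfg = {"pop_size": 25, "async": {"init_pop_size": "${pop_size}"}}
print(resolve(cfg)["async"]["init_pop_size"])  # → 25
```

Note that a string substitution like this yields `"25"` (a string); a real config system would convert or preserve types.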

Essentially, you will have to define the following
@@ -78,9 +86,21 @@ $ gremlin config.yml

## Versions

* `v0.3`
  * Added support for the config variable `algorithm`, which selects between a
    traditional by-generation EA and an asynchronous steady-state EA
* `v0.2dev`, 2/17/22
  * Revamped config system and heavily refactored/simplified code
* `v0.1dev`, 10/14/21
  * Initial raw release

## Sub-directories
* `gremlin/` -- main `gremlin` code
* `examples/` -- examples for using gremlin; currently only has the MNIST example

## Main web site

The `gremlin` GitHub repository is at
[https://github.com/markcoletti/gremlin](https://github.com/markcoletti/gremlin).
`main` is the release branch, and active work occurs on the `develop` branch.
17 changes: 17 additions & 0 deletions examples/MNIST/async.yml
@@ -0,0 +1,17 @@
#
# Used for running MNIST example with an asynchronous steady-state EA
#
# gremlin.py config.yml async.yml
#
algorithm: async

async: # parameters for asynchronous steady-state EA
  init_pop_size: ${pop_size} # default to pop size, but can be set to the number of dask workers
  max_births: 25
  ind_file: inds.csv # optional file for writing individuals as they are evaluated
  ind_file_probe: probe.log_ind # optional functor or function for writing ind_file
  # scheduler_file: if no dask scheduler file is specified, assume dask workers are per-core on localhost

pipeline:
  - ops.random_selection # randomly select from pool of prospective parents
  - ops.clone
  - mutate_randint(expected_num_mutations=1, bounds=representation.MNISTRepresentation.genome_bounds)
  - ops.pool(size=1) # must be 1 for the ASEA since we asynchronously assign a single offspring to a worker for evaluation
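The `ops.pool(size=1)` setting reflects how a steady-state EA differs from a generational one: rather than building a whole generation at once, it creates and evaluates one offspring at a time, inserting each back into the population until `max_births` is reached. Below is a minimal single-process sketch of that loop; it is illustrative only, since Gremlin/LEAP actually distribute evaluations across dask workers, and `steady_state_ea` is a made-up function.

```python
import random

def steady_state_ea(evaluate, init_pop_size=5, max_births=25, seed=42):
    """Toy steady-state EA over real-valued genomes, run synchronously."""
    rng = random.Random(seed)
    pop = [rng.uniform(-5.0, 5.0) for _ in range(init_pop_size)]
    births = init_pop_size
    while births < max_births:
        parent = rng.choice(pop)              # ops.random_selection
        child = parent + rng.gauss(0.0, 0.5)  # ops.clone + mutation
        births += 1
        # pool(size=1): each offspring is inserted individually,
        # replacing the current worst member if it is better
        worst = min(range(len(pop)), key=lambda i: evaluate(pop[i]))
        if evaluate(child) > evaluate(pop[worst]):
            pop[worst] = child
    return pop

# Maximize -x**2: the population drifts toward 0 as births accumulate
final_pop = steady_state_ea(lambda x: -x * x)
```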
6 changes: 4 additions & 2 deletions examples/MNIST/config.yml
@@ -3,8 +3,10 @@
# Usage:
# $ gremlin.py config.yml
pop_size: 7
algorithm: bygen

bygen: # parameters that only make sense for a by-generation EA
  max_generations: 5
  k_elites: 2 # optional parameter for specifying the number (k) of elites we keep per generation

problem: problem.MNISTProblem()
representation: representation.MNISTRepresentation()
pop_file: pop.csv # where we will write out each generation in CSV format
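The `bygen` block's `max_generations` and `k_elites` parameters describe a conventional generational loop: each generation, the top `k` individuals survive unchanged and the rest of the population is replaced by new offspring. A toy sketch of those semantics follows; it is illustrative only, not the actual Gremlin/LEAP implementation, and `by_generation_ea` is a made-up function.

```python
import random

def by_generation_ea(evaluate, pop, max_generations=5, k_elites=2, seed=0):
    """Toy generational EA with elitism over real-valued genomes."""
    rng = random.Random(seed)
    for _ in range(max_generations):
        ranked = sorted(pop, key=evaluate, reverse=True)
        elites = ranked[:k_elites]  # the k best survive unchanged
        # Fill the rest of the next generation with mutated clones
        offspring = [rng.choice(pop) + rng.gauss(0.0, 0.1)
                     for _ in range(len(pop) - k_elites)]
        pop = elites + offspring
    return pop

pop = by_generation_ea(lambda x: -abs(x), [3.0, -2.0, 1.0, 0.5])
print(len(pop))  # → 4
```

Because elites are carried over verbatim, the best fitness seen so far can never get worse from one generation to the next.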
55 changes: 53 additions & 2 deletions examples/MNIST/probe.py
@@ -1,6 +1,11 @@
#!/usr/bin/env python3
''' Define bespoke LEAP probes for printing individuals to a CSV file.
This shows two different approaches to writing out individuals. The first
uses a functor (a class that behaves as a function), and the second is a
function that uses closures.
'''
import sys
import csv
from pathlib import Path

@@ -22,7 +27,7 @@ class IndividualProbeCSV():
    def __init__(self, csv_file):
        super().__init__()
        self.csv_file = Path(csv_file)
        self.csv_writer = csv.DictWriter(self.csv_file.open('w'),
                                         fieldnames=['birth_id',
                                                     'digit',
                                                     'start_eval_time',
@@ -49,3 +54,49 @@ def __call__(self, next_individual):
                                 'fitness': individual.fitness})

        yield individual
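The functor approach above wraps the pipeline: `__call__` receives the upstream iterator and yields each individual onward after logging it as a side effect. Here is a self-contained miniature of that same pattern; `ProbeCSV` is a made-up stand-in for illustration, not the `IndividualProbeCSV` class defined in this file.

```python
import csv
import io

class ProbeCSV:
    """Pass-through pipeline operator that logs each value as a CSV row."""
    def __init__(self, stream, fieldnames):
        self.writer = csv.DictWriter(stream, fieldnames=fieldnames)
        self.writer.writeheader()

    def __call__(self, next_individual):
        # Behave like any LEAP-style pipeline stage: consume the upstream
        # generator and yield every item downstream unchanged
        for ind in next_individual:
            self.writer.writerow({'fitness': ind})
            yield ind

out = io.StringIO()
probe = ProbeCSV(out, ['fitness'])
passed_through = list(probe(iter([0.5, 0.9])))
print(passed_through)  # → [0.5, 0.9]
```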



def log_ind(stream=sys.stdout, header=True):
    """ Log individuals for the asynchronous EA in more detail than the
    optional default individual logging.

    :param stream: to which we want to write the individual details
    :param header: True if we want a header for the CSV file
    :return: a function for recording where individuals are evaluated
    """
    writer = csv.DictWriter(stream,
                            fieldnames=['hostname', 'pid', 'uuid', 'birth_id',
                                        'digit',
                                        'start_eval_time', 'stop_eval_time',
                                        'fitness'])

    if header:
        writer.writeheader()

    def write_record(individual):
        """ Write a row to the CSV for the given individual.

        evaluate() will tack on the hostname and pid for the individual. The
        uuid should also be part of the distrib.Individual.

        :param individual: to be written to stream
        :return: None
        """
        nonlocal stream
        nonlocal writer

        writer.writerow({'hostname': individual.hostname,
                         'pid': individual.pid,
                         'uuid': individual.uuid,
                         'birth_id': individual.birth_id,
                         'digit': individual.genome[0],
                         'start_eval_time': individual.start_eval_time,
                         'stop_eval_time': individual.stop_eval_time,
                         'fitness': individual.fitness})
        # On some systems, such as Summit, we need to force a flush or there
        # will be no output until the very end of the run.
        stream.flush()

    return write_record
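For comparison with the functor, the closure pattern used by `log_ind` reduces to a few lines: the enclosing function owns the stream and writer, and the returned function captures them. A self-contained miniature follows; `make_logger` is a made-up name for illustration, not part of this module.

```python
import csv
import io

def make_logger(stream, fieldnames, header=True):
    """Return a closure that appends one CSV row per call."""
    writer = csv.DictWriter(stream, fieldnames=fieldnames)
    if header:
        writer.writeheader()

    def write_record(record):
        writer.writerow(record)
        # Flush eagerly, as log_ind does, so buffered systems show output
        stream.flush()

    return write_record

out = io.StringIO()
log = make_logger(out, ['birth_id', 'fitness'])
log({'birth_id': 0, 'fitness': 0.92})
log({'birth_id': 1, 'fitness': 0.87})
print(out.getvalue().splitlines())  # → ['birth_id,fitness', '0,0.92', '1,0.87']
```

The closure needs no class boilerplate, while the functor version can carry extra state and methods; both plug into a pipeline the same way.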
2 changes: 1 addition & 1 deletion gremlin/__version__.py
@@ -1 +1 @@
__version__ = 'v0.3'