In [1]:
 !pip install gym[classic_control]
import gym
import neat
import numpy as np
import time
from abc import ABC, abstractmethod

zsh:1: no matches found: gym[classic_control]


In [2]:
# Seconds simulate() sleeps after every step when rendering is enabled.
render_slowdown = 0.03

def run(config_file, eval_genomes, visual_reporter, generations=500):
    """Evolve a NEAT population and return the winning genome.

    Args:
        config_file: Path to the neat-python configuration file.
        eval_genomes: Callable ``(genomes, config)`` that assigns a
            ``fitness`` to every genome in ``genomes``.
        visual_reporter: Additional reporter attached to the population.
        generations: Maximum number of generations to run. Defaults to 500,
            the value that used to be hard-coded here.

    Returns:
        Whatever ``neat.Population.run`` returns (the winning genome).
    """
    # Load configuration.
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_file)

    # Create the population, which is the top-level object for a NEAT run.
    population = neat.Population(config)

    # Report progress on stdout, collect statistics, checkpoint every
    # 5 generations, and attach the caller-supplied reporter.
    population.add_reporter(neat.StdOutReporter(True))
    population.add_reporter(neat.StatisticsReporter())
    population.add_reporter(neat.Checkpointer(5))
    population.add_reporter(visual_reporter)

    # Run for up to `generations` generations.
    return population.run(eval_genomes, generations)

def simulate(actor, turns, render):
    """Run a single episode of the currently selected environment.

    The environment id is read from the module-level ``simulation_type`` and
    the per-step render delay from the module-level ``render_slowdown``.

    Args:
        actor: Object providing ``act(observation)`` (returns the action to
            take) and ``learn(observation, reward)`` (called after each step).
        turns: Maximum number of environment steps to take.
        render: When truthy, the environment is created with the "human"
            render mode and each step is followed by a short sleep.
    """
    # Rendering is disabled with the None object, not the string "None".
    render_mode = "human" if render else None
    env = gym.make(simulation_type, render_mode=render_mode)

    # try/finally guarantees the environment (and any render window) is
    # closed even if the actor raises or the run is interrupted.
    try:
        observation, _info = env.reset()

        for _ in range(turns):
            action = actor.act(observation)
            observation, reward, terminated, truncated, _info = env.step(action)
            actor.learn(observation, reward)

            if render:
                time.sleep(render_slowdown)
                env.render()

            # The episode is over on a terminal state or on truncation
            # (e.g. a time limit); do not keep stepping a finished episode.
            if terminated or truncated:
                break
    finally:
        env.close()
    
def createGenomeEvaluator(Actor):
    """Build the ``eval_genomes(genomes, config)`` callback for a given actor class.

    Args:
        Actor: Class called as ``Actor(genome, net, True)`` to wrap each genome.

    Returns:
        A function that, for every ``(genome_id, genome)`` pair, resets the
        genome's fitness to -1000, builds its feed-forward network and runs
        one non-rendered episode of ``num_turns`` steps (module-level value)
        through ``simulate``.
    """
    def eval_genomes(genomes, config):
        for _, candidate in genomes:
            # Starting value; it stays in place only if the actor never
            # overwrites it during the episode.
            candidate.fitness = -1000
            learner = Actor(
                candidate,
                neat.nn.FeedForwardNetwork.create(candidate, config),
                True,
            )
            simulate(learner, num_turns, False)

    return eval_genomes

class Actor(ABC):
    """Base class that pairs a genome with its network to act in an environment.

    Subclasses implement ``act`` (observation -> action) and ``evaluate``
    (observation and step reward -> fitness value).
    """

    def __init__(self, genome, net, isLearning):
        """Store the genome, its network and the learning flag.

        Args:
            genome: Object whose ``fitness`` attribute is written by ``learn``.
            net: Network used by subclasses to choose actions.
            isLearning: When truthy, ``learn`` updates ``genome.fitness``.
        """
        self.genome = genome
        self.net = net
        self.isLearning = isLearning

    def learn(self, observation, fitness):
        """Update the genome's fitness from the latest step, if learning.

        ``genome.fitness`` is overwritten (not accumulated) on every call, so
        after an episode it holds the value computed for the last step.

        Args:
            observation: Observation produced by the latest step.
            fitness: Value forwarded to ``evaluate``; ``simulate`` passes the
                reward of the latest step here.
        """
        if self.isLearning:
            self.genome.fitness = self.evaluate(observation, fitness)

    @abstractmethod
    def act(self, observation):
        """Return the action to take for ``observation``."""

    @staticmethod
    @abstractmethod
    def evaluate(observation, fitness):
        """Return the fitness value for a step; the default passes ``fitness`` through."""
        return fitness
    
class VisualReporter(neat.reporting.BaseReporter):
    """Reporter that replays the best genome of selected generations with rendering on."""

    def __init__(self, Actor, min_generation, generation_gap):
        """Remember which actor class to use and how often to render.

        Args:
            Actor: Class called as ``Actor(genome, net, False)`` for the replay.
            min_generation: Rendering only happens once the generation
                counter is strictly greater than this value.
            generation_gap: Rendering happens when the generation counter is
                a multiple of this value.
        """
        super().__init__()
        self.Actor = Actor
        self.min_generation = min_generation
        self.generation_gap = generation_gap
        # Incremented by end_generation; starts at 0.
        self.current_generation = 0

    def end_generation(self, config, population, species_set):
        """Advance the internal generation counter by one."""
        self.current_generation += 1

    def post_evaluate(self, config, population, species, best_genome):
        """Render one episode of ``best_genome`` when this generation qualifies."""
        past_minimum = self.current_generation > self.min_generation
        if not (past_minimum and self.current_generation % self.generation_gap == 0):
            return

        network = neat.nn.FeedForwardNetwork.create(best_genome, config)
        # isLearning=False: the replay must not overwrite the genome's fitness.
        showcase_actor = self.Actor(best_genome, network, False)
        simulate(showcase_actor, num_turns, True)

In [3]:
# Settings for the Acrobot experiment. These module-level names are read as
# globals by simulate(), the genome evaluator and VisualReporter.
num_turns = 200  # maximum environment steps per episode
render_generation = 1  # passed below as VisualReporter's generation_gap
min_render_generation = 0  # passed below as VisualReporter's min_generation
simulation_type = "Acrobot-v1"  # environment id handed to gym.make in simulate()

class Acrobot(Actor):
    """Actor for ``Acrobot-v1`` that turns one network output into a discrete action."""

    def act(self, observation):
        """Pick a discrete action id from the first network output.

        Acrobot-v1 uses a Discrete(3) action space in which the action is an
        index, not a torque: 0 applies -1 torque, 1 applies no torque and
        2 applies +1 torque. Returning the torque value itself (e.g. -1)
        falls outside the action space.

        Args:
            observation: Observation passed to the network's ``activate``.

        Returns:
            2 when the output is above 0.75, 0 when it is below 0.25,
            otherwise 1.
        """
        output = self.net.activate(observation)[0]

        if output > 0.75:
            return 2  # +1 torque
        if output < 0.25:
            return 0  # -1 torque
        return 1  # no torque

    @staticmethod
    def evaluate(observation, fitness):
        """Use the value supplied by ``learn`` (the step reward) unchanged as fitness."""
        return fitness
        
# Wire the Acrobot actor into the evaluator and the visual reporter, then
# evolve; `best` receives the genome returned by run().
eval_genomes = createGenomeEvaluator(Acrobot)
visual_reporter = VisualReporter(Acrobot, min_render_generation, render_generation)
best = run('neat_acrobot.cfg', eval_genomes, visual_reporter)


 ****** Running generation 0 ****** 



KeyboardInterrupt: 

In [4]:
# Replay the evolved Acrobot genome with rendering on.
# Requires `best` from a completed run() call, and relies on the current
# module-level `simulation_type` / `num_turns` still being the Acrobot values.
config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                     neat.DefaultSpeciesSet, neat.DefaultStagnation,
                     'neat_acrobot.cfg')

best_net = neat.nn.FeedForwardNetwork.create(best, config)
# isLearning=False so the replay does not modify the genome's fitness.
best_acrobot = Acrobot(best, best_net, False)
simulate(best_acrobot, num_turns, True)

In [15]:
# Settings for the MountainCar experiment. These module-level names are read
# as globals by simulate(), the genome evaluator and VisualReporter, and they
# replace the values set by earlier cells.
num_turns = 500  # maximum environment steps per episode
render_generation = 10  # passed below as VisualReporter's generation_gap
min_render_generation = 20  # passed below as VisualReporter's min_generation
simulation_type = "MountainCarContinuous-v0"  # environment id handed to gym.make in simulate()

class MountainCar(Actor):
    """Actor for ``MountainCarContinuous-v0`` driven by a single network output."""

    def act(self, observation):
        """Return a one-element action list built from the first network output.

        The output is rescaled with ``value * 2 - 1`` (this maps [0, 1] onto
        [-1, 1]; assumes the network output lies in [0, 1] -- TODO confirm
        against the activation function in the NEAT config).
        """
        first_output = self.net.activate(observation)[0]
        return [first_output * 2 - 1]

    @staticmethod
    def evaluate(observation, fitness):
        """Add 100 times the absolute value of ``observation[0]`` to ``fitness``.

        ``observation[0]`` is presumably the car's position -- verify against
        the environment's observation layout.
        """
        leading_value = observation[0]
        return fitness + 100 * abs(leading_value)
        

# Wire the MountainCar actor into the evaluator and the visual reporter, then
# evolve; `best` receives the genome returned by run().
eval_genomes = createGenomeEvaluator(MountainCar)
visual_reporter = VisualReporter(MountainCar, min_render_generation, render_generation)
best = run('neat_mountain.cfg', eval_genomes, visual_reporter)


 ****** Running generation 0 ****** 



TypeError: unsupported operand type(s) for *: 'dict' and 'float'

In [17]:
# Replay the evolved MountainCar genome with rendering on.
# Requires `best` from a completed run() call, and relies on the current
# module-level `simulation_type` / `num_turns` still being the MountainCar values.
config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                     neat.DefaultSpeciesSet, neat.DefaultStagnation,
                     'neat_mountain.cfg')

best_net = neat.nn.FeedForwardNetwork.create(best, config)
# isLearning=False so the replay does not modify the genome's fitness.
best_mountain_car = MountainCar(best, best_net, False)
simulate(best_mountain_car, num_turns, True)

In [7]:
# Settings for the LunarLander experiment. These module-level names are read
# as globals by simulate(), the genome evaluator and VisualReporter, and they
# replace the values set by earlier cells.
num_turns = 250  # maximum environment steps per episode
render_generation = 3  # passed below as VisualReporter's generation_gap
min_render_generation = 4  # passed below as VisualReporter's min_generation
simulation_type = "LunarLanderContinuous-v2"  # environment id handed to gym.make in simulate()

class LunarLander(Actor):
    """Actor for ``LunarLanderContinuous-v2`` using every network output as an action component."""

    def act(self, observation):
        """Return one action component per network output, each rescaled by ``value * 2 - 1``."""
        activations = self.net.activate(observation)
        return [value * 2 - 1 for value in activations]

    @staticmethod
    def evaluate(observation, fitness):
        """Penalise ``fitness`` using the first three observation entries.

        Computes ``fitness - 10 * obs[1] - abs(obs[0]) - obs[2]``. The entries
        are presumably x position, y position and horizontal velocity --
        verify against the environment's observation layout.
        NOTE(review): ``obs[2]`` is subtracted with its sign (no ``abs``);
        confirm this is intended.
        """
        first, second, third = observation[0], observation[1], observation[2]
        return fitness - 10 * second - abs(first) - third

# Wire the LunarLander actor into the evaluator and the visual reporter, then
# evolve; `best` receives the genome returned by run().
eval_genomes = createGenomeEvaluator(LunarLander)       
visual_reporter = VisualReporter(LunarLander, min_render_generation, render_generation)
best = run('neat_lander.cfg', eval_genomes, visual_reporter)


 ****** Running generation 0 ****** 





Population's average fitness: -106.35481 stdev: 19.35771
Best fitness: -11.44636 - size: (2, 16) - species 1 - id 86
Average adjusted fitness: 0.581
Mean genetic distance 1.077, standard deviation 0.274
Population of 100 members in 1 species:
   ID   age  size  fitness  adj fit  stag
     1    0   100    -11.4    0.581     0
Total extinctions: 0
Generation time: 3.359 sec

 ****** Running generation 1 ****** 

Population's average fitness: -104.12848 stdev: 10.69826
Best fitness: -36.03064 - size: (2, 15) - species 1 - id 110
Average adjusted fitness: 0.315
Mean genetic distance 1.054, standard deviation 0.274
Population of 100 members in 1 species:
   ID   age  size  fitness  adj fit  stag
     1    1   100    -36.0    0.315     1
Total extinctions: 0
Generation time: 3.004 sec (3.182 average)

 ****** Running generation 2 ****** 

Population's average fitness: -105.03252 stdev: 9.85288
Best fitness: -97.12207 - size: (2, 15) - species 1 - id 271
Average adjusted fitness: 0.856
Mean g

KeyboardInterrupt: 

In [16]:
# Replay the evolved LunarLander genome with rendering on.
# Requires `best` from a completed run() call, and relies on the current
# module-level `simulation_type` / `num_turns` still being the LunarLander values.
config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                     neat.DefaultSpeciesSet, neat.DefaultStagnation,
                     'neat_lander.cfg')

best_net = neat.nn.FeedForwardNetwork.create(best, config)
# isLearning=False so the replay does not modify the genome's fitness.
best_lunar_lander = LunarLander(best, best_net, False)
simulate(best_lunar_lander, num_turns, True)