In [15]:
import gym
import gym
import gym.wrappers

import matplotlib.pyplot as plt

import multiprocessing
import neat
from neat import nn, population, statistics, Config
import numpy as np
import os
import pickle
import random
import time
from pathlib import Path

CONFIGS_PATH = Path('../config')

In [16]:
env = gym.make('CartPole-v1')

config_path = CONFIGS_PATH / 'config-cart-pole-v0'
config = neat.Config(
    neat.DefaultGenome,
    neat.DefaultReproduction,
    neat.DefaultSpeciesSet,
    neat.DefaultStagnation,
    str(config_path),
)

In [17]:
def simulate_species(net, env, episodes=1, steps=5000, render=False):
    fitnesses = []
    for runs in range(episodes):
        inputs = env.reset()
        cum_reward = 0.0
        for j in range(steps):
            outputs = net.activate(inputs)
            action = np.argmax(outputs)
            inputs, reward, done, _ = env.step(action)
            if render:
                env.render()
            if done:
                break
            cum_reward += reward

        fitnesses.append(cum_reward)

    fitness = np.array(fitnesses).mean()
    return fitness

def evaluate_genome(g, config):
    net = nn.FeedForwardNetwork.create(g, config)
    return simulate_species(net, env, render=False,)

def eval_fitness(genomes, config):
    for g_id, g in genomes:
        fitness = evaluate_genome(g, config)
        g.fitness = fitness

pop = population.Population(config)
pop.add_reporter(neat.StdOutReporter(True))
stats = neat.StatisticsReporter()
pop.add_reporter(stats)
pop.add_reporter(neat.Checkpointer(5))

winner = pop.run(eval_fitness, 1000)


 ****** Running generation 0 ****** 

Population's average fitness: 17.68500 stdev: 29.19548
Best fitness: 337.00000 - size: (2, 8) - species 1 - id 145
Average adjusted fitness: 0.032
Mean genetic distance 1.470, standard deviation 0.393
Population of 200 members in 1 species:
   ID   age  size  fitness  adj fit  stag
     1    0   200    337.0    0.032     0
Total extinctions: 0
Generation time: 0.120 sec

 ****** Running generation 1 ****** 

Population's average fitness: 37.80500 stdev: 62.06792
Best fitness: 499.00000 - size: (2, 8) - species 1 - id 353
Average adjusted fitness: 0.063
Mean genetic distance 1.560, standard deviation 0.383
Population of 200 members in 1 species:
   ID   age  size  fitness  adj fit  stag
     1    1   200    499.0    0.063     0
Total extinctions: 0
Generation time: 0.177 sec (0.149 average)

 ****** Running generation 2 ****** 

Population's average fitness: 61.01500 stdev: 105.30477
Best fitness: 499.00000 - size: (2, 8) - species 1 - id 353
Avera

In [18]:
# env = gym.make('CartPole-v1')

winner = stats.best_genome()
winner_net = neat.nn.FeedForwardNetwork.create(winner, config)
    
inputs = env.reset()
steps = 1000
for _ in range(steps):
    outputs = winner_net.activate(inputs)
    action = np.argmax(outputs)
    inputs, reward, done, _ = env.step(action)
    env.render()

print("completed!")
env.close()

completed!
