In [1]:
#!/usr/bin/python3
from __future__ import print_function
import gym
import pybullet_envs
import MultiNEAT as NEAT
import MultiNEAT.viz as viz
import random as rnd
import pickle
import numpy as np
import cv2
from tqdm import tqdm
import matplotlib.pyplot as plt
import os
import sys
import time
from MultiNEAT import EvaluateGenomeList_Parallel
from MultiNEAT import GetGenomeList, ZipFitness
from concurrent.futures import ProcessPoolExecutor, as_completed

rnd.seed(0)

rng = NEAT.RNG()
rng.TimeSeed()

In [2]:
params = NEAT.Parameters()
params.PopulationSize = 30
params.DynamicCompatibility = True
params.WeightDiffCoeff = 1.0
params.CompatTreshold = 2.0
params.YoungAgeTreshold = 15
params.SpeciesMaxStagnation = 15
params.OldAgeTreshold = 35
params.MinSpecies = 2
params.MaxSpecies = 4
params.RouletteWheelSelection = False
params.Elitism = True
params.RecurrentProb = 0.15
params.OverallMutationRate = 0.2

params.MutateWeightsProb = 0.8
params.MutateNeuronTimeConstantsProb = 0.1
params.MutateNeuronBiasesProb = 0.1

params.WeightMutationMaxPower = 0.5
params.WeightReplacementMaxPower = 1.0
params.MutateWeightsSevereProb = 0.5
params.WeightMutationRate = 0.25

params.TimeConstantMutationMaxPower = 0.1
params.BiasMutationMaxPower = params.WeightMutationMaxPower

params.MaxWeight = 8

params.MutateAddNeuronProb = 0.1
params.MutateAddLinkProb = 0.2
params.MutateRemLinkProb = 0.0

params.MinActivationA  = 1.0
params.MaxActivationA  = 6.0

params.MinNeuronTimeConstant = 0.04
params.MaxNeuronTimeConstant = 0.24

params.MinNeuronBias = -params.MaxWeight
params.MaxNeuronBias = params.MaxWeight

params.ActivationFunction_SignedSigmoid_Prob = 0.0
params.ActivationFunction_UnsignedSigmoid_Prob = 0.0
params.ActivationFunction_Tanh_Prob = 1.0
params.ActivationFunction_SignedStep_Prob = 0.0
params.ActivationFunction_Linear_Prob = 0.0

params.CrossoverRate = 0.75  # mutate only 0.25
params.MultipointCrossoverRate = 0.4
params.SurvivalRate = 0.2

params.MutateNeuronTraitsProb = 0
params.MutateLinkTraitsProb = 0

In [3]:
# returns fitness of 1 genome
def evaluate(genome):
    net = NEAT.NeuralNetwork()
    genome.BuildPhenotype(net)
    
    env = gym.make('LunarLander-v2')
    
    observation = env.reset()
    net.Flush()
    
    f = 0

    for t in range(300):

        inp = observation.tolist()
        net.Input(inp + [1.0])
        net.ActivateLeaky(0.1)
        out = list(net.Output())
        action = np.argmax(out)
        observation, reward, done, info = env.step(action)
        if done: break

        f += reward

    return f

# run_index => num of generations to complete
# for 1000 generations or when max fitness is reached,
# do evaluate each genome in the pop
def getbest(i):
    
    # build population
    g = NEAT.Genome(0, 3, 0, 1, False, NEAT.ActivationFunction.UNSIGNED_SIGMOID,
                    NEAT.ActivationFunction.UNSIGNED_SIGMOID, 0, params, 0)
    pop = NEAT.Population(g, params, True, 1.0, i)
    pop.RNG.Seed(int(time.clock()*100))

    # for max x generations, evaluate each genome in the pop, 
    # if max fitness is reached then break else continue with next generation
    generations = 0
    for generation in range(10):
        print('Starting Generation: '+str(generation))
        genome_list = NEAT.GetGenomeList(pop)
        fitness_list = EvaluateGenomeList_Parallel(genome_list, evaluate, display=False)
        NEAT.ZipFitness(genome_list, fitness_list)
        pop.Epoch()
        generations = generation
        best = max(fitness_list)
        print('best fitness: '+str(best))
        if best > 100000:
            break

    return generations, pop.GetBestGenome()

In [4]:
gens = []
runs = 1
# do 100 runs
# in each run, do a full calc
# after all finished, show the list of needed generation per run and the avg gens needed
for run in range(runs):
    gen, winner = getbest(run)
    gens += [gen]
    
    with open('winnerMultiNEATPara.pkl', 'wb') as output:
        pickle.dump(winner, output, 1)
        
    print('Run:', run, 'Generations to solve:', gen)
avg_gens = sum(gens) / len(gens)

print('All:', gens)
print('Average:', avg_gens)

Starting Generation: 0
best fitness: 90.53292637847464
Starting Generation: 1
best fitness: 124.72347592709896
Starting Generation: 2
best fitness: 107.47999489535243
Starting Generation: 3
best fitness: 116.32508947715647
Starting Generation: 4
best fitness: 144.45852756662327
Starting Generation: 5
best fitness: 52.5639100738876
Starting Generation: 6
best fitness: 46.11979881568291
Starting Generation: 7
best fitness: 53.0108473440724
Starting Generation: 8
best fitness: 14.10517933672432
Starting Generation: 9
best fitness: 23.027015515981276
Run: 0 Generations to solve: 9
All: [9]
Average: 9.0


In [5]:
with open('winnerMultiNEATPara.pkl','rb') as readWinner:
    g = pickle.load(readWinner)
    env = gym.make('LunarLander-v2')
    while True:
        try:
            observation = env.reset()
            net = NEAT.NeuralNetwork()
            g.BuildPhenotype(net)
            reward = 0

            for t in range(250):

                time.sleep(0.01)
                env.render()

                # interact with NN
                inp = observation.tolist()
                net.Input(inp + [1.0])
                net.ActivateLeaky(0.1)
                out = list(net.Output())

                # render NN
                img = viz.Draw(net)
                cv2.imshow("current best", img)
                cv2.waitKey(1)

                action = np.argmax(out)
                observation, reward, done, info = env.step(action)

                if done:
                    break

        except Exception as ex:
            print(ex)
            time.sleep(0.2)

KeyboardInterrupt: 

In [1]:
for x in range(1):
    print('bla')

bla
