<img src="../imgs/logo.png" width="20%" align="right" style="margin:0px 20px">


# Evolutionary Computation

## 5.3 Deep Neuroevolution

<a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/"><img alt="Creative Commons License" align="left" src="https://i.creativecommons.org/l/by-sa/4.0/80x15.png" /></a>&nbsp;| Dennis G. Wilson | <a href="https://d9w.github.io/evolution/">https://d9w.github.io/evolution/</a>

In [1]:
using PyCall
using Conda

In [2]:
Conda.add("gym")

┌ Info: Running `conda install -y gym` in root environment
└ @ Conda /Users/louiseplacidet/.julia/packages/Conda/3rPhK/src/Conda.jl:113


Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.



In [3]:
import Random
Random.seed!(1234);

In [4]:
# Load the CMA-ES implementation from cmaes.jl, resolved relative to this file.
# NOTE(review): presumably this is where `CMAES` and `step!` used further down
# come from — confirm against cmaes.jl.
include("cmaes.jl");

In [5]:
"""
    FCLayer(w, b)

Parameters of a single fully connected layer.

# Fields
- `w`: weight matrix, applied as `w * x`.
- `b`: bias vector, added element-wise to `w * x`.

Both fields have concrete types (`Matrix{Float64}` / `Vector{Float64}`) so that
code reading them is type-stable. Arguments with another real element type are
converted to `Float64` by the default constructor.
"""
struct FCLayer
    w::Matrix{Float64}
    b::Vector{Float64}
end

# Container for a small feed-forward network made of three fully connected
# layers. `compute` applies them in the order l1 -> l2 -> out.
struct SimpleANN
    l1::FCLayer   # first layer, consumes the network input
    l2::FCLayer   # second layer, consumes the output of `l1`
    out::FCLayer  # final layer, produces the network output
end

In [6]:
"""
    SimpleANN(input::Int, N1::Int, N2::Int, output::Int)

Build a `SimpleANN` whose weights and biases are all zero.

# Arguments
- `input`: number of network inputs.
- `N1`: number of units in the first layer.
- `N2`: number of units in the second layer.
- `output`: number of network outputs.
"""
function SimpleANN(input::Int, N1::Int, N2::Int, output::Int)
    # A layer mapping `nin` values to `nout` values stores an nout×nin weight
    # matrix and an nout-element bias vector.
    zerolayer(nout, nin) = FCLayer(zeros(nout, nin), zeros(nout))
    return SimpleANN(zerolayer(N1, input), zerolayer(N2, N1), zerolayer(output, N2))
end

SimpleANN

In [7]:
ann = SimpleANN(5, 64, 64, 4);

In [8]:
"""
    compute(inputs, ann::SimpleANN)

Run a forward pass of `ann` on `inputs` and return the output of the last layer.

Each layer computes `w * x .+ b`. No activation function is applied between
layers, so the whole network is an affine function of `inputs`.

# Arguments
- `inputs`: array of real numbers whose size is compatible with `ann.l1.w`.
  Any real element type is accepted, so observations that arrive as `Float32`
  or integer arrays work without a manual conversion.
- `ann`: the network to evaluate.
"""
function compute(inputs::AbstractArray{<:Real}, ann::SimpleANN)
    x = ann.l1.w * inputs .+ ann.l1.b
    x = ann.l2.w * x .+ ann.l2.b
    return ann.out.w * x .+ ann.out.b
end

compute (generic function with 1 method)

In [9]:
compute(zeros(5), ann)

4-element Array{Float64,1}:
 0.0
 0.0
 0.0
 0.0

In [10]:
gym = pyimport("gym")

PyObject <module 'gym' from '/Users/louiseplacidet/.julia/conda/3/lib/python3.7/site-packages/gym/__init__.py'>

In [11]:
# NOTE(review): `play_env` creates and closes its own environment; this global
# `env` is not closed anywhere in the visible source — confirm it is still needed.
env = gym.make("CartPole-v1")
# Input and output sizes used when building networks for this environment.
# NOTE(review): presumably 4 observation values and 2 discrete actions for
# CartPole — verify against env.observation_space / env.action_space.
n_in = 4
n_out = 2;

In [12]:
"""
    play_env(ann::SimpleANN; render=false)

Play one episode of `CartPole-v1` with `ann` as the controller and return the
sum of the rewards collected until the environment reports `done`.

At every step the action is the index of the largest network output, shifted by
one because `argmax` is 1-based while the action passed to `env.step` is
0-based.

# Keywords
- `render`: when `true`, call `env.render()` after every step.

The environment is seeded with `0` before the reset (presumably so that repeated
evaluations of the same network are reproducible). It is always closed, even if
the network evaluation or a gym call throws.
"""
function play_env(ann::SimpleANN; render=false)
    env = gym.make("CartPole-v1")
    total_reward = 0.0
    try
        env.seed(0)
        obs = env.reset()
        done = false

        while !done
            action = argmax(compute(obs, ann)) - 1
            obs, reward, done, _ = env.step(action)
            if render
                env.render()
            end
            total_reward += reward
        end
    finally
        # Release the environment on both the normal and the error path.
        env.close()
    end
    # Drop the reference and force a collection so the Python-side object can be
    # freed right away; this function may be called many times in a row.
    env = nothing
    Base.GC.gc()
    return total_reward
end

play_env (generic function with 1 method)

In [13]:
ann = SimpleANN(n_in, 5, 5, n_out)
play_env(ann; render=true)

9.0

In [14]:
play_env(ann)

9.0

In [13]:
"""
    genes_to_ann(genes)

Decode a flat genome into a `SimpleANN(n_in, 5, 5, n_out)`.

The genes are consumed in order and written, using linear (column-major)
indexing, into `l1.w`, `l1.b`, `l2.w`, `l2.b`, `out.w`, `out.b`.

# Arguments
- `genes`: array of real numbers with exactly as many elements as the network
  has parameters.

# Throws
- `DimensionMismatch` if `length(genes)` differs from the number of network
  parameters. Without this check a genome that is too short would silently
  leave the trailing parameters at zero.
"""
function genes_to_ann(genes::AbstractArray{<:Real})
    ann = SimpleANN(n_in, 5, 5, n_out)
    layers = (ann.l1.w, ann.l1.b, ann.l2.w, ann.l2.b, ann.out.w, ann.out.b)

    nparams = sum(length, layers)
    length(genes) == nparams || throw(DimensionMismatch(
        "expected $nparams genes, got $(length(genes))"))

    # Copy consecutive slices of the genome into each parameter array.
    offset = firstindex(genes)
    for layer in layers
        n = length(layer)
        copyto!(layer, 1, genes, offset, n)
        offset += n
    end
    return ann
end

genes_to_ann (generic function with 1 method)

In [14]:
"""
    objective(genes::Array{Float64})

Fitness function for the optimizer: decode `genes` into a network, play one
episode with it, and return the negated total reward, so that a lower value
corresponds to a better-performing network.
"""
function objective(genes::Array{Float64})
    episode_return = play_env(genes_to_ann(genes))
    return -episode_return
end

objective (generic function with 1 method)

In [15]:
# Genome length: total number of weights and biases of SimpleANN(n_in, 5, 5, n_out),
# i.e. (weights + biases) of layer 1, layer 2 and the output layer, in that order.
N = n_in*5 + 5 + 5*5 + 5 + 5*n_out + n_out

67

In [16]:
ann = genes_to_ann(randn(N))

SimpleANN(FCLayer([0.8673472019512456 2.2118774995743475 -0.5605013381807765 0.11009612632217552; -0.9017438158568171 0.5328132821695382 -0.019291781689849075 -0.2511757400198831; … ; -0.9029142938652416 0.5023344963886675 1.852782957725545 0.07211635315125874; 0.8644013132535154 -0.5169836206932686 -0.8277634318169205 -1.503429457351051], [1.5641682355362416, -1.3967353668333795, 1.1054978391059092, -1.1067299135255761, -3.2113596499239088]), FCLayer([-0.07401454242444336 -1.2796722102183824 … -0.0801624859452718 -0.11445720023473198; 0.1509756176321479 0.9973171556575041 … -1.0912192142639132 0.1658369675157237; … ; -0.31015257323306406 -0.036445994527206725 … -0.3154372460983286 -1.009783518501397; -0.6027068905147959 0.14197425838484184 … -1.3614476249361172 -0.5438048486401967], [-1.2267247260093483, -0.5417157197543718, -0.6864935365141717, -0.7129319615024848, -0.3270588960867544]), FCLayer([0.5148361669491318 -0.3079736599439786 … -0.9871768159072896 -0.5227721501159824; 2.4174

In [19]:
play_env(ann)

13.0

In [17]:
play_env(ann)

13.0

In [18]:
# Run five CMA-ES steps and print, for each one, the best episode return among
# the evaluated individuals (fitness values are negated returns, hence the sign flip).
c = CMAES(N=N, µ=10, λ=30, τ=sqrt(N), τ_c=N^2, τ_σ=sqrt(N))
for gen in 1:5
    step!(c, objective)
    returns = .-c.F_λ
    println(gen, " ", maximum(returns))
end

1 206.0
2 128.0
3 39.0
4 119.0
5 116.0


In [19]:
# Run up to 20 CMA-ES steps, remembering the best genome seen so far, and stop
# early once the return reaches 500.
# NOTE(review): 500 is presumably the maximum return of CartPole-v1 — confirm.
best = nothing
best_fit = -Inf
c = CMAES(N=N, µ=10, λ=30, τ=sqrt(N), τ_c=N^2, τ_σ=sqrt(N))
for i in 1:20
    # Without this declaration the assignments below only update the globals in
    # interactive (soft-scope) sessions; in a plain script they would create new
    # loop-local variables instead.
    global best, best_fit
    step!(c, objective)
    bestind = argmin(c.F_λ)
    maxfit = -c.F_λ[bestind]
    println(i, " ", maxfit)
    if maxfit > best_fit
        # NOTE(review): assumes c.offspring is index-aligned with c.F_λ — verify in cmaes.jl.
        best = copy(c.offspring[bestind])
        best_fit = maxfit
    end
    # `>=` rather than `==`, so the stop test does not rely on exact float equality.
    if best_fit >= 500
        break
    end
end

1 94.0
2 116.0
3 500.0


In [20]:
ann = genes_to_ann(best)
play_env(ann; render=true)

500.0