In [None]:
import numpy as np
from evostra import EvolutionStrategy
from evostra.models import FeedForwardNetwork
import gym
# Environment shared by the evaluation and rendering helpers in this notebook.
env = gym.make('Marvin-v0')
# A feed-forward neural network with an input of size 24, two hidden layers of size 24 and an output of size 4.
# NOTE(review): presumably 24 and 4 match the observation and action sizes of 'Marvin-v0' — confirm against the env.
model = FeedForwardNetwork(layer_sizes=[24, 24, 24, 4])

In [None]:
def evaluate_model(weights, max_iter=10000, verbose=False):
    """Play one episode with the given weights and return the summed reward.

    The module-level ``model`` has ``weights`` loaded into it (so it is
    mutated as a side effect) and is then used to drive the module-level
    ``env`` from a fresh ``reset()``.

    Args:
        weights: Weights handed to ``model.set_weights``.
        max_iter: Upper bound on the number of environment steps taken.
        verbose: When true, print a one-line summary once the episode ends.

    Returns:
        The sum of the rewards returned by ``env.step`` during the episode.
    """
    model.set_weights(weights)
    obs = env.reset()
    done = False
    i = 0
    r_sum = 0
    while True:
        # Leave once the environment reports completion or the step budget is spent.
        if done or not i < max_iter:
            break
        action = model.predict(obs)
        obs, reward, done, _info = env.step(action)
        r_sum += reward
        i += 1
    if verbose:
        print(f"Episode end after {i} iterations with reward = {r_sum} and done status {done}")
    return r_sum

In [None]:
# num_threads > 1 spreads the evaluations over several processes, and num_threads=-1 uses as many
# processes as the machine has cores; that is the setting chosen here.
# For a task that is not computationally expensive, a single process (num_threads=1) can be faster,
# because the IPC overhead of extra processes would decrease the performance.
es = EvolutionStrategy(
    model.get_weights(),
    evaluate_model,
    population_size=20,
    sigma=0.1,
    learning_rate=0.03,
    decay=0.995,
    num_threads=-1,
)
# Run 100 iterations with print_step=1.
es.run(100, print_step=1)

In [None]:
def render_env(model, env, max_iter=None, verbose=True):
    """Render one episode of ``env`` while ``model`` picks the actions.

    The environment is reset, then rendered and stepped repeatedly with the
    output of ``model.predict(observation)`` until it reports ``done`` or,
    when ``max_iter`` is given, until ``max_iter`` steps have been taken,
    whichever comes first.

    Args:
        model: Object exposing ``predict(observation)``.
        env: Object exposing ``reset()``, ``render()`` and ``step(action)``,
            where ``step`` returns ``(observation, reward, done, info)``.
        max_iter: Optional cap on the number of steps; ``None`` means the
            episode runs until the environment reports ``done``.
        verbose: When true, print a one-line summary once the episode ends.

    Returns:
        None.
    """
    observation = env.reset()
    done = False
    i = 0
    r_sum = 0
    # Both conditions must hold to keep going: the episode is still running
    # AND the optional step budget is not exhausted. (The previous `or` form
    # ignored max_iter while running and kept stepping a finished episode.)
    while not done and (max_iter is None or i < max_iter):
        env.render()
        observation, reward, done, _ = env.step(model.predict(observation))
        i += 1
        r_sum += reward
    if verbose:
        print(f"Episode end after {i} iterations with reward = {r_sum} and done status {done}")

In [None]:
# Load the weights held by the evolution strategy into the model before rendering, instead of
# relying on whichever weights the most recent evaluate_model call left in the shared model.
model.set_weights(es.get_weights())
render_env(model, env)