In [1]:
# requires gym, pygame
import gym, os
from genepro.node_impl import *
from genepro.evo import Evolution

# Environment in which the experiments take place
env_name = "CartPole-v1"
env = gym.make(env_name)

# Leaf (terminal) nodes for GP: one Feature per input column, where the input
# is the environment observation plus one extra column for the action at time t-1
num_features = 1 + env.observation_space.shape[0]
leaf_nodes = [Feature(idx) for idx in range(num_features)]
leaf_nodes.append(Constant()) # allow to use random constants
# Internal (operator) nodes for GP
internal_nodes = [Plus(), Minus(), Times(), Div(), Log()]

# create a fitness function for that environment
def fitness_function(tree, num_episodes=10, episode_duration=500, render=False, ignore_done=False):
  """Score a GP tree by the total reward it collects in the global `env`.

  For each episode the environment is reset and then stepped at most
  `episode_duration` times. At every step the tree is called on a (1, n)
  array made of the current observation followed by the previous action,
  and its output is thresholded at 0.5 to pick action 0 or 1.

  Both gym step/reset conventions are accepted: the older one
  (`reset() -> obs`, `step() -> (obs, reward, done, info)`) and the newer one
  (`reset() -> (obs, info)`, `step() -> (obs, reward, terminated, truncated, info)`).

  Args:
    tree: callable taking the (1, n) input array and returning an array-like
      holding a single value (it must support `.astype(float).squeeze()`).
    num_episodes: number of episodes to run.
    episode_duration: maximum number of steps per episode.
    render: if True, call `env.render()` before every step.
    ignore_done: if True, keep stepping after the environment reports the
      episode is over, until `episode_duration` steps have been taken.

  Returns:
    The sum of all rewards collected over all episodes.
  """
  rewards = list()
  for _ in range(num_episodes):
    # get initial state; newer gym versions return (observation, info)
    reset_result = env.reset()
    if isinstance(reset_result, tuple) and len(reset_result) == 2 and isinstance(reset_result[1], dict):
      observation = reset_result[0]
    else:
      observation = reset_result
    # we do not have an action at time -1, let's set it randomly
    action = env.action_space.sample()
    for _ in range(episode_duration):
      if render:
        env.render()
      # build up the input sample for GP
      input_sample = np.concatenate((observation, [action])).reshape((1,-1))
      # get output (squeezing because it is encapsulated in an array)
      output = tree(input_sample).astype(float).squeeze()
      action = 0 if output < .5 else 1
      step_result = env.step(action)
      if len(step_result) == 5:
        # newer API: the episode is over when it is terminated OR truncated
        observation, reward, terminated, truncated, _info = step_result
        done = terminated or truncated
      else:
        observation, reward, done, _info = step_result
      rewards.append(reward)
      if done and not ignore_done:
        break

  # compute and return fitness
  fitness = np.sum(rewards)
  return fitness





In [2]:
# hyper-parameters of the evolutionary run, gathered in one place
evolution_settings = dict(
  pop_size=128,
  max_gens=20,
  max_tree_size=31,
  n_jobs=4,
  verbose=True,
)

# build the evolution from the fitness function and the node sets, then run it
evo = Evolution(fitness_function, internal_nodes, leaf_nodes, **evolution_settings)
evo.evolve()







gen: 1,	best of gen fitness: 794.000,	best of gen size: 23
gen: 2,	best of gen fitness: 1270.000,	best of gen size: 24
gen: 3,	best of gen fitness: 2015.000,	best of gen size: 15
gen: 4,	best of gen fitness: 2274.000,	best of gen size: 15
gen: 5,	best of gen fitness: 4540.000,	best of gen size: 28
gen: 6,	best of gen fitness: 5000.000,	best of gen size: 28
gen: 7,	best of gen fitness: 5000.000,	best of gen size: 28
gen: 8,	best of gen fitness: 5000.000,	best of gen size: 28
gen: 9,	best of gen fitness: 5000.000,	best of gen size: 30
gen: 10,	best of gen fitness: 5000.000,	best of gen size: 28
gen: 11,	best of gen fitness: 5000.000,	best of gen size: 24
gen: 12,	best of gen fitness: 5000.000,	best of gen size: 24
gen: 13,	best of gen fitness: 5000.000,	best of gen size: 28
gen: 14,	best of gen fitness: 5000.000,	best of gen size: 28
gen: 15,	best of gen fitness: 5000.000,	best of gen size: 26
gen: 16,	best of gen fitness: 5000.000,	best of gen size: 19
gen: 17,	best of gen fitness: 5000

In [3]:
# fetch elite: take the last entry recorded in evo.best_of_gens
best = evo.best_of_gens[-1]
best_readable = best.get_readable_repr()
print(best_readable)

(((((x_2+x_3)*(log(abs(x_2))*-7.121002061758766))*(log(abs(x_2))*-4.429066357034663))*3.230677925414046)-(((x_0*x_1)/((x_3+x_1)+x_3))*log(abs((x_4-x_0)))))


In [4]:
# let sympy simplify the readable expression of the elite
# (only possible when the symbols in the expression are compatible with sympy)
from sympy import simplify
readable_expr = best.get_readable_repr()
simpl_repr = simplify(readable_expr)
print(simpl_repr)

(-x_0*x_1*log(Abs(x_0 - x_4)) + 101.893613186628*(x_1 + 2*x_3)*(x_2 + x_3)*log(Abs(x_2))**2)/(x_1 + 2*x_3)


In [5]:
# you can try the line below, but it does not work on some machines
#fitness_function(best, num_episodes=3, episode_duration=1000, render=True, ignore_done=True)

# alternatively, what follows uses an external evaluation script
import subprocess, sys
# Pass the tree as a single argv entry instead of building a shell string:
# no shell quoting is involved, so it works regardless of the platform's shell
# and of any quote characters in the tree's string form. sys.executable runs
# the script with the same interpreter as this kernel.
# The trailing .returncode keeps the exit code as the cell's displayed value.
subprocess.run([sys.executable, "run_gym_tree.py", str(best.get_subtree())]).returncode


MovieWriter imagemagick unavailable; using Pillow instead.


0