In [1]:
# IMPORT LIBRARIES

import gym
import numpy as np
import glfw
import random

In [2]:
# CONFIGURE HYPERPARAMETERS

ACTION_LEN = 1000
FIRST_GENERATION_SIZE = 100
NEXT_GENERATION_SIZE = 100
NUM_GENERATIONS = 20
DECIMAL_PERISH = 0.6

In [3]:
# ENVIRONMENT SETUP

env = gym.make('Ant-v2')
obs = env.reset()
obs_dim = env.observation_space.shape[0]
act_dim = env.action_space.shape[0]

print(obs_dim, act_dim)
#env.render()

111 8


In [4]:
class Candidate:
    num_instances = 1

    def __init__(self, action_len, gen):
        self.action_len = action_len
        self.gen = gen
        self.cand_num = Candidate.num_instances
        self.reward = 0
        self.moves = []
        Candidate.num_instances += 1
    
    def set_reward(self, reward):
        self.reward = reward
        
    def add_move(self, move):
        self.moves.append(move)
    
    def __str__(self):
        cand_str = "Candidate {cand_num}, Generation {gen}, Reward {reward}\n\n"
        return cand_str.format(cand_num=self.cand_num, gen=self.gen, reward=self.reward)
    
    def __add__(self, other):
        # mates self with other, produces offspring
        offspring = Candidate(self.action_len, self.gen + 1)
        
        # coin flip
        if (np.random.choice([True, False])):
            for i in range(self.action_len):
                if (i % 2 == 0):
                      offspring.add_move(self.moves[i])
                else:
                      offspring.add_move(other.moves[i])
        else:
            for i in range(self.action_len):
                if (i % 2 == 0):
                    offspring.add_move(other.moves[i])
                else:
                    offspring.add_move(self.moves[i])            
        return offspring
                
        
    
        
        

In [5]:



def generate_candidate():

    initialCandidate = Candidate(ACTION_LEN, 0)
    for i in range(initialCandidate.action_len):
        move = np.random.uniform(low=-1, high=1, size=act_dim)
        initialCandidate.add_move(move)
    return initialCandidate

def generate_first_generation():
    population = []
    for i in range(FIRST_GENERATION_SIZE):
        population.append(generate_candidate())
    return population

# def generate_first_generation():
#     population = []
#     cand = generate_candidate()
#     for i in range(FIRST_GENERATION_SIZE):
#         population.append(cand)
#     return population

def perform_natural_selection(current_population):
    parents = []
    NUM_PERISH = (int) (NEXT_GENERATION_SIZE * DECIMAL_PERISH)
    
    sorted_by_reward = sorted(current_population, key=lambda cand: cand.reward) 
    #print("SORTED LENGTH", len(sorted_by_reward))
    
    #for i in range(len(sorted_by_reward)):
    #    print(sorted_by_reward[i])
    selected = sorted_by_reward[NUM_PERISH:]
    #print("SELECTED LENGTH", len(selected))
    #for i in range(len(selected)):
    #    print(selected[i])    
    best = selected[-1]
    #print("BEST", best)
    num_to_add = 1
    for candidate in selected:
        for i in range(num_to_add):
            parents.append(candidate)
        num_to_add += 1
    #print("PARENTS LENGTH", len(parents))
    random.shuffle(parents)
    return parents, best

def create_offspring(parents, best):
    offspring = []
    offspring.append(best)
    
    for i in range(NEXT_GENERATION_SIZE):
        f_parent_ind = random.randint(0, len(parents) - 1)
        s_parent_ind = random.randint(0, len(parents) - 1)
        
        f_parent = parents[f_parent_ind]
        s_parent = parents[s_parent_ind]
        
        child = f_parent + s_parent
        offspring.append(child)

    return offspring

def evolve():
    print('START EVOLUTION')
    population = generate_first_generation()

    
    for generation in range(NUM_GENERATIONS):
        for i in population:
            print(i.moves[1])
        for candidate in population:
            env.reset()
            env.seed(1)
            total_reward = 0
            cand_done = False
            moves_taken = 0
            while ((not cand_done) and (moves_taken < candidate.action_len)):
                env.render()
                move = candidate.moves[moves_taken].reshape((1,-1)).astype(np.float32)
                obs, reward, done, _ = env.step(np.squeeze(move, axis=0))
                cand_done = done
                total_reward += reward
                moves_taken += 1
                #print(moves_taken, candidate.action_len, len(candidate.moves))
            candidate.set_reward(total_reward)
            env.reset()

        parents, best = perform_natural_selection(population)
        population = create_offspring(parents, best)
        #sorted_by_reward = sorted(population, key=lambda cand: cand.reward)
        #print("best of offspring", sorted_by_reward[-1])

        print('GENERATION : ', generation, ', BEST : ', best)
    print('EVOLUTION HAS FINISHED')
        
    
    

In [6]:
evolve()
env.close()
glfw.terminate()

START EVOLUTION
[ 0.07538948  0.48989891  0.59100518  0.31067777 -0.63579731  0.85019025
  0.7830583   0.25147164]
[-0.87290908 -0.80141082 -0.00171853  0.35501875 -0.22610715 -0.58201066
 -0.66405268  0.32533118]
[-0.34189909 -0.73407373 -0.95844952 -0.89086757  0.27138662 -0.40749055
 -0.61341939 -0.27724359]
[ 0.05663698  0.80220946 -0.6781765  -0.91992342  0.34063717 -0.47911232
 -0.00708259 -0.13599651]
[ 0.87117475  0.6410836  -0.81309859  0.52902661 -0.89438489 -0.32332884
 -0.87043974  0.23851721]
[ 0.67151594  0.22660327 -0.79506693  0.48063849  0.9969875  -0.93141656
  0.5500456  -0.73661505]
[ 0.31879112 -0.26845377  0.78343984  0.97956393 -0.37163945 -0.20971542
 -0.80623829 -0.78345147]
[-0.5869105  -0.85899286  0.02139769  0.14748951 -0.18564741 -0.23861097
  0.30554285 -0.88164312]
[ 0.90569939 -0.91251186 -0.51343452  0.76904061  0.70499198  0.29095977
 -0.33475169 -0.75093404]
[-0.51799005 -0.16769186 -0.92216946  0.75441692  0.08452771 -0.12299759
  0.02082311 -0.1266

SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
