In [1]:
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env


class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        mujoco_env.MujocoEnv.__init__(self, "ant.xml", 5)
        utils.EzPickle.__init__(self)

    def step(self, a):
        xposbefore = self.get_body_com("torso")[0]
        self.do_simulation(a, self.frame_skip)
        xposafter = self.get_body_com("torso")[0]
        forward_reward = (xposafter - xposbefore) / self.dt
        ctrl_cost = 0.5 * np.square(a).sum()
        contact_cost = (
            0.5 * 1e-3 * np.sum(np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
        )
        survive_reward = 1.0
        reward = forward_reward - ctrl_cost - contact_cost + survive_reward
        state = self.state_vector()
        notdone = np.isfinite(state).all() and state[2] >= 0.2 and state[2] <= 1.0
        done = not notdone
        ob = self._get_obs()
        return (
            ob,
            reward,
            done,
            dict(
                reward_forward=forward_reward,
                reward_ctrl=-ctrl_cost,
                reward_contact=-contact_cost,
                reward_survive=survive_reward,
            ),
        )

    def _get_obs(self):
        return np.concatenate(
            [
                self.sim.data.qpos.flat[2:],
                self.sim.data.qvel.flat,
                np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
            ]
        )

    def reset_model(self):
        qpos = self.init_qpos + self.np_random.uniform(
            size=self.model.nq, low=-0.1, high=0.1
        )
        qvel = self.init_qvel + self.np_random.randn(self.model.nv) * 0.1
        self.set_state(qpos, qvel)
        return self._get_obs()

    def viewer_setup(self):
        self.viewer.cam.distance = self.model.stat.extent * 0.5

In [1]:
import gym
import numpy as np
import tensorflow as tf
import glfw
import random

# sess = tf.compat.v1.Session()
ACTION_LEN = 120
FIRST_GENERATION_SIZE = 100
NEXT_GENERATION_SIZE = 5
NUM_GENERATIONS = 20
NUM_PERISH = 60
# ACTION_LEN = 5
# FIRST_GENERATION_SIZE = 5
# NEXT_GENERATION_SIZE = 80
# NUM_GENERATIONS = 20
# NUM_PERISH = 2

env = gym.make('Ant-v2')
obs = env.reset()
obs_dim = env.observation_space.shape[0]
act_dim = env.action_space.shape[0]

print (obs_dim,act_dim)
env.render()

# action_test = np.random.randn(act_dim,1)
def generate_action():   
    # generates an initial candidate action
    # action consists of ACTION_LEN number of random moves
    action = []
    for i in range(ACTION_LEN):
        move = np.random.uniform(low=-1, high=1, size=act_dim)
        action.append(move)
    return action

def generate_first_generation():
    # generates the first generation
    population = []
    for i in range(FIRST_GENERATION_SIZE):
        population.append(generate_action())
    return population

def mate(cand_1, cand_2):
    move_len = len(cand_1[0])
    split_point = (move_len)//2
    #print('SPLIT_POINT', split_point)
    new_action = []
    action_len = len(cand_1)
    index = 0
    for index in range(action_len):
        new_move = []
        move_1 = cand_1[index] 
        move_2 = cand_2[index]
        #print("MOVE 1 : ", move_1)
        if (np.random.choice([True, False])):
            new_move.append(move_1[:split_point])
            new_move.append(move_2[split_point:])
            new_move = np.array([element for sublist in new_move for element in sublist])
            #print('NEW_MOVE IS', new_move, len(new_move))
        else:
            new_move.append(move_2[:split_point])
            new_move.append(move_1[split_point:])
            new_move = np.array([element for sublist in new_move for element in sublist])

        new_action.append(new_move)
    #print(new_action)
    return np.array(new_action)

def perform_natural_selection(current_generation, rewards):
    # generation results sorted in ascending order by rewards    
    parents = []
    generation_results = sorted(rewards.items(), key=lambda kv: kv[1])
    selected = generation_results[NUM_PERISH:]
    print(selected)
    index = 0
    #for candidate in generation_results:
    for candidate in selected:
        cand_index = candidate[0]
        action = current_generation[cand_index]
        for i in range(index + 1): # add more if performed better to give higher probability of high performers to reproduce
            parents.append(action)
        index += 1
    propagate = current_generation[selected[-1][0]]
    random.shuffle(parents)
    return parents, selected[-1], propagate
    #return parents, generation_results[-1]

def create_offspring(parents, best):
    offspring = []
    #print('Propagate', propagate)
    #print('BEST', best)
    for i in range(NEXT_GENERATION_SIZE):
        first_p_ind = random.randint(0, len(parents)-1)
        second_p_ind = random.randint(0, len(parents)-1)                                     
        
        first_parent = parents[first_p_ind]
        second_parent = parents[second_p_ind]
                                      
        child = mate(first_parent, second_parent)
        offspring.append(child)
    
    offspring.append(best)
    #print('OFFSPRING', offspring)
    

    return offspring
    
def evolve():                                      
    
    print('START EVOLUTION')
    population = generate_first_generation()
    
    for generation in range(NUM_GENERATIONS):
        rewards = {}
        index = 0


        for candidate in population:
            # action_test = np.random.randn(act_dim,1)
            # action = np.random.uniform(low=-1, high=1, size=act_dim)
            env.reset()
            total_reward = 0
            for move in candidate:
                env.render()
                move = move.reshape((1,-1)).astype(np.float32)
                obs, reward, done, _ = env.step(np.squeeze(move, axis=0))
                total_reward += reward
            rewards[index] = total_reward
            index += 1
            #print(total_reward)
                                      
        parents, best, propagate = perform_natural_selection(population, rewards)
        population = create_offspring(parents, propagate)                                    
        print('GENERATION : ', generation, ', BEST : ', best)
    print('EVOLUTION HAS FINISHED')



                   
# for i in range(1000):
#     env.render()
#     action = np.random.randn(act_dim,1)
#     action = action.reshape((1,-1)).astype(np.float32)
#     obs, reward, done, _ = env.step(np.squeeze(action, axis=0))
evolve()
env.close()
glfw.terminate()

111 8
Creating window glfw
START EVOLUTION
[(61, -35.14874482133368), (2, -33.9457600746676), (54, -33.71245582404593), (25, -32.926654542862344), (21, -32.83079978752921), (73, -31.591907333914605), (9, -30.9916639119981), (96, -29.667163200244676), (45, -28.85639172170742), (71, -27.421464909834235), (86, -27.370325255982607), (69, -25.873200693157184), (80, -24.748006096145495), (87, -24.741350421988404), (51, -22.47293107667204), (63, -22.156095454867515), (92, -22.046620035172378), (88, -21.71408828111673), (40, -20.72434206298357), (6, -16.28034020290265), (0, -15.94998655875721), (79, -14.835681785238556), (41, -14.163931765616898), (47, -13.865652476201321), (32, -13.57352821234236), (10, -12.855590343379472), (50, -9.160807707750573), (56, -9.095337684299261), (65, -6.953879192675463), (28, -6.7313992659406), (24, 4.5836354378027675), (23, 4.615995984251413), (62, 5.024138429153837), (15, 8.915658108201573), (67, 13.983144276188685), (34, 17.243493498868894), (78, 17.681320087

IndexError: list index out of range

In [1]:
print(obs)

NameError: name 'obs' is not defined

In [2]:
print(reward)

-1.2841226817445293


In [3]:
print(done)

True
