<a href="https://colab.research.google.com/github/tomcotter7/OpenAI-GeneticAlgorithms/blob/main/neuroevolution_robotics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Required Imports

In [260]:
#Include this at the top of your colab code
import os
import gym
import random 
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

# One-time MuJoCo installation for this machine. The marker file
# '.mujoco_setup_complete' (created by the last command, in the current working
# directory) makes re-running the cell skip everything below.
# NOTE: lines starting with `!` are IPython shell escapes, so this cell only
# runs inside a notebook, not as a plain Python script.
if not os.path.exists('.mujoco_setup_complete'):
  # Get the prereqs
  !apt-get -qq update
  !apt-get -qq install -y libosmesa6-dev libgl1-mesa-glx libglfw3 libgl1-mesa-dev libglew-dev patchelf
  # Get Mujoco: download the 2.1.0 archive, unpack it under ~/.mujoco, delete the archive
  !mkdir ~/.mujoco
  !wget -q https://mujoco.org/download/mujoco210-linux-x86_64.tar.gz -O mujoco.tar.gz
  !tar -zxf mujoco.tar.gz -C "$HOME/.mujoco"
  !rm mujoco.tar.gz
  # Append the library paths to ~/.bashrc (the original author notes these only do so much)
  !echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/.mujoco/mujoco210/bin' >> ~/.bashrc 
  !echo 'export LD_PRELOAD=$LD_PRELOAD:/usr/lib/x86_64-linux-gnu/libGLEW.so' >> ~/.bashrc 
  # THE ANNOYING ONE, FORCE IT INTO LDCONFIG SO WE ACTUALLY GET ACCESS TO IT THIS SESSION
  # NOTE(review): this path hard-codes /root while the commands above use $HOME —
  # assumes the notebook runs as root; confirm before using outside Colab.
  !echo "/root/.mujoco/mujoco210/bin" > /etc/ld.so.conf.d/mujoco_ld_lib_path.conf
  !ldconfig
  # Install Mujoco-py
  !pip3 install -U 'mujoco-py<2.2,>=2.1'
  # run once: drop the marker file checked by the `if` above
  !touch .mujoco_setup_complete

# Per-kernel MuJoCo environment setup.
#
# `_mujoco_run_once` survives cell re-runs within a kernel session. It is now
# set to True once setup finishes, and each path is only appended when it is
# not already present, so re-running the cell no longer keeps growing
# LD_LIBRARY_PATH / LD_PRELOAD.
try:
  _mujoco_run_once
except NameError:
  _mujoco_run_once = False
if not _mujoco_run_once:
  # Add the MuJoCo binaries and libGLEW to the actively loaded paths.
  for _var, _entry in (
      ('LD_LIBRARY_PATH', '/root/.mujoco/mujoco210/bin'),
      ('LD_PRELOAD', '/usr/lib/x86_64-linux-gnu/libGLEW.so'),
  ):
    _current = os.environ.get(_var, '')
    if not _current:
      os.environ[_var] = _entry
    elif _entry not in _current.split(':'):
      os.environ[_var] = _current + ':' + _entry
  # presetup so we don't see output on first env initialization
  import mujoco_py
  # Only mark setup as done once the import above has succeeded.
  _mujoco_run_once = True

# Agent Class

In [293]:
class Agent:
    """A named individual whose genome is the parameters of a Keras network.

    The network takes a 61-value input and produces 20 outputs. The ``gpu``
    flag chooses between a deeper and a shallower architecture (see
    ``build_model``).
    """

    def __init__(self, name, gpu):
        """Store ``name`` and ``gpu`` and build a freshly initialised network."""
        self.name = name
        self.gpu = gpu
        self.nn = self.build_model(gpu)

    def build_model(self, gpu):
        """Return a new ``tf.keras.Sequential`` network.

        With ``gpu`` truthy the network is 61 -> 128 -> 100 -> 64 -> 32 -> 28
        -> 20 with ReLU hidden layers and a linear output. Otherwise it is
        61 -> 64 -> 32 -> 20 with ReLU hidden layers and a sigmoid output.
        """
        dense = tf.keras.layers.Dense
        if gpu:
            stack = [
                tf.keras.layers.Input(shape=(61,)),
                dense(128, activation="relu"),
                dense(100, activation="relu"),
                dense(64, activation="relu"),
                dense(32, activation="relu"),
                dense(28, activation="relu"),
                dense(20),
            ]
        else:
            stack = [
                dense(64, input_shape=(61,), activation="relu"),
                dense(32, activation="relu"),
                dense(20, activation="sigmoid"),
            ]

        network = tf.keras.Sequential()
        for piece in stack:
            network.add(piece)
        return network

    def update_weights(self, new_weights):
        """Load ``new_weights[i]`` into the i-th layer of the network.

        Each entry is passed unchanged to that layer's ``set_weights``.
        """
        for position, layer in enumerate(self.nn.layers):
            layer.set_weights(new_weights[position])

    def get_weights(self):
        """Return, per layer, the first array of ``layer.get_weights()``."""
        kernels = []
        for layer in self.nn.layers:
            kernels.append(layer.get_weights()[0])
        return kernels

    def get_bias(self):
        """Return, per layer, the second array of ``layer.get_weights()``."""
        biases = []
        for layer in self.nn.layers:
            biases.append(layer.get_weights()[1])
        return biases


# Genetic Algorithm

In [285]:
# GLOBAL CONSTANTS 
# GPU is passed to every Agent built by create_initial_gen and selects which
# network Agent.build_model constructs: True -> the deeper network,
# False -> the smaller one.
GPU = True
# Number of evolve-then-evaluate iterations run by the GA loop.
GENERATIONS = 20
# Number of agents created for the initial population.
POP_SIZE = 15

## Run Agent on Environment Functions

In [308]:
# function to reshape the obs so that they can be passed into the network
def reshape_obs(obs, num=61):
    """Return ``obs`` as a single-row array of shape ``(1, num)``.

    The network's ``predict`` is called on this batch-of-one layout.
    """
    batch_shape = (1, num)
    return np.asarray(obs).reshape(batch_shape)


# function to create the initial generation
def create_initial_gen(n=10):
    """Create the first population: ``n`` new agents named agent0..agent{n-1}.

    Every agent is built with the module-level ``GPU`` flag.
    """
    return [Agent(f"agent{index}", gpu=GPU) for index in range(n)]


# function to run a generation across an environment
def run_gen_env(env, gen):
    """Evaluate every agent in ``gen`` on ``env``.

    Each agent is run five times through ``run_env`` and scored by the mean
    of the five returned rewards.

    Returns:
        The unchanged ``gen`` plus a list of ``(agent.name, mean_reward)``
        tuples in the same order as ``gen``.
    """
    scoreboard = []
    for agent in gen:
        episode_rewards = [run_env(env, agent.nn) for _ in range(5)]
        scoreboard.append((agent.name, np.mean(episode_rewards, axis=0)))
    return gen, scoreboard


# function to run an agent in an environment
def run_env(env, nn):
    """Run one episode of ``env`` driven by ``nn`` and return the last reward.

    The episode stops when the environment reports ``done``, when a step
    yields a reward of exactly 0, or after the step cap below. Only the
    reward of the final step is returned; earlier rewards are discarded.
    """
    # The original counter broke out when it reached 50 *after* stepping,
    # which allows 51 environment steps in total.
    max_steps = 51

    observation = env.reset()['observation']
    last_reward = -1
    for _ in range(max_steps):
        action = nn.predict(reshape_obs(observation))
        step_obs, last_reward, finished, _info = env.step(action[0])
        observation = step_obs['observation']
        if finished or last_reward == 0:
            break

    return last_reward

## Evolutionary Functions

In [294]:
# transforms the weights into 1-d numpy arrays

def flatten(agent_weights):
    """Turn per-layer arrays into ``(flat_copy, original_shape)`` pairs.

    Each flat copy is a new 1-D array built by concatenating onto an empty
    float array, so the caller's arrays are never aliased. The stored shape
    lets ``combine_weight_and_bias`` restore the layout later.
    """
    genome = []
    for layer in agent_weights:
        original_shape = layer.shape
        genes = np.concatenate((np.empty(0), np.ravel(layer)))
        genome.append((genes, original_shape))
    return genome

In [295]:
# crossover by randomly switching certain weights of each layer

def crossover(dna1, dna2, cr):
    """Swap genes between two flattened genomes, in place.

    ``dna1`` and ``dna2`` are lists of ``(flat_array, shape)`` pairs as
    produced by ``flatten``. For every position of every layer one random
    number is drawn; when it is below ``cr`` the two genomes exchange their
    values at that position. Nothing is returned.
    """
    for layer_idx, entry in enumerate(dna1):
        genes_a = entry[0]
        for pos in range(len(genes_a)):
            if random.uniform(0, 1) < cr:
                genes_b = dna2[layer_idx][0]
                genes_a[pos], genes_b[pos] = genes_b[pos], genes_a[pos]

In [296]:
# with probability mr, mutate a whole layer by multiplying each of its weights by its own random factor between 0.5 and 1.5

def mutate_layer_weight(lw, mr):
    """Randomly rescale one layer's values, or return the layer untouched.

    One random number is drawn per call. If it is below ``mr`` a new array is
    returned in which every element of ``lw`` is multiplied by its own factor
    drawn from [0.5, 1.5]; otherwise ``lw`` itself is returned.

    Because the change is a multiplication by a positive factor, zero-valued
    entries stay zero and no entry ever changes sign.
    """
    def _rescale(weight):
        return np.multiply(weight, random.uniform(0.5, 1.5))

    if random.uniform(0, 1) < mr:
        return np.vectorize(_rescale)(lw)
    return lw

In [297]:
# call mutate_layer_weight on each layer

def mutate(dna, mr):
    """Apply ``mutate_layer_weight`` to every layer of a flattened genome.

    ``dna`` is a list of ``(flat_array, shape)`` pairs. A new list of pairs
    is returned, with each shape carried over unchanged.
    """
    mutated = []
    for genes, shape in dna:
        mutated.append((mutate_layer_weight(genes, mr), shape))
    return mutated

In [298]:
def mutate_entire_agent(agent, mr=0.05):
    """Return a new Agent holding a mutated copy of ``agent``'s parameters.

    Weights and biases are flattened and mutated separately (weights first),
    both with rate ``mr``. The clone keeps the parent's ``name`` and ``gpu``
    flag. ``agent`` itself is not modified.
    """
    mutated_kernels = mutate(flatten(agent.get_weights()), mr)
    mutated_biases = mutate(flatten(agent.get_bias()), mr)

    clone = Agent(agent.name, agent.gpu)
    clone.update_weights(combine_weight_and_bias(mutated_kernels, mutated_biases))
    return clone



In [299]:
# perform crossover then mutation on the agents values (e.g. weights or biases) passed into the function
def evolution(agent1_values, agent2_values, cr, mr):
    """Cross over and then mutate two agents' per-layer values.

    The inputs are lists of per-layer arrays (weights or biases). Both are
    flattened, crossed over in place with rate ``cr``, then each is mutated
    with rate ``mr``.

    Returns:
        Two lists of ``(flat_array, shape)`` pairs, one per child.
    """
    dna_a, dna_b = flatten(agent1_values), flatten(agent2_values)
    crossover(dna_a, dna_b, cr)
    return mutate(dna_a, mr), mutate(dna_b, mr)
  

In [300]:
def combine_weight_and_bias(weights, biases):
    """Rebuild per-layer ``[weights, bias]`` lists from flattened genomes.

    ``weights`` and ``biases`` are lists of ``(flat_array, shape)`` pairs.
    Each flat array is reshaped to its stored shape and the two results for
    layer i are paired as ``[weight_array, bias_array]``.
    """
    combined = []
    for idx, weight_entry in enumerate(weights):
        kernel = np.reshape(weight_entry[0], weight_entry[1])
        bias_entry = biases[idx]
        bias = np.reshape(bias_entry[0], bias_entry[1])
        combined.append([kernel, bias])
    return combined

In [309]:
def full_evo(agent1, agent2):
    """Breed two parents into two children.

    Weights are evolved with crossover rate 0.4 and mutation rate 0.2, then
    biases with 0.1 and 0.1. Each child is a new Agent that takes the
    ``name`` and ``gpu`` flag of the parent in the same position.

    Returns:
        A tuple ``(child1, child2)``.
    """
    def _spawn(parent, kernels, biases):
        # The evolved values are still flat; restore layer shapes before loading.
        child = Agent(parent.name, parent.gpu)
        child.update_weights(combine_weight_and_bias(kernels, biases))
        return child

    kernels_1, kernels_2 = evolution(
        agent1.get_weights(), agent2.get_weights(), cr=0.4, mr=0.2)
    biases_1, biases_2 = evolution(
        agent1.get_bias(), agent2.get_bias(), cr=0.1, mr=0.1)

    first_child = _spawn(agent1, kernels_1, biases_1)
    second_child = _spawn(agent2, kernels_2, biases_2)
    return first_child, second_child

In [302]:
# Smoke test: breed two freshly initialised agents; the call returns a pair of child Agents.
full_evo(Agent("test", True), Agent("test", True))

[array([[-0.10802417, -0.13941199, -0.14725724, ..., -0.03590132,
        -0.129311  ,  0.11685979],
       [-0.06291599, -0.11259812,  0.06004842, ...,  0.01370895,
         0.16434634, -0.14025518],
       [-0.10669128,  0.0035543 , -0.10531288, ...,  0.17700848,
        -0.00983332, -0.03306046],
       ...,
       [-0.13729465, -0.08691733,  0.15313813, ..., -0.13788874,
        -0.07117555, -0.00027056],
       [ 0.02355093, -0.09500442,  0.02647603, ..., -0.05456118,
         0.03263813,  0.02095374],
       [ 0.12946224,  0.06677338,  0.01627354, ...,  0.15744182,
        -0.07578646,  0.01514158]], dtype=float32), array([[-0.05084975, -0.02976103, -0.10517196, ...,  0.02345252,
        -0.05281692, -0.00448467],
       [-0.01692662,  0.09198944,  0.04184642, ...,  0.15202604,
         0.04157738, -0.04039554],
       [-0.05899554, -0.07785743, -0.00473924, ...,  0.06618731,
        -0.02430093, -0.02298453],
       ...,
       [ 0.14539118, -0.07161734, -0.00315721, ..., -0.158

(<__main__.Agent at 0x7f5058a59150>, <__main__.Agent at 0x7f505d63b3d0>)

## Run GA

In [None]:
# Environment shared by every fitness evaluation in the GA loop.
# NOTE(review): `.env` takes the inner environment of whatever gym.make
# returns — presumably to drop gym's outer wrapper (e.g. its episode time
# limit), since run_env applies its own step cap; confirm.
env = gym.make('HandManipulateBlockDense-v0').env

def get_best_n_names(lst, n=2):
    """Return the names of the ``n`` highest-scoring entries, best first.

    ``lst`` is a list of ``(name, score)`` tuples. Higher scores are better:
    ``run_env`` stops an episode as soon as the reward reaches 0. The
    previous ascending sort therefore returned the worst agents. Ties keep
    their original relative order.
    """
    ranked = sorted(lst, key=lambda entry: entry[1], reverse=True)
    return [name for name, _ in ranked[:n]]


def get_best_nn(gen, gen_reward):
    """Build the next generation from ``gen`` and its scores.

    Args:
        gen: list of Agents in the current generation.
        gen_reward: list of ``(agent.name, score)`` tuples for ``gen``.

    Returns:
        A list of new Agents: mutated copies of the two best individuals,
        followed by two children from each of ``len(gen) // 2 - 1``
        tournaments. For an odd-sized ``gen`` this is one agent fewer than
        the input.

    Parents are looked up by name. Children produced by ``full_evo`` and
    ``mutate_entire_agent`` inherit their parents' names, so without
    renaming the next generation would contain duplicate names and the
    lookup would pick the wrong individuals. Every offspring is therefore
    given a fresh unique name before returning.
    """
    new_gen = []

    # Carry the two best individuals forward. Note these are mutated copies,
    # not exact clones.
    elite_names = get_best_n_names(gen_reward)
    elites = [mutate_entire_agent(indiv) for indiv in gen if indiv.name in elite_names]
    new_gen.extend(elites[:2])

    # Tournament selection: sample half the scoreboard and breed its two best.
    for _ in range(len(gen) // 2 - 1):
        contenders = random.sample(gen_reward, len(gen_reward) // 2)
        parent_names = get_best_n_names(contenders)
        parents = [indiv for indiv in gen if indiv.name in parent_names]
        child_a, child_b = full_evo(parents[0], parents[1])
        new_gen.append(child_a)
        new_gen.append(child_b)

    # Rename using the same "agent<i>" scheme as the initial population so
    # names are unique within the generation.
    for position, child in enumerate(new_gen):
        child.name = f"agent{position}"

    return new_gen

def best_score(gen_reward):
    """Return the highest score in ``gen_reward`` as a one-element list.

    ``gen_reward`` is a list of ``(name, score)`` tuples. Higher is better,
    since a reward of 0 ends an episode in ``run_env``; the previous
    ascending sort reported the worst score. The list return type is kept
    for existing callers, and an empty input gives an empty list.
    """
    ranked = sorted(gen_reward, key=lambda entry: entry[1], reverse=True)
    return [score for _, score in ranked[:1]]

# Run the genetic algorithm and plot the per-generation best and mean scores.
best_scores = []
mean_scores = []
initial_gen = create_initial_gen(n=POP_SIZE)
gen, gen_reward = run_gen_env(env, initial_gen)
for g in range(GENERATIONS):
    # Record the scores of the generation that was just evaluated, then
    # breed and evaluate the next one.
    best_scores.append(best_score(gen_reward))
    mean_scores.append(np.mean([score for _, score in gen_reward]))
    gen = get_best_nn(gen, gen_reward)
    gen, gen_reward = run_gen_env(env, gen)

# The last generation has been evaluated by the final loop iteration; record
# it too, otherwise that evaluation is wasted and never reaches the plot.
best_scores.append(best_score(gen_reward))
mean_scores.append(np.mean([score for _, score in gen_reward]))

plt.figure(figsize=(10,10))
plt.plot(best_scores, label="Best Score")
plt.plot(mean_scores, label="Mean Score")
plt.legend(loc="upper right")
