<a href="https://colab.research.google.com/github/p3rpl3x1ty/NeuroEvolution/blob/main/NeuroEvolution_fromScratch_Cartpool.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the Python packages (gym, pyvirtualdisplay) and system packages
# (xvfb, python-opengl, ffmpeg) this notebook relies on. All installer output,
# including errors, is redirected to /dev/null, so a failed install is silent.
!pip install gym pyvirtualdisplay > /dev/null 2>&1
!apt-get install -y xvfb python-opengl ffmpeg > /dev/null 2>&1

In [None]:
import gym
from gym.wrappers import Monitor
import glob
import io
import base64
from IPython.display import HTML
from pyvirtualdisplay import Display
from IPython import display as ipythondisplay

# Start a non-visible 1400x900 virtual display via pyvirtualdisplay
# (presumably so gym can render frames on a headless machine such as Colab).
display = Display(visible=0, size=(1400, 900))
display.start()

"""
Utility functions to enable video recording of gym environment 
and displaying it.
To enable video, just do "env = wrap_env(env)"
"""

def show_video():
  """Embed a recorded episode from ``video/`` in the notebook output.

  Searches ``video/*.mp4``, takes the first match returned by ``glob``,
  base64-encodes it and displays it as an inline HTML5 ``<video>`` element.
  Prints "Could not find video" when no MP4 file is present.
  """
  mp4list = glob.glob('video/*.mp4')
  if len(mp4list) > 0:
    mp4 = mp4list[0]
    # Read-only binary mode inside a context manager: the handle is closed
    # deterministically and no write permission on the file is required.
    with io.open(mp4, 'rb') as video_file:
      video = video_file.read()
    encoded = base64.b64encode(video)
    ipythondisplay.display(HTML(data='''<video alt="test" autoplay 
                loop controls style="height: 400px;">
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
             </video>'''.format(encoded.decode('ascii'))))
  else: 
    print("Could not find video")
    

def wrap_env(env):
  """Return ``env`` wrapped in a gym ``Monitor`` that writes to ``./video``.

  The wrapper is created with ``force=True``; ``show_video`` reads the MP4
  files from that same directory.
  """
  return Monitor(env, './video', force=True)

In [None]:
import numpy as np
import gym

In [None]:
# Fully Connected Neural Network
# Used in conjunction with NeuroEvolution instead of Backpropagation
class neural_network:
    """A population of fully connected networks evolved by selection and crossover.

    Every genome lives in ``self.population`` under an integer id as
    ``[weights, genome_id, fitness]``. ``weights`` holds one matrix per layer
    transition: input->hidden, hidden->hidden (``hidden_layers - 1`` times),
    hidden->output. ``self.current_best`` is a six-entry leaderboard of
    ``[genome_id, score]`` pairs; this class only reads it, the scores are
    written by the caller (see ``update_score``).

    ``mutation`` and ``bias`` are stored but not used by any method of this
    class, and ``custom_weights`` is accepted but currently ignored.
    """

    def __init__(self, inputs=1, outputs=1, hidden_layers=1, hidden_nodes=1, population_size=100,
                 mutation=0.1, bias=1, custom_weights=None):
        self.inputs = inputs                    # Number of input connections
        self.outputs = outputs                  # Number of output connections
        self.hidden_layers = hidden_layers      # Number of hidden layers
        self.hidden_nodes = hidden_nodes        # Number of nodes in a given hidden layer
        self.population_size = population_size  # Population size of each generation
        self.mutation = mutation                # Likelihood of mutation
        self.bias = bias                        # The bias
        self.population_counter = 0             # Next unused genome id
        self.population = None
        self.new_population = {}                # Scratch dict used while breeding
        self.current_best = [[0, 0] for _ in range(6)]

    def create_population(self, population=None):
        """Add ``population_size`` randomly initialised genomes to a population.

        Args:
            population: dict to extend and adopt as ``self.population``. When
                omitted, this instance's current population is extended (or a
                fresh dict is started if there is none yet). A ``None`` default
                replaces the former ``{}`` default, which was one dict object
                shared by every instance and every call.
        """
        # This needs work as it only allows for hidden layers to all be equal in size.
        # i.e, if you have 2 hidden layers each layer would have the same number of neurons.
        if population is None:
            population = self.population if self.population is not None else {}
        self.population = population
        for _ in range(int(self.population_size)):
            weights = [self.create_weights(self.inputs, self.hidden_nodes)]
            for _layer in range(1, self.hidden_layers):
                weights.append(self.create_weights(self.hidden_nodes, self.hidden_nodes))
            weights.append(self.create_weights(self.hidden_nodes, self.outputs))
            # Genome record: [weights, genome id, fitness]
            self.population[self.population_counter] = [weights, self.population_counter, 0]
            self.population_counter += 1

    def create_weights(self, l1_nodes, l2_nodes):
        """Return an ``(l1_nodes, l2_nodes)`` matrix drawn uniformly from [-0.5, 0.5).

        ``l1_nodes`` is the size of the source layer and ``l2_nodes`` the size
        of the layer it connects to.
        """
        return np.random.uniform(-0.5, 0.5, (l1_nodes, l2_nodes))

    def crossover(self, parents):
        """Breed ``population_size / 4`` children and add them to ``self.new_population``.

        Only the first two genomes of ``parents`` (in dict order) contribute.
        For every layer, each row of the weight matrix (all outgoing weights of
        one source node) is copied from one of the two, chosen with equal
        probability. If ``parents`` holds a single genome the children are
        copies of it; if it is empty nothing happens.

        Args:
            parents: dict mapping genome id to ``[weights, genome_id, fitness]``.
        """
        keys = list(parents.keys())
        if not keys:
            return
        pair = keys[:2] if len(keys) > 1 else [keys[0], keys[0]]
        for _ in range(int(self.population_size / 4)):
            child = []
            for layer in range(self.hidden_layers + 1):
                row_count = len(parents[pair[0]][0][layer])
                rows = []
                for connection in range(row_count):
                    # Convert the comparison to a plain int: indexing a list
                    # with a NumPy bool is deprecated/rejected by NumPy.
                    donor = pair[int(np.random.random() > .5)]
                    rows.append(parents[donor][0][layer][connection])
                # Stack into a fresh array so children have the same layer
                # type as create_weights() output and do not alias parent rows.
                child.append(np.array(rows))
            self.new_population[self.population_counter] = [child, self.population_counter, 0]
            self.population_counter += 1

    def next_generation(self):
        """Replace ``self.population`` with the next generation.

        While the leaderboard total is below 1, a batch of random genomes is
        created. Otherwise the next population is made of the leaderboard
        genomes, their crossover children and ``population_size`` new random
        genomes.
        """
        # Note: each entry's id and score are added together in this total.
        if sum([sum(i) for i in self.current_best]) < 1:
            self.create_population()
        else:
            self.new_population = {}
            for genome_id, _score in self.current_best:
                self.new_population[genome_id] = self.population[genome_id]

            self.crossover(self.new_population)

            # Adopt the survivors + children dict and top it up with fresh genomes.
            self.create_population(self.new_population)

            self.new_population = {}


In [None]:
##############################################################################
########### ------------HELPER FUNCTIONS -----------------   #################
##############################################################################
def relu(x):
    """Rectified linear unit: keep strictly positive entries of ``x``, zero the rest."""
    is_positive = x > 0
    return np.where(is_positive, x, 0)

def softmax(x):
    """Return the softmax of ``x``, normalised over all of its entries.

    The maximum is subtracted before exponentiating, and any exponential
    that comes out as exactly zero is replaced by 1e-15 before normalising.
    """
    exps = np.exp(x - np.max(x))
    underflowed = exps == 0
    exps[underflowed] = 1e-15
    total = exps.sum()
    return np.array(exps / total)

def act(state, weights, bias):
    """Run a forward pass and return the index of the strongest output.

    ``state`` is multiplied through each matrix in ``weights`` in order;
    after every layer (the last one included) ``bias`` is added and ReLU is
    applied. The final activations go through softmax and the argmax is
    returned.
    """
    activation = relu((state @ weights[0]) + bias)
    for layer_weights in weights[1:]:
        activation = relu((activation @ layer_weights) + bias)
    return np.argmax(softmax(activation))

def update_score(nn, genome, score):
    """Fold an episode ``score`` into a genome's fitness and the leaderboard.

    The genome's stored fitness becomes the mean of its previous fitness and
    ``score``. If the genome currently occupies one of the first two
    leaderboard slots, that slot's score is refreshed with the new fitness.
    The leaderboard (``nn.current_best``) is then sorted by score, highest
    first, and the genome replaces the weakest entry when ``score`` beats it
    and the genome is not already listed.
    """
    def by_score(entry):
        return entry[1]

    record = nn.population[genome]
    record[2] = (record[2] + score) / 2

    # Only the two leading slots are refreshed with the averaged fitness.
    for rank in (0, 1):
        leader = nn.current_best[rank]
        if leader[0] == genome:
            leader[1] = record[2]

    leaderboard = nn.current_best
    leaderboard.sort(key=by_score, reverse=True)
    # Never list a genome twice: skip the insert if it is already on the board.
    if score > leaderboard[-1][1] and all(entry[0] != genome for entry in leaderboard):
        leaderboard.insert(0, [genome, score])
        leaderboard.pop()
        leaderboard.sort(key=by_score, reverse=True)

In [None]:
def train(generations=2, population_size=100, episodes_per_genome=5, max_steps=10000):
    """Evolve CartPole controllers and return the best genome found.

    Each generation builds the next population, then plays every genome for
    ``episodes_per_genome`` episodes and feeds each episode's score (the index
    of its last step) to ``update_score``.

    Args:
        generations: number of generations to run.
        population_size: passed to ``neural_network`` as its population size.
        episodes_per_genome: episodes played by every genome per generation.
        max_steps: hard cap on the number of steps in one episode.

    Returns:
        The ``[weights, genome_id, fitness]`` record of the genome at the top
        of ``nn.current_best`` after the last generation.
    """
    # Initialize Network
    nn = neural_network(inputs=4, outputs=2, hidden_layers=1, hidden_nodes=8,
                        population_size=population_size)

    # Set up our gym environment
    env = gym.make('CartPole-v0')
    # Increasing max steps
    env._max_episode_steps = 1e20
    try:
        print("Generation --- Best Genome Score --- Best Genome ID")
        for generation in range(generations):
            nn.current_best.sort(key=lambda x: x[1], reverse=True)
            print(generation, nn.current_best[0][1], nn.current_best[0][0])
            nn.next_generation()
            for genome in nn.population:
                for _ in range(episodes_per_genome):
                    state = env.reset()
                    for t in range(max_steps):
                        state, _, done, _ = env.step(
                            act(np.array(state), nn.population[genome][0], nn.bias))
                        if done:
                            update_score(nn, genome, t)
                            break
                    else:
                        # The episode survived the whole step budget. Score it
                        # too; otherwise the strongest genomes would never be
                        # credited for their best episodes.
                        if max_steps > 0:
                            update_score(nn, genome, max_steps - 1)
    finally:
        # Closing our environment, even if an episode raised
        env.close()

    # Getting our final reward
    print("Best genomes: {}".format(nn.current_best))
    print(nn.population_counter)
    return nn.population[nn.current_best[0][0]]

In [None]:
def test_network(network):
    """Play one recorded CartPole episode with an evolved genome.

    Args:
        network: genome record whose first item is the list of weight
            matrices and whose second item is the genome id (the layout
            returned by ``train``).

    Prints the genome id and the index of the step on which the episode
    ended. The episode is recorded through ``wrap_env``.
    """
    weights = network[0]
    genome = network[1]

    # Set up our gym environment
    env = gym.make('CartPole-v0')
    # Increasing max steps
    env._max_episode_steps = 1000
    env = wrap_env(env)
    try:
        state = env.reset()
        for t in range(10000):
            env.render()
            state, _, done, _ = env.step(act(np.array(state), weights, 1))
            if done:
                # No reset here: it would only start a second episode on the
                # recording wrapper that is thrown away by close() below.
                print(genome, t)
                break
    finally:
        env.close()

In [None]:
# Number of generations to evolve; change it to run more or fewer.
# 42 can be a bit much: if evolution goes well you could be waiting a few
# minutes, so a smaller number is probably better. But, you know, 42.
generations = 42

# Run the evolution and print the returned best genome record.
best_network = train(generations)
print("Best Network from 'Evolution':")
print(best_network)


Generation --- Best Genome Score --- Best Genome ID
0 0 0
1 117.9375 98
2 98.435546875 98
3 103.26361083984375 98
4 93.41448783874512 98
5 102.95045274496078 98
6 109.15470164828002 98
7 599.875 812
8 551.68359375 812
9 914.875 1014
10 852.62109375 1014
11 5249.59375 1249
12 4763.5810546875 1249
13 4354.625 1478
14 5739.9375 1600
15 8577 1745
16 6666.9375 1865
17 9961 1985
18 5063.71875 1981
19 6753 2236
20 7686.125 2353
21 6548.5546875 2373
22 5389 2608
23 8215 2740
24 9411 2862
25 5849.3828125 2863
26 9097 3121
27 7027 3234
28 8607 3372
29 9909 3487
30 6553.875 3608
31 8103 3747
32 7429 3874
33 7871 3994
34 7949 4117
35 6285.25 4247
36 4372.28125 4247
37 9868 4494
38 4299.0625 4604
39 5960.5 4736
40 8895 4862
41 5299 4984
Best genomes: [[4976, 4159.28125], [5105, 3857.1875], [5118, 3687], [5121, 3587], [5109, 3519], [5122, 3496]]
5225
Best Network from 'Evolution':
[[[array([-0.20778194,  0.38792339, -0.32044774, -0.10851988, -0.3879654 ,
       -0.04953791,  0.4651769 ,  0.02371821]

TypeError: ignored

In [None]:
# Play and record one episode with the genome returned by train().
test_network(best_network)

4976 999


In [None]:
# Embed a recorded episode from ./video in the cell output.
show_video()

In [None]:
def test_weights(weights):
    """Play one recorded CartPole episode using a bare list of weight matrices.

    Args:
        weights: one weight matrix per layer transition, in the form ``act``
            accepts (e.g. the hand-saved ``kobe_weights``).

    Prints the index of the step on which the episode ended. The episode is
    recorded through ``wrap_env``.
    """
    # First we need to set up our gym environment
    env = gym.make('CartPole-v0')
    # Increasing max steps
    env._max_episode_steps = 900
    env = wrap_env(env)
    try:
        state = env.reset()
        for t in range(10000):
            env.render()
            state, _, done, _ = env.step(act(np.array(state), weights, 1))
            if done:
                # No reset here: it would only start a second episode on the
                # recording wrapper that is thrown away by close() below.
                print(t)
                break
    finally:
        env.close()

In [None]:
# Hard-coded weights for a 4-input, 8-hidden-node, 2-output network, stored as
# plain nested lists: the first matrix is 4x8 (input -> hidden), the second is
# 8x2 (hidden -> output).
kobe_weights = [([[ 0.30263124,  0.47814879, -0.19686573, -0.34770843,  0.30430385,
          0.39926983,  0.16303453, -0.34522856],
        [-0.05675589,  0.49568219,  0.46898572,  0.18752217, -0.10781119,
          0.17361164, -0.06617073,  0.019675  ],
        [-0.36858914, -0.23357126,  0.04438254,  0.49198591, -0.26465441,
          0.01762487, -0.49231585, -0.44240676],
        [-0.28438128,  0.10548093,  0.25434609, -0.12895371, -0.20302942,
          0.01086639, -0.05952716, -0.45258727]]),([[ 0.29704685, -0.07796342],
        [-0.22159064,  0.49489256],
        [-0.47032932,  0.39414666],
        [ 0.25335558,  0.18086856],
        [-0.24611982, -0.24187248],
        [-0.15594916, -0.49670978],
        [-0.00426263, -0.45526976],
        [ 0.06679129, -0.27524174]])]

# Play and record one episode with these weights.
test_weights(kobe_weights)

899


In [None]:
# Embed a recorded episode from ./video in the cell output.
show_video()