# Covariance Matrix Adaptation Evolution Strategy (CMA-ES)
Evolving the weights of a fixed Neural Controller using CMA-ES.

In [1]:
import importlib
import environment 
import neural_controller
importlib.reload(environment)
importlib.reload(neural_controller)

import numpy as np
from deap import base, cma, creator, tools, algorithms

In [2]:
# Fixed starting layout handed to the environment: three agent positions,
# four block positions and one colour id per block (same order as 'blocks').
# NOTE(review): the coordinate convention (row/col vs x/y) is defined by
# environment.Environment -- confirm there before editing these values.
initial_setting = dict(
    agents=np.array([[0, 5], [0, 10], [0, 15]], dtype=float),
    blocks=np.array([[9, 16], [13, 7], [6, 5], [10, 11]], dtype=float),
    colors=np.array(
        [environment.RED, environment.RED, environment.BLUE, environment.GREEN],
        dtype=int,
    ),
)

# Single objective pairing RED with NORTH_EDGE
# (presumably "bring red blocks to the north edge" -- verify in environment).
env = environment.Environment(
    objective=[(environment.RED, environment.NORTH_EDGE)],
    size=environment.SIMULATION_ARENA_SIZE,
    n_agents=3,
    n_blocks=4,
    n_neighbors=3,
    sensor_range=environment.SIMULATION_SENSOR_RANGE,
    sensor_angle=360,
    max_distance_covered_per_step=environment.SIMULATION_MAX_DISTANCE,
    sensitivity=0.5,
    initial_setting=initial_setting,
)

# Reset once and print the arena so the starting layout is visible below.
env.reset()
env.print_env()

. . . . . . 0 . . . . . . 1 . . . . . 2 . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . [94mO[0m . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . [91mO[0m . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . [92mO[0m . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . [91mO[0m . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . 

In [3]:
# Controller I/O sizes. The input width is derived from the environment's
# n_types and n_neighbors; the two outputs are later scaled by
# calculate_fitness into (distance, angle)-style actions.
output_dim = 2
input_dim = env.n_neighbors * (env.n_types + 2) + (env.n_types - 2)

# NOTE(review): hidden_units is defined but never used -- layer_sizes below
# wires the inputs straight to the outputs, so the network has no hidden layer.
# Confirm whether that is intentional before adding it to layer_sizes.
hidden_units = [32]
layer_sizes = [input_dim, output_dim]

nn = neural_controller.NeuralController(
    layer_sizes,
    hidden_activation="sigmoid",
    output_activation="sigmoid",
)
nn.summary()

NeuralController with layer sizes:  [44, 2]
Total weights: 90, not set


In [4]:
def calculate_fitness(individual, n_steps=100, verbose = False):
    """Score one candidate weight vector by running a single episode.

    Relies on the notebook-level ``env`` (simulation) and ``nn`` (controller)
    objects: the environment is reset, the candidate's weights are loaded into
    the controller, and the controller then drives the environment for at most
    ``n_steps`` steps while the per-step rewards are summed.

    Args:
        individual: Flat weight vector passed to ``nn.set_weights_from_vector``.
        n_steps: Maximum number of environment steps in the episode.
        verbose: When true, print the step index, the chosen actions, the
            arena and the reward after every step.

    Returns:
        A one-element list holding the accumulated reward as a float. If the
        environment reports ``done`` at step index ``t``, a bonus of
        ``(n_steps - t) / 2`` is added so that finishing earlier scores higher.
    """
    total_reward = 0
    observation, _ = env.reset()

    # Load the candidate's weights into the shared controller.
    nn.set_weights_from_vector(individual)

    for t in range(n_steps):
        controller_inputs = env.process_observation(observation)
        controller_outputs = np.array(nn.predict(controller_inputs))

        # Scale the two controller outputs by (max step distance, sensor angle)
        # and keep one decimal place.
        action_scale = np.array([env.max_distance_covered_per_step, env.sensor_angle])
        actions = np.round(controller_outputs * action_scale, 1)

        observation, reward, done, _, _ = env.step(actions)
        total_reward += reward

        if verbose:
            print("\nStep", t)
            print("Action: ", actions)
            env.print_env()
            print("Reward: ", reward)

        if done:
            # Early-completion bonus: half a point per step not yet taken.
            total_reward += (n_steps - t) / 2
            break

    return [float(total_reward)]

In [5]:
population_size = 100

# Seed NumPy's global RNG so the CMA-ES sampling is repeatable across
# Restart & Run All (DEAP's cma.Strategy draws its offspring through
# numpy.random). Change SEED to explore a different run.
SEED = 42
np.random.seed(SEED)

# Set up the fitness and individual.
# creator.create registers classes on the deap.creator module itself, so the
# guards keep this cell idempotent: re-running it no longer redefines the
# classes (and no longer triggers DEAP's "class already created" warning).
if not hasattr(creator, "FitnessMax"):
    creator.create("FitnessMax", base.Fitness, weights=(1.0,))  # Maximization problem
if not hasattr(creator, "Individual"):
    creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()

# Each individual is scored by running one episode with its weights.
toolbox.register("evaluate", calculate_fitness)

# Strategy parameters for CMA-ES: start the search at the all-zero weight
# vector with unit step size and sample `population_size` offspring per
# generation.
strategy = cma.Strategy(centroid=[0.0]*nn.total_weights, sigma=1.0, lambda_=population_size)
toolbox.register("generate", strategy.generate, creator.Individual)
toolbox.register("update", strategy.update)

# Statistics to keep track of the evolution (computed over each generation's
# fitness values).
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("best", np.max)
stats.register("avg", np.mean)
stats.register("std", np.std)
stats.register("worst", np.min)


# Hall of fame of size 1: keeps the single best individual found during the run.
hof = tools.HallOfFame(1)

In [6]:
# Run the CMA-ES generate -> evaluate -> update loop for 100 generations.
# `hof` is updated in place with the best individual, and one row of `stats`
# is printed per generation.
# Per DEAP's documentation the call returns a (population, logbook) pair,
# which is what `statistics` holds afterwards.
statistics = algorithms.eaGenerateUpdate(
    toolbox,
    ngen=100,
    halloffame=hof,
    stats=stats,
)

gen	nevals	best  	avg     	std    	worst
0  	100   	0.3759	-2.53212	17.0654	-165 
1  	100   	2.06264	-5.35018	32.6643	-275 
2  	100   	1.11022e-16	-16.4526	73.7267	-435 
3  	100   	8.5575     	-14.7275	72.9911	-600 
4  	100   	0.94487    	-4.13247	20.9632	-163.845
5  	100   	0.604794   	-0.513998	3.2816 	-28.9517
6  	100   	12.2359    	0.165061 	2.40825	-14.83  
7  	100   	6.54134    	-0.334311	2.02114	-13.0931
8  	100   	6.5799     	-9.57419 	50.4373	-419.642
9  	100   	6.93427    	-3.37719 	20.1821	-178.815
10 	100   	6.29888    	-5.7911  	36.1838	-355.669
11 	100   	0.147686   	-22.6384 	101.668	-776.711
12 	100   	6.08584    	-7.03322 	39.9591	-290    
13 	100   	8.64927    	-7.1855  	35.2582	-259.959
14 	100   	6.65368    	-15.9346 	86.6054	-702.464
15 	100   	13.123     	-46.6229 	178.541	-1345   
16 	100   	6.65       	-4.62319 	30.8631	-304.708
17 	100   	1.06828    	-13.4751 	78.4177	-740.885
18 	100   	8.7271     	-13.0724 	63.506 	-505    
19 	100   	7.55148    	-4.58216 	39

KeyboardInterrupt: 

In [61]:
# Replay the best individual recorded in the hall of fame, printing every
# step; the cell's displayed value is its one-element fitness list.
calculate_fitness(individual=hof[0], verbose=True)


Step 0
Action:  [[1.9 5.8]
 [1.9 5.8]
 [1.9 5.8]]
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . 0 . . . . . 1 . . . . . 2 . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . [94mO[0m . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . [91mO[0m . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . [92mO[0m . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . [91mO[0m . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . .

[93.48766574630615]