# Covariance Matrix Adaptation Evolution Strategy (CMA-ES)
This notebook uses CMA-ES to evolve the weights of a neural controller whose architecture (layer sizes) stays fixed.

In [48]:
import importlib
import environment 
import neural_controller
importlib.reload(environment)
importlib.reload(neural_controller)

import numpy as np
from deap import base, cma, creator, tools, algorithms

In [6]:
# Deterministic starting layout: three agents, three blocks, and one colour
# per block (two red, one blue).
initial_setting = {
    "agents": np.array([[5, 5], [10, 10], [15, 15]], dtype=float),
    "blocks": np.array([[3, 16], [13, 7], [16, 4]], dtype=float),
    "colors": np.array(
        [environment.RED, environment.RED, environment.BLUE],
        dtype=int,
    ),
}

# The objective is a list of (colour, edge) pairs -- presumably the edge each
# colour has to be delivered to; confirm against environment.Environment.
env = environment.Environment(
    objective=[
        (environment.RED, environment.TOP_EDGE),
        (environment.BLUE, environment.RIGHT_EDGE),
    ],
    size=environment.SIMULATION_ARENA_SIZE,
    n_agents=3,
    n_blocks=3,
    n_neighbors=3,
    sensor_range=environment.SIMULATION_SENSOR_RANGE,
    sensor_angle=360,
    max_distance_covered_per_step=environment.SIMULATION_MAX_DISTANCE,
    sensitivity=0.5,
    initial_setting=initial_setting,
)

In [49]:
# Per-agent network input, matching how calculate_fitness assembles it:
# for every observed neighbour a one-hot type (n_types) plus a normalised
# distance and direction (+2), followed by a one-hot of the carried block
# over n_types - 2 entries.
input_dim = (env.n_types + 2) * env.n_neighbors + env.n_types - 2
# Three action scores followed by two movement outputs (distance, direction).
output_dim = 5
hidden_units = [64]
layer_sizes = [input_dim] + hidden_units + [output_dim]

# The controller instance intentionally keeps the name ``neural_controller``
# because the other cells refer to it, but that shadows the imported module.
# Looking the module up through importlib keeps this cell re-runnable: a
# second execution would otherwise try to read ``NeuralController`` off the
# instance created by the first one and fail with AttributeError.
_neural_controller_module = importlib.import_module("neural_controller")
neural_controller = _neural_controller_module.NeuralController(layer_sizes, activation="sigmoid")
neural_controller.summary()

NeuralController with layer sizes:  [44, 64, 5]
Total weights: 3205, not set


In [41]:
def calculate_fitness(individual, verbose=False, max_steps=50):
    """Score one candidate weight vector by simulating a single episode.

    The weights are loaded into the module-level ``neural_controller``, the
    module-level ``env`` is reset, and every agent is then driven by that same
    network until ``env.step`` reports ``done`` or ``max_steps`` steps have
    been executed. The fitness is the sum of the rewards returned by
    ``env.step``.

    Args:
        individual: Flat sequence of network weights, handed unchanged to
            ``neural_controller.set_weights_from_vector``.
        verbose: When true, print the post-processed network outputs, the
            actions, the environment rendering, the reward and the new
            observation after every step.
        max_steps: Upper bound on the number of simulation steps. Defaults to
            50, the value that used to be hard-coded.

    Returns:
        A one-element list containing the accumulated reward as a ``float``
        (single-objective fitness).
    """
    # The first three network outputs score the discrete actions; the
    # remaining two drive the movement (distance, direction).
    n_action_outputs = 3

    fitness = 0
    obs, _ = env.reset()

    # Set the weights of the network
    neural_controller.set_weights_from_vector(individual)

    for _ in range(max_steps):

        # Split the neighbour observations of all agents at once; the last
        # axis holds (type, distance, direction). Distance and direction are
        # scaled by the sensor range / sensor angle.
        neighbors = obs["neighbors"]
        types = neighbors[:, :, 0].astype(int)
        distances = neighbors[:, :, 1] / env.sensor_range
        directions = neighbors[:, :, 2] / env.sensor_angle
        carrying_block = obs["carrying"].copy()  # copy: modified in place below

        # Vectorized one-hot encoding of the neighbour types
        types_one_hot_encoded = np.eye(env.n_types)[types]

        # One-hot encoding of the carried block over n_types - 2 entries.
        # NOTE(review): "not carrying" (-1) is mapped to 0 and then shifted to
        # index -2, which wraps around to the second-to-last row. If a block
        # type equal to n_types - 2 exists, it gets the same encoding as "not
        # carrying" -- confirm against the environment's type numbering.
        carrying_block[carrying_block == -1] = 0
        carrying_block_one_hot_encoded = np.eye(env.n_types - 2)[carrying_block - 2]

        # Give distances and directions a trailing feature axis so they can be
        # concatenated with the one-hot types
        distances = distances.reshape(env.n_agents, -1, 1)
        directions = directions.reshape(env.n_agents, -1, 1)

        # One flat feature row per agent: all neighbour features, then the
        # carried-block encoding
        nn_inputs = np.concatenate([types_one_hot_encoded, distances, directions], axis=2)
        nn_inputs = nn_inputs.reshape(env.n_agents, -1)
        nn_inputs = np.concatenate([nn_inputs, carrying_block_one_hot_encoded], axis=1)

        # Predict for all agents in one batch
        nn_outputs = neural_controller.predict(nn_inputs)

        # Softmax over the action scores. Subtracting the row maximum first
        # gives the same result mathematically but cannot overflow in exp().
        action_scores = nn_outputs[:, :n_action_outputs]
        exp_outputs = np.exp(action_scores - np.max(action_scores, axis=1, keepdims=True))
        softmax_outputs = exp_outputs / np.sum(exp_outputs, axis=1, keepdims=True)

        # Sigmoid squashes the two movement outputs into (0, 1)
        sigmoid_outputs = 1 / (1 + np.exp(-nn_outputs[:, n_action_outputs:]))

        # Combine outputs
        nn_outputs = np.concatenate([softmax_outputs, sigmoid_outputs], axis=1)

        # Build one action per agent. The arg-max has to cover all three
        # action scores; looking at only the first two made the third action
        # unreachable.
        actions = [{
            "action": np.argmax(nn_output[:n_action_outputs]),
            "move": [
                nn_output[3] * env.max_distance_covered_per_step,
                nn_output[4] * env.sensor_angle
            ]
        } for nn_output in nn_outputs]

        obs, reward, done, _ = env.step(actions)

        fitness += reward

        if verbose:
            print(nn_outputs)
            print(actions)
            env.print_env()
            print(reward)
            print(obs)

        if done:
            break

    return [float(fitness)]

In [37]:
# Number of candidates sampled per CMA-ES generation (lambda).
population_size = 20

# Set up the fitness and individual types. creator.create() warns and
# overwrites the class when the name already exists, so only create each type
# once; this keeps the cell safe to re-run.
if not hasattr(creator, "FitnessMax"):
    creator.create("FitnessMax", base.Fitness, weights=(1.0,))  # Maximization problem
if not hasattr(creator, "Individual"):
    creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()

# Each individual is scored by running one simulation episode.
toolbox.register("evaluate", calculate_fitness)

# Strategy parameters for CMA-ES: start the search at the all-zero weight
# vector with an initial step size of 1.0.
strategy = cma.Strategy(centroid=[0.0]*neural_controller.total_weights, sigma=1.0, lambda_=population_size)
toolbox.register("generate", strategy.generate, creator.Individual)
toolbox.register("update", strategy.update)

# Statistics to keep track of the evolution (computed over the fitness values
# of each generation)
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("best", np.max)
stats.register("avg", np.mean)
stats.register("std", np.std)
stats.register("worst", np.min)


# Hall of fame of size 1: retains the single best individual seen
hof = tools.HallOfFame(1)



In [42]:
# Run the generate / evaluate / update loop for five generations, collecting
# statistics and tracking the best individual in the hall of fame.
algorithms.eaGenerateUpdate(
    toolbox,
    halloffame=hof,
    stats=stats,
    ngen=5,
)
# Show the best weight vector found.
hof[0]

KeyboardInterrupt: 

In [43]:
# Replay the best individual from the hall of fame with per-step printing.
calculate_fitness(individual=hof[0], verbose=True)

[[0.22324885 0.62223985 0.15451131 0.09572865 0.02869375]
 [0.22324885 0.62223985 0.15451131 0.09572865 0.02869375]
 [0.22324885 0.62223985 0.15451131 0.09572865 0.02869375]]
[{'action': 1, 'move': [0.3829145882091361, 10.329751631129579]}, {'action': 1, 'move': [0.3829145882091361, 10.329751631129579]}, {'action': 1, 'move': [0.3829145882091359, 10.329751631129579]}]
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . [91mO[0m . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . [0m0[0m . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . 

[0.0]