# Genetic Algorithm NeuroEvolution
Evolving the weights of a fixed Neural Controller using Genetic Algorithm

In [4]:
import importlib
import environment 
import neural_controller
importlib.reload(environment)
importlib.reload(neural_controller)

import numpy as np
import time

In [5]:
initial_setting = {
    'agents': np.array([[5, 5], [10, 10], [15, 15]], dtype=float),
    'blocks': np.array([[3, 16], [13, 7], [16, 4]], dtype=float),
    'colors': np.array([environment.RED, environment.RED, environment.BLUE], dtype=int)
}
env = environment.Environment(objective = [(environment.RED, environment.TOP_EDGE), (environment.BLUE, environment.RIGHT_EDGE)],
                   size = environment.SIMULATION_ARENA_SIZE, 
                   n_agents = 3, 
                   n_blocks = 3,
                   n_neighbors = 3,
                   sensor_range = environment.SIMULATION_SENSOR_RANGE,
                   sensor_angle = 360,
                   max_distance_covered_per_step = environment.SIMULATION_MAX_DISTANCE,
                   sensitivity = 0.5,
                   initial_setting = initial_setting)

In [7]:
input_dim = (env.n_types + 2) * env.n_neighbors + env.n_types - 2
output_dim = 5
hidden_units = [64]
layer_sizes = [input_dim] + hidden_units + [output_dim]

neural_controller = neural_controller.NeuralController(layer_sizes, activation="sigmoid")
neural_controller.summary()

NeuralController with layer sizes:  [44, 64, 5]
Total weights: 3205, not set


In [92]:
def calculate_fitnesses(population : np.ndarray, 
                        network : NeuralController, 
                        env : environment.Environment, 
                        n_episodes=50,
                        verbose=False):
    
    fitnesses = np.zeros(len(population))
    
    for genome_id, genome in enumerate(population):
        
        fitnesses[genome_id] = 0
        obs, _ = env.reset()
        
        # Set the weights of the network
        network.set_weights_from_vector(genome)

        for _ in range(n_episodes):
            
            # Extract data for all agents at once for vectorized computation
            types = obs["neighbors"][:, :, 0].copy()
            distances = obs["neighbors"][:, :, 1].copy() / env.sensor_range
            directions = obs["neighbors"][:, :, 2].copy() / env.sensor_angle
            carrying_block = obs["carrying"].copy()

            # Vectorized one-hot encoding for types
            types_one_hot_encoded = np.eye(env.n_types)[types.astype(int)]

            # Adjust carrying_block indices for one-hot encoding
            carrying_block[carrying_block == -1] = 0  # Map -1 to 0 for one-hot encoding
            carrying_block_one_hot_encoded = np.eye(env.n_types-2)[carrying_block - 2]

            # Reshape distances and directions for concatenation
            distances = distances.reshape(env.n_agents, -1, 1)
            directions = directions.reshape(env.n_agents, -1, 1)

            # Concatenate all inputs
            nn_inputs = np.concatenate([types_one_hot_encoded, distances, directions], axis=2)
            nn_inputs = nn_inputs.reshape(env.n_agents, -1)

            # Concatenate carrying_block_one_hot_encoded
            nn_inputs = np.concatenate([nn_inputs, carrying_block_one_hot_encoded], axis=1)

            # Predict in one go
            nn_outputs = network.predict(nn_inputs)  # Network can handle batch inputs
 
            # Apply softmax to the first 3 outputs
            exp_outputs = np.exp(nn_outputs[:, :3])
            softmax_outputs = exp_outputs / np.sum(exp_outputs, axis=1, keepdims=True)

            # Apply sigmoid to the last 2 outputs
            sigmoid_outputs = 1 / (1 + np.exp(-nn_outputs[:, 3:]))

            # Combine outputs
            nn_outputs = np.concatenate([softmax_outputs, sigmoid_outputs], axis=1)

            # Generate actions for all agents
            actions = [{
                "action": np.argmax(nn_output[:2]),
                "move": [
                    nn_output[3] * env.max_distance_covered_per_step,
                    nn_output[4] * env.sensor_angle
                ]
            } for nn_output in nn_outputs]

            obs, reward, done, _ = env.step(actions)

            fitnesses[genome_id] += reward

            if verbose:
                print(nn_outputs)
                print(actions)
                env.print_env()
                print(reward)
                print(obs)
            
            if done:
                break

    return fitnesses

In [93]:
def select_parents(population, fitnesses, num_parents):
    return [population[i] for i in np.argsort(fitnesses)[-num_parents:]]

def crossover(parents, offspring_size):
    offspring = []
    for _ in range(offspring_size):
        parent1 = parents[np.random.randint(len(parents))]
        parent2 = parents[np.random.randint(len(parents))]
        crossover_point = np.random.randint(len(parent1))
        child = np.concatenate((parent1[:crossover_point], parent2[crossover_point:]))
        offspring.append(child)
    return offspring

def mutate(offspring, mutation_rate=0.1):
    for child in offspring:
        for i in range(len(child)):
            if np.random.rand() < mutation_rate:
                child[i] += np.random.normal()
    return offspring

In [94]:
pop_size = 50
num_generations = 20
num_parents = 10

population = np.random.normal(0, 1, (pop_size, neural_controller.total_weights))
population.shape

(50, 3205)

In [97]:
for generation in range(num_generations):
    print(f"Generation {generation}")
    start_gen = time.time()
    
    fitnesses = calculate_fitnesses(population, neural_controller, env, n_episodes=500)

    parents = select_parents(population, fitnesses, num_parents)

    offspring = crossover(parents, offspring_size=pop_size - num_parents)

    offspring = mutate(offspring)
    
    population = parents + offspring
    
    end_gen = time.time()
    avg_fitness = np.mean(fitnesses)   
    print(f"Time taken: {end_gen- start_gen}")
    print(f"Best = {max(fitnesses)}, Average = {np.mean(fitnesses)}, Std = {np.std(fitnesses)}\n")

best_genome = population[np.argmax(fitnesses)]

Generation 0
Time taken: 10.832767009735107
Best = 0.0, Average = 0.0, Std = 0.0

Generation 1
Time taken: 10.534852981567383
Best = 0.0, Average = 0.0, Std = 0.0

Generation 2
Time taken: 10.036014080047607
Best = 0.0, Average = 0.0, Std = 0.0

Generation 3
Time taken: 9.838810920715332
Best = 0.0, Average = 0.0, Std = 0.0

Generation 4
Time taken: 12.31999683380127
Best = 0.0, Average = 0.0, Std = 0.0

Generation 5
Time taken: 12.540268898010254
Best = 0.0, Average = 0.0, Std = 0.0

Generation 6


KeyboardInterrupt: 

In [96]:
calculate_fitnesses([best_genome], neural_controller, env, n_episodes=50, verbose=True)

[[5.44162196e-11 9.95021174e-01 4.97882641e-03 7.11569177e-05
  3.89874148e-01]
 [5.44162196e-11 9.95021174e-01 4.97882641e-03 7.11569177e-05
  3.89874148e-01]
 [5.44162196e-11 9.95021174e-01 4.97882641e-03 7.11569177e-05
  3.89874148e-01]]
[{'action': 1, 'move': [0.0002846276708626373, 140.3546934092468]}, {'action': 1, 'move': [0.0002846276708626373, 140.3546934092468]}, {'action': 1, 'move': [0.0002846276708626373, 140.3546934092468]}]
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . [91mO[0m . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . [0m0[0m . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . 

array([0.])