In [60]:
import numpy as np
import time
import importlib
import environment 
importlib.reload(environment)

<module 'environment' from '/Users/lorenzoleuzzi/Documents/GitHub/lifelong_evolutionary_swarms/environment.py'>

In [61]:
# Hand-placed starting layout handed to the environment: three agents on the
# diagonal and three blocks, two red and one blue.
initial_setting = dict(
    agents=np.array([[5, 5], [10, 10], [15, 15]], dtype=float),
    blocks=np.array([[4, 16], [13, 5], [16, 4]], dtype=float),
    colors=np.array([environment.RED, environment.RED, environment.BLUE], dtype=int),
)

# Each objective entry pairs a block colour with an arena edge
# (presumably the edge that colour has to be delivered to -- confirm in environment.py).
env = environment.Environment(
    objective=[
        (environment.RED, environment.TOP_EDGE),
        (environment.BLUE, environment.RIGHT_EDGE),
    ],
    size=environment.SIMULATION_ARENA_SIZE,
    n_agents=3,
    n_blocks=3,
    n_neighbors=3,
    sensor_range=environment.SIMULATION_SENSOR_RANGE,
    sensor_angle=360,
    max_distance_covered_per_step=environment.SIMULATION_MAX_DISTANCE,
    sensitivity=0.5,
    initial_setting=initial_setting,
)

In [62]:
class NeuralController:
    """Fully connected feed-forward network driven by a flat weight vector.

    Hidden layers use a ReLU activation; the output layer is linear.

    Parameters
    ----------
    layer_sizes : sequence of int
        Units per layer, input layer first and output layer last.
    weights : list of (weight, bias) tuples, optional
        One ``(weight, bias)`` pair per layer transition, where ``weight`` has
        shape ``(n_in, n_out)`` and ``bias`` has shape ``(n_out,)``. When
        omitted, call :meth:`set_weights_from_vector` before :meth:`predict`.

    Attributes
    ----------
    total_weights : int
        Length of the flat vector expected by :meth:`set_weights_from_vector`
        (all weight matrices plus all bias vectors).
    """

    def __init__(self, layer_sizes, weights=None):
        self.layer_sizes = layer_sizes
        self.weights = weights
        # Every transition contributes an (n_in x n_out) matrix plus n_out biases.
        self.total_weights = sum(
            (layer_sizes[i] + 1) * layer_sizes[i + 1]
            for i in range(len(layer_sizes) - 1)
        )

    def predict(self, X):
        """Run a forward pass and return the raw (linear) output activations.

        ``X`` may be a single input vector or a batch with one row per sample;
        the layers are applied with ``np.dot`` so both work.

        Raises
        ------
        ValueError
            If no weights have been set yet.
        """
        if not self.weights:
            raise ValueError(
                "NeuralController has no weights; call set_weights_from_vector() first"
            )

        a = X
        for weight, bias in self.weights[:-1]:  # ReLU on every layer except the last
            a = np.maximum(0, np.dot(a, weight) + bias)

        # Linear activation for the last layer
        final_weight, final_bias = self.weights[-1]
        return np.dot(a, final_weight) + final_bias

    def set_weights_from_vector(self, w):
        """Load all layer weights from a flat vector.

        The vector is consumed layer by layer: first the ``n_in * n_out``
        entries of the weight matrix (row-major), then the ``n_out`` biases.

        Raises
        ------
        ValueError
            If ``w`` is not one-dimensional with exactly ``total_weights`` entries.
        """
        w = np.asarray(w)
        # Reject wrong-sized genomes up front instead of silently ignoring the
        # tail of an over-long vector or failing inside reshape on a short one.
        if w.ndim != 1 or w.size != self.total_weights:
            raise ValueError(
                f"expected a flat vector of {self.total_weights} weights, "
                f"got an array of shape {w.shape}"
            )

        self.weights = []
        start = 0
        for n_in, n_out in zip(self.layer_sizes[:-1], self.layer_sizes[1:]):
            end = start + n_in * n_out
            weight = w[start:end].reshape(n_in, n_out)
            start = end
            end = start + n_out
            bias = w[start:end]
            start = end
            self.weights.append((weight, bias))

    def summary(self):
        """Print the layer sizes, the weight count and whether weights are set."""
        weights_set = "not set" if self.weights is None else "set"
        print("NeuralController with layer sizes: ", self.layer_sizes)
        print(f"Total weights: {self.total_weights}, {weights_set}")

In [63]:
# Per-agent network input, as assembled in calculate_fitnesses: each sensed
# neighbour contributes a one-hot type vector (env.n_types entries) plus a
# distance and a direction, and the carried-block one-hot adds env.n_types - 2.
# The five outputs are three action scores followed by two movement values.
hidden_units = [64]
output_dim = 5
input_dim = env.n_neighbors * (env.n_types + 2) + (env.n_types - 2)
layer_sizes = [input_dim, *hidden_units, output_dim]

neural_controller = NeuralController(layer_sizes)
neural_controller.summary()

NeuralController with layer sizes:  [44, 64, 5]
Total weights: 3205, not set


In [73]:
def _encode_observations(obs, env):
    """Build the (n_agents, input_dim) network input from one observation dict."""
    neighbors = np.asarray(obs["neighbors"])
    types = neighbors[:, :, 0].astype(int)
    # NOTE(review): sensed distances are normalised by the per-step movement
    # limit rather than by env.sensor_range -- confirm this is intended.
    distances = neighbors[:, :, 1] / env.max_distance_covered_per_step
    directions = neighbors[:, :, 2] / env.sensor_angle

    # One-hot encode the neighbour types, then append distance and direction
    # as two extra features per neighbour.
    types_one_hot = np.eye(env.n_types)[types]
    per_neighbor = np.concatenate(
        [
            types_one_hot,
            distances.reshape(env.n_agents, -1, 1),
            directions.reshape(env.n_agents, -1, 1),
        ],
        axis=2,
    )
    flat_neighbors = per_neighbor.reshape(env.n_agents, -1)

    # Work on a copy so the array handed out by the environment is never modified.
    carrying = np.array(obs["carrying"], dtype=int)
    # Carried types are offset by 2 to index the one-hot slots; -1 means
    # "not carrying" and is encoded as an all-zero row so it cannot alias a
    # real block type through negative indexing.
    carrying_one_hot = np.zeros((carrying.shape[0], env.n_types - 2))
    holding = np.flatnonzero(carrying != -1)
    carrying_one_hot[holding, carrying[holding] - 2] = 1.0

    return np.concatenate([flat_neighbors, carrying_one_hot], axis=1)


def _decode_actions(nn_outputs, env):
    """Convert raw network outputs (one row per agent) into environment action dicts."""
    # Outputs 0-2 are the action scores. Softmax is monotonic, so the argmax of
    # the raw scores selects the same action without any risk of exp overflow.
    action_ids = np.argmax(nn_outputs[:, :3], axis=1)

    # Sigmoid of the remaining outputs, written with tanh so that large
    # magnitudes cannot overflow: sigmoid(x) == 0.5 * (1 + tanh(x / 2)).
    move_fractions = 0.5 * (1.0 + np.tanh(0.5 * nn_outputs[:, 3:]))

    # NOTE(review): the second move component is scaled by env.sensor_range;
    # if it is meant to be a heading it should probably use env.sensor_angle -- confirm.
    return [
        {
            "action": action_id,
            "move": [
                fractions[0] * env.max_distance_covered_per_step,
                fractions[1] * env.sensor_range,
            ],
        }
        for action_id, fractions in zip(action_ids, move_fractions)
    ]


def calculate_fitnesses(population : np.ndarray,
                        network : "NeuralController",
                        env : "environment.Environment",
                        n_episodes=50,
                        verbose=False):
    """Evaluate every genome in ``population`` and return its accumulated reward.

    For each genome the environment is reset, the genome is loaded into
    ``network`` and all agents are driven by that same network.

    Parameters
    ----------
    population : array or list of 1-D weight vectors
        One genome per entry, in the format expected by
        ``network.set_weights_from_vector``.
    network : NeuralController
        Controller whose weights are overwritten for every genome.
    env : environment.Environment
        Environment providing ``reset()``, ``step(actions)`` and ``print_env()``.
    n_episodes : int
        Maximum number of ``env.step`` calls per genome; evaluation stops
        earlier as soon as the environment reports ``done``.
    verbose : bool
        When true, print the actions, the arena and the reward after each step.

    Returns
    -------
    np.ndarray
        Sum of the step rewards for each genome, in population order.
    """
    fitnesses = np.zeros(len(population))

    for genome_id, genome in enumerate(population):
        obs, _ = env.reset()

        # Set the weights of the network
        network.set_weights_from_vector(genome)

        for _ in range(n_episodes):
            nn_inputs = _encode_observations(obs, env)
            nn_outputs = network.predict(nn_inputs)  # one row of outputs per agent
            actions = _decode_actions(nn_outputs, env)

            obs, reward, done, _ = env.step(actions)

            fitnesses[genome_id] += reward

            if verbose:
                print(actions)
                env.print_env()
                print(reward)

            if done:
                break

    return fitnesses

In [55]:
def select_parents(population, fitnesses, num_parents):
    """Return the ``num_parents`` fittest individuals (truncation selection).

    Parameters
    ----------
    population : sequence of genomes
        Indexable collection aligned with ``fitnesses``.
    fitnesses : array-like
        One fitness value per individual; higher is better.
    num_parents : int
        How many individuals to keep. Values <= 0 select nobody.

    Returns
    -------
    list
        The selected genomes ordered from worst to best, so the fittest
        individual is the last element.
    """
    # Guard explicitly: slicing with [-0:] would return the whole population.
    if num_parents <= 0:
        return []
    ranking = np.argsort(fitnesses)
    return [population[i] for i in ranking[-num_parents:]]

def crossover(parents, offspring_size):
    """Create ``offspring_size`` children by single-point crossover.

    Each child takes the genes before a random cut point from one randomly
    drawn parent and the genes from the cut point onwards from a second
    randomly drawn parent (the same parent may be drawn twice).
    """
    def _breed():
        # Draw order matters for reproducibility: first parent, second parent, cut.
        first = parents[np.random.randint(len(parents))]
        second = parents[np.random.randint(len(parents))]
        cut = np.random.randint(len(first))
        return np.concatenate((first[:cut], second[cut:]))

    return [_breed() for _ in range(offspring_size)]

def mutate(offspring, mutation_rate=0.1):
    """Add Gaussian noise to randomly chosen genes of every child, in place.

    Each gene is mutated independently with probability ``mutation_rate``;
    a mutated gene has a standard-normal sample added to it.

    Parameters
    ----------
    offspring : list of genomes
        Children to mutate; each one is modified in place.
    mutation_rate : float
        Per-gene mutation probability.

    Returns
    -------
    list
        The same ``offspring`` object that was passed in.
    """
    for child in offspring:
        # Sample the mutation mask and the noise for the whole genome at once
        # instead of calling the RNG once per gene from a Python loop.
        hits = np.flatnonzero(np.random.rand(len(child)) < mutation_rate)
        noise = np.random.normal(size=hits.size)
        if isinstance(child, np.ndarray):
            child[hits] = child[hits] + noise
        else:
            # Plain Python sequences do not support index arrays.
            for i, delta in zip(hits, noise):
                child[i] += delta
    return offspring

In [67]:
# Evolution hyper-parameters.
pop_size = 50
num_generations = 20
num_parents = 10

# Seed NumPy's global RNG so the initial population, and the np.random calls
# made afterwards by crossover and mutate, are reproducible after a kernel restart.
SEED = 42
np.random.seed(SEED)

# One genome (flat weight vector) per row, drawn uniformly from [-1, 1).
population = np.random.uniform(-1, 1, (pop_size, neural_controller.total_weights))
population.shape

(50, 3205)

In [74]:
# Best individual seen in any evaluated generation.
best_genome = None
best_fitness = -np.inf

for generation in range(num_generations):
    print(f"Generation {generation}")
    start_gen = time.time()

    fitnesses = calculate_fitnesses(population, neural_controller, env, n_episodes=50)

    # Record the champion now, while `fitnesses` still lines up index-for-index
    # with `population`; after the population is replaced below the indices no
    # longer refer to the same individuals.
    gen_best = int(np.argmax(fitnesses))
    if best_genome is None or fitnesses[gen_best] > best_fitness:
        best_fitness = fitnesses[gen_best]
        best_genome = np.array(population[gen_best], copy=True)

    parents = select_parents(population, fitnesses, num_parents)

    offspring = crossover(parents, offspring_size=pop_size - num_parents)

    offspring = mutate(offspring)

    # Keep the population a 2-D array (one genome per row), as it was initialised.
    population = np.array(parents + offspring)

    end_gen = time.time()
    print(f"Time taken: {end_gen- start_gen}")
    print(f"Best = {max(fitnesses)}, Average = {np.mean(fitnesses)}, Std = {np.std(fitnesses)}\n")

Generation 0
(3, 5)


IndexError: index 1000 is out of bounds for axis 0 with size 3

In [72]:
# Re-evaluate the stored best genome on its own with verbose output, printing
# the actions, the arena and the reward after every step.
calculate_fitnesses(
    [best_genome],
    neural_controller,
    env,
    n_episodes=50,
    verbose=True,
)

[{'action': 0, 'move': [6.516281400628383e-08, 1.617813476349164]}, {'action': 0, 'move': [6.516281400628383e-08, 1.617813476349164]}, {'action': 0, 'move': [6.516281400628383e-08, 1.617813476349164]}]
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . [91mO[0m . . . .
. . . . . . 0 . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . 1 . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . .

array([-150.])