# Environment 

In [3]:
import numpy as np
import pygame

import gymnasium as gym
from gymnasium import spaces
from math import sqrt, atan2, degrees

## Robot features

- 25 cm diameter
- compass
- 360° vision sensor and object recognition within a 50 cm range
- communication with the other robots
- ability to pick up objects (when the robot is in the same position as the object)
- holonomic motion (in every direction)
- maximum velocity: 200 cm/s
- maximum acceleration: 400 cm/s²

In [4]:
# Physical parameters of one robot (centimetres and seconds).
TIME_PER_STEP = 1       # seconds simulated by one environment step
ROBOT_SIZE = 25         # robot diameter, in cm
SENSOR_RANGE = 75       # sensing radius, in cm  # TODO change
MAX_VELOCITY = 200      # in cm/s
VELOCITY = 100          # cruising speed used here (initial 0, max 200), in cm/s
MAX_ACC = 400           # in cm/s^2
MAX_DISTANCE = TIME_PER_STEP * VELOCITY  # distance covered in one step, in cm

# Simulation units: every length is divided by the robot diameter, so the
# robot itself measures 1, the sensor range 3 and the per-step distance 4.
SIMULATION_ROBOT_SIZE, SIMULATION_SENSOR_RANGE, SIMULATION_MAX_DISTANCE = (
    length_cm / ROBOT_SIZE
    for length_cm in (ROBOT_SIZE, SENSOR_RANGE, MAX_DISTANCE)
)

We are in a continuous 2D environment (no physics). A robot can navigate in any direction, covering any distance up to a defined maximum per step. Additionally, the robot can pick up an object (when the object is underneath it) and deposit it.

In [5]:
# Discrete action identifiers understood by the environment's step().
MOVE, PICK_UP, PUT_DOWN = range(3)

The robots are equipped with sensory equipment capable of identifying nearby entities. A "neighbor" is characterized by a tuple comprising the type of object, the distance to it, and its relative direction. Accordingly, each robot maintains a list of such tuples for a predefined fixed number of neighboring entities.

## Arena

5m x 5m with robots and colored objects 

In [6]:
# Side length of the square arena, in cm (5 m).
ARENA_SIZE = 500
# Same length in simulation units, where the robot diameter is the unit.
SIMULATION_ARENA_SIZE = ARENA_SIZE / ROBOT_SIZE
# Entity types a sensor can report: 1 empty + 1 robot + 7 colors.
N_TYPES = 1 + 1 + 7

## Objective
A list of (color_id, edge) pairs. The robots must pick up the objects and deposit them in the right position. The deposit area for each color lies along one edge of the arena.

In [7]:
# Identifiers of the four arena edges used as deposit areas.
TOP_EDGE, RIGHT_EDGE, LEFT_EDGE, BOTTOM_EDGE = range(4)

In [8]:
# Block color identifiers; they start at 2 (ids 0 and 1 are not colors here).
RED, GREEN, BLUE, YELLOW, PURPLE, ORANGE, GREY = range(2, 9)

In [9]:
# Reward shaping, grouped as (right, wrong) pairs per action.
REWARD_RIGHT_PICKUP, REWARD_WRONG_PICKUP = 10, -5
REWARD_RIGHT_PUTDOWN, REWARD_WRONG_PUTDOWN = 20, -10
# Bonus for moving while carrying a block toward its deposit edge.
REWARD_MOVING_RIGHT_DIRECTION = 5
# Cost charged to every agent on every step.
REWARD_MOVE = -1

## Environment construction

In [10]:
class GridWorldEnv(gym.Env):
    """Continuous 2D arena in which several robots collect coloured blocks.

    Positions are ``(x, y)`` pairs clipped to ``[0, size - 1]``. ``print_env``
    draws ``x`` as the row and ``y`` as the column, so a heading of 0 degrees
    points down, 90 right, 180 up and 270 left.

    Each agent observes up to ``n_neighbors`` nearby entities as
    ``[type, distance, direction]`` rows (type 0 = empty slot, 1 = agent,
    otherwise the block colour id) plus the colour it is carrying (-1 = none).

    Note: ``step`` returns the 4-tuple ``(observation, reward, done, info)``
    that the callers of this class unpack, not Gymnasium's 5-tuple.
    """

    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4}

    def __init__(
            self,
            objective=None,  # list of (color, edge) tuples; None -> [(2, TOP_EDGE)]
            seed=None,
            render_mode=None,
            size=100,
            n_agents=3,
            n_blocks=3,
            n_neighbors=4,
            sensor_range=2,
            sensor_degree=360,
            max_agent_movment_per_step=5,
            sensitivity=0.5,  # How close the agent can get to the block to pick it up
            initial_setting=None
            ):
        """Configure the arena; call ``reset`` before the first ``step``.

        Args:
            objective: list of ``(color, edge)`` tuples describing where each
                colour must be deposited. ``None`` means ``[(2, TOP_EDGE)]``.
            seed: stored on the instance; ``reset`` seeds from its own argument.
            render_mode: ``None`` or one of ``metadata["render_modes"]``.
            size: side length of the square arena.
            n_agents: number of robots.
            n_blocks: number of blocks.
            n_neighbors: number of sensor slots per agent.
            sensor_range: maximum distance at which an entity is sensed.
            sensor_degree: sensor field of view in degrees.
            max_agent_movment_per_step: upper bound of a single move.
            sensitivity: minimum separation between agents, and the radius
                within which a block can be picked up or an edge is reached.
            initial_setting: optional dict with ``'agents'``, ``'blocks'`` and
                ``'colors'`` sequences giving a fixed start layout.
        """
        # Copy so the caller's list (and the default) is never shared/mutated.
        self.objective = [(2, TOP_EDGE)] if objective is None else list(objective)
        self._completed = []
        self._task_counter = 0

        self.seed = seed
        self.size = size  # The size of the square arena
        self.window_size = 512  # The size of the PyGame window
        self._sensitivity = sensitivity
        self._max_movement = max_agent_movment_per_step

        self._n_agents = n_agents
        self.n_blocks = n_blocks

        self._n_neighbors = n_neighbors
        self._sensors_range = sensor_range
        self._sensors_degree = sensor_degree
        self._sensors_angle = self._sensors_degree / self._n_neighbors
        self._neighbors = np.zeros((self._n_agents, n_neighbors, 3), dtype=float)  # init sensors

        self._agents_locations = np.zeros((self._n_agents, 2), dtype=float)
        self._agents_picked_up = np.full(self._n_agents, -1, dtype=int)

        self._blocks_location = np.zeros((self.n_blocks, 2), dtype=float)
        self._blocks_colors = np.zeros(self.n_blocks, dtype=int)
        self._blocks_picked_up = np.full(self.n_blocks, -1, dtype=int)

        self._rewards = np.zeros(self._n_agents)

        self._initial_setting = initial_setting

        # ANSI colours keyed by block colour id (2 red, 3 green, 4 blue, ...).
        self.color_map = {
            2: "\033[91m",  # Red
            3: "\033[92m",  # Green
            4: "\033[94m",  # Blue
            5: "\033[93m",  # Yellow
            6: "\033[95m",  # Purple
            7: "\033[33m",  # Orange
            8: "\033[90m",  # Dark Gray
        }
        self.reset_color = "\033[0m"  # Resets color to default

        # Action of a single robot: a (distance, direction) move plus a mode.
        single_robot_action_space = spaces.Dict({
            "move": spaces.Box(
                low=np.array([0.0, 0.0]),
                high=np.array([float(max_agent_movment_per_step), 360.0]),
                dtype=np.float64,
            ),
            "action": spaces.Discrete(3)  # 0: MOVE, 1: PICK_UP, 2: PUT_DOWN
        })

        # One action per robot.
        self.action_space = spaces.Tuple([single_robot_action_space for _ in range(self._n_agents)])

        # Mirrors the dict returned by _get_obs(): per-slot upper bounds are
        # [highest type id, sensor range, 360 degrees].
        max_type = max(self.color_map)
        neighbors_high = np.tile(
            np.array([max_type, sensor_range, 360.0], dtype=np.float64),
            (self._n_agents, n_neighbors, 1),
        )
        self.observation_space = spaces.Dict(
            {
                "neighbors": spaces.Box(
                    low=np.zeros_like(neighbors_high), high=neighbors_high, dtype=np.float64
                ),
                "carrying": spaces.Box(
                    low=-1, high=max_type, shape=(self._n_agents,), dtype=np.int64
                ),
            }
        )

        if initial_setting is not None:
            assert len(initial_setting['agents']) == n_agents, "one location per agent is required"
            assert len(initial_setting['blocks']) == n_blocks, "one location per block is required"
            assert len(initial_setting['colors']) == n_blocks, "one colour per block is required"

            # Check if agents and blocks are within the arena
            for agent in initial_setting['agents']:
                assert 0 <= agent[0] < size and 0 <= agent[1] < size, "agent outside the arena"
            for block in initial_setting['blocks']:
                assert 0 <= block[0] < size and 0 <= block[1] < size, "block outside the arena"

            # Every objective colour must be present among the block colours
            for color, _ in self.objective:
                assert color in initial_setting['colors'], "objective colour has no block"

        assert render_mode is None or render_mode in self.metadata["render_modes"]
        self.render_mode = render_mode
        self.window = None
        self.clock = None

    def _calculate_distance_direction(self, pointA, pointB, distance_type='euclidean'):
        """Return ``(distance, direction)`` from ``pointA`` to ``pointB``.

        ``direction`` is in degrees within ``[0, 360)``: down is 0, right is
        90, up is 180 and left is 270.

        Raises:
            ValueError: if ``distance_type`` is not 'manhattan' or 'euclidean'.
        """
        x1, y1 = pointA
        x2, y2 = pointB

        if distance_type == 'manhattan':
            distance = abs(x1 - x2) + abs(y1 - y2)
        elif distance_type == 'euclidean':
            distance = sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2)
        else:
            raise ValueError("Invalid distance type. Use 'manhattan' or 'euclidean'.")

        direction_degrees = degrees(atan2(y2 - y1, x2 - x1))
        # atan2 yields (-180, 180]; shift negatives into [0, 360).
        if direction_degrees < 0:
            direction_degrees += 360

        return distance, direction_degrees

    def _get_obs(self):
        """Refresh every agent's sensor slots and return the observation dict.

        For each agent the ``n_neighbors`` closest entities within sensor range
        are kept, ordered by increasing distance; unused slots stay all-zero.
        Blocks that are currently carried are not sensed.
        """
        self._neighbors = np.zeros((self._n_agents, self._n_neighbors, 3), dtype=float)

        for i in range(self._n_agents):
            # TODO: ensure that the sensors only detect one entity per direction (the closest one)
            detections = []  # (distance, type, direction)

            for j in range(self._n_agents):
                if j == i:
                    continue
                distance, direction = self._calculate_distance_direction(
                    self._agents_locations[i], self._agents_locations[j])
                if distance <= self._sensors_range:
                    detections.append((distance, 1, direction))  # 1 marks an agent

            for j in range(self.n_blocks):
                if self._blocks_picked_up[j] != -1:
                    # Carried blocks are parked at (-1, -1); they are not in the arena.
                    continue
                distance, direction = self._calculate_distance_direction(
                    self._agents_locations[i], self._blocks_location[j])
                if distance <= self._sensors_range:
                    detections.append((distance, self._blocks_colors[j], direction))

            # Stable sort: ties keep agents before blocks. Sorting real
            # detections (not raw slots) keeps a distance-0 entity first.
            detections.sort(key=lambda detection: detection[0])
            for slot, (distance, entity_type, direction) in enumerate(detections[:self._n_neighbors]):
                self._neighbors[i, slot] = [entity_type, distance, direction]

        # Copies, so later steps do not mutate an observation already handed out.
        return {"neighbors": self._neighbors.copy(), "carrying": self._agents_picked_up.copy()}

    def reset(self, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``.

        Uses ``initial_setting`` when one was given, otherwise samples agent
        and block locations (at least ``sensitivity`` apart within each group)
        and block colours at random.
        """
        # We need the following line to seed self.np_random
        super().reset(seed=seed)

        # Clear per-episode state so nothing leaks from the previous episode.
        self._completed = []
        self._rewards = np.zeros(self._n_agents)
        self._agents_picked_up = np.full(self._n_agents, -1, dtype=int)
        self._blocks_picked_up = np.full(self.n_blocks, -1, dtype=int)

        if self._initial_setting is not None:
            # np.array copies and also accepts plain nested lists.
            self._agents_locations = np.array(self._initial_setting['agents'], dtype=float)
            self._blocks_location = np.array(self._initial_setting['blocks'], dtype=float)
            self._blocks_colors = np.array(self._initial_setting['colors'], dtype=int)
        else:
            for i in range(self._n_agents):
                # Resample until the agent is not on top of an earlier one
                while True:
                    self._agents_locations[i] = self.np_random.uniform(0, self.size, size=2)
                    if i == 0 or not np.any(np.linalg.norm(self._agents_locations[i] - self._agents_locations[:i], axis=1) < self._sensitivity):
                        break

            for i in range(self.n_blocks):
                # Resample until the block is not on top of an earlier one
                while True:
                    self._blocks_location[i] = self.np_random.uniform(0, self.size, size=2)
                    if i == 0 or not np.any(np.linalg.norm(self._blocks_location[i] - self._blocks_location[:i], axis=1) < self._sensitivity):
                        break
                self._blocks_colors[i] = self.np_random.integers(2, 2 + len(self.color_map), dtype=int)

        # Number of deliveries needed: one per block whose colour is in the objective.
        self._task_counter = 0
        for color, _ in self.objective:
            for i in range(self.n_blocks):
                if self._blocks_colors[i] == color:
                    self._task_counter += 1

        observation = self._get_obs()
        info = {}

        return observation, info

    def _is_on_edge(self, location, edge):
        """Return True when ``location`` lies within ``sensitivity`` of ``edge``."""
        x, y = location
        far_side = self.size - 1 - self._sensitivity
        if edge == TOP_EDGE:
            return x < self._sensitivity
        if edge == RIGHT_EDGE:
            return y > far_side
        if edge == LEFT_EDGE:
            return y < self._sensitivity
        if edge == BOTTOM_EDGE:
            return x > far_side
        return False

    @staticmethod
    def _heads_towards(direction, edge):
        """Return True when ``direction`` (degrees) points at ``edge``."""
        if edge == TOP_EDGE:
            return 135 < direction < 225  # Moving up
        if edge == RIGHT_EDGE:
            return 45 < direction < 135  # Moving right
        if edge == LEFT_EDGE:
            return 225 < direction < 315  # Moving left
        if edge == BOTTOM_EDGE:
            return direction > 315 or direction < 45  # Moving down
        return False

    def _pick_up(self, i):
        """Let agent ``i`` pick up the first free block within ``sensitivity``."""
        if self._agents_picked_up[i] != -1:  # Already carrying a block
            return

        objective_colors = [color for color, _ in self.objective]
        for j in range(self.n_blocks):
            if self._blocks_picked_up[j] != -1:  # Held by some agent
                continue
            if np.linalg.norm(self._agents_locations[i] - self._blocks_location[j]) < self._sensitivity:
                self._blocks_location[j] = [-1, -1]  # Parked outside the arena while carried
                self._agents_picked_up[i] = self._blocks_colors[j]  # The agent knows the colour it carries
                self._blocks_picked_up[j] = i

                if self._blocks_colors[j] in objective_colors:
                    self._rewards[i] += REWARD_RIGHT_PICKUP
                else:
                    self._rewards[i] += REWARD_WRONG_PICKUP
                break  # An agent carries at most one block

    def _put_down(self, i):
        """Let agent ``i`` drop what it carries at its current location."""
        if self._agents_picked_up[i] == -1:  # Nothing to drop
            return

        for j in range(self.n_blocks):
            if self._blocks_picked_up[j] != i:
                continue
            self._blocks_location[j] = self._agents_locations[i]
            self._agents_picked_up[i] = -1
            self._blocks_picked_up[j] = -1

            # Reward or punish according to every objective entry of this colour.
            for color, edge in self.objective:
                if color != self._blocks_colors[j]:
                    continue
                if self._is_on_edge(self._blocks_location[j], edge):
                    self._rewards[i] += REWARD_RIGHT_PUTDOWN
                    self._completed.append(i)
                else:
                    self._rewards[i] += REWARD_WRONG_PUTDOWN

    def _move(self, i, distance, direction):
        """Move agent ``i`` by ``distance`` along ``direction`` (degrees).

        The distance is limited to the per-step maximum and the direction is
        wrapped into [0, 360). The move is discarded when it would end on
        another agent, on a free block while carrying one, or nowhere new.
        """
        if not (np.isfinite(distance) and np.isfinite(direction)):
            return  # A NaN/inf command would corrupt the agent's position
        distance = float(np.clip(distance, 0.0, self._max_movement))
        direction = float(direction) % 360.0

        heading = np.radians(direction)
        dx = distance * np.cos(heading)
        dy = distance * np.sin(heading)

        # `np.clip` to make sure we don't leave the arena
        new_x = np.clip(self._agents_locations[i][0] + dx, 0, self.size - 1)
        new_y = np.clip(self._agents_locations[i][1] + dy, 0, self.size - 1)
        new_position = np.array([new_x, new_y])

        # Too close to another agent?
        other_agents = np.delete(self._agents_locations, i, axis=0)
        occupied_by_agent = np.any(
            np.linalg.norm(new_position - other_agents, axis=1) < self._sensitivity)

        # Too close to a free block while carrying one (can't pick up two blocks)?
        free_blocks = self._blocks_location[self._blocks_picked_up == -1]
        carrying = self._agents_picked_up[i] != -1
        occupied_by_block_while_carrying = carrying and np.any(
            np.linalg.norm(new_position - free_blocks, axis=1) < self._sensitivity)

        same_position = np.all(new_position == self._agents_locations[i])

        if occupied_by_agent or occupied_by_block_while_carrying or same_position:
            return
        self._agents_locations[i] = new_position

        if not carrying:
            return
        # Reward heading towards the deposit edge of the carried colour, if
        # that colour is part of the objective at all.
        target_edge = None
        for color, edge in self.objective:
            if color == self._agents_picked_up[i]:
                target_edge = edge
                break
        if target_edge is not None and self._heads_towards(direction, target_edge):
            self._rewards[i] += REWARD_MOVING_RIGHT_DIRECTION

    def step(self, action):
        """Apply one action per agent and advance the episode.

        Args:
            action: sequence with one dict per agent holding ``'action'``
                (MOVE, PICK_UP or PUT_DOWN) and ``'move'`` as
                ``(distance, direction_in_degrees)``.

        Returns:
            ``(observation, reward, done, info)`` where ``reward`` is the sum
            over agents and ``info`` is the list of agent indices that made a
            correct delivery so far in this episode.
        """
        self._rewards = np.zeros(self._n_agents)

        for i in range(self._n_agents):
            kind = action[i]['action']
            if kind == PICK_UP:
                self._pick_up(i)
            elif kind == PUT_DOWN:
                self._put_down(i)
            elif kind == MOVE:
                distance, direction = action[i]['move']
                self._move(i, distance, direction)

            self._rewards[i] += REWARD_MOVE  # Punish the agent for timestep (force efficiency)? TODO: check if it's necessary

        observation = self._get_obs()

        # `>=` so a block delivered twice cannot make the episode unfinishable.
        done = len(self._completed) >= self._task_counter

        reward = float(self._rewards.sum())
        info = self._completed

        return observation, reward, done, info

    def print_env(self):
        """Print a coarse 25x25 text rendering of free blocks (O) and agents (index)."""
        vis_grid_size = 25  # Adjust based on desired resolution

        visual_grid = [["." for _ in range(vis_grid_size)] for _ in range(vis_grid_size)]

        # Blocks first, so an agent standing on a block is drawn on top of it.
        for i, block in enumerate(self._blocks_location):
            # Convert continuous coordinates to discrete grid positions
            x, y = int(block[0] * vis_grid_size / self.size), int(block[1] * vis_grid_size / self.size)
            if 0 <= x < vis_grid_size and 0 <= y < vis_grid_size:
                color_id = self._blocks_colors[i]
                color_code = self.color_map.get(color_id, self.reset_color)
                visual_grid[x][y] = f"{color_code}O{self.reset_color}"

        for i, agent in enumerate(self._agents_locations):
            x, y = int(agent[0] * vis_grid_size / self.size), int(agent[1] * vis_grid_size / self.size)
            if 0 <= x < vis_grid_size and 0 <= y < vis_grid_size:
                if self._agents_picked_up[i] != -1:
                    # Tint the agent with the colour of the block it carries.
                    color_id = self._agents_picked_up[i]
                    color_code = self.color_map.get(color_id, self.reset_color)
                    visual_grid[x][y] = f"{color_code}{i}{self.reset_color}"
                else:
                    visual_grid[x][y] = str(i)

        for row in visual_grid:
            print(" ".join(row))

        print()

    def print_neighbors(self):
        """Print, per agent, every non-empty sensor slot of the last observation."""
        for i in range(self._n_agents):
            flag = False
            for j in range(self._n_neighbors):
                if self._neighbors[i, j, 0] != 0:
                    entity = "agent"
                    if self._neighbors[i, j, 0] != 1:
                        entity = f"block (color: {self._neighbors[i,j,0]})"
                    distance = self._neighbors[i, j, 1]
                    direction = self._neighbors[i, j, 2]
                    print(f"Agent {i} sees {entity}: {distance} distance and {direction} degrees direction")
                    flag = True
            if not flag:
                print(f"Agent {i} doesn't see anything")
            print()

    def close(self):
        """Shut down the PyGame window if one was opened."""
        if self.window is not None:
            pygame.display.quit()
            pygame.quit()
            self.window = None  # Makes close() safe to call twice

In [11]:
# Fixed start layout: three agents on the diagonal and three blocks
# (two of colour 2, one of colour 3), all in simulation units.
initial_setting = dict(
    agents=np.array([[5, 5], [10, 10], [15, 15]], dtype=float),
    blocks=np.array([[5, 10], [10, 5], [15, 10]], dtype=float),
    colors=np.array([2, 2, 3], dtype=int),
)

# Environment options, expressed in simulation units.
env_options = dict(
    render_mode='rgb_array',
    objective=[(2, TOP_EDGE), (3, RIGHT_EDGE)],
    size=SIMULATION_ARENA_SIZE,
    n_agents=3,
    n_blocks=3,
    n_neighbors=4,
    sensor_range=SIMULATION_SENSOR_RANGE,
    sensor_degree=360,
    max_agent_movment_per_step=SIMULATION_MAX_DISTANCE,
    sensitivity=0.5,
    initial_setting=initial_setting,
)
env = GridWorldEnv(**env_options)

initial_state, _ = env.reset()  # Initial state
env.print_env()

. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . [0m0[0m . . . . . [91mO[0m . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . [91mO[0m . . . . . [0m1[0m . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . [94mO[0m . . . . . [0m2[0m . . . . . .

## Little Demo for testing

In [12]:
# Sample a well-formed joint action, then overwrite each agent's entry with a
# hand-picked (distance, direction) move.
action = env.action_space.sample()
demo_moves = ([1, 136], [1, 90], [5, 0])
for agent_action, demo_move in zip(action, demo_moves):
    agent_action['action'] = MOVE
    agent_action['move'] = demo_move

next_state, reward, done, _ = env.step(action)
env.print_env()
reward

. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . [0m0[0m . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . [91mO[0m . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . [91mO[0m . . . . . . [0m1[0m . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . [94mO[0m . . . . . . . . . . . .
. . . .

-3.0

# Evolutionary Neural Networks

WORK IN PROGRESS

In [13]:
import neat

In [14]:
def _encode_agent_observation(obs, agent_index):
    """Flatten one agent's observation into the network input vector.

    Per sensor slot: one-hot entity type, distance scaled by the sensor range
    and direction scaled by 360; followed by the one-hot carried colour
    (index 0 when nothing is carried).
    """
    neighbors = obs["neighbors"][agent_index]
    types_one_hot_encoded = np.eye(N_TYPES)[neighbors[:, 0].astype(int)]
    distances = neighbors[:, 1] / SIMULATION_SENSOR_RANGE
    directions = neighbors[:, 2] / 360

    carrying_block = obs["carrying"][agent_index]
    carrying_block = 0 if carrying_block == -1 else carrying_block
    carrying_block_one_hot_encoded = np.eye(N_TYPES)[carrying_block]

    per_slot = np.concatenate((types_one_hot_encoded,
                               distances.reshape(-1, 1),
                               directions.reshape(-1, 1)), axis=1).flatten()
    return np.concatenate((per_slot, carrying_block_one_hot_encoded))


def _decode_network_output(nn_output):
    """Turn the 5 network outputs into an environment action dict.

    Outputs 0-2 score MOVE / PICK_UP / PUT_DOWN, output 3 scales the move
    distance and output 4 scales the move direction.
    """
    # NOTE(review): the scaling presumes outputs 3 and 4 lie in [0, 1];
    # confirm against the activation configured in the NEAT config file.
    return {
        # All three action scores take part, so PUT_DOWN can be selected.
        "action": int(np.argmax(nn_output[:3])),
        "move": [nn_output[3]*SIMULATION_MAX_DISTANCE, nn_output[4]*360]
    }


def fitness_function(genomes, config):
    """Assign ``genome.fitness`` to every genome from one 20-step episode.

    Args:
        genomes: iterable of ``(genome_id, genome)`` pairs supplied by NEAT.
        config: NEAT configuration used to build each network.

    Every agent of the module-level ``env`` is driven by the same network and
    the fitness is the sum of the joint rewards returned by ``env.step``.
    """
    expected_inputs = config.genome_config.num_inputs
    for genome_id, genome in genomes:

        genome.fitness = 0
        net = neat.nn.FeedForwardNetwork.create(genome, config)
        obs, _ = env.reset()
        for _ in range(20):

            actions = []
            for i in range(env._n_agents):
                nn_input = _encode_agent_observation(obs, i)
                if len(nn_input) != expected_inputs:
                    print("Check the NN input size")

                nn_output = net.activate(nn_input)
                actions.append(_decode_network_output(nn_output))

            obs, reward, done, _ = env.step(actions)
            genome.fitness += reward
            if done:
                break

In [15]:
# Load the NEAT settings from the configuration file
config_path = "./neat_config.txt"
config = neat.config.Config(
    neat.DefaultGenome,
    neat.DefaultReproduction,
    neat.DefaultSpeciesSet,
    neat.DefaultStagnation,
    config_path,
)

# Population that carries out the evolution
p = neat.Population(config)

# Reporters: console progress first, then a statistics collector
stats = neat.StatisticsReporter()
for reporter in (neat.StdOutReporter(True), stats):
    p.add_reporter(reporter)

# Evolve for 10 generations and keep the best genome
winner = p.run(fitness_function, 10)


 ****** Running generation 0 ****** 



Population's average fitness: -60.00000 stdev: 0.00000
Best fitness: -60.00000 - size: (5, 265) - species 1 - id 1
Average adjusted fitness: 0.000
Mean genetic distance 1.116, standard deviation 0.201
Population of 150 members in 1 species:
   ID   age  size  fitness  adj fit  stag
     1    0   150    -60.0    0.000     0
Total extinctions: 0
Generation time: 3.503 sec

 ****** Running generation 1 ****** 

Population's average fitness: -60.00000 stdev: 0.00000
Best fitness: -60.00000 - size: (5, 265) - species 1 - id 1
Average adjusted fitness: 0.000
Mean genetic distance 1.146, standard deviation 0.203
Population of 150 members in 1 species:
   ID   age  size  fitness  adj fit  stag
     1    1   150    -60.0    0.000     1
Total extinctions: 0
Generation time: 6.661 sec (5.082 average)

 ****** Running generation 2 ****** 

Population's average fitness: -60.00000 stdev: 0.00000
Best fitness: -60.00000 - size: (5, 265) - species 1 - id 1
Average adjusted fitness: 0.000
Mean genetic 

In [16]:
# Replay one episode with the best genome, printing every intermediate state.
winner_net = neat.nn.FeedForwardNetwork.create(winner, config)
expected_inputs = config.genome_config.num_inputs  # input size the network was built for
obs, _ = env.reset()
env.print_env()
for _ in range(20):
    actions = []
    print(obs["neighbors"])
    for i in range(env._n_agents):
        types = obs["neighbors"][i][:,0]
        types_one_hot_encoded = np.eye(N_TYPES)[types.astype(int)] # One hot encoding of the types
        distances = obs["neighbors"][i][:,1] / SIMULATION_SENSOR_RANGE # Normalize the distances
        directions = obs["neighbors"][i][:,2] / 360 # Normalize the directions
        carrying_block = obs["carrying"][i]
        carrying_block = 0 if carrying_block == -1 else carrying_block
        carrying_block_one_hot_encoded = np.eye(N_TYPES)[carrying_block] # One hot encoding of the carrying block

        nn_input = np.concatenate((types_one_hot_encoded,
                            distances.reshape(-1,1),
                            directions.reshape(-1,1)), axis=1).flatten()

        nn_input = np.concatenate((nn_input, carrying_block_one_hot_encoded))
        if len(nn_input) != expected_inputs:
            print("Check the NN input size")

        print(nn_input)
        nn_output = winner_net.activate(nn_input)
        action = {
            # Outputs 0-2 score MOVE / PICK_UP / PUT_DOWN; all three must take
            # part in the argmax or PUT_DOWN can never be chosen.
            "action": int(np.argmax(nn_output[:3])),
            # Output 3 scales the distance, output 4 the direction.
            "move": [nn_output[3]*SIMULATION_MAX_DISTANCE, nn_output[4]*360]
        }
        actions.append(action)
    print(actions)
    obs, reward, done, _ = env.step(actions)
    env.print_env()
    print(reward)
    if done:
        break

. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . [0m0[0m . . . . . [91mO[0m . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . [91mO[0m . . . . . [0m1[0m . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . [94mO[0m . . . . . [0m2[0m . . . . . .

In [18]:
# Show the agents' current (x, y) locations stored on the environment.
env._agents_locations

array([[ 7.02336098,  5.00000715],
       [19.        , 10.01111173],
       [19.        , 15.00014298]])