# Environment Demo

In [48]:
import numpy as np
import importlib
import environment 
importlib.reload(environment)
from environment import Environment

## Robot features

- 25 cm diameter
- compass
- 360 vision sensor and object reconition in range 50cm
- comunication between others robots
- ability to pick up stuff (in they're in the same position of the object)
- holonomic motion (every directions)
- maximum velocity: 200 cm/s
- maximum acceleration: 400 cm/s²

In [49]:
TIME_PER_STEP = 1 # a step is seconds
ROBOT_SIZE = 25 # in cm (diameter)
SENSOR_RANGE = 75 # in cm #TODO change
MAX_VELOCITY = 200 # in cm/s
VELOCITY = 100 # initial 0 max 200, in cm/s
MAX_ACC = 400 # in cm/s^2
MAX_DISTANCE = VELOCITY * TIME_PER_STEP # in cm

SIMULATION_ROBOT_SIZE = ROBOT_SIZE / ROBOT_SIZE # 1
SIMULATION_SENSOR_RANGE = SENSOR_RANGE / ROBOT_SIZE # 3
SIMULATION_MAX_DISTANCE = MAX_DISTANCE / ROBOT_SIZE # 4

We are in a continuous 2D environment (no physics), a robot possesses the capability to navigate in any direction, covering any distance up to a defined maximum per step. Additionally, the robot can pick up (when underneath) and deposit objects.

In [50]:
MOVE = 0
PICK_UP = 1
PUT_DOWN = 2

The robots are equipped with sensory equipment capable of identifying nearby entities. A "neighbor" is characterized by a tuple comprising the type of object, the distance to it, and its relative direction. Accordingly, each robot maintains a list of such tuples for a predefined fixed number of neighboring entities.

## Arena

5m x 5m with robots and colored objects 

In [51]:
ARENA_SIZE = 500 # in cm
SIMULATION_ARENA_SIZE = ARENA_SIZE / ROBOT_SIZE # robot size is 1 in the simulation

## Objective
List of (color_id, edge). The robots must pick up the objects and deposit them in right position. The deposit area is in an edge of the arena.

In [52]:
TOP_EDGE = 0
RIGHT_EDGE = 1
LEFT_EDGE = 2
BOTTOM_EDGE = 3
RED = 3
BLUE = 4
GREEN = 5
YELLOW = 6
PURPLE = 7
ORANGE = 8
GREY = 9

In [53]:
REWARD_RIGHT_PICKUP = 10
REWARD_RIGHT_PUTDOWN = 20
REWARD_WRONG_PICKUP = -5
REWARD_WRONG_PUTDOWN = -10
REWARD_MOVING_RIGHT_DIRECTION = 2
REWARD_MOVE = -1

## Environment construction

In [54]:
initial_setting = {
    'agents': np.array([[5, 5], [10, 10], [15, 15]], dtype=float),
    'blocks': np.array([[4, 16], [13, 5], [16, 4]], dtype=float),
    'colors': np.array([RED, RED, BLUE], dtype=int)
}
env = Environment(objective=[(RED, TOP_EDGE), (BLUE, RIGHT_EDGE)],
                   size=SIMULATION_ARENA_SIZE, 
                   n_agents=3, 
                   n_blocks=3,
                   n_neighbors=3,
                   sensor_range=SIMULATION_SENSOR_RANGE,
                   sensor_angle=360,
                   max_distance_covered_per_step=SIMULATION_MAX_DISTANCE,
                   sensitivity=0.5,
                   initial_setting=initial_setting)
initial_state, _ = env.reset() # Initial state
env.print_env()
initial_state

. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . [91mO[0m . . . .
. . . . . . [0m0[0m . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . [0m1[0m . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . [91mO[0m . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . [0m2[0m . . . . . .
. . . . 

{'neighbors': array([[[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]],
 
        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]],
 
        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]]]),
 'carrying': array([-1, -1, -1])}

## Testing

In [55]:
action = env.action_space.sample()
action[0]['action'] = MOVE
action[0]['move'] = [3, 0]
action[1]['action'] = MOVE
action[1]['move'] = [1, 90]
action[2]['action'] = MOVE
action[2]['move'] = [5, 0]
next_state, reward, done, _ = env.step(action)
env.print_env()
reward

. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . [91mO[0m . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . [0m0[0m . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . [0m1[0m . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . [91mO[0m . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . 

-3.0

## Profiling

about 3000 steps per second

In [56]:
import time

In [57]:
total_time = 0
for i in range(3000):
    action = env.action_space.sample()
    start = time.time()
    next_state, reward, done, _ = env.step(action)
    end = time.time()
    total_time += end - start
total_time / 50, total_time

(0.021619114875793457, 1.0809557437896729)