In [2]:
!pip install vmas benchmarl pyvirtualdisplay moviepy
!apt-get install python3-opengl
import pyvirtualdisplay
display = pyvirtualdisplay.Display(visible=False, size=(1400, 900))
display.start()

Collecting vmas
  Downloading vmas-1.5.1.tar.gz (218 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m218.1/218.1 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting benchmarl
  Downloading benchmarl-1.5.0.tar.gz (101 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.4/101.4 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pyvirtualdisplay
  Downloading PyVirtualDisplay-3.0-py3-none-any.whl.metadata (943 bytes)
Collecting pyglet<=1.5.27 (from vmas)
  Downloading pyglet-1.5.27-py3-none-any.whl.metadata (7.6 kB)
Collecting torchrl~=0.8.0 (from benchmarl)
  Downloading torchrl-0.8.1-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.2/4

<pyvirtualdisplay.display.Display at 0x7ca025e9c440>

In [3]:
import torch

import vmas.simulator.core
import vmas.simulator.utils
from vmas.simulator.dynamics.common import Dynamics


class FixedWingKinematicBicycle(Dynamics):
    """Kinematic bicycle dynamics whose commanded speed is kept in a fixed band.

    The agent's action has two components: a speed command and a steering
    command. The speed command is clamped to ``[min_v, max_v]`` and the
    steering command to ``[-max_steering_angle, max_steering_angle]``. The
    resulting motion over one world time step is integrated with either
    Euler's method or fourth-order Runge-Kutta and then converted into the
    force and torque written to the agent's state.
    """

    def __init__(
        self,
        world: vmas.simulator.core.World,
        width: float,
        l_f: float,
        l_r: float,
        max_steering_angle: float,
        min_v: float = 0.3,
        max_v: float = 1.0,
        integration: str = "rk4",
    ):
        """Store the model parameters.

        Args:
            world: World the agent lives in; supplies ``dt`` and ``device``.
            width: Stored as ``self.width``; not used by the methods of this class.
            l_f: First of the two lengths whose sum scales the yaw rate
                (presumably centre of gravity to front axle - confirm).
            l_r: Second length; also scales the slip angle
                (presumably centre of gravity to rear axle - confirm).
            max_steering_angle: Symmetric bound applied to the steering command.
            min_v: Lower bound applied to the speed command.
            max_v: Upper bound applied to the speed command.
            integration: ``"rk4"`` or ``"euler"``.

        Raises:
            AssertionError: If ``integration`` is not one of the two supported names.
        """
        super().__init__()
        supported_integrators = ("rk4", "euler")
        assert (
            integration in supported_integrators
        ), "Integration method must be 'euler' or 'rk4'."

        self.world = world
        self.dt = world.dt
        self.integration = integration

        # Geometry and command limits
        self.width = width
        self.l_f = l_f
        self.l_r = l_r
        self.max_steering_angle = max_steering_angle
        self.min_v = min_v
        self.max_v = max_v

    def f(self, state, steering_command, v_command):
        """Return the time derivative ``(dx, dy, dtheta)`` of ``state``.

        ``state`` is indexed as ``[batch, 3]`` with the yaw angle in column 2;
        the result is stacked to the same ``[batch, 3]`` layout.
        """
        wheelbase = self.l_f + self.l_r
        yaw = state[:, 2]
        tan_steering = torch.tan(steering_command)

        # Slip angle; atan2 against a constant 1 keeps it in [-pi, pi]
        slip = torch.atan2(
            tan_steering * self.l_r / wheelbase,
            torch.tensor(1, device=self.world.device),
        )
        course = yaw + slip

        x_rate = v_command * torch.cos(course)
        y_rate = v_command * torch.sin(course)
        yaw_rate = v_command / wheelbase * torch.cos(slip) * tan_steering
        return torch.stack((x_rate, y_rate, yaw_rate), dim=1)  # [batch_size,3]

    def euler(self, state, steering_command, v_command):
        """Change in state over one time step using Euler's method.

        For Euler's method, see https://math.libretexts.org/Bookshelves/Calculus/Book%3A_Active_Calculus_(Boelkins_et_al.)/07%3A_Differential_Equations/7.03%3A_Euler's_Method
        (the full link may not be recognized properly, please copy and paste in your browser)
        """
        derivative = self.f(state, steering_command, v_command)
        return self.dt * derivative

    def runge_kutta(self, state, steering_command, v_command):
        """Change in state over one time step using fourth-order Runge-Kutta.

        For the Runge-Kutta method, see https://math.libretexts.org/Courses/Monroe_Community_College/MTH_225_Differential_Equations/3%3A_Numerical_Methods/3.3%3A_The_Runge-Kutta_Method
        """
        dt = self.dt

        def slope(at_state):
            # The commands are held constant over the whole step.
            return self.f(at_state, steering_command, v_command)

        k1 = slope(state)
        k2 = slope(state + dt * k1 / 2)
        k3 = slope(state + dt * k2 / 2)
        k4 = slope(state + dt * k3)
        return (dt / 6) * (k1 + 2 * k2 + 2 * k3 + k4)

    @property
    def needed_action_size(self) -> int:
        """Number of action components consumed: speed and steering."""
        return 2

    def process_action(self):
        """Turn the agent's (speed, steering) action into a force and a torque."""
        agent = self.agent
        dt = self.dt
        dt_squared = dt**2
        commands = agent.action.u

        # Speed is confined to [min_v, max_v]: this is the only difference the
        # original author made with respect to the plain kinematic bicycle.
        v_command = torch.clamp(commands[:, 0], self.min_v, self.max_v)
        # Steering is confined to the symmetric steering limit.
        steering_command = torch.clamp(
            commands[:, 1], -self.max_steering_angle, self.max_steering_angle
        )

        # Current pose (x, y, yaw) of the agent
        pose = torch.cat((agent.state.pos, agent.state.rot), dim=1)

        # Pose change over one step with the configured integrator
        integrate = self.euler if self.integration == "euler" else self.runge_kutta
        delta_state = integrate(pose, steering_command, v_command)

        # Acceleration needed on top of the current velocity to realise that
        # pose change within one step.
        accel_x = (delta_state[:, 0] - agent.state.vel[:, 0] * dt) / dt_squared
        accel_y = (delta_state[:, 1] - agent.state.vel[:, 1] * dt) / dt_squared
        accel_yaw = (delta_state[:, 2] - agent.state.ang_vel[:, 0] * dt) / dt_squared

        # Write the corresponding force and torque into the agent's state.
        agent.state.force[:, vmas.simulator.utils.X] = agent.mass * accel_x
        agent.state.force[:, vmas.simulator.utils.Y] = agent.mass * accel_y
        agent.state.torque = (agent.moment_of_inertia * accel_yaw).unsqueeze(-1)

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.
  return datetime.utcnow().replace(tzinfo=utc)


In [4]:
import typing
from typing import Callable, Dict, List

import torch
from torch import Tensor

from vmas import render_interactively
from vmas.simulator.core import Agent, Entity, Landmark, Box, Sphere, World
from vmas.simulator.scenario import BaseScenario
from vmas.simulator.sensors import Lidar
from vmas.simulator.utils import Color, ScenarioUtils, X, Y
from vmas.simulator.dynamics.kinematic_bicycle import KinematicBicycle

if typing.TYPE_CHECKING:
    from vmas.simulator.rendering import Geom


class FWDiscoveryScenario(BaseScenario):
    """Multi-agent target-discovery scenario with fixed-wing-style agents.

    Agents are boxes driven by ``FixedWingKinematicBicycle`` dynamics and sense
    targets (and optionally each other) through lidars. A target counts as
    covered when at least ``agents_per_target`` agents are closer to it than
    ``covering_range``. Covered targets are then either moved to a new random
    position or moved far outside the arena, depending on ``targets_respawn``.
    """

    def make_world(self, batch_dim: int, device: torch.device, **kwargs):
        """Read the scenario options from ``kwargs`` and build the world.

        Recognised options (default in parentheses): ``n_agents`` (5),
        ``n_targets`` (7), ``x_semidim`` (1), ``y_semidim`` (1),
        ``min_dist_between_entities`` (0.2), ``lidar_range`` (0.35),
        ``covering_range`` (0.25), ``use_agent_lidar`` (False),
        ``n_lidar_rays_entities`` (15), ``n_lidar_rays_agents`` (12),
        ``agents_per_target`` (2), ``targets_respawn`` (True),
        ``shared_reward`` (False), ``agent_collision_penalty`` (0),
        ``covering_rew_coeff`` (1.0), ``time_penalty`` (0),
        ``render_action`` (False).

        Returns:
            The populated ``World`` with ``n_agents`` agents and ``n_targets``
            target landmarks.
        """
        self.n_agents = kwargs.pop("n_agents", 5)
        self.n_targets = kwargs.pop("n_targets", 7)
        self.x_semidim = kwargs.pop("x_semidim", 1)
        self.y_semidim = kwargs.pop("y_semidim", 1)
        self._min_dist_between_entities = kwargs.pop("min_dist_between_entities", 0.2)
        self._lidar_range = kwargs.pop("lidar_range", 0.35)
        self._covering_range = kwargs.pop("covering_range", 0.25)

        self.use_agent_lidar = kwargs.pop("use_agent_lidar", False)
        self.n_lidar_rays_entities = kwargs.pop("n_lidar_rays_entities", 15)
        self.n_lidar_rays_agents = kwargs.pop("n_lidar_rays_agents", 12)

        self._agents_per_target = kwargs.pop("agents_per_target", 2)
        self.targets_respawn = kwargs.pop("targets_respawn", True)
        self.shared_reward = kwargs.pop("shared_reward", False)

        self.agent_collision_penalty = kwargs.pop("agent_collision_penalty", 0)
        self.covering_rew_coeff = kwargs.pop("covering_rew_coeff", 1.0)
        self.time_penalty = kwargs.pop("time_penalty", 0)
        self.render_action = kwargs.pop("render_action", False) # Modification: forwarded to every Agent below
        # Every recognised option has been popped above; presumably this
        # rejects (or reports) any option left over in kwargs - TODO confirm.
        ScenarioUtils.check_kwargs_consumed(kwargs)

        # Only used by extra_render to decide which agent pairs get a line.
        self._comms_range = self._lidar_range
        # Threshold compared against world.get_distance() in reward().
        self.min_collision_distance = 0.005
        self.agent_radius = 0.05
        self.target_radius = self.agent_radius

        self.viewer_zoom = 1
        self.target_color = Color.GREEN

        # Make world
        world = World(
            batch_dim,
            device,
            x_semidim=self.x_semidim,
            y_semidim=self.y_semidim,
            collision_force=500,
            substeps=2,
            drag=0.25,
        )

        # Add agents
        # Lidar filters select entities by name prefix: agents are named
        # "agent_<i>" and targets "target_<i>" below.
        entity_filter_agents: Callable[[Entity], bool] = lambda e: e.name.startswith(
            "agent"
        )
        entity_filter_targets: Callable[[Entity], bool] = lambda e: e.name.startswith(
            "target"
        )
        # Steering limit handed to the dynamics of every agent.
        _max_steering_angle = torch.pi/4
        for i in range(self.n_agents):
            # Constraint: all agents have same action range and multiplier
            agent = Agent(
                name=f"agent_{i}",
                collide=True,
                color=Color.ORANGE, # Modification (not important)
                shape=Box(length=self.agent_radius * 2, width=self.agent_radius),
                # sensors[0] always detects targets; sensors[1] (agents) exists
                # only when use_agent_lidar is set. observation() relies on
                # this ordering.
                sensors=(
                    [
                        Lidar(
                            world,
                            n_rays=self.n_lidar_rays_entities,
                            max_range=self._lidar_range,
                            entity_filter=entity_filter_targets,
                            render_color=Color.GREEN,
                        )
                    ]
                    + (
                        [
                            Lidar(
                                world,
                                angle_start=0.05,
                                angle_end=2 * torch.pi + 0.05,
                                n_rays=self.n_lidar_rays_agents,
                                max_range=self._lidar_range,
                                entity_filter=entity_filter_agents,
                                render_color=Color.BLUE,
                            )
                        ]
                        if self.use_agent_lidar
                        else []
                    )
                ),
                dynamics=FixedWingKinematicBicycle(
                    world,
                    width=self.agent_radius,
                    l_f=self.agent_radius,
                    l_r=self.agent_radius,
                    max_steering_angle=_max_steering_angle
                ), # Modification: speed-bounded bicycle dynamics
                render_action=self.render_action # Modification
            )
            # Per-agent reward buffers, one entry per environment; both are
            # overwritten in place on every step.
            agent.collision_rew = torch.zeros(batch_dim, device=device)
            agent.covering_reward = agent.collision_rew.clone()
            world.add_agent(agent)

        self._targets = []
        for i in range(self.n_targets):
            target = Landmark(
                name=f"target_{i}",
                collide=True,
                movable=False,
                shape=Sphere(radius=self.target_radius),
                color=self.target_color,
            )
            world.add_landmark(target)
            self._targets.append(target)

        # Placeholder until the first reward() call, which replaces it with a
        # boolean (batch_dim, n_targets) mask.
        self.covered_targets = torch.zeros(batch_dim, self.n_targets, device=device)
        self.shared_covering_rew = torch.zeros(batch_dim, device=device)

        return world

    def reset_world_at(self, env_index: int = None):
        """Reset one environment, or all of them when ``env_index`` is None.

        Clears the record of targets covered so far and places targets and
        agents at random positions inside the arena, at least
        ``min_dist_between_entities`` apart.
        """
        placable_entities = self._targets[: self.n_targets] + self.world.agents
        if env_index is None:
            # Full reset: (re)create the record for every environment.
            self.all_time_covered_targets = torch.full(
                (self.world.batch_dim, self.n_targets),
                False,
                device=self.world.device,
            )
        else:
            # Single-environment reset; requires that a full reset has already
            # created the tensor.
            self.all_time_covered_targets[env_index] = False
        ScenarioUtils.spawn_entities_randomly(
            entities=placable_entities,
            world=self.world,
            env_index=env_index,
            min_dist_between_entities=self._min_dist_between_entities,
            x_bounds=(-self.world.x_semidim, self.world.x_semidim),
            y_bounds=(-self.world.y_semidim, self.world.y_semidim),
        )
        # make_world creates exactly n_targets targets, so this slice is empty
        # and the loop does nothing in this scenario.
        for target in self._targets[self.n_targets :]:
            target.set_pos(self.get_outside_pos(env_index), batch_index=env_index)

    def reward(self, agent: Agent):
        """Return the reward of ``agent``, one value per environment.

        The reward is the agent's collision penalty plus its covering reward
        (or the shared covering reward when ``shared_reward`` is set) plus the
        time penalty.

        Work shared by all agents is done when this is called for the first
        agent in ``world.agents`` (distances, coverage mask, covering rewards)
        and covered targets are relocated when it is called for the last one.
        NOTE(review): this assumes the caller invokes reward() for every agent
        in ``world.agents`` order on each step - confirm against the simulator.
        """
        is_first = agent == self.world.agents[0]
        is_last = agent == self.world.agents[-1]

        if is_first:
            self.time_rew = torch.full(
                (self.world.batch_dim,),
                self.time_penalty,
                device=self.world.device,
            )
            # Stacked along dim 1: one row per agent / per target.
            self.agents_pos = torch.stack(
                [a.state.pos for a in self.world.agents], dim=1
            )
            self.targets_pos = torch.stack([t.state.pos for t in self._targets], dim=1)
            # Pairwise agent-to-target distances, indexed [env, agent, target].
            self.agents_targets_dists = torch.cdist(self.agents_pos, self.targets_pos)
            # Number of agents within covering range of each target (sum over
            # the agent dimension).
            self.agents_per_target = torch.sum(
                (self.agents_targets_dists < self._covering_range).type(torch.int),
                dim=1,
            )
            self.covered_targets = self.agents_per_target >= self._agents_per_target

            # agent_reward() also refreshes each agent's own covering_reward.
            self.shared_covering_rew[:] = 0
            for a in self.world.agents:
                self.shared_covering_rew += self.agent_reward(a)
            # NOTE(review): the divisor 2 is hard-coded; it equals the default
            # agents_per_target - confirm the intent for other values.
            self.shared_covering_rew[self.shared_covering_rew != 0] /= 2

        # Avoid collisions with each other
        agent.collision_rew[:] = 0
        for a in self.world.agents:
            if a != agent:
                agent.collision_rew[
                    self.world.get_distance(a, agent) < self.min_collision_distance
                ] += self.agent_collision_penalty

        if is_last:
            if self.targets_respawn:
                # Relocate every covered target to a random position that keeps
                # the minimum distance from all agents and all other targets.
                occupied_positions_agents = [self.agents_pos]
                for i, target in enumerate(self._targets):
                    occupied_positions_targets = [
                        o.state.pos.unsqueeze(1)
                        for o in self._targets
                        if o is not target
                    ]
                    occupied_positions = torch.cat(
                        occupied_positions_agents + occupied_positions_targets,
                        dim=1,
                    )
                    # A candidate position is drawn for every environment ...
                    pos = ScenarioUtils.find_random_pos_for_entity(
                        occupied_positions,
                        env_index=None,
                        world=self.world,
                        min_dist_between_entities=self._min_dist_between_entities,
                        x_bounds=(-self.world.x_semidim, self.world.x_semidim),
                        y_bounds=(-self.world.y_semidim, self.world.y_semidim),
                    )

                    # ... but applied only where target i is currently covered.
                    target.state.pos[self.covered_targets[:, i]] = pos[
                        self.covered_targets[:, i]
                    ].squeeze(1)
            else:
                # No respawn: remember which targets have ever been covered and
                # move the covered ones far outside the arena.
                self.all_time_covered_targets += self.covered_targets
                for i, target in enumerate(self._targets):
                    target.state.pos[self.covered_targets[:, i]] = self.get_outside_pos(
                        None
                    )[self.covered_targets[:, i]]
        covering_rew = (
            agent.covering_reward
            if not self.shared_reward
            else self.shared_covering_rew
        )

        return agent.collision_rew + covering_rew + self.time_rew

    def get_outside_pos(self, env_index):
        """Sample a position far outside the arena.

        Returns a tensor of shape ``(1, dim_p)`` when ``env_index`` is given,
        else ``(batch_dim, dim_p)``. Every coordinate is drawn uniformly
        between ``-1000 * x_semidim`` and ``-10 * x_semidim``.
        """
        return torch.empty(
            (
                (1, self.world.dim_p)
                if env_index is not None
                else (self.world.batch_dim, self.world.dim_p)
            ),
            device=self.world.device,
        ).uniform_(-1000 * self.world.x_semidim, -10 * self.world.x_semidim)

    def agent_reward(self, agent):
        """Recompute and return ``agent.covering_reward`` (updated in place).

        The value is ``covering_rew_coeff`` times the number of targets that
        are both covered and within covering range of this agent. It relies on
        the distances and coverage mask computed in ``reward`` for this step.
        """
        agent_index = self.world.agents.index(agent)

        agent.covering_reward[:] = 0
        targets_covered_by_agent = (
            self.agents_targets_dists[:, agent_index] < self._covering_range
        )
        # Keep only the targets that also have enough agents around them.
        num_covered_targets_covered_by_agent = (
            targets_covered_by_agent * self.covered_targets
        ).sum(dim=-1)
        agent.covering_reward += (
            num_covered_targets_covered_by_agent * self.covering_rew_coeff
        )
        return agent.covering_reward

    def observation(self, agent: Agent):
        """Return the observation of ``agent`` as a dict of tensors.

        Keys: ``"obs"`` (target-lidar readings, followed by agent-lidar
        readings when ``use_agent_lidar`` is set), ``"pos"``, ``"vel"`` and,
        for bicycle dynamics, ``"rot"`` and ``"ang_vel"``.
        """
        lidar_1_measures = agent.sensors[0].measure()
        obs = {"obs" : torch.cat(
            [lidar_1_measures]
            + ([agent.sensors[1].measure()] if self.use_agent_lidar else []),
            dim=-1),
                "pos" : agent.state.pos,
                "vel" : agent.state.vel
        }
        # Heading and turn rate are only added for the two bicycle dynamics.
        if isinstance(agent.dynamics, KinematicBicycle) or isinstance(agent.dynamics, FixedWingKinematicBicycle):
            obs.update({
                "rot": agent.state.rot,
                "ang_vel": agent.state.ang_vel
            })
        return obs

    def info(self, agent: Agent) -> Dict[str, Tensor]:
        """Return the reward components of ``agent`` and the per-environment
        count of currently covered targets."""
        info = {
            "covering_reward": (
                agent.covering_reward
                if not self.shared_reward
                else self.shared_covering_rew
            ),
            "collision_rew": agent.collision_rew,
            "targets_covered": self.covered_targets.sum(-1),
        }
        return info

    def done(self):
        """Return, per environment, whether every target has been covered.

        ``all_time_covered_targets`` is only updated in the no-respawn branch
        of ``reward``, so with ``targets_respawn=True`` it keeps the all-False
        value set by ``reset_world_at``.
        """
        return self.all_time_covered_targets.all(dim=-1)

    def extra_render(self, env_index: int = 0) -> "List[Geom]":
        """Return extra geometries for environment ``env_index``.

        Draws an unfilled circle of radius ``covering_range`` around every
        target and a black line between every pair of agents whose distance is
        at most ``_comms_range``.
        """
        from vmas.simulator import rendering

        geoms: List[Geom] = []
        # Target ranges
        for target in self._targets:
            range_circle = rendering.make_circle(self._covering_range, filled=False)
            xform = rendering.Transform()
            xform.set_translation(*target.state.pos[env_index])
            range_circle.add_attr(xform)
            range_circle.set_color(*self.target_color.value)
            geoms.append(range_circle)
        # Communication lines
        for i, agent1 in enumerate(self.world.agents):
            for j, agent2 in enumerate(self.world.agents):
                # Visit each unordered pair once.
                if j <= i:
                    continue
                agent_dist = torch.linalg.vector_norm(
                    agent1.state.pos - agent2.state.pos, dim=-1
                )
                if agent_dist[env_index] <= self._comms_range:
                    color = Color.BLACK.value
                    line = rendering.Line(
                        (agent1.state.pos[env_index]),
                        (agent2.state.pos[env_index]),
                        width=1,
                    )
                    xform = rendering.Transform()
                    line.add_attr(xform)
                    line.set_color(*color)
                    geoms.append(line)

        return geoms

In [16]:
import copy
from typing import Callable, Optional
from benchmarl.environments import VmasTask
from benchmarl.utils import DEVICE_TYPING
from torchrl.envs import EnvBase, VmasEnv

def get_env_fun(
    self,
    num_envs: int,
    continuous_actions: bool,
    seed: Optional[int],
    device: DEVICE_TYPING,
):
    """Return a zero-argument factory that builds the ``VmasEnv`` for this task.

    Intended to replace the task's own ``get_env_fun``. For the NAVIGATION
    task the custom ``FWDiscoveryScenario`` is used; every other task keeps
    the scenario named after the task (lower-cased).

    Args:
        self: The task object; it supplies ``config`` and ``name``.
        num_envs: Number of vectorised environments.
        continuous_actions: Whether the environment uses continuous actions.
        seed: Seed forwarded to the environment.
        device: Device forwarded to the environment.
    """
    # Work on a copy so the task's own config is never mutated.
    env_kwargs = copy.deepcopy(self.config)

    # The task may be identified either by its `name` attribute or by being
    # the VmasTask.NAVIGATION member itself.
    is_navigation = (
        getattr(self, "name", None) == "NAVIGATION" or self is VmasTask.NAVIGATION
    )
    scenario = FWDiscoveryScenario() if is_navigation else self.name.lower()

    def make_env():
        return VmasEnv(
            scenario=scenario,
            num_envs=num_envs,
            continuous_actions=continuous_actions,
            seed=seed,
            device=device,
            categorical_actions=True,
            **env_kwargs,
        )

    return make_env

In [17]:
# Install the custom environment factory on whichever task type this BenchMARL
# installation exposes.
try:
    from benchmarl.environments import VmasClass
except ImportError:
    # VmasClass is not available in this installation: patch VmasTask instead.
    print("Import Error")
    VmasTask.get_env_fun = get_env_fun
else:
    VmasClass.get_env_fun = get_env_fun

In [18]:
import wandb
import os
from google.colab import userdata
# The API key is read from the Colab secret store, so it never appears in the
# notebook itself; the run is logged online.
os.environ.update(
    {
        "WANDB_API_KEY": userdata.get("WANDB_API_KEY"),
        "WANDB_MODE": "online",
    }
)
wandb.login(key=os.environ["WANDB_API_KEY"])

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [19]:
from benchmarl.algorithms import IppoConfig
# Algorithm settings for the experiment below, loaded from YAML with no
# overrides (presumably BenchMARL's bundled IPPO defaults - confirm).
Ippo_algorithm_config = IppoConfig.get_from_yaml()

In [21]:
from benchmarl.experiment import ExperimentConfig
from benchmarl.experiment import Experiment

from benchmarl.models import MlpConfig

# ---- Experiment settings ----------------------------------------------------
# Start from the YAML defaults and override only what this run needs.
experiment_config = ExperimentConfig.get_from_yaml()

# Collect and train on the GPU when one is available.
experiment_config.sampling_device = experiment_config.train_device = (
    "cuda" if torch.cuda.is_available() else "cpu"
)

# Frame budget and batching. 1000 environments x 100 steps (max_steps below)
# equals the 100_000 frames collected per batch, and 20_000_000 / 100_000 = 200
# batches, which matches the 200 iterations shown by the progress bar.
experiment_config.max_n_frames = 20_000_000
experiment_config.on_policy_collected_frames_per_batch = 100_000
experiment_config.on_policy_n_envs_per_worker = 1000
experiment_config.on_policy_minibatch_size = 4096
experiment_config.on_policy_n_minibatch_iters = 45
experiment_config.gamma = 0.99
experiment_config.share_policy_params = True

# Evaluation (with rendering) every 200_000 collected frames; results go to
# Weights & Biases.
experiment_config.evaluation = True
experiment_config.evaluation_interval = 200_000
experiment_config.evaluation_episodes = 200
experiment_config.render = True
experiment_config.loggers = ["wandb"]

# ---- Task -------------------------------------------------------------------
# The NAVIGATION task is the one the patched get_env_fun redirects to the
# custom scenario, so its YAML options are replaced wholesale with options
# that scenario accepts (max_steps is presumably consumed by the environment
# rather than the scenario - confirm).
task = VmasTask.NAVIGATION.get_from_yaml()
task.config = {
    "max_steps": 100,
    "n_agents": 4,
    "shared_reward": False,
    "x_semidim": 1,
    "y_semidim": 1,
    "render_action": True,
    "agents_per_target": 1,
    "use_agent_lidar": False,
    "agent_collision_penalty": -1,
    "time_penalty": -0.01,
}

# ---- Networks ---------------------------------------------------------------
# Actor and critic use the same architecture but are separate config objects.
model_config = MlpConfig(
    num_cells=[256, 256],
    layer_class=torch.nn.Linear,
    activation_class=torch.nn.SiLU,
)
critic_model_config = MlpConfig(
    num_cells=[256, 256],
    layer_class=torch.nn.Linear,
    activation_class=torch.nn.SiLU,
)

# ---- Run --------------------------------------------------------------------
experiment = Experiment(
    task=task,
    algorithm_config=Ippo_algorithm_config,
    model_config=model_config,
    critic_model_config=critic_model_config,
    seed=1337,
    config=experiment_config,
)
experiment.run()



mean return = -4.7352495193481445:  55%|█████▍    | 109/200 [1:28:50<1:14:10, 48.90s/it]




mean return = -5.996002197265625:   0%|          | 1/200 [00:42<2:19:44, 42.14s/it][A[A



mean return = -2.6052515506744385:   1%|          | 2/200 [01:23<2:17:32, 41.68s/it][A[A

mean return = -1.2687515020370483:   2%|▏         | 3/200 [01:49<1:52:37, 34.30s/it][A[A



mean return = -1.1502515077590942:   2%|▏         | 4/200 [02:31<2:02:37, 37.54s/it][A[A

mean return = -0.8807511925697327:   2%|▎         | 5/200 [02:56<1:47:14, 33.00s/it][A[A



mean return = -0.7352513670921326:   3%|▎         | 6/200 [03:39<1:57:21, 36.30s/it][A[A

mean return = -0.6865013837814331:   4%|▎         | 7/200 [04:04<1:44:46, 32.57s/it][A[A



mean return = -0.33825138211250305:   4%|▍         | 8/200 [04:46<1:54:32, 35.79s/it][A[A

mean return = -0.6092513203620911:   4%|▍         | 9/200 [05:11<1:42:59, 32.35s/it] [A[A



mean return = -0.5307512283325195:   5%|▌         | 10/200 [05:54<1:53:14, 35.76s/it][A[A

mean return = -0.7197514772415161:   6%|▌         | 11/200 [06:19<1:42:01, 32.39s/it][A[A



mean return = -0.508001446723938:   6%|▌         | 12/200 [07:02<1:51:00, 35.43s/it] [A[A

mean return = -0.5232515335083008:   6%|▋         | 13/200 [07:26<1:40:20, 32.19s/it][A[A



mean return = -0.37850141525268555:   7%|▋         | 14/200 [08:08<1:49:02, 35.17s/it][A[A

mean return = -0.3517515957355499:   8%|▊         | 15/200 [08:34<1:39:18, 32.21s/it] [A[A



mean return = -0.13525156676769257:   8%|▊         | 16/200 [09:17<1:49:11, 35.61s/it][A[A

mean return = 0.09974855929613113:   8%|▊         | 17/200 [09:42<1:38:38, 32.34s/it] [A[A



mean return = 0.01624840684235096:   9%|▉         | 18/200 [10:25<1:47:28, 35.43s/it][A[A

mean return = 0.12849821150302887:  10%|▉         | 19/200 [10:49<1:37:04, 32.18s/it][A[A



mean return = 0.27099817991256714:  10%|█         | 20/200 [11:31<1:44:57, 34.99s/it][A[A

mean return = 0.6274982690811157:  10%|█         | 21/200 [11:55<1:35:02, 31.85s/it] [A[A



mean return = 0.4617479741573334:  11%|█         | 22/200 [12:39<1:45:05, 35.42s/it][A[A

mean return = 0.6567478775978088:  12%|█▏        | 23/200 [13:03<1:34:47, 32.13s/it][A[A



mean return = 0.6857478022575378:  12%|█▏        | 24/200 [13:46<1:43:05, 35.15s/it][A[A

mean return = 0.9747474789619446:  12%|█▎        | 25/200 [14:10<1:33:16, 31.98s/it][A[A



mean return = 1.1262476444244385:  13%|█▎        | 26/200 [14:52<1:41:22, 34.96s/it][A[A

mean return = 1.3889973163604736:  14%|█▎        | 27/200 [15:17<1:31:44, 31.82s/it][A[A



mean return = 1.5334972143173218:  14%|█▍        | 28/200 [15:59<1:40:23, 35.02s/it][A[A

mean return = 1.5424970388412476:  14%|█▍        | 29/200 [16:23<1:30:35, 31.79s/it][A[A



mean return = 1.8992466926574707:  15%|█▌        | 30/200 [17:06<1:39:25, 35.09s/it][A[A

mean return = 1.8732465505599976:  16%|█▌        | 31/200 [17:31<1:29:52, 31.91s/it][A[A



mean return = 2.0877463817596436:  16%|█▌        | 32/200 [18:12<1:37:32, 34.84s/it][A[A

mean return = 2.2207462787628174:  16%|█▋        | 33/200 [18:37<1:28:23, 31.76s/it][A[A



mean return = 2.107245683670044:  17%|█▋        | 34/200 [19:19<1:36:23, 34.84s/it] [A[A

mean return = 2.36374568939209:  18%|█▊        | 35/200 [19:43<1:27:03, 31.66s/it] [A[A



mean return = 2.6014957427978516:  18%|█▊        | 36/200 [20:26<1:35:30, 34.94s/it][A[A

mean return = 2.4947457313537598:  18%|█▊        | 37/200 [20:51<1:26:47, 31.95s/it][A[A



mean return = 2.6757452487945557:  19%|█▉        | 38/200 [21:32<1:33:55, 34.79s/it][A[A

mean return = 2.6494948863983154:  20%|█▉        | 39/200 [21:57<1:25:03, 31.70s/it][A[A



mean return = 2.7914950847625732:  20%|██        | 40/200 [22:38<1:32:33, 34.71s/it][A[A

mean return = 3.2569942474365234:  20%|██        | 41/200 [23:03<1:23:49, 31.64s/it][A[A



mean return = 3.094994306564331:  21%|██        | 42/200 [23:46<1:32:12, 35.02s/it] [A[A

mean return = 3.3969943523406982:  22%|██▏       | 43/200 [24:10<1:23:13, 31.81s/it][A[A



mean return = 3.5449938774108887:  22%|██▏       | 44/200 [24:52<1:30:54, 34.96s/it][A[A

mean return = 3.611243724822998:  22%|██▎       | 45/200 [25:17<1:22:10, 31.81s/it] [A[A



mean return = 3.726243734359741:  23%|██▎       | 46/200 [25:59<1:29:27, 34.85s/it][A[A

mean return = 3.7022435665130615:  24%|██▎       | 47/200 [26:23<1:20:52, 31.71s/it][A[A



mean return = 3.878993511199951:  24%|██▍       | 48/200 [27:06<1:28:55, 35.10s/it] [A[A

mean return = 3.8462438583374023:  24%|██▍       | 49/200 [27:31<1:20:14, 31.89s/it][A[A



mean return = 3.8439934253692627:  25%|██▌       | 50/200 [28:12<1:27:08, 34.86s/it][A[A

mean return = 4.066993236541748:  26%|██▌       | 51/200 [28:37<1:18:58, 31.80s/it] [A[A



mean return = 4.008993148803711:  26%|██▌       | 52/200 [29:19<1:26:12, 34.95s/it][A[A

mean return = 3.9189937114715576:  26%|██▋       | 53/200 [29:44<1:18:00, 31.84s/it][A[A



mean return = 4.054493427276611:  27%|██▋       | 54/200 [30:27<1:25:58, 35.33s/it] [A[A

mean return = 4.233243465423584:  28%|██▊       | 55/200 [30:52<1:17:32, 32.09s/it][A[A



mean return = 3.718743324279785:  28%|██▊       | 56/200 [31:34<1:23:56, 34.98s/it][A[A

mean return = 4.089993476867676:  28%|██▊       | 57/200 [31:58<1:15:52, 31.84s/it][A[A



mean return = 3.842743396759033:  29%|██▉       | 58/200 [32:40<1:22:27, 34.84s/it][A[A

mean return = 4.059243202209473:  30%|██▉       | 59/200 [33:05<1:14:39, 31.77s/it][A[A



mean return = 4.153493404388428:  30%|███       | 60/200 [33:48<1:22:27, 35.34s/it][A[A

mean return = 4.300243377685547:  30%|███       | 61/200 [34:13<1:14:14, 32.05s/it][A[A



mean return = 4.158742904663086:  31%|███       | 62/200 [34:54<1:20:26, 34.97s/it][A[A

mean return = 4.24249267578125:  32%|███▏      | 63/200 [35:19<1:12:42, 31.84s/it] [A[A



mean return = 4.124743461608887:  32%|███▏      | 64/200 [36:01<1:18:57, 34.83s/it][A[A

mean return = 4.371993541717529:  32%|███▎      | 65/200 [36:25<1:11:22, 31.73s/it][A[A



mean return = 4.265243053436279:  33%|███▎      | 66/200 [37:08<1:18:33, 35.17s/it][A[A

mean return = 4.371743202209473:  34%|███▎      | 67/200 [37:32<1:10:33, 31.83s/it][A[A



mean return = 4.226243019104004:  34%|███▍      | 68/200 [38:15<1:16:59, 34.99s/it][A[A

mean return = 4.206993103027344:  34%|███▍      | 69/200 [38:39<1:09:26, 31.80s/it][A[A



mean return = 4.192242622375488:  35%|███▌      | 70/200 [39:21<1:15:24, 34.81s/it][A[A

mean return = 4.535492420196533:  36%|███▌      | 71/200 [39:45<1:08:04, 31.67s/it][A[A



mean return = 4.4217424392700195:  36%|███▌      | 72/200 [40:28<1:14:50, 35.09s/it][A[A

mean return = 4.37349271774292:  36%|███▋      | 73/200 [40:53<1:07:36, 31.94s/it]  [A[A



mean return = 4.725992202758789:  37%|███▋      | 74/200 [41:35<1:13:22, 34.94s/it][A[A

mean return = 4.425992488861084:  38%|███▊      | 75/200 [41:59<1:06:11, 31.77s/it][A[A



mean return = 4.482492446899414:  38%|███▊      | 76/200 [42:43<1:13:03, 35.35s/it][A[A

mean return = 4.504742622375488:  38%|███▊      | 77/200 [43:07<1:05:31, 31.97s/it][A[A



mean return = 4.556492805480957:  39%|███▉      | 78/200 [43:49<1:11:10, 35.00s/it][A[A

mean return = 4.374492645263672:  40%|███▉      | 79/200 [44:14<1:04:14, 31.86s/it][A[A



mean return = 4.378242492675781:  40%|████      | 80/200 [44:55<1:09:36, 34.81s/it][A[A

mean return = 4.368492603302002:  40%|████      | 81/200 [45:20<1:02:51, 31.69s/it][A[A



mean return = 4.321242809295654:  41%|████      | 82/200 [46:03<1:08:51, 35.02s/it][A[A

mean return = 4.3839921951293945:  42%|████▏     | 83/200 [46:27<1:02:02, 31.82s/it][A[A



mean return = 4.475742340087891:  42%|████▏     | 84/200 [47:09<1:07:09, 34.74s/it] [A[A

mean return = 4.419492244720459:  42%|████▎     | 85/200 [47:33<1:00:53, 31.77s/it][A[A



mean return = 4.464993000030518:  43%|████▎     | 86/200 [48:16<1:06:17, 34.89s/it][A[A

mean return = 4.519493103027344:  44%|████▎     | 87/200 [48:40<59:43, 31.71s/it]  [A[A



mean return = 4.522492408752441:  44%|████▍     | 88/200 [49:23<1:05:24, 35.04s/it][A[A

mean return = 4.470242500305176:  44%|████▍     | 89/200 [49:47<58:54, 31.84s/it]  [A[A



mean return = 4.128993034362793:  45%|████▌     | 90/200 [50:29<1:03:49, 34.81s/it][A[A

mean return = 4.447493076324463:  46%|████▌     | 91/200 [50:53<57:34, 31.69s/it]  [A[A



mean return = 4.591492176055908:  46%|████▌     | 92/200 [51:36<1:03:15, 35.14s/it][A[A

mean return = 4.469492435455322:  46%|████▋     | 93/200 [52:01<57:02, 31.99s/it]  [A[A



mean return = 4.278992176055908:  47%|████▋     | 94/200 [52:43<1:01:37, 34.88s/it][A[A

mean return = 4.439242362976074:  48%|████▊     | 95/200 [53:07<55:33, 31.75s/it]  [A[A



mean return = 4.827742576599121:  48%|████▊     | 96/200 [53:49<1:00:10, 34.72s/it][A[A

mean return = 4.704992294311523:  48%|████▊     | 97/200 [54:13<54:19, 31.65s/it]  [A[A



mean return = 4.805242538452148:  49%|████▉     | 98/200 [54:56<59:27, 34.97s/it][A[A

mean return = 4.803742408752441:  50%|████▉     | 99/200 [55:20<53:30, 31.79s/it][A[A



mean return = 4.6652421951293945:  50%|█████     | 100/200 [56:02<58:06, 34.86s/it][A[A

mean return = 4.567992210388184:  50%|█████     | 101/200 [56:27<52:33, 31.85s/it] [A[A



mean return = 4.49149227142334:  51%|█████     | 102/200 [57:10<57:18, 35.09s/it] [A[A

mean return = 4.516992568969727:  52%|█████▏    | 103/200 [57:34<51:28, 31.84s/it][A[A



mean return = 4.400742530822754:  52%|█████▏    | 104/200 [58:16<55:54, 34.94s/it][A[A

mean return = 4.806742191314697:  52%|█████▎    | 105/200 [58:41<50:18, 31.78s/it][A[A



mean return = 4.660492420196533:  53%|█████▎    | 106/200 [59:22<54:30, 34.79s/it][A[A

mean return = 4.720242500305176:  54%|█████▎    | 107/200 [59:47<49:13, 31.76s/it][A[A



mean return = 4.817992210388184:  54%|█████▍    | 108/200 [1:00:30<53:50, 35.11s/it][A[A

mean return = 4.605992317199707:  55%|█████▍    | 109/200 [1:00:54<48:21, 31.88s/it][A[A



mean return = 4.7327423095703125:  55%|█████▌    | 110/200 [1:01:37<52:26, 34.96s/it][A[A

mean return = 4.485992431640625:  56%|█████▌    | 111/200 [1:02:01<47:08, 31.78s/it] [A[A



mean return = 4.758242607116699:  56%|█████▌    | 112/200 [1:02:43<51:13, 34.93s/it][A[A

mean return = 4.776242256164551:  56%|█████▋    | 113/200 [1:03:08<46:03, 31.76s/it][A[A



mean return = 4.636242389678955:  57%|█████▋    | 114/200 [1:03:51<50:23, 35.16s/it][A[A

mean return = 4.763492584228516:  57%|█████▊    | 115/200 [1:04:15<45:17, 31.97s/it][A[A



mean return = 4.808992385864258:  58%|█████▊    | 116/200 [1:04:57<49:05, 35.06s/it][A[A

mean return = 4.647742748260498:  58%|█████▊    | 117/200 [1:05:22<44:09, 31.93s/it][A[A



mean return = 4.613992214202881:  59%|█████▉    | 118/200 [1:06:06<48:26, 35.45s/it][A[A

mean return = 4.702742576599121:  60%|█████▉    | 119/200 [1:06:30<43:28, 32.20s/it][A[A



mean return = 4.770992279052734:  60%|██████    | 120/200 [1:07:13<46:59, 35.24s/it][A[A

mean return = 4.697242259979248:  60%|██████    | 121/200 [1:07:37<42:15, 32.09s/it][A[A



mean return = 4.79299259185791:  61%|██████    | 122/200 [1:08:21<46:06, 35.47s/it] [A[A

mean return = 4.648992538452148:  62%|██████▏   | 123/200 [1:08:45<41:20, 32.21s/it][A[A



mean return = 4.70424222946167:  62%|██████▏   | 124/200 [1:09:27<44:33, 35.17s/it] [A[A

mean return = 4.833492279052734:  62%|██████▎   | 125/200 [1:09:52<39:59, 32.00s/it][A[A



mean return = 4.862242221832275:  63%|██████▎   | 126/200 [1:10:36<43:46, 35.49s/it][A[A

mean return = 4.6999921798706055:  64%|██████▎   | 127/200 [1:11:00<39:10, 32.19s/it][A[A



mean return = 4.6757426261901855:  64%|██████▍   | 128/200 [1:11:42<42:04, 35.06s/it][A[A

mean return = 4.5874924659729:  64%|██████▍   | 129/200 [1:12:06<37:41, 31.86s/it]   [A[A



mean return = 4.9482421875:  65%|██████▌   | 130/200 [1:12:48<40:38, 34.84s/it]   [A[A

mean return = 4.4464921951293945:  66%|██████▌   | 131/200 [1:13:13<36:28, 31.72s/it][A[A



mean return = 4.837742328643799:  66%|██████▌   | 132/200 [1:13:55<39:35, 34.94s/it] [A[A

mean return = 4.636242389678955:  66%|██████▋   | 133/200 [1:14:19<35:29, 31.78s/it][A[A



mean return = 4.785242557525635:  67%|██████▋   | 134/200 [1:15:01<38:13, 34.75s/it][A[A

mean return = 4.69699239730835:  68%|██████▊   | 135/200 [1:15:26<34:28, 31.83s/it] [A[A



mean return = 4.54799222946167:  68%|██████▊   | 136/200 [1:16:09<37:30, 35.16s/it][A[A

mean return = 4.9177422523498535:  68%|██████▊   | 137/200 [1:16:33<33:27, 31.87s/it][A[A



mean return = 5.034992218017578:  69%|██████▉   | 138/200 [1:17:15<36:04, 34.91s/it] [A[A

mean return = 4.931992530822754:  70%|██████▉   | 139/200 [1:17:40<32:16, 31.75s/it][A[A



mean return = 5.015742301940918:  70%|███████   | 140/200 [1:18:21<34:45, 34.76s/it][A[A

mean return = 4.751242637634277:  70%|███████   | 141/200 [1:18:46<31:07, 31.66s/it][A[A



mean return = 4.990492343902588:  71%|███████   | 142/200 [1:19:29<33:51, 35.02s/it][A[A

mean return = 4.877992630004883:  72%|███████▏  | 143/200 [1:19:53<30:12, 31.81s/it][A[A



mean return = 4.866992473602295:  72%|███████▏  | 144/200 [1:20:35<32:34, 34.90s/it][A[A

mean return = 4.772492408752441:  72%|███████▎  | 145/200 [1:21:00<29:05, 31.74s/it][A[A



mean return = 4.73049259185791:  73%|███████▎  | 146/200 [1:21:42<31:23, 34.87s/it] [A[A

mean return = 4.9802422523498535:  74%|███████▎  | 147/200 [1:22:06<27:59, 31.69s/it][A[A



mean return = 4.725992202758789:  74%|███████▍  | 148/200 [1:22:48<30:16, 34.93s/it] [A[A

mean return = 4.737242221832275:  74%|███████▍  | 149/200 [1:23:13<27:00, 31.77s/it][A[A



mean return = 4.779242515563965:  75%|███████▌  | 150/200 [1:23:55<28:58, 34.77s/it][A[A

mean return = 4.620242595672607:  76%|███████▌  | 151/200 [1:24:19<25:50, 31.65s/it][A[A



mean return = 4.556742191314697:  76%|███████▌  | 152/200 [1:25:02<28:06, 35.13s/it][A[A

mean return = 4.52274227142334:  76%|███████▋  | 153/200 [1:25:26<24:57, 31.86s/it] [A[A



mean return = 4.7689924240112305:  77%|███████▋  | 154/200 [1:26:08<26:42, 34.84s/it][A[A

mean return = 4.895742416381836:  78%|███████▊  | 155/200 [1:26:33<23:46, 31.70s/it] [A[A



mean return = 4.982992649078369:  78%|███████▊  | 156/200 [1:27:15<25:30, 34.77s/it][A[A

mean return = 4.846992492675781:  78%|███████▊  | 157/200 [1:27:39<22:40, 31.65s/it][A[A



mean return = 4.9212422370910645:  79%|███████▉  | 158/200 [1:28:22<24:29, 35.00s/it][A[A

mean return = 5.219491958618164:  80%|███████▉  | 159/200 [1:28:46<21:44, 31.82s/it] [A[A



mean return = 4.843742370605469:  80%|████████  | 160/200 [1:29:28<23:12, 34.81s/it][A[A

mean return = 4.76774263381958:  80%|████████  | 161/200 [1:29:53<20:41, 31.84s/it] [A[A



mean return = 4.848992347717285:  81%|████████  | 162/200 [1:30:36<22:21, 35.31s/it][A[A

mean return = 4.994992256164551:  82%|████████▏ | 163/200 [1:31:00<19:43, 31.98s/it][A[A



mean return = 4.928492546081543:  82%|████████▏ | 164/200 [1:31:42<20:55, 34.89s/it][A[A

mean return = 4.822992324829102:  82%|████████▎ | 165/200 [1:32:07<18:30, 31.74s/it][A[A



mean return = 4.829492568969727:  83%|████████▎ | 166/200 [1:32:48<19:39, 34.68s/it][A[A

mean return = 5.184742450714111:  84%|████████▎ | 167/200 [1:33:12<17:22, 31.58s/it][A[A



mean return = 5.157991886138916:  84%|████████▍ | 168/200 [1:33:55<18:38, 34.95s/it][A[A

mean return = 5.185992240905762:  84%|████████▍ | 169/200 [1:34:20<16:24, 31.77s/it][A[A



mean return = 5.072742462158203:  85%|████████▌ | 170/200 [1:35:02<17:25, 34.86s/it][A[A

mean return = 5.202742576599121:  86%|████████▌ | 171/200 [1:35:26<15:20, 31.73s/it][A[A



mean return = 4.938492298126221:  86%|████████▌ | 172/200 [1:36:08<16:16, 34.88s/it][A[A

mean return = 5.0154924392700195:  86%|████████▋ | 173/200 [1:36:33<14:15, 31.69s/it][A[A



mean return = 4.973492622375488:  87%|████████▋ | 174/200 [1:37:15<15:07, 34.91s/it] [A[A

mean return = 4.940242290496826:  88%|████████▊ | 175/200 [1:37:39<13:13, 31.76s/it][A[A



mean return = 5.148742198944092:  88%|████████▊ | 176/200 [1:38:21<13:54, 34.75s/it][A[A

mean return = 4.976242542266846:  88%|████████▊ | 177/200 [1:38:46<12:07, 31.65s/it][A[A



mean return = 4.995492458343506:  89%|████████▉ | 178/200 [1:39:29<12:52, 35.11s/it][A[A

mean return = 5.123992443084717:  90%|████████▉ | 179/200 [1:39:53<11:09, 31.87s/it][A[A



mean return = 4.756492614746094:  90%|█████████ | 180/200 [1:40:35<11:36, 34.81s/it][A[A

mean return = 4.740992546081543:  90%|█████████ | 181/200 [1:40:59<10:02, 31.71s/it][A[A



mean return = 4.996992588043213:  91%|█████████ | 182/200 [1:41:41<10:25, 34.75s/it][A[A

mean return = 4.937992572784424:  92%|█████████▏| 183/200 [1:42:05<08:57, 31.64s/it][A[A



mean return = 4.869992256164551:  92%|█████████▏| 184/200 [1:42:48<09:20, 35.04s/it][A[A

mean return = 5.024742603302002:  92%|█████████▎| 185/200 [1:43:13<07:57, 31.85s/it][A[A



mean return = 4.874492645263672:  93%|█████████▎| 186/200 [1:43:55<08:08, 34.89s/it][A[A

mean return = 4.7649922370910645:  94%|█████████▎| 187/200 [1:44:19<06:52, 31.76s/it][A[A



mean return = 4.764742374420166:  94%|█████████▍| 188/200 [1:45:02<07:02, 35.19s/it] [A[A

mean return = 5.107242584228516:  94%|█████████▍| 189/200 [1:45:27<05:51, 31.95s/it][A[A



mean return = 4.908242225646973:  95%|█████████▌| 190/200 [1:46:08<05:48, 34.85s/it][A[A

mean return = 4.747742176055908:  96%|█████████▌| 191/200 [1:46:33<04:45, 31.73s/it][A[A



mean return = 4.9579925537109375:  96%|█████████▌| 192/200 [1:47:15<04:38, 34.84s/it][A[A

mean return = 4.849742412567139:  96%|█████████▋| 193/200 [1:47:39<03:41, 31.66s/it] [A[A



mean return = 4.631992816925049:  97%|█████████▋| 194/200 [1:48:22<03:29, 34.92s/it][A[A

mean return = 4.8964924812316895:  98%|█████████▊| 195/200 [1:48:46<02:38, 31.75s/it][A[A



mean return = 4.687493324279785:  98%|█████████▊| 196/200 [1:49:28<02:19, 34.94s/it] [A[A

mean return = 4.692992687225342:  98%|█████████▊| 197/200 [1:49:53<01:35, 31.77s/it][A[A



mean return = 4.506242752075195:  99%|█████████▉| 198/200 [1:50:36<01:10, 35.12s/it][A[A

mean return = 4.41674280166626: 100%|█████████▉| 199/200 [1:51:00<00:31, 31.89s/it] [A[A



mean return = 4.608242988586426: 100%|██████████| 200/200 [1:51:42<00:00, 34.78s/it][A[A

0,1
collection/agents/info/collision_rew,███▇█▆▃▅▄▄▆▄▃▃▅▃▃▂▁▃▂▃▁▁▃▃▃▂▂▂▄▁▃▃▂▃▂▃▃▁
collection/agents/info/covering_reward,▁▂▂▂▂▃▃▃▃▄▅▆▆▇▇▇▇▇▇▇▇▇▇▇▇███████████████
collection/agents/info/targets_covered,▁▁▂▂▂▄▅▅▅▆▆▇▇▇▇▇▇▇▇▇█▇██████████████████
collection/agents/reward/episode_reward_max,▁▁▂▄▅▅▆▆▆▆▇█▇▇▇█▇▇██▇▇▇█▆▇▇█▇▇▇▇▇▇█▇▇▇▆█
collection/agents/reward/episode_reward_mean,▁▁▂▂▃▄▄▄▅▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█▇▇█▇██▇█▇▇██▇▇
collection/agents/reward/episode_reward_min,▂▁▄▃▆▅█▆▂▅▆▃▅▆▅▃▆▃▄▇▃▅▆▅▇▇▆▃▂▆▅▄▃▆▅▃▅▅▅▇
collection/reward/episode_reward_max,▁▁▁▂▂▃▃▄▄▆▆▆▆▆▅▅▇▆▆▆▆▆▇▆▆▆▇▇▆▆▆▇▇█▆▆▆▆▇▇
collection/reward/episode_reward_mean,▁▁▂▂▃▄▇▇▇▇▇▇▇▇▇▇▇█▇▇▇▇▇█▇█▇▇██████▇███▇▇
collection/reward/episode_reward_min,▁▆█▇▆▇█▇▄▅▇▄█▆▆▆▄▄▂▇▂▅▇▄▆█▄▅▅▇▆▇▄▅▇▁▅▄▄▆
collection/reward/reward_max,█▁▁▁▁██▁█▁▁█▁█▁█████▁███▁███████▁▁█████▁

0,1
collection/agents/info/collision_rew,-0.03195
collection/agents/info/covering_reward,0.08803
collection/agents/info/targets_covered,0.34273
collection/agents/reward/episode_reward_max,14.24998
collection/agents/reward/episode_reward_mean,4.60824
collection/agents/reward/episode_reward_min,-26.24997
collection/reward/episode_reward_max,14.24998
collection/reward/episode_reward_mean,4.60824
collection/reward/episode_reward_min,-26.24997
collection/reward/reward_max,1.99


mean return = 4.608242988586426: 100%|██████████| 200/200 [1:51:42<00:00, 33.51s/it]
