In [1]:
# Install the simulator (vmas), the training library (benchmarl) and the packages
# used for off-screen rendering / video export.
!pip install vmas benchmarl pyvirtualdisplay moviepy
# System OpenGL bindings for Python (presumably required by the renderer — confirm).
!apt-get install python3-opengl
import pyvirtualdisplay
# Create a non-visible 1400x900 virtual display and start it.
# NOTE(review): this rebinds the name `display`, which IPython also exposes as its
# rich-display helper; later cells that call `display(obj)` would hit this object.
display = pyvirtualdisplay.Display(visible=False, size=(1400, 900))
display.start()

Collecting vmas
  Downloading vmas-1.5.1.tar.gz (218 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m218.1/218.1 kB[0m [31m17.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting benchmarl
  Downloading benchmarl-1.5.0.tar.gz (101 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.4/101.4 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pyvirtualdisplay
  Downloading PyVirtualDisplay-3.0-py3-none-any.whl.metadata (943 bytes)
Collecting pyglet<=1.5.27 (from vmas)
  Downloading pyglet-1.5.27-py3-none-any.whl.metadata (7.6 kB)
Collecting torchrl~=0.8.0 (from benchmarl)
  Downloading torchrl-0.8.1-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.2/4

<pyvirtualdisplay.display.Display at 0x7f56f487c740>

In [2]:
import torch

import vmas.simulator.core
import vmas.simulator.utils
from vmas.simulator.dynamics.common import Dynamics


class FixedWingKinematicBicycle(Dynamics):
    """Kinematic bicycle dynamics with a bounded speed command.

    The only difference from a plain kinematic bicycle is that the commanded
    speed is clamped to ``[min_v, max_v]`` before integration; with the default
    ``min_v = 0.3`` the commanded speed can therefore never be zero or negative.

    The action has two components: ``u[:, 0]`` is the speed command and
    ``u[:, 1]`` the steering command. Both are converted into the force and
    torque written onto the agent state.

    Note: ``self.agent`` is not assigned in this class; it is presumably
    provided by the ``Dynamics`` base class once the dynamics are attached to
    an agent (confirm against vmas).
    """

    def __init__(
        self,
        world: vmas.simulator.core.World,
        width: float,
        l_f: float,
        l_r: float,
        max_steering_angle: float,
        min_v: float = 0.3,
        max_v: float = 1.0,
        integration: str = "rk4",
    ):
        """Store the vehicle geometry, command limits and integration scheme.

        Args:
            world: World the agent lives in; its ``dt`` and ``device`` are used.
            width: Vehicle width (stored, not used by the equations below).
            l_f: Length term added to ``l_r`` to form the wheelbase.
            l_r: Length term that scales the slip angle.
            max_steering_angle: Symmetric bound applied to the steering command.
            min_v: Lower bound applied to the speed command.
            max_v: Upper bound applied to the speed command.
            integration: ``"rk4"`` or ``"euler"``.

        Raises:
            AssertionError: If ``integration`` is not one of the two schemes.
        """
        super().__init__()
        supported_schemes = ("rk4", "euler")
        assert (
            integration in supported_schemes
        ), "Integration method must be 'euler' or 'rk4'."

        # Simulation handles
        self.world = world
        self.dt = world.dt
        self.integration = integration

        # Vehicle geometry
        self.width = width
        self.l_f = l_f
        self.l_r = l_r

        # Command limits
        self.max_steering_angle = max_steering_angle
        self.min_v = min_v
        self.max_v = max_v

    def f(self, state, steering_command, v_command):
        """Return the time derivative ``(dx, dy, dtheta)`` of ``state``.

        ``state`` holds ``(x, y, yaw)`` along its second dimension; the result
        is stacked along ``dim=1`` in the same layout.
        """
        yaw = state[:, 2]
        wheelbase = self.l_f + self.l_r
        tan_steering = torch.tan(steering_command)

        # Slip angle; atan2 against a constant 1 keeps it in the right half-plane.
        one = torch.tensor(1, device=self.world.device)
        slip = torch.atan2(tan_steering * self.l_r / wheelbase, one)

        course = yaw + slip
        x_rate = v_command * torch.cos(course)
        y_rate = v_command * torch.sin(course)
        yaw_rate = v_command / wheelbase * torch.cos(slip) * tan_steering
        return torch.stack((x_rate, y_rate, yaw_rate), dim=1)  # [batch_size,3]

    def euler(self, state, steering_command, v_command):
        """Change in state over one ``dt`` using a single forward-Euler step."""
        derivative = self.f(state, steering_command, v_command)
        return self.dt * derivative

    def runge_kutta(self, state, steering_command, v_command):
        """Change in state over one ``dt`` using classical fourth-order Runge-Kutta."""

        def slope(at_state):
            # The commands are held constant across the four stages.
            return self.f(at_state, steering_command, v_command)

        k1 = slope(state)
        k2 = slope(state + self.dt * k1 / 2)
        k3 = slope(state + self.dt * k2 / 2)
        k4 = slope(state + self.dt * k3)
        weighted_sum = k1 + 2 * k2 + 2 * k3 + k4
        return (self.dt / 6) * weighted_sum

    @property
    def needed_action_size(self) -> int:
        """Number of action components consumed: speed and steering."""
        return 2

    def process_action(self):
        """Turn the agent's (speed, steering) action into a force and a torque.

        The commands are clamped, the kinematic model is integrated over one
        ``dt`` to get the desired displacement, and the force/torque that
        would produce that displacement from the current velocities is written
        to ``agent.state.force`` and ``agent.state.torque``.
        """
        agent = self.agent
        agent_state = agent.state
        action = agent.action.u
        dt = self.dt

        # Bounded speed command -- the one departure from the plain bicycle model.
        v_command = torch.clamp(action[:, 0], self.min_v, self.max_v)
        # Steering command limited to the symmetric steering range.
        steering_command = torch.clamp(
            action[:, 1], -self.max_steering_angle, self.max_steering_angle
        )

        # Pose (x, y, yaw) the integration starts from.
        pose = torch.cat((agent_state.pos, agent_state.rot), dim=1)

        integrate = self.euler if self.integration == "euler" else self.runge_kutta
        delta_state = integrate(pose, steering_command, v_command)

        def required_acceleration(delta, current_rate):
            # Acceleration that turns the current rate into the desired displacement.
            return (delta - current_rate * dt) / dt**2

        accel_x = required_acceleration(delta_state[:, 0], agent_state.vel[:, 0])
        accel_y = required_acceleration(delta_state[:, 1], agent_state.vel[:, 1])
        accel_yaw = required_acceleration(delta_state[:, 2], agent_state.ang_vel[:, 0])

        # Linear accelerations scale by mass, the angular one by moment of inertia.
        agent_state.force[:, vmas.simulator.utils.X] = agent.mass * accel_x
        agent_state.force[:, vmas.simulator.utils.Y] = agent.mass * accel_y
        agent_state.torque = (agent.moment_of_inertia * accel_yaw).unsqueeze(-1)

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.
  return datetime.utcnow().replace(tzinfo=utc)


In [3]:
import typing
from typing import Callable, Dict, List

import torch
from torch import Tensor

from vmas import render_interactively
from vmas.simulator.core import Agent, Entity, Landmark, Box, Sphere, World
from vmas.simulator.scenario import BaseScenario
from vmas.simulator.sensors import Lidar
from vmas.simulator.utils import Color, ScenarioUtils, X, Y
from vmas.simulator.dynamics.kinematic_bicycle import KinematicBicycle

if typing.TYPE_CHECKING:
    from vmas.simulator.rendering import Geom


class FWDiscoveryScenario(BaseScenario):
    """Target-discovery scenario whose agents use ``FixedWingKinematicBicycle`` dynamics.

    Agents are box-shaped, carry a lidar that sees targets (and optionally a
    second lidar that sees other agents) and are rewarded when at least
    ``agents_per_target`` agents are within ``covering_range`` of a target.
    Covered targets are either respawned at a new random position or moved far
    outside the arena, depending on ``targets_respawn``.

    Lines tagged ``# Modification`` are the author's own markers.
    """

    def make_world(self, batch_dim: int, device: torch.device, **kwargs):
        """Read the scenario options from ``kwargs`` and build the world.

        Every option is popped from ``kwargs`` with the default shown below;
        ``ScenarioUtils.check_kwargs_consumed`` is then called on what is left
        (presumably to flag unknown options — confirm against vmas).

        Args:
            batch_dim: Number of vectorized environments.
            device: Torch device the world tensors are created on.
            **kwargs: Scenario options (see the ``kwargs.pop`` calls).

        Returns:
            The populated ``World`` with ``n_agents`` agents and ``n_targets`` landmarks.
        """
        self.n_agents = kwargs.pop("n_agents", 5)
        self.n_targets = kwargs.pop("n_targets", 7)
        self.x_semidim = kwargs.pop("x_semidim", 1)
        self.y_semidim = kwargs.pop("y_semidim", 1)
        self._min_dist_between_entities = kwargs.pop("min_dist_between_entities", 0.2)
        self._lidar_range = kwargs.pop("lidar_range", 0.35)
        self._covering_range = kwargs.pop("covering_range", 0.25)

        self.use_agent_lidar = kwargs.pop("use_agent_lidar", False)
        self.n_lidar_rays_entities = kwargs.pop("n_lidar_rays_entities", 15)
        self.n_lidar_rays_agents = kwargs.pop("n_lidar_rays_agents", 12)

        self._agents_per_target = kwargs.pop("agents_per_target", 2)
        self.targets_respawn = kwargs.pop("targets_respawn", True)
        self.shared_reward = kwargs.pop("shared_reward", False)

        self.agent_collision_penalty = kwargs.pop("agent_collision_penalty", 0)
        self.covering_rew_coeff = kwargs.pop("covering_rew_coeff", 1.0)
        self.time_penalty = kwargs.pop("time_penalty", 0)
        self.render_action = kwargs.pop("render_action", False) # Modification
        ScenarioUtils.check_kwargs_consumed(kwargs)

        # Derived / fixed settings (not configurable through kwargs).
        self._comms_range = self._lidar_range  # used only by extra_render
        self.min_collision_distance = 0.005
        self.agent_radius = 0.05
        self.target_radius = self.agent_radius

        self.viewer_zoom = 1
        self.target_color = Color.GREEN

        # Make world
        world = World(
            batch_dim,
            device,
            x_semidim=self.x_semidim,
            y_semidim=self.y_semidim,
            collision_force=500,
            substeps=2,
            drag=0.25,
        )

        # Add agents
        # Name-prefix filters that decide which entities each lidar reacts to.
        entity_filter_agents: Callable[[Entity], bool] = lambda e: e.name.startswith(
            "agent"
        )
        entity_filter_targets: Callable[[Entity], bool] = lambda e: e.name.startswith(
            "target"
        )
        _max_steering_angle = torch.pi/4
        for i in range(self.n_agents):
            # Constraint: all agents have same action range and multiplier
            agent = Agent(
                name=f"agent_{i}",
                collide=True,
                color=Color.ORANGE, # Modification (not important)
                shape=Box(length=self.agent_radius * 2, width=self.agent_radius),
                # sensors[0] always detects targets; sensors[1] (agents) exists
                # only when use_agent_lidar is set. observation() relies on this order.
                sensors=(
                    [
                        Lidar(
                            world,
                            n_rays=self.n_lidar_rays_entities,
                            max_range=self._lidar_range,
                            entity_filter=entity_filter_targets,
                            render_color=Color.GREEN,
                        )
                    ]
                    + (
                        [
                            Lidar(
                                world,
                                angle_start=0.05,
                                angle_end=2 * torch.pi + 0.05,
                                n_rays=self.n_lidar_rays_agents,
                                max_range=self._lidar_range,
                                entity_filter=entity_filter_agents,
                                render_color=Color.BLUE,
                            )
                        ]
                        if self.use_agent_lidar
                        else []
                    )
                ),
                dynamics=FixedWingKinematicBicycle(
                    world,
                    width=self.agent_radius,
                    l_f=self.agent_radius,
                    l_r=self.agent_radius,
                    max_steering_angle=_max_steering_angle
                ), # Modification
                render_action=self.render_action # Modification
            )
            # Per-agent reward buffers, one entry per environment; updated in place.
            agent.collision_rew = torch.zeros(batch_dim, device=device)
            agent.covering_reward = agent.collision_rew.clone()
            world.add_agent(agent)

        self._targets = []
        for i in range(self.n_targets):
            target = Landmark(
                name=f"target_{i}",
                collide=True,
                movable=False,
                shape=Sphere(radius=self.target_radius),
                color=self.target_color,
            )
            world.add_landmark(target)
            self._targets.append(target)

        # Placeholder until the first reward() call, which rebinds covered_targets
        # to a boolean mask of the same (batch_dim, n_targets) layout.
        self.covered_targets = torch.zeros(batch_dim, self.n_targets, device=device)
        self.shared_covering_rew = torch.zeros(batch_dim, device=device)

        return world

    def reset_world_at(self, env_index: int = None):
        """Reset coverage bookkeeping and re-spawn targets and agents.

        Args:
            env_index: Environment to reset, or ``None`` to reset all of them.
        """
        placable_entities = self._targets[: self.n_targets] + self.world.agents
        if env_index is None:
            # Full reset: (re)create the boolean "ever covered" table.
            self.all_time_covered_targets = torch.full(
                (self.world.batch_dim, self.n_targets),
                False,
                device=self.world.device,
            )
        else:
            # Partial reset: assumes a full reset already created the table.
            self.all_time_covered_targets[env_index] = False
        ScenarioUtils.spawn_entities_randomly(
            entities=placable_entities,
            world=self.world,
            env_index=env_index,
            min_dist_between_entities=self._min_dist_between_entities,
            x_bounds=(-self.world.x_semidim, self.world.x_semidim),
            y_bounds=(-self.world.y_semidim, self.world.y_semidim),
        )
        # make_world creates exactly n_targets targets, so this slice is empty here.
        for target in self._targets[self.n_targets :]:
            target.set_pos(self.get_outside_pos(env_index), batch_index=env_index)

    def reward(self, agent: Agent):
        """Return ``agent``'s reward: collision penalty + covering reward + time penalty.

        Shared tensors (positions, distances, coverage mask, per-agent covering
        rewards) are refreshed only when ``agent`` is the first agent, and covered
        targets are relocated only when ``agent`` is the last agent. This
        presumably relies on the simulator calling ``reward`` once per agent, in
        ``world.agents`` order, every step — confirm against vmas.
        """
        is_first = agent == self.world.agents[0]
        is_last = agent == self.world.agents[-1]

        if is_first:
            self.time_rew = torch.full(
                (self.world.batch_dim,),
                self.time_penalty,
                device=self.world.device,
            )
            # Stack along dim=1 so cdist compares every agent with every target.
            self.agents_pos = torch.stack(
                [a.state.pos for a in self.world.agents], dim=1
            )
            self.targets_pos = torch.stack([t.state.pos for t in self._targets], dim=1)
            self.agents_targets_dists = torch.cdist(self.agents_pos, self.targets_pos)
            # Summing over the agent dimension counts the agents in range of each target.
            self.agents_per_target = torch.sum(
                (self.agents_targets_dists < self._covering_range).type(torch.int),
                dim=1,
            )
            self.covered_targets = self.agents_per_target >= self._agents_per_target

            # agent_reward() also refreshes each agent's own covering_reward buffer.
            self.shared_covering_rew[:] = 0
            for a in self.world.agents:
                self.shared_covering_rew += self.agent_reward(a)
            # Non-zero shared rewards are halved.
            # NOTE(review): the rationale for the factor 2 is not visible here.
            self.shared_covering_rew[self.shared_covering_rew != 0] /= 2

        # Avoid collisions with each other
        agent.collision_rew[:] = 0
        for a in self.world.agents:
            if a != agent:
                agent.collision_rew[
                    self.world.get_distance(a, agent) < self.min_collision_distance
                ] += self.agent_collision_penalty

        if is_last:
            if self.targets_respawn:
                # Agent positions as of the first agent's call in this step.
                occupied_positions_agents = [self.agents_pos]
                for i, target in enumerate(self._targets):
                    # Every other target's current position counts as occupied.
                    occupied_positions_targets = [
                        o.state.pos.unsqueeze(1)
                        for o in self._targets
                        if o is not target
                    ]
                    occupied_positions = torch.cat(
                        occupied_positions_agents + occupied_positions_targets,
                        dim=1,
                    )
                    # A candidate position is requested for all environments ...
                    pos = ScenarioUtils.find_random_pos_for_entity(
                        occupied_positions,
                        env_index=None,
                        world=self.world,
                        min_dist_between_entities=self._min_dist_between_entities,
                        x_bounds=(-self.world.x_semidim, self.world.x_semidim),
                        y_bounds=(-self.world.y_semidim, self.world.y_semidim),
                    )

                    # ... but applied only where this target is currently covered.
                    target.state.pos[self.covered_targets[:, i]] = pos[
                        self.covered_targets[:, i]
                    ].squeeze(1)
            else:
                # Remember what has been covered (feeds done()) and move covered
                # targets to a far-away position.
                self.all_time_covered_targets += self.covered_targets
                for i, target in enumerate(self._targets):
                    target.state.pos[self.covered_targets[:, i]] = self.get_outside_pos(
                        None
                    )[self.covered_targets[:, i]]
        covering_rew = (
            agent.covering_reward
            if not self.shared_reward
            else self.shared_covering_rew
        )

        return agent.collision_rew + covering_rew + self.time_rew

    def get_outside_pos(self, env_index):
        """Sample a position with coordinates in ``[-1000, -10] * x_semidim``.

        Returns a ``(1, dim_p)`` tensor when ``env_index`` is given, otherwise
        ``(batch_dim, dim_p)``.
        """
        return torch.empty(
            (
                (1, self.world.dim_p)
                if env_index is not None
                else (self.world.batch_dim, self.world.dim_p)
            ),
            device=self.world.device,
        ).uniform_(-1000 * self.world.x_semidim, -10 * self.world.x_semidim)

    def agent_reward(self, agent):
        """Recompute and return ``agent.covering_reward`` in place.

        The reward is the number of targets that are both within covering range
        of this agent and marked covered, times ``covering_rew_coeff``. Uses
        ``agents_targets_dists`` and ``covered_targets`` as set by ``reward``.
        """
        agent_index = self.world.agents.index(agent)

        agent.covering_reward[:] = 0
        targets_covered_by_agent = (
            self.agents_targets_dists[:, agent_index] < self._covering_range
        )
        num_covered_targets_covered_by_agent = (
            targets_covered_by_agent * self.covered_targets
        ).sum(dim=-1)
        agent.covering_reward += (
            num_covered_targets_covered_by_agent * self.covering_rew_coeff
        )
        return agent.covering_reward

    def observation(self, agent: Agent):
        """Return the agent's observation as a dict of tensors.

        Keys: ``"obs"`` (target-lidar readings, followed by agent-lidar readings
        when ``use_agent_lidar`` is set), ``"pos"``, ``"vel"`` and, for bicycle
        dynamics, ``"rot"`` and ``"ang_vel"``.
        """
        lidar_1_measures = agent.sensors[0].measure()
        obs = {"obs" : torch.cat(
            [lidar_1_measures]
            + ([agent.sensors[1].measure()] if self.use_agent_lidar else []),
            dim=-1),
                "pos" : agent.state.pos,
                "vel" : agent.state.vel
        }
        # Heading information is exposed only for the bicycle-type dynamics.
        if isinstance(agent.dynamics, KinematicBicycle) or isinstance(agent.dynamics, FixedWingKinematicBicycle):
            obs.update({
                "rot": agent.state.rot,
                "ang_vel": agent.state.ang_vel
            })
        return obs

    def info(self, agent: Agent) -> Dict[str, Tensor]:
        """Return the reward components and the number of currently covered targets."""
        info = {
            "covering_reward": (
                agent.covering_reward
                if not self.shared_reward
                else self.shared_covering_rew
            ),
            "collision_rew": agent.collision_rew,
            "targets_covered": self.covered_targets.sum(-1),
        }
        return info

    def done(self):
        """Per-environment flag: every target has been covered at some point.

        ``all_time_covered_targets`` is only updated in the non-respawn branch
        of ``reward``, so with ``targets_respawn=True`` it stays all-False.
        """
        return self.all_time_covered_targets.all(dim=-1)

    def extra_render(self, env_index: int = 0) -> "List[Geom]":
        """Build the extra geometry for one environment.

        Draws an unfilled circle of radius ``covering_range`` around each target
        and a black line between every pair of agents that are within
        ``_comms_range`` of each other.
        """
        from vmas.simulator import rendering

        geoms: List[Geom] = []
        # Target ranges
        for target in self._targets:
            range_circle = rendering.make_circle(self._covering_range, filled=False)
            xform = rendering.Transform()
            xform.set_translation(*target.state.pos[env_index])
            range_circle.add_attr(xform)
            range_circle.set_color(*self.target_color.value)
            geoms.append(range_circle)
        # Communication lines
        for i, agent1 in enumerate(self.world.agents):
            for j, agent2 in enumerate(self.world.agents):
                # Visit each unordered pair once.
                if j <= i:
                    continue
                agent_dist = torch.linalg.vector_norm(
                    agent1.state.pos - agent2.state.pos, dim=-1
                )
                if agent_dist[env_index] <= self._comms_range:
                    color = Color.BLACK.value
                    line = rendering.Line(
                        (agent1.state.pos[env_index]),
                        (agent2.state.pos[env_index]),
                        width=1,
                    )
                    xform = rendering.Transform()
                    line.add_attr(xform)
                    line.set_color(*color)
                    geoms.append(line)

        return geoms

In [4]:
import copy
from typing import Callable, Optional
from benchmarl.environments import VmasTask
from benchmarl.utils import DEVICE_TYPING
from torchrl.envs import EnvBase, VmasEnv

def get_env_fun(
    self,
    num_envs: int,
    continuous_actions: bool,
    seed: Optional[int],
    device: DEVICE_TYPING,
) -> Callable[[], "EnvBase"]:
    """Replacement for the task's ``get_env_fun`` that swaps in ``FWDiscoveryScenario``.

    When the task is ``NAVIGATION`` the returned factory builds a ``VmasEnv``
    around a ``FWDiscoveryScenario``; for any other task it passes the
    lower-cased task name so the stock scenario is used.

    Args:
        self: The task object this function is patched onto; must expose
            ``config`` and ``name``.
        num_envs: Number of vectorized environments.
        continuous_actions: Whether the environment uses continuous actions.
        seed: Environment seed, or ``None``.
        device: Device the environment is created on.

    Returns:
        A zero-argument callable that creates a new ``VmasEnv`` on every call.
    """
    # Snapshot the task config so later edits to it do not leak into the factory.
    config = copy.deepcopy(self.config)

    use_custom_scenario = (hasattr(self, "name") and self.name == "NAVIGATION") or (
        self is VmasTask.NAVIGATION
    )
    # Resolved eagerly so a task without a usable name fails here, not inside the factory.
    scenario_name = None if use_custom_scenario else self.name.lower()

    def make_env():
        # A scenario object holds per-environment state, so each environment
        # gets its own instance instead of sharing one across factory calls.
        scenario = FWDiscoveryScenario() if use_custom_scenario else scenario_name
        return VmasEnv(
            scenario=scenario,
            num_envs=num_envs,
            continuous_actions=continuous_actions,
            seed=seed,
            device=device,
            categorical_actions=True,
            **config,
        )

    return make_env

  return datetime.utcnow().replace(tzinfo=utc)


In [5]:
# Install the custom factory on whichever task class this BenchMARL release exposes.
try:
    from benchmarl.environments import VmasClass
except ImportError:
    # `VmasClass` is not importable from this release: patch `VmasTask` instead.
    print("Import Error")
    VmasTask.get_env_fun = get_env_fun
else:
    VmasClass.get_env_fun = get_env_fun

In [6]:
import wandb
import os
from google.colab import userdata
# Export the W&B credentials (read from the Colab secret store) and the logging
# mode as environment variables, then log in with the exported key.
os.environ.update(
    {
        "WANDB_API_KEY": userdata.get("WANDB_API_KEY"),
        "WANDB_MODE": "online",
    }
)
wandb.login(key=os.environ["WANDB_API_KEY"])

  return datetime.utcnow().replace(tzinfo=utc)
  | |_| | '_ \/ _` / _` |  _/ -_)
  return LooseVersion(v) >= LooseVersion(check)
  return datetime.utcnow().replace(tzinfo=utc)
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mhamzahraoof[0m ([33mhamzahraoof-solo-dev[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [9]:
from benchmarl.algorithms import MappoConfig
from benchmarl.experiment import Experiment, ExperimentConfig
from benchmarl.models import MlpConfig

# --- Experiment-wide settings ---------------------------------------------
# Start from the library's YAML defaults and override what this run needs.
experiment_config = ExperimentConfig.get_from_yaml()

# Sample and train on the GPU when one is available.
experiment_device = "cuda" if torch.cuda.is_available() else "cpu"
experiment_config.sampling_device = experiment_device
experiment_config.train_device = experiment_device

experiment_config.max_n_frames = 20_000_000
experiment_config.gamma = 0.99
experiment_config.on_policy_collected_frames_per_batch = 100_000
experiment_config.on_policy_n_envs_per_worker = 1000
experiment_config.on_policy_n_minibatch_iters = 45
experiment_config.on_policy_minibatch_size = 4096
experiment_config.evaluation = True
experiment_config.render = True
experiment_config.share_policy_params = True
experiment_config.evaluation_interval = 200_000
experiment_config.evaluation_episodes = 200
experiment_config.loggers = ["wandb"]

# --- Task -------------------------------------------------------------------
# NAVIGATION is the task that the patched `get_env_fun` redirects to
# `FWDiscoveryScenario`, so the config keys below are that scenario's options
# (plus `max_steps`).
task = VmasTask.NAVIGATION.get_from_yaml()

task.config = {
    "max_steps" : 100,
    "n_agents" : 4,
    "shared_reward" : False,
    "x_semidim" : 1,
    "y_semidim" : 1,
    "render_action" : True,
    "agents_per_target" : 1,
    "use_agent_lidar" : False,
    "agent_collision_penalty" : -1,
    "time_penalty" : -0.01
}

# --- Algorithm --------------------------------------------------------------
# Every field is given explicitly, so no YAML defaults are loaded for MAPPO.
mappo_algorithm_config = MappoConfig(
    share_param_critic=True,
    clip_epsilon=0.2,
    entropy_coef=0.001,
    critic_coef=1,
    loss_critic_type="l2",
    lmbda=0.9,
    scale_mapping="biased_softplus_1.0",
    use_tanh_normal=True,
    minibatch_advantage=False,
)

# --- Networks ---------------------------------------------------------------
# Policy and critic use the same MLP architecture but separate config objects.
model_config = MlpConfig(
    num_cells=[256, 256],
    layer_class=torch.nn.Linear,
    activation_class=torch.nn.SiLU,
)

critic_model_config = MlpConfig(
    num_cells=[256, 256],
    layer_class=torch.nn.Linear,
    activation_class=torch.nn.SiLU,
)

# --- Run --------------------------------------------------------------------
experiment = Experiment(
    task=task,
    algorithm_config=mappo_algorithm_config,
    model_config=model_config,
    critic_model_config=critic_model_config,
    seed=1337,
    config=experiment_config,
)
experiment.run()


  0%|          | 0/200 [00:00<?, ?it/s][A


mean return = -5.996002197265625:   0%|          | 1/200 [00:41<2:17:38, 41.50s/it][A


mean return = -2.6702516078948975:   1%|          | 2/200 [01:23<2:17:47, 41.75s/it][A
mean return = -1.4812514781951904:   2%|▏         | 3/200 [01:47<1:51:01, 33.82s/it][A


mean return = -1.17000150680542:   2%|▏         | 4/200 [02:29<2:00:00, 36.74s/it]  [A
mean return = -0.8585013747215271:   2%|▎         | 5/200 [02:53<1:44:56, 32.29s/it][A


mean return = -0.46100112795829773:   3%|▎         | 6/200 [03:36<1:56:04, 35.90s/it][A
mean return = -0.44350138306617737:   4%|▎         | 7/200 [04:00<1:43:22, 32.14s/it][A


mean return = -0.2352512925863266:   4%|▍         | 8/200 [04:42<1:52:42, 35.22s/it] [A
mean return = -0.41500142216682434:   4%|▍         | 9/200 [05:07<1:42:00, 32.04s/it][A


mean return = -0.2662512958049774:   5%|▌         | 10/200 [05:49<1:51:07, 35.09s/it][A
mean return = -0.23525142669677734:   6%|▌         | 11/200 [06:14<1:40:19, 31.85s/it][A


mean return = -0.41625142097473145:   6%|▌         | 12/200 [06:57<1:50:41, 35.33s/it][A
mean return = -0.18875156342983246:   6%|▋         | 13/200 [07:22<1:40:21, 32.20s/it][A


mean return = -0.000501510628964752:   7%|▋         | 14/200 [08:04<1:49:18, 35.26s/it][A
mean return = -0.17775164544582367:   8%|▊         | 15/200 [08:29<1:38:51, 32.06s/it] [A


mean return = -0.23300166428089142:   8%|▊         | 16/200 [09:11<1:47:26, 35.03s/it][A
mean return = -0.024501681327819824:   8%|▊         | 17/200 [09:35<1:37:11, 31.86s/it][A


mean return = 0.27574825286865234:   9%|▉         | 18/200 [10:19<1:47:11, 35.34s/it]  [A
mean return = 0.43324825167655945:  10%|▉         | 19/200 [10:43<1:37:00, 32.16s/it][A


mean return = 0.45074814558029175:  10%|█         | 20/200 [11:26<1:45:32, 35.18s/it][A
mean return = 0.4284979999065399:  10%|█         | 21/200 [11:50<1:35:17, 31.94s/it] [A


mean return = 0.7724979519844055:  11%|█         | 22/200 [12:32<1:44:01, 35.07s/it][A
mean return = 0.7439976930618286:  12%|█▏        | 23/200 [12:57<1:34:00, 31.87s/it][A


mean return = 0.8072475790977478:  12%|█▏        | 24/200 [13:39<1:43:00, 35.12s/it][A
mean return = 1.0507473945617676:  12%|█▎        | 25/200 [14:04<1:33:07, 31.93s/it][A


mean return = 1.1064975261688232:  13%|█▎        | 26/200 [14:46<1:41:39, 35.06s/it][A
mean return = 1.079497218132019:  14%|█▎        | 27/200 [15:11<1:31:57, 31.89s/it] [A


mean return = 1.5327471494674683:  14%|█▍        | 28/200 [15:53<1:40:31, 35.06s/it][A
mean return = 1.7134969234466553:  14%|█▍        | 29/200 [16:18<1:30:50, 31.88s/it][A


mean return = 1.8937463760375977:  15%|█▌        | 30/200 [17:01<1:40:22, 35.43s/it][A
mean return = 2.0124964714050293:  16%|█▌        | 31/200 [17:26<1:30:29, 32.13s/it][A


mean return = 2.2182462215423584:  16%|█▌        | 32/200 [18:08<1:38:28, 35.17s/it][A
mean return = 2.5054962635040283:  16%|█▋        | 33/200 [18:33<1:29:00, 31.98s/it][A


mean return = 2.479245901107788:  17%|█▋        | 34/200 [19:15<1:37:12, 35.13s/it] [A
mean return = 2.6247458457946777:  18%|█▊        | 35/200 [19:39<1:27:40, 31.88s/it][A


mean return = 2.618745803833008:  18%|█▊        | 36/200 [20:23<1:36:45, 35.40s/it] [A
mean return = 2.8949954509735107:  18%|█▊        | 37/200 [20:47<1:27:11, 32.10s/it][A


mean return = 3.0359954833984375:  19%|█▉        | 38/200 [21:29<1:34:41, 35.07s/it][A
mean return = 3.2347450256347656:  20%|█▉        | 39/200 [21:54<1:25:35, 31.90s/it][A


mean return = 3.328244686126709:  20%|██        | 40/200 [22:37<1:34:11, 35.32s/it] [A
mean return = 3.540994882583618:  20%|██        | 41/200 [23:01<1:24:45, 31.98s/it][A


mean return = 3.3172447681427:  21%|██        | 42/200 [23:44<1:32:44, 35.22s/it]  [A
mean return = 3.4962449073791504:  22%|██▏       | 43/200 [24:09<1:23:39, 31.97s/it][A


mean return = 3.5229945182800293:  22%|██▏       | 44/200 [24:51<1:31:19, 35.12s/it][A
mean return = 3.592744827270508:  22%|██▎       | 45/200 [25:15<1:22:20, 31.88s/it] [A


mean return = 3.6537442207336426:  23%|██▎       | 46/200 [25:59<1:31:10, 35.53s/it][A
mean return = 3.69974422454834:  24%|██▎       | 47/200 [26:24<1:22:06, 32.20s/it]  [A


mean return = 3.814244508743286:  24%|██▍       | 48/200 [27:07<1:29:45, 35.43s/it][A
mean return = 3.7992444038391113:  24%|██▍       | 49/200 [27:31<1:20:50, 32.12s/it][A


mean return = 3.747994899749756:  25%|██▌       | 50/200 [28:14<1:28:07, 35.25s/it] [A
mean return = 3.826244592666626:  26%|██▌       | 51/200 [28:38<1:19:21, 31.95s/it][A


mean return = 3.690244197845459:  26%|██▌       | 52/200 [29:21<1:27:17, 35.39s/it][A
mean return = 3.688744306564331:  26%|██▋       | 53/200 [29:46<1:18:32, 32.06s/it][A


mean return = 4.0124945640563965:  27%|██▋       | 54/200 [30:29<1:25:59, 35.34s/it][A
mean return = 3.814744234085083:  28%|██▊       | 55/200 [30:53<1:17:30, 32.07s/it] [A


mean return = 3.9207444190979004:  28%|██▊       | 56/200 [31:35<1:24:13, 35.10s/it][A
mean return = 4.044993877410889:  28%|██▊       | 57/200 [32:00<1:15:59, 31.88s/it] [A


mean return = 3.999244213104248:  29%|██▉       | 58/200 [32:44<1:24:24, 35.67s/it][A
mean return = 4.275244235992432:  30%|██▉       | 59/200 [33:08<1:15:35, 32.16s/it][A


mean return = 4.052744388580322:  30%|███       | 60/200 [33:51<1:22:12, 35.23s/it][A
mean return = 4.014244556427002:  30%|███       | 61/200 [34:15<1:14:04, 31.98s/it][A


mean return = 4.201244354248047:  31%|███       | 62/200 [34:57<1:20:31, 35.01s/it][A
mean return = 3.9299941062927246:  32%|███▏      | 63/200 [35:21<1:12:35, 31.79s/it][A


mean return = 4.186244487762451:  32%|███▏      | 64/200 [36:04<1:19:40, 35.15s/it] [A
mean return = 4.174993991851807:  32%|███▎      | 65/200 [36:29<1:11:58, 31.99s/it][A


mean return = 4.209493160247803:  33%|███▎      | 66/200 [37:11<1:18:23, 35.10s/it][A
mean return = 4.153243541717529:  34%|███▎      | 67/200 [37:36<1:10:33, 31.83s/it][A


mean return = 4.320244312286377:  34%|███▍      | 68/200 [38:17<1:16:36, 34.82s/it][A
mean return = 4.3589935302734375:  34%|███▍      | 69/200 [38:42<1:09:04, 31.64s/it][A


mean return = 4.252243518829346:  35%|███▌      | 70/200 [39:24<1:15:44, 34.96s/it] [A
mean return = 4.265743255615234:  36%|███▌      | 71/200 [39:48<1:08:11, 31.72s/it][A


mean return = 4.355493545532227:  36%|███▌      | 72/200 [40:31<1:14:25, 34.89s/it][A
mean return = 4.5102434158325195:  36%|███▋      | 73/200 [40:55<1:07:04, 31.69s/it][A


mean return = 4.258993148803711:  37%|███▋      | 74/200 [41:36<1:12:46, 34.66s/it] [A
mean return = 4.358243465423584:  38%|███▊      | 75/200 [42:01<1:05:39, 31.51s/it][A


mean return = 4.52349328994751:  38%|███▊      | 76/200 [42:43<1:12:04, 34.88s/it] [A
mean return = 4.398243427276611:  38%|███▊      | 77/200 [43:07<1:04:50, 31.63s/it][A


mean return = 4.2884931564331055:  39%|███▉      | 78/200 [43:49<1:10:25, 34.64s/it][A
mean return = 4.482243537902832:  40%|███▉      | 79/200 [44:13<1:03:27, 31.46s/it] [A


mean return = 4.536993503570557:  40%|████      | 80/200 [44:56<1:09:47, 34.90s/it][A
mean return = 4.547993183135986:  40%|████      | 81/200 [45:20<1:02:40, 31.60s/it][A


mean return = 4.3499932289123535:  41%|████      | 82/200 [46:03<1:08:37, 34.89s/it][A
mean return = 4.577992916107178:  42%|████▏     | 83/200 [46:27<1:01:46, 31.68s/it] [A


mean return = 4.655492782592773:  42%|████▏     | 84/200 [47:09<1:07:06, 34.71s/it][A
mean return = 4.458743095397949:  42%|████▎     | 85/200 [47:33<1:00:28, 31.55s/it][A


mean return = 4.803992748260498:  43%|████▎     | 86/200 [48:16<1:06:34, 35.04s/it][A
mean return = 4.7597432136535645:  44%|████▎     | 87/200 [48:40<59:43, 31.71s/it] [A


mean return = 4.477742671966553:  44%|████▍     | 88/200 [49:23<1:05:26, 35.05s/it][A
mean return = 4.742743015289307:  44%|████▍     | 89/200 [49:47<58:48, 31.79s/it]  [A


mean return = 4.61949348449707:  45%|████▌     | 90/200 [50:29<1:03:44, 34.76s/it][A
mean return = 4.432743549346924:  46%|████▌     | 91/200 [50:53<57:22, 31.58s/it] [A


mean return = 4.329493045806885:  46%|████▌     | 92/200 [51:36<1:02:58, 34.98s/it][A
mean return = 4.431743144989014:  46%|████▋     | 93/200 [52:00<56:37, 31.75s/it]  [A


mean return = 4.437243461608887:  47%|████▋     | 94/200 [52:42<1:01:33, 34.85s/it][A
mean return = 4.5072431564331055:  48%|████▊     | 95/200 [53:07<55:44, 31.85s/it] [A


mean return = 4.73624324798584:  48%|████▊     | 96/200 [53:50<1:01:02, 35.22s/it][A
mean return = 4.622992992401123:  48%|████▊     | 97/200 [54:14<54:40, 31.85s/it] [A


mean return = 4.783742427825928:  49%|████▉     | 98/200 [54:57<59:48, 35.18s/it][A
mean return = 4.770742893218994:  50%|████▉     | 99/200 [55:21<53:43, 31.91s/it][A


mean return = 4.6857428550720215:  50%|█████     | 100/200 [56:03<58:20, 35.01s/it][A
mean return = 4.6989922523498535:  50%|█████     | 101/200 [56:28<52:26, 31.78s/it][A


mean return = 4.66199254989624:  51%|█████     | 102/200 [57:11<57:28, 35.19s/it]  [A
mean return = 4.699742794036865:  52%|█████▏    | 103/200 [57:35<51:44, 32.00s/it][A


mean return = 4.945743560791016:  52%|█████▏    | 104/200 [58:17<56:04, 35.05s/it][A
mean return = 4.669992446899414:  52%|█████▎    | 105/200 [58:42<50:17, 31.77s/it][A


mean return = 4.864742755889893:  53%|█████▎    | 106/200 [59:24<54:44, 34.95s/it][A
mean return = 4.724992752075195:  54%|█████▎    | 107/200 [59:48<49:04, 31.66s/it][A


mean return = 4.511492729187012:  54%|█████▍    | 108/200 [1:00:31<53:37, 34.98s/it][A
mean return = 4.618992805480957:  55%|█████▍    | 109/200 [1:00:55<48:09, 31.75s/it][A


mean return = 4.706243515014648:  55%|█████▌    | 110/200 [1:01:37<52:23, 34.93s/it][A
mean return = 4.6559929847717285:  56%|█████▌    | 111/200 [1:02:01<46:59, 31.68s/it][A


mean return = 4.729242324829102:  56%|█████▌    | 112/200 [1:02:45<51:36, 35.19s/it] [A
mean return = 4.784992694854736:  56%|█████▋    | 113/200 [1:03:08<46:05, 31.78s/it][A


mean return = 4.624242305755615:  57%|█████▋    | 114/200 [1:03:51<50:01, 34.91s/it][A
mean return = 4.810492992401123:  57%|█████▊    | 115/200 [1:04:16<45:10, 31.89s/it][A


mean return = 4.6664934158325195:  58%|█████▊    | 116/200 [1:04:57<48:48, 34.87s/it][A
mean return = 4.86149263381958:  58%|█████▊    | 117/200 [1:05:21<43:43, 31.60s/it]  [A


mean return = 4.944992542266846:  59%|█████▉    | 118/200 [1:06:04<47:47, 34.96s/it][A
mean return = 4.703242301940918:  60%|█████▉    | 119/200 [1:06:29<42:59, 31.84s/it][A


mean return = 4.803492546081543:  60%|██████    | 120/200 [1:07:10<46:26, 34.83s/it][A
mean return = 5.045492649078369:  60%|██████    | 121/200 [1:07:35<41:35, 31.59s/it][A


mean return = 4.880242347717285:  61%|██████    | 122/200 [1:08:17<45:08, 34.72s/it][A
mean return = 4.9292426109313965:  62%|██████▏   | 123/200 [1:08:40<40:24, 31.48s/it][A


mean return = 4.97224235534668:  62%|██████▏   | 124/200 [1:09:23<43:56, 34.69s/it]  [A
mean return = 4.933492183685303:  62%|██████▎   | 125/200 [1:09:47<39:23, 31.51s/it][A


mean return = 4.87849235534668:  63%|██████▎   | 126/200 [1:10:28<42:37, 34.57s/it] [A
mean return = 4.927992343902588:  64%|██████▎   | 127/200 [1:10:53<38:25, 31.58s/it][A


mean return = 5.061242580413818:  64%|██████▍   | 128/200 [1:11:36<41:48, 34.85s/it][A
mean return = 4.850742340087891:  64%|██████▍   | 129/200 [1:11:59<37:18, 31.52s/it][A


mean return = 4.716992378234863:  65%|██████▌   | 130/200 [1:12:42<40:31, 34.73s/it][A
mean return = 4.805992603302002:  66%|██████▌   | 131/200 [1:13:06<36:15, 31.53s/it][A


mean return = 4.883242607116699:  66%|██████▌   | 132/200 [1:13:47<39:15, 34.64s/it][A
mean return = 4.609992504119873:  66%|██████▋   | 133/200 [1:14:12<35:08, 31.47s/it][A


mean return = 4.863492488861084:  67%|██████▋   | 134/200 [1:14:54<38:13, 34.76s/it][A
mean return = 4.967242240905762:  68%|██████▊   | 135/200 [1:15:18<34:18, 31.67s/it][A


mean return = 4.6429924964904785:  68%|██████▊   | 136/200 [1:16:01<37:06, 34.79s/it][A
mean return = 4.753242492675781:  68%|██████▊   | 137/200 [1:16:25<33:09, 31.58s/it] [A


mean return = 4.809242248535156:  69%|██████▉   | 138/200 [1:17:06<35:43, 34.56s/it][A
mean return = 5.046492576599121:  70%|██████▉   | 139/200 [1:17:30<31:54, 31.38s/it][A


mean return = 5.093992233276367:  70%|███████   | 140/200 [1:18:13<34:42, 34.70s/it][A
mean return = 4.947492599487305:  70%|███████   | 141/200 [1:18:36<30:56, 31.47s/it][A


mean return = 4.747992515563965:  71%|███████   | 142/200 [1:19:19<33:36, 34.77s/it][A
mean return = 5.160242557525635:  72%|███████▏  | 143/200 [1:19:44<30:07, 31.71s/it][A


mean return = 4.7402424812316895:  72%|███████▏  | 144/200 [1:20:25<32:22, 34.68s/it][A
mean return = 5.068742275238037:  72%|███████▎  | 145/200 [1:20:49<28:52, 31.49s/it] [A


mean return = 4.944492340087891:  73%|███████▎  | 146/200 [1:21:32<31:29, 35.00s/it][A
mean return = 4.922242641448975:  74%|███████▎  | 147/200 [1:21:56<27:57, 31.64s/it][A


mean return = 4.7902421951293945:  74%|███████▍  | 148/200 [1:22:38<30:10, 34.82s/it][A
mean return = 5.008742332458496:  74%|███████▍  | 149/200 [1:23:02<26:50, 31.58s/it] [A


mean return = 4.9087419509887695:  75%|███████▌  | 150/200 [1:23:44<28:49, 34.58s/it][A
mean return = 5.069492340087891:  76%|███████▌  | 151/200 [1:24:09<25:47, 31.57s/it] [A


mean return = 4.851242542266846:  76%|███████▌  | 152/200 [1:24:51<27:54, 34.90s/it][A
mean return = 5.0492424964904785:  76%|███████▋  | 153/200 [1:25:15<24:45, 31.61s/it][A


mean return = 4.9624924659729:  77%|███████▋  | 154/200 [1:25:57<26:30, 34.57s/it]   [A
mean return = 4.933492183685303:  78%|███████▊  | 155/200 [1:26:21<23:33, 31.40s/it][A


mean return = 5.047492504119873:  78%|███████▊  | 156/200 [1:27:02<25:15, 34.45s/it][A
mean return = 4.898492336273193:  78%|███████▊  | 157/200 [1:27:26<22:26, 31.31s/it][A


mean return = 4.835492134094238:  79%|███████▉  | 158/200 [1:28:09<24:18, 34.73s/it][A
mean return = 4.94099235534668:  80%|███████▉  | 159/200 [1:28:33<21:37, 31.65s/it] [A


mean return = 4.939492225646973:  80%|████████  | 160/200 [1:29:15<23:04, 34.62s/it][A
mean return = 5.080742359161377:  80%|████████  | 161/200 [1:29:39<20:26, 31.44s/it][A


mean return = 4.814242362976074:  81%|████████  | 162/200 [1:30:21<21:50, 34.49s/it][A
mean return = 5.0517425537109375:  82%|████████▏ | 163/200 [1:30:45<19:19, 31.35s/it][A


mean return = 4.964992523193359:  82%|████████▏ | 164/200 [1:31:27<20:50, 34.75s/it] [A
mean return = 4.904492378234863:  82%|████████▎ | 165/200 [1:31:51<18:23, 31.52s/it][A


mean return = 4.963492393493652:  83%|████████▎ | 166/200 [1:32:33<19:35, 34.57s/it][A
mean return = 5.113991737365723:  84%|████████▎ | 167/200 [1:32:57<17:15, 31.39s/it][A


mean return = 4.867242336273193:  84%|████████▍ | 168/200 [1:33:39<18:25, 34.54s/it][A
mean return = 4.686492443084717:  84%|████████▍ | 169/200 [1:34:03<16:12, 31.39s/it][A


mean return = 4.843742370605469:  85%|████████▌ | 170/200 [1:34:46<17:24, 34.80s/it][A
mean return = 5.050992488861084:  86%|████████▌ | 171/200 [1:35:10<15:15, 31.57s/it][A


mean return = 4.960492134094238:  86%|████████▌ | 172/200 [1:35:51<16:07, 34.56s/it][A
mean return = 4.919992446899414:  86%|████████▋ | 173/200 [1:36:15<14:07, 31.39s/it][A


mean return = 4.915492534637451:  87%|████████▋ | 174/200 [1:36:57<14:56, 34.49s/it][A
mean return = 4.961992263793945:  88%|████████▊ | 175/200 [1:37:21<13:02, 31.31s/it][A


mean return = 5.213492393493652:  88%|████████▊ | 176/200 [1:38:04<13:53, 34.75s/it][A
mean return = 4.8314924240112305:  88%|████████▊ | 177/200 [1:38:28<12:04, 31.52s/it][A


mean return = 5.019992351531982:  89%|████████▉ | 178/200 [1:39:09<12:40, 34.58s/it] [A
mean return = 5.021242141723633:  90%|████████▉ | 179/200 [1:39:33<11:00, 31.43s/it][A


mean return = 5.089242458343506:  90%|█████████ | 180/200 [1:40:16<11:33, 34.68s/it][A
mean return = 4.854992389678955:  90%|█████████ | 181/200 [1:40:39<09:56, 31.42s/it][A


mean return = 4.825742244720459:  91%|█████████ | 182/200 [1:41:22<10:24, 34.69s/it][A
mean return = 4.710992336273193:  92%|█████████▏| 183/200 [1:41:46<08:56, 31.53s/it][A


mean return = 4.970492362976074:  92%|█████████▏| 184/200 [1:42:28<09:17, 34.83s/it][A
mean return = 4.85574197769165:  92%|█████████▎| 185/200 [1:42:53<07:57, 31.80s/it] [A


mean return = 4.79799222946167:  93%|█████████▎| 186/200 [1:43:36<08:12, 35.16s/it][A
mean return = 4.898492336273193:  94%|█████████▎| 187/200 [1:44:00<06:53, 31.81s/it][A


mean return = 4.666492462158203:  94%|█████████▍| 188/200 [1:44:42<06:58, 34.91s/it][A
mean return = 4.489992618560791:  94%|█████████▍| 189/200 [1:45:07<05:49, 31.76s/it][A


mean return = 4.66799259185791:  95%|█████████▌| 190/200 [1:45:49<05:48, 34.85s/it] [A
mean return = 4.8904924392700195:  96%|█████████▌| 191/200 [1:46:13<04:44, 31.62s/it][A


mean return = 4.963742256164551:  96%|█████████▌| 192/200 [1:46:55<04:39, 34.91s/it] [A
mean return = 5.142491817474365:  96%|█████████▋| 193/200 [1:47:19<03:41, 31.65s/it][A


mean return = 4.860992431640625:  97%|█████████▋| 194/200 [1:48:02<03:28, 34.83s/it][A
mean return = 4.899242401123047:  98%|█████████▊| 195/200 [1:48:26<02:37, 31.57s/it][A


mean return = 4.918242454528809:  98%|█████████▊| 196/200 [1:49:08<02:19, 34.80s/it][A
mean return = 4.773242473602295:  98%|█████████▊| 197/200 [1:49:32<01:34, 31.50s/it][A


mean return = 4.914242267608643:  99%|█████████▉| 198/200 [1:50:14<01:09, 34.82s/it][A
mean return = 4.803742408752441: 100%|█████████▉| 199/200 [1:50:38<00:31, 31.58s/it][A


mean return = 4.870492458343506: 100%|██████████| 200/200 [1:51:20<00:00, 34.65s/it][A

0,1
collection/agents/info/collision_rew,▁███▇▇▇▇▇▇▇▇▇▇▇▇▆▇▆▇▆▆▇▇▇▇▆▇▆▆▆▆▆▆▆▆▆▆▆▆
collection/agents/info/covering_reward,▁▁▂▂▂▄▅▅▅▆▆▇▇▇▇▇▇▇█▇▇▇██████████████████
collection/agents/info/targets_covered,▁▁▁▂▃▅▆▆▇▇▇▇▇▇▇▇█▇██████████████████████
collection/agents/reward/episode_reward_max,▁▂▂▂▄▄▅▅▅▅▅▆▇▇▅▇▆▆▆▇▆▅▆▆▆▆▇▇▇▆▇▇▇▇█▇▆█▇▇
collection/agents/reward/episode_reward_mean,▁▁▂▂▃▅▅▅▆▆▇▇▇▇▇▇███▇████████████████████
collection/agents/reward/episode_reward_min,▁▄▃▅▆▆▇▅▄▇█▇▅▆▆▂▆▆▇▅▄▇█▅▇▇▇▆▆▅▆▇▆▆▆▄▁▆▅▆
collection/reward/episode_reward_max,▁▂▁▂▃▂▃▄▃▄▄▅▅▆▆▆▆▆▇▆▆▇▇▆▇▇▆▇▇▇▇▇████▇▇█▇
collection/reward/episode_reward_mean,▁▂▂▂▂▂▃▃▅▅▆▆▆▇▇▇▇▇▇▇█████████████▇██████
collection/reward/episode_reward_min,▁▄▅▆▅▃▆▇█▆▃▆▆█▅▂▄▄▆▅▃▆▆▅▆▆▅▆▂▇▅▆▅▆▂▆▅▆▅▆
collection/reward/reward_max,▁▁▁▁▁▁█▁██▁██▁█▁██▁█▁█████▁█▁█▁███▁▁█▁▁█

0,1
collection/agents/info/collision_rew,-0.03
collection/agents/info/covering_reward,0.0887
collection/agents/info/targets_covered,0.34579
collection/agents/reward/episode_reward_max,12.74999
collection/agents/reward/episode_reward_mean,4.87049
collection/agents/reward/episode_reward_min,-15.24999
collection/reward/episode_reward_max,12.74999
collection/reward/episode_reward_mean,4.87049
collection/reward/episode_reward_min,-15.24999
collection/reward/reward_max,2.99


mean return = 4.870492458343506: 100%|██████████| 200/200 [1:51:22<00:00, 33.41s/it]
