# Multi-agent PPO on VMAS Navigation (TorchRL)


In [1]:
import torch
from tensordict.nn import set_composite_lp_aggregate
from agentslab.envs.vmas import make_vmas_env
from agentslab.models.networks import make_multiagent_mlp
from agentslab.models.policy import build_gaussian_policy_for_marl
from agentslab.models.value import make_value_net
from agentslab.runner.train import train_marl_ppo, PPOConfig
from agentslab.utils.device import select_device, split_devices
from agentslab.utils.seeding import seed_everything

# --- Devices and seeding -----------------------------------------------------
# Ask for CUDA, then split the result into the device used for the networks
# and the device handed to the VMAS simulator.
policy_device, vmas_device = split_devices(select_device('cuda'))
seed_everything(0)

# --- Environment -------------------------------------------------------------
env = make_vmas_env(
    'navigation',
    num_envs=64,
    device=policy_device,
    vmas_device=vmas_device,
    seed=0,
)
print('Specs:', env.observation_spec, env.action_spec)

# --- Decentralised policy and critic -----------------------------------------
# Sizes are read from the trailing dimension of the env specs.
obs_dim = env.full_observation_spec['agents', 'observation'].shape[-1]
n_agents = env.n_agents
act_dim = env.full_action_spec[env.action_key].shape[-1]

# The network emits 2 * act_dim values per agent.
# NOTE(review): presumably the loc and scale of the Gaussian policy -- confirm
# against build_gaussian_policy_for_marl.
policy_net = make_multiagent_mlp(
    obs_dim,
    2 * act_dim,
    n_agents=n_agents,
    device=policy_device,
    centralized=False,
    share_params=True,
)
policy = build_gaussian_policy_for_marl(
    policy_net,
    env.action_key,
    env.action_spec_unbatched,
)
critic = make_value_net(obs_dim, device=policy_device)

# --- Training ----------------------------------------------------------------
# NOTE(review): the recorded output of this cell ends with a RuntimeError about
# setting 'advantage' via a constructor. No call in this cell passes that key,
# so it presumably originates inside train_marl_ppo -- verify there.
run, rewards = train_marl_ppo(
    env,
    policy,
    critic,
    device=policy_device,
    log_dir='../logs',
    ckpt_dir='../checkpoints',
    env_name='vmas/navigation',
    seed=0,
    cfg=PPOConfig(),
)
print('Run:', run)
rewards


Specs: Composite(
    agents: Composite(
        observation: UnboundedContinuous(
            shape=torch.Size([64, 4, 18]),
            space=ContinuousBox(
                low=Tensor(shape=torch.Size([64, 4, 18]), device=cpu, dtype=torch.float32, contiguous=True),
                high=Tensor(shape=torch.Size([64, 4, 18]), device=cpu, dtype=torch.float32, contiguous=True)),
            device=cpu,
            dtype=torch.float32,
            domain=continuous),
        info: Composite(
            pos_rew: UnboundedContinuous(
                shape=torch.Size([64, 4, 1]),
                space=ContinuousBox(
                    low=Tensor(shape=torch.Size([64, 4, 1]), device=cpu, dtype=torch.float32, contiguous=True),
                    high=Tensor(shape=torch.Size([64, 4, 1]), device=cpu, dtype=torch.float32, contiguous=True)),
                device=cpu,
                dtype=torch.float32,
                domain=continuous),
            final_rew: UnboundedContinuous(
           



RuntimeError: Setting 'advantage' via the constructor is deprecated, use .set_keys(<key>='some_key') instead.