In [1]:
import mlagents
from mlagents_envs.environment import UnityEnvironment as UE
from mlagents_envs.envs.unity_parallel_env import UnityParallelEnv as UPZBE
import numpy as np

In [2]:
env = UE(file_name="Env/DroneFlightv1", seed=1, side_channels=[], no_graphics_monitor=False, no_graphics=False)
env = UPZBE(env)

In [3]:
def relocate_agents(env):
    agents = list(set(env.agents))
    for agent in agents:
        env.action_space(agent).dtype = np.float32
    return agents  # simplified

# New helper to extract observation data for an agent
def get_agent_obs(obs, agent):
    agent_data = obs[agent]
    if isinstance(agent_data, dict):
        return np.array(agent_data['observation'][1]), np.array(agent_data['observation'][2])
    return np.array(agent_data[2]), np.array(agent_data[1])

agents = relocate_agents(env)

# Test SAC

In [4]:
from SAC_Distillation.DistilledSACAgent import DistilledSAC
from Hyperparameters import HYPERPARAMS as params

In [5]:
brain = DistilledSAC(env.observation_space(agents[0])[2].shape, env.observation_space(agents[0])[1].shape, env.action_space(agents[0]).shape,len(agents), params['sac_distilled'])
brain.load('SavedModels/SAC_distilled_trained.pth')

Number of agents:  4
Action dimensions:  5


In [None]:
import torch

# Define the device (use GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


num_test_episodes = 1
max_episode_steps = 100000
steps = 0
obs = env.reset()

while steps < num_test_episodes * max_episode_steps:
    if len(obs) == 0:
        obs = env.reset()   # poll again without acting
        continue
    active_agents = relocate_agents(env)
    cams, vecs = [], []
    for aid in active_agents:
        cam, vec = get_agent_obs(obs, aid)
        cams.append(cam)
        vecs.append(vec)
    if not cams:
        obs = env.reset()
        continue
    cam_t = torch.as_tensor(np.stack(cams), device=device, dtype=torch.float32).unsqueeze(0)
    vec_t = torch.as_tensor(np.stack(vecs), device=device, dtype=torch.float32).unsqueeze(0)

    with torch.no_grad():
        act_t = brain.get_action(cam_t, vec_t, train=False)

    act_np = torch.round(act_t).clamp(-1,1)
    act_np = act_np[0].cpu().numpy()
    actions = {a: act for a, act in zip(active_agents, act_np)}

    next_obs, rewards, done_flags, infos = env.step(actions)
    steps += 1

    cam2, vec2, r_list, d_list = [], [], [], []
    goal_reached, crashes = 0.0, 0.0
    for aid in active_agents:
        if aid in next_obs:
            n_cam, n_vec = get_agent_obs(next_obs, aid)
        else:
            n_cam = np.zeros_like(cams[0])
            n_vec = np.zeros_like(vecs[0])
        cam2.append(n_cam)
        vec2.append(n_vec)

        r = rewards.get(aid, 0.0) + infos.get(aid, {}).get('group_reward', 0.0)
        if done_flags.get(aid, True):
            print(f"Agent {aid}: Reward: {r:.2f}, Done: {done_flags.get(aid, False)}")
        
        goal_reached += 1 if r > 19.9 else 0.0
        crashes += 1 if r < -9.9 else 0.0
        
        r_list.append(r)
        d_list.append(done_flags.get(aid, True))

    mean_r = np.mean(r_list)

    if steps % 1000 == 0:
        print(f"Step: {steps}, Mean Reward: {mean_r:.2f}, Goal Reached: {goal_reached}, Crashes: {crashes}")

    obs = next_obs
env.close()

Agent Drone?team=0?agent_id=2: Reward: 0.00, Done: True
Agent Drone?team=0?agent_id=0: Reward: 0.00, Done: True
Agent Drone?team=0?agent_id=1: Reward: 0.00, Done: True
Agent Drone?team=0?agent_id=3: Reward: -9.93, Done: True
Agent Drone?team=0?agent_id=2: Reward: 0.00, Done: True
Agent Drone?team=0?agent_id=0: Reward: -9.96, Done: True
Agent Drone?team=0?agent_id=1: Reward: 0.00, Done: True
Agent Drone?team=0?agent_id=3: Reward: 0.00, Done: True
Agent Drone?team=0?agent_id=2: Reward: 0.00, Done: True
Agent Drone?team=0?agent_id=0: Reward: -9.96, Done: True
Agent Drone?team=0?agent_id=1: Reward: 0.00, Done: True
Agent Drone?team=0?agent_id=3: Reward: 0.00, Done: True
Step: 1000, Mean Reward: 0.05, Goal Reached: 0.0, Crashes: 0.0
Agent Drone?team=0?agent_id=2: Reward: 0.00, Done: True
Agent Drone?team=0?agent_id=0: Reward: 0.00, Done: True
Agent Drone?team=0?agent_id=1: Reward: 0.00, Done: True
Agent Drone?team=0?agent_id=3: Reward: -9.93, Done: True
Agent Drone?team=0?agent_id=2: Reward

KeyboardInterrupt: 