### Test on Small Warehouse

In [2]:
pip install gymnasium

Collecting gymnasium
  Downloading gymnasium-1.0.0-py3-none-any.whl.metadata (9.5 kB)
Collecting farama-notifications>=0.0.1 (from gymnasium)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)
Downloading gymnasium-1.0.0-py3-none-any.whl (958 kB)
   ---------------------------------------- 958.1/958.1 kB 6.1 MB/s eta 0:00:00
Downloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages: farama-notifications, gymnasium
Successfully installed farama-notifications-0.0.4 gymnasium-1.0.0
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.1.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import gymnasium as gym
import rware
from rware.warehouse import RewardType
import torch as T
from dql import DQNAgent, DeepQNetwork
import numpy as np
import time

def load_agents(n_agents, save_dir='saved_models'):
    """Load trained agents from disk"""
    agents = []
    
    for i in range(n_agents):
        
        # Create agent with loaded parameters
        agent = DQNAgent(
            gamma=0.95,
            epsilon=0.25,  # Use minimum epsilon for testing
            lr=0.001,
            input_dims=(71,),
            batch_size=64,
            n_actions=5,
            max_mem_size=100000
        )
        
        # Load network weights
        agent.Q_eval.load_state_dict(T.load(f'{save_dir}/agentsmall_{i}_Q_eval.pth'))
        
        agents.append(agent)
    
    return agents

def test_agents(n_episodes=10, max_steps=1000):
    layout = """
    ......
    ..xx..
    ..xx..
    ..xx..
    ......
    ..gg..
    """

    # layout = """
    # ......
    # ..xx..
    # ..xx..
    # ..xx..
    # ..xx..
    # ..xx..
    # ..xx..
    # ..xx..
    # ......
    # ..gg..
    # """
    env = gym.make("rware-tiny-2ag-v2",layout=layout, reward_type=RewardType.TWO_STAGE)
    n_agents = 2
    
    # Load trained agents
    agents = load_agents(n_agents)
    
    for episode in range(n_episodes):
        observations = env.reset()[0]
        episode_scores = [0] * n_agents
        
        for step in range(max_steps):
            # Get actions from trained agents
            actions = [agent.choose_action(observations[agent_id]) for agent_id, agent in enumerate(agents)]
            observations_, rewards, dones, _, _ = env.step(tuple(actions))
            env.render()
            
            rewards = list(rewards)
            
            # Update episode scores
            for agent_id in range(n_agents):
                if round(rewards[agent_id], 1) != -0.6 and round(rewards[agent_id], 1) != -0.3:
                    episode_scores[agent_id] += rewards[agent_id]
            
            observations = observations_
            time.sleep(0.004)
        
        total_score = sum(episode_scores)
        print(f"Episode {episode + 1}: Total Score = {total_score}")
    
    env.close()

if __name__ == '__main__':
    test_agents()

Episode 1: Total Score = 75.60000000000001
Episode 2: Total Score = 167.40000000000006
Episode 3: Total Score = 90.60000000000001
Episode 4: Total Score = 114.00000000000003
Episode 5: Total Score = 90.60000000000002
Episode 6: Total Score = 61.80000000000001
Episode 7: Total Score = 105.60000000000002
Episode 8: Total Score = 85.20000000000002
Episode 9: Total Score = 109.80000000000001
Episode 10: Total Score = 109.80000000000001


### Test on Large Warehouse

In [2]:
import gymnasium as gym
import rware
from rware.warehouse import RewardType
import torch as T
from dql import DQNAgent, DeepQNetwork
import numpy as np
import time

def load_agents(n_agents, save_dir='saved_models'):
    """Load trained agents from disk"""
    agents = []
    
    for i in range(n_agents):
        
        # Create agent with loaded parameters
        agent = DQNAgent(
            gamma=0.95,
            epsilon=0.37,  # Use minimum epsilon for testing
            lr=0.001,
            input_dims=(71,),
            batch_size=64,
            n_actions=5,
            max_mem_size=100000
        )
        
        # Load network weights
        agent.Q_eval.load_state_dict(T.load(f'{save_dir}/agentlarge_{i}_Q_eval.pth'))
        
        agents.append(agent)
    
    return agents

def test_agents(n_episodes=10, max_steps=1000):
    # layout = """
    # ......
    # ..xx..
    # ..xx..
    # ..xx..
    # ......
    # ..gg..
    # """

    layout = """
    ......
    ..xx..
    ..xx..
    ..xx..
    ..xx..
    ..xx..
    ..xx..
    ..xx..
    ......
    ..gg..
    """
    env = gym.make("rware-tiny-2ag-v2",layout=layout, reward_type=RewardType.TWO_STAGE)
    n_agents = 2
    
    # Load trained agents
    agents = load_agents(n_agents)
    
    for episode in range(n_episodes):
        observations = env.reset()[0]
        episode_scores = [0] * n_agents
        
        for step in range(max_steps):
            # Get actions from trained agents
            actions = [agent.choose_action(observations[agent_id]) for agent_id, agent in enumerate(agents)]
            observations_, rewards, dones, _, _ = env.step(tuple(actions))
            env.render()
            
            rewards = list(rewards)
            
            # Update episode scores
            for agent_id in range(n_agents):
                if round(rewards[agent_id], 1) != -0.6 and round(rewards[agent_id], 1) != -0.3:
                    episode_scores[agent_id] += rewards[agent_id]
            
            observations = observations_
            time.sleep(0.002)
        
        total_score = sum(episode_scores)
        print(f"Episode {episode + 1}: Total Score = {total_score}")
    
    env.close()

if __name__ == '__main__':
    test_agents()

Episode 1: Total Score = 42.60000000000001
Episode 2: Total Score = 28.800000000000004
Episode 3: Total Score = 52.2
Episode 4: Total Score = 37.2
Episode 5: Total Score = 52.2
Episode 6: Total Score = 96.00000000000001
Episode 7: Total Score = 27.6
Episode 8: Total Score = 57.60000000000001
Episode 9: Total Score = 79.80000000000001
Episode 10: Total Score = 42.60000000000001
