In [2]:
import warnings
warnings.filterwarnings("ignore", message="Gym has been unmaintained since")

import sys
sys.path.append("../env")  # to read hospital_env.py

from hospital_env import HospitalEnv
import gymnasium as gym
import numpy as np
import torch

from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv

# ------------------------------
# Set seeds for reproducibility
# ------------------------------
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# --- Create wrapped environment ---
def make_env():
    env = HospitalEnv()
    env.reset(seed=SEED)   # seed the environment here
    env = Monitor(env)      # important for SB3 logging
    return env

env = DummyVecEnv([make_env])

# --- Create DQN agent ---
model = DQN(
    "MlpPolicy",       # Fully connected NN
    env,
    learning_rate=5e-4,
    gamma=0.95,
    batch_size=64,
    buffer_size=50000,
    exploration_initial_eps=1.0,
    exploration_final_eps=0.1,
    exploration_fraction=0.1,   # epsilon decay
    target_update_interval=1000,
    verbose=1,
    seed=SEED                  # seed SB3 agent
)

# --- Train agent ---
model.learn(total_timesteps=50000)

# --- Save trained model ---
model.save("../models/dqn_hospital_sb3")
print("Model saved to models/dqn_hospital_sb3.zip")

# --- Evaluation ---
eval_env = DummyVecEnv([make_env])
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=50)
print(f"Mean reward: {mean_reward:.2f} ± {std_reward:.2f}")


Using cpu device
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 100      |
|    ep_rew_mean      | 1e+03    |
|    exploration_rate | 0.928    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 1243     |
|    time_elapsed     | 0        |
|    total_timesteps  | 400      |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 7.62     |
|    n_updates        | 74       |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 100      |
|    ep_rew_mean      | 1e+03    |
|    exploration_rate | 0.856    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 1178     |
|    time_elapsed     | 0        |
|    total_timesteps  | 800      |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.00704  |
|  

In [3]:
import warnings
warnings.filterwarnings("ignore", message="Gym has been unmaintained since")
import gymnasium as gym
import numpy as np
from gymnasium import spaces

class HospitalEnv(gym.Env):
    def __init__(self, num_doctors=3, max_steps=100):
        super().__init__()

        self.num_doctors = num_doctors
        self.max_steps = max_steps

        # Observation space:
        # [free_doctors, doctor_timers..., max_wait_red, max_wait_yellow, max_wait_green,
        #  len_red, len_yellow, len_green, total_queue_length]
        obs_low = np.zeros(11, dtype=np.float32)

        obs_high = np.array(
            [self.num_doctors] +         # number of doctors
            [200] * self.num_doctors +   # service times
            [200, 200, 200] +            # max wait red/yellow/green
            [30, 30, 30, 30],            # queue lengths + total queue length
            dtype=np.float32)

        self.observation_space = spaces.Box(low=obs_low, high=obs_high, dtype=np.float32)


        
        # Actions: RED=0, YELLOW=1, GREEN=2
        self.action_space = spaces.Discrete(3)

        # Queues
        self.red_queue = []
        self.yellow_queue = []
        self.green_queue = []

        # Doctors timers
        self.doctor_timers = np.zeros(self.num_doctors)

        # Step counter
        self.current_step = 0

        # Coefficients for reward
        self.C = {"red": 10.0, "yellow": 1.0, "green": 0.2}

        # Max wait thresholds for critical penalty
        self.thresholds = {"red": 5, "yellow": 15, "green": 30}
        self.critical_penalty_values = {"red": 100, "yellow": 20, "green": 5}
        self.critical_penalty = {"red": 0, "yellow": 0, "green": 0}

        # Service time ranges (minutes)
        self.service_ranges = {
            "red": (8, 15),
            "yellow": (4, 8),
            "green": (2, 3)
        }

        # Arrival rates per minute
        self.arrival_lambda = {"red": 3, "yellow": 2, "green": 1}

        # Maximum total queue length
        self.max_queue_length = 30

    # -------------------------------
    # Reset environment
    # -------------------------------
    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.red_queue = []
        self.yellow_queue = []
        self.green_queue = []
        self.doctor_timers[:] = 0
        self.current_step = 0
        self.critical_penalty = {"red": 0, "yellow": 0, "green": 0}
        return self._get_obs(), {}

    # -------------------------------
    # Observation
    # -------------------------------
    def _get_obs(self):
        total_queue = len(self.red_queue) + len(self.yellow_queue) + len(self.green_queue)
        obs = np.array([
            np.sum(self.doctor_timers == 0),          # free doctors
            *self.doctor_timers,                       # remaining service times
            max(self.red_queue) if self.red_queue else 0,
            max(self.yellow_queue) if self.yellow_queue else 0,
            max(self.green_queue) if self.green_queue else 0,
            len(self.red_queue),
            len(self.yellow_queue),
            len(self.green_queue),
            total_queue
        ], dtype=np.float32)
        return obs

    # -------------------------------
    # Sample service time
    # -------------------------------
    def _sample_service_time(self, action):
        queue_map = {0: "red", 1: "yellow", 2: "green"}
        low, high = self.service_ranges[queue_map[action]]
        return np.random.randint(low, high+1)

    # -------------------------------
    # Add new arrivals
    # -------------------------------
    def _add_arrivals(self, service_time):
        # Poisson arrivals scaled by service time
        new_red = np.random.poisson(self.arrival_lambda["red"] * service_time)
        new_yellow = np.random.poisson(self.arrival_lambda["yellow"] * service_time)
        new_green = np.random.poisson(self.arrival_lambda["green"] * service_time)

        # Total queue length
        total_current = len(self.red_queue) + len(self.yellow_queue) + len(self.green_queue)
        total_new = new_red + new_yellow + new_green
        available_space = self.max_queue_length - total_current

        if total_new > available_space:
            # Scale down proportionally
            factor = available_space / total_new
            new_red = int(new_red * factor)
            new_yellow = int(new_yellow * factor)
            new_green = int(new_green * factor)

        self.red_queue.extend([0]*new_red)
        self.yellow_queue.extend([0]*new_yellow)
        self.green_queue.extend([0]*new_green)

    # -------------------------------
    # Step
    # -------------------------------
    def step(self, action):
        self.current_step += 1

        # Find first free doctor
        free_doctors = np.where(self.doctor_timers == 0)[0]
        if len(free_doctors) == 0:
            # Advance by min timer to free a doctor
            min_timer = min([t for t in self.doctor_timers if t > 0])
            self.doctor_timers = np.maximum(0, self.doctor_timers - min_timer)
            free_doctors = np.where(self.doctor_timers == 0)[0]

        doctor = free_doctors[0]

        # Map action to queue
        queue_map = {0: self.red_queue, 1: self.yellow_queue, 2: self.green_queue}
        queue_name_map = {0: "red", 1: "yellow", 2: "green"}
        queue = queue_map[action]
        cat_name = queue_name_map[action]

        # If queue empty, reward = 0
        if len(queue) == 0:
            reward = 0
            service_time = 0
        else:
            # Pop first patient
            wait_time = queue.pop(0)
            # Sample service time
            service_time = self._sample_service_time(action)
            self.doctor_timers[doctor] = service_time

            # Increase waiting time for other patients
            self.red_queue = [w + service_time for w in self.red_queue]
            self.yellow_queue = [w + service_time for w in self.yellow_queue]
            self.green_queue = [w + service_time for w in self.green_queue]

            # Reduce timers of other doctors
            for i in range(self.num_doctors):
                if i != doctor:
                    self.doctor_timers[i] = max(0, self.doctor_timers[i] - service_time)

        # Add new arrivals
        self._add_arrivals(service_time)

        # -------------------------------
        # Reward calculation
        # -------------------------------
        MaxWait_Red = max(self.red_queue) if self.red_queue else 0
        MaxWait_Yellow = max(self.yellow_queue) if self.yellow_queue else 0
        MaxWait_Green = max(self.green_queue) if self.green_queue else 0

        reward = 10 - (self.C["red"] * MaxWait_Red + self.C["yellow"] * MaxWait_Yellow + self.C["green"] * MaxWait_Green)

        # -------------------------------
        # Critical penalty
        # -------------------------------
        for q_name, q_list in [("red", self.red_queue), ("yellow", self.yellow_queue), ("green", self.green_queue)]:
            if any(w > self.thresholds[q_name] for w in q_list):
                self.critical_penalty[q_name] += self.critical_penalty_values[q_name]

        reward -= sum(self.critical_penalty.values())

        # Truncated if max steps reached
        truncated = self.current_step >= self.max_steps

        return self._get_obs(), reward, False, truncated, {}


In [4]:
import warnings
warnings.filterwarnings("ignore", message="Gym has been unmaintained since")

import sys
sys.path.append("../env")  # to read hospital_env.py

import gymnasium as gym
import numpy as np
import torch

from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv

# ------------------------------
# Set seeds for reproducibility
# ------------------------------
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# --- Create wrapped environment ---
def make_env():
    env = HospitalEnv()
    env.reset(seed=SEED)   # seed the environment here
    env = Monitor(env)      # important for SB3 logging
    return env

env = DummyVecEnv([make_env])

# --- Create DQN agent ---
model = DQN(
    "MlpPolicy",       # Fully connected NN
    env,
    learning_rate=5e-4,
    gamma=0.95,
    batch_size=64,
    buffer_size=50000,
    exploration_initial_eps=1.0,
    exploration_final_eps=0.1,
    exploration_fraction=0.1,   # epsilon decay
    target_update_interval=1000,
    verbose=1,
    seed=SEED                  # seed SB3 agent
)

# --- Train agent ---
model.learn(total_timesteps=50000)

# --- Save trained model ---
model.save("../models/dqn_hospital_sb3")
print("Model saved to models/dqn_hospital_sb3.zip")

# --- Evaluation ---
eval_env = DummyVecEnv([make_env])
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=50)
print(f"Mean reward: {mean_reward:.2f} ± {std_reward:.2f}")

Using cpu device
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 100      |
|    ep_rew_mean      | 1e+03    |
|    exploration_rate | 0.928    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 1212     |
|    time_elapsed     | 0        |
|    total_timesteps  | 400      |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 7.62     |
|    n_updates        | 74       |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 100      |
|    ep_rew_mean      | 1e+03    |
|    exploration_rate | 0.856    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 1106     |
|    time_elapsed     | 0        |
|    total_timesteps  | 800      |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.00704  |
|  

In [2]:
from hospital_env import HospitalEnv
from stable_baselines3 import DQN
import numpy as np

# Load trained agent
model = DQN.load("../models/dqn_hospital_sb3.zip")

env = HospitalEnv()

# Metrics storage
rewards_per_episode = []
red_waits, yellow_waits, green_waits = [], [], []
red_served, yellow_served, green_served = 0, 0, 0
queue_lengths = {"red": [], "yellow": [], "green": []}
threshold_times = {"red": 30, "yellow": 60, "green": 120}

episodes = 10

for ep in range(episodes):
    obs = env.reset()
    done = False
    total_reward = 0

    while not done:
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, done, info = env.step(action)
        total_reward += reward

        # Record queue lengths
        queue_lengths["red"].append(len(env.red_queue))
        queue_lengths["yellow"].append(len(env.yellow_queue))
        queue_lengths["green"].append(len(env.green_queue))

        # Record wait times for fairness & thresholds
        last_waits = getattr(env, "last_served_wait_times", {})  # assume you save these in env
        for cat, wait in last_waits.items():
            if cat == "red":
                red_waits.append(wait)
                red_served += 1
            elif cat == "yellow":
                yellow_waits.append(wait)
                yellow_served += 1
            elif cat == "green":
                green_waits.append(wait)
                green_served += 1

    rewards_per_episode.append(total_reward)

# Compute metrics
avg_reward = np.mean(rewards_per_episode)
avg_wait_red = np.mean(red_waits)
avg_wait_yellow = np.mean(yellow_waits)
avg_wait_green = np.mean(green_waits)

pct_red_within = 100 * sum(w <= threshold_times["red"] for w in red_waits) / red_served
pct_yellow_within = 100 * sum(w <= threshold_times["yellow"] for w in yellow_waits) / yellow_served
pct_green_within = 100 * sum(w <= threshold_times["green"] for w in green_waits) / green_served

queue_stats = {cat: {"avg": np.mean(qs), "max": np.max(qs)} for cat, qs in queue_lengths.items()}

fairness = 1 - (max(avg_wait_red, avg_wait_yellow, avg_wait_green) - min(avg_wait_red, avg_wait_yellow, avg_wait_green)) / max(avg_wait_red, avg_wait_yellow, avg_wait_green)

# Print results
print("Average reward per episode:", avg_reward)
print("Average wait times (Red, Yellow, Green):", avg_wait_red, avg_wait_yellow, avg_wait_green)
print("Percentage served within thresholds (Red, Yellow, Green):", pct_red_within, pct_yellow_within, pct_green_within)
print("Queue stats:", queue_stats)
print("Fairness metric:", fairness)


ValueError: You have passed a tuple to the predict() function instead of a Numpy array or a Dict. You are probably mixing Gym API with SB3 VecEnv API: `obs, info = env.reset()` (Gym) vs `obs = vec_env.reset()` (SB3 VecEnv). See related issue https://github.com/DLR-RM/stable-baselines3/issues/1694 and documentation for more information: https://stable-baselines3.readthedocs.io/en/master/guide/vec_envs.html#vecenv-api-vs-gym-api

In [15]:
from hospital_env import HospitalEnv
from stable_baselines3 import DQN
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv
import numpy as np

# --- Load trained agent ---
model = DQN.load("../models/dqn_hospital_sb3.zip")

# --- Wrap the environment for SB3 ---
def make_env():
    env = HospitalEnv()
    env = Monitor(env)
    return env

eval_env = DummyVecEnv([make_env])

# --- Metrics storage ---
episodes = 50  # more episodes to get stable metrics
rewards_per_episode = []
red_waits, yellow_waits, green_waits = [], [], []
red_served = yellow_served = green_served = 0
queue_lengths = {"red": [], "yellow": [], "green": []}
threshold_times = {"red": 30, "yellow": 60, "green": 120}

# --- Evaluate agent ---
for ep in range(episodes):
    obs = eval_env.reset()  # returns np.array
    done = False
    total_reward = 0

    while not done:
        # Predict action
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, terminated, truncated, info = eval_env.step(action)
        done = terminated[0] or truncated[0]  # single env
        total_reward += reward[0]

        # Access underlying HospitalEnv
        env0 = eval_env.envs[0]

        # Record queue lengths for stability
        queue_lengths["red"].append(len(env0.red_queue))
        queue_lengths["yellow"].append(len(env0.yellow_queue))
        queue_lengths["green"].append(len(env0.green_queue))

        # Track wait times per patient served
        if hasattr(env0, "last_served_wait_times"):  # if you implement this in env
            last_waits = env0.last_served_wait_times
            for cat, wait in last_waits.items():
                if cat == "red":
                    red_waits.append(wait)
                    red_served += 1
                elif cat == "yellow":
                    yellow_waits.append(wait)
                    yellow_served += 1
                elif cat == "green":
                    green_waits.append(wait)
                    green_served += 1

    rewards_per_episode.append(total_reward)

# --- Compute metrics ---

# 1. Average episode reward
avg_reward = np.mean(rewards_per_episode)

# 2. Convergence speed (std across episodes can indicate stability)
reward_std = np.std(rewards_per_episode)

# 3. Average waiting time per triage category
avg_wait_red = np.mean(red_waits) if red_waits else 0
avg_wait_yellow = np.mean(yellow_waits) if yellow_waits else 0
avg_wait_green = np.mean(green_waits) if green_waits else 0

# 4. Percentage within clinical thresholds
pct_red_within = 100 * sum(w <= threshold_times["red"] for w in red_waits) / red_served if red_served else 0
pct_yellow_within = 100 * sum(w <= threshold_times["yellow"] for w in yellow_waits) / yellow_served if yellow_served else 0
pct_green_within = 100 * sum(w <= threshold_times["green"] for w in green_waits) / green_served if green_served else 0

# 5. Queue stability measures (avg and max)
queue_stats = {cat: {"avg": np.mean(qs), "max": np.max(qs)} for cat, qs in queue_lengths.items()}

# 6. Fairness metric
waits = [avg_wait_red, avg_wait_yellow, avg_wait_green]
fairness = 1 - (max(waits) - min(waits)) / max(waits) if max(waits) > 0 else 1.0

# --- Print metrics ---
print("=== RL Agent Evaluation Metrics ===")
print(f"Average episode reward: {avg_reward:.2f} ± {reward_std:.2f}")
print(f"Average wait times (Red, Yellow, Green): {avg_wait_red:.2f}, {avg_wait_yellow:.2f}, {avg_wait_green:.2f}")
print(f"Percentage served within thresholds (Red, Yellow, Green): {pct_red_within:.1f}%, {pct_yellow_within:.1f}%, {pct_green_within:.1f}%")
print("Queue stats:", queue_stats)
print(f"Fairness metric: {fairness:.3f}")


ValueError: not enough values to unpack (expected 5, got 4)

In [16]:
from hospital_env import HospitalEnv
from stable_baselines3 import DQN
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv
import numpy as np

# ------------------------
# Load trained model
# ------------------------
model = DQN.load("../models/dqn_hospital_sb3.zip")

# ------------------------
# Wrap environment
# ------------------------
def make_env():
    env = HospitalEnv()
    env = Monitor(env)
    return env

eval_env = DummyVecEnv([make_env])

# ------------------------
# Metrics storage
# ------------------------
episodes = 50
rewards_per_episode = []

# Track waiting times per triage category
red_waits, yellow_waits, green_waits = [], [], []

# Track patients served per triage category
red_served, yellow_served, green_served = 0, 0, 0

# Track queue lengths
queue_lengths = {"red": [], "yellow": [], "green": []}

# Clinical target thresholds (minutes)
threshold_times = {"red": 30, "yellow": 60, "green": 120}

# ------------------------
# Evaluation loop
# ------------------------
for ep in range(episodes):
    obs = eval_env.reset()
    done = False
    total_reward = 0

    # Reset per-episode queues in environment (if needed)
    eval_env.envs[0].red_queue = []
    eval_env.envs[0].yellow_queue = []
    eval_env.envs[0].green_queue = []

    while not done:
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, done_array, info = eval_env.step(action)
        done = done_array[0]  # single environment
        total_reward += reward[0]

        # Record queue lengths
        queue_lengths["red"].append(len(eval_env.envs[0].red_queue))
        queue_lengths["yellow"].append(len(eval_env.envs[0].yellow_queue))
        queue_lengths["green"].append(len(eval_env.envs[0].green_queue))

        # Record served patients and their wait times
        last_waits = getattr(eval_env.envs[0], "last_served_wait_times", {})
        for cat, wait in last_waits.items():
            if cat == "red":
                red_waits.append(wait)
                red_served += 1
            elif cat == "yellow":
                yellow_waits.append(wait)
                yellow_served += 1
            elif cat == "green":
                green_waits.append(wait)
                green_served += 1

    rewards_per_episode.append(total_reward)

# ------------------------
# Compute metrics
# ------------------------
avg_reward = np.mean(rewards_per_episode)

avg_wait_red = np.mean(red_waits) if red_waits else 0
avg_wait_yellow = np.mean(yellow_waits) if yellow_waits else 0
avg_wait_green = np.mean(green_waits) if green_waits else 0

pct_red_within = 100 * sum(w <= threshold_times["red"] for w in red_waits) / red_served if red_served else 0
pct_yellow_within = 100 * sum(w <= threshold_times["yellow"] for w in yellow_waits) / yellow_served if yellow_served else 0
pct_green_within = 100 * sum(w <= threshold_times["green"] for w in green_waits) / green_served if green_served else 0

queue_stats = {cat: {"avg": np.mean(qs), "max": np.max(qs)} for cat, qs in queue_lengths.items()}

# Fairness: 1 - normalized difference between max & min average wait times
max_wait = max(avg_wait_red, avg_wait_yellow, avg_wait_green)
min_wait = min(avg_wait_red, avg_wait_yellow, avg_wait_green)
fairness = 1 - ((max_wait - min_wait) / max_wait) if max_wait > 0 else 1

# ------------------------
# Print results
# ------------------------
print("Average reward per episode:", avg_reward)
print("Average wait times (Red, Yellow, Green):", avg_wait_red, avg_wait_yellow, avg_wait_green)
print("Percentage served within thresholds (Red, Yellow, Green):", pct_red_within, pct_yellow_within, pct_green_within)
print("Queue stats (average and max lengths):", queue_stats)
print("Fairness metric:", fairness)


Average reward per episode: 5006.74
Average wait times (Red, Yellow, Green): 0 0 0
Percentage served within thresholds (Red, Yellow, Green): 0 0 0
Queue stats (average and max lengths): {'red': {'avg': np.float64(0.0), 'max': np.int64(0)}, 'yellow': {'avg': np.float64(0.0), 'max': np.int64(0)}, 'green': {'avg': np.float64(0.0), 'max': np.int64(0)}}
Fairness metric: 1


In [17]:
import sys
import numpy as np
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor

sys.path.append("../env")  # path to your HospitalEnv
from hospital_env import HospitalEnv

# --- Create wrapped environment for evaluation ---
def make_env():
    env = HospitalEnv()
    env = Monitor(env)  # SB3 requires Monitor for VecEnv
    return env

eval_env = DummyVecEnv([make_env])

# --- Load trained model ---
model = DQN.load("../models/dqn_hospital_sb3.zip")

# --- Metrics storage ---
episodes = 10
rewards_per_episode = []
red_waits, yellow_waits, green_waits = [], [], []
red_served, yellow_served, green_served = 0, 0, 0
queue_lengths = {"red": [], "yellow": [], "green": []}
threshold_times = {"red": 30, "yellow": 60, "green": 120}

for ep in range(episodes):
    obs = eval_env.reset()  # VecEnv returns array only
    done = False
    total_reward = 0

    while not done:
        # --- Predict action ---
        action, _ = model.predict(obs, deterministic=True)
        
        # --- Take step ---
        obs, reward, terminated, truncated, info = eval_env.step(action)
        done = terminated[0] or truncated[0]  # VecEnv returns arrays
        total_reward += reward[0]

        # --- Track queue lengths ---
        env_instance = eval_env.envs[0]  # get underlying env
        queue_lengths["red"].append(len(env_instance.red_queue))
        queue_lengths["yellow"].append(len(env_instance.yellow_queue))
        queue_lengths["green"].append(len(env_instance.green_queue))

        # --- Track wait times for fairness & thresholds ---
        last_waits = getattr(env_instance, "last_served_wait_times", {})
        for cat, wait in last_waits.items():
            if cat == "red":
                red_waits.append(wait)
                red_served += 1
            elif cat == "yellow":
                yellow_waits.append(wait)
                yellow_served += 1
            elif cat == "green":
                green_waits.append(wait)
                green_served += 1

    rewards_per_episode.append(total_reward)

# --- Compute metrics ---
avg_reward = np.mean(rewards_per_episode)

avg_wait_red = np.mean(red_waits) if red_waits else 0
avg_wait_yellow = np.mean(yellow_waits) if yellow_waits else 0
avg_wait_green = np.mean(green_waits) if green_waits else 0

pct_red_within = 100 * sum(w <= threshold_times["red"] for w in red_waits) / red_served if red_served > 0 else 0
pct_yellow_within = 100 * sum(w <= threshold_times["yellow"] for w in yellow_waits) / yellow_served if yellow_served > 0 else 0
pct_green_within = 100 * sum(w <= threshold_times["green"] for w in green_waits) / green_served if green_served > 0 else 0

queue_stats = {
    cat: {"avg": np.mean(qs) if qs else 0, "max": np.max(qs) if qs else 0}
    for cat, qs in queue_lengths.items()
}

# Fairness: 1 - normalized difference between max & min avg wait times
waits = [avg_wait_red, avg_wait_yellow, avg_wait_green]
fairness = 1 - (max(waits) - min(waits)) / max(waits) if max(waits) > 0 else 1

# --- Print results ---
print("Average reward per episode:", avg_reward)
print("Average wait times (Red, Yellow, Green):", avg_wait_red, avg_wait_yellow, avg_wait_green)
print("Percentage served within thresholds (Red, Yellow, Green):", pct_red_within, pct_yellow_within, pct_green_within)
print("Queue stats (average and max lengths):", queue_stats)
print("Fairness metric:", fairness)


ValueError: not enough values to unpack (expected 5, got 4)

In [18]:
import numpy as np
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor
from hospital_env import HospitalEnv

# --- Wrap the environment for SB3 ---
def make_env():
    env = HospitalEnv()
    env = Monitor(env)  # required for SB3 logging
    return env

eval_env = DummyVecEnv([make_env])

# --- Load trained model ---
model = DQN.load("../models/dqn_hospital_sb3.zip", env=eval_env)

# --- Metrics storage ---
episodes = 10
rewards_per_episode = []

red_waits, yellow_waits, green_waits = [], [], []
red_served, yellow_served, green_served = 0, 0, 0

queue_lengths = {"red": [], "yellow": [], "green": []}
threshold_times = {"red": 30, "yellow": 60, "green": 120}

fairness_list = []

# --- Evaluation loop ---
for ep in range(episodes):
    obs = eval_env.reset()
    done = False
    total_reward = 0

    while not done:
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, done_array, info = eval_env.step(action)
        done = done_array[0]      # Single env
        total_reward += reward[0]

        # Access underlying env to get metrics
        env_instance = eval_env.envs[0]

        # --- Queue stats ---
        queue_lengths["red"].append(len(env_instance.red_queue))
        queue_lengths["yellow"].append(len(env_instance.yellow_queue))
        queue_lengths["green"].append(len(env_instance.green_queue))

        # --- Wait times for last served patient ---
        last_waits = getattr(env_instance, "last_served_wait_times", {})
        for cat, wait in last_waits.items():
            if cat == "red":
                red_waits.append(wait)
                red_served += 1
            elif cat == "yellow":
                yellow_waits.append(wait)
                yellow_served += 1
            elif cat == "green":
                green_waits.append(wait)
                green_served += 1

        # --- Fairness metric per step ---
        if red_waits and yellow_waits and green_waits:
            max_wait = max(np.mean(red_waits), np.mean(yellow_waits), np.mean(green_waits))
            min_wait = min(np.mean(red_waits), np.mean(yellow_waits), np.mean(green_waits))
            fairness_list.append(1 - (max_wait - min_wait) / max_wait)

    rewards_per_episode.append(total_reward)

# --- Compute final metrics ---
avg_reward = np.mean(rewards_per_episode)
avg_wait_red = np.mean(red_waits) if red_waits else 0
avg_wait_yellow = np.mean(yellow_waits) if yellow_waits else 0
avg_wait_green = np.mean(green_waits) if green_waits else 0

pct_red_within = 100 * sum(w <= threshold_times["red"] for w in red_waits) / red_served if red_served else 0
pct_yellow_within = 100 * sum(w <= threshold_times["yellow"] for w in yellow_waits) / yellow_served if yellow_served else 0
pct_green_within = 100 * sum(w <= threshold_times["green"] for w in green_waits) / green_served if green_served else 0

queue_stats = {cat: {"avg": np.mean(qs) if qs else 0, "max": np.max(qs) if qs else 0} 
               for cat, qs in queue_lengths.items()}

fairness = np.mean(fairness_list) if fairness_list else 1

# --- Print results ---
print("Average reward per episode:", avg_reward)
print("Average wait times (Red, Yellow, Green):", avg_wait_red, avg_wait_yellow, avg_wait_green)
print("Percentage served within thresholds (Red, Yellow, Green):", pct_red_within, pct_yellow_within, pct_green_within)
print("Queue stats (average and max lengths):", queue_stats)
print("Fairness metric:", fairness)


AttributeError: 'Monitor' object has no attribute 'red_queue'

In [19]:
import numpy as np
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor
from hospital_env import HospitalEnv

# --- Create wrapped environment ---
def make_env():
    env = HospitalEnv()
    env = Monitor(env)  # SB3 requires Monitor
    return env

eval_env = DummyVecEnv([make_env])

# --- Load trained agent ---
model = DQN.load("../models/dqn_hospital_sb3.zip", env=eval_env)

# --- Metrics storage ---
episodes = 10
rewards_per_episode = []
red_waits, yellow_waits, green_waits = [], [], []
red_served, yellow_served, green_served = 0, 0, 0
queue_lengths = {"red": [], "yellow": [], "green": []}
threshold_times = {"red": 30, "yellow": 60, "green": 120}

for ep in range(episodes):
    obs = eval_env.reset()
    done = False
    total_reward = 0

    while not done:
        # --- Predict action ---
        action, _ = model.predict(obs, deterministic=True)

        # --- Take step ---
        obs, reward, terminated, truncated, info = eval_env.step(action)
        done = terminated[0] or truncated[0]  # VecEnv returns arrays
        total_reward += reward[0]

        # --- Access underlying environment for queue metrics ---
        env_instance = eval_env.envs[0].envs[0].unwrapped  # DummyVecEnv -> Monitor -> HospitalEnv

        # Queue lengths
        queue_lengths["red"].append(len(env_instance.red_queue))
        queue_lengths["yellow"].append(len(env_instance.yellow_queue))
        queue_lengths["green"].append(len(env_instance.green_queue))

        # Wait times from last served patient
        last_waits = getattr(env_instance, "last_served_wait_times", {})
        for cat, wait in last_waits.items():
            if cat == "red":
                red_waits.append(wait)
                red_served += 1
            elif cat == "yellow":
                yellow_waits.append(wait)
                yellow_served += 1
            elif cat == "green":
                green_waits.append(wait)
                green_served += 1

    rewards_per_episode.append(total_reward)

# --- Compute metrics ---
avg_reward = np.mean(rewards_per_episode)
avg_wait_red = np.mean(red_waits) if red_waits else 0
avg_wait_yellow = np.mean(yellow_waits) if yellow_waits else 0
avg_wait_green = np.mean(green_waits) if green_waits else 0

pct_red_within = 100 * sum(w <= threshold_times["red"] for w in red_waits) / red_served if red_served > 0 else 0
pct_yellow_within = 100 * sum(w <= threshold_times["yellow"] for w in yellow_waits) / yellow_served if yellow_served > 0 else 0
pct_green_within = 100 * sum(w <= threshold_times["green"] for w in green_waits) / green_served if green_served > 0 else 0

queue_stats = {cat: {"avg": np.mean(qs) if qs else 0, "max": np.max(qs) if qs else 0} 
               for cat, qs in queue_lengths.items()}

fairness = 1 - (max(avg_wait_red, avg_wait_yellow, avg_wait_green) - min(avg_wait_red, avg_wait_yellow, avg_wait_green)) / max(avg_wait_red, avg_wait_yellow, avg_wait_green) if max(avg_wait_red, avg_wait_yellow, avg_wait_green) > 0 else 1

# --- Print metrics ---
print("Average reward per episode:", avg_reward)
print("Average wait times (Red, Yellow, Green):", avg_wait_red, avg_wait_yellow, avg_wait_green)
print("Percentage served within thresholds (Red, Yellow, Green):", pct_red_within, pct_yellow_within, pct_green_within)
print("Queue stats (average and max lengths):", queue_stats)
print("Fairness metric:", fairness)


ValueError: not enough values to unpack (expected 5, got 4)

In [2]:
import numpy as np
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor
import sys
sys.path.append("../env")  
from hospital_env import HospitalEnv


# --- Wrapped environment ---
def make_env():
    env = HospitalEnv()
    env = Monitor(env)
    return env

eval_env = DummyVecEnv([make_env])

# --- Load trained model ---
model = DQN.load("../models/dqn_hospital_sb3.zip", env=eval_env)

# --- Metrics ---
episodes = 10
rewards_per_episode = []
red_waits, yellow_waits, green_waits = [], [], []
red_served, yellow_served, green_served = 0, 0, 0
queue_lengths = {"red": [], "yellow": [], "green": []}
threshold_times = {"red": 30, "yellow": 60, "green": 120}

for ep in range(episodes):
    obs = eval_env.reset()
    done = False
    total_reward = 0

    while not done:
        action, _ = model.predict(obs, deterministic=True)

        # --- SB3 VecEnv returns 4 values ---
        obs, rewards, dones, infos = eval_env.step(action)
        done = dones[0]
        total_reward += rewards[0]

        # Access underlying HospitalEnv
        env_instance = eval_env.envs[0].env

        # Queue lengths
        queue_lengths["red"].append(len(env_instance.red_queue))
        queue_lengths["yellow"].append(len(env_instance.yellow_queue))
        queue_lengths["green"].append(len(env_instance.green_queue))

        # Last served wait times
        last_waits = getattr(env_instance, "last_served_wait_times", {})
        for cat, wait in last_waits.items():
            if cat == "red":
                red_waits.append(wait)
                red_served += 1
            elif cat == "yellow":
                yellow_waits.append(wait)
                yellow_served += 1
            elif cat == "green":
                green_waits.append(wait)
                green_served += 1

    rewards_per_episode.append(total_reward)

# --- Compute metrics ---
avg_reward = np.mean(rewards_per_episode)
avg_wait_red = np.mean(red_waits) if red_waits else 0
avg_wait_yellow = np.mean(yellow_waits) if yellow_waits else 0
avg_wait_green = np.mean(green_waits) if green_waits else 0

pct_red_within = 100 * sum(w <= threshold_times["red"] for w in red_waits) / red_served if red_served else 0
pct_yellow_within = 100 * sum(w <= threshold_times["yellow"] for w in yellow_waits) / yellow_served if yellow_served else 0
pct_green_within = 100 * sum(w <= threshold_times["green"] for w in green_waits) / green_served if green_served else 0

queue_stats = {cat: {"avg": np.mean(qs) if qs else 0, "max": np.max(qs) if qs else 0}
               for cat, qs in queue_lengths.items()}

fairness = 1 - (max(avg_wait_red, avg_wait_yellow, avg_wait_green) -
                min(avg_wait_red, avg_wait_yellow, avg_wait_green)) / max(avg_wait_red, avg_wait_yellow, avg_wait_green) \
                if max(avg_wait_red, avg_wait_yellow, avg_wait_green) > 0 else 1

# --- Print results ---
print("Average reward per episode:", avg_reward)
print("Average wait times (Red, Yellow, Green):", avg_wait_red, avg_wait_yellow, avg_wait_green)
print("Percentage served within thresholds (Red, Yellow, Green):", pct_red_within, pct_yellow_within, pct_green_within)
print("Queue stats (average and max lengths):", queue_stats)
print("Fairness metric:", fairness)


Average reward per episode: 5035.2
Average wait times (Red, Yellow, Green): 8.5165 0.0 1.7735
Percentage served within thresholds (Red, Yellow, Green): 99.55 100.0 99.55
Queue stats (average and max lengths): {'red': {'avg': np.float64(9.098), 'max': np.int64(34)}, 'yellow': {'avg': np.float64(194.8945), 'max': np.int64(416)}, 'green': {'avg': np.float64(93.3735), 'max': np.int64(223)}}
Fairness metric: 0.0


In [4]:
import numpy as np
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor
from hospital_env import HospitalEnv

# -----------------------
# Create environment
# -----------------------
def make_env():
    env = HospitalEnv()
    env = Monitor(env)  # Needed for SB3
    return env

eval_env = DummyVecEnv([make_env])

# -----------------------
# Load trained model
# -----------------------
model = DQN.load("../models/dqn_hospital_sb3.zip")

# -----------------------
# Metrics storage
# -----------------------
episodes = 10
threshold_times = {"red": 30, "yellow": 60, "green": 120}

all_rewards = []
all_waits = {"red": [], "yellow": [], "green": []}
served_within_threshold = {"red": 0, "yellow": 0, "green": 0}
queue_lengths = {"red": [], "yellow": [], "green": []}
fairness_counts = []

# -----------------------
# Run episodes
# -----------------------
for ep in range(episodes):
    obs = eval_env.reset()
    done = False
    total_reward = 0
    green_fairness_window = []

    while not done:
        # Predict action
        action, _ = model.predict(obs, deterministic=True)
        
        # Take step
        obs, reward, terminated, truncated, info = eval_env.step(action)
        done = terminated[0] or truncated[0]
        total_reward += reward[0]

        # Get the actual environment instance to access queues and last served waits
        env_instance = eval_env.envs[0].env  # unwrap Monitor
        last_waits = getattr(env_instance, "last_served_wait_times", {"red":0,"yellow":0,"green":0})
        
        # Record waits
        for cat in ["red", "yellow", "green"]:
            if cat in last_waits:
                all_waits[cat].append(last_waits[cat])
                if last_waits[cat] <= threshold_times[cat]:
                    served_within_threshold[cat] += 1

        # Record queue lengths
        queue_lengths["red"].append(len(env_instance.red_queue))
        queue_lengths["yellow"].append(len(env_instance.yellow_queue))
        queue_lengths["green"].append(len(env_instance.green_queue))

        # Track green fairness (was a green patient served recently?)
        green_fairness_window.append(int(action[0] == 2))
        if len(green_fairness_window) > 5:
            green_fairness_window.pop(0)
        fairness_counts.append(1 if sum(green_fairness_window) > 0 else 0)

    all_rewards.append(total_reward)

# -----------------------
# Compute metrics
# -----------------------
avg_reward = np.mean(all_rewards)
avg_waits = {cat: np.mean(all_waits[cat]) for cat in ["red","yellow","green"]}
pct_within = {cat: 100 * served_within_threshold[cat] / len(all_waits[cat]) if all_waits[cat] else 0 
              for cat in ["red","yellow","green"]}
queue_stats = {cat: {"avg": np.mean(queue_lengths[cat]), "max": np.max(queue_lengths[cat])} 
               for cat in ["red","yellow","green"]}
fairness_metric = np.mean(fairness_counts)

# -----------------------
# Print results
# -----------------------
print("Average reward per episode:", avg_reward)
print("Average wait times (Red, Yellow, Green):", avg_waits["red"], avg_waits["yellow"], avg_waits["green"])
print("Percentage served within thresholds (Red, Yellow, Green):", pct_within["red"], pct_within["yellow"], pct_within["green"])
print("Queue stats (average and max lengths):", queue_stats)
print("Fairness metric (green patient policy):", fairness_metric)


ValueError: not enough values to unpack (expected 5, got 4)

In [5]:
import numpy as np
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor
from hospital_env import HospitalEnv

# --- Create wrapped evaluation environment ---
def make_env():
    env = HospitalEnv()
    env = Monitor(env)
    return env

eval_env = DummyVecEnv([make_env])

# --- Load trained model ---
model = DQN.load("../models/dqn_hospital_sb3", env=eval_env)

# --- Metrics storage ---
episodes = 10

rewards_per_episode = []
red_waits, yellow_waits, green_waits = [], [], []
red_served, yellow_served, green_served = 0, 0, 0
queue_lengths = {"red": [], "yellow": [], "green": []}
threshold_times = {"red": 30, "yellow": 60, "green": 120}
green_fair_actions = []

# --- Evaluation loop ---
for ep in range(episodes):
    obs = eval_env.reset()
    done = False
    total_reward = 0

    while not done:
        # Predict action
        action, _ = model.predict(obs, deterministic=True)
        # Step in vectorized env (4 outputs)
        obs, reward, done_array, info = eval_env.step(action)
        done = done_array[0]  # single environment
        total_reward += reward[0]

        # Access the inner env to read queues and last served waits
        env_instance = eval_env.envs[0].env  # unwrap Monitor
        queue_lengths["red"].append(len(env_instance.red_queue))
        queue_lengths["yellow"].append(len(env_instance.yellow_queue))
        queue_lengths["green"].append(len(env_instance.green_queue))

        last_waits = getattr(env_instance, "last_served_wait_times", {})
        for cat, w in last_waits.items():
            if cat == "red":
                red_waits.append(w)
                red_served += 1
            elif cat == "yellow":
                yellow_waits.append(w)
                yellow_served += 1
            elif cat == "green":
                green_waits.append(w)
                green_served += 1

        # Track fairness: was green served at least once in last 5 actions?
        if 2 in env_instance.last_5_actions:
            green_fair_actions.append(1)
        else:
            green_fair_actions.append(0)

    rewards_per_episode.append(total_reward)

# --- Compute metrics ---
avg_reward = np.mean(rewards_per_episode)
avg_wait_red = np.mean(red_waits) if red_waits else 0
avg_wait_yellow = np.mean(yellow_waits) if yellow_waits else 0
avg_wait_green = np.mean(green_waits) if green_waits else 0

pct_red_within = 100 * sum(w <= threshold_times["red"] for w in red_waits) / red_served if red_served else 0
pct_yellow_within = 100 * sum(w <= threshold_times["yellow"] for w in yellow_waits) / yellow_served if yellow_served else 0
pct_green_within = 100 * sum(w <= threshold_times["green"] for w in green_waits) / green_served if green_served else 0

queue_stats = {cat: {"avg": np.mean(qs), "max": np.max(qs)} for cat, qs in queue_lengths.items()}

# Fairness metric for green patients: proportion of steps where green was served in last 5 actions
fairness = np.mean(green_fair_actions) if green_fair_actions else 0

# --- Print results ---
print("Average reward per episode:", avg_reward)
print("Average wait times (Red, Yellow, Green):", avg_wait_red, avg_wait_yellow, avg_wait_green)
print("Percentage served within thresholds (Red, Yellow, Green):", pct_red_within, pct_yellow_within, pct_green_within)
print("Queue stats (average and max lengths):", queue_stats)
print("Green fairness metric:", fairness)


Average reward per episode: 5035.2
Average wait times (Red, Yellow, Green): 8.5165 0.0 1.7735
Percentage served within thresholds (Red, Yellow, Green): 99.55 100.0 99.55
Queue stats (average and max lengths): {'red': {'avg': np.float64(9.098), 'max': np.int64(34)}, 'yellow': {'avg': np.float64(194.8945), 'max': np.int64(416)}, 'green': {'avg': np.float64(93.3735), 'max': np.int64(223)}}
Green fairness metric: 0.1745


In [1]:
import sys
sys.path.append("../env")  
from hospital_env import HospitalEnv
import numpy as np
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor
from hospital_env import HospitalEnv

# --- Create wrapped evaluation environment ---
def make_env():
    env = HospitalEnv()
    env = Monitor(env)
    return env

eval_env = DummyVecEnv([make_env])

# --- Load trained model ---
model = DQN.load("../models/dqn_hospital_sb3", env=eval_env)

# --- Metrics storage ---
episodes = 10

rewards_per_episode = []
red_waits, yellow_waits, green_waits = [], [], []
red_served, yellow_served, green_served = 0, 0, 0
queue_lengths = {"red": [], "yellow": [], "green": []}
threshold_times = {"red": 30, "yellow": 60, "green": 120}
green_fair_actions = []

# --- Evaluation loop ---
for ep in range(episodes):
    obs = eval_env.reset()
    done = False
    total_reward = 0

    while not done:
        # Predict action
        action, _ = model.predict(obs, deterministic=True)
        # Step in vectorized env (4 outputs)
        obs, reward, done_array, info = eval_env.step(action)
        done = done_array[0]  # single environment
        total_reward += reward[0]

        # Access the inner env to read queues and last served waits
        env_instance = eval_env.envs[0].env  # unwrap Monitor
        queue_lengths["red"].append(len(env_instance.red_queue))
        queue_lengths["yellow"].append(len(env_instance.yellow_queue))
        queue_lengths["green"].append(len(env_instance.green_queue))

        last_waits = getattr(env_instance, "last_served_wait_times", {})
        for cat, w in last_waits.items():
            if cat == "red":
                red_waits.append(w)
                red_served += 1
            elif cat == "yellow":
                yellow_waits.append(w)
                yellow_served += 1
            elif cat == "green":
                green_waits.append(w)
                green_served += 1

        # Track fairness: was green served at least once in last 5 actions?
        if 2 in env_instance.last_5_actions:
            green_fair_actions.append(1)
        else:
            green_fair_actions.append(0)

    rewards_per_episode.append(total_reward)

# --- Compute metrics ---
avg_reward = np.mean(rewards_per_episode)
avg_wait_red = np.mean(red_waits) if red_waits else 0
avg_wait_yellow = np.mean(yellow_waits) if yellow_waits else 0
avg_wait_green = np.mean(green_waits) if green_waits else 0

pct_red_within = 100 * sum(w <= threshold_times["red"] for w in red_waits) / red_served if red_served else 0
pct_yellow_within = 100 * sum(w <= threshold_times["yellow"] for w in yellow_waits) / yellow_served if yellow_served else 0
pct_green_within = 100 * sum(w <= threshold_times["green"] for w in green_waits) / green_served if green_served else 0

queue_stats = {cat: {"avg": np.mean(qs), "max": np.max(qs)} for cat, qs in queue_lengths.items()}

# Fairness metric for green patients: proportion of steps where green was served in last 5 actions
fairness = np.mean(green_fair_actions) if green_fair_actions else 0

# --- Print results ---
print("Average reward per episode:", avg_reward)
print("Average wait times (Red, Yellow, Green):", avg_wait_red, avg_wait_yellow, avg_wait_green)
print("Percentage served within thresholds (Red, Yellow, Green):", pct_red_within, pct_yellow_within, pct_green_within)
print("Queue stats (average and max lengths):", queue_stats)
print("Green fairness metric:", fairness)


Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


Average reward per episode: 901.3
Average wait times (Red, Yellow, Green): 3.55 0.0 6.66
Percentage served within thresholds (Red, Yellow, Green): 100.0 100.0 100.0
Queue stats (average and max lengths): {'red': {'avg': np.float64(3.736666666666667), 'max': np.int64(14)}, 'yellow': {'avg': np.float64(28.546666666666667), 'max': np.int64(68)}, 'green': {'avg': np.float64(25.8), 'max': np.int64(66)}}
Green fairness metric: 0.5166666666666667


In [2]:
import warnings
warnings.filterwarnings("ignore", message="Gym has been unmaintained since")

import numpy as np
from stable_baselines3 import DQN
from hospital_env import HospitalEnv  # Make sure this is in your path

# -----------------------------
# Load trained model and env
# -----------------------------
model = DQN.load("../models/dqn_hospital_sb3.zip")
env = HospitalEnv()

# -----------------------------
# Metrics storage
# -----------------------------
episodes = 10
rewards_per_episode = []

all_wait_times = {"red": [], "yellow": [], "green": []}
served_within_thresholds = {"red": 0, "yellow": 0, "green": 0}
total_served = {"red": 0, "yellow": 0, "green": 0}
queue_lengths = {"red": [], "yellow": [], "green": []}
green_fairness_actions = []

threshold_times = {"red": 30, "yellow": 60, "green": 120}

# -----------------------------
# Evaluation loop
# -----------------------------
for ep in range(episodes):
    obs, _ = env.reset()
    done = False
    total_reward = 0

    while not done:
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        total_reward += reward

        # -----------------------------
        # Track queue lengths at each step
        # -----------------------------
        queue_lengths["red"].append(len(env.red_queue))
        queue_lengths["yellow"].append(len(env.yellow_queue))
        queue_lengths["green"].append(len(env.green_queue))

        # -----------------------------
        # Track last served wait times
        # -----------------------------
        for cat in ["red", "yellow", "green"]:
            wait = env.last_served_wait_times.get(cat, 0)
            if wait > 0:
                all_wait_times[cat].append(wait)
                total_served[cat] += 1
                if wait <= threshold_times[cat]:
                    served_within_thresholds[cat] += 1

        # -----------------------------
        # Track green fairness (was green served in last 5 actions)
        # -----------------------------
        green_fairness_actions.append(2 in env.last_5_actions)

    rewards_per_episode.append(total_reward)

# -----------------------------
# Compute metrics
# -----------------------------
avg_reward = np.mean(rewards_per_episode)
avg_wait = {cat: np.mean(all_wait_times[cat]) if all_wait_times[cat] else 0 for cat in all_wait_times}
pct_within = {cat: (served_within_thresholds[cat] / total_served[cat] * 100) if total_served[cat] else 0 for cat in total_served}
queue_stats = {cat: {"avg": np.mean(queue_lengths[cat]), "max": np.max(queue_lengths[cat])} for cat in queue_lengths}
green_fairness = np.mean(green_fairness_actions)

# -----------------------------
# Print metrics
# -----------------------------
print("Average reward per episode:", avg_reward)
print("Average wait times (Red, Yellow, Green):", avg_wait["red"], avg_wait["yellow"], avg_wait["green"])
print("Percentage served within thresholds (Red, Yellow, Green):", pct_within["red"], pct_within["yellow"], pct_within["green"])
print("Queue stats (average and max lengths):", queue_stats)
print("Green fairness metric:", green_fairness)


TypeError: unhashable type: 'numpy.ndarray'

In [2]:
import warnings
warnings.filterwarnings("ignore", message="Gym has been unmaintained since")
import numpy as np
from stable_baselines3 import DQN
from hospital_env import HospitalEnv  # make sure hospital_env.py is in the same folder

# -------------------------
# Load trained agent
# -------------------------
model = DQN.load("../models/dqn_hospital_sb3.zip")

# -------------------------
# Create environment
# -------------------------
env = HospitalEnv()

# -------------------------
# Metrics storage
# -------------------------
episodes = 10
rewards_per_episode = []

red_waits, yellow_waits, green_waits = [], [], []
red_served, yellow_served, green_served = 0, 0, 0

queue_lengths = {"red": [], "yellow": [], "green": []}
threshold_times = {"red": 30, "yellow": 60, "green": 120}

green_fairness_counts = 0  # counts how often green appears in last 5 actions

# -------------------------
# Evaluation loop
# -------------------------
for ep in range(episodes):
    obs, _ = env.reset()
    done = False
    total_reward = 0

    while not done:
        action, _ = model.predict(obs, deterministic=True)
        action = int(action)  # <-- convert to int
        obs, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        total_reward += reward

        # Track queue lengths
        queue_lengths["red"].append(len(env.red_queue))
        queue_lengths["yellow"].append(len(env.yellow_queue))
        queue_lengths["green"].append(len(env.green_queue))

        # Track last served wait times
        last_waits = env.last_served_wait_times
        for cat, wait in last_waits.items():
            if cat == "red" and wait > 0:
                red_waits.append(wait)
                red_served += 1
            elif cat == "yellow" and wait > 0:
                yellow_waits.append(wait)
                yellow_served += 1
            elif cat == "green" and wait > 0:
                green_waits.append(wait)
                green_served += 1

        # Green fairness: check if green patient was served in last 5 actions
        if 2 in env.last_5_actions:
            green_fairness_counts += 1

    rewards_per_episode.append(total_reward)

# -------------------------
# Compute metrics
# -------------------------
avg_reward = np.mean(rewards_per_episode)
avg_wait_red = np.mean(red_waits) if red_waits else 0
avg_wait_yellow = np.mean(yellow_waits) if yellow_waits else 0
avg_wait_green = np.mean(green_waits) if green_waits else 0

pct_red_within = 100 * sum(w <= threshold_times["red"] for w in red_waits) / red_served if red_served else 0
pct_yellow_within = 100 * sum(w <= threshold_times["yellow"] for w in yellow_waits) / yellow_served if yellow_served else 0
pct_green_within = 100 * sum(w <= threshold_times["green"] for w in green_waits) / green_served if green_served else 0

queue_stats = {cat: {"avg": np.mean(qs), "max": np.max(qs)} for cat, qs in queue_lengths.items()}

green_fairness_metric = green_fairness_counts / (episodes * env.max_steps)  # fraction of steps where green was in last 5 actions

# -------------------------
# Print results
# -------------------------
print("Average reward per episode:", avg_reward)
print("Average wait times (Red, Yellow, Green):", avg_wait_red, avg_wait_yellow, avg_wait_green)
print("Percentage served within thresholds (Red, Yellow, Green):", pct_red_within, pct_yellow_within, pct_green_within)
print("Queue stats (average and max lengths):", queue_stats)
print("Green fairness metric:", green_fairness_metric)


Average reward per episode: 905.1
Average wait times (Red, Yellow, Green): 4.154121863799283 7.6 9.835390946502057
Percentage served within thresholds (Red, Yellow, Green): 100.0 100.0 100.0
Queue stats (average and max lengths): {'red': {'avg': np.float64(3.89), 'max': np.int64(11)}, 'yellow': {'avg': np.float64(30.386666666666667), 'max': np.int64(80)}, 'green': {'avg': np.float64(27.89666666666667), 'max': np.int64(65)}}
Green fairness metric: 0.5966666666666667


In [5]:
import warnings
warnings.filterwarnings("ignore", message="Gym has been unmaintained since")

import sys
sys.path.append("../env")  # to read hospital_env.py

from hospital_env import HospitalEnv
import gymnasium as gym
import numpy as np
import torch

from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv

# ------------------------------
# Set seeds for reproducibility
# ------------------------------
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# --- Create wrapped environment ---
def make_env():
    env = HospitalEnv()
    env.reset(seed=SEED)   # seed the environment here
    env = Monitor(env)      # important for SB3 logging
    return env

env = DummyVecEnv([make_env])

# --- Create DQN agent ---
model = DQN(
    "MlpPolicy",       # Fully connected NN
    env,
    learning_rate=5e-4,
    gamma=0.95,
    batch_size=64,
    buffer_size=50000,
    exploration_initial_eps=1.0,
    exploration_final_eps=0.1,
    exploration_fraction=0.1,   # epsilon decay
    target_update_interval=1000,
    verbose=1,
    seed=SEED                  # seed SB3 agent
)

# --- Train agent ---
model.learn(total_timesteps=50000)

# --- Save trained model ---
model.save("../models/dqn_hospital_sb3")
print("Model saved to models/dqn_hospital_sb3.zip")

# --- Evaluation ---
eval_env = DummyVecEnv([make_env])
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=50)
print(f"Mean reward: {mean_reward:.2f} ± {std_reward:.2f}")


Using cpu device
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 100      |
|    ep_rew_mean      | 1e+03    |
|    exploration_rate | 0.928    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 1102     |
|    time_elapsed     | 0        |
|    total_timesteps  | 400      |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 7.62     |
|    n_updates        | 74       |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 100      |
|    ep_rew_mean      | 1e+03    |
|    exploration_rate | 0.856    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 1067     |
|    time_elapsed     | 0        |
|    total_timesteps  | 800      |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.00704  |
|  