In [2]:
import gym
import numpy as np
from gym import spaces

class HospitalEnv(gym.Env):
    """
    Simple queue environment with triage-based service times.

    Three FIFO queues (Red, Yellow, Green) each hold the accumulated waiting
    time of every patient currently waiting. An action picks the queue to
    serve; the clock then advances by the sampled service time and everyone
    still waiting accrues that much additional waiting time.

    Service times follow lognormal distributions whose means are:
    Red = 25 mins, Yellow = 15 mins, Green = 10 mins.

    NOTE(review): nothing inside this class ever adds patients to the queues,
    so unless a caller fills ``red_queue`` / ``yellow_queue`` / ``green_queue``
    directly, the observation stays at zeros and the reward at 0.
    NOTE(review): ``done`` is always False, so an episode never ends on its
    own; callers need their own step limit.
    NOTE(review): ``reset`` returns only the observation and ``step`` returns
    a 4-tuple. Callers written against the newer ``(obs, info)`` /
    5-tuple convention will not unpack these correctly - confirm which API
    the consumers of this class expect before changing the return shapes.
    """

    # (mean, std) of the service time in minutes, indexed by category
    # (0 = Red, 1 = Yellow, 2 = Green). The standard deviations are
    # approximations chosen for realistic variability.
    SERVICE_TIME_STATS = ((25, 8), (15, 5), (10, 3))

    # Clock advance (minutes) when the selected queue has nobody to serve.
    IDLE_STEP_TIME = 1

    def __init__(self):
        super().__init__()

        # Observation: number of patients in each category
        self.observation_space = spaces.Box(low=0, high=100, shape=(3,), dtype=np.int32)

        # Action: choose which category to serve (0 = Red, 1 = Yellow, 2 = Green)
        self.action_space = spaces.Discrete(3)

        # Patient queues (list of waiting times)
        self.red_queue = []
        self.yellow_queue = []
        self.green_queue = []

        # Log-space parameters of each category's service-time distribution.
        (red_stats, yellow_stats, green_stats) = self.SERVICE_TIME_STATS
        self.red_mu, self.red_sigma = self._lognormal_params(*red_stats)
        self.yellow_mu, self.yellow_sigma = self._lognormal_params(*yellow_stats)
        self.green_mu, self.green_sigma = self._lognormal_params(*green_stats)

        # Dedicated generator, created only when reset() is given a seed.
        # While it is None, sampling falls back to the global np.random state
        # (the historical behaviour, still controllable via np.random.seed).
        self._service_rng = None

        # Clock
        self.time = 0

    @staticmethod
    def _lognormal_params(mean, std):
        """Convert a real-space mean/std into the log-space (mu, sigma) of a lognormal."""
        variance = std ** 2
        mu = np.log(mean**2 / np.sqrt(variance + mean**2))
        sigma = np.sqrt(np.log(1 + variance / (mean**2)))
        return mu, sigma

    def _queues(self):
        """Return the three queues in category order (Red, Yellow, Green)."""
        # Looked up on every call because reset() (or a caller) may rebind
        # the queue attributes to new list objects.
        return (self.red_queue, self.yellow_queue, self.green_queue)

    def reset(self, seed=None, options=None):
        """
        Empty all queues, zero the clock and return the initial observation.

        Parameters
        ----------
        seed : int or None
            When given, service times are drawn from a fresh
            ``np.random.default_rng(seed)`` from now on, making the rollout
            reproducible. When None, the current random source is kept.
        options : Any
            Accepted for call compatibility; not used.

        Returns
        -------
        np.ndarray
            int32 array with the three queue lengths.
        """
        if seed is not None:
            self._service_rng = np.random.default_rng(seed)

        self.red_queue = []
        self.yellow_queue = []
        self.green_queue = []
        self.time = 0
        return self._get_obs()

    def _get_obs(self):
        """Observation = queue sizes."""
        return np.array([len(queue) for queue in self._queues()], dtype=np.int32)

    # -----------------------
    # SAMPLING SERVICE TIME
    # -----------------------
    def sample_service_time(self, category):
        """
        Draw one service time (minutes) for ``category``.

        0 selects Red, 1 selects Yellow and any other value selects Green.
        """
        if category == 0:  # Red
            mu, sigma = self.red_mu, self.red_sigma
        elif category == 1:  # Yellow
            mu, sigma = self.yellow_mu, self.yellow_sigma
        else:  # Green
            mu, sigma = self.green_mu, self.green_sigma

        rng = np.random if self._service_rng is None else self._service_rng
        return rng.lognormal(mu, sigma)

    def step(self, action):
        """
        Serve one patient from the queue selected by ``action``.

        Parameters
        ----------
        action : int-like
            0 = Red, 1 = Yellow; any other value is treated as Green, the
            same convention as ``sample_service_time``.

        Returns
        -------
        tuple
            ``(observation, reward, done, info)`` where ``reward`` is the
            negative sum of all remaining patients' waiting times, ``done``
            is always False and ``info`` is an empty dict.
        """
        if action == 0:
            category = 0
        elif action == 1:
            category = 1
        else:
            category = 2
        selected = self._queues()[category]

        if selected:
            # Sample service time based on category, then remove the patient
            # at the head of the selected queue.
            service_time = self.sample_service_time(category)
            selected.pop(0)
        else:
            # Nobody to serve: the clock still moves by one idle tick. This
            # also covers an out-of-range action landing on an empty Green
            # queue, which previously raised IndexError from pop(0).
            service_time = self.IDLE_STEP_TIME

        # Advance clock
        self.time += service_time

        # Waiting times accumulate (in place, so list identity is preserved)
        for queue in self._queues():
            queue[:] = [waited + service_time for waited in queue]

        # Reward: negative total waiting time
        reward = -sum(sum(queue) for queue in self._queues())

        done = False  # Episodic logic optional
        return self._get_obs(), reward, done, {}


ModuleNotFoundError: No module named 'gym'

In [3]:
! pip install torch torchvision torchaudio
! pip install stable-baselines3





In [4]:
!pip install gym


Collecting gym
  Using cached gym-0.26.2-py3-none-any.whl
Collecting gym_notices>=0.0.4 (from gym)
  Using cached gym_notices-0.1.0-py3-none-any.whl.metadata (1.2 kB)
Using cached gym_notices-0.1.0-py3-none-any.whl (3.3 kB)
Installing collected packages: gym_notices, gym

   -------------------- ------------------- 1/2 [gym]
   -------------------- ------------------- 1/2 [gym]
   -------------------- ------------------- 1/2 [gym]
   -------------------- ------------------- 1/2 [gym]
   -------------------- ------------------- 1/2 [gym]
   -------------------- ------------------- 1/2 [gym]
   -------------------- ------------------- 1/2 [gym]
   -------------------- ------------------- 1/2 [gym]
   -------------------- ------------------- 1/2 [gym]
   -------------------- ------------------- 1/2 [gym]
   -------------------- ------------------- 1/2 [gym]
   -------------------- ------------------- 1/2 [gym]
   -------------------- ------------------- 1/2 [gym]
   -------------------- -

In [None]:
! pip install gymnasium[all]


In [None]:
import gymnasium as gym
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy
from env.hospital_env import HospitalEnv

# Run settings, gathered in one place so they are easy to find and tune.
SEED = 42
TOTAL_TIMESTEPS = 50000
MODEL_PATH = "../models/dqn_hospital_sb3"

# Create environment
env = HospitalEnv()

# Create DQN agent
model = DQN(
    "MlpPolicy",       # Fully connected NN
    env,
    learning_rate=5e-4,
    gamma=0.95,
    batch_size=64,
    buffer_size=50000,
    exploration_fraction=0.1,   # epsilon decay
    exploration_initial_eps=1.0,
    exploration_final_eps=0.1,
    target_update_interval=1000,
    seed=SEED,         # fixed seed so a re-run reproduces the same training
    verbose=1,
)

# Train agent
model.learn(total_timesteps=TOTAL_TIMESTEPS)

# Save trained model; report the path that was actually written to
# (the message previously named "models/..." while saving to "../models/...").
model.save(MODEL_PATH)
print(f"Model saved to {MODEL_PATH}.zip")

# Evaluate agent
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
print(f"Mean reward: {mean_reward:.2f} ± {std_reward:.2f}")


In [None]:
# Load trained model
model = DQN.load("../models/dqn_hospital_sb3")

obs, info = env.reset()
done = False
total_reward = 0

# Roll out the greedy policy for at most 50 steps. The loop variable and the
# unused second return value get explicit names so IPython's `_`
# (last output) is not overwritten.
for _step in range(50):
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = env.step(action)
    done = terminated or truncated
    total_reward += reward
    if done:
        # Stop at the end of the episode instead of stepping a finished env.
        break

print("Total reward after training:", total_reward)


In [16]:
! pip uninstall torch torchvision torchaudio -y
! pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu


Found existing installation: torch 2.9.1
Uninstalling torch-2.9.1:
  Successfully uninstalled torch-2.9.1
Found existing installation: torchvision 0.24.1
Uninstalling torchvision-0.24.1:
  Successfully uninstalled torchvision-0.24.1
Found existing installation: torchaudio 2.9.1
Uninstalling torchaudio-2.9.1:
  Successfully uninstalled torchaudio-2.9.1
Defaulting to user installation because normal site-packages is not writeable
Looking in indexes: https://download.pytorch.org/whl/cpu
Collecting torch
  Using cached https://download.pytorch.org/whl/cpu/torch-2.9.1%2Bcpu-cp312-cp312-win_amd64.whl.metadata (29 kB)
Collecting torchvision
  Using cached https://download.pytorch.org/whl/cpu/torchvision-0.24.1%2Bcpu-cp312-cp312-win_amd64.whl.metadata (6.1 kB)
Collecting torchaudio
  Using cached https://download.pytorch.org/whl/cpu/torchaudio-2.9.1%2Bcpu-cp312-cp312-win_amd64.whl.metadata (7.0 kB)
Using cached https://download.pytorch.org/whl/cpu/torch-2.9.1%2Bcpu-cp312-cp312-win_amd64.whl (1



In [17]:
! pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu


Defaulting to user installation because normal site-packages is not writeable
Looking in indexes: https://download.pytorch.org/whl/cpu


In [19]:
# Show what platform.architecture() reports for the running interpreter.
import platform

arch_info = platform.architecture()
print(arch_info)


('64bit', 'WindowsPE')


In [20]:
# Using venv
python -m venv rl_env
rl_env\Scripts\activate

# Upgrade pip
python -m pip install --upgrade pip

# Install required packages inside the clean env
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
pip install stable-baselines3 gymnasium numpy pandas matplotlib


SyntaxError: invalid syntax (205492035.py, line 2)