In [None]:
rl_project/
│
├── hospital_env.py        # Your environment (already exists)
├── train_agent.py         # Train DQN agent
├── evaluate.py            # Evaluate trained model
├── serve_api.py           # REST API using FastAPI
├── requirements.txt
└── Dockerfile


In [None]:
import numpy as np
from hospital_env import HospitalEnv
from stable_baselines3 import DQN
from stable_baselines3.common.callbacks import BaseCallback
import os
import matplotlib.pyplot as plt


class RewardLogger(BaseCallback):
    """Callback that records the total reward of every finished training episode.

    Stable-Baselines3 exposes the rollout variables through ``self.locals``
    using the vectorised names ``rewards``, ``dones`` and ``infos`` (one entry
    per environment), so the episode return is accumulated here step by step
    instead of relying on a non-standard info key.

    Args:
        log_interval: When ``verbose > 0``, print the latest episode reward
            every ``log_interval`` finished episodes. Ignored otherwise.
        verbose: Verbosity level forwarded to ``BaseCallback``.

    Attributes:
        episode_rewards: One total reward per finished episode, in order.
        rewards: Kept for backward compatibility; this class does not fill it.
    """

    def __init__(self, log_interval=1, verbose=0):
        super().__init__(verbose)
        self.rewards = []
        self.episode_rewards = []
        self.log_interval = log_interval
        # Running (unfinished) return for each environment; sized lazily on the
        # first step because the number of environments is not known here.
        self._running_returns = None

    def _on_step(self):
        """Accumulate step rewards and store the return of each finished episode.

        Returns:
            ``True`` always, so training is never interrupted by this callback.
        """
        # "dones" is the key used by SB3; "done" is accepted as a fallback so
        # callers that populate the singular form keep working.
        dones = self.locals.get("dones")
        if dones is None:
            dones = self.locals.get("done", False)
        dones = np.atleast_1d(np.asarray(dones, dtype=bool))

        step_rewards = np.atleast_1d(
            np.asarray(self.locals.get("rewards", 0.0), dtype=float)
        )
        if self._running_returns is None or self._running_returns.shape != dones.shape:
            self._running_returns = np.zeros(dones.shape, dtype=float)
        self._running_returns += step_rewards

        infos = self.locals.get("infos") or []
        for env_idx in np.flatnonzero(dones):
            info = infos[env_idx] if env_idx < len(infos) else {}
            episode_stats = info.get("episode")
            if "episode_reward" in info:
                # Environment-provided total takes precedence when it exists.
                ep_reward = info["episode_reward"]
            elif isinstance(episode_stats, dict) and "r" in episode_stats:
                # Presumably written by SB3's Monitor wrapper — "r" is the episode return.
                ep_reward = episode_stats["r"]
            else:
                ep_reward = float(self._running_returns[env_idx])

            self.episode_rewards.append(ep_reward)
            self._running_returns[env_idx] = 0.0

            if (
                self.verbose > 0
                and self.log_interval
                and len(self.episode_rewards) % self.log_interval == 0
            ):
                print(f"Episode {len(self.episode_rewards)}: reward={ep_reward}")
        return True

    def save_plot(self, path="reward_plot.png"):
        """Write a line plot of the recorded episode rewards to ``path``."""
        # Explicit figure/axes so no global pyplot state leaks into other plots.
        fig, ax = plt.subplots()
        ax.plot(self.episode_rewards)
        ax.set_xlabel("Episode")
        ax.set_ylabel("Reward")
        ax.set_title("Training Reward Over Time")
        fig.savefig(path)
        plt.close(fig)


def train():
    """Train a DQN agent on ``HospitalEnv`` and persist the results.

    Runs 50,000 timesteps with a ``RewardLogger`` callback attached, writes the
    reward plot through the callback, and saves the model as
    ``dqn_hospital_agent``.
    """
    # All DQN keyword arguments in one place so they are easy to scan and tune.
    hyperparams = dict(
        verbose=1,
        learning_rate=1e-4,
        buffer_size=50000,
        learning_starts=100,
        batch_size=32,
        gamma=0.99,
        train_freq=1,
        target_update_interval=300,
    )
    agent = DQN("MlpPolicy", HospitalEnv(), **hyperparams)

    reward_cb = RewardLogger()
    agent.learn(total_timesteps=50_000, callback=reward_cb)
    reward_cb.save_plot()

    agent.save("dqn_hospital_agent")
    print("Model saved to dqn_hospital_agent.zip")


# Start training only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    train()


In [None]:
from hospital_env import HospitalEnv
from stable_baselines3 import DQN
import numpy as np



# Load trained agent
model = DQN.load("dqn_hospital_agent")
env = HospitalEnv()

# Metrics storage
rewards_per_episode = []
red_waits, yellow_waits, green_waits = [], [], []
red_served, yellow_served, green_served = 0, 0, 0
# Same list objects as above, keyed by category so the loop needs no if/elif chain.
waits_by_category = {"red": red_waits, "yellow": yellow_waits, "green": green_waits}
queue_lengths = {"red": [], "yellow": [], "green": []}
threshold_times = {"red": 30, "yellow": 60, "green": 120}

episodes = 10


def _mean_or_nan(values):
    """Return the mean of ``values``, or NaN (without a warning) when empty."""
    return float(np.mean(values)) if len(values) else float("nan")


def _pct_within(waits, limit):
    """Return the percentage of ``waits`` that are <= ``limit``; NaN when empty."""
    if not waits:
        return float("nan")
    return 100 * sum(w <= limit for w in waits) / len(waits)


for ep in range(episodes):
    # Gymnasium-style reset() returns (obs, info); the older Gym API returns obs only.
    reset_out = env.reset()
    if isinstance(reset_out, tuple) and len(reset_out) == 2 and isinstance(reset_out[1], dict):
        obs = reset_out[0]
    else:
        obs = reset_out
    done = False
    total_reward = 0

    while not done:
        action, _ = model.predict(obs, deterministic=True)
        step_out = env.step(action)
        if len(step_out) == 5:
            # Gymnasium-style: the episode ends on either termination or truncation.
            obs, reward, terminated, truncated, info = step_out
            done = bool(terminated or truncated)
        else:
            obs, reward, done, info = step_out
        total_reward += reward

        # Record queue lengths
        queue_lengths["red"].append(len(env.red_queue))
        queue_lengths["yellow"].append(len(env.yellow_queue))
        queue_lengths["green"].append(len(env.green_queue))

        # Record wait times for fairness & thresholds
        # NOTE(review): assumes the env refreshes `last_served_wait_times` on every
        # step; if it keeps stale values, waits are counted more than once — confirm.
        last_waits = getattr(env, "last_served_wait_times", {})
        for cat, wait in last_waits.items():
            if cat in waits_by_category:  # unknown categories are ignored
                waits_by_category[cat].append(wait)

    rewards_per_episode.append(total_reward)

# One recorded wait per served patient, so the counts follow from the lists.
red_served, yellow_served, green_served = len(red_waits), len(yellow_waits), len(green_waits)

# Compute metrics
avg_reward = np.mean(rewards_per_episode)
avg_wait_red = _mean_or_nan(red_waits)
avg_wait_yellow = _mean_or_nan(yellow_waits)
avg_wait_green = _mean_or_nan(green_waits)

# NaN (rather than ZeroDivisionError) when a category was never served.
pct_red_within = _pct_within(red_waits, threshold_times["red"])
pct_yellow_within = _pct_within(yellow_waits, threshold_times["yellow"])
pct_green_within = _pct_within(green_waits, threshold_times["green"])

queue_stats = {cat: {"avg": np.mean(qs), "max": np.max(qs)} for cat, qs in queue_lengths.items()}

# Fairness = 1 - (max - min) / max over the categories that actually have data.
# NaN averages are excluded because max()/min() give order-dependent results with NaN.
_observed_avgs = [w for w in (avg_wait_red, avg_wait_yellow, avg_wait_green) if not np.isnan(w)]
if not _observed_avgs:
    fairness = float("nan")
else:
    _longest, _shortest = max(_observed_avgs), min(_observed_avgs)
    # All waits equal to zero means perfectly even treatment; avoids dividing by zero.
    fairness = 1.0 if _longest == 0 else 1 - (_longest - _shortest) / _longest

# Print results
print("Average reward per episode:", avg_reward)
print("Average wait times (Red, Yellow, Green):", avg_wait_red, avg_wait_yellow, avg_wait_green)
print("Percentage served within thresholds (Red, Yellow, Green):", pct_red_within, pct_yellow_within, pct_green_within)
print("Queue stats:", queue_stats)
print("Fairness metric:", fairness)


In [None]:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from stable_baselines3 import DQN
import numpy as np

app = FastAPI()
# Loaded once at import time so every request reuses the same model.
model = DQN.load("dqn_hospital_agent")


class Observation(BaseModel):
    """Request body for ``/predict``: a single flat observation vector."""

    # Typed as floats so pydantic rejects non-numeric entries with a 422
    # instead of letting them fail inside the model.
    state: list[float]   # 10 numbers


@app.post("/predict")
def predict_action(data: Observation):
    """Return the agent's deterministic action for one observation.

    Args:
        data: Request body whose ``state`` holds the observation values.

    Returns:
        ``{"action": <int>}`` with the action chosen by the model.

    Raises:
        HTTPException: 422 when ``state`` does not have the shape of the
            model's observation space.
    """
    obs = np.asarray(data.state, dtype=float)

    # Validate up front so a malformed request is a client error (422), not a
    # server error raised from inside model.predict. Spaces without a concrete
    # shape are left to the model to validate.
    expected_shape = model.observation_space.shape
    if expected_shape and obs.shape != tuple(expected_shape):
        raise HTTPException(
            status_code=422,
            detail=f"state must have shape {tuple(expected_shape)}, got {obs.shape}",
        )

    action, _ = model.predict(obs, deterministic=True)
    return {"action": int(action)}


In [None]:
# Run the API

In [None]:
uvicorn serve_api:app --reload


In [None]:
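# Requirements (requirements.txt)
# NOTE: fastapi and matplotlib are imported by the code above but are not pinned
# in the list below; add them before building the image.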

mlxtend==0.23.4
mpmath==1.3.0
namex==0.1.0
networkx==3.6
numpy==2.2.6
opencv-python==4.12.0.88
openpyxl==3.1.5
opt_einsum==3.4.0
optree==0.18.0
packaging==25.0
pandas==2.3.3
pillow==12.0.0
protobuf==6.33.1
pydantic==2.12.4
pydantic_core==2.41.5
pygame==2.6.1
pyparsing==3.2.5
python-dateutil==2.9.0.post0
pytz==2025.2
setuptools==80.9.0
six==1.17.0
sniffio==1.3.1
stable_baselines3==2.7.0
starlette==0.50.0
sympy==1.14.0
tensorboard==2.20.0
tensorboard-data-server==0.7.2
termcolor==3.2.0
torch==2.9.1+cpu
torchaudio==2.9.1+cpu
torchvision==0.24.1+cpu
typing-inspection==0.4.2
typing_extensions==4.15.0
tzdata==2025.2
uri-template==1.3.0
uvicorn==0.38.0
webcolors==25.10.0
wheel==0.45.1

In [None]:
# NOTE(review): some pins in requirements.txt (e.g. networkx==3.6) appear to
# require Python >= 3.11, so the base image is 3.11 rather than 3.10 — confirm.
FROM python:3.11

WORKDIR /app

# Copy only the dependency list first so the install layer below stays cached
# when just the source code changes.
COPY requirements.txt .

# The "+cpu" torch/torchvision/torchaudio builds are published on the PyTorch
# wheel index rather than PyPI, so that index must be available to pip.
RUN pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r requirements.txt

COPY . .

EXPOSE 8000

CMD ["uvicorn", "serve_api:app", "--host", "0.0.0.0", "--port", "8000"]
