In [1]:
import json
import time
import logging
from collections import deque, Counter
import numpy as np
from scipy.stats import ks_2samp

# -----------------------------------
# Setup logging
# -----------------------------------
logging.basicConfig(
    filename="monitoring.log",
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)

# -----------------------------------
# Load training metrics
# -----------------------------------
with open("../training_metrics.json", "r") as f:
    training_metrics = json.load(f)

train_action_dist = np.array(training_metrics["train_action_dist"])
train_wait_red = np.array(training_metrics["train_wait_red"])
train_wait_yellow = np.array(training_metrics["train_wait_yellow"])

# -----------------------------------
# Buffers for live monitoring
# -----------------------------------
recent_rewards = deque(maxlen=200)
recent_actions = deque(maxlen=200)
recent_waits_red = deque(maxlen=200)
recent_waits_yellow = deque(maxlen=200)

# -----------------------------------
# Reward tracking
# -----------------------------------
def track_reward(reward):
    recent_rewards.append(reward)
    logging.info(f"Reward logged: {reward}")

# -----------------------------------
# Action tracking (Model Drift)
# -----------------------------------
def track_action(action):
    """
    action: 0=Red (urgent), 1=Yellow (non-urgent)
    """
    recent_actions.append(action)
    count = Counter(recent_actions)
    total = sum(count.values())
    dist = {
        "red": count.get(0, 0)/total if total else 0,
        "yellow": count.get(1, 0)/total if total else 0
    }
    logging.info(f"Current action distribution: {dist}")

    # Model drift detection (Euclidean distance)
    action_vector = np.array([dist["red"], dist["yellow"]])
    drift = np.linalg.norm(action_vector - train_action_dist)
    if drift > 0.25:
        logging.warning("MODEL DRIFT DETECTED: Action distribution deviates from training behavior!")

# -----------------------------------
# Wait-time tracking (Data Drift)
# -----------------------------------
def track_wait_time(cat, wait_time):
    """
    cat: "red" or "yellow"
    wait_time: float/int
    """
    if cat == "red":
        recent_waits_red.append(wait_time)
        if len(recent_waits_red) > 30:
            _check_wait_drift("red")
    else:
        recent_waits_yellow.append(wait_time)
        if len(recent_waits_yellow) > 30:
            _check_wait_drift("yellow")

def _check_wait_drift(cat):
    if cat == "red":
        stat, p = ks_2samp(train_wait_red, list(recent_waits_red))
    else:
        stat, p = ks_2samp(train_wait_yellow, list(recent_waits_yellow))
    
    if p < 0.05:
        logging.warning(f"DATA DRIFT DETECTED in {cat.upper()} wait times (p={p:.4f})")

# -----------------------------------
# Log every decision
# -----------------------------------
def log_decision(obs, action, reward, info={}):
    """
    obs: environment observation array
    action: 0=Red, 1=Yellow
    reward: float
    info: optional dict
    """
    data = {
        "timestamp": time.time(),
        "action": int(action),
        "reward": float(reward),
        "free_doctors": int(obs[0]),
        "longest_wait_red": float(obs[1]),
        "longest_wait_yellow": float(obs[2]),
        "red_queue_len": int(obs[3]),
        "yellow_queue_len": int(obs[4]),
        "doctor_busy_times": [float(obs[5]), float(obs[6]), float(obs[7])],
        "additional_info": info
    }
    logging.info("Decision: " + json.dumps(data))

# -----------------------------------
# Example usage (replace with your live environment loop)
# -----------------------------------
if __name__ == "__main__":
    # Simulate some sample decisions
    for i in range(50):
        # Fake observation
        obs = np.array([2, 3, 5, 1, 6, 0, 0, 1], dtype=float)
        action = np.random.choice([0, 1])
        reward = np.random.rand() * 50
        track_reward(reward)
        track_action(action)
        track_wait_time("red" if action==0 else "yellow", np.random.rand()*20)
        log_decision(obs, action, reward)

ModuleNotFoundError: No module named 'scipy'

In [3]:
import json
import time
import logging
from collections import deque, Counter
import numpy as np
from scipy.stats import ks_2samp
import os

# -----------------------------------
# Setup logging
# -----------------------------------
logging.basicConfig(
    filename="monitoring.log",
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)

# -----------------------------------
# Load training metrics
# -----------------------------------
root_dir = "C:/Users/Prudence Letaru/Desktop/RL_Project_New"
with open(os.path.join(root_dir, "training_metrics.json"), "r") as f:
    training_metrics = json.load(f)

train_action_dist = np.array(training_metrics["train_action_dist"])
train_wait_red = np.array(training_metrics["train_wait_red"])
train_wait_yellow = np.array(training_metrics["train_wait_yellow"])

# -----------------------------------
# Buffers for live monitoring
# -----------------------------------
recent_rewards = deque(maxlen=200)
recent_actions = deque(maxlen=200)
recent_waits_red = deque(maxlen=200)
recent_waits_yellow = deque(maxlen=200)

# -----------------------------------
# Reward tracking
# -----------------------------------
def track_reward(reward):
    recent_rewards.append(reward)
    logging.info(f"Reward logged: {reward}")

# -----------------------------------
# Action tracking (Model Drift)
# -----------------------------------
def track_action(action):
    """
    action: 0=Red (urgent), 1=Yellow (non-urgent)
    """
    recent_actions.append(action)
    count = Counter(recent_actions)
    total = sum(count.values())
    dist = {
        "red": count.get(0, 0)/total if total else 0,
        "yellow": count.get(1, 0)/total if total else 0
    }
    logging.info(f"Current action distribution: {dist}")

    # Model drift detection (Euclidean distance)
    action_vector = np.array([dist["red"], dist["yellow"]])
    drift = np.linalg.norm(action_vector - train_action_dist)
    if drift > 0.25:
        logging.warning("MODEL DRIFT DETECTED: Action distribution deviates from training behavior!")

# -----------------------------------
# Wait-time tracking (Data Drift)
# -----------------------------------
def track_wait_time(cat, wait_time):
    """
    cat: "red" or "yellow"
    wait_time: float/int
    """
    if cat == "red":
        recent_waits_red.append(wait_time)
        if len(recent_waits_red) > 30:
            _check_wait_drift("red")
    else:
        recent_waits_yellow.append(wait_time)
        if len(recent_waits_yellow) > 30:
            _check_wait_drift("yellow")

def _check_wait_drift(cat):
    if cat == "red":
        stat, p = ks_2samp(train_wait_red, list(recent_waits_red))
    else:
        stat, p = ks_2samp(train_wait_yellow, list(recent_waits_yellow))
    
    if p < 0.05:
        logging.warning(f"DATA DRIFT DETECTED in {cat.upper()} wait times (p={p:.4f})")

# -----------------------------------
# Log every decision
# -----------------------------------
def log_decision(obs, action, reward, info={}):
    """
    obs: environment observation array
    action: 0=Red, 1=Yellow
    reward: float
    info: optional dict
    """
    data = {
        "timestamp": time.time(),
        "action": int(action),
        "reward": float(reward),
        "free_doctors": int(obs[0]),
        "longest_wait_red": float(obs[1]),
        "longest_wait_yellow": float(obs[2]),
        "red_queue_len": int(obs[3]),
        "yellow_queue_len": int(obs[4]),
        "doctor_busy_times": [float(obs[5]), float(obs[6]), float(obs[7])],
        "additional_info": info
    }
    logging.info("Decision: " + json.dumps(data))

# -----------------------------------
# Example usage (replace with your live environment loop)
# -----------------------------------
if __name__ == "__main__":
    # Simulate some sample decisions
    for i in range(50):
        # Fake observation
        obs = np.array([2, 3, 5, 1, 6, 0, 0, 1], dtype=float)
        action = np.random.choice([0, 1])
        reward = np.random.rand() * 50
        track_reward(reward)
        track_action(action)
        track_wait_time("red" if action==0 else "yellow", np.random.rand()*20)
        log_decision(obs, action, reward)

In [7]:
import sys
import os
import json
import time
import logging
from collections import deque, Counter
import numpy as np
from scipy.stats import ks_2samp

from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor

# -------------------------------
# Setup logging
# -------------------------------
logging.basicConfig(
    filename="monitoring.log",
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)

# -------------------------------
# Global buffers for drift detection
# -------------------------------
recent_rewards = deque(maxlen=200)
recent_actions = deque(maxlen=200)
recent_wait_red = deque(maxlen=200)
recent_wait_yellow = deque(maxlen=200)

# -------------------------------
# Monitoring functions
# -------------------------------
def track_reward(reward):
    recent_rewards.append(reward)
    logging.info(f"Reward logged: {reward}")

def track_action(action):
    recent_actions.append(action)
    count = Counter(recent_actions)
    total = sum(count.values())
    dist = {
        "red": count.get(0, 0) / total if total else 0,
        "yellow": count.get(1, 0) / total if total else 0
    }
    logging.info(f"Action distribution: {dist}")

    # Drift detection
    action_vector = np.array([dist["red"], dist["yellow"]])
    if "train_action_dist" in globals():
        drift = np.linalg.norm(action_vector - train_action_dist)
        if drift > 0.25:
            logging.warning("MODEL DRIFT DETECTED: Action distribution deviates from training!")

def track_wait_time(cat, wait_time):
    if cat == "red":
        recent_wait_red.append(wait_time)
        if len(recent_wait_red) > 30:
            _check_wait_time_drift("red")
    else:
        recent_wait_yellow.append(wait_time)
        if len(recent_wait_yellow) > 30:
            _check_wait_time_drift("yellow")

def _check_wait_time_drift(cat):
    if cat == "red":
        stat, p = ks_2samp(train_wait_red, list(recent_wait_red))
    else:
        stat, p = ks_2samp(train_wait_yellow, list(recent_wait_yellow))

    if p < 0.05:
        logging.warning(f"DATA DRIFT DETECTED in {cat.upper()} wait times (p={p:.4f})")

def log_decision(obs, action, reward, info={}):
    data = {
        "timestamp": time.time(),
        "action": int(action),
        "reward": float(reward),
        "free_doctors": int(obs[0]),
        "longest_wait_red": float(obs[1]),
        "longest_wait_yellow": float(obs[2]),
        "red_queue_len": int(obs[3]),
        "yellow_queue_len": int(obs[4]),
        "doctor_busy_times": [float(obs[5]), float(obs[6]), float(obs[7])],
        "additional_info": info
    }
    logging.info("Decision: " + json.dumps(data))

# -------------------------------
# Main loop: run actual environment
# -------------------------------
if __name__ == "__main__":
    # Add environment path
    root_dir = "C:/Users/Prudence Letaru/Desktop/RL_Project_New"
    sys.path.append(root_dir)

    sys.path.append(os.path.join(root_dir, "env"))
    from hospital_env import HospitalEnv

    # Create evaluation environment
    def make_env():
        env = HospitalEnv()
        env = Monitor(env)
        return env

    eval_env = DummyVecEnv([make_env])

    # Load trained model
    model = DQN.load("models/dqn_hospital_sb3", env=eval_env)

    # Load previous training metrics if available
    if os.path.exists("training_metrics.json"):
        with open("training_metrics.json", "r") as f:
            training_metrics = json.load(f)
        train_action_dist = np.array(training_metrics["train_action_dist"])
        train_wait_red = np.array(training_metrics["train_wait_red"])
        train_wait_yellow = np.array(training_metrics["train_wait_yellow"])
    else:
        # fallback: uniform distribution
        train_action_dist = np.array([0.5, 0.5])
        train_wait_red = np.random.normal(10, 3, 500)
        train_wait_yellow = np.random.normal(20, 5, 500)

    # Evaluation parameters
    n_episodes = 40
    threshold_times = {"red": 15, "yellow": 30}

    # Metrics storage
    rewards_per_episode = []
    red_waits, yellow_waits = [], []
    queue_lengths = {"red": [], "yellow": []}
    total_actions_red, total_actions_yellow = 0, 0

    # -------------------------------
    # Run episodes
    # -------------------------------
    for ep in range(n_episodes):
        obs = eval_env.reset()
        done = False
        episode_reward = 0

        while not done:
            action, _states = model.predict(obs, deterministic=True)

            if action[0] == 0:
                total_actions_red += 1
            else:
                total_actions_yellow += 1

            obs, reward, done, info = eval_env.step(action)
            episode_reward += float(reward[0])

            env = eval_env.envs[0].unwrapped

            # Track metrics
            track_reward(float(reward[0]))
            track_action(action[0])

            # Track wait times only if someone was served
            if env.last_served_wait_times["red"] > 0:
                red_waits.append(float(env.last_served_wait_times["red"]))
                track_wait_time("red", float(env.last_served_wait_times["red"]))
            if env.last_served_wait_times["yellow"] > 0:
                yellow_waits.append(float(env.last_served_wait_times["yellow"]))
                track_wait_time("yellow", float(env.last_served_wait_times["yellow"]))

            # Log decision
            log_decision(obs[0], action[0], float(reward[0]), info)

            # Queue lengths
            queue_lengths["red"].append(len(env.red_queue))
            queue_lengths["yellow"].append(len(env.yellow_queue))

        rewards_per_episode.append(episode_reward)

    # -------------------------------
    # Compute summary metrics
    # -------------------------------
    avg_reward = float(np.mean(rewards_per_episode))
    avg_wait_red = float(np.mean(red_waits)) if red_waits else 0
    avg_wait_yellow = float(np.mean(yellow_waits)) if yellow_waits else 0
    pct_red_within = float(100 * sum(w <= threshold_times["red"] for w in red_waits) / len(red_waits)) if red_waits else 0
    pct_yellow_within = float(100 * sum(w <= threshold_times["yellow"] for w in yellow_waits) / len(yellow_waits)) if yellow_waits else 0
    queue_stats = {cat: {"avg": float(np.mean(qs)), "max": int(np.max(qs))} for cat, qs in queue_lengths.items()}

    # Action distribution
    total_actions = total_actions_red + total_actions_yellow
    red_pct = float(total_actions_red / total_actions) if total_actions > 0 else 0
    yellow_pct = float(total_actions_yellow / total_actions) if total_actions > 0 else 0

    print("\n=== ACTION DISTRIBUTION ===")
    print(f"Red actions: {total_actions_red} ({red_pct:.3f})")
    print(f"Yellow actions: {total_actions_yellow} ({yellow_pct:.3f})")
    print("\n=== PERFORMANCE METRICS ===")
    print(f"Average reward per episode: {avg_reward:.2f}")
    print(f"Average wait times (Red, Yellow): {avg_wait_red:.2f}, {avg_wait_yellow:.2f}")
    print(f"Percentage served within thresholds (Red, Yellow): {pct_red_within:.2f}%, {pct_yellow_within:.2f}%")
    for cat, stats in queue_stats.items():
        print(f"{cat.capitalize()}: avg={stats['avg']:.2f}, max={stats['max']}")

    # Save metrics
    training_metrics = {
        "train_action_dist": [red_pct, yellow_pct],
        "train_wait_red": red_waits,
        "train_wait_yellow": yellow_waits,
        "avg_reward_per_episode": avg_reward,
        "pct_within_threshold_red": pct_red_within,
        "pct_within_threshold_yellow": pct_yellow_within,
        "queue_red_avg": queue_stats["red"]["avg"],
        "queue_red_max": queue_stats["red"]["max"],
        "queue_yellow_avg": queue_stats["yellow"]["avg"],
        "queue_yellow_max": queue_stats["yellow"]["max"]
    }

    with open("training_metrics.json", "w") as f:
        json.dump(training_metrics, f, indent=4)

    print("\n[INFO] Monitoring complete. Logs in 'monitoring.log'. Metrics saved in 'training_metrics.json'.")


TypeError: Object of type ndarray is not JSON serializable

In [8]:
import sys
import os
import json
import time
import logging
from collections import deque, Counter
import numpy as np
from scipy.stats import ks_2samp

from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor

# -------------------------------
# Setup logging
# -------------------------------
logging.basicConfig(
    filename="monitoring.log",
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)

# -------------------------------
# Global buffers for drift detection
# -------------------------------
recent_rewards = deque(maxlen=200)
recent_actions = deque(maxlen=200)
recent_wait_red = deque(maxlen=200)
recent_wait_yellow = deque(maxlen=200)

# -------------------------------
# Monitoring functions
# -------------------------------
def track_reward(reward):
    recent_rewards.append(reward)
    logging.info(f"Reward logged: {reward}")

def track_action(action):
    recent_actions.append(action)
    count = Counter(recent_actions)
    total = sum(count.values())
    dist = {
        "red": count.get(0, 0) / total if total else 0,
        "yellow": count.get(1, 0) / total if total else 0
    }
    logging.info(f"Action distribution: {dist}")

    # Drift detection
    action_vector = np.array([dist["red"], dist["yellow"]])
    if "train_action_dist" in globals():
        drift = np.linalg.norm(action_vector - train_action_dist)
        if drift > 0.25:
            logging.warning("MODEL DRIFT DETECTED: Action distribution deviates from training!")

def track_wait_time(cat, wait_time):
    if cat == "red":
        recent_wait_red.append(wait_time)
        if len(recent_wait_red) > 30:
            _check_wait_time_drift("red")
    else:
        recent_wait_yellow.append(wait_time)
        if len(recent_wait_yellow) > 30:
            _check_wait_time_drift("yellow")

def _check_wait_time_drift(cat):
    if cat == "red":
        stat, p = ks_2samp(train_wait_red, list(recent_wait_red))
    else:
        stat, p = ks_2samp(train_wait_yellow, list(recent_wait_yellow))

    if p < 0.05:
        logging.warning(f"DATA DRIFT DETECTED in {cat.upper()} wait times (p={p:.4f})")

def log_decision(obs, action, reward, info={}):
    # Convert obs to list if it's a numpy array
    obs_list = obs.tolist() if isinstance(obs, np.ndarray) else obs
    data = {
        "timestamp": time.time(),
        "action": int(action),
        "reward": float(reward),
        "free_doctors": int(obs_list[0]),
        "longest_wait_red": float(obs_list[1]),
        "longest_wait_yellow": float(obs_list[2]),
        "red_queue_len": int(obs_list[3]),
        "yellow_queue_len": int(obs_list[4]),
        "doctor_busy_times": [float(obs_list[5]), float(obs_list[6]), float(obs_list[7])],
        "additional_info": info
    }
    logging.info("Decision: " + json.dumps(data))

# -------------------------------
# Main loop: run actual environment
# -------------------------------
if __name__ == "__main__":
    # Paths
    root_dir = "C:/Users/Prudence Letaru/Desktop/RL_Project_New"
    sys.path.append(root_dir)
    sys.path.append(os.path.join(root_dir, "env"))

    # Import environment
    from hospital_env import HospitalEnv

    # Create evaluation environment
    def make_env():
        env = HospitalEnv()
        env = Monitor(env)
        return env

    eval_env = DummyVecEnv([make_env])

    # Load trained model
    model = DQN.load(os.path.join(root_dir, "models/dqn_hospital_sb3"), env=eval_env)

    # Load previous training metrics if available
    metrics_path = os.path.join(root_dir, "training_metrics.json")
    if os.path.exists(metrics_path):
        with open(metrics_path, "r") as f:
            training_metrics = json.load(f)
        train_action_dist = np.array(training_metrics["train_action_dist"])
        train_wait_red = np.array(training_metrics["train_wait_red"])
        train_wait_yellow = np.array(training_metrics["train_wait_yellow"])
    else:
        train_action_dist = np.array([0.5, 0.5])
        train_wait_red = np.random.normal(10, 3, 500)
        train_wait_yellow = np.random.normal(20, 5, 500)

    # Evaluation parameters
    n_episodes = 40
    threshold_times = {"red": 15, "yellow": 30}

    # Metrics storage
    rewards_per_episode = []
    red_waits, yellow_waits = [], []
    queue_lengths = {"red": [], "yellow": []}
    total_actions_red, total_actions_yellow = 0, 0

    # -------------------------------
    # Run episodes
    # -------------------------------
    for ep in range(n_episodes):
        obs = eval_env.reset()
        done = False
        episode_reward = 0

        while not done:
            action, _states = model.predict(obs, deterministic=True)

            if action[0] == 0:
                total_actions_red += 1
            else:
                total_actions_yellow += 1

            obs, reward, done, info = eval_env.step(action)
            episode_reward += float(reward[0])

            env = eval_env.envs[0].unwrapped

            # Track metrics
            track_reward(float(reward[0]))
            track_action(action[0])

            # Track wait times only if someone was served
            if env.last_served_wait_times["red"] > 0:
                red_waits.append(float(env.last_served_wait_times["red"]))
                track_wait_time("red", float(env.last_served_wait_times["red"]))
            if env.last_served_wait_times["yellow"] > 0:
                yellow_waits.append(float(env.last_served_wait_times["yellow"]))
                track_wait_time("yellow", float(env.last_served_wait_times["yellow"]))

            # Log decision
            log_decision(obs[0], action[0], float(reward[0]), info)

            # Queue lengths
            queue_lengths["red"].append(len(env.red_queue))
            queue_lengths["yellow"].append(len(env.yellow_queue))

        rewards_per_episode.append(episode_reward)

    # -------------------------------
    # Compute summary metrics
    # -------------------------------
    avg_reward = float(np.mean(rewards_per_episode))
    avg_wait_red = float(np.mean(red_waits)) if red_waits else 0
    avg_wait_yellow = float(np.mean(yellow_waits)) if yellow_waits else 0
    pct_red_within = float(100 * sum(w <= threshold_times["red"] for w in red_waits) / len(red_waits)) if red_waits else 0
    pct_yellow_within = float(100 * sum(w <= threshold_times["yellow"] for w in yellow_waits) / len(yellow_waits)) if yellow_waits else 0
    queue_stats = {cat: {"avg": float(np.mean(qs)), "max": int(np.max(qs))} for cat, qs in queue_lengths.items()}

    # Action distribution
    total_actions = total_actions_red + total_actions_yellow
    red_pct = float(total_actions_red / total_actions) if total_actions > 0 else 0
    yellow_pct = float(total_actions_yellow / total_actions) if total_actions > 0 else 0

    # -------------------------------
    # Print summary
    # -------------------------------
    print("\n=== ACTION DISTRIBUTION ===")
    print(f"Red actions: {total_actions_red} ({red_pct:.3f})")
    print(f"Yellow actions: {total_actions_yellow} ({yellow_pct:.3f})")
    print("\n=== PERFORMANCE METRICS ===")
    print(f"Average reward per episode: {avg_reward:.2f}")
    print(f"Average wait times (Red, Yellow): {avg_wait_red:.2f}, {avg_wait_yellow:.2f}")
    print(f"Percentage served within thresholds (Red, Yellow): {pct_red_within:.2f}%, {pct_yellow_within:.2f}%")
    for cat, stats in queue_stats.items():
        print(f"{cat.capitalize()}: avg={stats['avg']:.2f}, max={stats['max']}")

    # Save metrics
    training_metrics = {
        "train_action_dist": [red_pct, yellow_pct],
        "train_wait_red": red_waits,
        "train_wait_yellow": yellow_waits,
        "avg_reward_per_episode": avg_reward,
        "pct_within_threshold_red": pct_red_within,
        "pct_within_threshold_yellow": pct_yellow_within,
        "queue_red_avg": queue_stats["red"]["avg"],
        "queue_red_max": queue_stats["red"]["max"],
        "queue_yellow_avg": queue_stats["yellow"]["avg"],
        "queue_yellow_max": queue_stats["yellow"]["max"]
    }

    with open(metrics_path, "w") as f:
        json.dump(training_metrics, f, indent=4)

    print("\n[INFO] Monitoring complete. Logs in 'monitoring.log'. Metrics saved in 'training_metrics.json'.")


TypeError: Object of type ndarray is not JSON serializable

In [9]:
import sys
import os
import json
import time
import logging
from collections import deque, Counter
import numpy as np
from scipy.stats import ks_2samp

from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor

# -------------------------------
# Setup logging
# -------------------------------
logging.basicConfig(
    filename="monitoring.log",
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)

# -------------------------------
# Global buffers for drift detection
# -------------------------------
recent_rewards = deque(maxlen=200)
recent_actions = deque(maxlen=200)
recent_wait_red = deque(maxlen=200)
recent_wait_yellow = deque(maxlen=200)

# -------------------------------
# Monitoring functions
# -------------------------------
def track_reward(reward):
    recent_rewards.append(reward)
    logging.info(f"Reward logged: {reward}")

def track_action(action):
    recent_actions.append(action)
    count = Counter(recent_actions)
    total = sum(count.values())
    dist = {
        "red": count.get(0, 0) / total if total else 0,
        "yellow": count.get(1, 0) / total if total else 0
    }
    logging.info(f"Action distribution: {dist}")

    # Drift detection
    action_vector = np.array([dist["red"], dist["yellow"]])
    if "train_action_dist" in globals():
        drift = np.linalg.norm(action_vector - train_action_dist)
        if drift > 0.25:
            logging.warning("MODEL DRIFT DETECTED: Action distribution deviates from training!")

def track_wait_time(cat, wait_time):
    if cat == "red":
        recent_wait_red.append(wait_time)
        if len(recent_wait_red) > 30:
            _check_wait_time_drift("red")
    else:
        recent_wait_yellow.append(wait_time)
        if len(recent_wait_yellow) > 30:
            _check_wait_time_drift("yellow")

def _check_wait_time_drift(cat):
    if cat == "red":
        stat, p = ks_2samp(train_wait_red, list(recent_wait_red))
    else:
        stat, p = ks_2samp(train_wait_yellow, list(recent_wait_yellow))

    if p < 0.05:
        logging.warning(f"DATA DRIFT DETECTED in {cat.upper()} wait times (p={p:.4f})")

def log_decision(obs, action, reward, info={}):
    # Convert obs to list if it's a numpy array
    obs_list = obs.tolist() if isinstance(obs, np.ndarray) else obs
    data = {
        "timestamp": time.time(),
        "action": int(action),
        "reward": float(reward),
        "free_doctors": int(obs_list[0]),
        "longest_wait_red": float(obs_list[1]),
        "longest_wait_yellow": float(obs_list[2]),
        "red_queue_len": int(obs_list[3]),
        "yellow_queue_len": int(obs_list[4]),
        "doctor_busy_times": [float(obs_list[5]), float(obs_list[6]), float(obs_list[7])],
        "additional_info": info
    }
    logging.info("Decision: " + json.dumps(data))

# -------------------------------
# Main loop: run actual environment
# -------------------------------
if __name__ == "__main__":
    # Paths
    root_dir = "C:/Users/Prudence Letaru/Desktop/RL_Project_New"
    sys.path.append(root_dir)
    sys.path.append(os.path.join(root_dir, "env"))

    # Import environment
    from hospital_env import HospitalEnv

    # Create evaluation environment
    def make_env():
        env = HospitalEnv()
        env = Monitor(env)
        return env

    eval_env = DummyVecEnv([make_env])

    # Load trained model
    model = DQN.load(os.path.join(root_dir, "models/dqn_hospital_sb3"), env=eval_env)

    # Load previous training metrics if available
    metrics_path = os.path.join(root_dir, "training_metrics.json")
    if os.path.exists(metrics_path):
        with open(metrics_path, "r") as f:
            training_metrics = json.load(f)
        train_action_dist = np.array(training_metrics["train_action_dist"])
        train_wait_red = np.array(training_metrics["train_wait_red"])
        train_wait_yellow = np.array(training_metrics["train_wait_yellow"])
    else:
        train_action_dist = np.array([0.5, 0.5])
        train_wait_red = np.random.normal(10, 3, 500)
        train_wait_yellow = np.random.normal(20, 5, 500)

    # Evaluation parameters
    n_episodes = 40
    threshold_times = {"red": 15, "yellow": 30}

    # Metrics storage
    rewards_per_episode = []
    red_waits, yellow_waits = [], []
    queue_lengths = {"red": [], "yellow": []}
    total_actions_red, total_actions_yellow = 0, 0

    # -------------------------------
    # Run episodes
    # -------------------------------
    for ep in range(n_episodes):
        obs = eval_env.reset()
        done = False
        episode_reward = 0

        while not done:
            action, _states = model.predict(obs, deterministic=True)

            if action[0] == 0:
                total_actions_red += 1
            else:
                total_actions_yellow += 1

            obs, reward, done, info = eval_env.step(action)
            episode_reward += float(reward[0])

            env = eval_env.envs[0].unwrapped

            # Track metrics
            track_reward(float(reward[0]))
            track_action(action[0])

            # Track wait times only if someone was served
            if env.last_served_wait_times["red"] > 0:
                red_waits.append(float(env.last_served_wait_times["red"]))
                track_wait_time("red", float(env.last_served_wait_times["red"]))
            if env.last_served_wait_times["yellow"] > 0:
                yellow_waits.append(float(env.last_served_wait_times["yellow"]))
                track_wait_time("yellow", float(env.last_served_wait_times["yellow"]))

            # Log decision
            log_decision(obs[0], action[0], float(reward[0]), info)

            # Queue lengths
            queue_lengths["red"].append(len(env.red_queue))
            queue_lengths["yellow"].append(len(env.yellow_queue))

        rewards_per_episode.append(episode_reward)

    # -------------------------------
    # Compute summary metrics
    # -------------------------------
    avg_reward = float(np.mean(rewards_per_episode))
    avg_wait_red = float(np.mean(red_waits)) if red_waits else 0
    avg_wait_yellow = float(np.mean(yellow_waits)) if yellow_waits else 0
    pct_red_within = float(100 * sum(w <= threshold_times["red"] for w in red_waits) / len(red_waits)) if red_waits else 0
    pct_yellow_within = float(100 * sum(w <= threshold_times["yellow"] for w in yellow_waits) / len(yellow_waits)) if yellow_waits else 0
    queue_stats = {cat: {"avg": float(np.mean(qs)), "max": int(np.max(qs))} for cat, qs in queue_lengths.items()}

    # Action distribution
    total_actions = total_actions_red + total_actions_yellow
    red_pct = float(total_actions_red / total_actions) if total_actions > 0 else 0
    yellow_pct = float(total_actions_yellow / total_actions) if total_actions > 0 else 0

    # -------------------------------
    # Print summary
    # -------------------------------
    print("\n=== ACTION DISTRIBUTION ===")
    print(f"Red actions: {total_actions_red} ({red_pct:.3f})")
    print(f"Yellow actions: {total_actions_yellow} ({yellow_pct:.3f})")
    print("\n=== PERFORMANCE METRICS ===")
    print(f"Average reward per episode: {avg_reward:.2f}")
    print(f"Average wait times (Red, Yellow): {avg_wait_red:.2f}, {avg_wait_yellow:.2f}")
    print(f"Percentage served within thresholds (Red, Yellow): {pct_red_within:.2f}%, {pct_yellow_within:.2f}%")
    for cat, stats in queue_stats.items():
        print(f"{cat.capitalize()}: avg={stats['avg']:.2f}, max={stats['max']}")

    # Save metrics
    training_metrics = {
        "train_action_dist": [red_pct, yellow_pct],
        "train_wait_red": red_waits,
        "train_wait_yellow": yellow_waits,
        "avg_reward_per_episode": avg_reward,
        "pct_within_threshold_red": pct_red_within,
        "pct_within_threshold_yellow": pct_yellow_within,
        "queue_red_avg": queue_stats["red"]["avg"],
        "queue_red_max": queue_stats["red"]["max"],
        "queue_yellow_avg": queue_stats["yellow"]["avg"],
        "queue_yellow_max": queue_stats["yellow"]["max"]
    }

    with open(metrics_path, "w") as f:
        json.dump(training_metrics, f, indent=4)

    print("\n[INFO] Monitoring complete. Logs in 'monitoring.log'. Metrics saved in 'training_metrics.json'.")


TypeError: Object of type ndarray is not JSON serializable

In [11]:
import sys
import os
import json
import time
import logging
from collections import deque, Counter
import numpy as np
from scipy.stats import ks_2samp

from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor

# -------------------------------
# Setup logging
# -------------------------------
logging.basicConfig(
    filename="monitoring.log",
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)

# -------------------------------
# Global buffers for drift detection
# -------------------------------
recent_rewards = deque(maxlen=200)
recent_actions = deque(maxlen=200)
recent_wait_red = deque(maxlen=200)
recent_wait_yellow = deque(maxlen=200)

# -------------------------------
# Monitoring functions
# -------------------------------
def track_reward(reward):
    recent_rewards.append(reward)
    logging.info(f"Reward logged: {reward}")

def track_action(action):
    recent_actions.append(action)
    count = Counter(recent_actions)
    total = sum(count.values())
    dist = {
        "red": count.get(0, 0) / total if total else 0,
        "yellow": count.get(1, 0) / total if total else 0
    }
    logging.info(f"Action distribution: {dist}")

    # Model drift detection
    action_vector = np.array([dist["red"], dist["yellow"]])
    if "train_action_dist" in globals():
        drift = np.linalg.norm(action_vector - train_action_dist)
        if drift > 0.25:
            logging.warning("MODEL DRIFT DETECTED: Action distribution deviates from training!")

def track_wait_time(cat, wait_time):
    if cat == "red":
        recent_wait_red.append(wait_time)
        if len(recent_wait_red) > 30:
            _check_wait_time_drift("red")
    else:
        recent_wait_yellow.append(wait_time)
        if len(recent_wait_yellow) > 30:
            _check_wait_time_drift("yellow")

def _check_wait_time_drift(cat):
    if cat == "red":
        stat, p = ks_2samp(train_wait_red, list(recent_wait_red))
    else:
        stat, p = ks_2samp(train_wait_yellow, list(recent_wait_yellow))

    if p < 0.05:
        logging.warning(f"DATA DRIFT DETECTED in {cat.upper()} wait times (p={p:.4f})")

def log_decision(obs, action, reward, info={}):
    # Recursive conversion of NumPy arrays to lists
    def convert(obj):
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, dict):
            return {k: convert(v) for k, v in obj.items()}
        if isinstance(obj, list):
            return [convert(x) for x in obj]
        return obj

    obs = convert(obs)
    info = convert(info)

    data = {
        "timestamp": time.time(),
        "action": int(action),
        "reward": float(reward),
        "free_doctors": int(obs[0]),
        "longest_wait_red": float(obs[1]),
        "longest_wait_yellow": float(obs[2]),
        "red_queue_len": int(obs[3]),
        "yellow_queue_len": int(obs[4]),
        "doctor_busy_times": [float(obs[5]), float(obs[6]), float(obs[7])],
        "additional_info": info
    }
    logging.info("Decision: " + json.dumps(data))

# -------------------------------
# Main loop: run actual environment
# -------------------------------
if __name__ == "__main__":
    root_dir = "C:/Users/Prudence Letaru/Desktop/RL_Project_New"
    sys.path.append(root_dir)
    sys.path.append(os.path.join(root_dir, "env"))

    from hospital_env import HospitalEnv

    # Create evaluation environment
    def make_env():
        env = HospitalEnv()
        env = Monitor(env)
        return env

    eval_env = DummyVecEnv([make_env])

    # Load trained model
    model = DQN.load(os.path.join(root_dir, "models/dqn_hospital_sb3"), env=eval_env)

    # Load previous training metrics if available
    metrics_file = os.path.join(root_dir, "training_metrics.json")
    if os.path.exists(metrics_file):
        with open(metrics_file, "r") as f:
            training_metrics = json.load(f)
        train_action_dist = np.array(training_metrics["train_action_dist"])
        train_wait_red = np.array(training_metrics["train_wait_red"])
        train_wait_yellow = np.array(training_metrics["train_wait_yellow"])
    else:
        # fallback: uniform distribution
        train_action_dist = np.array([0.5, 0.5])
        train_wait_red = np.random.normal(10, 3, 500)
        train_wait_yellow = np.random.normal(20, 5, 500)

    # Evaluation parameters
    n_episodes = 40
    threshold_times = {"red": 15, "yellow": 30}

    # Metrics storage
    rewards_per_episode = []
    red_waits, yellow_waits = [], []
    queue_lengths = {"red": [], "yellow": []}
    total_actions_red, total_actions_yellow = 0, 0

    # -------------------------------
    # Run episodes
    # -------------------------------
    for ep in range(n_episodes):
        obs = eval_env.reset()
        done = False
        episode_reward = 0

        while not done:
            action, _states = model.predict(obs, deterministic=True)

            if action[0] == 0:
                total_actions_red += 1
            else:
                total_actions_yellow += 1

            obs, reward, done, info = eval_env.step(action)
            episode_reward += float(reward[0])

            env = eval_env.envs[0].unwrapped

            # Track metrics
            track_reward(float(reward[0]))
            track_action(action[0])

            # Track wait times only if someone was served
            if env.last_served_wait_times["red"] > 0:
                red_waits.append(float(env.last_served_wait_times["red"]))
                track_wait_time("red", float(env.last_served_wait_times["red"]))
            if env.last_served_wait_times["yellow"] > 0:
                yellow_waits.append(float(env.last_served_wait_times["yellow"]))
                track_wait_time("yellow", float(env.last_served_wait_times["yellow"]))

            # Log decision (obs converted inside function)
            log_decision(obs[0], action[0], float(reward[0]), info)

            # Queue lengths
            queue_lengths["red"].append(len(env.red_queue))
            queue_lengths["yellow"].append(len(env.yellow_queue))

        rewards_per_episode.append(episode_reward)

    # -------------------------------
    # Compute summary metrics
    # -------------------------------
    avg_reward = float(np.mean(rewards_per_episode))
    avg_wait_red = float(np.mean(red_waits)) if red_waits else 0
    avg_wait_yellow = float(np.mean(yellow_waits)) if yellow_waits else 0
    pct_red_within = float(100 * sum(w <= threshold_times["red"] for w in red_waits) / len(red_waits)) if red_waits else 0
    pct_yellow_within = float(100 * sum(w <= threshold_times["yellow"] for w in yellow_waits) / len(yellow_waits)) if yellow_waits else 0
    queue_stats = {cat: {"avg": float(np.mean(qs)), "max": int(np.max(qs))} for cat, qs in queue_lengths.items()}

    # Action distribution
    total_actions = total_actions_red + total_actions_yellow
    red_pct = float(total_actions_red / total_actions) if total_actions > 0 else 0
    yellow_pct = float(total_actions_yellow / total_actions) if total_actions > 0 else 0

    # -------------------------------
    # Print summary
    # -------------------------------
    print("\n=== ACTION DISTRIBUTION ===")
    print(f"Red actions: {total_actions_red} ({red_pct:.3f})")
    print(f"Yellow actions: {total_actions_yellow} ({yellow_pct:.3f})")
    print("\n=== PERFORMANCE METRICS ===")
    print(f"Average reward per episode: {avg_reward:.2f}")
    print(f"Average wait times (Red, Yellow): {avg_wait_red:.2f}, {avg_wait_yellow:.2f}")
    print(f"Percentage served within thresholds (Red, Yellow): {pct_red_within:.2f}%, {pct_yellow_within:.2f}%")
    for cat, stats in queue_stats.items():
        print(f"{cat.capitalize()}: avg={stats['avg']:.2f}, max={stats['max']}")

    # -------------------------------
    # Save metrics
    # -------------------------------
    training_metrics = {
        "train_action_dist": [red_pct, yellow_pct],
        "train_wait_red": red_waits,
        "train_wait_yellow": yellow_waits,
        "avg_reward_per_episode": avg_reward,
        "pct_within_threshold_red": pct_red_within,
        "pct_within_threshold_yellow": pct_yellow_within,
        "queue_red_avg": queue_stats["red"]["avg"],
        "queue_red_max": queue_stats["red"]["max"],
        "queue_yellow_avg": queue_stats["yellow"]["avg"],
        "queue_yellow_max": queue_stats["yellow"]["max"]
    }

    with open(metrics_file, "w") as f:
        json.dump(training_metrics, f, indent=4)

    print("\n[INFO] Monitoring complete. Logs in 'monitoring.log'. Metrics saved in 'training_metrics.json'.")



=== ACTION DISTRIBUTION ===
Red actions: 564 (0.470)
Yellow actions: 636 (0.530)

=== PERFORMANCE METRICS ===
Average reward per episode: 1895.70
Average wait times (Red, Yellow): 3.90, 5.86
Percentage served within thresholds (Red, Yellow): 100.00%, 100.00%
Red: avg=1.85, max=9
Yellow: avg=6.54, max=25

[INFO] Monitoring complete. Logs in 'monitoring.log'. Metrics saved in 'training_metrics.json'.


In [12]:
import json
import time
import numpy as np
from collections import deque, Counter
from scipy.stats import ks_2samp
import logging
import os

# -------------------------------
# Setup logging
# -------------------------------
logging.basicConfig(
    filename="monitoring.log",
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)

# -------------------------------
# Load training metrics
# -------------------------------
METRICS_FILE = "training_metrics.json"
if os.path.exists(METRICS_FILE):
    with open(METRICS_FILE, "r") as f:
        training_metrics = json.load(f)
    train_action_dist = np.array(training_metrics["train_action_dist"])
    train_wait_red = np.array(training_metrics["train_wait_red"])
    train_wait_yellow = np.array(training_metrics["train_wait_yellow"])
else:
    train_action_dist = np.array([0.5, 0.5])
    train_wait_red = np.random.normal(10, 3, 500)
    train_wait_yellow = np.random.normal(20, 5, 500)

# -------------------------------
# Monitoring buffers
# -------------------------------
recent_rewards = deque(maxlen=200)
recent_actions = deque(maxlen=200)
recent_wait_red = deque(maxlen=200)
recent_wait_yellow = deque(maxlen=200)

# -------------------------------
# Monitoring functions
# -------------------------------
def track_reward(r):
    recent_rewards.append(r)

def track_action(a):
    recent_actions.append(a)
    count = Counter(recent_actions)
    total = sum(count.values())
    dist = {"red": count.get(0, 0)/total if total else 0,
            "yellow": count.get(1, 0)/total if total else 0}
    # Model drift check
    drift = np.linalg.norm(np.array([dist["red"], dist["yellow"]]) - train_action_dist)
    if drift > 0.25:
        logging.warning("MODEL DRIFT DETECTED")
    return dist

def track_wait(cat, wt):
    if cat == "red":
        recent_wait_red.append(wt)
        if len(recent_wait_red) > 30:
            check_wait_drift("red")
    else:
        recent_wait_yellow.append(wt)
        if len(recent_wait_yellow) > 30:
            check_wait_drift("yellow")

def check_wait_drift(cat):
    if cat == "red":
        stat, p = ks_2samp(train_wait_red, list(recent_wait_red))
    else:
        stat, p = ks_2samp(train_wait_yellow, list(recent_wait_yellow))
    if p < 0.05:
        logging.warning(f"DATA DRIFT DETECTED in {cat.upper()} wait times (p={p:.4f})")

# -------------------------------
# Monitor API logs in real time
# -------------------------------
LOG_FILE = "api_logs.json"
processed_lines = 0

print("[INFO] Continuous monitoring started. Press Ctrl+C to stop.")

while True:
    if os.path.exists(LOG_FILE):
        with open(LOG_FILE, "r") as f:
            lines = f.readlines()
        new_lines = lines[processed_lines:]
        for line in new_lines:
            try:
                entry = json.loads(line)
                action = 0 if entry["action"] == "RED" else 1
                reward = entry["reward"]
                wait_time = entry["wait_time"]
                cat = "red" if action == 0 else "yellow"

                track_action(action)
                track_wait(cat, wait_time)
                track_reward(reward)
            except Exception as e:
                logging.error(f"Error processing log line: {e}")

        processed_lines += len(new_lines)

        # Optional: print periodic summary
        if len(recent_rewards) > 0 and processed_lines % 20 == 0:
            print(f"[SUMMARY] Mean reward: {np.mean(recent_rewards):.2f}, "
                  f"Action distribution: RED {recent_actions.count(0)}, YELLOW {recent_actions.count(1)}, "
                  f"Avg waits: RED {np.mean(recent_wait_red):.2f}, YELLOW {np.mean(recent_wait_yellow):.2f}")

    time.sleep(2)  # check every 2 seconds


[INFO] Continuous monitoring started. Press Ctrl+C to stop.


KeyboardInterrupt: 

In [13]:
from fastapi import FastAPI
from pydantic import BaseModel
import numpy as np
import json
import os
import time
from stable_baselines3 import DQN
from hospital_env import HospitalEnv

# -------------------------------
# FastAPI app
# -------------------------------
app = FastAPI(title="Hospital RL Agent API")

# -------------------------------
# Pydantic models for input
# -------------------------------
class State(BaseModel):
    free_doctors: int
    longest_wait_red: float
    longest_wait_yellow: float
    red_queue_length: int
    yellow_queue_length: int
    doctor1_busy_time: float
    doctor2_busy_time: float
    doctor3_busy_time: float

class RequestBody(BaseModel):
    state: State

# -------------------------------
# Load trained RL model
# -------------------------------
MODEL_PATH = "models/dqn_hospital_sb3"
model = DQN.load(MODEL_PATH)

# -------------------------------
# Logging location for monitoring
# -------------------------------
LOG_FILE = "api_logs.json"

# -------------------------------
# Convert API state to environment observation
# -------------------------------
def state_to_obs(state: State):
    return np.array([
        state.free_doctors,
        state.longest_wait_red,
        state.longest_wait_yellow,
        state.red_queue_length,
        state.yellow_queue_length,
        state.doctor1_busy_time,
        state.doctor2_busy_time,
        state.doctor3_busy_time
    ], dtype=np.float32)

# -------------------------------
# API endpoint
# -------------------------------
@app.post("/predict")
def predict(request: RequestBody):
    obs = state_to_obs(request.state)

    # Predict action
    action, _ = model.predict(obs, deterministic=True)
    action = int(action)

    # Compute reward and wait time using environment logic
    # Here we simulate an environment step
    env = HospitalEnv()
    # Set environment state manually
    env.doctor_timers = np.array([request.state.doctor1_busy_time,
                                  request.state.doctor2_busy_time,
                                  request.state.doctor3_busy_time], dtype=np.float32)
    env.red_queue = [request.state.longest_wait_red]*request.state.red_queue_length
    env.yellow_queue = [request.state.longest_wait_yellow]*request.state.yellow_queue_length

    _, reward, _, _, _ = env.step(action)
    wait_time = env.last_served_wait_times["red"] if action == 0 else env.last_served_wait_times["yellow"]

    # Log to file for monitoring
    log_entry = {
        "timestamp": time.time(),
        "state": request.state.dict(),
        "action": "RED" if action == 0 else "YELLOW",
        "reward": reward,
        "wait_time": wait_time
    }
    with open(LOG_FILE, "a") as f:
        f.write(json.dumps(log_entry) + "\n")

    return {
        "action": "RED" if action == 0 else "YELLOW",
        "reward": reward,
        "wait_time": wait_time
    }
