DQN

In [None]:
import os
import zipfile
import requests
import ast
import pickle
import pandas as pd
import numpy as np

from stable_baselines3 import DQN
from stable_baselines3.common.callbacks import CheckpointCallback
from fire_dispatch_rl_env.environment import FireDispatchEnv
from fire_dispatch_rl_env.wrappers import WrappedDispatchEnv

# === 📦 0. Download and extract data files ===
# The archive is downloaded once and cached in the working directory; delete
# `final_data.zip` / `final_data/` to force a fresh download or extraction.
zip_url = "https://github.com/shanchengnb/fire-dispatch/raw/refs/heads/master/final%20data.zip"
zip_path = "final_data.zip"
extract_dir = "final_data"

if not os.path.exists(zip_path):
    print("⬇️ Downloading data package...")
    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(zip_url, timeout=60)
    # Fail loudly on HTTP errors: otherwise an error page would be written to
    # `final_data.zip`, and the existence check above would skip the download
    # on every later run, leaving a permanently broken archive behind.
    response.raise_for_status()
    with open(zip_path, "wb") as f:
        f.write(response.content)

if not os.path.exists(extract_dir):
    print("📦 Extracting data package...")
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(extract_dir)

# === 🔍 File search utility ===
def find_file_by_keyword(folder, keyword):
    """Return the path of the first file under *folder* whose name contains *keyword*.

    The match is a case-insensitive substring test on the file name only
    (directory names are not considered). Directories and files are visited
    in sorted order, so the same file is returned on every platform when
    several names match.

    Raises:
        FileNotFoundError: if no file name under *folder* contains *keyword*.
    """
    needle = keyword.lower()  # loop-invariant, so lower-case it once
    for root, dirs, files in os.walk(folder):
        dirs.sort()  # in-place sort steers os.walk's top-down descent order
        for name in sorted(files):
            if needle in name.lower():
                return os.path.join(root, name)
    raise FileNotFoundError(f"❌ File containing '{keyword}' not found")
# === 📁 1. Load data ===
csv_path = find_file_by_keyword(extract_dir, "real_incidents")
df = pd.read_csv(csv_path)
# `graph_node` values that were read back as text are turned into Python
# literals again. Each cell is checked on its own (rather than inspecting only
# row 0), so an empty file or a non-string first row no longer breaks or
# skips the conversion; non-string cells are passed through untouched.
df["graph_node"] = df["graph_node"].apply(
    lambda node: ast.literal_eval(node) if isinstance(node, str) else node
)

# === Load .pkl files ===
def load_pickle(path):
    """Deserialize and return the object pickled in the file at *path*.

    Raises:
        FileNotFoundError: if *path* does not exist.
    """
    if os.path.exists(path):
        # NOTE(review): unpickling can execute arbitrary code; only load
        # files that come from a trusted source.
        with open(path, "rb") as handle:
            return pickle.load(handle)
    raise FileNotFoundError(f"❌ File not found: {path}")

# Required pickles, read from the current working directory in this order;
# load_pickle raises FileNotFoundError if either one is missing.
station_dists, station_mapping = (
    load_pickle(pkl_name) for pkl_name in ("station_dists.pkl", "station_mapping.pkl")
)

# ✅ Engine counts come from a CSV in the extracted package: read it as a
# DataFrame, then map the "Station name" column to "station_engine_counts".
counts_path = find_file_by_keyword(extract_dir, "station_engine_counts")
df_counts = pd.read_csv(counts_path)
station_engine_counts = {
    station: engine_count
    for station, engine_count in zip(
        df_counts["Station name"], df_counts["station_engine_counts"]
    )
}

# Station coordinates are optional: fall back to an empty dict when the
# pickle is not present in the working directory.
xy_path = "station_xy.pkl"
station_xy = load_pickle(xy_path) if os.path.exists(xy_path) else {}

# === ⚙️ 2. Environment configuration ===
# Settings dictionary handed to FireDispatchEnv together with the incident
# DataFrame. NOTE(review): the units and exact meaning of the numeric values
# are defined inside FireDispatchEnv and are not visible here — confirm there.
config = {
    "max_engines": len(station_mapping),  # sized from the loaded station mapping
    "cooldown_seconds": 180,
    "event_num": 300,
    "max_steps": 300,
    "map_width": 50000,
    "map_height": 50000,
    "obs_dim": 96,
    "obs_engine_count": 20,
    "station_dists": station_dists,
    "station_engine_counts": station_engine_counts,
    "station_xy": station_xy,
    "average_speed_kmph": 48
}

# === 🧱 3. Create wrapped environment (for fixed action space) ===
base_env = FireDispatchEnv(config, event_df=df)
env = WrappedDispatchEnv(base_env, max_actions=20)

# Sanity check: reset once and report what the agent will observe.
# Gymnasium-style environments return `(obs, info)` from reset(), while the
# older Gym API returns the observation alone; unpack the tuple form so the
# print below always describes the observation itself.
reset_result = env.reset()
if (
    isinstance(reset_result, tuple)
    and len(reset_result) == 2
    and isinstance(reset_result[1], dict)
):
    obs = reset_result[0]
else:
    obs = reset_result
print(f"✅ obs shape: {np.shape(obs)} | type: {type(obs)}")

# === 💾 4. Save path and checkpoint callback ===
# Checkpoints and the final model all go into this folder.
save_path = "models/fire_dqn_final"
os.makedirs(save_path, exist_ok=True)

# Options for the periodic checkpoint: files are prefixed "checkpoint", and the
# replay buffer is stored alongside each snapshot.
checkpoint_settings = {
    "save_freq": 10_000,
    "save_path": save_path,
    "name_prefix": "checkpoint",
    "save_replay_buffer": True,
    "save_vecnormalize": True,
}
checkpoint_callback = CheckpointCallback(**checkpoint_settings)

# === 🔧 5. Create reinforcement learning model ===
# Hyperparameters are collected in one mapping so they can be inspected or
# logged before the model is built.
# NOTE(review): no dueling architecture is configured here — `policy_kwargs`
# only sets the hidden-layer sizes.
# NOTE(review): `tau=0.1` combined with `target_update_interval=1000` —
# confirm this target-update setting is intended.
dqn_hyperparams = {
    "learning_rate": 1e-4,
    "buffer_size": 100_000,
    "learning_starts": 1000,
    "batch_size": 64,
    "tau": 0.1,
    "gamma": 0.99,
    "train_freq": 4,
    "target_update_interval": 1000,
    "exploration_fraction": 0.2,
    "exploration_final_eps": 0.05,
    "verbose": 1,
    "tensorboard_log": "./tensorboard/",
    # Enhance representation power
    "policy_kwargs": {"net_arch": [128, 128]},
}
model = DQN(policy="MlpPolicy", env=env, **dqn_hyperparams)

# === 🚀 6. Start training ===
model.learn(total_timesteps=200_000, callback=checkpoint_callback)

# === ✅ 7. Save the final model ===
final_model_path = os.path.join(save_path, "final_model")
model.save(final_model_path)
print("✅ Model training complete and saved!")


In [None]:
import os
import ast
import pickle
import pandas as pd
import numpy as np

from stable_baselines3 import DQN
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.env_util import make_vec_env
from fire_dispatch_rl_env.environment import FireDispatchEnv
from fire_dispatch_rl_env.wrappers import WrappedDispatchEnv

# === 📁 1. Load data ===
# ⚠️ The default folder is a machine-specific local path. Either make sure the
# files exist there, or set the FIRE_DISPATCH_DATA_DIR environment variable to
# the folder that holds them on your machine.
data_dir = os.environ.get("FIRE_DISPATCH_DATA_DIR", r"D:\UCL2\final paper\data")

csv_path = os.path.join(data_dir, "real_incidents_with_station_info_with_on_scene_seconds.csv")
df = pd.read_csv(csv_path)
# `graph_node` values that were read back as text are turned into Python
# literals again. Each cell is checked on its own (rather than inspecting only
# row 0), so an empty file or a non-string first row no longer breaks or
# skips the conversion; non-string cells are passed through untouched.
df["graph_node"] = df["graph_node"].apply(
    lambda node: ast.literal_eval(node) if isinstance(node, str) else node
)

# These two pickles are read from the current working directory.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
with open("station_dists.pkl", "rb") as f:
    station_dists = pickle.load(f)
with open("station_mapping.pkl", "rb") as f:
    station_mapping = pickle.load(f)

# Station coordinates are optional: keep an empty dict when the file is absent.
station_xy = {}
xy_path = os.path.join(data_dir, "station_xy.pkl")
if os.path.exists(xy_path):
    with open(xy_path, "rb") as f:
        station_xy = pickle.load(f)

# ✅ Engine counts: map the "Station name" column to "station_engine_counts".
counts_path = os.path.join(data_dir, "Station_engine_counts.csv")
df_counts = pd.read_csv(counts_path)
station_engine_counts = dict(zip(df_counts["Station name"], df_counts["station_engine_counts"]))

# === ⚙️ 2. Environment configuration ===
# Settings dictionary handed to FireDispatchEnv together with the incident
# DataFrame. NOTE(review): the units and exact meaning of the numeric values
# are defined inside FireDispatchEnv and are not visible here — confirm there.
config = {
    "max_engines": len(station_mapping),  # sized from the loaded station mapping
    "cooldown_seconds": 180,
    "event_num": 300,
    "max_steps": 300,
    "map_width": 50000,
    "map_height": 50000,
    "obs_dim": 96,
    "obs_engine_count": 20,
    "station_dists": station_dists,
    "station_engine_counts": station_engine_counts,
    "station_xy": station_xy,
    "average_speed_kmph": 48
}

# === 🧱 3. Create wrapped environment (for stable action space) ===
base_env = FireDispatchEnv(config, event_df=df)
env = WrappedDispatchEnv(base_env, max_actions=20)

# Sanity check: reset once and report what the agent will observe.
# Gymnasium-style environments return `(obs, info)` from reset(), while the
# older Gym API returns the observation alone; unpack the tuple form so the
# print below always describes the observation itself.
reset_result = env.reset()
if (
    isinstance(reset_result, tuple)
    and len(reset_result) == 2
    and isinstance(reset_result[1], dict)
):
    obs = reset_result[0]
else:
    obs = reset_result
print(f"✅ obs shape: {np.shape(obs)} | type: {type(obs)}")

# === 💾 4. Save path and checkpoint callback ===
# Output folder shared by the periodic checkpoints and the final model.
save_path = "models/fire_dqn_final"
os.makedirs(save_path, exist_ok=True)

# Checkpoint files are prefixed "checkpoint"; the replay buffer is saved with
# every snapshot.
checkpoint_kwargs = dict(
    save_freq=10_000,
    save_path=save_path,
    name_prefix="checkpoint",
    save_replay_buffer=True,
    save_vecnormalize=True,
)
checkpoint_callback = CheckpointCallback(**checkpoint_kwargs)

# === 🔧 5. Create reinforcement learning model ===
# Network shape is kept apart from the optimisation settings for readability.
# NOTE(review): `tau=0.1` combined with `target_update_interval=1000` —
# confirm this target-update setting is intended.
network_kwargs = {"net_arch": [128, 128]}  # Enhance representation power
training_kwargs = {
    "learning_rate": 1e-4,
    "buffer_size": 100_000,
    "learning_starts": 1000,
    "batch_size": 64,
    "tau": 0.1,
    "gamma": 0.99,
    "train_freq": 4,
    "target_update_interval": 1000,
    "exploration_fraction": 0.2,
    "exploration_final_eps": 0.05,
    "verbose": 1,
    "tensorboard_log": "./tensorboard/",
}
model = DQN(
    policy="MlpPolicy",
    env=env,
    policy_kwargs=network_kwargs,
    **training_kwargs,
)

# === 🚀 6. Start training ===
model.learn(total_timesteps=200_000, callback=checkpoint_callback)

# === ✅ 7. Save final model ===
final_model_path = os.path.join(save_path, "final_model")
model.save(final_model_path)
print("✅ Model training complete and saved!")


SAC

In [None]:
import os
import ast
import pickle
import pandas as pd
import numpy as np

from stable_baselines3 import SAC
from stable_baselines3.common.callbacks import CheckpointCallback

from fire_dispatch_rl_env.environment import FireDispatchEnv
from fire_dispatch_rl_env.wrappers import ContinuousDispatchEnv

# === 📁 1. Load data ===
# ⚠️ The default folder is a machine-specific local path. Either make sure the
# files exist there, or set the FIRE_DISPATCH_DATA_DIR environment variable to
# the folder that holds them on your machine.
data_dir = os.environ.get("FIRE_DISPATCH_DATA_DIR", r"D:\UCL2\final paper\data")

csv_path = os.path.join(data_dir, "real_with_dispatch_info.csv")
df = pd.read_csv(csv_path)
# `graph_node` values that were read back as text are turned into Python
# literals again. Each cell is checked on its own (rather than inspecting only
# row 0), so an empty file or a non-string first row no longer breaks or
# skips the conversion; non-string cells are passed through untouched.
df["graph_node"] = df["graph_node"].apply(
    lambda node: ast.literal_eval(node) if isinstance(node, str) else node
)

# These two pickles are read from the current working directory.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
with open("station_dists.pkl", "rb") as f:
    station_dists = pickle.load(f)
with open("station_mapping.pkl", "rb") as f:
    station_mapping = pickle.load(f)

# Station coordinates are optional: keep an empty dict when the file is absent.
station_xy = {}
xy_path = os.path.join(data_dir, "station_xy.pkl")
if os.path.exists(xy_path):
    with open(xy_path, "rb") as f:
        station_xy = pickle.load(f)

# Engine counts: map the "Station name" column to "station_engine_counts".
counts_path = os.path.join(data_dir, "Station_engine_counts.csv")
df_counts = pd.read_csv(counts_path)
station_engine_counts = dict(zip(df_counts["Station name"], df_counts["station_engine_counts"]))

# === ⚙️ 2. Environment configuration ===
# Settings dictionary handed to FireDispatchEnv together with the incident
# DataFrame. NOTE(review): the units and exact meaning of the numeric values
# are defined inside FireDispatchEnv and are not visible here — confirm there.
config = {
    "max_engines": len(station_mapping),  # sized from the loaded station mapping
    "cooldown_seconds": 180,
    "event_num": 150,             # ✅ Reduce number of events
    "max_steps": 150,             # ✅ Reduce max steps (avoid waste)
    "map_width": 50000,
    "map_height": 50000,
    "obs_dim": 96,
    "obs_engine_count": 20,
    "station_dists": station_dists,
    "station_engine_counts": station_engine_counts,
    "station_xy": station_xy,
    "average_speed_kmph": 48,
    "max_dispatch_per_event": 2  # ✅ Multi-vehicle dispatch support
}

# === 🌍 3. Initialize continuous action environment ===
base_env = FireDispatchEnv(config, event_df=df)
env = ContinuousDispatchEnv(base_env)

# Sanity check: reset once and report what the agent will observe.
# Gymnasium-style environments return `(obs, info)` from reset(), while the
# older Gym API returns the observation alone; unpack the tuple form so the
# print below always describes the observation itself.
reset_result = env.reset()
if (
    isinstance(reset_result, tuple)
    and len(reset_result) == 2
    and isinstance(reset_result[1], dict)
):
    obs = reset_result[0]
else:
    obs = reset_result
print(f"✅ Continuous action environment initialized, obs shape: {np.shape(obs)}")

# === 💾 4. Save path and callback setup ===
# Output folder shared by the periodic checkpoints and the final model.
save_path = "models/fire_sac_debug_10w"
os.makedirs(save_path, exist_ok=True)

# Checkpoint files are prefixed "checkpoint"; the replay buffer is saved with
# every snapshot.
checkpoint_settings = {
    "save_freq": 5000,
    "save_path": save_path,
    "name_prefix": "checkpoint",
    "save_replay_buffer": True,
    "save_vecnormalize": True,
}
checkpoint_callback = CheckpointCallback(**checkpoint_settings)

# === 🔧 5. Build SAC model (continuous actions) ===
# Hyperparameters are collected in one mapping so they can be inspected or
# logged before the model is built.
sac_hyperparams = {
    "learning_rate": 3e-4,
    "buffer_size": 100_000,
    "learning_starts": 1000,
    "batch_size": 128,
    "tau": 0.005,
    "gamma": 0.99,
    "train_freq": 1,
    "gradient_steps": 1,
    "verbose": 1,
    "tensorboard_log": "./tensorboard/",
    "policy_kwargs": {"net_arch": [256, 256]},
}
model = SAC(policy="MlpPolicy", env=env, **sac_hyperparams)

# === 🚀 6. Start training (only 100k steps) ===
# ✅ Limit to 100k steps
model.learn(total_timesteps=100_000, callback=checkpoint_callback)

# === ✅ 7. Save final model ===
final_model_path = os.path.join(save_path, "final_model713")
model.save(final_model_path)
print("✅ SAC multi-vehicle dispatch training complete (100k steps), model saved!")
