In [1]:
from pathlib import Path
import os
import sys
import shutil

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import traci

PROJECT_DIR = Path.cwd()

NETWORK_FILE  = PROJECT_DIR / "my_network.net.xml"
ROUTE_FILE    = PROJECT_DIR / "my_routes.rou.xml"
CONFIG_FILE   = PROJECT_DIR / "my_config.sumocfg"
WEIGHTS_FILE  = PROJECT_DIR / "gnn_a2c_best.weights.h5"

print("Working directory:", PROJECT_DIR)
print("Network exists? ", NETWORK_FILE.exists())
print("Routes exist?  ", ROUTE_FILE.exists())
print("Config exists? ", CONFIG_FILE.exists())
print("Weights exist? ", WEIGHTS_FILE.exists())

SUMO_HOME = os.environ.get("SUMO_HOME")
if SUMO_HOME is None:
    raise EnvironmentError("SUMO_HOME is not set.")

print("SUMO_HOME:", SUMO_HOME)

def get_sumo_binary(gui: bool = False) -> str:
    base_name = "sumo-gui" if gui else "sumo"
    cmd = shutil.which(base_name)
    if cmd is not None:
        return cmd

    bin_dir = Path(SUMO_HOME) / "bin"
    candidate = bin_dir / (base_name + ".exe" if sys.platform.startswith("win") else base_name)

    if not candidate.exists():
        raise FileNotFoundError(f"{base_name} not found at {candidate}")

    return str(candidate)


Working directory: C:\Users\manda\OneDrive\Documents\AI Traffic - Jupyter
Network exists?  True
Routes exist?   True
Config exists?  True
Weights exist?  True
SUMO_HOME: C:\Program Files (x86)\Eclipse\Sumo\


In [5]:
# --- Discover TLS IDs ---

if traci.isLoaded():
    traci.close()

sumo_bin = get_sumo_binary(gui=False)
cmd = [sumo_bin, "-c", str(CONFIG_FILE), "--step-length", "1"]
traci.start(cmd)

tls_ids = traci.trafficlight.getIDList()
print("Total TLS:", len(tls_ids))
print(tls_ids)

traci.close()

# --- Build tls_lane_map ---

if traci.isLoaded():
    traci.close()

traci.start([sumo_bin, "-c", str(CONFIG_FILE), "--step-length", "1"])

tls_lane_map = {}
for tls in tls_ids:
    lanes = traci.trafficlight.getControlledLanes(tls)
    lanes = list(dict.fromkeys(lanes))
    tls_lane_map[tls] = lanes

print("\nTLS → lanes:")
for tls, lanes in tls_lane_map.items():
    print(tls, ":", lanes)

traci.close()

# --- State function ---

def get_tls_state(tls_id: str, lane_map: dict) -> list:
    lane_ids = lane_map[tls_id]
    queue_lengths = []
    waiting_times = []

    for lane in lane_ids:
        q = traci.lane.getLastStepHaltingNumber(lane)
        w = traci.lane.getWaitingTime(lane)
        queue_lengths.append(q)
        waiting_times.append(w)

    current_phase = traci.trafficlight.getPhase(tls_id)
    return queue_lengths + waiting_times + [current_phase]

# --- Compute feature_size ---

if traci.isLoaded():
    traci.close()

traci.start([sumo_bin, "-c", str(CONFIG_FILE), "--step-length", "1"])

for _ in range(5):
    traci.simulationStep()

lengths = []
for tls in tls_ids:
    s = get_tls_state(tls, tls_lane_map)
    lengths.append(len(s))

feature_size = max(lengths)
num_nodes = len(tls_ids)

print("State lengths per TLS:", lengths)
print("feature_size:", feature_size)
print("num_nodes:", num_nodes)

traci.close()

# --- Adjacency ---

from collections import defaultdict

if traci.isLoaded():
    traci.close()

traci.start([sumo_bin, "-c", str(CONFIG_FILE), "--step-length", "1"])

tls_adj = {tls: set() for tls in tls_ids}

for tls in tls_ids:
    controlled_links = traci.trafficlight.getControlledLinks(tls)
    for link_group in controlled_links:
        for (incoming, outgoing, _) in link_group:
            for other_tls in tls_ids:
                if other_tls == tls:
                    continue
                if outgoing in tls_lane_map.get(other_tls, []):
                    tls_adj[tls].add(other_tls)
                    tls_adj[other_tls].add(tls)

traci.close()

edge_count = sum(len(neigh) for neigh in tls_adj.values())
if edge_count == 0 and len(tls_ids) > 1:
    print("No adjacency found; using simple chain.")
    ordered = list(tls_ids)
    for i in range(len(ordered) - 1):
        a, b = ordered[i], ordered[i+1]
        tls_adj[a].add(b)
        tls_adj[b].add(a)

print("\nAdjacency list:")
for tls, neigh in tls_adj.items():
    print(tls, ":", sorted(list(neigh)))

tls_index = {tls_id: i for i, tls_id in enumerate(tls_ids)}
adj_matrix = np.zeros((num_nodes, num_nodes), dtype=np.float32)

for tls, neigh in tls_adj.items():
    i = tls_index[tls]
    for nb in neigh:
        j = tls_index[nb]
        adj_matrix[i, j] = 1.0
        adj_matrix[j, i] = 1.0

print("\nadj_matrix shape:", adj_matrix.shape)
print("adj_matrix[0]:", adj_matrix[0])

# --- Reward function ---

def compute_global_reward(tls_lane_map: dict) -> float:
    total_wait = 0.0
    for tls, lanes in tls_lane_map.items():
        for lane in lanes:
            total_wait += traci.lane.getWaitingTime(lane)
    return -total_wait / 1000.0


Total TLS: 7
('1234828897', '1757353212', '1757353214', '1783045940', '1783045985', '1843356909', '7671039164')

TLS → lanes:
1234828897 : ['107445428#1_0', '-173540663#0_0', '-47195458#0_0']
1757353212 : ['1173473098#2_0', '107445426#1_0', '173540663#2_0']
1757353214 : ['800859756#1_0', '800859756#1_1']
1783045940 : ['-821520600#3_0', '315129223#3_0', '315129223#3_1', '821520600#2_0']
1783045985 : ['-821520600#2_0', '821520600#1_0', '324489280#3_0']
1843356909 : ['223051913#1_0', '223051913#1_1']
7671039164 : ['164073716#1_0', '164073716#1_1', '164073716#1_2']
State lengths per TLS: [7, 7, 5, 9, 7, 5, 7]
feature_size: 9
num_nodes: 7

Adjacency list:
1234828897 : []
1757353212 : []
1757353214 : []
1783045940 : ['1783045985']
1783045985 : ['1783045940']
1843356909 : []
7671039164 : []

adj_matrix shape: (7, 7)
adj_matrix[0]: [0. 0. 0. 0. 0. 0. 0.]


In [6]:
# --- Discover TLS IDs ---

if traci.isLoaded():
    traci.close()

sumo_bin = get_sumo_binary(gui=False)
cmd = [sumo_bin, "-c", str(CONFIG_FILE), "--step-length", "1"]
traci.start(cmd)

tls_ids = traci.trafficlight.getIDList()
print("Total TLS:", len(tls_ids))
print(tls_ids)

traci.close()

# --- Build tls_lane_map ---

if traci.isLoaded():
    traci.close()

traci.start([sumo_bin, "-c", str(CONFIG_FILE), "--step-length", "1"])

tls_lane_map = {}
for tls in tls_ids:
    lanes = traci.trafficlight.getControlledLanes(tls)
    lanes = list(dict.fromkeys(lanes))
    tls_lane_map[tls] = lanes

print("\nTLS → lanes:")
for tls, lanes in tls_lane_map.items():
    print(tls, ":", lanes)

traci.close()

# --- State function ---

def get_tls_state(tls_id: str, lane_map: dict) -> list:
    lane_ids = lane_map[tls_id]
    queue_lengths = []
    waiting_times = []

    for lane in lane_ids:
        q = traci.lane.getLastStepHaltingNumber(lane)
        w = traci.lane.getWaitingTime(lane)
        queue_lengths.append(q)
        waiting_times.append(w)

    current_phase = traci.trafficlight.getPhase(tls_id)
    return queue_lengths + waiting_times + [current_phase]

# --- Compute feature_size ---

if traci.isLoaded():
    traci.close()

traci.start([sumo_bin, "-c", str(CONFIG_FILE), "--step-length", "1"])

for _ in range(5):
    traci.simulationStep()

lengths = []
for tls in tls_ids:
    s = get_tls_state(tls, tls_lane_map)
    lengths.append(len(s))

feature_size = max(lengths)
num_nodes = len(tls_ids)

print("State lengths per TLS:", lengths)
print("feature_size:", feature_size)
print("num_nodes:", num_nodes)

traci.close()

# --- Adjacency ---

from collections import defaultdict

if traci.isLoaded():
    traci.close()

traci.start([sumo_bin, "-c", str(CONFIG_FILE), "--step-length", "1"])

tls_adj = {tls: set() for tls in tls_ids}

for tls in tls_ids:
    controlled_links = traci.trafficlight.getControlledLinks(tls)
    for link_group in controlled_links:
        for (incoming, outgoing, _) in link_group:
            for other_tls in tls_ids:
                if other_tls == tls:
                    continue
                if outgoing in tls_lane_map.get(other_tls, []):
                    tls_adj[tls].add(other_tls)
                    tls_adj[other_tls].add(tls)

traci.close()

edge_count = sum(len(neigh) for neigh in tls_adj.values())
if edge_count == 0 and len(tls_ids) > 1:
    print("No adjacency found; using simple chain.")
    ordered = list(tls_ids)
    for i in range(len(ordered) - 1):
        a, b = ordered[i], ordered[i+1]
        tls_adj[a].add(b)
        tls_adj[b].add(a)

print("\nAdjacency list:")
for tls, neigh in tls_adj.items():
    print(tls, ":", sorted(list(neigh)))

tls_index = {tls_id: i for i, tls_id in enumerate(tls_ids)}
adj_matrix = np.zeros((num_nodes, num_nodes), dtype=np.float32)

for tls, neigh in tls_adj.items():
    i = tls_index[tls]
    for nb in neigh:
        j = tls_index[nb]
        adj_matrix[i, j] = 1.0
        adj_matrix[j, i] = 1.0

print("\nadj_matrix shape:", adj_matrix.shape)
print("adj_matrix[0]:", adj_matrix[0])

# --- Reward function ---

def compute_global_reward(tls_lane_map: dict) -> float:
    total_wait = 0.0
    for tls, lanes in tls_lane_map.items():
        for lane in lanes:
            total_wait += traci.lane.getWaitingTime(lane)
    return -total_wait / 1000.0


Total TLS: 7
('1234828897', '1757353212', '1757353214', '1783045940', '1783045985', '1843356909', '7671039164')

TLS → lanes:
1234828897 : ['107445428#1_0', '-173540663#0_0', '-47195458#0_0']
1757353212 : ['1173473098#2_0', '107445426#1_0', '173540663#2_0']
1757353214 : ['800859756#1_0', '800859756#1_1']
1783045940 : ['-821520600#3_0', '315129223#3_0', '315129223#3_1', '821520600#2_0']
1783045985 : ['-821520600#2_0', '821520600#1_0', '324489280#3_0']
1843356909 : ['223051913#1_0', '223051913#1_1']
7671039164 : ['164073716#1_0', '164073716#1_1', '164073716#1_2']
State lengths per TLS: [7, 7, 5, 9, 7, 5, 7]
feature_size: 9
num_nodes: 7

Adjacency list:
1234828897 : []
1757353212 : []
1757353214 : []
1783045940 : ['1783045985']
1783045985 : ['1783045940']
1843356909 : []
7671039164 : []

adj_matrix shape: (7, 7)
adj_matrix[0]: [0. 0. 0. 0. 0. 0. 0.]


In [7]:
class GNNActorCritic(tf.keras.Model):
    def __init__(self, hidden_dim: int, num_actions: int):
        super().__init__()
        self.state_embed = tf.keras.layers.Dense(hidden_dim, activation="relu")
        self.post_gnn   = tf.keras.layers.Dense(hidden_dim, activation="relu")
        self.policy_head = tf.keras.layers.Dense(num_actions)
        self.value_head  = tf.keras.layers.Dense(1)

    def call(self, inputs, training=False):
        x, adj = inputs        # x: (B, N, F), adj: (B, N, N)

        h = self.state_embed(x)      # (B, N, H)
        h_neigh = tf.matmul(adj, h)  # (B, N, H)

        h_cat = tf.concat([h, h_neigh], axis=-1)  # (B, N, 2H)
        h_out = self.post_gnn(h_cat)              # (B, N, H)

        policy_logits = self.policy_head(h_out)   # (B, N, A)
        graph_embed = tf.reduce_mean(h_out, axis=1)
        value = self.value_head(graph_embed)

        return policy_logits, value

hidden_dim = 64
num_actions = 2

gnn_model = GNNActorCritic(hidden_dim, num_actions)

# Build model once
adj_batch_tf = tf.convert_to_tensor(adj_matrix[None, ...], dtype=tf.float32)
dummy_states = tf.random.uniform((1, num_nodes, feature_size), dtype=tf.float32)
gnn_model((dummy_states, adj_batch_tf))

if WEIGHTS_FILE.exists():
    gnn_model.load_weights(WEIGHTS_FILE)
    print("Loaded trained weights from:", WEIGHTS_FILE)
else:
    print("WARNING: weights file not found. Using untrained model.")


Loaded trained weights from: C:\Users\manda\OneDrive\Documents\AI Traffic - Jupyter\gnn_a2c_best.weights.h5


In [8]:
def select_actions_from_logits(policy_logits: tf.Tensor) -> np.ndarray:
    if isinstance(policy_logits, tf.Tensor):
        policy_logits = policy_logits.numpy()
    return np.argmax(policy_logits, axis=-1)


def apply_actions_to_sumo(actions: np.ndarray, tls_ids_list):
    for idx, tls in enumerate(tls_ids_list):
        a = int(actions[idx])
        if a == 0:
            continue
        elif a == 1:
            curr_phase = traci.trafficlight.getPhase(tls)
            logic = traci.trafficlight.getCompleteRedYellowGreenDefinition(tls)[0]
            num_phases = len(logic.phases)
            next_phase = (curr_phase + 1) % num_phases
            traci.trafficlight.setPhase(tls, next_phase)


In [9]:
def run_fixed_time_episode(max_steps=3600):
    """
    No RL control. SUMO's built-in controller handles TLS.
    We just step the simulation and compute reward.
    """
    if traci.isLoaded():
        traci.close()

    traci.start([get_sumo_binary(False), "-c", str(CONFIG_FILE), "--step-length", "1"])

    episode_return = 0.0
    rewards_over_time = []

    for t in range(max_steps):
        traci.simulationStep()

        r = compute_global_reward(tls_lane_map)
        episode_return += r
        rewards_over_time.append(r)

        if (t + 1) % 600 == 0:
            print(f"[Fixed] Step {t+1}/{max_steps}, reward: {r:.4f}")

    traci.close()
    print("[Fixed] Episode finished. Total return:", episode_return)
    return episode_return, rewards_over_time


def run_ai_episode_greedy(max_steps=3600):
    """
    Use the trained GNNActorCritic in greedy mode (argmax).
    """
    if traci.isLoaded():
        traci.close()

    traci.start([get_sumo_binary(False), "-c", str(CONFIG_FILE), "--step-length", "1"])

    episode_return = 0.0
    rewards_over_time = []

    for t in range(max_steps):
        all_states = []
        for tls in tls_ids:
            s = get_tls_state(tls, tls_lane_map)
            s_padded = s + [0] * (feature_size - len(s))
            all_states.append(s_padded)

        states_np = np.array(all_states, dtype=np.float32)[None, ...]
        states_tf = tf.convert_to_tensor(states_np, dtype=tf.float32)

        policy_logits_tf, value_tf = gnn_model((states_tf, adj_batch_tf), training=False)
        policy_logits = policy_logits_tf[0]  # (N, 2)

        actions = select_actions_from_logits(policy_logits)
        apply_actions_to_sumo(actions, tls_ids)

        traci.simulationStep()

        r = compute_global_reward(tls_lane_map)
        episode_return += r
        rewards_over_time.append(r)

        if (t + 1) % 600 == 0:
            print(f"[AI] Step {t+1}/{max_steps}, reward: {r:.4f}")

    traci.close()
    print("[AI] Episode finished. Total return:", episode_return)
    return episode_return, rewards_over_time


In [10]:
def evaluate_policy(run_fn, label: str, num_episodes=3, max_steps=3600):
    returns = []
    all_traces = []

    for ep in range(num_episodes):
        print(f"\n=== {label} EPISODE {ep+1}/{num_episodes} ===")
        ep_ret, trace = run_fn(max_steps=max_steps)
        returns.append(ep_ret)
        all_traces.append(trace)
        print(f"{label} episode {ep+1} return: {ep_ret:.4f}")

    returns = np.array(returns, dtype=np.float64)
    print(f"\n=== {label} SUMMARY ===")
    print("Returns:", returns)
    print("Mean return:", returns.mean())
    print("Std return:", returns.std())

    return returns, all_traces

# Evaluate fixed-time
fixed_returns, fixed_traces = evaluate_policy(run_fixed_time_episode, "Fixed", num_episodes=3, max_steps=3600)

# Evaluate AI
ai_returns, ai_traces = evaluate_policy(run_ai_episode_greedy, "AI", num_episodes=3, max_steps=3600)



=== Fixed EPISODE 1/3 ===
[Fixed] Step 600/3600, reward: -0.0000
[Fixed] Step 1200/3600, reward: -0.0270
[Fixed] Step 1800/3600, reward: -0.0040
[Fixed] Step 2400/3600, reward: -0.0000
[Fixed] Step 3000/3600, reward: -0.0310
[Fixed] Step 3600/3600, reward: -0.0010
[Fixed] Episode finished. Total return: -107.5660000000005
Fixed episode 1 return: -107.5660

=== Fixed EPISODE 2/3 ===
[Fixed] Step 600/3600, reward: -0.0000
[Fixed] Step 1200/3600, reward: -0.0270
[Fixed] Step 1800/3600, reward: -0.0040
[Fixed] Step 2400/3600, reward: -0.0000
[Fixed] Step 3000/3600, reward: -0.0310
[Fixed] Step 3600/3600, reward: -0.0010
[Fixed] Episode finished. Total return: -107.5660000000005
Fixed episode 2 return: -107.5660

=== Fixed EPISODE 3/3 ===
[Fixed] Step 600/3600, reward: -0.0000
[Fixed] Step 1200/3600, reward: -0.0270
[Fixed] Step 1800/3600, reward: -0.0040
[Fixed] Step 2400/3600, reward: -0.0000
[Fixed] Step 3000/3600, reward: -0.0310
[Fixed] Step 3600/3600, reward: -0.0010
[Fixed] Episode 

  logic = traci.trafficlight.getCompleteRedYellowGreenDefinition(tls)[0]


[AI] Step 600/3600, reward: -0.0150
[AI] Step 1200/3600, reward: -0.0070
[AI] Step 1800/3600, reward: -0.0010
[AI] Step 2400/3600, reward: -0.0050
[AI] Step 3000/3600, reward: -0.0000
[AI] Step 3600/3600, reward: -0.0050
[AI] Episode finished. Total return: -24.461000000000077
AI episode 1 return: -24.4610

=== AI EPISODE 2/3 ===
[AI] Step 600/3600, reward: -0.0150
[AI] Step 1200/3600, reward: -0.0070
[AI] Step 1800/3600, reward: -0.0010
[AI] Step 2400/3600, reward: -0.0050
[AI] Step 3000/3600, reward: -0.0000
[AI] Step 3600/3600, reward: -0.0050
[AI] Episode finished. Total return: -24.461000000000077
AI episode 2 return: -24.4610

=== AI EPISODE 3/3 ===
[AI] Step 600/3600, reward: -0.0150
[AI] Step 1200/3600, reward: -0.0070
[AI] Step 1800/3600, reward: -0.0010
[AI] Step 2400/3600, reward: -0.0050
[AI] Step 3000/3600, reward: -0.0000
[AI] Step 3600/3600, reward: -0.0050
[AI] Episode finished. Total return: -24.461000000000077
AI episode 3 return: -24.4610

=== AI SUMMARY ===
Returns:

In [11]:
def run_episode_with_metrics(mode: str = "fixed", max_steps: int = 3600):
    """
    Run one evaluation episode (fixed-time or AI) and collect detailed metrics.

    mode:
        "fixed" → SUMO's built-in fixed program
        "ai"    → Trained RL model (GNN + A2C) in greedy mode

    Returns:
        {
            "mode": str,
            "num_tls": int,
            "episode_return": float,
            "reward_trace": list[float],
            "wait_trace": list[float],
            "queue_trace": list[int],
            "processed_trace": list[int],
            "intersection_stats": dict
        }
    """

    assert mode in ("fixed", "ai")

    if traci.isLoaded():
        traci.close()

    # Start SUMO without GUI
    traci.start([get_sumo_binary(False), "-c", str(CONFIG_FILE), "--step-length", "1"])

    episode_return = 0.0
    reward_trace = []
    wait_trace = []
    queue_trace = []
    processed_trace = []

    # Per-intersection cumulative metrics
    tls_wait_sum = {tls: 0.0 for tls in tls_ids}
    tls_queue_sum = {tls: 0.0 for tls in tls_ids}
    step_count = 0

    for t in range(max_steps):

        # ---------------- AI CONTROL ----------------
        if mode == "ai":
            all_states = []
            for tls in tls_ids:
                s = get_tls_state(tls, tls_lane_map)
                s_padded = s + [0] * (feature_size - len(s))
                all_states.append(s_padded)

            states_np = np.array(all_states, dtype=np.float32)[None, ...]
            states_tf = tf.convert_to_tensor(states_np, dtype=tf.float32)

            policy_logits_tf, value_tf = gnn_model((states_tf, adj_batch_tf), training=False)
            policy_logits = policy_logits_tf[0]

            actions = select_actions_from_logits(policy_logits)
            apply_actions_to_sumo(actions, tls_ids)
        # ----------------------------------------------------

        # Step SUMO
        traci.simulationStep()

        # Reward (global waiting)
        r = compute_global_reward(tls_lane_map)
        episode_return += r
        reward_trace.append(r)

        # Total waiting & queues across all TLS
        total_wait = 0.0
        total_queue = 0

        for tls, lanes in tls_lane_map.items():
            w = 0.0
            q = 0
            for lane in lanes:
                w += traci.lane.getWaitingTime(lane)
                q += traci.lane.getLastStepHaltingNumber(lane)

            total_wait += w
            total_queue += q

            tls_wait_sum[tls] += w
            tls_queue_sum[tls] += q

        wait_trace.append(total_wait)
        queue_trace.append(total_queue)

        # Processed vehicles (arrived)
        processed_trace.append(traci.simulation.getArrivedNumber())

        step_count += 1

        # Progress print
        if (t + 1) % 600 == 0:
            print(
                f"[{mode.upper()}+METRICS] Step {t+1}/{max_steps}, "
                f"reward={r:.4f}, wait={total_wait:.1f}, queue={total_queue}"
            )

    traci.close()
    print(f"[{mode.upper()}+METRICS] Episode finished. Total return: {episode_return}")

    # Intersection averages
    intersection_stats = {}
    for tls in tls_ids:
        intersection_stats[tls] = {
            "avg_wait": tls_wait_sum[tls] / max(step_count, 1),
            "avg_queue": tls_queue_sum[tls] / max(step_count, 1),
        }

    return {
        "mode": mode,
        "num_tls": len(tls_ids),
        "episode_return": episode_return,
        "reward_trace": reward_trace,
        "wait_trace": wait_trace,
        "queue_trace": queue_trace,
        "processed_trace": processed_trace,
        "intersection_stats": intersection_stats,
    }
