In [1]:
# 01_baseline_and_data.ipynb

import os
from pathlib import Path
import subprocess
import pandas as pd
import numpy as np
import xml.etree.ElementTree as ET

# === IMPORTANT ===
# If SUMO_HOME is not already set in your system environment variables,
# you can uncomment and set it here manually:
# os.environ["SUMO_HOME"] = r"C:\Program Files (x86)\Eclipse\Sumo"  # example for Windows

# Resolve the SUMO installation directory from the environment.
SUMO_HOME = os.environ.get("SUMO_HOME")
# Treat an empty value like a missing one: Path("") / "bin" would otherwise
# be resolved relative to the current working directory by later cells.
if not SUMO_HOME:
    raise EnvironmentError("SUMO_HOME is not set. Please set it in your system or in this notebook.")

print("Using SUMO_HOME:", SUMO_HOME)

# Every input/output path in this notebook is built from the directory the
# kernel is currently running in.
PROJECT_DIR = Path.cwd()
print("Project directory:", PROJECT_DIR)


Using SUMO_HOME: C:\Program Files (x86)\Eclipse\Sumo\
Project directory: C:\Users\manda\OneDrive\Documents\AI Traffic - Jupyter


In [2]:
import os
# Show the kernel's working directory (heading and path on separate lines).
print("Current working directory:", os.getcwd(), sep="\n")


Current working directory:
C:\Users\manda\OneDrive\Documents\AI Traffic - Jupyter


In [24]:
# Recreate SUMO configuration file
#
# NOTE(review): this cell reads NETWORK_FILE, ROUTE_FILE and TRIPINFO_FILE,
# which are assigned in the "CONFIG" cell that appears further down in this
# file, so it fails with NameError on a fresh kernel run top-to-bottom.
# NOTE(review): the XML template below is the same one create_sumo_config()
# writes; prefer calling that function instead of keeping two copies.

# Re-binds CONFIG_FILE to the same location the CONFIG cell uses.
CONFIG_FILE = PROJECT_DIR / "my_config.sumocfg"

# Only the bare file names (.name) are embedded, not full paths.
# Presumably SUMO resolves them relative to the config file's directory,
# so all referenced files must sit next to it -- TODO confirm.
config_xml = f"""<configuration>
    <input>
        <net-file value="{NETWORK_FILE.name}"/>
        <route-files value="{ROUTE_FILE.name}"/>
    </input>

    <time>
        <begin value="0"/>
        <end value="3600"/>
    </time>

    <report>
        <verbose value="true"/>
        <no-step-log value="true"/>
    </report>

    <output>
        <tripinfo-output value="{TRIPINFO_FILE.name}"/>
    </output>
</configuration>
"""

# Overwrites any existing config file at that path.
with open(CONFIG_FILE, "w", encoding="utf-8") as f:
    f.write(config_xml)

print("Config file created at:", CONFIG_FILE)
print("Exists now?", CONFIG_FILE.exists())


Config file created at: C:\Users\manda\OneDrive\Documents\AI Traffic - Jupyter\my_config.sumocfg
Exists now? True


In [14]:
import shutil
import sys

def build_sumo_network(osm_file: Path, net_file: Path):
    """
    Run netconvert to turn an OSM extract into a SUMO network file.

    The executable is taken from PATH when available; otherwise it is
    looked up under SUMO_HOME/bin (with the ``.exe`` suffix on Windows).

    Args:
        osm_file: OSM input passed to ``--osm-files``.
        net_file: Output network path passed to ``-o``.

    Raises:
        EnvironmentError: netconvert is not on PATH and SUMO_HOME is None.
        FileNotFoundError: the SUMO_HOME/bin candidate does not exist.
        RuntimeError: netconvert exited with a non-zero return code.
    """
    netconvert_cmd = shutil.which("netconvert")

    if netconvert_cmd is None:
        # Not on PATH: the only remaining option is the SUMO_HOME install.
        if SUMO_HOME is None:
            raise EnvironmentError(
                "SUMO_HOME is not set and netconvert was not found in PATH.\n"
                "Set SUMO_HOME or add netconvert to your system PATH."
            )

        bin_dir = Path(SUMO_HOME) / "bin"
        exe_name = "netconvert.exe" if sys.platform.startswith("win") else "netconvert"
        candidate = bin_dir / exe_name

        if not candidate.exists():
            raise FileNotFoundError(
                f"netconvert not found.\n"
                f"Tried PATH and: {candidate}\n"
                f"Check that SUMO is correctly installed in: {bin_dir}"
            )

        netconvert_cmd = str(candidate)

    print("Using netconvert at:", netconvert_cmd)

    # Input/output arguments first, then the conversion switches.
    io_args = ["--lefthand", "--osm-files", str(osm_file), "-o", str(net_file)]
    tuning_args = [
        "--tls.guess",
        "--tls.discard-simple",
        "--ramps.guess",
        "--no-turnarounds",
        "--junctions.corner-detail", "5",
    ]
    cmd = [netconvert_cmd] + io_args + tuning_args

    print("Running netconvert with command:")
    print(" ".join(cmd))

    completed = subprocess.run(cmd, text=True, capture_output=True)

    if completed.returncode:
        print("\nSTDERR:\n", completed.stderr)
        raise RuntimeError(f"netconvert failed with code {completed.returncode}")

    print("\n✅ Network successfully built:", net_file)


# Call it ONLY if the network file does not exist yet
# NOTE(review): NETWORK_FILE and OSM_FILE are assigned in the "CONFIG" cell
# that appears further down in this file; that cell has to run first.
if not NETWORK_FILE.exists():
    # Fail with a clear message before launching netconvert on a missing input.
    if not OSM_FILE.exists():
        raise FileNotFoundError(f"OSM file not found: {OSM_FILE}")
    print("Generating my_network.net.xml from map.osm ...")
    build_sumo_network(OSM_FILE, NETWORK_FILE)
else:
    # An existing network file is reused as-is; delete it to force a rebuild.
    print("Network file already exists:", NETWORK_FILE)


Generating my_network.net.xml from map.osm ...
Using netconvert at: C:\Program Files (x86)\Eclipse\Sumo\bin\netconvert.EXE
Running netconvert with command:
C:\Program Files (x86)\Eclipse\Sumo\bin\netconvert.EXE --lefthand --osm-files C:\Users\manda\OneDrive\Documents\AI Traffic - Jupyter\map.osm -o C:\Users\manda\OneDrive\Documents\AI Traffic - Jupyter\my_network.net.xml --tls.guess --tls.discard-simple --ramps.guess --no-turnarounds --junctions.corner-detail 5

✅ Network successfully built: C:\Users\manda\OneDrive\Documents\AI Traffic - Jupyter\my_network.net.xml


In [3]:
# === CONFIG: adjust these if your filenames differ ===

# SUMO inputs and generated artefacts, all directly inside PROJECT_DIR.
OSM_FILE      = PROJECT_DIR / "map.osm"            # raw OpenStreetMap extract
NETWORK_FILE  = PROJECT_DIR / "my_network.net.xml" # generated network
ROUTE_FILE    = PROJECT_DIR / "my_routes.rou.xml"  # generated routes
CONFIG_FILE   = PROJECT_DIR / "my_config.sumocfg"  # generated SUMO config
TRIPINFO_FILE = PROJECT_DIR / "tripinfo_baseline.xml"

# Traffic-count data: cleaned Sheffield subset and the raw DfT export.
DF_SHEFF_CSV  = PROJECT_DIR / "df_sheff.csv"
RAW_DFT_CSV   = PROJECT_DIR / "dft_traffic_counts_aadf.csv"

# One status line per SUMO file (built in a generator so no loop variable
# is left behind in the notebook namespace).
print(
    "\n".join(
        f"{label} {target.exists()}"
        for label, target in (
            ("OSM file exists?           ->", OSM_FILE),
            ("Network file exists?       ->", NETWORK_FILE),
            ("Routes file exists?        ->", ROUTE_FILE),
            (".sumocfg file exists?      ->", CONFIG_FILE),
        )
    )
)


OSM file exists?           -> True
Network file exists?       -> True
Routes file exists?        -> True
.sumocfg file exists?      -> True


In [16]:
def build_sumo_network(osm_file: Path, net_file: Path):
    """
    Convert a raw OSM file into a SUMO .net.xml file using netconvert
    with left-hand-traffic (UK) settings.

    The netconvert executable is taken from PATH when available, otherwise
    from SUMO_HOME/bin. On Windows the binary is ``netconvert.exe``, so the
    platform-specific file name is used for the SUMO_HOME lookup.

    Args:
        osm_file: OSM input passed to ``--osm-files``.
        net_file: Output network path passed to ``-o``.

    Raises:
        FileNotFoundError: netconvert is neither on PATH nor in SUMO_HOME/bin.
        RuntimeError: netconvert exited with a non-zero return code; the
            message includes netconvert's stderr.
    """
    # Local imports keep this cell runnable regardless of cell execution order.
    import shutil
    import sys

    netconvert = shutil.which("netconvert")
    if netconvert is None:
        exe_name = "netconvert.exe" if sys.platform.startswith("win") else "netconvert"
        candidate = Path(SUMO_HOME) / "bin" / exe_name
        if not candidate.exists():
            raise FileNotFoundError(f"netconvert not found at: {candidate}")
        netconvert = str(candidate)

    cmd = [
        str(netconvert),
        "--lefthand",                 # UK driving
        "--osm-files", str(osm_file),
        "-o", str(net_file),

        # Good SUMO conversion flags
        "--tls.guess",                # auto detect signals
        "--tls.discard-simple",       # remove trivial lights
        "--ramps.guess",
        "--roundabouts.guess",
        "--junctions.join",           # fix broken intersections
        "--geometry.remove",          # remove redundant geometry
        "--no-turnarounds",
        "--keep-edges.min-speed", "1.0",
        "--remove-edges.isolated"
    ]

    print("Running netconvert with command:")
    print(" ".join(cmd))

    result = subprocess.run(cmd, capture_output=True, text=True)

    if result.returncode != 0:
        print("\nSTDERR:\n", result.stderr)
        raise RuntimeError("netconvert failed!\n" + result.stderr)

    print("Network successfully built:", net_file)


# Build the network only when no network file exists yet.
if not NETWORK_FILE.exists():
    # Same pre-flight check as the other build cell: report a missing OSM
    # input directly instead of surfacing it as a netconvert failure.
    if not OSM_FILE.exists():
        raise FileNotFoundError(f"OSM file not found: {OSM_FILE}")
    print("Generating network file from OSM...")
    build_sumo_network(OSM_FILE, NETWORK_FILE)
else:
    print("Network file already exists:", NETWORK_FILE)


Network file already exists: C:\Users\manda\OneDrive\Documents\AI Traffic - Jupyter\my_network.net.xml


In [3]:
def load_sheffield_data(df_sheff_path: Path, raw_dft_path: Path) -> pd.DataFrame:
    """
    Load Sheffield AADF data.

    Priority:
      1) existing cleaned CSV at ``df_sheff_path``
      2) fall back to filtering the raw DfT CSV at ``raw_dft_path`` and
         saving the result to ``df_sheff_path``

    In both cases ``count_point_id`` is returned as text, so downstream
    code sees the same dtype whether or not the cache file existed.

    Args:
        df_sheff_path: Location of the cleaned Sheffield CSV (read if it
            exists, otherwise written).
        raw_dft_path: Location of the raw DfT AADF CSV.

    Returns:
        The Sheffield rows. When built from the raw file, rows with a
        non-numeric ``all_motor_vehicles`` and rows whose ``road_type``
        contains "Minor" are excluded.

    Raises:
        FileNotFoundError: neither file exists.
        KeyError: the raw file lacks a column the filter needs.
    """
    if df_sheff_path.exists():
        print("Loading cleaned Sheffield dataset:", df_sheff_path)
        # Without an explicit dtype the id column is re-parsed as int, which
        # differs from the str column produced by the build path below.
        return pd.read_csv(df_sheff_path, dtype={"count_point_id": str})

    if not raw_dft_path.exists():
        raise FileNotFoundError(
            "Neither df_sheff.csv nor raw dft_traffic_counts_aadf.csv found.\n"
            "Place at least one of them in the project folder."
        )

    print("Cleaned df_sheff.csv not found. Building it from raw DfT file...")
    df = pd.read_csv(raw_dft_path, low_memory=False)
    # Normalise header spelling so the column lookups below are case-insensitive.
    df.columns = [c.strip().lower() for c in df.columns]

    # These columns are indexed unconditionally below; report all missing
    # ones at once instead of failing on the first lookup.
    required_cols = (
        "count_point_id", "all_motor_vehicles",
        "local_authority_code", "local_authority_name",
    )
    missing_cols = [c for c in required_cols if c not in df.columns]
    if missing_cols:
        raise KeyError(
            f"Raw DfT file {raw_dft_path} is missing required column(s): {missing_cols}"
        )

    SHEFF_CODE = "E08000019"
    keep_cols = [
        "count_point_id", "year", "local_authority_name", "local_authority_code",
        "road_name", "road_type", "road_category",
        "all_motor_vehicles", "latitude", "longitude"
    ]
    df = df[[c for c in keep_cols if c in df.columns]].copy()

    df["count_point_id"] = df["count_point_id"].astype(str)
    if "year" in df.columns:
        df["year"] = pd.to_numeric(df["year"], errors="coerce")

    # Non-numeric counts become NaN and are dropped.
    df["all_motor_vehicles"] = pd.to_numeric(df["all_motor_vehicles"], errors="coerce")
    df = df.dropna(subset=["all_motor_vehicles"])

    # A row qualifies by authority code or by an exact "Sheffield" name.
    is_sheffield = (
        (df["local_authority_code"] == SHEFF_CODE) |
        (df["local_authority_name"].astype(str).str.fullmatch(r"Sheffield", na=False))
    )
    df_sheff = df[is_sheffield].copy()

    if "road_type" in df_sheff.columns:
        # astype(str) keeps .str usable even if the column holds no strings
        # (e.g. all-NaN); "nan" never matches "Minor", same as na=False.
        is_minor = df_sheff["road_type"].astype(str).str.contains("Minor", case=False, na=False)
        df_sheff = df_sheff[~is_minor]

    print("Sheffield filtered shape:", df_sheff.shape)
    df_sheff.to_csv(df_sheff_path, index=False)
    print("Saved cleaned Sheffield file to:", df_sheff_path)
    return df_sheff


df_sheff = load_sheffield_data(DF_SHEFF_CSV, RAW_DFT_CSV)

print("CLEANED Sheffield AADF Data (first 10 rows)")
display(df_sheff.head(10))
print("Dataset size:", df_sheff.shape)

# Compute maximum all_motor_vehicles and period
max_vehicles = df_sheff["all_motor_vehicles"].max()
# An empty frame gives NaN and a zero flow gives an infinite period; either
# would be handed to route generation unnoticed, so stop here instead.
if not max_vehicles > 0:
    raise ValueError(
        f"Cannot derive a vehicle period: max all_motor_vehicles is {max_vehicles!r}"
    )
max_row = df_sheff[df_sheff["all_motor_vehicles"] == max_vehicles]

print("\nRow with max all_motor_vehicles:")
print(max_row.to_string(index=False))

peak_percentage = 0.10  # 10% peak hour assumption
# Seconds between vehicle insertions if peak_percentage of the daily flow
# passes within one hour (3600 s).
period = 3600 / (max_vehicles * peak_percentage)

print(f"\nMax AADF (vehicles/day): {max_vehicles}")
print(f"Assumed peak fraction: {peak_percentage * 100:.0f}% of daily flow")
print(f"Computed PERIOD for randomTrips.py: {period:.3f} seconds between vehicles")


Cleaned df_sheff.csv not found. Building it from raw DfT file...
Sheffield filtered shape: (2380, 10)
Saved cleaned Sheffield file to: C:\Users\manda\OneDrive\Documents\AI Traffic - Jupyter\df_sheff.csv
CLEANED Sheffield AADF Data (first 10 rows)


Unnamed: 0,count_point_id,year,local_authority_name,local_authority_code,road_name,road_type,road_category,all_motor_vehicles,latitude,longitude
15873,6565,2000,Sheffield,E08000019,A57,Major,PA,6200,53.379922,-1.550482
15874,6565,2001,Sheffield,E08000019,A57,Major,PA,6245,53.379922,-1.550482
15875,6565,2002,Sheffield,E08000019,A57,Major,PA,6051,53.379922,-1.550482
15876,6565,2003,Sheffield,E08000019,A57,Major,PA,6192,53.379922,-1.550482
15877,6565,2004,Sheffield,E08000019,A57,Major,PA,6206,53.379922,-1.550482
15878,6565,2005,Sheffield,E08000019,A57,Major,PA,6218,53.379922,-1.550482
15879,6565,2006,Sheffield,E08000019,A57,Major,PA,6380,53.379922,-1.550482
15880,6565,2007,Sheffield,E08000019,A57,Major,PA,6340,53.379922,-1.550482
15881,6565,2008,Sheffield,E08000019,A57,Major,PA,6143,53.379922,-1.550482
15882,6565,2009,Sheffield,E08000019,A57,Major,PA,6357,53.379922,-1.550482


Dataset size: (2380, 10)

Row with max all_motor_vehicles:
count_point_id  year local_authority_name local_authority_code road_name road_type road_category  all_motor_vehicles  latitude  longitude
         73007  2024            Sheffield            E08000019        M1     Major            TM              137487 53.463996  -1.450033

Max AADF (vehicles/day): 137487
Assumed peak fraction: 10% of daily flow
Computed PERIOD for randomTrips.py: 0.262 seconds between vehicles


In [4]:
def generate_random_routes(
    network_file: Path,
    route_file: Path,
    period: float,
    seed: int = 42,
    binomial: int = 10,
    prefix: str = "trip"
):
    """
    Call SUMO's randomTrips.py (from SUMO_HOME/tools) to generate a route
    file for the given network.

    The script is launched with the interpreter running this kernel
    (``sys.executable``) rather than whatever ``python`` resolves to on
    PATH, which may be a different installation or missing entirely.

    Args:
        network_file: Network passed to ``-n``.
        route_file: Output route file passed to ``-r``.
        period: Value passed to ``--period``; must be a positive number.
        seed: Value passed to ``--seed``.
        binomial: Value passed to ``--binomial``.
        prefix: Value passed to ``--prefix``.

    Raises:
        ValueError: ``period`` is not greater than zero (including NaN).
        FileNotFoundError: randomTrips.py is not present in SUMO_HOME/tools.
        RuntimeError: randomTrips.py exited with a non-zero return code.
    """
    import sys

    # Reject zero/negative/NaN before spawning a process that would fail
    # with a less direct error.
    if not period > 0:
        raise ValueError(f"period must be a positive number, got {period!r}")

    tools_dir = Path(SUMO_HOME) / "tools"
    random_trips_py = tools_dir / "randomTrips.py"

    if not random_trips_py.exists():
        raise FileNotFoundError(f"randomTrips.py not found at: {random_trips_py}")

    print("Generating routes with randomTrips.py ...")
    cmd = [
        sys.executable, str(random_trips_py),
        "-n", str(network_file),
        "-r", str(route_file),
        "--period", str(period),
        "--seed", str(seed),
        "--binomial", str(binomial),
        "--prefix", prefix,
        "--validate",
    ]

    print("Command:", " ".join(cmd))
    result = subprocess.run(cmd, capture_output=True, text=True)

    if result.returncode != 0:
        print("randomTrips.py STDERR:\n", result.stderr)
        raise RuntimeError(f"randomTrips.py failed with code {result.returncode}")

    print("Random Trips Generation: SUCCESS")
    print("Output routes file:", route_file)


# Only regenerate if you want fresh routes
# (an existing ROUTE_FILE is reused; delete it to force a new randomTrips run).
# `period` is the value computed from the AADF data in the cell above.
if not ROUTE_FILE.exists():
    generate_random_routes(NETWORK_FILE, ROUTE_FILE, period)
else:
    print("Route file already exists:", ROUTE_FILE)


Route file already exists: C:\Users\manda\OneDrive\Documents\AI Traffic - Jupyter\my_routes.rou.xml


In [8]:
def create_sumo_config(
    config_path: Path,
    net_file: Path,
    route_file: Path,
    tripinfo_path: Path,
    begin: int = 0,
    end: int = 3600,
):
    """
    Create a simple SUMO configuration file that:
      - loads the given network + route file
      - simulates ``begin``..``end`` seconds (0-3600 by default)
      - writes tripinfo XML

    Only the file *names* of ``net_file``, ``route_file`` and
    ``tripinfo_path`` are written, so the config assumes those files live
    in the same directory as the config itself.

    Args:
        config_path: Where the .sumocfg file is written (overwritten if present).
        net_file: Network file; its name goes into ``<net-file>``.
        route_file: Route file; its name goes into ``<route-files>``.
        tripinfo_path: Tripinfo output; its name goes into ``<tripinfo-output>``.
        begin: Value of ``<begin>``.
        end: Value of ``<end>``.
    """
    from xml.sax.saxutils import quoteattr

    # quoteattr() adds the surrounding quotes and escapes &, < and quote
    # characters, so unusual file names cannot produce malformed XML.
    config_xml = f"""<configuration>
    <input>
        <net-file value={quoteattr(net_file.name)}/>
        <route-files value={quoteattr(route_file.name)}/>
    </input>

    <time>
        <begin value={quoteattr(str(begin))}/>
        <end value={quoteattr(str(end))}/>
    </time>

    <report>
        <verbose value="true"/>
        <no-step-log value="true"/>
    </report>

    <output>
        <tripinfo-output value={quoteattr(tripinfo_path.name)}/>
    </output>
</configuration>
"""
    config_path.write_text(config_xml, encoding="utf-8")
    print("Wrote SUMO config to:", config_path)


# (Re)write the SUMO config from the paths defined in the CONFIG cell.
create_sumo_config(
    config_path=CONFIG_FILE,
    net_file=NETWORK_FILE,
    route_file=ROUTE_FILE,
    tripinfo_path=TRIPINFO_FILE,
)


Wrote SUMO config to: C:\Users\manda\OneDrive\Documents\AI Traffic - Jupyter\my_config.sumocfg


In [26]:
# The config is written as UTF-8, so read it back with the same encoding
# instead of the platform default.
print(CONFIG_FILE.read_text(encoding="utf-8"))

<configuration>
    <input>
        <net-file value="my_network.net.xml"/>
        <route-files value="my_routes.rou.xml"/>
    </input>

    <time>
        <begin value="0"/>
        <end value="3600"/>
    </time>

    <report>
        <verbose value="true"/>
        <no-step-log value="true"/>
    </report>

    <output>
        <tripinfo-output value="tripinfo_baseline.xml"/>
    </output>
</configuration>



In [19]:
import shutil
import sys

def get_sumo_binary(gui: bool = False) -> str:
    """
    Locate the SUMO executable and return its path as a string.

    Lookup order:
      1) ``sumo`` / ``sumo-gui`` on PATH
      2) SUMO_HOME/bin (with ``.exe`` appended on Windows)

    Args:
        gui: When True look for ``sumo-gui`` instead of ``sumo``.

    Raises:
        EnvironmentError: not on PATH and SUMO_HOME is None.
        FileNotFoundError: the SUMO_HOME/bin candidate does not exist.
    """
    if gui:
        base_name = "sumo-gui"
    else:
        base_name = "sumo"

    resolved = shutil.which(base_name)
    if resolved is None:
        # Nothing on PATH: fall back to the SUMO_HOME installation.
        if SUMO_HOME is None:
            raise EnvironmentError(
                f"{base_name} not found in PATH and SUMO_HOME is not set.\n"
                "Either add SUMO to PATH, or set SUMO_HOME correctly."
            )

        suffix = ".exe" if sys.platform.startswith("win") else ""
        candidate = Path(SUMO_HOME) / "bin" / f"{base_name}{suffix}"

        if not candidate.exists():
            raise FileNotFoundError(
                f"{base_name} not found.\n"
                f"Tried PATH and: {candidate}\n"
                f"Check that SUMO is correctly installed."
            )

        resolved = str(candidate)

    return resolved


In [20]:
def run_sumo_baseline(config_file: Path, tripinfo_file: Path, use_gui: bool = False):
    """
    Execute a single SUMO run driven by ``config_file``.

    No signal-control options are passed on the command line, so junction
    control is left to SUMO's built-in fixed-time signals.

    Args:
        config_file: SUMO configuration passed to ``-c``.
        tripinfo_file: Expected tripinfo output; deleted beforehand if it
            exists and checked for existence after the run.
        use_gui: Forwarded to ``get_sumo_binary`` to choose the binary.

    Raises:
        RuntimeError: SUMO exited with a non-zero return code.
    """
    # Drop stale results so the final existence check reflects this run only.
    if tripinfo_file.exists():
        print("Deleting old tripinfo file:", tripinfo_file)
        tripinfo_file.unlink()

    sumo_bin = get_sumo_binary(gui=use_gui)
    print("Using SUMO binary:", sumo_bin)

    cmd = [sumo_bin, "-c", str(config_file)]
    cmd += ["--duration-log.statistics"]
    cmd += ["--log", "log_baseline.txt"]

    print("Running SUMO baseline...")
    print("Command:", " ".join(cmd))

    completed = subprocess.run(cmd, text=True, capture_output=True)

    if completed.returncode:
        print("\nSUMO STDERR:\n", completed.stderr)
        raise RuntimeError(f"SUMO exited with code {completed.returncode}")

    print("SUMO baseline run completed.")
    print("Tripinfo generated:", tripinfo_file.exists())


In [27]:
# Run the baseline simulation; any existing TRIPINFO_FILE is deleted first.
# use_gui=True makes get_sumo_binary() look for "sumo-gui" instead of "sumo".
run_sumo_baseline(CONFIG_FILE, TRIPINFO_FILE, use_gui=False)  # or True if you want to see the map

Using SUMO binary: C:\Program Files (x86)\Eclipse\Sumo\bin\sumo.EXE
Running SUMO baseline...
Command: C:\Program Files (x86)\Eclipse\Sumo\bin\sumo.EXE -c C:\Users\manda\OneDrive\Documents\AI Traffic - Jupyter\my_config.sumocfg --duration-log.statistics --log log_baseline.txt
SUMO baseline run completed.
Tripinfo generated: True


In [28]:
def parse_tripinfo(tripinfo_path: Path) -> pd.DataFrame:
    """
    Parse a SUMO tripinfo XML file into a pandas DataFrame with one row
    per ``<tripinfo>`` element.

    Columns (always present, even when the file contains no trips):
      - ``veh_id``: the element's ``id`` attribute
      - ``duration``: ``duration`` attribute as float (0.0 if absent)
      - ``waiting_time``: ``waitingTime`` attribute as float (0.0 if absent)
      - ``route_length``: ``routeLength`` attribute as float (0.0 if absent)

    Raises:
        FileNotFoundError: ``tripinfo_path`` does not exist.
    """
    if not tripinfo_path.exists():
        raise FileNotFoundError(f"Tripinfo file not found: {tripinfo_path}")

    columns = ["veh_id", "duration", "waiting_time", "route_length"]
    root = ET.parse(tripinfo_path).getroot()

    records = [
        {
            "veh_id": trip.get("id"),
            "duration": float(trip.get("duration", 0.0)),
            "waiting_time": float(trip.get("waitingTime", 0.0)),
            "route_length": float(trip.get("routeLength", 0.0)),
        }
        for trip in root.iter("tripinfo")
    ]

    # Passing `columns` keeps the schema stable for a run with zero trips;
    # otherwise the frame would have no columns and df["duration"] would
    # raise KeyError. astype() gives the empty frame numeric dtypes too.
    df = pd.DataFrame(records, columns=columns)
    return df.astype({"duration": float, "waiting_time": float, "route_length": float})


# Load the per-vehicle results of the baseline run into a DataFrame.
df_tripinfo = parse_tripinfo(TRIPINFO_FILE)
print("Parsed trips:", len(df_tripinfo))
display(df_tripinfo.head())

# Summary statistics over all parsed <tripinfo> rows.
# NOTE(review): presumably only vehicles that finished their trip within the
# simulated window appear in the tripinfo file -- confirm before comparing
# against other controllers.
print("\n=== Fixed-time baseline metrics ===")
print("Average travel time (s):", df_tripinfo["duration"].mean())
print("Average waiting time (s):", df_tripinfo["waiting_time"].mean())
print("Total waiting time (s):", df_tripinfo["waiting_time"].sum())
print("Total vehicles:", len(df_tripinfo))


Parsed trips: 5327


Unnamed: 0,veh_id,duration,waiting_time,route_length
0,trip2,3.0,0.0,6.21
1,trip12,3.0,0.0,9.85
2,trip23,1.0,0.0,0.0
3,trip16,4.0,0.0,15.65
4,trip37,1.0,0.0,0.0



=== Fixed-time baseline metrics ===
Average travel time (s): 66.43007321193917
Average waiting time (s): 49.38182842125023
Total waiting time (s): 263057.0
Total vehicles: 5327
