diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index bc1e55fd..53061397 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -73,6 +73,8 @@ jobs: dockerfile: src/envs/chat_env/server/Dockerfile - name: coding-env dockerfile: src/envs/coding_env/server/Dockerfile + - name: sumo-rl-env + dockerfile: src/envs/sumo_rl_env/server/Dockerfile - name: atari-env dockerfile: src/envs/atari_env/server/Dockerfile diff --git a/examples/sumo_rl_simple.py b/examples/sumo_rl_simple.py new file mode 100644 index 00000000..ec5f08ae --- /dev/null +++ b/examples/sumo_rl_simple.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 +""" +Simple example demonstrating SUMO-RL Environment usage. + +This example shows how to: +1. Connect to a SUMO traffic signal control environment +2. Reset the environment +3. Take actions (select traffic light phases) +4. Process observations and rewards + +Usage: + # Option 1: Start the server manually (the app module only defines `app`; it must be served by uvicorn) + uvicorn envs.sumo_rl_env.server.app:app --host 0.0.0.0 --port 8000 + # Then run: python examples/sumo_rl_simple.py + + # Option 2: Use Docker + docker run -p 8000:8000 sumo-rl-env:latest + # Then run: python examples/sumo_rl_simple.py +""" + +import numpy as np + +from envs.sumo_rl_env import SumoAction, SumoRLEnv + + +def main(): + """Run a simple SUMO traffic control episode.""" + # Connect to the SUMO environment server + print("Connecting to SUMO-RL environment...") + env = SumoRLEnv(base_url="http://localhost:8000") + + try: + # Reset the environment + print("\nResetting environment...") + result = env.reset() + print(f"Observation shape: {result.observation.observation_shape}") + print(f"Available actions: {result.observation.action_mask}") + print(f"Number of green phases: {len(result.observation.action_mask)}") + + # Get initial state + state = env.state() + print(f"\nSimulation configuration:") + print(f" Network: {state.net_file}") + print(f" Duration: {state.num_seconds} seconds") + print(f" Delta time: 
{state.delta_time} seconds") + print(f" Reward function: {state.reward_fn}") + + # Run a few steps with random policy + print("\nRunning traffic control with random policy...") + episode_reward = 0 + steps = 0 + max_steps = 100 + + for step in range(max_steps): + # Random policy: select random green phase + action_id = np.random.choice(result.observation.action_mask) + + # Take action + result = env.step(SumoAction(phase_id=action_id)) + + episode_reward += result.reward or 0 + steps += 1 + + # Print progress every 10 steps + if step % 10 == 0: + state = env.state() + print( + f"Step {step:3d}: " + f"phase={action_id}, " + f"reward={result.reward:6.2f}, " + f"vehicles={state.total_vehicles:3d}, " + f"waiting={state.mean_waiting_time:6.2f}s, " + f"speed={state.mean_speed:5.2f}m/s" + ) + + if result.done: + print(f"\nEpisode finished after {steps} steps!") + break + + # Final statistics + print(f"\n{'='*60}") + print(f"Episode Summary:") + print(f" Total steps: {steps}") + print(f" Total reward: {episode_reward:.2f}") + print(f" Average reward: {episode_reward/steps:.2f}") + + # Get final state + state = env.state() + print(f"\nFinal State:") + print(f" Simulation time: {state.sim_time:.0f} seconds") + print(f" Total vehicles: {state.total_vehicles}") + print(f" Total waiting time: {state.total_waiting_time:.2f} seconds") + print(f" Mean waiting time: {state.mean_waiting_time:.2f} seconds") + print(f" Mean speed: {state.mean_speed:.2f} m/s") + print(f"{'='*60}") + + finally: + # Cleanup + print("\nClosing environment...") + env.close() + print("Done!") + + +if __name__ == "__main__": + main() diff --git a/src/envs/sumo_rl_env/README.md b/src/envs/sumo_rl_env/README.md new file mode 100644 index 00000000..e35035ae --- /dev/null +++ b/src/envs/sumo_rl_env/README.md @@ -0,0 +1,341 @@ +# SUMO-RL Environment + +Integration of traffic signal control with the OpenEnv framework via SUMO (Simulation of Urban MObility) and SUMO-RL. 
+ +## Overview + +This environment enables reinforcement learning for **traffic signal control** using SUMO, a microscopic traffic simulation package. Train RL agents to optimize traffic light timing and minimize vehicle delays. + +**Key Features**: +- **Realistic traffic simulation** via SUMO +- **Single-agent mode** for single intersection control +- **Configurable rewards** (waiting time, queue, pressure, speed) +- **Multiple networks** supported (custom .net.xml and .rou.xml files) +- **Docker-ready** with pre-bundled example network + +## Quick Start + +### Using Docker (Recommended) + +```python +from envs.sumo_rl_env import SumoRLEnv, SumoAction + +# Automatically starts container +env = SumoRLEnv.from_docker_image("sumo-rl-env:latest") + +# Reset environment +result = env.reset() +print(f"Observation shape: {result.observation.observation_shape}") +print(f"Available actions: {result.observation.action_mask}") + +# Take action (select next green phase) +result = env.step(SumoAction(phase_id=1)) +print(f"Reward: {result.reward}, Done: {result.done}") + +# Get state +state = env.state() +print(f"Simulation time: {state.sim_time}") +print(f"Total vehicles: {state.total_vehicles}") +print(f"Mean waiting time: {state.mean_waiting_time}") + +# Cleanup +env.close() +``` + +### Building the Docker Image + +```bash +cd OpenEnv + +# Build base image first (if not already built) +docker build -t envtorch-base:latest -f src/core/containers/images/Dockerfile . + +# Build SUMO-RL environment +docker build -f src/envs/sumo_rl_env/server/Dockerfile -t sumo-rl-env:latest . 
+``` + +### Running with Different Configurations + +```bash +# Default: single-intersection +docker run -p 8000:8000 sumo-rl-env:latest + +# Longer simulation +docker run -p 8000:8000 \ + -e SUMO_NUM_SECONDS=50000 \ + sumo-rl-env:latest + +# Different reward function +docker run -p 8000:8000 \ + -e SUMO_REWARD_FN=queue \ + sumo-rl-env:latest + +# Custom seed for reproducibility +docker run -p 8000:8000 \ + -e SUMO_SEED=123 \ + sumo-rl-env:latest +``` + +## Observation + +The observation is a vector containing: +- **Phase one-hot**: Current active green phase (one-hot encoded) +- **Min green flag**: Binary indicator if minimum green time has passed +- **Lane densities**: Number of vehicles / lane capacity for each incoming lane +- **Lane queues**: Number of queued vehicles / lane capacity for each incoming lane + +Observation size varies by network topology (depends on number of phases and lanes). + +**Default (single-intersection)**: +- 4 green phases +- 8 incoming lanes +- Observation size: ~21 elements + +## Action Space + +The action space is discrete and represents selecting the next green phase to activate. + +- **Action type**: Discrete +- **Action range**: `[0, num_green_phases - 1]` +- **Default (single-intersection)**: 4 actions (one per green phase) + +When a phase change is requested, SUMO automatically inserts a yellow phase before switching. + +## Rewards + +Default reward function is **change in cumulative waiting time**: +``` +reward = -(total_waiting_time_now - total_waiting_time_previous) +``` + +Positive rewards indicate waiting time decreased (good). 
+ +### Available Reward Functions + +Set via `SUMO_REWARD_FN` environment variable: + +- **`diff-waiting-time`** (default): Change in cumulative waiting time +- **`average-speed`**: Average speed of all vehicles +- **`queue`**: Negative total queue length +- **`pressure`**: Pressure metric (incoming - outgoing vehicles) + +## Configuration + +### Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `SUMO_NET_FILE` | `/app/nets/single-intersection/single-intersection.net.xml` | Network topology file | +| `SUMO_ROUTE_FILE` | `/app/nets/single-intersection/single-intersection.rou.xml` | Vehicle routes file | +| `SUMO_NUM_SECONDS` | `20000` | Simulation duration (seconds) | +| `SUMO_DELTA_TIME` | `5` | Seconds between agent actions | +| `SUMO_YELLOW_TIME` | `2` | Yellow phase duration (seconds) | +| `SUMO_MIN_GREEN` | `5` | Minimum green time (seconds) | +| `SUMO_MAX_GREEN` | `50` | Maximum green time (seconds) | +| `SUMO_REWARD_FN` | `diff-waiting-time` | Reward function name | +| `SUMO_SEED` | `42` | Random seed (use for reproducibility) | + +### Using Custom Networks + +To use your own SUMO network: + +```python +from envs.sumo_rl_env import SumoRLEnv + +env = SumoRLEnv.from_docker_image( + "sumo-rl-env:latest", + volumes={ + "/path/to/your/nets": {"bind": "/nets", "mode": "ro"} + }, + environment={ + "SUMO_NET_FILE": "/nets/my-network.net.xml", + "SUMO_ROUTE_FILE": "/nets/my-routes.rou.xml", + } +) +``` + +Your network directory should contain: +- `.net.xml` - Network topology (roads, junctions, traffic lights) +- `.rou.xml` - Vehicle routes (trip definitions, flow rates) + +## API Reference + +### SumoAction + +```python +@dataclass +class SumoAction(Action): + phase_id: int # Green phase to activate (0 to num_phases-1) + ts_id: str = "0" # Traffic signal ID (for multi-agent) +``` + +### SumoObservation + +```python +@dataclass +class SumoObservation(Observation): + observation: List[float] # Observation vector + observation_shape: List[int] # Shape for 
reshaping + action_mask: List[int] # Valid action indices + sim_time: float # Current simulation time + done: bool # Episode finished + reward: Optional[float] # Reward from last action + metadata: Dict # System metrics +``` + +### SumoState + +```python +@dataclass +class SumoState(State): + episode_id: str # Unique episode ID + step_count: int # Steps taken + net_file: str # Network file path + route_file: str # Route file path + sim_time: float # Current simulation time + total_vehicles: int # Total vehicles in simulation + total_waiting_time: float # Cumulative waiting time + mean_waiting_time: float # Mean waiting time + mean_speed: float # Mean vehicle speed + # ... configuration parameters +``` + +## Example Training Loop + +```python +from envs.sumo_rl_env import SumoRLEnv, SumoAction +import numpy as np + +# Start environment +env = SumoRLEnv.from_docker_image("sumo-rl-env:latest") + +# Training loop +for episode in range(10): + result = env.reset() + episode_reward = 0 + steps = 0 + + while not result.done and steps < 1000: + # Random policy (replace with your RL agent) + action_id = np.random.choice(result.observation.action_mask) + + # Take action + result = env.step(SumoAction(phase_id=action_id)) + + episode_reward += result.reward or 0 + steps += 1 + + # Print progress every 100 steps + if steps % 100 == 0: + state = env.state() + print(f"Step {steps}: " + f"reward={result.reward:.2f}, " + f"vehicles={state.total_vehicles}, " + f"waiting={state.mean_waiting_time:.2f}") + + print(f"Episode {episode}: total_reward={episode_reward:.2f}, steps={steps}") + +env.close() +``` + +## Performance Notes + +### Simulation Speed + +- **Reset time**: 1-5 seconds (starts new SUMO simulation) +- **Step time**: ~50-200ms per step (depends on network size) +- **Episode duration**: Minutes (20,000 sim seconds with delta_time=5 → ~4,000 steps) + +### Optimization + +For faster simulation: +1. Reduce `SUMO_NUM_SECONDS` for shorter episodes +2. 
Increase `SUMO_DELTA_TIME` for fewer decisions +3. Use simpler networks with fewer vehicles + +## Architecture + +``` +┌─────────────────────────────────┐ +│ Client: SumoRLEnv │ +│ .step(phase_id=1) │ +└──────────────┬──────────────────┘ + │ HTTP +┌──────────────▼──────────────────┐ +│ FastAPI Server (Docker) │ +│ SumoEnvironment │ +│ ├─ Wraps sumo_rl │ +│ ├─ Single-agent mode │ +│ └─ No GUI │ +└──────────────┬──────────────────┘ + │ +┌──────────────▼──────────────────┐ +│ SUMO Simulator │ +│ - Reads .net.xml (network) │ +│ - Reads .rou.xml (routes) │ +│ - Simulates traffic flow │ +│ - Provides observations │ +└─────────────────────────────────┘ +``` + +## Bundled Network + +The default `single-intersection` network is a simple 4-way intersection with: +- **4 incoming roads** (North, South, East, West) +- **4 green phases** (NS straight, NS left, EW straight, EW left) +- **Vehicle flow**: Continuous stream with varying rates + +## Limitations + +- **No GUI in Docker**: SUMO GUI requires X server (not available in containers) +- **Single-agent only**: Multi-agent (multiple intersections) coming in future version +- **Fixed network per container**: Each container uses one network topology +- **Memory usage**: ~500MB for small networks, 2-4GB for large city networks + +## Troubleshooting + +### Container won't start +```bash +# Check logs (find the container ID with `docker ps -a`) +docker logs <container-id> + +# Verify network files exist +docker run sumo-rl-env:latest ls -la /app/nets/single-intersection/ +``` + +### "SUMO_HOME not set" error +This should be automatic in Docker. 
If running locally: +```bash +export SUMO_HOME=/usr/share/sumo +``` + +### Slow performance +- Reduce simulation duration: `SUMO_NUM_SECONDS=5000` +- Increase action interval: `SUMO_DELTA_TIME=10` +- Use smaller networks with fewer vehicles + +## References + +- [SUMO Documentation](https://sumo.dlr.de/docs/) +- [SUMO-RL GitHub](https://github.com/LucasAlegre/sumo-rl) +- [SUMO-RL Paper](https://peerj.com/articles/cs-575/) +- [RESCO Benchmarks](https://github.com/jault/RESCO) + +## Citation + +If you use SUMO-RL in your research, please cite: + +```bibtex +@misc{sumorl, + author = {Lucas N. Alegre}, + title = {{SUMO-RL}}, + year = {2019}, + publisher = {GitHub}, + journal = {GitHub repository}, + howpublished = {\url{https://github.com/LucasAlegre/sumo-rl}}, +} +``` + +## License + +This integration is licensed under the BSD-style license. SUMO-RL and SUMO have their own licenses. diff --git a/src/envs/sumo_rl_env/__init__.py b/src/envs/sumo_rl_env/__init__.py new file mode 100644 index 00000000..17aaf2f6 --- /dev/null +++ b/src/envs/sumo_rl_env/__init__.py @@ -0,0 +1,31 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +""" +SUMO-RL Environment for OpenEnv. + +This module provides OpenEnv integration for traffic signal control using +SUMO (Simulation of Urban MObility) via the SUMO-RL library. 
+ +Example: + >>> from envs.sumo_rl_env import SumoRLEnv, SumoAction + >>> + >>> # Connect to a running server or start via Docker + >>> env = SumoRLEnv.from_docker_image("sumo-rl-env:latest") + >>> + >>> # Reset and interact + >>> result = env.reset() + >>> result = env.step(SumoAction(phase_id=1)) + >>> print(result.reward, result.done) + >>> + >>> # Cleanup + >>> env.close() +""" + +from .client import SumoRLEnv +from .models import SumoAction, SumoObservation, SumoState + +__all__ = ["SumoRLEnv", "SumoAction", "SumoObservation", "SumoState"] diff --git a/src/envs/sumo_rl_env/client.py b/src/envs/sumo_rl_env/client.py new file mode 100644 index 00000000..deba88fd --- /dev/null +++ b/src/envs/sumo_rl_env/client.py @@ -0,0 +1,145 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +""" +HTTP client for SUMO-RL environment. + +This module provides a client to interact with the SUMO traffic signal +control environment over HTTP. +""" + +from typing import Any, Dict + +from core.http_env_client import HTTPEnvClient +from core.types import StepResult + +from .models import SumoAction, SumoObservation, SumoState + + +class SumoRLEnv(HTTPEnvClient[SumoAction, SumoObservation]): + """ + HTTP client for SUMO-RL traffic signal control environment. + + This client communicates with a SUMO environment server to control + traffic signals using reinforcement learning. 
+ + Example: + >>> # Start container and connect + >>> env = SumoRLEnv.from_docker_image("sumo-rl-env:latest") + >>> + >>> # Reset environment + >>> result = env.reset() + >>> print(f"Observation shape: {result.observation.observation_shape}") + >>> print(f"Action space: {result.observation.action_mask}") + >>> + >>> # Take action + >>> result = env.step(SumoAction(phase_id=1)) + >>> print(f"Reward: {result.reward}, Done: {result.done}") + >>> + >>> # Get state + >>> state = env.state() + >>> print(f"Sim time: {state.sim_time}, Total vehicles: {state.total_vehicles}") + >>> + >>> # Cleanup + >>> env.close() + + Example with custom network: + >>> # Use custom SUMO network via volume mount + >>> env = SumoRLEnv.from_docker_image( + ... "sumo-rl-env:latest", + ... port=8000, + ... volumes={ + ... "/path/to/my/nets": {"bind": "/nets", "mode": "ro"} + ... }, + ... environment={ + ... "SUMO_NET_FILE": "/nets/my-network.net.xml", + ... "SUMO_ROUTE_FILE": "/nets/my-routes.rou.xml", + ... } + ... ) + + Example with configuration: + >>> # Adjust simulation parameters + >>> env = SumoRLEnv.from_docker_image( + ... "sumo-rl-env:latest", + ... environment={ + ... "SUMO_NUM_SECONDS": "10000", + ... "SUMO_DELTA_TIME": "10", + ... "SUMO_REWARD_FN": "queue", + ... "SUMO_SEED": "123", + ... } + ... ) + """ + + def _step_payload(self, action: SumoAction) -> Dict[str, Any]: + """ + Convert SumoAction to JSON payload for HTTP request. + + Args: + action: SumoAction containing phase_id to execute. + + Returns: + Dictionary payload for step endpoint. + """ + return { + "phase_id": action.phase_id, + "ts_id": action.ts_id, + } + + def _parse_result(self, payload: Dict[str, Any]) -> StepResult[SumoObservation]: + """ + Parse step result from HTTP response JSON. + + Args: + payload: JSON response from step endpoint. + + Returns: + StepResult containing SumoObservation. 
+ """ + obs_data = payload.get("observation", {}) + + observation = SumoObservation( + observation=obs_data.get("observation", []), + observation_shape=obs_data.get("observation_shape", []), + action_mask=obs_data.get("action_mask", []), + sim_time=obs_data.get("sim_time", 0.0), + done=obs_data.get("done", False), + reward=obs_data.get("reward"), + metadata=obs_data.get("metadata", {}), + ) + + return StepResult( + observation=observation, + reward=payload.get("reward"), + done=payload.get("done", False), + ) + + def _parse_state(self, payload: Dict[str, Any]) -> SumoState: + """ + Parse state from HTTP response JSON. + + Args: + payload: JSON response from state endpoint. + + Returns: + SumoState object. + """ + return SumoState( + episode_id=payload.get("episode_id", ""), + step_count=payload.get("step_count", 0), + net_file=payload.get("net_file", ""), + route_file=payload.get("route_file", ""), + num_seconds=payload.get("num_seconds", 20000), + delta_time=payload.get("delta_time", 5), + yellow_time=payload.get("yellow_time", 2), + min_green=payload.get("min_green", 5), + max_green=payload.get("max_green", 50), + reward_fn=payload.get("reward_fn", "diff-waiting-time"), + sim_time=payload.get("sim_time", 0.0), + total_vehicles=payload.get("total_vehicles", 0), + total_waiting_time=payload.get("total_waiting_time", 0.0), + mean_waiting_time=payload.get("mean_waiting_time", 0.0), + mean_speed=payload.get("mean_speed", 0.0), + ) diff --git a/src/envs/sumo_rl_env/models.py b/src/envs/sumo_rl_env/models.py new file mode 100644 index 00000000..6c73092b --- /dev/null +++ b/src/envs/sumo_rl_env/models.py @@ -0,0 +1,110 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +""" +Data models for SUMO-RL Environment. 
+ +This module defines the Action, Observation, and State types for traffic +signal control using SUMO (Simulation of Urban MObility). +""" + +from dataclasses import dataclass, field +from typing import Dict, List, Optional + +from core.env_server import Action, Observation, State + + +@dataclass +class SumoAction(Action): + """ + Action for SUMO traffic signal control environment. + + Represents selecting which traffic light phase to activate next. + + Attributes: + phase_id: Index of the green phase to activate (0 to num_phases-1) + ts_id: Traffic signal ID (for multi-agent support, default "0") + """ + + phase_id: int + ts_id: str = "0" + + +@dataclass +class SumoObservation(Observation): + """ + Observation from SUMO traffic signal environment. + + Contains traffic metrics for decision-making. + + Attributes: + observation: Flattened observation vector containing: + - One-hot encoded current phase + - Min green flag (binary) + - Lane densities (normalized) + - Lane queues (normalized) + observation_shape: Shape of observation for reshaping + action_mask: List of valid action indices + sim_time: Current simulation time in seconds + done: Whether episode is complete + reward: Reward from last action (None on reset) + metadata: Additional info (system metrics, etc.) + """ + + observation: List[float] = field(default_factory=list) + observation_shape: List[int] = field(default_factory=list) + action_mask: List[int] = field(default_factory=list) + sim_time: float = 0.0 + done: bool = False + reward: Optional[float] = None + metadata: Dict = field(default_factory=dict) + + +@dataclass +class SumoState(State): + """ + State of SUMO traffic signal environment. + + Tracks both configuration and runtime state. 
+ + Configuration attributes: + net_file: Path to SUMO network file (.net.xml) + route_file: Path to SUMO route file (.rou.xml) + num_seconds: Total simulation duration in seconds + delta_time: Seconds between agent actions + yellow_time: Duration of yellow phase in seconds + min_green: Minimum green time per phase in seconds + max_green: Maximum green time per phase in seconds + reward_fn: Name of reward function used + + Runtime attributes: + episode_id: Unique episode identifier + step_count: Number of steps taken in episode + sim_time: Current simulation time in seconds + total_vehicles: Total number of vehicles in simulation + total_waiting_time: Cumulative waiting time across all vehicles + """ + + # Episode tracking + episode_id: str = "" + step_count: int = 0 + + # SUMO configuration + net_file: str = "" + route_file: str = "" + num_seconds: int = 20000 + delta_time: int = 5 + yellow_time: int = 2 + min_green: int = 5 + max_green: int = 50 + reward_fn: str = "diff-waiting-time" + + # Runtime metrics + sim_time: float = 0.0 + total_vehicles: int = 0 + total_waiting_time: float = 0.0 + mean_waiting_time: float = 0.0 + mean_speed: float = 0.0 diff --git a/src/envs/sumo_rl_env/nets/single-intersection/single-intersection.edg.xml b/src/envs/sumo_rl_env/nets/single-intersection/single-intersection.edg.xml new file mode 100755 index 00000000..52c3e7aa --- /dev/null +++ b/src/envs/sumo_rl_env/nets/single-intersection/single-intersection.edg.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/src/envs/sumo_rl_env/nets/single-intersection/single-intersection.net.xml b/src/envs/sumo_rl_env/nets/single-intersection/single-intersection.net.xml new file mode 100755 index 00000000..0f32510f --- /dev/null +++ b/src/envs/sumo_rl_env/nets/single-intersection/single-intersection.net.xml @@ -0,0 +1,86 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/src/envs/sumo_rl_env/nets/single-intersection/single-intersection.nod.xml b/src/envs/sumo_rl_env/nets/single-intersection/single-intersection.nod.xml new file mode 100755 index 00000000..a8b68d54 --- /dev/null +++ b/src/envs/sumo_rl_env/nets/single-intersection/single-intersection.nod.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/src/envs/sumo_rl_env/nets/single-intersection/single-intersection.rou.xml b/src/envs/sumo_rl_env/nets/single-intersection/single-intersection.rou.xml new file mode 100755 index 00000000..291cdee8 --- /dev/null +++ b/src/envs/sumo_rl_env/nets/single-intersection/single-intersection.rou.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/src/envs/sumo_rl_env/nets/single-intersection/single-intersection.sumocfg b/src/envs/sumo_rl_env/nets/single-intersection/single-intersection.sumocfg new file mode 100755 index 00000000..035327b7 --- /dev/null +++ b/src/envs/sumo_rl_env/nets/single-intersection/single-intersection.sumocfg @@ -0,0 +1,10 @@ + + + + + + + diff --git a/src/envs/sumo_rl_env/server/Dockerfile b/src/envs/sumo_rl_env/server/Dockerfile new file mode 100644 index 00000000..d1495283 --- /dev/null +++ b/src/envs/sumo_rl_env/server/Dockerfile @@ -0,0 +1,65 @@ +# Dockerfile for SUMO-RL Environment +# This image provides traffic signal control via SUMO (Simulation of Urban MObility) + +# Configurable base image - defaults to local build, can be overridden for CI/CD +# Base image provides: fastapi, uvicorn, requests, curl, PYTHONPATH=/app/src +# +# Local build: docker build -t envtorch-base:latest -f src/core/containers/images/Dockerfile . +# docker build -f src/envs/sumo_rl_env/server/Dockerfile -t sumo-rl-env:latest . +# +# CI/CD build: docker build --build-arg BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest \ +# -f src/envs/sumo_rl_env/server/Dockerfile -t sumo-rl-env:latest . 
+ARG BASE_IMAGE=envtorch-base:latest +FROM ${BASE_IMAGE} + +# Install SUMO system dependencies +# SUMO is available in Debian repositories +RUN apt-get update && apt-get install -y --no-install-recommends \ + sumo \ + sumo-tools \ + && rm -rf /var/lib/apt/lists/* + +# Set SUMO_HOME environment variable +ENV SUMO_HOME=/usr/share/sumo + +# Install SUMO-RL and Python dependencies (specifiers are quoted so the shell does not parse ">=" as an output redirect) +# sumo-rl includes: gymnasium, pettingzoo, numpy, pandas, sumolib, traci +RUN pip install --no-cache-dir \ + "gymnasium>=0.28" \ + "pettingzoo>=1.24.3" \ + "numpy>=1.24.0" \ + "pandas>=2.0.0" \ + "sumolib>=1.14.0" \ + "traci>=1.14.0" \ + "sumo-rl>=1.4.5" + +# Copy OpenEnv core (base image already set WORKDIR=/app) +COPY src/core/ /app/src/core/ + +# Copy SUMO-RL environment code (includes nets/) +COPY src/envs/sumo_rl_env/ /app/src/envs/sumo_rl_env/ + +# Copy example network files to expected location +# Default: single-intersection (simple 4-way intersection) +COPY src/envs/sumo_rl_env/nets/single-intersection/ /app/nets/single-intersection/ + +# SUMO environment variables (can be overridden at runtime) +ENV SUMO_NET_FILE=/app/nets/single-intersection/single-intersection.net.xml +ENV SUMO_ROUTE_FILE=/app/nets/single-intersection/single-intersection.rou.xml +ENV SUMO_NUM_SECONDS=20000 +ENV SUMO_DELTA_TIME=5 +ENV SUMO_YELLOW_TIME=2 +ENV SUMO_MIN_GREEN=5 +ENV SUMO_MAX_GREEN=50 +ENV SUMO_REWARD_FN=diff-waiting-time +ENV SUMO_SEED=42 + +# Expose port +EXPOSE 8000 + +# Health check +HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ + CMD curl -f http://localhost:8000/health || exit 1 + +# Run the FastAPI server +CMD ["uvicorn", "envs.sumo_rl_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/src/envs/sumo_rl_env/server/__init__.py b/src/envs/sumo_rl_env/server/__init__.py new file mode 100644 index 00000000..f4b70221 --- /dev/null +++ b/src/envs/sumo_rl_env/server/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +"""SUMO-RL environment server package.""" diff --git a/src/envs/sumo_rl_env/server/app.py b/src/envs/sumo_rl_env/server/app.py new file mode 100644 index 00000000..b81463ae --- /dev/null +++ b/src/envs/sumo_rl_env/server/app.py @@ -0,0 +1,47 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +""" +FastAPI application for SUMO-RL environment server. + +This module creates an HTTP server that exposes traffic signal control +via the OpenEnv API using SUMO (Simulation of Urban MObility). +""" + +import os + +from core.env_server import create_fastapi_app + +from ..models import SumoAction, SumoObservation +from .sumo_environment import SumoEnvironment + +# Get configuration from environment variables +net_file = os.getenv("SUMO_NET_FILE", "/app/nets/single-intersection.net.xml") +route_file = os.getenv("SUMO_ROUTE_FILE", "/app/nets/single-intersection.rou.xml") +num_seconds = int(os.getenv("SUMO_NUM_SECONDS", "20000")) +delta_time = int(os.getenv("SUMO_DELTA_TIME", "5")) +yellow_time = int(os.getenv("SUMO_YELLOW_TIME", "2")) +min_green = int(os.getenv("SUMO_MIN_GREEN", "5")) +max_green = int(os.getenv("SUMO_MAX_GREEN", "50")) +reward_fn = os.getenv("SUMO_REWARD_FN", "diff-waiting-time") +sumo_seed = int(os.getenv("SUMO_SEED", "42")) + +# Create single environment instance +# This is reused for all HTTP requests (avoids TraCI connection issues) +env = SumoEnvironment( + net_file=net_file, + route_file=route_file, + num_seconds=num_seconds, + delta_time=delta_time, + yellow_time=yellow_time, + min_green=min_green, + max_green=max_green, + reward_fn=reward_fn, + sumo_seed=sumo_seed, +) + +# Create FastAPI app +app = create_fastapi_app(env, 
SumoAction, SumoObservation) diff --git a/src/envs/sumo_rl_env/server/sumo_environment.py b/src/envs/sumo_rl_env/server/sumo_environment.py new file mode 100644 index 00000000..757b9f17 --- /dev/null +++ b/src/envs/sumo_rl_env/server/sumo_environment.py @@ -0,0 +1,237 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +""" +SUMO-RL Environment Server Implementation. + +This module wraps the SUMO-RL SumoEnvironment and exposes it +via the OpenEnv Environment interface for traffic signal control. +""" + +import os +import uuid +from typing import Any, Dict + +# Set SUMO_HOME before importing sumo_rl +os.environ.setdefault("SUMO_HOME", "/usr/share/sumo") + +from core.env_server import Action, Environment, Observation + +from ..models import SumoAction, SumoObservation, SumoState + +# Import SUMO-RL +try: + from sumo_rl import SumoEnvironment as BaseSumoEnv +except ImportError as e: + raise ImportError( + "sumo-rl is not installed. " + "Please install it with: pip install sumo-rl" + ) from e + + +class SumoEnvironment(Environment): + """ + SUMO-RL Environment wrapper for OpenEnv. + + This environment wraps the SUMO traffic signal control environment + for single-agent reinforcement learning. + + Args: + net_file: Path to SUMO network file (.net.xml) + route_file: Path to SUMO route file (.rou.xml) + num_seconds: Simulation duration in seconds (default: 20000) + delta_time: Seconds between agent actions (default: 5) + yellow_time: Yellow phase duration in seconds (default: 2) + min_green: Minimum green time in seconds (default: 5) + max_green: Maximum green time in seconds (default: 50) + reward_fn: Reward function name (default: "diff-waiting-time") + sumo_seed: Random seed for reproducibility (default: 42) + + Example: + >>> env = SumoEnvironment( + ... 
net_file="/app/nets/single-intersection/single-intersection.net.xml", + ... route_file="/app/nets/single-intersection/single-intersection.rou.xml" + ... ) + >>> obs = env.reset() + >>> print(obs.observation_shape) + >>> obs = env.step(SumoAction(phase_id=1)) + >>> print(obs.reward, obs.done) + """ + + def __init__( + self, + net_file: str, + route_file: str, + num_seconds: int = 20000, + delta_time: int = 5, + yellow_time: int = 2, + min_green: int = 5, + max_green: int = 50, + reward_fn: str = "diff-waiting-time", + sumo_seed: int = 42, + ): + """Store the configuration, build the wrapped sumo_rl environment (no GUI, single-agent) and the initial SumoState.""" + super().__init__() + + # Store configuration + self.net_file = net_file + self.route_file = route_file + self.num_seconds = num_seconds + self.delta_time = delta_time + self.yellow_time = yellow_time + self.min_green = min_green + self.max_green = max_green + self.reward_fn = reward_fn + self.sumo_seed = sumo_seed + + # Create SUMO environment (single-agent mode) + # Key settings: + # - use_gui=False: No GUI in Docker + # - single_agent=True: Returns single obs/reward (not dict) + # - sumo_warnings=False: Suppress SUMO warnings + # - out_csv_name=None: Don't write CSV files + self.env = BaseSumoEnv( + net_file=net_file, + route_file=route_file, + use_gui=False, + single_agent=True, + num_seconds=num_seconds, + delta_time=delta_time, + yellow_time=yellow_time, + min_green=min_green, + max_green=max_green, + reward_fn=reward_fn, + sumo_seed=sumo_seed, + sumo_warnings=False, + out_csv_name=None, # Disable CSV output + add_system_info=True, + add_per_agent_info=False, + ) + + # Initialize state + self._state = SumoState( + net_file=net_file, + route_file=route_file, + num_seconds=num_seconds, + delta_time=delta_time, + yellow_time=yellow_time, + min_green=min_green, + max_green=max_green, + reward_fn=reward_fn, + ) + + self._last_info = {} + + def reset(self) -> Observation: + """ + Reset the environment and return initial observation. + + Returns: + Initial SumoObservation for the agent. 
+ """ + # Reset SUMO simulation + obs, info = self.env.reset() + + # Update state tracking + self._state.episode_id = str(uuid.uuid4()) + self._state.step_count = 0 + self._state.sim_time = 0.0 + + # Store info for metadata + self._last_info = info + + return self._make_observation(obs, reward=None, done=False, info=info) + + def step(self, action: Action) -> Observation: + """ + Execute agent's action and return resulting observation. + + Args: + action: SumoAction containing the phase_id to execute. + + Returns: + SumoObservation after action execution. + + Raises: + ValueError: If action is not a SumoAction. + """ + if not isinstance(action, SumoAction): + raise ValueError(f"Expected SumoAction, got {type(action)}") + + # Validate phase_id + num_phases = self.env.action_space.n + if action.phase_id < 0 or action.phase_id >= num_phases: + raise ValueError( + f"Invalid phase_id: {action.phase_id}. " + f"Valid range: [0, {num_phases - 1}]" + ) + + # Execute action in SUMO + # Returns: (obs, reward, terminated, truncated, info) + obs, reward, terminated, truncated, info = self.env.step(action.phase_id) + done = terminated or truncated + + # Update state + self._state.step_count += 1 + self._state.sim_time = info.get("step", 0.0) + self._state.total_vehicles = info.get("system_total_running", 0) + self._state.total_waiting_time = info.get("system_total_waiting_time", 0.0) + self._state.mean_waiting_time = info.get("system_mean_waiting_time", 0.0) + self._state.mean_speed = info.get("system_mean_speed", 0.0) + + # Store info for metadata + self._last_info = info + + return self._make_observation(obs, reward=reward, done=done, info=info) + + @property + def state(self) -> SumoState: + """Get current environment state.""" + return self._state + + def _make_observation( + self, obs: Any, reward: float, done: bool, info: Dict + ) -> SumoObservation: + """ + Create SumoObservation from SUMO environment output. 
+ + Args: + obs: Observation array from SUMO environment + reward: Reward value (None on reset) + done: Whether episode is complete + info: Info dictionary from SUMO environment + + Returns: + SumoObservation for the agent. + """ + # Convert observation to list + if hasattr(obs, "tolist"): + obs_list = obs.tolist() + else: + obs_list = list(obs) + + # Get action mask (all actions valid in SUMO-RL) + num_phases = self.env.action_space.n + action_mask = list(range(num_phases)) + + # Extract system metrics for metadata + system_info = { + k: v for k, v in info.items() if k.startswith("system_") + } + + # Create observation + return SumoObservation( + observation=obs_list, + observation_shape=[len(obs_list)], + action_mask=action_mask, + sim_time=info.get("step", 0.0), + done=done, + reward=reward, + metadata={ + "num_green_phases": num_phases, + "system_info": system_info, + }, + ) diff --git a/src/envs/sumo_rl_env/test_sumo_rl.sh b/src/envs/sumo_rl_env/test_sumo_rl.sh new file mode 100755 index 00000000..61265c73 --- /dev/null +++ b/src/envs/sumo_rl_env/test_sumo_rl.sh @@ -0,0 +1,220 @@ +#!/bin/bash +# Complete SUMO-RL Integration Test Script +# Run this to verify everything works! + +set -e # Exit on error + +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "🚀 SUMO-RL Environment Test Script" +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "" + +# Navigate to repo root +cd /Users/sanyambhutani/GH/OpenEnv + +echo "📁 Working directory: $(pwd)" +echo "" + +# Step 1: Check if base image exists +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "Step 1: Checking for base image..." +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + +if docker images | grep -q "envtorch-base.*latest"; then + echo "✅ envtorch-base:latest found" +else + echo "⚠️ envtorch-base:latest not found - building it now..." 
#!/bin/bash
# Complete SUMO-RL Integration Test Script
# Run this to verify everything works!
#
# Builds the base and SUMO-RL Docker images, starts a container and exercises
# the /health, /reset, /step and /state HTTP endpoints.
#
# Requirements: docker, curl, jq
# Optional:     OPENENV_ROOT  - repository root (defaults to three directories
#                               above this script, i.e. src/envs/sumo_rl_env/../../..)

set -e  # Exit on error

SEPARATOR="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
CONTAINER_NAME="sumo-rl-test"
IMAGE_NAME="sumo-rl-env:latest"
BASE_IMAGE_NAME="envtorch-base:latest"
BASE_URL="http://localhost:8000"
STARTUP_TIMEOUT=30  # Max seconds to wait for the server to answer /health

# Print a section title framed by separator lines.
section() {
    echo "$SEPARATOR"
    echo "$1"
    echo "$SEPARATOR"
}

# Succeeds when the test container is currently running.
container_running() {
    docker ps --format '{{.Names}}' | grep -qx "$CONTAINER_NAME"
}

# Stop and remove the test container; safe to call when it does not exist.
remove_container() {
    docker stop "$CONTAINER_NAME" > /dev/null 2>&1 || true
    docker rm "$CONTAINER_NAME" > /dev/null 2>&1 || true
}

# Without this trap a failing step (set -e / exit 1) would leave the container
# running and port 8000 bound for the next run.
trap remove_container EXIT

section "🚀 SUMO-RL Environment Test Script"
echo ""

# Fail early with a clear message instead of a cryptic "command not found"
for tool in docker curl jq; do
    if ! command -v "$tool" > /dev/null 2>&1; then
        echo "❌ Required tool not found: $tool"
        exit 1
    fi
done

# Navigate to repo root (derived from this script's location so the test runs
# on any machine, not just the original author's checkout)
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="${OPENENV_ROOT:-$(cd "$SCRIPT_DIR/../../.." && pwd)}"
cd "$REPO_ROOT"

echo "📁 Working directory: $(pwd)"
echo ""

# Step 1: Check if base image exists
section "Step 1: Checking for base image..."

if docker image inspect "$BASE_IMAGE_NAME" > /dev/null 2>&1; then
    echo "✅ envtorch-base:latest found"
else
    echo "⚠️  envtorch-base:latest not found - building it now..."
    echo ""
    docker build -t "$BASE_IMAGE_NAME" -f src/core/containers/images/Dockerfile .
    echo ""
    echo "✅ Base image built successfully"
fi
echo ""

# Step 2: Build SUMO-RL environment
section "Step 2: Building SUMO-RL environment image..."
echo "⏳ This will take 5-10 minutes (installing SUMO)..."
echo ""

docker build -f src/envs/sumo_rl_env/server/Dockerfile -t "$IMAGE_NAME" .

echo ""
echo "✅ SUMO-RL environment built successfully"
echo ""

# Check image size
IMAGE_SIZE=$(docker images "$IMAGE_NAME" --format "{{.Size}}")
echo "📦 Image size: $IMAGE_SIZE"
echo ""

# Step 3: Start container
section "Step 3: Starting SUMO-RL container..."

# Stop any existing container
remove_container

# Start new container
docker run -d -p 8000:8000 --name "$CONTAINER_NAME" "$IMAGE_NAME"

# Poll for readiness instead of sleeping a fixed time: returns as soon as the
# server answers, and gives slow machines up to STARTUP_TIMEOUT seconds.
echo "⏳ Waiting for container to start..."
for ((waited = 0; waited < STARTUP_TIMEOUT; waited++)); do
    if curl -sf "$BASE_URL/health" > /dev/null 2>&1; then
        break
    fi
    # No point in waiting further if the container already exited
    if ! container_running; then
        break
    fi
    sleep 1
done

# Check if container is running
if container_running; then
    echo "✅ Container is running"
else
    echo "❌ Container failed to start!"
    echo "Logs:"
    docker logs "$CONTAINER_NAME"
    exit 1
fi
echo ""

# Step 4: Test health endpoint
section "Step 4: Testing health endpoint..."

# `|| true`: a refused connection must reach the failure message below rather
# than aborting silently through set -e.
HEALTH_RESPONSE=$(curl -s "$BASE_URL/health" || true)
echo "Response: $HEALTH_RESPONSE"

if echo "$HEALTH_RESPONSE" | grep -q "healthy"; then
    echo "✅ Health check passed"
else
    echo "❌ Health check failed!"
    exit 1
fi
echo ""

# Step 5: Test reset endpoint
section "Step 5: Testing reset endpoint..."
echo "⏳ This may take 3-5 seconds (SUMO simulation starting)..."

RESET_RESPONSE=$(curl -s -X POST "$BASE_URL/reset" || true)

if echo "$RESET_RESPONSE" | jq -e '.observation.observation' > /dev/null 2>&1; then
    echo "✅ Reset successful"

    # Extract observation details
    OBS_SHAPE=$(echo "$RESET_RESPONSE" | jq '.observation.observation_shape')
    ACTION_MASK=$(echo "$RESET_RESPONSE" | jq '.observation.action_mask')

    echo "  📊 Observation shape: $OBS_SHAPE"
    echo "  🎮 Available actions: $ACTION_MASK"
else
    echo "❌ Reset failed!"
    echo "Response: $RESET_RESPONSE"
    exit 1
fi
echo ""

# Step 6: Test step endpoint
section "Step 6: Testing step endpoint (taking 5 actions)..."

for i in {1..5}; do
    # Take action (cycle through phases 0-1)
    PHASE_ID=$((i % 2))

    STEP_RESPONSE=$(curl -s -X POST "$BASE_URL/step" \
        -H "Content-Type: application/json" \
        -d "{\"action\": {\"phase_id\": $PHASE_ID, \"ts_id\": \"0\"}}" || true)

    if echo "$STEP_RESPONSE" | jq -e '.reward' > /dev/null 2>&1; then
        REWARD=$(echo "$STEP_RESPONSE" | jq '.reward')
        DONE=$(echo "$STEP_RESPONSE" | jq '.done')
        echo "  Step $i: phase=$PHASE_ID, reward=$REWARD, done=$DONE"
    else
        echo "❌ Step $i failed!"
        echo "Response: $STEP_RESPONSE"
        exit 1
    fi
done

echo "✅ All steps successful"
echo ""

# Step 7: Test state endpoint
section "Step 7: Testing state endpoint..."

STATE_RESPONSE=$(curl -s "$BASE_URL/state" || true)

if echo "$STATE_RESPONSE" | jq -e '.episode_id' > /dev/null 2>&1; then
    echo "✅ State endpoint working"

    # Extract state details
    EPISODE_ID=$(echo "$STATE_RESPONSE" | jq -r '.episode_id')
    STEP_COUNT=$(echo "$STATE_RESPONSE" | jq '.step_count')
    SIM_TIME=$(echo "$STATE_RESPONSE" | jq '.sim_time')
    TOTAL_VEHICLES=$(echo "$STATE_RESPONSE" | jq '.total_vehicles')

    echo "  📝 Episode ID: ${EPISODE_ID:0:8}..."
    echo "  🔢 Step count: $STEP_COUNT"
    echo "  ⏱️  Simulation time: $SIM_TIME seconds"
    echo "  🚗 Total vehicles: $TOTAL_VEHICLES"
else
    echo "❌ State endpoint failed!"
    echo "Response: $STATE_RESPONSE"
    exit 1
fi
echo ""

# Step 8: Check logs for errors
section "Step 8: Checking container logs for errors..."

LOGS=$(docker logs "$CONTAINER_NAME" 2>&1)

# Check for Python errors (but ignore LoggerMode.Error which is expected).
# Captured once so the matches are printed a single time and the summary can
# report the real outcome; `|| true` because grep exits 1 on "no match".
LOG_ERRORS=$(echo "$LOGS" | grep -i "error\|exception\|traceback" | grep -v "LoggerMode.Error" || true)

if [ -n "$LOG_ERRORS" ]; then
    echo "⚠️  Found errors in logs:"
    echo "$LOG_ERRORS"
else
    echo "✅ No errors found in logs"
fi
echo ""

# Step 9: Cleanup
section "Step 9: Cleanup..."

echo "🧹 Stopping and removing test container..."
remove_container

echo "✅ Cleanup complete"
echo ""

# Final summary
section "🎉 ALL TESTS PASSED!"
echo ""
echo "Summary:"
echo "  ✅ Docker image built successfully ($IMAGE_SIZE)"
echo "  ✅ Container started and ran"
echo "  ✅ Health endpoint working"
echo "  ✅ Reset endpoint working"
echo "  ✅ Step endpoint working (5 actions executed)"
echo "  ✅ State endpoint working"
if [ -z "$LOG_ERRORS" ]; then
    echo "  ✅ No errors in logs"
else
    echo "  ⚠️  Errors found in logs (see Step 8 output)"
fi
echo ""
echo "🎯 SUMO-RL integration is working perfectly!"
echo ""
echo "Next steps:"
echo "  1. Test Python client: python examples/sumo_rl_simple.py"
echo "  2. Push to GitHub to trigger CI/CD"
echo "  3. Use for RL training!"
echo ""