From 43c3c905c0ad71fc9c5973174d437b5b1d569ee0 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Fri, 17 Oct 2025 11:27:11 -0700 Subject: [PATCH 01/13] add model --- SUMO_RL_INTEGRATION_PLAN.md | 663 +++++++++++++++++++++++++++++++++ SUMO_RL_RISK_ANALYSIS.md | 505 +++++++++++++++++++++++++ src/envs/sumo_rl_env/models.py | 110 ++++++ 3 files changed, 1278 insertions(+) create mode 100644 SUMO_RL_INTEGRATION_PLAN.md create mode 100644 SUMO_RL_RISK_ANALYSIS.md create mode 100644 src/envs/sumo_rl_env/models.py diff --git a/SUMO_RL_INTEGRATION_PLAN.md b/SUMO_RL_INTEGRATION_PLAN.md new file mode 100644 index 00000000..47e4d339 --- /dev/null +++ b/SUMO_RL_INTEGRATION_PLAN.md @@ -0,0 +1,663 @@ +# SUMO-RL Integration Plan for OpenEnv + +**Date**: 2025-10-17 +**Status**: Design Phase +**Complexity**: High (Docker + SUMO system dependencies) + +--- + +## ๐Ÿค” ULTRATHINK ANALYSIS + +### What is SUMO-RL? + +**SUMO-RL** is a Reinforcement Learning environment for **Traffic Signal Control** using SUMO (Simulation of Urban MObility). + +- **Use Case**: Train RL agents to optimize traffic light timing to minimize vehicle delays +- **Main Class**: `SumoEnvironment` from `sumo_rl.environment.env` +- **APIs**: Supports both Gymnasium (single-agent) and PettingZoo (multi-agent) +- **Repository**: https://github.com/LucasAlegre/sumo-rl +- **Version**: 1.4.5 + +### How SUMO-RL Works + +1. **SUMO Simulator**: Microscopic traffic simulation +2. **Network Files**: `.net.xml` (road network) + `.rou.xml` (vehicle routes) +3. **Traffic Signals**: RL agent controls when lights change phases +4. **Observation**: Lane densities, queues, current phase, min_green flag +5. **Action**: Select next green phase (discrete action space) +6. 
**Reward**: Change in cumulative vehicle delay (default) + +### Example Usage + +```python +import gymnasium as gym +import sumo_rl + +env = gym.make('sumo-rl-v0', + net_file='nets/single-intersection.net.xml', + route_file='nets/single-intersection.rou.xml', + use_gui=False, + num_seconds=100000) + +obs, info = env.reset() +done = False +while not done: + action = env.action_space.sample() + obs, reward, terminated, truncated, info = env.step(action) + done = terminated or truncated +``` + +--- + +## 🎯 Integration Strategy + +### Follow Atari Pattern + +Like Atari, we'll create: +1. **models.py** - Data models +2. **server/sumo_environment.py** - Environment wrapper +3. **server/app.py** - FastAPI server +4. **server/Dockerfile** - Container with SUMO +5. **client.py** - HTTP client + +### Key Differences from Atari + +| Aspect | Atari | SUMO-RL | +|--------|-------|---------| +| **External Dependency** | ALE (pip installable) | SUMO (system package) | +| **Configuration** | Game name (simple) | Network + route files (complex) | +| **Observation** | Image pixels | Traffic metrics (vectors) | +| **Action** | Joystick actions | Traffic signal phases | +| **Docker Complexity** | Simple | High (need SUMO system install) | +| **File Dependencies** | None (ROMs bundled) | Network/route XML files required | + +--- + +## 📋 Technical Design + +### 1. 
Data Models (`models.py`) + +```python +from dataclasses import dataclass +from typing import List, Optional +from core.env_server import Action, Observation, State + +@dataclass +class SumoAction(Action): + """Action for SUMO environment - select next green phase.""" + phase_id: int # Which green phase to activate next + ts_id: str = "0" # Traffic signal ID (for multi-agent support later) + +@dataclass +class SumoObservation(Observation): + """Observation from SUMO environment.""" + observation: List[float] # Full observation vector + observation_shape: List[int] # Shape for reshaping + + # Observation components (for interpretability) + current_phase: Optional[int] = None + min_green_passed: Optional[bool] = None + lane_densities: Optional[List[float]] = None + lane_queues: Optional[List[float]] = None + + # Metadata + action_mask: Optional[List[int]] = None # Legal actions + sim_time: float = 0.0 # Current simulation time + + done: bool = False + reward: Optional[float] = None + +@dataclass +class SumoState(State): + """State of SUMO environment.""" + episode_id: str = "" + step_count: int = 0 + + # SUMO configuration + net_file: str = "" + route_file: str = "" + num_seconds: int = 20000 + delta_time: int = 5 + yellow_time: int = 2 + min_green: int = 5 + max_green: int = 50 + + # Runtime state + sim_time: float = 0.0 + total_vehicles: int = 0 + total_waiting_time: float = 0.0 +``` + +### 2. Environment Wrapper (`server/sumo_environment.py`) + +```python +import uuid +from typing import Any, Dict, Literal, Optional +from core.env_server import Action, Environment, Observation +from ..models import SumoAction, SumoObservation, SumoState + +import os +os.environ.setdefault('SUMO_HOME', '/usr/share/sumo') + +from sumo_rl import SumoEnvironment as BaseSumoEnv + +class SumoEnvironment(Environment): + """ + SUMO-RL Environment wrapper for OpenEnv. + + Wraps the SUMO traffic signal control environment for single-agent RL. 
+ + Args: + net_file: Path to SUMO network file (.net.xml) + route_file: Path to SUMO route file (.rou.xml) + num_seconds: Simulation duration in seconds + delta_time: Seconds between actions + yellow_time: Yellow phase duration + min_green: Minimum green time + max_green: Maximum green time + reward_fn: Reward function name + """ + + def __init__( + self, + net_file: str, + route_file: str, + num_seconds: int = 20000, + delta_time: int = 5, + yellow_time: int = 2, + min_green: int = 5, + max_green: int = 50, + reward_fn: str = "diff-waiting-time", + ): + super().__init__() + + # Store config + self.net_file = net_file + self.route_file = route_file + self.num_seconds = num_seconds + self.delta_time = delta_time + self.yellow_time = yellow_time + self.min_green = min_green + self.max_green = max_green + self.reward_fn = reward_fn + + # Create SUMO environment (single-agent mode) + self.env = BaseSumoEnv( + net_file=net_file, + route_file=route_file, + use_gui=False, # No GUI in Docker + single_agent=True, # Single-agent for OpenEnv + num_seconds=num_seconds, + delta_time=delta_time, + yellow_time=yellow_time, + min_green=min_green, + max_green=max_green, + reward_fn=reward_fn, + sumo_warnings=False, + ) + + # Initialize state + self._state = SumoState( + net_file=net_file, + route_file=route_file, + num_seconds=num_seconds, + delta_time=delta_time, + yellow_time=yellow_time, + min_green=min_green, + max_green=max_green, + ) + + self._last_obs = None + self._last_info = None + + def reset(self) -> Observation: + """Reset the environment.""" + # Reset SUMO + obs, info = self.env.reset() + + # Update state + self._state.episode_id = str(uuid.uuid4()) + self._state.step_count = 0 + self._state.sim_time = 0.0 + + # Store for later + self._last_obs = obs + self._last_info = info + + return self._make_observation(obs, 0.0, False, info) + + def step(self, action: Action) -> Observation: + """Execute action.""" + if not isinstance(action, SumoAction): + raise 
ValueError(f"Expected SumoAction, got {type(action)}") + + # Validate action + if action.phase_id < 0 or action.phase_id >= self.env.action_space.n: + raise ValueError( + f"Invalid phase_id: {action.phase_id}. " + f"Valid range: [0, {self.env.action_space.n - 1}]" + ) + + # Execute in SUMO + obs, reward, terminated, truncated, info = self.env.step(action.phase_id) + done = terminated or truncated + + # Update state + self._state.step_count += 1 + self._state.sim_time = info.get('step', 0.0) + self._state.total_vehicles = info.get('system_total_running', 0) + self._state.total_waiting_time = info.get('system_total_waiting_time', 0.0) + + # Store for later + self._last_obs = obs + self._last_info = info + + return self._make_observation(obs, reward, done, info) + + @property + def state(self) -> SumoState: + """Get current state.""" + return self._state + + def _make_observation( + self, + obs: Any, + reward: float, + done: bool, + info: Dict + ) -> SumoObservation: + """Create SumoObservation from SUMO env output.""" + # Convert observation to list + if hasattr(obs, 'tolist'): + obs_list = obs.tolist() + else: + obs_list = list(obs) + + # Get action mask (all actions valid in SUMO-RL) + action_mask = list(range(self.env.action_space.n)) + + # Create observation + return SumoObservation( + observation=obs_list, + observation_shape=[len(obs_list)], + action_mask=action_mask, + sim_time=info.get('step', 0.0), + done=done, + reward=reward, + metadata={ + "num_green_phases": self.env.action_space.n, + "system_info": { + k: v for k, v in info.items() if k.startswith('system_') + }, + }, + ) +``` + +### 3. 
FastAPI Server (`server/app.py`) + +```python +import os +from core.env_server import create_fastapi_app +from ..models import SumoAction, SumoObservation +from .sumo_environment import SumoEnvironment + +# Get configuration from environment +net_file = os.getenv("SUMO_NET_FILE", "/app/nets/single-intersection.net.xml") +route_file = os.getenv("SUMO_ROUTE_FILE", "/app/nets/single-intersection.rou.xml") +num_seconds = int(os.getenv("SUMO_NUM_SECONDS", "20000")) +delta_time = int(os.getenv("SUMO_DELTA_TIME", "5")) +yellow_time = int(os.getenv("SUMO_YELLOW_TIME", "2")) +min_green = int(os.getenv("SUMO_MIN_GREEN", "5")) +max_green = int(os.getenv("SUMO_MAX_GREEN", "50")) +reward_fn = os.getenv("SUMO_REWARD_FN", "diff-waiting-time") + +# Create environment +env = SumoEnvironment( + net_file=net_file, + route_file=route_file, + num_seconds=num_seconds, + delta_time=delta_time, + yellow_time=yellow_time, + min_green=min_green, + max_green=max_green, + reward_fn=reward_fn, +) + +# Create FastAPI app +app = create_fastapi_app(env, SumoAction, SumoObservation) +``` + +### 4. 
Dockerfile (`server/Dockerfile`) + +```dockerfile +# Configurable base image +ARG BASE_IMAGE=envtorch-base:latest +FROM ${BASE_IMAGE} + +# Install SUMO +# SUMO is a microscopic traffic simulation package +RUN apt-get update && apt-get install -y --no-install-recommends \ + software-properties-common \ + && add-apt-repository ppa:sumo/stable \ + && apt-get update \ + && apt-get install -y --no-install-recommends \ + sumo \ + sumo-tools \ + && rm -rf /var/lib/apt/lists/* + +# Set SUMO_HOME +ENV SUMO_HOME=/usr/share/sumo + +# Install SUMO-RL and dependencies +RUN pip install --no-cache-dir \ + gymnasium>=0.28 \ + pettingzoo>=1.24.3 \ + numpy>=1.24.0 \ + pandas>=2.0.0 \ + sumolib>=1.14.0 \ + traci>=1.14.0 \ + sumo-rl>=1.4.5 + +# Copy OpenEnv core +COPY src/core/ /app/src/core/ + +# Copy SUMO-RL environment code +COPY src/envs/sumo_rl_env/ /app/src/envs/sumo_rl_env/ + +# Copy example networks +# We'll bundle a simple single-intersection example +COPY sumo-rl/sumo_rl/nets/single-intersection/ /app/nets/ + +# Environment variables (can be overridden at runtime) +ENV SUMO_NET_FILE=/app/nets/single-intersection.net.xml +ENV SUMO_ROUTE_FILE=/app/nets/single-intersection.rou.xml +ENV SUMO_NUM_SECONDS=20000 +ENV SUMO_DELTA_TIME=5 +ENV SUMO_YELLOW_TIME=2 +ENV SUMO_MIN_GREEN=5 +ENV SUMO_MAX_GREEN=50 +ENV SUMO_REWARD_FN=diff-waiting-time + +# Expose port +EXPOSE 8000 + +# Health check +HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ + CMD curl -f http://localhost:8000/health || exit 1 + +# Run the FastAPI server +CMD ["uvicorn", "envs.sumo_rl_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"] +``` + +### 5. HTTP Client (`client.py`) + +```python +from typing import Any, Dict +from core.http_env_client import HTTPEnvClient +from core.types import StepResult +from .models import SumoAction, SumoObservation, SumoState + +class SumoRLEnv(HTTPEnvClient[SumoAction, SumoObservation]): + """ + HTTP client for SUMO-RL environment. 
+ + Example: + >>> env = SumoRLEnv.from_docker_image("sumo-rl-env:latest") + >>> result = env.reset() + >>> result = env.step(SumoAction(phase_id=1)) + >>> print(f"Reward: {result.reward}, Done: {result.done}") + >>> env.close() + """ + + def _step_payload(self, action: SumoAction) -> Dict[str, Any]: + """Convert action to JSON payload.""" + return { + "phase_id": action.phase_id, + "ts_id": action.ts_id, + } + + def _parse_result(self, payload: Dict[str, Any]) -> StepResult[SumoObservation]: + """Parse step result from JSON.""" + obs_data = payload.get("observation", {}) + + observation = SumoObservation( + observation=obs_data.get("observation", []), + observation_shape=obs_data.get("observation_shape", []), + current_phase=obs_data.get("current_phase"), + min_green_passed=obs_data.get("min_green_passed"), + lane_densities=obs_data.get("lane_densities"), + lane_queues=obs_data.get("lane_queues"), + action_mask=obs_data.get("action_mask", []), + sim_time=obs_data.get("sim_time", 0.0), + done=obs_data.get("done", False), + reward=obs_data.get("reward"), + metadata=obs_data.get("metadata", {}), + ) + + return StepResult( + observation=observation, + reward=payload.get("reward"), + done=payload.get("done", False), + ) + + def _parse_state(self, payload: Dict[str, Any]) -> SumoState: + """Parse state from JSON.""" + return SumoState( + episode_id=payload.get("episode_id", ""), + step_count=payload.get("step_count", 0), + net_file=payload.get("net_file", ""), + route_file=payload.get("route_file", ""), + num_seconds=payload.get("num_seconds", 20000), + delta_time=payload.get("delta_time", 5), + yellow_time=payload.get("yellow_time", 2), + min_green=payload.get("min_green", 5), + max_green=payload.get("max_green", 50), + sim_time=payload.get("sim_time", 0.0), + total_vehicles=payload.get("total_vehicles", 0), + total_waiting_time=payload.get("total_waiting_time", 0.0), + ) +``` + +--- + +## โš ๏ธ Critical Challenges + +### 1. 
SUMO System Dependency + +**Challenge**: SUMO must be installed at system level (apt-get), not just pip. + +**Solution**: +```dockerfile +RUN add-apt-repository ppa:sumo/stable && \ + apt-get update && \ + apt-get install -y sumo sumo-tools +``` + +### 2. Network Files Required + +**Challenge**: SUMO needs `.net.xml` and `.rou.xml` files to run. + +**Solutions**: +- **Bundle examples**: Copy simple networks from sumo-rl repo +- **Volume mount**: Let users mount their own networks +- **Default config**: Use single-intersection as default + +### 3. No GUI Support + +**Challenge**: Docker can't run SUMO GUI. + +**Solution**: Always use `use_gui=False` in Docker environment. + +### 4. Long Simulation Times + +**Challenge**: Traffic simulations can take minutes to complete. + +**Solution**: +- Set reasonable defaults (20000 seconds simulation time) +- Allow configuration via environment variables +- Document expected runtimes + +### 5. Multi-Agent Complexity + +**Challenge**: SUMO-RL supports multi-agent (multiple traffic lights). + +**Solution**: Start with single-agent only for OpenEnv integration. Multi-agent can be added later. 
+ +--- + +## 📊 Configuration Matrix + +| Variable | Default | Description | +|----------|---------|-------------| +| `SUMO_NET_FILE` | `/app/nets/single-intersection.net.xml` | Network topology file | +| `SUMO_ROUTE_FILE` | `/app/nets/single-intersection.rou.xml` | Vehicle routes file | +| `SUMO_NUM_SECONDS` | `20000` | Simulation duration | +| `SUMO_DELTA_TIME` | `5` | Seconds between actions | +| `SUMO_YELLOW_TIME` | `2` | Yellow phase duration | +| `SUMO_MIN_GREEN` | `5` | Minimum green time | +| `SUMO_MAX_GREEN` | `50` | Maximum green time | +| `SUMO_REWARD_FN` | `diff-waiting-time` | Reward function | + +### Available Reward Functions + +From SUMO-RL source: +- `diff-waiting-time` (default) - Change in cumulative waiting time +- `average-speed` - Average speed of vehicles +- `queue` - Total queue length +- `pressure` - Pressure (difference between incoming/outgoing vehicles) + +--- + +## 🧪 Testing Strategy + +### 1. Pre-Flight Checks +- Verify network files exist +- Check SUMO installation +- Validate Dockerfile syntax +- Test imports + +### 2. Docker Build Test +```bash +docker build -f src/envs/sumo_rl_env/server/Dockerfile -t sumo-rl-env:latest . +``` + +### 3. Runtime Tests +```bash +docker run -p 8000:8000 sumo-rl-env:latest + +curl http://localhost:8000/health +curl -X POST http://localhost:8000/reset +curl -X POST http://localhost:8000/step \ + -H "Content-Type: application/json" \ + -d '{"action": {"phase_id": 1, "ts_id": "0"}}' +``` + +### 4. 
Python Client Test +```python +from envs.sumo_rl_env import SumoRLEnv, SumoAction + +env = SumoRLEnv.from_docker_image("sumo-rl-env:latest") +result = env.reset() +result = env.step(SumoAction(phase_id=1)) +print(f"Reward: {result.reward}, Done: {result.done}") +env.close() +``` + +--- + +## ๐Ÿ“ฆ What to Bundle + +### Minimal Network Example + +Bundle the single-intersection example from sumo-rl: +``` +sumo-rl/sumo_rl/nets/single-intersection/ +โ”œโ”€โ”€ single-intersection.net.xml # Network topology +โ”œโ”€โ”€ single-intersection.rou.xml # Vehicle routes +``` + +This provides a working example out-of-the-box. + +### Additional Networks (Optional) + +Could bundle RESCO benchmarks for research: +- `grid4x4` - 4ร—4 grid of intersections +- `arterial4x4` - Arterial road network +- `cologne1` - Real-world Cologne network + +But start with single-intersection for simplicity. + +--- + +## ๐ŸŽฏ Implementation Plan + +### Phase 1: Core Implementation (4-6 hours) +1. Create `models.py` โœ“ (designed) +2. Create `server/sumo_environment.py` โœ“ (designed) +3. Create `server/app.py` โœ“ (designed) +4. Create `server/Dockerfile` โœ“ (designed) +5. Create `client.py` โœ“ (designed) + +### Phase 2: Testing (2-3 hours) +1. Build Docker image +2. Test basic functionality +3. Test different configurations +4. Verify reward functions work + +### Phase 3: Documentation (1-2 hours) +1. Write README.md +2. Create examples +3. Document network file format +4. Add to GitHub Actions + +### Phase 4: Integration (1 hour) +1. Add to `.github/workflows/docker-build.yml` +2. Update main README +3. Add to environments list + +**Total Estimate**: 8-12 hours + +--- + +## ๐Ÿš€ Next Steps + +1. **Create file structure** in `/Users/sanyambhutani/GH/OpenEnv/src/envs/sumo_rl_env/` +2. **Copy network files** from `/Users/sanyambhutani/OpenEnv/sumo-rl/sumo_rl/nets/` +3. **Implement all files** following the designs above +4. **Build and test Docker image** +5. **Create documentation** +6. 
**Add to GitHub Actions** + +--- + +## ๐Ÿ’ก Key Insights + +### Why SUMO-RL is Harder Than Atari + +1. **System Dependencies**: Atari (ale-py) is pip-installable, SUMO requires apt-get +2. **Configuration Complexity**: Atari just needs game name, SUMO needs network files +3. **Runtime**: Atari is fast, SUMO simulations can take minutes +4. **File Dependencies**: Atari bundles ROMs, SUMO needs user-provided networks + +### Why It's Still Doable + +1. **Single-Agent Mode**: Simplifies to standard Gymnasium API +2. **Bundle Example**: Include simple network to start immediately +3. **Environment Variables**: Easy runtime configuration +4. **Pattern Reuse**: Follow exact Atari pattern for consistency + +--- + +## ๐Ÿ“š References + +- [SUMO-RL GitHub](https://github.com/LucasAlegre/sumo-rl) +- [SUMO Documentation](https://sumo.dlr.de/docs/) +- [SUMO-RL Docs](https://lucasalegre.github.io/sumo-rl/) +- [RESCO Benchmarks Paper](https://people.engr.tamu.edu/guni/Papers/NeurIPS-signals.pdf) + +--- + +**Status**: Design complete, ready for implementation +**Complexity**: High (system dependencies + network files) +**Time Estimate**: 8-12 hours +**Confidence**: 85% (Dockerfile complexity is main risk) diff --git a/SUMO_RL_RISK_ANALYSIS.md b/SUMO_RL_RISK_ANALYSIS.md new file mode 100644 index 00000000..0f216b39 --- /dev/null +++ b/SUMO_RL_RISK_ANALYSIS.md @@ -0,0 +1,505 @@ +# SUMO-RL Integration: ULTRATHINK Risk Analysis + +**Date**: 2025-10-17 +**Status**: Deep Risk Assessment + +--- + +## โœ… Docker Eliminates PRIMARY Risk + +**YES - Docker solves the hardest problem!** + +| Risk | Without Docker | With Docker | +|------|---------------|-------------| +| **System Dependencies** | โŒ Nightmare | โœ… Solved | +| **Cross-platform** | โŒ Linux only | โœ… Works everywhere | +| **Installation** | โŒ Requires sudo | โœ… Just `docker run` | +| **Reproducibility** | โŒ "Works on my machine" | โœ… Identical | + +**Conclusion**: Docker takes away 80% of the pain. 
✨ + +--- + +## ⚠️ Remaining Risks (Deep Analysis) + +### 🔴 HIGH RISK + +#### 1. **TraCI Connection Management in HTTP Server** + +**Issue**: `SumoEnvironment` uses class variable `CONNECTION_LABEL` that increments globally. + +```python +CONNECTION_LABEL = 0 # For traci multi-client support + +def __init__(self): + self.label = str(SumoEnvironment.CONNECTION_LABEL) + SumoEnvironment.CONNECTION_LABEL += 1 +``` + +**Risk**: In HTTP server with concurrent requests: +- Request 1 creates env (label=0) +- Request 2 creates env (label=1) +- Request 1 resets → closes connection label=0 +- Request 2 steps → tries to use label=1 +- **Potential conflict if requests overlap** + +**Likelihood**: Medium (depends on usage pattern) + +**Impact**: High (could cause simulation errors) + +**Mitigation**: +```python +# Option 1: Single environment instance (RECOMMENDED) +# Create ONE environment at server startup, reuse for all requests +env = SumoEnvironment(...) # Created once +app = create_fastapi_app(env, ...) # Reuses same env + +# Option 2: Thread-safe connection management +# Use threading locks around TraCI operations +``` + +**Decision**: Use single environment instance per container (same as Atari pattern). Each HTTP request uses the same environment. **SOLVES ISSUE**. + +--- + +#### 2. **LIBSUMO vs TraCI Performance Trade-off** + +**Background**: +```python +LIBSUMO = "LIBSUMO_AS_TRACI" in os.environ +``` + +- **TraCI**: Standard, supports GUI, slower (1x speed) +- **LIBSUMO**: No GUI, no parallel sims, faster (8x speed) + +**Risk**: Default TraCI could be too slow for RL training. 
+ +**Likelihood**: High (traffic sims are inherently slow) + +**Impact**: Medium (training takes longer, not broken) + +**Mitigation**: +```dockerfile +# Option 1: Use TraCI (default, safer) +# No env var needed, works out of box + +# Option 2: Enable LIBSUMO for speed +ENV LIBSUMO_AS_TRACI=1 + +# Recommendation: Start with TraCI, add LIBSUMO as optimization later +``` + +**Decision**: Start with TraCI (default), document LIBSUMO option for advanced users. + +--- + +### ๐ŸŸก MEDIUM RISK + +#### 3. **Episode Reset Performance** + +**Issue**: Each `reset()` closes and restarts SUMO simulation. + +```python +def reset(self, seed=None, **kwargs): + if self.episode != 0: + self.close() # Closes previous simulation + self._start_simulation() # Starts new one +``` + +**Risk**: Reset could take 1-5 seconds (slow for RL training loop). + +**Likelihood**: High (this is how SUMO works) + +**Impact**: Medium (slows training, doesn't break it) + +**Mitigation**: +- Document expected reset time +- Use long episodes (`num_seconds=20000`) +- Consider warm-start optimizations later + +**Decision**: Accept this limitation, document it. Not a blocker. + +--- + +#### 4. **CSV Output Accumulation** + +**Issue**: Environment can write CSV metrics to disk. + +```python +def save_csv(self, out_csv_name, episode): + df.to_csv(out_csv_name + f"_conn{self.label}_ep{episode}" + ".csv") +``` + +**Risk**: In Docker, CSV files accumulate โ†’ disk space. + +**Likelihood**: Low (only if user enables CSV output) + +**Impact**: Low (disk space, not functionality) + +**Mitigation**: +```python +# In our wrapper, set out_csv_name=None (disables CSV) +env = SumoEnvironment( + ..., + out_csv_name=None, # Disable CSV output +) +``` + +**Decision**: Disable CSV output by default. Users can enable via volume mount if needed. + +--- + +#### 5. **Network File Path Resolution** + +**Issue**: SUMO needs absolute paths to `.net.xml` and `.rou.xml` files. 
+ +**Risk**: If paths are wrong in Docker, simulation fails. + +**Likelihood**: Low (we control the paths) + +**Impact**: High (breaks everything if wrong) + +**Mitigation**: +```dockerfile +# Bundle networks at known paths +COPY sumo-rl/sumo_rl/nets/single-intersection/ /app/nets/ + +# Set absolute paths as defaults +ENV SUMO_NET_FILE=/app/nets/single-intersection.net.xml +ENV SUMO_ROUTE_FILE=/app/nets/single-intersection.rou.xml +``` + +**Decision**: Bundle example networks, use absolute paths. Test during build. + +--- + +#### 6. **Dynamic Observation/Action Spaces** + +**Issue**: Different networks โ†’ different action/observation sizes. + +```python +# Action space size = number of traffic signal phases (varies) +self.action_space = gym.spaces.Discrete(num_green_phases) + +# Observation size = depends on number of lanes (varies) +obs_size = num_green_phases + 1 + 2*num_lanes +``` + +**Risk**: OpenEnv expects fixed-size spaces? + +**Likelihood**: Low (we use single network by default) + +**Impact**: Medium (breaks if user changes network) + +**Mitigation**: +- Use single-intersection as default (fixed sizes) +- Document that changing networks may change spaces +- Future: Make spaces configurable + +**Decision**: Not a blocker. Start with single network, document clearly. + +--- + +### ๐ŸŸข LOW RISK + +#### 7. **SUMO Version Compatibility** + +**Issue**: `ppa:sumo/stable` might update SUMO version over time. + +**Risk**: New SUMO version breaks sumo-rl compatibility. + +**Likelihood**: Low (SUMO is stable) + +**Impact**: Medium (breaks after rebuild) + +**Mitigation**: +```dockerfile +# Option 1: Pin SUMO version (if available) +RUN apt-get install -y sumo=1.14.0 + +# Option 2: Pin sumolib/traci versions +RUN pip install sumolib==1.14.0 traci==1.14.0 + +# Option 3: Accept latest (simpler, usually works) +``` + +**Decision**: Start with latest, pin if issues arise. + +--- + +#### 8. 
**sumolib/traci vs System SUMO Mismatch** + +**Issue**: Pip packages `sumolib` and `traci` should match system SUMO version. + +**Risk**: Version mismatch causes compatibility issues. + +**Likelihood**: Low (sumo-rl handles this) + +**Impact**: Medium (simulation errors) + +**Mitigation**: +```dockerfile +# Install SUMO first +RUN apt-get install -y sumo sumo-tools + +# Then install matching Python packages +RUN pip install sumolib>=1.14.0 traci>=1.14.0 +``` + +**Decision**: Use `>=` versions, should work. Test during build. + +--- + +#### 9. **PettingZoo Version Compatibility** + +**Issue**: Code has fallback for PettingZoo 1.24 vs 1.25+ + +```python +try: + from pettingzoo.utils import AgentSelector # 1.25+ +except ImportError: + from pettingzoo.utils import agent_selector as AgentSelector # 1.24 +``` + +**Risk**: Version incompatibility breaks import. + +**Likelihood**: Low (pyproject.toml specifies `pettingzoo>=1.24.3`) + +**Impact**: Low (import error, easy to debug) + +**Mitigation**: +```dockerfile +RUN pip install pettingzoo>=1.24.3 +``` + +**Decision**: Use version spec from pyproject.toml. + +--- + +#### 10. **Memory Usage with Many Vehicles** + +**Issue**: Large traffic networks with thousands of vehicles โ†’ high memory. + +**Risk**: Container OOM (out of memory). + +**Likelihood**: Low (single-intersection is small) + +**Impact**: High (container crash) + +**Mitigation**: +- Use small default network (single-intersection) +- Document memory requirements for large networks +- Docker memory limits (optional) + +**Decision**: Not a blocker. Document memory requirements. + +--- + +#### 11. **Simulation Determinism** + +**Issue**: Default `sumo_seed="random"` โ†’ non-deterministic. + +**Risk**: Can't reproduce training runs. 
+ +**Likelihood**: High (default is random) + +**Impact**: Low (science issue, not functionality) + +**Mitigation**: +```python +# Allow seed control via environment variable +sumo_seed = int(os.getenv("SUMO_SEED", "42")) # Default fixed seed + +# Or keep random, document it +sumo_seed = os.getenv("SUMO_SEED", "random") +``` + +**Decision**: Default to fixed seed (42) for reproducibility. Document how to use random. + +--- + +#### 12. **Headless Operation (No GUI)** + +**Issue**: We force `use_gui=False` in Docker. + +**Risk**: Users might want to see simulation GUI. + +**Likelihood**: Low (Docker is headless) + +**Impact**: Low (convenience feature) + +**Mitigation**: +- Document that GUI is not available in Docker +- Suggest local development for GUI +- Future: VNC access to container GUI + +**Decision**: Not a blocker. GUI doesn't work in Docker anyway. + +--- + +#### 13. **Docker Image Size** + +**Issue**: SUMO + dependencies → large image. + +**Estimate**: +- Base: ~200MB +- SUMO: ~500MB +- Python packages: ~200MB +- **Total: ~900MB-1GB** + +**Risk**: Large downloads, storage. + +**Likelihood**: High (definitely will be large) + +**Impact**: Low (acceptable for complex sim) + +**Mitigation**: +- Multi-stage builds (future optimization) +- Clear documentation of size +- Accept it (complexity requires space) + +**Decision**: Accept ~1GB image size. Not a blocker. + +--- + +#### 14. **Long Simulation Times** + +**Issue**: Traffic simulations take time (minutes per episode). + +**Example**: 20,000 simulated seconds with delta_time=5 → 4,000 steps per episode. + +**Risk**: RL training is slow. + +**Likelihood**: High (inherent to traffic simulation) + +**Impact**: Medium (slower research, not broken) + +**Mitigation**: +- Document expected times +- Recommend shorter episodes for quick tests +- Suggest LIBSUMO for speedup + +**Decision**: Document clearly. Not a technical blocker. 
+ +--- + +## ๐Ÿ“Š Risk Summary + +| Risk | Severity | Likelihood | Mitigation Status | +|------|----------|-----------|-------------------| +| TraCI Connection Management | ๐Ÿ”ด High | Medium | โœ… Solved (single env instance) | +| LIBSUMO vs TraCI | ๐Ÿ”ด High | High | โœ… Mitigated (default TraCI, doc LIBSUMO) | +| Episode Reset Performance | ๐ŸŸก Medium | High | โœ… Accepted (document) | +| CSV Output Accumulation | ๐ŸŸก Medium | Low | โœ… Solved (disable by default) | +| Network File Paths | ๐ŸŸก Medium | Low | โœ… Solved (bundle at known paths) | +| Dynamic Spaces | ๐ŸŸก Medium | Low | โœ… Accepted (document) | +| SUMO Version | ๐ŸŸข Low | Low | โœ… Accepted (use latest) | +| sumolib/traci Mismatch | ๐ŸŸข Low | Low | โœ… Mitigated (>=1.14.0) | +| PettingZoo Version | ๐ŸŸข Low | Low | โœ… Mitigated (>=1.24.3) | +| Memory Usage | ๐ŸŸข Low | Low | โœ… Accepted (document) | +| Simulation Determinism | ๐ŸŸข Low | High | โœ… Solved (default fixed seed) | +| No GUI | ๐ŸŸข Low | Low | โœ… Accepted (Docker is headless) | +| Image Size | ๐ŸŸข Low | High | โœ… Accepted (~1GB) | +| Long Sim Times | ๐ŸŸข Low | High | โœ… Accepted (document) | + +--- + +## โœ… Final Risk Assessment + +### Overall Risk Level: **LOW-MEDIUM** โœ… + +### Key Findings: + +1. **Docker solves the hardest problem** (system dependencies) โœ… +2. **No critical blockers** - all risks have mitigations โœ… +3. **Main concerns are performance** (speed, memory) - acceptable for simulation โœ… +4. **Connection management solved** by single env instance pattern โœ… + +### Recommended Mitigations: + +```python +# 1. 
Single environment instance per container +env = SumoEnvironment( + net_file="/app/nets/single-intersection.net.xml", + route_file="/app/nets/single-intersection.rou.xml", + use_gui=False, # No GUI in Docker + single_agent=True, # Single-agent mode + num_seconds=20000, + sumo_seed=42, # Fixed seed for reproducibility + out_csv_name=None, # Disable CSV output + sumo_warnings=False, # Quiet +) + +# 2. Reuse for all HTTP requests +app = create_fastapi_app(env, SumoAction, SumoObservation) +``` + +```dockerfile +# 3. Bundle network files at known paths +COPY sumo-rl/sumo_rl/nets/single-intersection/ /app/nets/ + +# 4. Set SUMO_HOME +ENV SUMO_HOME=/usr/share/sumo + +# 5. Don't enable LIBSUMO by default (safer) +# ENV LIBSUMO_AS_TRACI=1 # Optional for advanced users +``` + +--- + +## ๐ŸŽฏ Confidence Level + +**Original**: 85% confident +**After Deep Analysis**: **95% confident** โœ… + +**Reasons for Increased Confidence**: +1. All high-risk items have clear mitigations +2. Docker architecture naturally solves connection management +3. Pattern matches Atari (proven to work) +4. Risks are mostly performance/documentation, not functionality +5. No unexpected blockers found + +--- + +## ๐Ÿš€ Ready to Implement + +**Recommendation**: **PROCEED WITH IMPLEMENTATION** โœ… + +The risks are manageable and well-understood. Docker makes this integration feasible and clean. + +**Estimated Effort**: 8-12 hours (unchanged) + +**Success Probability**: 95% + +--- + +## ๐Ÿ“ Documentation Requirements + +Based on risk analysis, must document: + +1. **Performance expectations**: + - Reset takes 1-5 seconds + - Episodes can take minutes + - LIBSUMO option for 8x speedup + +2. **Network files**: + - Default: single-intersection (bundled) + - Custom: mount volume with your .net.xml/.rou.xml + +3. **Reproducibility**: + - Default seed=42 (deterministic) + - Set SUMO_SEED=random for stochastic + +4. 
**Limitations**: + - No GUI in Docker + - Single-agent only (v1) + - Fixed network per container + +5. **Memory requirements**: + - Small networks: ~500MB + - Large networks: 2-4GB + - Document scaling + +--- + +**Analysis Complete**: All risks identified, mitigated, and documented. ✅ diff --git a/src/envs/sumo_rl_env/models.py b/src/envs/sumo_rl_env/models.py new file mode 100644 index 00000000..611b0eed --- /dev/null +++ b/src/envs/sumo_rl_env/models.py @@ -0,0 +1,110 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +""" +Data models for SUMO-RL Environment. + +This module defines the Action, Observation, and State types for traffic +signal control using SUMO (Simulation of Urban MObility). +""" + +from dataclasses import dataclass, field +from typing import Dict, List, Optional + +from core.env_server import Action, Observation, State + + +@dataclass +class SumoAction(Action): + """ + Action for SUMO traffic signal control environment. + + Represents selecting which traffic light phase to activate next. + + Attributes: + phase_id: Index of the green phase to activate (0 to num_phases-1) + ts_id: Traffic signal ID (for multi-agent support, default "0") + """ + + phase_id: int + ts_id: str = "0" + + +@dataclass +class SumoObservation(Observation): + """ + Observation from SUMO traffic signal environment. + + Contains traffic metrics for decision-making. 
+ + Attributes: + observation: Flattened observation vector containing: + - One-hot encoded current phase + - Min green flag (binary) + - Lane densities (normalized) + - Lane queues (normalized) + observation_shape: Shape of observation for reshaping + action_mask: List of valid action indices + sim_time: Current simulation time in seconds + done: Whether episode is complete + reward: Reward from last action (None on reset) + metadata: Additional info (system metrics, etc.) + """ + + observation: List[float] + observation_shape: List[int] + action_mask: List[int] = field(default_factory=list) + sim_time: float = 0.0 + done: bool = False + reward: Optional[float] = None + metadata: Dict = field(default_factory=dict) + + +@dataclass +class SumoState(State): + """ + State of SUMO traffic signal environment. + + Tracks both configuration and runtime state. + + Configuration attributes: + net_file: Path to SUMO network file (.net.xml) + route_file: Path to SUMO route file (.rou.xml) + num_seconds: Total simulation duration in seconds + delta_time: Seconds between agent actions + yellow_time: Duration of yellow phase in seconds + min_green: Minimum green time per phase in seconds + max_green: Maximum green time per phase in seconds + reward_fn: Name of reward function used + + Runtime attributes: + episode_id: Unique episode identifier + step_count: Number of steps taken in episode + sim_time: Current simulation time in seconds + total_vehicles: Total number of vehicles in simulation + total_waiting_time: Cumulative waiting time across all vehicles + """ + + # Episode tracking + episode_id: str = "" + step_count: int = 0 + + # SUMO configuration + net_file: str = "" + route_file: str = "" + num_seconds: int = 20000 + delta_time: int = 5 + yellow_time: int = 2 + min_green: int = 5 + max_green: int = 50 + reward_fn: str = "diff-waiting-time" + + # Runtime metrics + sim_time: float = 0.0 + total_vehicles: int = 0 + total_waiting_time: float = 0.0 + mean_waiting_time: float 
= 0.0 + mean_speed: float = 0.0 From c822a741fd2d23374a9165931fe2461aa56dc6b1 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Fri, 17 Oct 2025 11:27:34 -0700 Subject: [PATCH 02/13] add env --- .../sumo_rl_env/server/sumo_environment.py | 237 ++++++++++++++++++ 1 file changed, 237 insertions(+) create mode 100644 src/envs/sumo_rl_env/server/sumo_environment.py diff --git a/src/envs/sumo_rl_env/server/sumo_environment.py b/src/envs/sumo_rl_env/server/sumo_environment.py new file mode 100644 index 00000000..757b9f17 --- /dev/null +++ b/src/envs/sumo_rl_env/server/sumo_environment.py @@ -0,0 +1,237 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +""" +SUMO-RL Environment Server Implementation. + +This module wraps the SUMO-RL SumoEnvironment and exposes it +via the OpenEnv Environment interface for traffic signal control. +""" + +import os +import uuid +from typing import Any, Dict + +# Set SUMO_HOME before importing sumo_rl +os.environ.setdefault("SUMO_HOME", "/usr/share/sumo") + +from core.env_server import Action, Environment, Observation + +from ..models import SumoAction, SumoObservation, SumoState + +# Import SUMO-RL +try: + from sumo_rl import SumoEnvironment as BaseSumoEnv +except ImportError as e: + raise ImportError( + "sumo-rl is not installed. " + "Please install it with: pip install sumo-rl" + ) from e + + +class SumoEnvironment(Environment): + """ + SUMO-RL Environment wrapper for OpenEnv. + + This environment wraps the SUMO traffic signal control environment + for single-agent reinforcement learning. 
+ + Args: + net_file: Path to SUMO network file (.net.xml) + route_file: Path to SUMO route file (.rou.xml) + num_seconds: Simulation duration in seconds (default: 20000) + delta_time: Seconds between agent actions (default: 5) + yellow_time: Yellow phase duration in seconds (default: 2) + min_green: Minimum green time in seconds (default: 5) + max_green: Maximum green time in seconds (default: 50) + reward_fn: Reward function name (default: "diff-waiting-time") + sumo_seed: Random seed for reproducibility (default: 42) + + Example: + >>> env = SumoEnvironment( + ... net_file="/app/nets/single-intersection.net.xml", + ... route_file="/app/nets/single-intersection.rou.xml" + ... ) + >>> obs = env.reset() + >>> print(obs.observation_shape) + >>> obs = env.step(SumoAction(phase_id=1)) + >>> print(obs.reward, obs.done) + """ + + def __init__( + self, + net_file: str, + route_file: str, + num_seconds: int = 20000, + delta_time: int = 5, + yellow_time: int = 2, + min_green: int = 5, + max_green: int = 50, + reward_fn: str = "diff-waiting-time", + sumo_seed: int = 42, + ): + """Initialize SUMO traffic signal environment.""" + super().__init__() + + # Store configuration + self.net_file = net_file + self.route_file = route_file + self.num_seconds = num_seconds + self.delta_time = delta_time + self.yellow_time = yellow_time + self.min_green = min_green + self.max_green = max_green + self.reward_fn = reward_fn + self.sumo_seed = sumo_seed + + # Create SUMO environment (single-agent mode) + # Key settings: + # - use_gui=False: No GUI in Docker + # - single_agent=True: Returns single obs/reward (not dict) + # - sumo_warnings=False: Suppress SUMO warnings + # - out_csv_name=None: Don't write CSV files + self.env = BaseSumoEnv( + net_file=net_file, + route_file=route_file, + use_gui=False, + single_agent=True, + num_seconds=num_seconds, + delta_time=delta_time, + yellow_time=yellow_time, + min_green=min_green, + max_green=max_green, + reward_fn=reward_fn, + sumo_seed=sumo_seed, 
+ sumo_warnings=False, + out_csv_name=None, # Disable CSV output + add_system_info=True, + add_per_agent_info=False, + ) + + # Initialize state + self._state = SumoState( + net_file=net_file, + route_file=route_file, + num_seconds=num_seconds, + delta_time=delta_time, + yellow_time=yellow_time, + min_green=min_green, + max_green=max_green, + reward_fn=reward_fn, + ) + + self._last_info = {} + + def reset(self) -> Observation: + """ + Reset the environment and return initial observation. + + Returns: + Initial SumoObservation for the agent. + """ + # Reset SUMO simulation + obs, info = self.env.reset() + + # Update state tracking + self._state.episode_id = str(uuid.uuid4()) + self._state.step_count = 0 + self._state.sim_time = 0.0 + + # Store info for metadata + self._last_info = info + + return self._make_observation(obs, reward=None, done=False, info=info) + + def step(self, action: Action) -> Observation: + """ + Execute agent's action and return resulting observation. + + Args: + action: SumoAction containing the phase_id to execute. + + Returns: + SumoObservation after action execution. + + Raises: + ValueError: If action is not a SumoAction. + """ + if not isinstance(action, SumoAction): + raise ValueError(f"Expected SumoAction, got {type(action)}") + + # Validate phase_id + num_phases = self.env.action_space.n + if action.phase_id < 0 or action.phase_id >= num_phases: + raise ValueError( + f"Invalid phase_id: {action.phase_id}. 
" + f"Valid range: [0, {num_phases - 1}]" + ) + + # Execute action in SUMO + # Returns: (obs, reward, terminated, truncated, info) + obs, reward, terminated, truncated, info = self.env.step(action.phase_id) + done = terminated or truncated + + # Update state + self._state.step_count += 1 + self._state.sim_time = info.get("step", 0.0) + self._state.total_vehicles = info.get("system_total_running", 0) + self._state.total_waiting_time = info.get("system_total_waiting_time", 0.0) + self._state.mean_waiting_time = info.get("system_mean_waiting_time", 0.0) + self._state.mean_speed = info.get("system_mean_speed", 0.0) + + # Store info for metadata + self._last_info = info + + return self._make_observation(obs, reward=reward, done=done, info=info) + + @property + def state(self) -> SumoState: + """Get current environment state.""" + return self._state + + def _make_observation( + self, obs: Any, reward: float, done: bool, info: Dict + ) -> SumoObservation: + """ + Create SumoObservation from SUMO environment output. + + Args: + obs: Observation array from SUMO environment + reward: Reward value (None on reset) + done: Whether episode is complete + info: Info dictionary from SUMO environment + + Returns: + SumoObservation for the agent. 
+ """ + # Convert observation to list + if hasattr(obs, "tolist"): + obs_list = obs.tolist() + else: + obs_list = list(obs) + + # Get action mask (all actions valid in SUMO-RL) + num_phases = self.env.action_space.n + action_mask = list(range(num_phases)) + + # Extract system metrics for metadata + system_info = { + k: v for k, v in info.items() if k.startswith("system_") + } + + # Create observation + return SumoObservation( + observation=obs_list, + observation_shape=[len(obs_list)], + action_mask=action_mask, + sim_time=info.get("step", 0.0), + done=done, + reward=reward, + metadata={ + "num_green_phases": num_phases, + "system_info": system_info, + }, + ) From 980abc3ee8ece88dccc61e3b031d71f17adaa930 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Fri, 17 Oct 2025 11:28:42 -0700 Subject: [PATCH 03/13] app --- src/envs/sumo_rl_env/server/app.py | 47 ++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 src/envs/sumo_rl_env/server/app.py diff --git a/src/envs/sumo_rl_env/server/app.py b/src/envs/sumo_rl_env/server/app.py new file mode 100644 index 00000000..b81463ae --- /dev/null +++ b/src/envs/sumo_rl_env/server/app.py @@ -0,0 +1,47 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +""" +FastAPI application for SUMO-RL environment server. + +This module creates an HTTP server that exposes traffic signal control +via the OpenEnv API using SUMO (Simulation of Urban MObility). 
+""" + +import os + +from core.env_server import create_fastapi_app + +from ..models import SumoAction, SumoObservation +from .sumo_environment import SumoEnvironment + +# Get configuration from environment variables (path fallbacks mirror the Dockerfile ENV defaults) +net_file = os.getenv("SUMO_NET_FILE", "/app/nets/single-intersection/single-intersection.net.xml") +route_file = os.getenv("SUMO_ROUTE_FILE", "/app/nets/single-intersection/single-intersection.rou.xml") +num_seconds = int(os.getenv("SUMO_NUM_SECONDS", "20000")) +delta_time = int(os.getenv("SUMO_DELTA_TIME", "5")) +yellow_time = int(os.getenv("SUMO_YELLOW_TIME", "2")) +min_green = int(os.getenv("SUMO_MIN_GREEN", "5")) +max_green = int(os.getenv("SUMO_MAX_GREEN", "50")) +reward_fn = os.getenv("SUMO_REWARD_FN", "diff-waiting-time") +sumo_seed = os.getenv("SUMO_SEED", "42") # integer string, or "random" for a non-deterministic run + +# Create single environment instance +# This is reused for all HTTP requests (avoids TraCI connection issues) +env = SumoEnvironment( + net_file=net_file, + route_file=route_file, + num_seconds=num_seconds, + delta_time=delta_time, + yellow_time=yellow_time, + min_green=min_green, + max_green=max_green, + reward_fn=reward_fn, + sumo_seed=sumo_seed if sumo_seed == "random" else int(sumo_seed), # pass "random" through unchanged +) + +# Create FastAPI app +app = create_fastapi_app(env, SumoAction, SumoObservation) From 1d3c96c5a8df3c0948ee56438f7737aa87abda57 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Fri, 17 Oct 2025 11:28:51 -0700 Subject: [PATCH 04/13] init --- src/envs/sumo_rl_env/server/__init__.py | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 src/envs/sumo_rl_env/server/__init__.py diff --git a/src/envs/sumo_rl_env/server/__init__.py b/src/envs/sumo_rl_env/server/__init__.py new file mode 100644 index 00000000..f4b70221 --- /dev/null +++ b/src/envs/sumo_rl_env/server/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +"""SUMO-RL environment server package.""" From e64f3d76fe4447f126ad34b031236c3fb359386a Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Fri, 17 Oct 2025 11:28:59 -0700 Subject: [PATCH 05/13] DockerFile --- src/envs/sumo_rl_env/server/Dockerfile | 69 ++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 src/envs/sumo_rl_env/server/Dockerfile diff --git a/src/envs/sumo_rl_env/server/Dockerfile b/src/envs/sumo_rl_env/server/Dockerfile new file mode 100644 index 00000000..618ebe88 --- /dev/null +++ b/src/envs/sumo_rl_env/server/Dockerfile @@ -0,0 +1,69 @@ +# Dockerfile for SUMO-RL Environment +# This image provides traffic signal control via SUMO (Simulation of Urban MObility) + +# Configurable base image - defaults to local build, can be overridden for CI/CD +# Base image provides: fastapi, uvicorn, requests, curl, PYTHONPATH=/app/src +# +# Local build: docker build -t envtorch-base:latest -f src/core/containers/images/Dockerfile . +# docker build -f src/envs/sumo_rl_env/server/Dockerfile -t sumo-rl-env:latest . +# +# CI/CD build: docker build --build-arg BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest \ +# -f src/envs/sumo_rl_env/server/Dockerfile -t sumo-rl-env:latest . 
+ARG BASE_IMAGE=envtorch-base:latest +FROM ${BASE_IMAGE} + +# Install SUMO system dependencies +# SUMO is a microscopic traffic simulation package +RUN apt-get update && apt-get install -y --no-install-recommends \ + software-properties-common \ + && add-apt-repository ppa:sumo/stable \ + && apt-get update \ + && apt-get install -y --no-install-recommends \ + sumo \ + sumo-tools \ + && rm -rf /var/lib/apt/lists/* + +# Set SUMO_HOME environment variable +ENV SUMO_HOME=/usr/share/sumo + +# Install SUMO-RL and Python dependencies (specifiers are quoted so the shell does not treat '>' as a redirect) +# sumo-rl includes: gymnasium, pettingzoo, numpy, pandas, sumolib, traci +RUN pip install --no-cache-dir \ + "gymnasium>=0.28" \ + "pettingzoo>=1.24.3" \ + "numpy>=1.24.0" \ + "pandas>=2.0.0" \ + "sumolib>=1.14.0" \ + "traci>=1.14.0" \ + "sumo-rl>=1.4.5" + +# Copy OpenEnv core (base image already set WORKDIR=/app) +COPY src/core/ /app/src/core/ + +# Copy SUMO-RL environment code +COPY src/envs/sumo_rl_env/ /app/src/envs/sumo_rl_env/ + +# Copy example network files +# Default: single-intersection (simple 4-way intersection) +COPY nets/single-intersection/ /app/nets/single-intersection/ + +# SUMO environment variables (can be overridden at runtime) +ENV SUMO_NET_FILE=/app/nets/single-intersection/single-intersection.net.xml +ENV SUMO_ROUTE_FILE=/app/nets/single-intersection/single-intersection.rou.xml +ENV SUMO_NUM_SECONDS=20000 +ENV SUMO_DELTA_TIME=5 +ENV SUMO_YELLOW_TIME=2 +ENV SUMO_MIN_GREEN=5 +ENV SUMO_MAX_GREEN=50 +ENV SUMO_REWARD_FN=diff-waiting-time +ENV SUMO_SEED=42 + +# Expose port +EXPOSE 8000 + +# Health check +HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ + CMD curl -f http://localhost:8000/health || exit 1 + +# Run the FastAPI server +CMD ["uvicorn", "envs.sumo_rl_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"] From debd7243f829501bca42fb89040c4080c5cdd257 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Fri, 17 Oct 2025 11:29:48 -0700 Subject: [PATCH 06/13] client and configs --- 
.../single-intersection.edg.xml | 6 + .../single-intersection.net.xml | 86 +++++++++++ .../single-intersection.nod.xml | 7 + .../single-intersection.rou.xml | 6 + .../single-intersection.sumocfg | 10 ++ src/envs/sumo_rl_env/client.py | 145 ++++++++++++++++++ 6 files changed, 260 insertions(+) create mode 100755 nets/single-intersection/single-intersection.edg.xml create mode 100755 nets/single-intersection/single-intersection.net.xml create mode 100755 nets/single-intersection/single-intersection.nod.xml create mode 100755 nets/single-intersection/single-intersection.rou.xml create mode 100755 nets/single-intersection/single-intersection.sumocfg create mode 100644 src/envs/sumo_rl_env/client.py diff --git a/nets/single-intersection/single-intersection.edg.xml b/nets/single-intersection/single-intersection.edg.xml new file mode 100755 index 00000000..52c3e7aa --- /dev/null +++ b/nets/single-intersection/single-intersection.edg.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/nets/single-intersection/single-intersection.net.xml b/nets/single-intersection/single-intersection.net.xml new file mode 100755 index 00000000..0f32510f --- /dev/null +++ b/nets/single-intersection/single-intersection.net.xml @@ -0,0 +1,86 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/nets/single-intersection/single-intersection.nod.xml b/nets/single-intersection/single-intersection.nod.xml new file mode 100755 index 00000000..a8b68d54 --- /dev/null +++ b/nets/single-intersection/single-intersection.nod.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/nets/single-intersection/single-intersection.rou.xml b/nets/single-intersection/single-intersection.rou.xml new file mode 100755 index 00000000..291cdee8 --- /dev/null +++ b/nets/single-intersection/single-intersection.rou.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/nets/single-intersection/single-intersection.sumocfg 
b/nets/single-intersection/single-intersection.sumocfg new file mode 100755 index 00000000..035327b7 --- /dev/null +++ b/nets/single-intersection/single-intersection.sumocfg @@ -0,0 +1,10 @@ + + + + + + + diff --git a/src/envs/sumo_rl_env/client.py b/src/envs/sumo_rl_env/client.py new file mode 100644 index 00000000..deba88fd --- /dev/null +++ b/src/envs/sumo_rl_env/client.py @@ -0,0 +1,145 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +""" +HTTP client for SUMO-RL environment. + +This module provides a client to interact with the SUMO traffic signal +control environment over HTTP. +""" + +from typing import Any, Dict + +from core.http_env_client import HTTPEnvClient +from core.types import StepResult + +from .models import SumoAction, SumoObservation, SumoState + + +class SumoRLEnv(HTTPEnvClient[SumoAction, SumoObservation]): + """ + HTTP client for SUMO-RL traffic signal control environment. + + This client communicates with a SUMO environment server to control + traffic signals using reinforcement learning. + + Example: + >>> # Start container and connect + >>> env = SumoRLEnv.from_docker_image("sumo-rl-env:latest") + >>> + >>> # Reset environment + >>> result = env.reset() + >>> print(f"Observation shape: {result.observation.observation_shape}") + >>> print(f"Action space: {result.observation.action_mask}") + >>> + >>> # Take action + >>> result = env.step(SumoAction(phase_id=1)) + >>> print(f"Reward: {result.reward}, Done: {result.done}") + >>> + >>> # Get state + >>> state = env.state() + >>> print(f"Sim time: {state.sim_time}, Total vehicles: {state.total_vehicles}") + >>> + >>> # Cleanup + >>> env.close() + + Example with custom network: + >>> # Use custom SUMO network via volume mount + >>> env = SumoRLEnv.from_docker_image( + ... "sumo-rl-env:latest", + ... port=8000, + ... 
volumes={ + ... "/path/to/my/nets": {"bind": "/nets", "mode": "ro"} + ... }, + ... environment={ + ... "SUMO_NET_FILE": "/nets/my-network.net.xml", + ... "SUMO_ROUTE_FILE": "/nets/my-routes.rou.xml", + ... } + ... ) + + Example with configuration: + >>> # Adjust simulation parameters + >>> env = SumoRLEnv.from_docker_image( + ... "sumo-rl-env:latest", + ... environment={ + ... "SUMO_NUM_SECONDS": "10000", + ... "SUMO_DELTA_TIME": "10", + ... "SUMO_REWARD_FN": "queue", + ... "SUMO_SEED": "123", + ... } + ... ) + """ + + def _step_payload(self, action: SumoAction) -> Dict[str, Any]: + """ + Convert SumoAction to JSON payload for HTTP request. + + Args: + action: SumoAction containing phase_id to execute. + + Returns: + Dictionary payload for step endpoint. + """ + return { + "phase_id": action.phase_id, + "ts_id": action.ts_id, + } + + def _parse_result(self, payload: Dict[str, Any]) -> StepResult[SumoObservation]: + """ + Parse step result from HTTP response JSON. + + Args: + payload: JSON response from step endpoint. + + Returns: + StepResult containing SumoObservation. + """ + obs_data = payload.get("observation", {}) + + observation = SumoObservation( + observation=obs_data.get("observation", []), + observation_shape=obs_data.get("observation_shape", []), + action_mask=obs_data.get("action_mask", []), + sim_time=obs_data.get("sim_time", 0.0), + done=obs_data.get("done", False), + reward=obs_data.get("reward"), + metadata=obs_data.get("metadata", {}), + ) + + return StepResult( + observation=observation, + reward=payload.get("reward"), + done=payload.get("done", False), + ) + + def _parse_state(self, payload: Dict[str, Any]) -> SumoState: + """ + Parse state from HTTP response JSON. + + Args: + payload: JSON response from state endpoint. + + Returns: + SumoState object. 
+ """ + return SumoState( + episode_id=payload.get("episode_id", ""), + step_count=payload.get("step_count", 0), + net_file=payload.get("net_file", ""), + route_file=payload.get("route_file", ""), + num_seconds=payload.get("num_seconds", 20000), + delta_time=payload.get("delta_time", 5), + yellow_time=payload.get("yellow_time", 2), + min_green=payload.get("min_green", 5), + max_green=payload.get("max_green", 50), + reward_fn=payload.get("reward_fn", "diff-waiting-time"), + sim_time=payload.get("sim_time", 0.0), + total_vehicles=payload.get("total_vehicles", 0), + total_waiting_time=payload.get("total_waiting_time", 0.0), + mean_waiting_time=payload.get("mean_waiting_time", 0.0), + mean_speed=payload.get("mean_speed", 0.0), + ) From c1b2aa494a95a14ec4a6aa2b2257368892a446f6 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Fri, 17 Oct 2025 11:31:28 -0700 Subject: [PATCH 07/13] init --- src/envs/sumo_rl_env/__init__.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 src/envs/sumo_rl_env/__init__.py diff --git a/src/envs/sumo_rl_env/__init__.py b/src/envs/sumo_rl_env/__init__.py new file mode 100644 index 00000000..17aaf2f6 --- /dev/null +++ b/src/envs/sumo_rl_env/__init__.py @@ -0,0 +1,31 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +""" +SUMO-RL Environment for OpenEnv. + +This module provides OpenEnv integration for traffic signal control using +SUMO (Simulation of Urban MObility) via the SUMO-RL library. 
+ +Example: + >>> from envs.sumo_rl_env import SumoRLEnv, SumoAction + >>> + >>> # Connect to a running server or start via Docker + >>> env = SumoRLEnv.from_docker_image("sumo-rl-env:latest") + >>> + >>> # Reset and interact + >>> result = env.reset() + >>> result = env.step(SumoAction(phase_id=1)) + >>> print(result.reward, result.done) + >>> + >>> # Cleanup + >>> env.close() +""" + +from .client import SumoRLEnv +from .models import SumoAction, SumoObservation, SumoState + +__all__ = ["SumoRLEnv", "SumoAction", "SumoObservation", "SumoState"] From 0df6a874cf9da7826a60dc4157a4556d34292796 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Fri, 17 Oct 2025 11:31:41 -0700 Subject: [PATCH 08/13] ReadMe --- src/envs/sumo_rl_env/README.md | 341 +++++++++++++++++++++++++++++++++ 1 file changed, 341 insertions(+) create mode 100644 src/envs/sumo_rl_env/README.md diff --git a/src/envs/sumo_rl_env/README.md b/src/envs/sumo_rl_env/README.md new file mode 100644 index 00000000..e35035ae --- /dev/null +++ b/src/envs/sumo_rl_env/README.md @@ -0,0 +1,341 @@ +# SUMO-RL Environment + +Integration of traffic signal control with the OpenEnv framework via SUMO (Simulation of Urban MObility) and SUMO-RL. + +## Overview + +This environment enables reinforcement learning for **traffic signal control** using SUMO, a microscopic traffic simulation package. Train RL agents to optimize traffic light timing and minimize vehicle delays. 
+ +**Key Features**: +- **Realistic traffic simulation** via SUMO +- **Single-agent mode** for single intersection control +- **Configurable rewards** (waiting time, queue, pressure, speed) +- **Multiple networks** supported (custom .net.xml and .rou.xml files) +- **Docker-ready** with pre-bundled example network + +## Quick Start + +### Using Docker (Recommended) + +```python +from envs.sumo_rl_env import SumoRLEnv, SumoAction + +# Automatically starts container +env = SumoRLEnv.from_docker_image("sumo-rl-env:latest") + +# Reset environment +result = env.reset() +print(f"Observation shape: {result.observation.observation_shape}") +print(f"Available actions: {result.observation.action_mask}") + +# Take action (select next green phase) +result = env.step(SumoAction(phase_id=1)) +print(f"Reward: {result.reward}, Done: {result.done}") + +# Get state +state = env.state() +print(f"Simulation time: {state.sim_time}") +print(f"Total vehicles: {state.total_vehicles}") +print(f"Mean waiting time: {state.mean_waiting_time}") + +# Cleanup +env.close() +``` + +### Building the Docker Image + +```bash +cd OpenEnv + +# Build base image first (if not already built) +docker build -t envtorch-base:latest -f src/core/containers/images/Dockerfile . + +# Build SUMO-RL environment +docker build -f src/envs/sumo_rl_env/server/Dockerfile -t sumo-rl-env:latest . 
+``` + +### Running with Different Configurations + +```bash +# Default: single-intersection +docker run -p 8000:8000 sumo-rl-env:latest + +# Longer simulation +docker run -p 8000:8000 \ + -e SUMO_NUM_SECONDS=50000 \ + sumo-rl-env:latest + +# Different reward function +docker run -p 8000:8000 \ + -e SUMO_REWARD_FN=queue \ + sumo-rl-env:latest + +# Custom seed for reproducibility +docker run -p 8000:8000 \ + -e SUMO_SEED=123 \ + sumo-rl-env:latest +``` + +## Observation + +The observation is a vector containing: +- **Phase one-hot**: Current active green phase (one-hot encoded) +- **Min green flag**: Binary indicator if minimum green time has passed +- **Lane densities**: Number of vehicles / lane capacity for each incoming lane +- **Lane queues**: Number of queued vehicles / lane capacity for each incoming lane + +Observation size varies by network topology (depends on number of phases and lanes). + +**Default (single-intersection)**: +- 4 green phases +- 8 incoming lanes +- Observation size: ~21 elements + +## Action Space + +The action space is discrete and represents selecting the next green phase to activate. + +- **Action type**: Discrete +- **Action range**: `[0, num_green_phases - 1]` +- **Default (single-intersection)**: 4 actions (one per green phase) + +When a phase change is requested, SUMO automatically inserts a yellow phase before switching. + +## Rewards + +Default reward function is **change in cumulative waiting time**: +``` +reward = -(total_waiting_time_now - total_waiting_time_previous) +``` + +Positive rewards indicate waiting time decreased (good). 
+ +### Available Reward Functions + +Set via `SUMO_REWARD_FN` environment variable: + +- **`diff-waiting-time`** (default): Change in cumulative waiting time +- **`average-speed`**: Average speed of all vehicles +- **`queue`**: Negative total queue length +- **`pressure`**: Pressure metric (vehicles leaving - vehicles approaching) + +## Configuration + +### Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `SUMO_NET_FILE` | `/app/nets/single-intersection/single-intersection.net.xml` | Network topology file | +| `SUMO_ROUTE_FILE` | `/app/nets/single-intersection/single-intersection.rou.xml` | Vehicle routes file | +| `SUMO_NUM_SECONDS` | `20000` | Simulation duration (seconds) | +| `SUMO_DELTA_TIME` | `5` | Seconds between agent actions | +| `SUMO_YELLOW_TIME` | `2` | Yellow phase duration (seconds) | +| `SUMO_MIN_GREEN` | `5` | Minimum green time (seconds) | +| `SUMO_MAX_GREEN` | `50` | Maximum green time (seconds) | +| `SUMO_REWARD_FN` | `diff-waiting-time` | Reward function name | +| `SUMO_SEED` | `42` | Random seed (use for reproducibility) | + +### Using Custom Networks + +To use your own SUMO network: + +```python +from envs.sumo_rl_env import SumoRLEnv + +env = SumoRLEnv.from_docker_image( + "sumo-rl-env:latest", + volumes={ + "/path/to/your/nets": {"bind": "/nets", "mode": "ro"} + }, + environment={ + "SUMO_NET_FILE": "/nets/my-network.net.xml", + "SUMO_ROUTE_FILE": "/nets/my-routes.rou.xml", + } +) +``` + +Your network directory should contain: +- `.net.xml` - Network topology (roads, junctions, traffic lights) +- `.rou.xml` - Vehicle routes (trip definitions, flow rates) + +## API Reference + +### SumoAction + +```python +@dataclass +class SumoAction(Action): + phase_id: int # Green phase to activate (0 to num_phases-1) + ts_id: str = "0" # Traffic signal ID (for multi-agent) +``` + +### SumoObservation + +```python +@dataclass +class SumoObservation(Observation): + observation: List[float] # Observation vector + observation_shape: List[int] # Shape for 
reshaping + action_mask: List[int] # Valid action indices + sim_time: float # Current simulation time + done: bool # Episode finished + reward: Optional[float] # Reward from last action + metadata: Dict # System metrics +``` + +### SumoState + +```python +@dataclass +class SumoState(State): + episode_id: str # Unique episode ID + step_count: int # Steps taken + net_file: str # Network file path + route_file: str # Route file path + sim_time: float # Current simulation time + total_vehicles: int # Total vehicles in simulation + total_waiting_time: float # Cumulative waiting time + mean_waiting_time: float # Mean waiting time + mean_speed: float # Mean vehicle speed + # ... configuration parameters +``` + +## Example Training Loop + +```python +from envs.sumo_rl_env import SumoRLEnv, SumoAction +import numpy as np + +# Start environment +env = SumoRLEnv.from_docker_image("sumo-rl-env:latest") + +# Training loop +for episode in range(10): + result = env.reset() + episode_reward = 0 + steps = 0 + + while not result.done and steps < 1000: + # Random policy (replace with your RL agent) + action_id = int(np.random.choice(result.observation.action_mask)) + + # Take action + result = env.step(SumoAction(phase_id=action_id)) + + episode_reward += result.reward or 0 + steps += 1 + + # Print progress every 100 steps + if steps % 100 == 0: + state = env.state() + print(f"Step {steps}: " + f"reward={result.reward or 0:.2f}, " + f"vehicles={state.total_vehicles}, " + f"waiting={state.mean_waiting_time:.2f}") + + print(f"Episode {episode}: total_reward={episode_reward:.2f}, steps={steps}") + +env.close() +``` + +## Performance Notes + +### Simulation Speed + +- **Reset time**: 1-5 seconds (starts new SUMO simulation) +- **Step time**: ~50-200ms per step (depends on network size) +- **Episode duration**: Minutes (20,000 sim seconds with delta_time=5 → ~4,000 steps) + +### Optimization + +For faster simulation: +1. Reduce `SUMO_NUM_SECONDS` for shorter episodes +2. 
Increase `SUMO_DELTA_TIME` for fewer decisions +3. Use simpler networks with fewer vehicles + +## Architecture + +``` +┌─────────────────────────────────┐ +│ Client: SumoRLEnv │ +│ .step(phase_id=1) │ +└──────────────┬──────────────────┘ + │ HTTP +┌──────────────▼──────────────────┐ +│ FastAPI Server (Docker) │ +│ SumoEnvironment │ +│ ├─ Wraps sumo_rl │ +│ ├─ Single-agent mode │ +│ └─ No GUI │ +└──────────────┬──────────────────┘ + │ +┌──────────────▼──────────────────┐ +│ SUMO Simulator │ +│ - Reads .net.xml (network) │ +│ - Reads .rou.xml (routes) │ +│ - Simulates traffic flow │ +│ - Provides observations │ +└─────────────────────────────────┘ +``` + +## Bundled Network + +The default `single-intersection` network is a simple 4-way intersection with: +- **4 incoming roads** (North, South, East, West) +- **4 green phases** (NS straight, NS left, EW straight, EW left) +- **Vehicle flow**: Continuous stream with varying rates + +## Limitations + +- **No GUI in Docker**: SUMO GUI requires X server (not available in containers) +- **Single-agent only**: Multi-agent (multiple intersections) coming in future version +- **Fixed network per container**: Each container uses one network topology +- **Memory usage**: ~500MB for small networks, 2-4GB for large city networks + +## Troubleshooting + +### Container won't start +```bash +# Check logs +docker logs <container-id> + +# Verify network files exist +docker run sumo-rl-env:latest ls -la /app/nets/ +``` + +### "SUMO_HOME not set" error +This should be automatic
in Docker. If running locally: +```bash +export SUMO_HOME=/usr/share/sumo +``` + +### Slow performance +- Reduce simulation duration: `SUMO_NUM_SECONDS=5000` +- Increase action interval: `SUMO_DELTA_TIME=10` +- Use smaller networks with fewer vehicles + +## References + +- [SUMO Documentation](https://sumo.dlr.de/docs/) +- [SUMO-RL GitHub](https://github.com/LucasAlegre/sumo-rl) +- [SUMO-RL Paper](https://peerj.com/articles/cs-575/) +- [RESCO Benchmarks](https://github.com/jault/RESCO) + +## Citation + +If you use SUMO-RL in your research, please cite: + +```bibtex +@misc{sumorl, + author = {Lucas N. Alegre}, + title = {{SUMO-RL}}, + year = {2019}, + publisher = {GitHub}, + journal = {GitHub repository}, + howpublished = {\url{https://github.com/LucasAlegre/sumo-rl}}, +} +``` + +## License + +This integration is licensed under the BSD-style license. SUMO-RL and SUMO have their own licenses. From c1651adf8d6fb7266069672be8952ccf5873d72a Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Fri, 17 Oct 2025 11:32:38 -0700 Subject: [PATCH 09/13] example and docker --- .github/workflows/docker-build.yml | 2 + examples/sumo_rl_simple.py | 105 +++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+) create mode 100644 examples/sumo_rl_simple.py diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index ef753e07..26934ec9 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -73,6 +73,8 @@ jobs: dockerfile: src/envs/chat_env/server/Dockerfile - name: coding-env dockerfile: src/envs/coding_env/server/Dockerfile + - name: sumo-rl-env + dockerfile: src/envs/sumo_rl_env/server/Dockerfile steps: - name: Checkout code diff --git a/examples/sumo_rl_simple.py b/examples/sumo_rl_simple.py new file mode 100644 index 00000000..ec5f08ae --- /dev/null +++ b/examples/sumo_rl_simple.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 +""" +Simple example demonstrating SUMO-RL Environment usage. 
+ +This example shows how to: +1. Connect to a SUMO traffic signal control environment +2. Reset the environment +3. Take actions (select traffic light phases) +4. Process observations and rewards + +Usage: + # Option 1: Start the server manually + python -m envs.sumo_rl_env.server.app + # Then run: python examples/sumo_rl_simple.py + + # Option 2: Use Docker + docker run -p 8000:8000 sumo-rl-env:latest + # Then run: python examples/sumo_rl_simple.py +""" + +import numpy as np + +from envs.sumo_rl_env import SumoAction, SumoRLEnv + + +def main(): + """Run a simple SUMO traffic control episode.""" + # Connect to the SUMO environment server + print("Connecting to SUMO-RL environment...") + env = SumoRLEnv(base_url="http://localhost:8000") + + try: + # Reset the environment + print("\nResetting environment...") + result = env.reset() + print(f"Observation shape: {result.observation.observation_shape}") + print(f"Available actions: {result.observation.action_mask}") + print(f"Number of green phases: {len(result.observation.action_mask)}") + + # Get initial state + state = env.state() + print(f"\nSimulation configuration:") + print(f" Network: {state.net_file}") + print(f" Duration: {state.num_seconds} seconds") + print(f" Delta time: {state.delta_time} seconds") + print(f" Reward function: {state.reward_fn}") + + # Run a few steps with random policy + print("\nRunning traffic control with random policy...") + episode_reward = 0 + steps = 0 + max_steps = 100 + + for step in range(max_steps): + # Random policy: select random green phase + action_id = np.random.choice(result.observation.action_mask) + + # Take action + result = env.step(SumoAction(phase_id=action_id)) + + episode_reward += result.reward or 0 + steps += 1 + + # Print progress every 10 steps + if step % 10 == 0: + state = env.state() + print( + f"Step {step:3d}: " + f"phase={action_id}, " + f"reward={result.reward:6.2f}, " + f"vehicles={state.total_vehicles:3d}, " + 
f"waiting={state.mean_waiting_time:6.2f}s, " + f"speed={state.mean_speed:5.2f}m/s" + ) + + if result.done: + print(f"\nEpisode finished after {steps} steps!") + break + + # Final statistics + print(f"\n{'='*60}") + print(f"Episode Summary:") + print(f" Total steps: {steps}") + print(f" Total reward: {episode_reward:.2f}") + print(f" Average reward: {episode_reward/steps:.2f}") + + # Get final state + state = env.state() + print(f"\nFinal State:") + print(f" Simulation time: {state.sim_time:.0f} seconds") + print(f" Total vehicles: {state.total_vehicles}") + print(f" Total waiting time: {state.total_waiting_time:.2f} seconds") + print(f" Mean waiting time: {state.mean_waiting_time:.2f} seconds") + print(f" Mean speed: {state.mean_speed:.2f} m/s") + print(f"{'='*60}") + + finally: + # Cleanup + print("\nClosing environment...") + env.close() + print("Done!") + + +if __name__ == "__main__": + main() From c19c6f435a2e5bf9d1b03d9e1d57f4bcb7291bc4 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Fri, 17 Oct 2025 13:07:51 -0700 Subject: [PATCH 10/13] fix docker --- SUMO_RL_INTEGRATION_PLAN.md | 663 ------------------------- SUMO_RL_RISK_ANALYSIS.md | 505 ------------------- src/envs/sumo_rl_env/models.py | 4 +- src/envs/sumo_rl_env/server/Dockerfile | 10 +- src/envs/sumo_rl_env/test_sumo_rl.sh | 220 ++++++++ 5 files changed, 225 insertions(+), 1177 deletions(-) delete mode 100644 SUMO_RL_INTEGRATION_PLAN.md delete mode 100644 SUMO_RL_RISK_ANALYSIS.md create mode 100755 src/envs/sumo_rl_env/test_sumo_rl.sh diff --git a/SUMO_RL_INTEGRATION_PLAN.md b/SUMO_RL_INTEGRATION_PLAN.md deleted file mode 100644 index 47e4d339..00000000 --- a/SUMO_RL_INTEGRATION_PLAN.md +++ /dev/null @@ -1,663 +0,0 @@ -# SUMO-RL Integration Plan for OpenEnv - -**Date**: 2025-10-17 -**Status**: Design Phase -**Complexity**: High (Docker + SUMO system dependencies) - ---- - -## ๐Ÿค” ULTRATHINK ANALYSIS - -### What is SUMO-RL? 
- -**SUMO-RL** is a Reinforcement Learning environment for **Traffic Signal Control** using SUMO (Simulation of Urban MObility). - -- **Use Case**: Train RL agents to optimize traffic light timing to minimize vehicle delays -- **Main Class**: `SumoEnvironment` from `sumo_rl.environment.env` -- **APIs**: Supports both Gymnasium (single-agent) and PettingZoo (multi-agent) -- **Repository**: https://github.com/LucasAlegre/sumo-rl -- **Version**: 1.4.5 - -### How SUMO-RL Works - -1. **SUMO Simulator**: Microscopic traffic simulation -2. **Network Files**: `.net.xml` (road network) + `.rou.xml` (vehicle routes) -3. **Traffic Signals**: RL agent controls when lights change phases -4. **Observation**: Lane densities, queues, current phase, min_green flag -5. **Action**: Select next green phase (discrete action space) -6. **Reward**: Change in cumulative vehicle delay (default) - -### Example Usage - -```python -import gymnasium as gym -import sumo_rl - -env = gym.make('sumo-rl-v0', - net_file='nets/single-intersection.net.xml', - route_file='nets/single-intersection.rou.xml', - use_gui=False, - num_seconds=100000) - -obs, info = env.reset() -done = False -while not done: - action = env.action_space.sample() - obs, reward, terminated, truncated, info = env.step(action) - done = terminated or truncated -``` - ---- - -## ๐ŸŽฏ Integration Strategy - -### Follow Atari Pattern - -Like Atari, we'll create: -1. **models.py** - Data models -2. **server/sumo_environment.py** - Environment wrapper -3. **server/app.py** - FastAPI server -4. **server/Dockerfile** - Container with SUMO -5. 
**client.py** - HTTP client - -### Key Differences from Atari - -| Aspect | Atari | SUMO-RL | -|--------|-------|---------| -| **External Dependency** | ALE (pip installable) | SUMO (system package) | -| **Configuration** | Game name (simple) | Network + route files (complex) | -| **Observation** | Image pixels | Traffic metrics (vectors) | -| **Action** | Joystick actions | Traffic signal phases | -| **Docker Complexity** | Simple | High (need SUMO system install) | -| **File Dependencies** | None (ROMs bundled) | Network/route XML files required | - ---- - -## ๐Ÿ“‹ Technical Design - -### 1. Data Models (`models.py`) - -```python -from dataclasses import dataclass -from typing import List, Optional -from core.env_server import Action, Observation, State - -@dataclass -class SumoAction(Action): - """Action for SUMO environment - select next green phase.""" - phase_id: int # Which green phase to activate next - ts_id: str = "0" # Traffic signal ID (for multi-agent support later) - -@dataclass -class SumoObservation(Observation): - """Observation from SUMO environment.""" - observation: List[float] # Full observation vector - observation_shape: List[int] # Shape for reshaping - - # Observation components (for interpretability) - current_phase: Optional[int] = None - min_green_passed: Optional[bool] = None - lane_densities: Optional[List[float]] = None - lane_queues: Optional[List[float]] = None - - # Metadata - action_mask: Optional[List[int]] = None # Legal actions - sim_time: float = 0.0 # Current simulation time - - done: bool = False - reward: Optional[float] = None - -@dataclass -class SumoState(State): - """State of SUMO environment.""" - episode_id: str = "" - step_count: int = 0 - - # SUMO configuration - net_file: str = "" - route_file: str = "" - num_seconds: int = 20000 - delta_time: int = 5 - yellow_time: int = 2 - min_green: int = 5 - max_green: int = 50 - - # Runtime state - sim_time: float = 0.0 - total_vehicles: int = 0 - total_waiting_time: float = 
0.0 -``` - -### 2. Environment Wrapper (`server/sumo_environment.py`) - -```python -import uuid -from typing import Any, Dict, Literal, Optional -from core.env_server import Action, Environment, Observation -from ..models import SumoAction, SumoObservation, SumoState - -import os -os.environ.setdefault('SUMO_HOME', '/usr/share/sumo') - -from sumo_rl import SumoEnvironment as BaseSumoEnv - -class SumoEnvironment(Environment): - """ - SUMO-RL Environment wrapper for OpenEnv. - - Wraps the SUMO traffic signal control environment for single-agent RL. - - Args: - net_file: Path to SUMO network file (.net.xml) - route_file: Path to SUMO route file (.rou.xml) - num_seconds: Simulation duration in seconds - delta_time: Seconds between actions - yellow_time: Yellow phase duration - min_green: Minimum green time - max_green: Maximum green time - reward_fn: Reward function name - """ - - def __init__( - self, - net_file: str, - route_file: str, - num_seconds: int = 20000, - delta_time: int = 5, - yellow_time: int = 2, - min_green: int = 5, - max_green: int = 50, - reward_fn: str = "diff-waiting-time", - ): - super().__init__() - - # Store config - self.net_file = net_file - self.route_file = route_file - self.num_seconds = num_seconds - self.delta_time = delta_time - self.yellow_time = yellow_time - self.min_green = min_green - self.max_green = max_green - self.reward_fn = reward_fn - - # Create SUMO environment (single-agent mode) - self.env = BaseSumoEnv( - net_file=net_file, - route_file=route_file, - use_gui=False, # No GUI in Docker - single_agent=True, # Single-agent for OpenEnv - num_seconds=num_seconds, - delta_time=delta_time, - yellow_time=yellow_time, - min_green=min_green, - max_green=max_green, - reward_fn=reward_fn, - sumo_warnings=False, - ) - - # Initialize state - self._state = SumoState( - net_file=net_file, - route_file=route_file, - num_seconds=num_seconds, - delta_time=delta_time, - yellow_time=yellow_time, - min_green=min_green, - max_green=max_green, - 
) - - self._last_obs = None - self._last_info = None - - def reset(self) -> Observation: - """Reset the environment.""" - # Reset SUMO - obs, info = self.env.reset() - - # Update state - self._state.episode_id = str(uuid.uuid4()) - self._state.step_count = 0 - self._state.sim_time = 0.0 - - # Store for later - self._last_obs = obs - self._last_info = info - - return self._make_observation(obs, 0.0, False, info) - - def step(self, action: Action) -> Observation: - """Execute action.""" - if not isinstance(action, SumoAction): - raise ValueError(f"Expected SumoAction, got {type(action)}") - - # Validate action - if action.phase_id < 0 or action.phase_id >= self.env.action_space.n: - raise ValueError( - f"Invalid phase_id: {action.phase_id}. " - f"Valid range: [0, {self.env.action_space.n - 1}]" - ) - - # Execute in SUMO - obs, reward, terminated, truncated, info = self.env.step(action.phase_id) - done = terminated or truncated - - # Update state - self._state.step_count += 1 - self._state.sim_time = info.get('step', 0.0) - self._state.total_vehicles = info.get('system_total_running', 0) - self._state.total_waiting_time = info.get('system_total_waiting_time', 0.0) - - # Store for later - self._last_obs = obs - self._last_info = info - - return self._make_observation(obs, reward, done, info) - - @property - def state(self) -> SumoState: - """Get current state.""" - return self._state - - def _make_observation( - self, - obs: Any, - reward: float, - done: bool, - info: Dict - ) -> SumoObservation: - """Create SumoObservation from SUMO env output.""" - # Convert observation to list - if hasattr(obs, 'tolist'): - obs_list = obs.tolist() - else: - obs_list = list(obs) - - # Get action mask (all actions valid in SUMO-RL) - action_mask = list(range(self.env.action_space.n)) - - # Create observation - return SumoObservation( - observation=obs_list, - observation_shape=[len(obs_list)], - action_mask=action_mask, - sim_time=info.get('step', 0.0), - done=done, - reward=reward, - 
metadata={ - "num_green_phases": self.env.action_space.n, - "system_info": { - k: v for k, v in info.items() if k.startswith('system_') - }, - }, - ) -``` - -### 3. FastAPI Server (`server/app.py`) - -```python -import os -from core.env_server import create_fastapi_app -from ..models import SumoAction, SumoObservation -from .sumo_environment import SumoEnvironment - -# Get configuration from environment -net_file = os.getenv("SUMO_NET_FILE", "/app/nets/single-intersection.net.xml") -route_file = os.getenv("SUMO_ROUTE_FILE", "/app/nets/single-intersection.rou.xml") -num_seconds = int(os.getenv("SUMO_NUM_SECONDS", "20000")) -delta_time = int(os.getenv("SUMO_DELTA_TIME", "5")) -yellow_time = int(os.getenv("SUMO_YELLOW_TIME", "2")) -min_green = int(os.getenv("SUMO_MIN_GREEN", "5")) -max_green = int(os.getenv("SUMO_MAX_GREEN", "50")) -reward_fn = os.getenv("SUMO_REWARD_FN", "diff-waiting-time") - -# Create environment -env = SumoEnvironment( - net_file=net_file, - route_file=route_file, - num_seconds=num_seconds, - delta_time=delta_time, - yellow_time=yellow_time, - min_green=min_green, - max_green=max_green, - reward_fn=reward_fn, -) - -# Create FastAPI app -app = create_fastapi_app(env, SumoAction, SumoObservation) -``` - -### 4. 
Dockerfile (`server/Dockerfile`) - -```dockerfile -# Configurable base image -ARG BASE_IMAGE=envtorch-base:latest -FROM ${BASE_IMAGE} - -# Install SUMO -# SUMO is a microscopic traffic simulation package -RUN apt-get update && apt-get install -y --no-install-recommends \ - software-properties-common \ - && add-apt-repository ppa:sumo/stable \ - && apt-get update \ - && apt-get install -y --no-install-recommends \ - sumo \ - sumo-tools \ - && rm -rf /var/lib/apt/lists/* - -# Set SUMO_HOME -ENV SUMO_HOME=/usr/share/sumo - -# Install SUMO-RL and dependencies -RUN pip install --no-cache-dir \ - gymnasium>=0.28 \ - pettingzoo>=1.24.3 \ - numpy>=1.24.0 \ - pandas>=2.0.0 \ - sumolib>=1.14.0 \ - traci>=1.14.0 \ - sumo-rl>=1.4.5 - -# Copy OpenEnv core -COPY src/core/ /app/src/core/ - -# Copy SUMO-RL environment code -COPY src/envs/sumo_rl_env/ /app/src/envs/sumo_rl_env/ - -# Copy example networks -# We'll bundle a simple single-intersection example -COPY sumo-rl/sumo_rl/nets/single-intersection/ /app/nets/ - -# Environment variables (can be overridden at runtime) -ENV SUMO_NET_FILE=/app/nets/single-intersection.net.xml -ENV SUMO_ROUTE_FILE=/app/nets/single-intersection.rou.xml -ENV SUMO_NUM_SECONDS=20000 -ENV SUMO_DELTA_TIME=5 -ENV SUMO_YELLOW_TIME=2 -ENV SUMO_MIN_GREEN=5 -ENV SUMO_MAX_GREEN=50 -ENV SUMO_REWARD_FN=diff-waiting-time - -# Expose port -EXPOSE 8000 - -# Health check -HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ - CMD curl -f http://localhost:8000/health || exit 1 - -# Run the FastAPI server -CMD ["uvicorn", "envs.sumo_rl_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"] -``` - -### 5. HTTP Client (`client.py`) - -```python -from typing import Any, Dict -from core.http_env_client import HTTPEnvClient -from core.types import StepResult -from .models import SumoAction, SumoObservation, SumoState - -class SumoRLEnv(HTTPEnvClient[SumoAction, SumoObservation]): - """ - HTTP client for SUMO-RL environment. 
- - Example: - >>> env = SumoRLEnv.from_docker_image("sumo-rl-env:latest") - >>> result = env.reset() - >>> result = env.step(SumoAction(phase_id=1)) - >>> print(f"Reward: {result.reward}, Done: {result.done}") - >>> env.close() - """ - - def _step_payload(self, action: SumoAction) -> Dict[str, Any]: - """Convert action to JSON payload.""" - return { - "phase_id": action.phase_id, - "ts_id": action.ts_id, - } - - def _parse_result(self, payload: Dict[str, Any]) -> StepResult[SumoObservation]: - """Parse step result from JSON.""" - obs_data = payload.get("observation", {}) - - observation = SumoObservation( - observation=obs_data.get("observation", []), - observation_shape=obs_data.get("observation_shape", []), - current_phase=obs_data.get("current_phase"), - min_green_passed=obs_data.get("min_green_passed"), - lane_densities=obs_data.get("lane_densities"), - lane_queues=obs_data.get("lane_queues"), - action_mask=obs_data.get("action_mask", []), - sim_time=obs_data.get("sim_time", 0.0), - done=obs_data.get("done", False), - reward=obs_data.get("reward"), - metadata=obs_data.get("metadata", {}), - ) - - return StepResult( - observation=observation, - reward=payload.get("reward"), - done=payload.get("done", False), - ) - - def _parse_state(self, payload: Dict[str, Any]) -> SumoState: - """Parse state from JSON.""" - return SumoState( - episode_id=payload.get("episode_id", ""), - step_count=payload.get("step_count", 0), - net_file=payload.get("net_file", ""), - route_file=payload.get("route_file", ""), - num_seconds=payload.get("num_seconds", 20000), - delta_time=payload.get("delta_time", 5), - yellow_time=payload.get("yellow_time", 2), - min_green=payload.get("min_green", 5), - max_green=payload.get("max_green", 50), - sim_time=payload.get("sim_time", 0.0), - total_vehicles=payload.get("total_vehicles", 0), - total_waiting_time=payload.get("total_waiting_time", 0.0), - ) -``` - ---- - -## โš ๏ธ Critical Challenges - -### 1. 
SUMO System Dependency - -**Challenge**: SUMO must be installed at system level (apt-get), not just pip. - -**Solution**: -```dockerfile -RUN add-apt-repository ppa:sumo/stable && \ - apt-get update && \ - apt-get install -y sumo sumo-tools -``` - -### 2. Network Files Required - -**Challenge**: SUMO needs `.net.xml` and `.rou.xml` files to run. - -**Solutions**: -- **Bundle examples**: Copy simple networks from sumo-rl repo -- **Volume mount**: Let users mount their own networks -- **Default config**: Use single-intersection as default - -### 3. No GUI Support - -**Challenge**: Docker can't run SUMO GUI. - -**Solution**: Always use `use_gui=False` in Docker environment. - -### 4. Long Simulation Times - -**Challenge**: Traffic simulations can take minutes to complete. - -**Solution**: -- Set reasonable defaults (20000 seconds simulation time) -- Allow configuration via environment variables -- Document expected runtimes - -### 5. Multi-Agent Complexity - -**Challenge**: SUMO-RL supports multi-agent (multiple traffic lights). - -**Solution**: Start with single-agent only for OpenEnv integration. Multi-agent can be added later. 
- ---- - -## ๐Ÿ“Š Configuration Matrix - -| Variable | Default | Description | -|----------|---------|-------------| -| `SUMO_NET_FILE` | `/app/nets/single-intersection.net.xml` | Network topology file | -| `SUMO_ROUTE_FILE` | `/app/nets/single-intersection.rou.xml` | Vehicle routes file | -| `SUMO_NUM_SECONDS` | `20000` | Simulation duration | -| `SUMO_DELTA_TIME` | `5` | Seconds between actions | -| `SUMO_YELLOW_TIME` | `2` | Yellow phase duration | -| `SUMO_MIN_GREEN` | `5` | Minimum green time | -| `SUMO_MAX_GREEN` | `50` | Maximum green time | -| `SUMO_REWARD_FN` | `diff-waiting-time` | Reward function | - -### Available Reward Functions - -From SUMO-RL source: -- `diff-waiting-time` (default) - Change in cumulative waiting time -- `average-speed` - Average speed of vehicles -- `queue` - Total queue length -- `pressure` - Pressure (difference between incoming/outgoing vehicles) - ---- - -## ๐Ÿงช Testing Strategy - -### 1. Pre-Flight Checks -- Verify network files exist -- Check SUMO installation -- Validate Dockerfile syntax -- Test imports - -### 2. Docker Build Test -```bash -docker build -f src/envs/sumo_rl_env/server/Dockerfile -t sumo-rl-env:latest . -``` - -### 3. Runtime Tests -```bash -docker run -p 8000:8000 sumo-rl-env:latest - -curl http://localhost:8000/health -curl -X POST http://localhost:8000/reset -curl -X POST http://localhost:8000/step \ - -H "Content-Type: application/json" \ - -d '{"action": {"phase_id": 1, "ts_id": "0"}}' -``` - -### 4. 
Python Client Test -```python -from envs.sumo_rl_env import SumoRLEnv, SumoAction - -env = SumoRLEnv.from_docker_image("sumo-rl-env:latest") -result = env.reset() -result = env.step(SumoAction(phase_id=1)) -print(f"Reward: {result.reward}, Done: {result.done}") -env.close() -``` - ---- - -## ๐Ÿ“ฆ What to Bundle - -### Minimal Network Example - -Bundle the single-intersection example from sumo-rl: -``` -sumo-rl/sumo_rl/nets/single-intersection/ -โ”œโ”€โ”€ single-intersection.net.xml # Network topology -โ”œโ”€โ”€ single-intersection.rou.xml # Vehicle routes -``` - -This provides a working example out-of-the-box. - -### Additional Networks (Optional) - -Could bundle RESCO benchmarks for research: -- `grid4x4` - 4ร—4 grid of intersections -- `arterial4x4` - Arterial road network -- `cologne1` - Real-world Cologne network - -But start with single-intersection for simplicity. - ---- - -## ๐ŸŽฏ Implementation Plan - -### Phase 1: Core Implementation (4-6 hours) -1. Create `models.py` โœ“ (designed) -2. Create `server/sumo_environment.py` โœ“ (designed) -3. Create `server/app.py` โœ“ (designed) -4. Create `server/Dockerfile` โœ“ (designed) -5. Create `client.py` โœ“ (designed) - -### Phase 2: Testing (2-3 hours) -1. Build Docker image -2. Test basic functionality -3. Test different configurations -4. Verify reward functions work - -### Phase 3: Documentation (1-2 hours) -1. Write README.md -2. Create examples -3. Document network file format -4. Add to GitHub Actions - -### Phase 4: Integration (1 hour) -1. Add to `.github/workflows/docker-build.yml` -2. Update main README -3. Add to environments list - -**Total Estimate**: 8-12 hours - ---- - -## ๐Ÿš€ Next Steps - -1. **Create file structure** in `/Users/sanyambhutani/GH/OpenEnv/src/envs/sumo_rl_env/` -2. **Copy network files** from `/Users/sanyambhutani/OpenEnv/sumo-rl/sumo_rl/nets/` -3. **Implement all files** following the designs above -4. **Build and test Docker image** -5. **Create documentation** -6. 
**Add to GitHub Actions** - ---- - -## ๐Ÿ’ก Key Insights - -### Why SUMO-RL is Harder Than Atari - -1. **System Dependencies**: Atari (ale-py) is pip-installable, SUMO requires apt-get -2. **Configuration Complexity**: Atari just needs game name, SUMO needs network files -3. **Runtime**: Atari is fast, SUMO simulations can take minutes -4. **File Dependencies**: Atari bundles ROMs, SUMO needs user-provided networks - -### Why It's Still Doable - -1. **Single-Agent Mode**: Simplifies to standard Gymnasium API -2. **Bundle Example**: Include simple network to start immediately -3. **Environment Variables**: Easy runtime configuration -4. **Pattern Reuse**: Follow exact Atari pattern for consistency - ---- - -## ๐Ÿ“š References - -- [SUMO-RL GitHub](https://github.com/LucasAlegre/sumo-rl) -- [SUMO Documentation](https://sumo.dlr.de/docs/) -- [SUMO-RL Docs](https://lucasalegre.github.io/sumo-rl/) -- [RESCO Benchmarks Paper](https://people.engr.tamu.edu/guni/Papers/NeurIPS-signals.pdf) - ---- - -**Status**: Design complete, ready for implementation -**Complexity**: High (system dependencies + network files) -**Time Estimate**: 8-12 hours -**Confidence**: 85% (Dockerfile complexity is main risk) diff --git a/SUMO_RL_RISK_ANALYSIS.md b/SUMO_RL_RISK_ANALYSIS.md deleted file mode 100644 index 0f216b39..00000000 --- a/SUMO_RL_RISK_ANALYSIS.md +++ /dev/null @@ -1,505 +0,0 @@ -# SUMO-RL Integration: ULTRATHINK Risk Analysis - -**Date**: 2025-10-17 -**Status**: Deep Risk Assessment - ---- - -## โœ… Docker Eliminates PRIMARY Risk - -**YES - Docker solves the hardest problem!** - -| Risk | Without Docker | With Docker | -|------|---------------|-------------| -| **System Dependencies** | โŒ Nightmare | โœ… Solved | -| **Cross-platform** | โŒ Linux only | โœ… Works everywhere | -| **Installation** | โŒ Requires sudo | โœ… Just `docker run` | -| **Reproducibility** | โŒ "Works on my machine" | โœ… Identical | - -**Conclusion**: Docker takes away 80% of the pain. 
โœจ - ---- - -## โš ๏ธ Remaining Risks (Deep Analysis) - -### ๐Ÿ”ด HIGH RISK - -#### 1. **TraCI Connection Management in HTTP Server** - -**Issue**: `SumoEnvironment` uses class variable `CONNECTION_LABEL` that increments globally. - -```python -CONNECTION_LABEL = 0 # For traci multi-client support - -def __init__(self): - self.label = str(SumoEnvironment.CONNECTION_LABEL) - SumoEnvironment.CONNECTION_LABEL += 1 -``` - -**Risk**: In HTTP server with concurrent requests: -- Request 1 creates env (label=0) -- Request 2 creates env (label=1) -- Request 1 resets โ†’ closes connection label=0 -- Request 2 steps โ†’ tries to use label=1 -- **Potential conflict if requests overlap** - -**Likelihood**: Medium (depends on usage pattern) - -**Impact**: High (could cause simulation errors) - -**Mitigation**: -```python -# Option 1: Single environment instance (RECOMMENDED) -# Create ONE environment at server startup, reuse for all requests -env = SumoEnvironment(...) # Created once -app = create_fastapi_app(env, ...) # Reuses same env - -# Option 2: Thread-safe connection management -# Use threading locks around TraCI operations -``` - -**Decision**: Use single environment instance per container (same as Atari pattern). Each HTTP request uses the same environment. **SOLVES ISSUE**. - ---- - -#### 2. **LIBSUMO vs TraCI Performance Trade-off** - -**Background**: -```python -LIBSUMO = "LIBSUMO_AS_TRACI" in os.environ -``` - -- **TraCI**: Standard, supports GUI, slower (1x speed) -- **LIBSUMO**: No GUI, no parallel sims, faster (8x speed) - -**Risk**: Default TraCI could be too slow for RL training. 
- -**Likelihood**: High (traffic sims are inherently slow) - -**Impact**: Medium (training takes longer, not broken) - -**Mitigation**: -```dockerfile -# Option 1: Use TraCI (default, safer) -# No env var needed, works out of box - -# Option 2: Enable LIBSUMO for speed -ENV LIBSUMO_AS_TRACI=1 - -# Recommendation: Start with TraCI, add LIBSUMO as optimization later -``` - -**Decision**: Start with TraCI (default), document LIBSUMO option for advanced users. - ---- - -### ๐ŸŸก MEDIUM RISK - -#### 3. **Episode Reset Performance** - -**Issue**: Each `reset()` closes and restarts SUMO simulation. - -```python -def reset(self, seed=None, **kwargs): - if self.episode != 0: - self.close() # Closes previous simulation - self._start_simulation() # Starts new one -``` - -**Risk**: Reset could take 1-5 seconds (slow for RL training loop). - -**Likelihood**: High (this is how SUMO works) - -**Impact**: Medium (slows training, doesn't break it) - -**Mitigation**: -- Document expected reset time -- Use long episodes (`num_seconds=20000`) -- Consider warm-start optimizations later - -**Decision**: Accept this limitation, document it. Not a blocker. - ---- - -#### 4. **CSV Output Accumulation** - -**Issue**: Environment can write CSV metrics to disk. - -```python -def save_csv(self, out_csv_name, episode): - df.to_csv(out_csv_name + f"_conn{self.label}_ep{episode}" + ".csv") -``` - -**Risk**: In Docker, CSV files accumulate โ†’ disk space. - -**Likelihood**: Low (only if user enables CSV output) - -**Impact**: Low (disk space, not functionality) - -**Mitigation**: -```python -# In our wrapper, set out_csv_name=None (disables CSV) -env = SumoEnvironment( - ..., - out_csv_name=None, # Disable CSV output -) -``` - -**Decision**: Disable CSV output by default. Users can enable via volume mount if needed. - ---- - -#### 5. **Network File Path Resolution** - -**Issue**: SUMO needs absolute paths to `.net.xml` and `.rou.xml` files. 
- -**Risk**: If paths are wrong in Docker, simulation fails. - -**Likelihood**: Low (we control the paths) - -**Impact**: High (breaks everything if wrong) - -**Mitigation**: -```dockerfile -# Bundle networks at known paths -COPY sumo-rl/sumo_rl/nets/single-intersection/ /app/nets/ - -# Set absolute paths as defaults -ENV SUMO_NET_FILE=/app/nets/single-intersection.net.xml -ENV SUMO_ROUTE_FILE=/app/nets/single-intersection.rou.xml -``` - -**Decision**: Bundle example networks, use absolute paths. Test during build. - ---- - -#### 6. **Dynamic Observation/Action Spaces** - -**Issue**: Different networks โ†’ different action/observation sizes. - -```python -# Action space size = number of traffic signal phases (varies) -self.action_space = gym.spaces.Discrete(num_green_phases) - -# Observation size = depends on number of lanes (varies) -obs_size = num_green_phases + 1 + 2*num_lanes -``` - -**Risk**: OpenEnv expects fixed-size spaces? - -**Likelihood**: Low (we use single network by default) - -**Impact**: Medium (breaks if user changes network) - -**Mitigation**: -- Use single-intersection as default (fixed sizes) -- Document that changing networks may change spaces -- Future: Make spaces configurable - -**Decision**: Not a blocker. Start with single network, document clearly. - ---- - -### ๐ŸŸข LOW RISK - -#### 7. **SUMO Version Compatibility** - -**Issue**: `ppa:sumo/stable` might update SUMO version over time. - -**Risk**: New SUMO version breaks sumo-rl compatibility. - -**Likelihood**: Low (SUMO is stable) - -**Impact**: Medium (breaks after rebuild) - -**Mitigation**: -```dockerfile -# Option 1: Pin SUMO version (if available) -RUN apt-get install -y sumo=1.14.0 - -# Option 2: Pin sumolib/traci versions -RUN pip install sumolib==1.14.0 traci==1.14.0 - -# Option 3: Accept latest (simpler, usually works) -``` - -**Decision**: Start with latest, pin if issues arise. - ---- - -#### 8. 
**sumolib/traci vs System SUMO Mismatch** - -**Issue**: Pip packages `sumolib` and `traci` should match system SUMO version. - -**Risk**: Version mismatch causes compatibility issues. - -**Likelihood**: Low (sumo-rl handles this) - -**Impact**: Medium (simulation errors) - -**Mitigation**: -```dockerfile -# Install SUMO first -RUN apt-get install -y sumo sumo-tools - -# Then install matching Python packages -RUN pip install sumolib>=1.14.0 traci>=1.14.0 -``` - -**Decision**: Use `>=` versions, should work. Test during build. - ---- - -#### 9. **PettingZoo Version Compatibility** - -**Issue**: Code has fallback for PettingZoo 1.24 vs 1.25+ - -```python -try: - from pettingzoo.utils import AgentSelector # 1.25+ -except ImportError: - from pettingzoo.utils import agent_selector as AgentSelector # 1.24 -``` - -**Risk**: Version incompatibility breaks import. - -**Likelihood**: Low (pyproject.toml specifies `pettingzoo>=1.24.3`) - -**Impact**: Low (import error, easy to debug) - -**Mitigation**: -```dockerfile -RUN pip install pettingzoo>=1.24.3 -``` - -**Decision**: Use version spec from pyproject.toml. - ---- - -#### 10. **Memory Usage with Many Vehicles** - -**Issue**: Large traffic networks with thousands of vehicles โ†’ high memory. - -**Risk**: Container OOM (out of memory). - -**Likelihood**: Low (single-intersection is small) - -**Impact**: High (container crash) - -**Mitigation**: -- Use small default network (single-intersection) -- Document memory requirements for large networks -- Docker memory limits (optional) - -**Decision**: Not a blocker. Document memory requirements. - ---- - -#### 11. **Simulation Determinism** - -**Issue**: Default `sumo_seed="random"` โ†’ non-deterministic. - -**Risk**: Can't reproduce training runs. 
- -**Likelihood**: High (default is random) - -**Impact**: Low (science issue, not functionality) - -**Mitigation**: -```python -# Allow seed control via environment variable -sumo_seed = int(os.getenv("SUMO_SEED", "42")) # Default fixed seed - -# Or keep random, document it -sumo_seed = os.getenv("SUMO_SEED", "random") -``` - -**Decision**: Default to fixed seed (42) for reproducibility. Document how to use random. - ---- - -#### 12. **Headless Operation (No GUI)** - -**Issue**: We force `use_gui=False` in Docker. - -**Risk**: Users might want to see simulation GUI. - -**Likelihood**: Low (Docker is headless) - -**Impact**: Low (convenience feature) - -**Mitigation**: -- Document that GUI is not available in Docker -- Suggest local development for GUI -- Future: VNC access to container GUI - -**Decision**: Not a blocker. GUI doesn't work in Docker anyway. - ---- - -#### 13. **Docker Image Size** - -**Issue**: SUMO + dependencies โ†’ large image. - -**Estimate**: -- Base: ~200MB -- SUMO: ~500MB -- Python packages: ~200MB -- **Total: ~900MB-1GB** - -**Risk**: Large downloads, storage. - -**Likelihood**: High (definitely will be large) - -**Impact**: Low (acceptable for complex sim) - -**Mitigation**: -- Multi-stage builds (future optimization) -- Clear documentation of size -- Accept it (complexity requires space) - -**Decision**: Accept ~1GB image size. Not a blocker. - ---- - -#### 14. **Long Simulation Times** - -**Issue**: Traffic simulations take time (minutes per episode). - -**Example**: 20,000 simulated seconds with delta_time=5 โ†’ 4,000 steps per episode. - -**Risk**: RL training is slow. - -**Likelihood**: High (inherent to traffic simulation) - -**Impact**: Medium (slower research, not broken) - -**Mitigation**: -- Document expected times -- Recommend shorter episodes for quick tests -- Suggest LIBSUMO for speedup - -**Decision**: Document clearly. Not a technical blocker. 
- ---- - -## ๐Ÿ“Š Risk Summary - -| Risk | Severity | Likelihood | Mitigation Status | -|------|----------|-----------|-------------------| -| TraCI Connection Management | ๐Ÿ”ด High | Medium | โœ… Solved (single env instance) | -| LIBSUMO vs TraCI | ๐Ÿ”ด High | High | โœ… Mitigated (default TraCI, doc LIBSUMO) | -| Episode Reset Performance | ๐ŸŸก Medium | High | โœ… Accepted (document) | -| CSV Output Accumulation | ๐ŸŸก Medium | Low | โœ… Solved (disable by default) | -| Network File Paths | ๐ŸŸก Medium | Low | โœ… Solved (bundle at known paths) | -| Dynamic Spaces | ๐ŸŸก Medium | Low | โœ… Accepted (document) | -| SUMO Version | ๐ŸŸข Low | Low | โœ… Accepted (use latest) | -| sumolib/traci Mismatch | ๐ŸŸข Low | Low | โœ… Mitigated (>=1.14.0) | -| PettingZoo Version | ๐ŸŸข Low | Low | โœ… Mitigated (>=1.24.3) | -| Memory Usage | ๐ŸŸข Low | Low | โœ… Accepted (document) | -| Simulation Determinism | ๐ŸŸข Low | High | โœ… Solved (default fixed seed) | -| No GUI | ๐ŸŸข Low | Low | โœ… Accepted (Docker is headless) | -| Image Size | ๐ŸŸข Low | High | โœ… Accepted (~1GB) | -| Long Sim Times | ๐ŸŸข Low | High | โœ… Accepted (document) | - ---- - -## โœ… Final Risk Assessment - -### Overall Risk Level: **LOW-MEDIUM** โœ… - -### Key Findings: - -1. **Docker solves the hardest problem** (system dependencies) โœ… -2. **No critical blockers** - all risks have mitigations โœ… -3. **Main concerns are performance** (speed, memory) - acceptable for simulation โœ… -4. **Connection management solved** by single env instance pattern โœ… - -### Recommended Mitigations: - -```python -# 1. 
Single environment instance per container -env = SumoEnvironment( - net_file="/app/nets/single-intersection.net.xml", - route_file="/app/nets/single-intersection.rou.xml", - use_gui=False, # No GUI in Docker - single_agent=True, # Single-agent mode - num_seconds=20000, - sumo_seed=42, # Fixed seed for reproducibility - out_csv_name=None, # Disable CSV output - sumo_warnings=False, # Quiet -) - -# 2. Reuse for all HTTP requests -app = create_fastapi_app(env, SumoAction, SumoObservation) -``` - -```dockerfile -# 3. Bundle network files at known paths -COPY sumo-rl/sumo_rl/nets/single-intersection/ /app/nets/ - -# 4. Set SUMO_HOME -ENV SUMO_HOME=/usr/share/sumo - -# 5. Don't enable LIBSUMO by default (safer) -# ENV LIBSUMO_AS_TRACI=1 # Optional for advanced users -``` - ---- - -## ๐ŸŽฏ Confidence Level - -**Original**: 85% confident -**After Deep Analysis**: **95% confident** โœ… - -**Reasons for Increased Confidence**: -1. All high-risk items have clear mitigations -2. Docker architecture naturally solves connection management -3. Pattern matches Atari (proven to work) -4. Risks are mostly performance/documentation, not functionality -5. No unexpected blockers found - ---- - -## ๐Ÿš€ Ready to Implement - -**Recommendation**: **PROCEED WITH IMPLEMENTATION** โœ… - -The risks are manageable and well-understood. Docker makes this integration feasible and clean. - -**Estimated Effort**: 8-12 hours (unchanged) - -**Success Probability**: 95% - ---- - -## ๐Ÿ“ Documentation Requirements - -Based on risk analysis, must document: - -1. **Performance expectations**: - - Reset takes 1-5 seconds - - Episodes can take minutes - - LIBSUMO option for 8x speedup - -2. **Network files**: - - Default: single-intersection (bundled) - - Custom: mount volume with your .net.xml/.rou.xml - -3. **Reproducibility**: - - Default seed=42 (deterministic) - - Set SUMO_SEED=random for stochastic - -4. 
**Limitations**: - - No GUI in Docker - - Single-agent only (v1) - - Fixed network per container - -5. **Memory requirements**: - - Small networks: ~500MB - - Large networks: 2-4GB - - Document scaling - ---- - -**Analysis Complete**: All risks identified, mitigated, and documented. โœ… diff --git a/src/envs/sumo_rl_env/models.py b/src/envs/sumo_rl_env/models.py index 611b0eed..6c73092b 100644 --- a/src/envs/sumo_rl_env/models.py +++ b/src/envs/sumo_rl_env/models.py @@ -54,8 +54,8 @@ class SumoObservation(Observation): metadata: Additional info (system metrics, etc.) """ - observation: List[float] - observation_shape: List[int] + observation: List[float] = field(default_factory=list) + observation_shape: List[int] = field(default_factory=list) action_mask: List[int] = field(default_factory=list) sim_time: float = 0.0 done: bool = False diff --git a/src/envs/sumo_rl_env/server/Dockerfile b/src/envs/sumo_rl_env/server/Dockerfile index 618ebe88..cb5527bc 100644 --- a/src/envs/sumo_rl_env/server/Dockerfile +++ b/src/envs/sumo_rl_env/server/Dockerfile @@ -13,14 +13,10 @@ ARG BASE_IMAGE=envtorch-base:latest FROM ${BASE_IMAGE} # Install SUMO system dependencies -# SUMO is a microscopic traffic simulation package +# SUMO is available in Debian repositories RUN apt-get update && apt-get install -y --no-install-recommends \ - software-properties-common \ - && add-apt-repository ppa:sumo/stable \ - && apt-get update \ - && apt-get install -y --no-install-recommends \ - sumo \ - sumo-tools \ + sumo \ + sumo-tools \ && rm -rf /var/lib/apt/lists/* # Set SUMO_HOME environment variable diff --git a/src/envs/sumo_rl_env/test_sumo_rl.sh b/src/envs/sumo_rl_env/test_sumo_rl.sh new file mode 100755 index 00000000..61265c73 --- /dev/null +++ b/src/envs/sumo_rl_env/test_sumo_rl.sh @@ -0,0 +1,220 @@ +#!/bin/bash +# Complete SUMO-RL Integration Test Script +# Run this to verify everything works! 
+ +set -e # Exit on error + +echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" +echo "๐Ÿš€ SUMO-RL Environment Test Script" +echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" +echo "" + +# Navigate to repo root +cd /Users/sanyambhutani/GH/OpenEnv + +echo "๐Ÿ“ Working directory: $(pwd)" +echo "" + +# Step 1: Check if base image exists +echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" +echo "Step 1: Checking for base image..." +echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" + +if docker images | grep -q "envtorch-base.*latest"; then + echo "โœ… envtorch-base:latest found" +else + echo "โš ๏ธ envtorch-base:latest not found - building it now..." + echo "" + docker build -t envtorch-base:latest -f src/core/containers/images/Dockerfile . + echo "" + echo "โœ… Base image built successfully" +fi +echo "" + +# Step 2: Build SUMO-RL environment +echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" +echo "Step 2: Building SUMO-RL environment image..." +echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" +echo "โณ This will take 5-10 minutes (installing SUMO)..." +echo "" + +docker build -f src/envs/sumo_rl_env/server/Dockerfile -t sumo-rl-env:latest . 
+ +echo "" +echo "โœ… SUMO-RL environment built successfully" +echo "" + +# Check image size +IMAGE_SIZE=$(docker images sumo-rl-env:latest --format "{{.Size}}") +echo "๐Ÿ“ฆ Image size: $IMAGE_SIZE" +echo "" + +# Step 3: Start container +echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" +echo "Step 3: Starting SUMO-RL container..." +echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" + +# Stop any existing container +docker stop sumo-rl-test 2>/dev/null || true +docker rm sumo-rl-test 2>/dev/null || true + +# Start new container +docker run -d -p 8000:8000 --name sumo-rl-test sumo-rl-env:latest + +echo "โณ Waiting for container to start..." +sleep 5 + +# Check if container is running +if docker ps | grep -q sumo-rl-test; then + echo "โœ… Container is running" +else + echo "โŒ Container failed to start!" + echo "Logs:" + docker logs sumo-rl-test + exit 1 +fi +echo "" + +# Step 4: Test health endpoint +echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" +echo "Step 4: Testing health endpoint..." +echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" + +HEALTH_RESPONSE=$(curl -s http://localhost:8000/health) +echo "Response: $HEALTH_RESPONSE" + +if echo "$HEALTH_RESPONSE" | grep -q "healthy"; then + echo "โœ… Health check passed" +else + echo "โŒ Health check failed!" 
+ exit 1 +fi +echo "" + +# Step 5: Test reset endpoint +echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" +echo "Step 5: Testing reset endpoint..." +echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" +echo "โณ This may take 3-5 seconds (SUMO simulation starting)..." + +RESET_RESPONSE=$(curl -s -X POST http://localhost:8000/reset) + +if echo "$RESET_RESPONSE" | jq -e '.observation.observation' > /dev/null 2>&1; then + echo "โœ… Reset successful" + + # Extract observation details + OBS_SHAPE=$(echo "$RESET_RESPONSE" | jq '.observation.observation_shape') + ACTION_MASK=$(echo "$RESET_RESPONSE" | jq '.observation.action_mask') + + echo " ๐Ÿ“Š Observation shape: $OBS_SHAPE" + echo " ๐ŸŽฎ Available actions: $ACTION_MASK" +else + echo "โŒ Reset failed!" + echo "Response: $RESET_RESPONSE" + exit 1 +fi +echo "" + +# Step 6: Test step endpoint +echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" +echo "Step 6: Testing step endpoint (taking 5 actions)..." 
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+
+for i in {1..5}; do
+    # Take action (cycle through phases 0-1)
+    PHASE_ID=$((i % 2))
+
+    STEP_RESPONSE=$(curl -s -X POST http://localhost:8000/step \
+        -H "Content-Type: application/json" \
+        -d "{\"action\": {\"phase_id\": $PHASE_ID, \"ts_id\": \"0\"}}" || true)
+
+    if echo "$STEP_RESPONSE" | jq -e '.reward' > /dev/null 2>&1; then
+        REWARD=$(echo "$STEP_RESPONSE" | jq '.reward')
+        DONE=$(echo "$STEP_RESPONSE" | jq '.done')
+        echo " Step $i: phase=$PHASE_ID, reward=$REWARD, done=$DONE"
+    else
+        echo "❌ Step $i failed!"
+        echo "Response: $STEP_RESPONSE"
+        exit 1
+    fi
+done
+
+echo "✅ All steps successful"
+echo ""
+
+# Step 7: Test state endpoint (curl is guarded so a failed request reaches the diagnostic branch)
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo "Step 7: Testing state endpoint..."
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+
+STATE_RESPONSE=$(curl -s http://localhost:8000/state || true)
+
+if echo "$STATE_RESPONSE" | jq -e '.episode_id' > /dev/null 2>&1; then
+    echo "✅ State endpoint working"
+
+    # Extract state details
+    EPISODE_ID=$(echo "$STATE_RESPONSE" | jq -r '.episode_id')
+    STEP_COUNT=$(echo "$STATE_RESPONSE" | jq '.step_count')
+    SIM_TIME=$(echo "$STATE_RESPONSE" | jq '.sim_time')
+    TOTAL_VEHICLES=$(echo "$STATE_RESPONSE" | jq '.total_vehicles')
+
+    echo " 📝 Episode ID: ${EPISODE_ID:0:8}..."
+    echo " 🔢 Step count: $STEP_COUNT"
+    echo " ⏱️ Simulation time: $SIM_TIME seconds"
+    echo " 🚗 Total vehicles: $TOTAL_VEHICLES"
+else
+    echo "❌ State endpoint failed!"
+    echo "Response: $STATE_RESPONSE"
+    exit 1
+fi
+echo ""
+
+# Step 8: Check logs for errors (advisory only: findings are reported but do not fail the run)
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo "Step 8: Checking container logs for errors..."
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+
+LOGS=$(docker logs sumo-rl-test 2>&1)
+
+# Check for Python errors (but ignore LoggerMode.Error which is expected); -q keeps the test itself silent
+if echo "$LOGS" | grep -i "error\|exception\|traceback" | grep -qv "LoggerMode.Error"; then
+    echo "⚠️ Found errors in logs:"
+    echo "$LOGS" | grep -i "error\|exception\|traceback" | grep -v "LoggerMode.Error"
+else
+    echo "✅ No errors found in logs"
+fi
+echo ""
+
+# Step 9: Cleanup
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo "Step 9: Cleanup..."
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+
+echo "🧹 Stopping and removing test container..."
+docker stop sumo-rl-test
+docker rm sumo-rl-test
+
+echo "✅ Cleanup complete"
+echo ""
+
+# Final summary
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo "🎉 ALL TESTS PASSED!"
+echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" +echo "" +echo "Summary:" +echo " โœ… Docker image built successfully ($IMAGE_SIZE)" +echo " โœ… Container started and ran" +echo " โœ… Health endpoint working" +echo " โœ… Reset endpoint working" +echo " โœ… Step endpoint working (5 actions executed)" +echo " โœ… State endpoint working" +echo " โœ… No errors in logs" +echo "" +echo "๐ŸŽฏ SUMO-RL integration is working perfectly!" +echo "" +echo "Next steps:" +echo " 1. Test Python client: python examples/sumo_rl_simple.py" +echo " 2. Push to GitHub to trigger CI/CD" +echo " 3. Use for RL training!" +echo "" From 99c97100484f35c2919878c83da7278e8b22b1e6 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Fri, 17 Oct 2025 13:13:05 -0700 Subject: [PATCH 11/13] mv configs --- .../nets}/single-intersection/single-intersection.edg.xml | 0 .../nets}/single-intersection/single-intersection.net.xml | 0 .../nets}/single-intersection/single-intersection.nod.xml | 0 .../nets}/single-intersection/single-intersection.rou.xml | 0 .../nets}/single-intersection/single-intersection.sumocfg | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename {nets => src/envs/sumo_rl_env/nets}/single-intersection/single-intersection.edg.xml (100%) rename {nets => src/envs/sumo_rl_env/nets}/single-intersection/single-intersection.net.xml (100%) rename {nets => src/envs/sumo_rl_env/nets}/single-intersection/single-intersection.nod.xml (100%) rename {nets => src/envs/sumo_rl_env/nets}/single-intersection/single-intersection.rou.xml (100%) rename {nets => src/envs/sumo_rl_env/nets}/single-intersection/single-intersection.sumocfg (100%) diff --git a/nets/single-intersection/single-intersection.edg.xml b/src/envs/sumo_rl_env/nets/single-intersection/single-intersection.edg.xml similarity index 100% rename from nets/single-intersection/single-intersection.edg.xml 
rename to src/envs/sumo_rl_env/nets/single-intersection/single-intersection.edg.xml diff --git a/nets/single-intersection/single-intersection.net.xml b/src/envs/sumo_rl_env/nets/single-intersection/single-intersection.net.xml similarity index 100% rename from nets/single-intersection/single-intersection.net.xml rename to src/envs/sumo_rl_env/nets/single-intersection/single-intersection.net.xml diff --git a/nets/single-intersection/single-intersection.nod.xml b/src/envs/sumo_rl_env/nets/single-intersection/single-intersection.nod.xml similarity index 100% rename from nets/single-intersection/single-intersection.nod.xml rename to src/envs/sumo_rl_env/nets/single-intersection/single-intersection.nod.xml diff --git a/nets/single-intersection/single-intersection.rou.xml b/src/envs/sumo_rl_env/nets/single-intersection/single-intersection.rou.xml similarity index 100% rename from nets/single-intersection/single-intersection.rou.xml rename to src/envs/sumo_rl_env/nets/single-intersection/single-intersection.rou.xml diff --git a/nets/single-intersection/single-intersection.sumocfg b/src/envs/sumo_rl_env/nets/single-intersection/single-intersection.sumocfg similarity index 100% rename from nets/single-intersection/single-intersection.sumocfg rename to src/envs/sumo_rl_env/nets/single-intersection/single-intersection.sumocfg From 475a32fc71711f4ff19a922d24f5b186bdee6936 Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Fri, 17 Oct 2025 13:13:19 -0700 Subject: [PATCH 12/13] Update Dockerfile --- src/envs/sumo_rl_env/server/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/envs/sumo_rl_env/server/Dockerfile b/src/envs/sumo_rl_env/server/Dockerfile index cb5527bc..161c6e00 100644 --- a/src/envs/sumo_rl_env/server/Dockerfile +++ b/src/envs/sumo_rl_env/server/Dockerfile @@ -41,7 +41,7 @@ COPY src/envs/sumo_rl_env/ /app/src/envs/sumo_rl_env/ # Copy example network files # Default: single-intersection (simple 4-way intersection) -COPY 
nets/single-intersection/ /app/nets/single-intersection/ +COPY src/envs/sumo_rl_env/nets/single-intersection/ /app/nets/single-intersection/ # SUMO environment variables (can be overridden at runtime) ENV SUMO_NET_FILE=/app/nets/single-intersection/single-intersection.net.xml From 17bf4c4af458da2ee35b7c8e58af32dee28b6d0c Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Fri, 17 Oct 2025 13:13:40 -0700 Subject: [PATCH 13/13] fix Docker --- src/envs/sumo_rl_env/server/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/envs/sumo_rl_env/server/Dockerfile b/src/envs/sumo_rl_env/server/Dockerfile index 161c6e00..d1495283 100644 --- a/src/envs/sumo_rl_env/server/Dockerfile +++ b/src/envs/sumo_rl_env/server/Dockerfile @@ -36,10 +36,10 @@ RUN pip install --no-cache-dir \ # Copy OpenEnv core (base image already set WORKDIR=/app) COPY src/core/ /app/src/core/ -# Copy SUMO-RL environment code +# Copy SUMO-RL environment code (includes nets/) COPY src/envs/sumo_rl_env/ /app/src/envs/sumo_rl_env/ -# Copy example network files +# Copy example network files to expected location # Default: single-intersection (simple 4-way intersection) COPY src/envs/sumo_rl_env/nets/single-intersection/ /app/nets/single-intersection/