From 43c3c905c0ad71fc9c5973174d437b5b1d569ee0 Mon Sep 17 00:00:00 2001
From: Sanyam Bhutani <sanyambhutani@meta.com>
Date: Fri, 17 Oct 2025 11:27:11 -0700
Subject: [PATCH 01/13] add model

---
 SUMO_RL_INTEGRATION_PLAN.md    | 663 +++++++++++++++++++++++++++++++++
 SUMO_RL_RISK_ANALYSIS.md       | 505 +++++++++++++++++++++++++
 src/envs/sumo_rl_env/models.py | 110 ++++++
 3 files changed, 1278 insertions(+)
 create mode 100644 SUMO_RL_INTEGRATION_PLAN.md
 create mode 100644 SUMO_RL_RISK_ANALYSIS.md
 create mode 100644 src/envs/sumo_rl_env/models.py

diff --git a/SUMO_RL_INTEGRATION_PLAN.md b/SUMO_RL_INTEGRATION_PLAN.md
new file mode 100644
index 00000000..47e4d339
--- /dev/null
+++ b/SUMO_RL_INTEGRATION_PLAN.md
@@ -0,0 +1,663 @@
+# SUMO-RL Integration Plan for OpenEnv
+
+**Date**: 2025-10-17
+**Status**: Design Phase
+**Complexity**: High (Docker + SUMO system dependencies)
+
+---
+
+## 🤔 ULTRATHINK ANALYSIS
+
+### What is SUMO-RL?
+
+**SUMO-RL** is a Reinforcement Learning environment for **Traffic Signal Control** using SUMO (Simulation of Urban MObility).
+
+- **Use Case**: Train RL agents to optimize traffic light timing to minimize vehicle delays
+- **Main Class**: `SumoEnvironment` from `sumo_rl.environment.env`
+- **APIs**: Supports both Gymnasium (single-agent) and PettingZoo (multi-agent)
+- **Repository**: https://github.com/LucasAlegre/sumo-rl
+- **Version**: 1.4.5
+
+### How SUMO-RL Works
+
+1. **SUMO Simulator**: Microscopic traffic simulation
+2. **Network Files**: `.net.xml` (road network) + `.rou.xml` (vehicle routes)
+3. **Traffic Signals**: RL agent controls when lights change phases
+4. **Observation**: Lane densities, queues, current phase, min_green flag
+5. **Action**: Select next green phase (discrete action space)
+6. **Reward**: Change in cumulative vehicle delay (default)
+
+### Example Usage
+
+```python
+import gymnasium as gym
+import sumo_rl
+
+env = gym.make('sumo-rl-v0',
+                net_file='nets/single-intersection.net.xml',
+                route_file='nets/single-intersection.rou.xml',
+                use_gui=False,
+                num_seconds=100000)
+
+obs, info = env.reset()
+done = False
+while not done:
+    action = env.action_space.sample()
+    obs, reward, terminated, truncated, info = env.step(action)
+    done = terminated or truncated
+```
+
+---
+
+## 🎯 Integration Strategy
+
+### Follow Atari Pattern
+
+Like Atari, we'll create:
+1. **models.py** - Data models
+2. **server/sumo_environment.py** - Environment wrapper
+3. **server/app.py** - FastAPI server
+4. **server/Dockerfile** - Container with SUMO
+5. **client.py** - HTTP client
+
+### Key Differences from Atari
+
+| Aspect | Atari | SUMO-RL |
+|--------|-------|---------|
+| **External Dependency** | ALE (pip installable) | SUMO (system package) |
+| **Configuration** | Game name (simple) | Network + route files (complex) |
+| **Observation** | Image pixels | Traffic metrics (vectors) |
+| **Action** | Joystick actions | Traffic signal phases |
+| **Docker Complexity** | Simple | High (need SUMO system install) |
+| **File Dependencies** | None (ROMs bundled) | Network/route XML files required |
+
+---
+
+## 📋 Technical Design
+
+### 1. Data Models (`models.py`)
+
+```python
+from dataclasses import dataclass
+from typing import List, Optional
+from core.env_server import Action, Observation, State
+
+@dataclass
+class SumoAction(Action):
+    """Action for SUMO environment - select next green phase."""
+    phase_id: int  # Which green phase to activate next
+    ts_id: str = "0"  # Traffic signal ID (for multi-agent support later)
+
+@dataclass
+class SumoObservation(Observation):
+    """Observation from SUMO environment."""
+    observation: List[float]  # Full observation vector
+    observation_shape: List[int]  # Shape for reshaping
+
+    # Observation components (for interpretability)
+    current_phase: Optional[int] = None
+    min_green_passed: Optional[bool] = None
+    lane_densities: Optional[List[float]] = None
+    lane_queues: Optional[List[float]] = None
+
+    # Metadata
+    action_mask: Optional[List[int]] = None  # Legal actions
+    sim_time: float = 0.0  # Current simulation time
+
+    done: bool = False
+    reward: Optional[float] = None
+
+@dataclass
+class SumoState(State):
+    """State of SUMO environment."""
+    episode_id: str = ""
+    step_count: int = 0
+
+    # SUMO configuration
+    net_file: str = ""
+    route_file: str = ""
+    num_seconds: int = 20000
+    delta_time: int = 5
+    yellow_time: int = 2
+    min_green: int = 5
+    max_green: int = 50
+
+    # Runtime state
+    sim_time: float = 0.0
+    total_vehicles: int = 0
+    total_waiting_time: float = 0.0
+```
+
+### 2. Environment Wrapper (`server/sumo_environment.py`)
+
+```python
+import uuid
+from typing import Any, Dict, Literal, Optional
+from core.env_server import Action, Environment, Observation
+from ..models import SumoAction, SumoObservation, SumoState
+
+import os
+os.environ.setdefault('SUMO_HOME', '/usr/share/sumo')
+
+from sumo_rl import SumoEnvironment as BaseSumoEnv
+
+class SumoEnvironment(Environment):
+    """
+    SUMO-RL Environment wrapper for OpenEnv.
+
+    Wraps the SUMO traffic signal control environment for single-agent RL.
+
+    Args:
+        net_file: Path to SUMO network file (.net.xml)
+        route_file: Path to SUMO route file (.rou.xml)
+        num_seconds: Simulation duration in seconds
+        delta_time: Seconds between actions
+        yellow_time: Yellow phase duration
+        min_green: Minimum green time
+        max_green: Maximum green time
+        reward_fn: Reward function name
+    """
+
+    def __init__(
+        self,
+        net_file: str,
+        route_file: str,
+        num_seconds: int = 20000,
+        delta_time: int = 5,
+        yellow_time: int = 2,
+        min_green: int = 5,
+        max_green: int = 50,
+        reward_fn: str = "diff-waiting-time",
+    ):
+        super().__init__()
+
+        # Store config
+        self.net_file = net_file
+        self.route_file = route_file
+        self.num_seconds = num_seconds
+        self.delta_time = delta_time
+        self.yellow_time = yellow_time
+        self.min_green = min_green
+        self.max_green = max_green
+        self.reward_fn = reward_fn
+
+        # Create SUMO environment (single-agent mode)
+        self.env = BaseSumoEnv(
+            net_file=net_file,
+            route_file=route_file,
+            use_gui=False,  # No GUI in Docker
+            single_agent=True,  # Single-agent for OpenEnv
+            num_seconds=num_seconds,
+            delta_time=delta_time,
+            yellow_time=yellow_time,
+            min_green=min_green,
+            max_green=max_green,
+            reward_fn=reward_fn,
+            sumo_warnings=False,
+        )
+
+        # Initialize state
+        self._state = SumoState(
+            net_file=net_file,
+            route_file=route_file,
+            num_seconds=num_seconds,
+            delta_time=delta_time,
+            yellow_time=yellow_time,
+            min_green=min_green,
+            max_green=max_green,
+        )
+
+        self._last_obs = None
+        self._last_info = None
+
+    def reset(self) -> Observation:
+        """Reset the environment."""
+        # Reset SUMO
+        obs, info = self.env.reset()
+
+        # Update state
+        self._state.episode_id = str(uuid.uuid4())
+        self._state.step_count = 0
+        self._state.sim_time = 0.0
+
+        # Store for later
+        self._last_obs = obs
+        self._last_info = info
+
+        return self._make_observation(obs, 0.0, False, info)
+
+    def step(self, action: Action) -> Observation:
+        """Execute action."""
+        if not isinstance(action, SumoAction):
+            raise ValueError(f"Expected SumoAction, got {type(action)}")
+
+        # Validate action
+        if action.phase_id < 0 or action.phase_id >= self.env.action_space.n:
+            raise ValueError(
+                f"Invalid phase_id: {action.phase_id}. "
+                f"Valid range: [0, {self.env.action_space.n - 1}]"
+            )
+
+        # Execute in SUMO
+        obs, reward, terminated, truncated, info = self.env.step(action.phase_id)
+        done = terminated or truncated
+
+        # Update state
+        self._state.step_count += 1
+        self._state.sim_time = info.get('step', 0.0)
+        self._state.total_vehicles = info.get('system_total_running', 0)
+        self._state.total_waiting_time = info.get('system_total_waiting_time', 0.0)
+
+        # Store for later
+        self._last_obs = obs
+        self._last_info = info
+
+        return self._make_observation(obs, reward, done, info)
+
+    @property
+    def state(self) -> SumoState:
+        """Get current state."""
+        return self._state
+
+    def _make_observation(
+        self,
+        obs: Any,
+        reward: float,
+        done: bool,
+        info: Dict
+    ) -> SumoObservation:
+        """Create SumoObservation from SUMO env output."""
+        # Convert observation to list
+        if hasattr(obs, 'tolist'):
+            obs_list = obs.tolist()
+        else:
+            obs_list = list(obs)
+
+        # Get action mask (all actions valid in SUMO-RL)
+        action_mask = list(range(self.env.action_space.n))
+
+        # Create observation
+        return SumoObservation(
+            observation=obs_list,
+            observation_shape=[len(obs_list)],
+            action_mask=action_mask,
+            sim_time=info.get('step', 0.0),
+            done=done,
+            reward=reward,
+            metadata={
+                "num_green_phases": self.env.action_space.n,
+                "system_info": {
+                    k: v for k, v in info.items() if k.startswith('system_')
+                },
+            },
+        )
+```
+
+### 3. FastAPI Server (`server/app.py`)
+
+```python
+import os
+from core.env_server import create_fastapi_app
+from ..models import SumoAction, SumoObservation
+from .sumo_environment import SumoEnvironment
+
+# Get configuration from environment
+net_file = os.getenv("SUMO_NET_FILE", "/app/nets/single-intersection.net.xml")
+route_file = os.getenv("SUMO_ROUTE_FILE", "/app/nets/single-intersection.rou.xml")
+num_seconds = int(os.getenv("SUMO_NUM_SECONDS", "20000"))
+delta_time = int(os.getenv("SUMO_DELTA_TIME", "5"))
+yellow_time = int(os.getenv("SUMO_YELLOW_TIME", "2"))
+min_green = int(os.getenv("SUMO_MIN_GREEN", "5"))
+max_green = int(os.getenv("SUMO_MAX_GREEN", "50"))
+reward_fn = os.getenv("SUMO_REWARD_FN", "diff-waiting-time")
+
+# Create environment
+env = SumoEnvironment(
+    net_file=net_file,
+    route_file=route_file,
+    num_seconds=num_seconds,
+    delta_time=delta_time,
+    yellow_time=yellow_time,
+    min_green=min_green,
+    max_green=max_green,
+    reward_fn=reward_fn,
+)
+
+# Create FastAPI app
+app = create_fastapi_app(env, SumoAction, SumoObservation)
+```
+
+### 4. Dockerfile (`server/Dockerfile`)
+
+```dockerfile
+# Configurable base image
+ARG BASE_IMAGE=envtorch-base:latest
+FROM ${BASE_IMAGE}
+
+# Install SUMO
+# SUMO is a microscopic traffic simulation package
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    software-properties-common \
+    && add-apt-repository ppa:sumo/stable \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends \
+        sumo \
+        sumo-tools \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set SUMO_HOME
+ENV SUMO_HOME=/usr/share/sumo
+
+# Install SUMO-RL and dependencies
+RUN pip install --no-cache-dir \
+    gymnasium>=0.28 \
+    pettingzoo>=1.24.3 \
+    numpy>=1.24.0 \
+    pandas>=2.0.0 \
+    sumolib>=1.14.0 \
+    traci>=1.14.0 \
+    sumo-rl>=1.4.5
+
+# Copy OpenEnv core
+COPY src/core/ /app/src/core/
+
+# Copy SUMO-RL environment code
+COPY src/envs/sumo_rl_env/ /app/src/envs/sumo_rl_env/
+
+# Copy example networks
+# We'll bundle a simple single-intersection example
+COPY sumo-rl/sumo_rl/nets/single-intersection/ /app/nets/
+
+# Environment variables (can be overridden at runtime)
+ENV SUMO_NET_FILE=/app/nets/single-intersection.net.xml
+ENV SUMO_ROUTE_FILE=/app/nets/single-intersection.rou.xml
+ENV SUMO_NUM_SECONDS=20000
+ENV SUMO_DELTA_TIME=5
+ENV SUMO_YELLOW_TIME=2
+ENV SUMO_MIN_GREEN=5
+ENV SUMO_MAX_GREEN=50
+ENV SUMO_REWARD_FN=diff-waiting-time
+
+# Expose port
+EXPOSE 8000
+
+# Health check
+HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
+    CMD curl -f http://localhost:8000/health || exit 1
+
+# Run the FastAPI server
+CMD ["uvicorn", "envs.sumo_rl_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"]
+```
+
+### 5. HTTP Client (`client.py`)
+
+```python
+from typing import Any, Dict
+from core.http_env_client import HTTPEnvClient
+from core.types import StepResult
+from .models import SumoAction, SumoObservation, SumoState
+
+class SumoRLEnv(HTTPEnvClient[SumoAction, SumoObservation]):
+    """
+    HTTP client for SUMO-RL environment.
+
+    Example:
+        >>> env = SumoRLEnv.from_docker_image("sumo-rl-env:latest")
+        >>> result = env.reset()
+        >>> result = env.step(SumoAction(phase_id=1))
+        >>> print(f"Reward: {result.reward}, Done: {result.done}")
+        >>> env.close()
+    """
+
+    def _step_payload(self, action: SumoAction) -> Dict[str, Any]:
+        """Convert action to JSON payload."""
+        return {
+            "phase_id": action.phase_id,
+            "ts_id": action.ts_id,
+        }
+
+    def _parse_result(self, payload: Dict[str, Any]) -> StepResult[SumoObservation]:
+        """Parse step result from JSON."""
+        obs_data = payload.get("observation", {})
+
+        observation = SumoObservation(
+            observation=obs_data.get("observation", []),
+            observation_shape=obs_data.get("observation_shape", []),
+            current_phase=obs_data.get("current_phase"),
+            min_green_passed=obs_data.get("min_green_passed"),
+            lane_densities=obs_data.get("lane_densities"),
+            lane_queues=obs_data.get("lane_queues"),
+            action_mask=obs_data.get("action_mask", []),
+            sim_time=obs_data.get("sim_time", 0.0),
+            done=obs_data.get("done", False),
+            reward=obs_data.get("reward"),
+            metadata=obs_data.get("metadata", {}),
+        )
+
+        return StepResult(
+            observation=observation,
+            reward=payload.get("reward"),
+            done=payload.get("done", False),
+        )
+
+    def _parse_state(self, payload: Dict[str, Any]) -> SumoState:
+        """Parse state from JSON."""
+        return SumoState(
+            episode_id=payload.get("episode_id", ""),
+            step_count=payload.get("step_count", 0),
+            net_file=payload.get("net_file", ""),
+            route_file=payload.get("route_file", ""),
+            num_seconds=payload.get("num_seconds", 20000),
+            delta_time=payload.get("delta_time", 5),
+            yellow_time=payload.get("yellow_time", 2),
+            min_green=payload.get("min_green", 5),
+            max_green=payload.get("max_green", 50),
+            sim_time=payload.get("sim_time", 0.0),
+            total_vehicles=payload.get("total_vehicles", 0),
+            total_waiting_time=payload.get("total_waiting_time", 0.0),
+        )
+```
+
+---
+
+## ⚠️ Critical Challenges
+
+### 1. SUMO System Dependency
+
+**Challenge**: SUMO must be installed at system level (apt-get), not just pip.
+
+**Solution**:
+```dockerfile
+RUN add-apt-repository ppa:sumo/stable && \
+    apt-get update && \
+    apt-get install -y sumo sumo-tools
+```
+
+### 2. Network Files Required
+
+**Challenge**: SUMO needs `.net.xml` and `.rou.xml` files to run.
+
+**Solutions**:
+- **Bundle examples**: Copy simple networks from sumo-rl repo
+- **Volume mount**: Let users mount their own networks
+- **Default config**: Use single-intersection as default
+
+### 3. No GUI Support
+
+**Challenge**: Docker can't run SUMO GUI.
+
+**Solution**: Always use `use_gui=False` in Docker environment.
+
+### 4. Long Simulation Times
+
+**Challenge**: Traffic simulations can take minutes to complete.
+
+**Solution**:
+- Set reasonable defaults (20000 seconds simulation time)
+- Allow configuration via environment variables
+- Document expected runtimes
+
+### 5. Multi-Agent Complexity
+
+**Challenge**: SUMO-RL supports multi-agent (multiple traffic lights).
+
+**Solution**: Start with single-agent only for OpenEnv integration. Multi-agent can be added later.
+
+---
+
+## 📊 Configuration Matrix
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `SUMO_NET_FILE` | `/app/nets/single-intersection.net.xml` | Network topology file |
+| `SUMO_ROUTE_FILE` | `/app/nets/single-intersection.rou.xml` | Vehicle routes file |
+| `SUMO_NUM_SECONDS` | `20000` | Simulation duration |
+| `SUMO_DELTA_TIME` | `5` | Seconds between actions |
+| `SUMO_YELLOW_TIME` | `2` | Yellow phase duration |
+| `SUMO_MIN_GREEN` | `5` | Minimum green time |
+| `SUMO_MAX_GREEN` | `50` | Maximum green time |
+| `SUMO_REWARD_FN` | `diff-waiting-time` | Reward function |
+
+### Available Reward Functions
+
+From SUMO-RL source:
+- `diff-waiting-time` (default) - Change in cumulative waiting time
+- `average-speed` - Average speed of vehicles
+- `queue` - Total queue length
+- `pressure` - Pressure (difference between incoming/outgoing vehicles)
+
+---
+
+## 🧪 Testing Strategy
+
+### 1. Pre-Flight Checks
+- Verify network files exist
+- Check SUMO installation
+- Validate Dockerfile syntax
+- Test imports
+
+### 2. Docker Build Test
+```bash
+docker build -f src/envs/sumo_rl_env/server/Dockerfile -t sumo-rl-env:latest .
+```
+
+### 3. Runtime Tests
+```bash
+docker run -p 8000:8000 sumo-rl-env:latest
+
+curl http://localhost:8000/health
+curl -X POST http://localhost:8000/reset
+curl -X POST http://localhost:8000/step \
+  -H "Content-Type: application/json" \
+  -d '{"action": {"phase_id": 1, "ts_id": "0"}}'
+```
+
+### 4. Python Client Test
+```python
+from envs.sumo_rl_env import SumoRLEnv, SumoAction
+
+env = SumoRLEnv.from_docker_image("sumo-rl-env:latest")
+result = env.reset()
+result = env.step(SumoAction(phase_id=1))
+print(f"Reward: {result.reward}, Done: {result.done}")
+env.close()
+```
+
+---
+
+## 📦 What to Bundle
+
+### Minimal Network Example
+
+Bundle the single-intersection example from sumo-rl:
+```
+sumo-rl/sumo_rl/nets/single-intersection/
+├── single-intersection.net.xml  # Network topology
+├── single-intersection.rou.xml  # Vehicle routes
+```
+
+This provides a working example out-of-the-box.
+
+### Additional Networks (Optional)
+
+Could bundle RESCO benchmarks for research:
+- `grid4x4` - 4×4 grid of intersections
+- `arterial4x4` - Arterial road network
+- `cologne1` - Real-world Cologne network
+
+But start with single-intersection for simplicity.
+
+---
+
+## 🎯 Implementation Plan
+
+### Phase 1: Core Implementation (4-6 hours)
+1. Create `models.py` ✓ (designed)
+2. Create `server/sumo_environment.py` ✓ (designed)
+3. Create `server/app.py` ✓ (designed)
+4. Create `server/Dockerfile` ✓ (designed)
+5. Create `client.py` ✓ (designed)
+
+### Phase 2: Testing (2-3 hours)
+1. Build Docker image
+2. Test basic functionality
+3. Test different configurations
+4. Verify reward functions work
+
+### Phase 3: Documentation (1-2 hours)
+1. Write README.md
+2. Create examples
+3. Document network file format
+4. Add to GitHub Actions
+
+### Phase 4: Integration (1 hour)
+1. Add to `.github/workflows/docker-build.yml`
+2. Update main README
+3. Add to environments list
+
+**Total Estimate**: 8-12 hours
+
+---
+
+## 🚀 Next Steps
+
+1. **Create file structure** in `/Users/sanyambhutani/GH/OpenEnv/src/envs/sumo_rl_env/`
+2. **Copy network files** from `/Users/sanyambhutani/OpenEnv/sumo-rl/sumo_rl/nets/`
+3. **Implement all files** following the designs above
+4. **Build and test Docker image**
+5. **Create documentation**
+6. **Add to GitHub Actions**
+
+---
+
+## 💡 Key Insights
+
+### Why SUMO-RL is Harder Than Atari
+
+1. **System Dependencies**: Atari (ale-py) is pip-installable, SUMO requires apt-get
+2. **Configuration Complexity**: Atari just needs game name, SUMO needs network files
+3. **Runtime**: Atari is fast, SUMO simulations can take minutes
+4. **File Dependencies**: Atari bundles ROMs, SUMO needs user-provided networks
+
+### Why It's Still Doable
+
+1. **Single-Agent Mode**: Simplifies to standard Gymnasium API
+2. **Bundle Example**: Include simple network to start immediately
+3. **Environment Variables**: Easy runtime configuration
+4. **Pattern Reuse**: Follow exact Atari pattern for consistency
+
+---
+
+## 📚 References
+
+- [SUMO-RL GitHub](https://github.com/LucasAlegre/sumo-rl)
+- [SUMO Documentation](https://sumo.dlr.de/docs/)
+- [SUMO-RL Docs](https://lucasalegre.github.io/sumo-rl/)
+- [RESCO Benchmarks Paper](https://people.engr.tamu.edu/guni/Papers/NeurIPS-signals.pdf)
+
+---
+
+**Status**: Design complete, ready for implementation
+**Complexity**: High (system dependencies + network files)
+**Time Estimate**: 8-12 hours
+**Confidence**: 85% (Dockerfile complexity is main risk)
diff --git a/SUMO_RL_RISK_ANALYSIS.md b/SUMO_RL_RISK_ANALYSIS.md
new file mode 100644
index 00000000..0f216b39
--- /dev/null
+++ b/SUMO_RL_RISK_ANALYSIS.md
@@ -0,0 +1,505 @@
+# SUMO-RL Integration: ULTRATHINK Risk Analysis
+
+**Date**: 2025-10-17
+**Status**: Deep Risk Assessment
+
+---
+
+## ✅ Docker Eliminates PRIMARY Risk
+
+**YES - Docker solves the hardest problem!**
+
+| Risk | Without Docker | With Docker |
+|------|---------------|-------------|
+| **System Dependencies** | ❌ Nightmare | ✅ Solved |
+| **Cross-platform** | ❌ Linux only | ✅ Works everywhere |
+| **Installation** | ❌ Requires sudo | ✅ Just `docker run` |
+| **Reproducibility** | ❌ "Works on my machine" | ✅ Identical |
+
+**Conclusion**: Docker takes away 80% of the pain. ✨
+
+---
+
+## ⚠️ Remaining Risks (Deep Analysis)
+
+### 🔴 HIGH RISK
+
+#### 1. **TraCI Connection Management in HTTP Server**
+
+**Issue**: `SumoEnvironment` uses class variable `CONNECTION_LABEL` that increments globally.
+
+```python
+CONNECTION_LABEL = 0  # For traci multi-client support
+
+def __init__(self):
+    self.label = str(SumoEnvironment.CONNECTION_LABEL)
+    SumoEnvironment.CONNECTION_LABEL += 1
+```
+
+**Risk**: In HTTP server with concurrent requests:
+- Request 1 creates env (label=0)
+- Request 2 creates env (label=1)
+- Request 1 resets → closes connection label=0
+- Request 2 steps → tries to use label=1
+- **Potential conflict if requests overlap**
+
+**Likelihood**: Medium (depends on usage pattern)
+
+**Impact**: High (could cause simulation errors)
+
+**Mitigation**:
+```python
+# Option 1: Single environment instance (RECOMMENDED)
+# Create ONE environment at server startup, reuse for all requests
+env = SumoEnvironment(...)  # Created once
+app = create_fastapi_app(env, ...)  # Reuses same env
+
+# Option 2: Thread-safe connection management
+# Use threading locks around TraCI operations
+```
+
+**Decision**: Use single environment instance per container (same as Atari pattern). Each HTTP request uses the same environment. **SOLVES ISSUE**.
+
+---
+
+#### 2. **LIBSUMO vs TraCI Performance Trade-off**
+
+**Background**:
+```python
+LIBSUMO = "LIBSUMO_AS_TRACI" in os.environ
+```
+
+- **TraCI**: Standard, supports GUI, slower (1x speed)
+- **LIBSUMO**: No GUI, no parallel sims, faster (8x speed)
+
+**Risk**: Default TraCI could be too slow for RL training.
+
+**Likelihood**: High (traffic sims are inherently slow)
+
+**Impact**: Medium (training takes longer, not broken)
+
+**Mitigation**:
+```dockerfile
+# Option 1: Use TraCI (default, safer)
+# No env var needed, works out of box
+
+# Option 2: Enable LIBSUMO for speed
+ENV LIBSUMO_AS_TRACI=1
+
+# Recommendation: Start with TraCI, add LIBSUMO as optimization later
+```
+
+**Decision**: Start with TraCI (default), document LIBSUMO option for advanced users.
+
+---
+
+### 🟡 MEDIUM RISK
+
+#### 3. **Episode Reset Performance**
+
+**Issue**: Each `reset()` closes and restarts SUMO simulation.
+
+```python
+def reset(self, seed=None, **kwargs):
+    if self.episode != 0:
+        self.close()  # Closes previous simulation
+    self._start_simulation()  # Starts new one
+```
+
+**Risk**: Reset could take 1-5 seconds (slow for RL training loop).
+
+**Likelihood**: High (this is how SUMO works)
+
+**Impact**: Medium (slows training, doesn't break it)
+
+**Mitigation**:
+- Document expected reset time
+- Use long episodes (`num_seconds=20000`)
+- Consider warm-start optimizations later
+
+**Decision**: Accept this limitation, document it. Not a blocker.
+
+---
+
+#### 4. **CSV Output Accumulation**
+
+**Issue**: Environment can write CSV metrics to disk.
+
+```python
+def save_csv(self, out_csv_name, episode):
+    df.to_csv(out_csv_name + f"_conn{self.label}_ep{episode}" + ".csv")
+```
+
+**Risk**: In Docker, CSV files accumulate → disk space.
+
+**Likelihood**: Low (only if user enables CSV output)
+
+**Impact**: Low (disk space, not functionality)
+
+**Mitigation**:
+```python
+# In our wrapper, set out_csv_name=None (disables CSV)
+env = SumoEnvironment(
+    ...,
+    out_csv_name=None,  # Disable CSV output
+)
+```
+
+**Decision**: Disable CSV output by default. Users can enable via volume mount if needed.
+
+---
+
+#### 5. **Network File Path Resolution**
+
+**Issue**: SUMO needs absolute paths to `.net.xml` and `.rou.xml` files.
+
+**Risk**: If paths are wrong in Docker, simulation fails.
+
+**Likelihood**: Low (we control the paths)
+
+**Impact**: High (breaks everything if wrong)
+
+**Mitigation**:
+```dockerfile
+# Bundle networks at known paths
+COPY sumo-rl/sumo_rl/nets/single-intersection/ /app/nets/
+
+# Set absolute paths as defaults
+ENV SUMO_NET_FILE=/app/nets/single-intersection.net.xml
+ENV SUMO_ROUTE_FILE=/app/nets/single-intersection.rou.xml
+```
+
+**Decision**: Bundle example networks, use absolute paths. Test during build.
+
+---
+
+#### 6. **Dynamic Observation/Action Spaces**
+
+**Issue**: Different networks → different action/observation sizes.
+
+```python
+# Action space size = number of traffic signal phases (varies)
+self.action_space = gym.spaces.Discrete(num_green_phases)
+
+# Observation size = depends on number of lanes (varies)
+obs_size = num_green_phases + 1 + 2*num_lanes
+```
+
+**Risk**: OpenEnv expects fixed-size spaces?
+
+**Likelihood**: Low (we use single network by default)
+
+**Impact**: Medium (breaks if user changes network)
+
+**Mitigation**:
+- Use single-intersection as default (fixed sizes)
+- Document that changing networks may change spaces
+- Future: Make spaces configurable
+
+**Decision**: Not a blocker. Start with single network, document clearly.
+
+---
+
+### 🟢 LOW RISK
+
+#### 7. **SUMO Version Compatibility**
+
+**Issue**: `ppa:sumo/stable` might update SUMO version over time.
+
+**Risk**: New SUMO version breaks sumo-rl compatibility.
+
+**Likelihood**: Low (SUMO is stable)
+
+**Impact**: Medium (breaks after rebuild)
+
+**Mitigation**:
+```dockerfile
+# Option 1: Pin SUMO version (if available)
+RUN apt-get install -y sumo=1.14.0
+
+# Option 2: Pin sumolib/traci versions
+RUN pip install sumolib==1.14.0 traci==1.14.0
+
+# Option 3: Accept latest (simpler, usually works)
+```
+
+**Decision**: Start with latest, pin if issues arise.
+
+---
+
+#### 8. **sumolib/traci vs System SUMO Mismatch**
+
+**Issue**: Pip packages `sumolib` and `traci` should match system SUMO version.
+
+**Risk**: Version mismatch causes compatibility issues.
+
+**Likelihood**: Low (sumo-rl handles this)
+
+**Impact**: Medium (simulation errors)
+
+**Mitigation**:
+```dockerfile
+# Install SUMO first
+RUN apt-get install -y sumo sumo-tools
+
+# Then install matching Python packages
+RUN pip install sumolib>=1.14.0 traci>=1.14.0
+```
+
+**Decision**: Use `>=` versions, should work. Test during build.
+
+---
+
+#### 9. **PettingZoo Version Compatibility**
+
+**Issue**: Code has fallback for PettingZoo 1.24 vs 1.25+
+
+```python
+try:
+    from pettingzoo.utils import AgentSelector  # 1.25+
+except ImportError:
+    from pettingzoo.utils import agent_selector as AgentSelector  # 1.24
+```
+
+**Risk**: Version incompatibility breaks import.
+
+**Likelihood**: Low (pyproject.toml specifies `pettingzoo>=1.24.3`)
+
+**Impact**: Low (import error, easy to debug)
+
+**Mitigation**:
+```dockerfile
+RUN pip install pettingzoo>=1.24.3
+```
+
+**Decision**: Use version spec from pyproject.toml.
+
+---
+
+#### 10. **Memory Usage with Many Vehicles**
+
+**Issue**: Large traffic networks with thousands of vehicles → high memory.
+
+**Risk**: Container OOM (out of memory).
+
+**Likelihood**: Low (single-intersection is small)
+
+**Impact**: High (container crash)
+
+**Mitigation**:
+- Use small default network (single-intersection)
+- Document memory requirements for large networks
+- Docker memory limits (optional)
+
+**Decision**: Not a blocker. Document memory requirements.
+
+---
+
+#### 11. **Simulation Determinism**
+
+**Issue**: Default `sumo_seed="random"` → non-deterministic.
+
+**Risk**: Can't reproduce training runs.
+
+**Likelihood**: High (default is random)
+
+**Impact**: Low (science issue, not functionality)
+
+**Mitigation**:
+```python
+# Allow seed control via environment variable
+sumo_seed = int(os.getenv("SUMO_SEED", "42"))  # Default fixed seed
+
+# Or keep random, document it
+sumo_seed = os.getenv("SUMO_SEED", "random")
+```
+
+**Decision**: Default to fixed seed (42) for reproducibility. Document how to use random.
+
+---
+
+#### 12. **Headless Operation (No GUI)**
+
+**Issue**: We force `use_gui=False` in Docker.
+
+**Risk**: Users might want to see simulation GUI.
+
+**Likelihood**: Low (Docker is headless)
+
+**Impact**: Low (convenience feature)
+
+**Mitigation**:
+- Document that GUI is not available in Docker
+- Suggest local development for GUI
+- Future: VNC access to container GUI
+
+**Decision**: Not a blocker. GUI doesn't work in Docker anyway.
+
+---
+
+#### 13. **Docker Image Size**
+
+**Issue**: SUMO + dependencies → large image.
+
+**Estimate**:
+- Base: ~200MB
+- SUMO: ~500MB
+- Python packages: ~200MB
+- **Total: ~900MB-1GB**
+
+**Risk**: Large downloads, storage.
+
+**Likelihood**: High (definitely will be large)
+
+**Impact**: Low (acceptable for complex sim)
+
+**Mitigation**:
+- Multi-stage builds (future optimization)
+- Clear documentation of size
+- Accept it (complexity requires space)
+
+**Decision**: Accept ~1GB image size. Not a blocker.
+
+---
+
+#### 14. **Long Simulation Times**
+
+**Issue**: Traffic simulations take time (minutes per episode).
+
+**Example**: 20,000 simulated seconds with delta_time=5 → 4,000 steps per episode.
+
+**Risk**: RL training is slow.
+
+**Likelihood**: High (inherent to traffic simulation)
+
+**Impact**: Medium (slower research, not broken)
+
+**Mitigation**:
+- Document expected times
+- Recommend shorter episodes for quick tests
+- Suggest LIBSUMO for speedup
+
+**Decision**: Document clearly. Not a technical blocker.
+
+---
+
+## 📊 Risk Summary
+
+| Risk | Severity | Likelihood | Mitigation Status |
+|------|----------|-----------|-------------------|
+| TraCI Connection Management | 🔴 High | Medium | ✅ Solved (single env instance) |
+| LIBSUMO vs TraCI | 🔴 High | High | ✅ Mitigated (default TraCI, doc LIBSUMO) |
+| Episode Reset Performance | 🟡 Medium | High | ✅ Accepted (document) |
+| CSV Output Accumulation | 🟡 Medium | Low | ✅ Solved (disable by default) |
+| Network File Paths | 🟡 Medium | Low | ✅ Solved (bundle at known paths) |
+| Dynamic Spaces | 🟡 Medium | Low | ✅ Accepted (document) |
+| SUMO Version | 🟢 Low | Low | ✅ Accepted (use latest) |
+| sumolib/traci Mismatch | 🟢 Low | Low | ✅ Mitigated (>=1.14.0) |
+| PettingZoo Version | 🟢 Low | Low | ✅ Mitigated (>=1.24.3) |
+| Memory Usage | 🟢 Low | Low | ✅ Accepted (document) |
+| Simulation Determinism | 🟢 Low | High | ✅ Solved (default fixed seed) |
+| No GUI | 🟢 Low | Low | ✅ Accepted (Docker is headless) |
+| Image Size | 🟢 Low | High | ✅ Accepted (~1GB) |
+| Long Sim Times | 🟢 Low | High | ✅ Accepted (document) |
+
+---
+
+## ✅ Final Risk Assessment
+
+### Overall Risk Level: **LOW-MEDIUM** ✅
+
+### Key Findings:
+
+1. **Docker solves the hardest problem** (system dependencies) ✅
+2. **No critical blockers** - all risks have mitigations ✅
+3. **Main concerns are performance** (speed, memory) - acceptable for simulation ✅
+4. **Connection management solved** by single env instance pattern ✅
+
+### Recommended Mitigations:
+
+```python
+# 1. Single environment instance per container
+env = SumoEnvironment(
+    net_file="/app/nets/single-intersection.net.xml",
+    route_file="/app/nets/single-intersection.rou.xml",
+    use_gui=False,  # No GUI in Docker
+    single_agent=True,  # Single-agent mode
+    num_seconds=20000,
+    sumo_seed=42,  # Fixed seed for reproducibility
+    out_csv_name=None,  # Disable CSV output
+    sumo_warnings=False,  # Quiet
+)
+
+# 2. Reuse for all HTTP requests
+app = create_fastapi_app(env, SumoAction, SumoObservation)
+```
+
+```dockerfile
+# 3. Bundle network files at known paths
+COPY sumo-rl/sumo_rl/nets/single-intersection/ /app/nets/
+
+# 4. Set SUMO_HOME
+ENV SUMO_HOME=/usr/share/sumo
+
+# 5. Don't enable LIBSUMO by default (safer)
+# ENV LIBSUMO_AS_TRACI=1  # Optional for advanced users
+```
+
+---
+
+## 🎯 Confidence Level
+
+**Original**: 85% confident
+**After Deep Analysis**: **95% confident** ✅
+
+**Reasons for Increased Confidence**:
+1. All high-risk items have clear mitigations
+2. Docker architecture naturally solves connection management
+3. Pattern matches Atari (proven to work)
+4. Risks are mostly performance/documentation, not functionality
+5. No unexpected blockers found
+
+---
+
+## 🚀 Ready to Implement
+
+**Recommendation**: **PROCEED WITH IMPLEMENTATION** ✅
+
+The risks are manageable and well-understood. Docker makes this integration feasible and clean.
+
+**Estimated Effort**: 8-12 hours (unchanged)
+
+**Success Probability**: 95%
+
+---
+
+## 📝 Documentation Requirements
+
+Based on risk analysis, must document:
+
+1. **Performance expectations**:
+   - Reset takes 1-5 seconds
+   - Episodes can take minutes
+   - LIBSUMO option for 8x speedup
+
+2. **Network files**:
+   - Default: single-intersection (bundled)
+   - Custom: mount volume with your .net.xml/.rou.xml
+
+3. **Reproducibility**:
+   - Default seed=42 (deterministic)
+   - Set SUMO_SEED=random for stochastic
+
+4. **Limitations**:
+   - No GUI in Docker
+   - Single-agent only (v1)
+   - Fixed network per container
+
+5. **Memory requirements**:
+   - Small networks: ~500MB
+   - Large networks: 2-4GB
+   - Document scaling
+
+---
+
+**Analysis Complete**: All risks identified, mitigated, and documented. ✅
diff --git a/src/envs/sumo_rl_env/models.py b/src/envs/sumo_rl_env/models.py
new file mode 100644
index 00000000..611b0eed
--- /dev/null
+++ b/src/envs/sumo_rl_env/models.py
@@ -0,0 +1,110 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Data models for SUMO-RL Environment.
+
+This module defines the Action, Observation, and State types for traffic
+signal control using SUMO (Simulation of Urban MObility).
+"""
+
+from dataclasses import dataclass, field
+from typing import Dict, List, Optional
+
+from core.env_server import Action, Observation, State
+
+
+@dataclass
+class SumoAction(Action):
+    """
+    Action for SUMO traffic signal control environment.
+
+    Represents selecting which traffic light phase to activate next.
+
+    Attributes:
+        phase_id: Index of the green phase to activate (0 to num_phases-1)
+        ts_id: Traffic signal ID (for multi-agent support, default "0")
+    """
+
+    phase_id: int
+    ts_id: str = "0"
+
+
+@dataclass
+class SumoObservation(Observation):
+    """
+    Observation from SUMO traffic signal environment.
+
+    Contains traffic metrics for decision-making.
+
+    Attributes:
+        observation: Flattened observation vector containing:
+                    - One-hot encoded current phase
+                    - Min green flag (binary)
+                    - Lane densities (normalized)
+                    - Lane queues (normalized)
+        observation_shape: Shape of observation for reshaping
+        action_mask: List of valid action indices
+        sim_time: Current simulation time in seconds
+        done: Whether episode is complete
+        reward: Reward from last action (None on reset)
+        metadata: Additional info (system metrics, etc.)
+    """
+
+    observation: List[float]
+    observation_shape: List[int]
+    action_mask: List[int] = field(default_factory=list)
+    sim_time: float = 0.0
+    done: bool = False
+    reward: Optional[float] = None
+    metadata: Dict = field(default_factory=dict)
+
+
+@dataclass
+class SumoState(State):
+    """
+    State of SUMO traffic signal environment.
+
+    Tracks both configuration and runtime state.
+
+    Configuration attributes:
+        net_file: Path to SUMO network file (.net.xml)
+        route_file: Path to SUMO route file (.rou.xml)
+        num_seconds: Total simulation duration in seconds
+        delta_time: Seconds between agent actions
+        yellow_time: Duration of yellow phase in seconds
+        min_green: Minimum green time per phase in seconds
+        max_green: Maximum green time per phase in seconds
+        reward_fn: Name of reward function used
+
+    Runtime attributes:
+        episode_id: Unique episode identifier
+        step_count: Number of steps taken in episode
+        sim_time: Current simulation time in seconds
+        total_vehicles: Total number of vehicles in simulation
+        total_waiting_time: Cumulative waiting time across all vehicles
+    """
+
+    # Episode tracking
+    episode_id: str = ""
+    step_count: int = 0
+
+    # SUMO configuration
+    net_file: str = ""
+    route_file: str = ""
+    num_seconds: int = 20000
+    delta_time: int = 5
+    yellow_time: int = 2
+    min_green: int = 5
+    max_green: int = 50
+    reward_fn: str = "diff-waiting-time"
+
+    # Runtime metrics
+    sim_time: float = 0.0
+    total_vehicles: int = 0
+    total_waiting_time: float = 0.0
+    mean_waiting_time: float = 0.0
+    mean_speed: float = 0.0

From c822a741fd2d23374a9165931fe2461aa56dc6b1 Mon Sep 17 00:00:00 2001
From: Sanyam Bhutani <sanyambhutani@meta.com>
Date: Fri, 17 Oct 2025 11:27:34 -0700
Subject: [PATCH 02/13] add env

---
 .../sumo_rl_env/server/sumo_environment.py    | 237 ++++++++++++++++++
 1 file changed, 237 insertions(+)
 create mode 100644 src/envs/sumo_rl_env/server/sumo_environment.py

diff --git a/src/envs/sumo_rl_env/server/sumo_environment.py b/src/envs/sumo_rl_env/server/sumo_environment.py
new file mode 100644
index 00000000..757b9f17
--- /dev/null
+++ b/src/envs/sumo_rl_env/server/sumo_environment.py
@@ -0,0 +1,237 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+SUMO-RL Environment Server Implementation.
+
+This module wraps the SUMO-RL SumoEnvironment and exposes it
+via the OpenEnv Environment interface for traffic signal control.
+"""
+
+import os
+import uuid
+from typing import Any, Dict
+
+# Set SUMO_HOME before importing sumo_rl
+os.environ.setdefault("SUMO_HOME", "/usr/share/sumo")
+
+from core.env_server import Action, Environment, Observation
+
+from ..models import SumoAction, SumoObservation, SumoState
+
+# Import SUMO-RL
+try:
+    from sumo_rl import SumoEnvironment as BaseSumoEnv
+except ImportError as e:
+    raise ImportError(
+        "sumo-rl is not installed. "
+        "Please install it with: pip install sumo-rl"
+    ) from e
+
+
+class SumoEnvironment(Environment):
+    """
+    SUMO-RL Environment wrapper for OpenEnv.
+
+    This environment wraps the SUMO traffic signal control environment
+    for single-agent reinforcement learning.
+
+    Args:
+        net_file: Path to SUMO network file (.net.xml)
+        route_file: Path to SUMO route file (.rou.xml)
+        num_seconds: Simulation duration in seconds (default: 20000)
+        delta_time: Seconds between agent actions (default: 5)
+        yellow_time: Yellow phase duration in seconds (default: 2)
+        min_green: Minimum green time in seconds (default: 5)
+        max_green: Maximum green time in seconds (default: 50)
+        reward_fn: Reward function name (default: "diff-waiting-time")
+        sumo_seed: Random seed for reproducibility (default: 42)
+
+    Example:
+        >>> env = SumoEnvironment(
+        ...     net_file="/app/nets/single-intersection.net.xml",
+        ...     route_file="/app/nets/single-intersection.rou.xml"
+        ... )
+        >>> obs = env.reset()
+        >>> print(obs.observation_shape)
+        >>> obs = env.step(SumoAction(phase_id=1))
+        >>> print(obs.reward, obs.done)
+    """
+
+    def __init__(
+        self,
+        net_file: str,
+        route_file: str,
+        num_seconds: int = 20000,
+        delta_time: int = 5,
+        yellow_time: int = 2,
+        min_green: int = 5,
+        max_green: int = 50,
+        reward_fn: str = "diff-waiting-time",
+        sumo_seed: int = 42,
+    ):
+        """Initialize SUMO traffic signal environment."""
+        super().__init__()
+
+        # Store configuration
+        self.net_file = net_file
+        self.route_file = route_file
+        self.num_seconds = num_seconds
+        self.delta_time = delta_time
+        self.yellow_time = yellow_time
+        self.min_green = min_green
+        self.max_green = max_green
+        self.reward_fn = reward_fn
+        self.sumo_seed = sumo_seed
+
+        # Create SUMO environment (single-agent mode)
+        # Key settings:
+        # - use_gui=False: No GUI in Docker
+        # - single_agent=True: Returns single obs/reward (not dict)
+        # - sumo_warnings=False: Suppress SUMO warnings
+        # - out_csv_name=None: Don't write CSV files
+        self.env = BaseSumoEnv(
+            net_file=net_file,
+            route_file=route_file,
+            use_gui=False,
+            single_agent=True,
+            num_seconds=num_seconds,
+            delta_time=delta_time,
+            yellow_time=yellow_time,
+            min_green=min_green,
+            max_green=max_green,
+            reward_fn=reward_fn,
+            sumo_seed=sumo_seed,
+            sumo_warnings=False,
+            out_csv_name=None,  # Disable CSV output
+            add_system_info=True,
+            add_per_agent_info=False,
+        )
+
+        # Initialize state
+        self._state = SumoState(
+            net_file=net_file,
+            route_file=route_file,
+            num_seconds=num_seconds,
+            delta_time=delta_time,
+            yellow_time=yellow_time,
+            min_green=min_green,
+            max_green=max_green,
+            reward_fn=reward_fn,
+        )
+
+        self._last_info = {}
+
+    def reset(self) -> Observation:
+        """
+        Reset the environment and return initial observation.
+
+        Returns:
+            Initial SumoObservation for the agent.
+        """
+        # Reset SUMO simulation
+        obs, info = self.env.reset()
+
+        # Update state tracking
+        self._state.episode_id = str(uuid.uuid4())
+        self._state.step_count = 0
+        self._state.sim_time = 0.0
+
+        # Store info for metadata
+        self._last_info = info
+
+        return self._make_observation(obs, reward=None, done=False, info=info)
+
+    def step(self, action: Action) -> Observation:
+        """
+        Execute agent's action and return resulting observation.
+
+        Args:
+            action: SumoAction containing the phase_id to execute.
+
+        Returns:
+            SumoObservation after action execution.
+
+        Raises:
+            ValueError: If action is not a SumoAction.
+        """
+        if not isinstance(action, SumoAction):
+            raise ValueError(f"Expected SumoAction, got {type(action)}")
+
+        # Validate phase_id
+        num_phases = self.env.action_space.n
+        if action.phase_id < 0 or action.phase_id >= num_phases:
+            raise ValueError(
+                f"Invalid phase_id: {action.phase_id}. "
+                f"Valid range: [0, {num_phases - 1}]"
+            )
+
+        # Execute action in SUMO
+        # Returns: (obs, reward, terminated, truncated, info)
+        obs, reward, terminated, truncated, info = self.env.step(action.phase_id)
+        done = terminated or truncated
+
+        # Update state
+        self._state.step_count += 1
+        self._state.sim_time = info.get("step", 0.0)
+        self._state.total_vehicles = info.get("system_total_running", 0)
+        self._state.total_waiting_time = info.get("system_total_waiting_time", 0.0)
+        self._state.mean_waiting_time = info.get("system_mean_waiting_time", 0.0)
+        self._state.mean_speed = info.get("system_mean_speed", 0.0)
+
+        # Store info for metadata
+        self._last_info = info
+
+        return self._make_observation(obs, reward=reward, done=done, info=info)
+
+    @property
+    def state(self) -> SumoState:
+        """Get current environment state."""
+        return self._state
+
+    def _make_observation(
+        self, obs: Any, reward: float, done: bool, info: Dict
+    ) -> SumoObservation:
+        """
+        Create SumoObservation from SUMO environment output.
+
+        Args:
+            obs: Observation array from SUMO environment
+            reward: Reward value (None on reset)
+            done: Whether episode is complete
+            info: Info dictionary from SUMO environment
+
+        Returns:
+            SumoObservation for the agent.
+        """
+        # Convert observation to list
+        if hasattr(obs, "tolist"):
+            obs_list = obs.tolist()
+        else:
+            obs_list = list(obs)
+
+        # Get action mask (all actions valid in SUMO-RL)
+        num_phases = self.env.action_space.n
+        action_mask = list(range(num_phases))
+
+        # Extract system metrics for metadata
+        system_info = {
+            k: v for k, v in info.items() if k.startswith("system_")
+        }
+
+        # Create observation
+        return SumoObservation(
+            observation=obs_list,
+            observation_shape=[len(obs_list)],
+            action_mask=action_mask,
+            sim_time=info.get("step", 0.0),
+            done=done,
+            reward=reward,
+            metadata={
+                "num_green_phases": num_phases,
+                "system_info": system_info,
+            },
+        )

From 980abc3ee8ece88dccc61e3b031d71f17adaa930 Mon Sep 17 00:00:00 2001
From: Sanyam Bhutani <sanyambhutani@meta.com>
Date: Fri, 17 Oct 2025 11:28:42 -0700
Subject: [PATCH 03/13] app

---
 src/envs/sumo_rl_env/server/app.py | 47 ++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100644 src/envs/sumo_rl_env/server/app.py

diff --git a/src/envs/sumo_rl_env/server/app.py b/src/envs/sumo_rl_env/server/app.py
new file mode 100644
index 00000000..b81463ae
--- /dev/null
+++ b/src/envs/sumo_rl_env/server/app.py
@@ -0,0 +1,47 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+FastAPI application for SUMO-RL environment server.
+
+This module creates an HTTP server that exposes traffic signal control
+via the OpenEnv API using SUMO (Simulation of Urban MObility).
+"""
+
+import os
+
+from core.env_server import create_fastapi_app
+
+from ..models import SumoAction, SumoObservation
+from .sumo_environment import SumoEnvironment
+
+# Get configuration from environment variables
+net_file = os.getenv("SUMO_NET_FILE", "/app/nets/single-intersection.net.xml")
+route_file = os.getenv("SUMO_ROUTE_FILE", "/app/nets/single-intersection.rou.xml")
+num_seconds = int(os.getenv("SUMO_NUM_SECONDS", "20000"))
+delta_time = int(os.getenv("SUMO_DELTA_TIME", "5"))
+yellow_time = int(os.getenv("SUMO_YELLOW_TIME", "2"))
+min_green = int(os.getenv("SUMO_MIN_GREEN", "5"))
+max_green = int(os.getenv("SUMO_MAX_GREEN", "50"))
+reward_fn = os.getenv("SUMO_REWARD_FN", "diff-waiting-time")
+sumo_seed = int(os.getenv("SUMO_SEED", "42"))
+
+# Create single environment instance
+# This is reused for all HTTP requests (avoids TraCI connection issues)
+env = SumoEnvironment(
+    net_file=net_file,
+    route_file=route_file,
+    num_seconds=num_seconds,
+    delta_time=delta_time,
+    yellow_time=yellow_time,
+    min_green=min_green,
+    max_green=max_green,
+    reward_fn=reward_fn,
+    sumo_seed=sumo_seed,
+)
+
+# Create FastAPI app
+app = create_fastapi_app(env, SumoAction, SumoObservation)

From 1d3c96c5a8df3c0948ee56438f7737aa87abda57 Mon Sep 17 00:00:00 2001
From: Sanyam Bhutani <sanyambhutani@meta.com>
Date: Fri, 17 Oct 2025 11:28:51 -0700
Subject: [PATCH 04/13] init

---
 src/envs/sumo_rl_env/server/__init__.py | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100644 src/envs/sumo_rl_env/server/__init__.py

diff --git a/src/envs/sumo_rl_env/server/__init__.py b/src/envs/sumo_rl_env/server/__init__.py
new file mode 100644
index 00000000..f4b70221
--- /dev/null
+++ b/src/envs/sumo_rl_env/server/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""SUMO-RL environment server package."""

From e64f3d76fe4447f126ad34b031236c3fb359386a Mon Sep 17 00:00:00 2001
From: Sanyam Bhutani <sanyambhutani@meta.com>
Date: Fri, 17 Oct 2025 11:28:59 -0700
Subject: [PATCH 05/13] DockerFile

---
 src/envs/sumo_rl_env/server/Dockerfile | 69 ++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100644 src/envs/sumo_rl_env/server/Dockerfile

diff --git a/src/envs/sumo_rl_env/server/Dockerfile b/src/envs/sumo_rl_env/server/Dockerfile
new file mode 100644
index 00000000..618ebe88
--- /dev/null
+++ b/src/envs/sumo_rl_env/server/Dockerfile
@@ -0,0 +1,69 @@
+# Dockerfile for SUMO-RL Environment
+# This image provides traffic signal control via SUMO (Simulation of Urban MObility)
+
+# Configurable base image - defaults to local build, can be overridden for CI/CD
+# Base image provides: fastapi, uvicorn, requests, curl, PYTHONPATH=/app/src
+#
+# Local build: docker build -t envtorch-base:latest -f src/core/containers/images/Dockerfile .
+#              docker build -f src/envs/sumo_rl_env/server/Dockerfile -t sumo-rl-env:latest .
+#
+# CI/CD build: docker build --build-arg BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest \
+#              -f src/envs/sumo_rl_env/server/Dockerfile -t sumo-rl-env:latest .
+ARG BASE_IMAGE=envtorch-base:latest
+FROM ${BASE_IMAGE}
+
+# Install SUMO system dependencies
+# SUMO is a microscopic traffic simulation package
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    software-properties-common \
+    && add-apt-repository ppa:sumo/stable \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends \
+        sumo \
+        sumo-tools \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set SUMO_HOME environment variable
+ENV SUMO_HOME=/usr/share/sumo
+
+# Install SUMO-RL and Python dependencies
+# sumo-rl includes: gymnasium, pettingzoo, numpy, pandas, sumolib, traci
+RUN pip install --no-cache-dir \
+    gymnasium>=0.28 \
+    pettingzoo>=1.24.3 \
+    numpy>=1.24.0 \
+    pandas>=2.0.0 \
+    sumolib>=1.14.0 \
+    traci>=1.14.0 \
+    sumo-rl>=1.4.5
+
+# Copy OpenEnv core (base image already set WORKDIR=/app)
+COPY src/core/ /app/src/core/
+
+# Copy SUMO-RL environment code
+COPY src/envs/sumo_rl_env/ /app/src/envs/sumo_rl_env/
+
+# Copy example network files
+# Default: single-intersection (simple 4-way intersection)
+COPY nets/single-intersection/ /app/nets/single-intersection/
+
+# SUMO environment variables (can be overridden at runtime)
+ENV SUMO_NET_FILE=/app/nets/single-intersection/single-intersection.net.xml
+ENV SUMO_ROUTE_FILE=/app/nets/single-intersection/single-intersection.rou.xml
+ENV SUMO_NUM_SECONDS=20000
+ENV SUMO_DELTA_TIME=5
+ENV SUMO_YELLOW_TIME=2
+ENV SUMO_MIN_GREEN=5
+ENV SUMO_MAX_GREEN=50
+ENV SUMO_REWARD_FN=diff-waiting-time
+ENV SUMO_SEED=42
+
+# Expose port
+EXPOSE 8000
+
+# Health check
+HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
+    CMD curl -f http://localhost:8000/health || exit 1
+
+# Run the FastAPI server
+CMD ["uvicorn", "envs.sumo_rl_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"]

From debd7243f829501bca42fb89040c4080c5cdd257 Mon Sep 17 00:00:00 2001
From: Sanyam Bhutani <sanyambhutani@meta.com>
Date: Fri, 17 Oct 2025 11:29:48 -0700
Subject: [PATCH 06/13] client and configs

---
 .../single-intersection.edg.xml               |   6 +
 .../single-intersection.net.xml               |  86 +++++++++++
 .../single-intersection.nod.xml               |   7 +
 .../single-intersection.rou.xml               |   6 +
 .../single-intersection.sumocfg               |  10 ++
 src/envs/sumo_rl_env/client.py                | 145 ++++++++++++++++++
 6 files changed, 260 insertions(+)
 create mode 100755 nets/single-intersection/single-intersection.edg.xml
 create mode 100755 nets/single-intersection/single-intersection.net.xml
 create mode 100755 nets/single-intersection/single-intersection.nod.xml
 create mode 100755 nets/single-intersection/single-intersection.rou.xml
 create mode 100755 nets/single-intersection/single-intersection.sumocfg
 create mode 100644 src/envs/sumo_rl_env/client.py

diff --git a/nets/single-intersection/single-intersection.edg.xml b/nets/single-intersection/single-intersection.edg.xml
new file mode 100755
index 00000000..52c3e7aa
--- /dev/null
+++ b/nets/single-intersection/single-intersection.edg.xml
@@ -0,0 +1,6 @@
+<edges>
+	<edge from="n" id="n_t" to="t" numLanes="2"/>
+	<edge from="w" id="w_t" to="t" numLanes="2"/>
+	<edge from="t" id="t_s" to="s" numLanes="2"/>
+	<edge from="t" id="t_e" to="e" numLanes="2"/>
+</edges>
diff --git a/nets/single-intersection/single-intersection.net.xml b/nets/single-intersection/single-intersection.net.xml
new file mode 100755
index 00000000..0f32510f
--- /dev/null
+++ b/nets/single-intersection/single-intersection.net.xml
@@ -0,0 +1,86 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!-- generated on seg 17 dez 2018 17:22:14 -02 by Netedit Version 0.32.0
+<?xml version="1.0" encoding="UTF-8"?>
+
+<configuration xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://sumo.dlr.de/xsd/netconvertConfiguration.xsd">
+
+    <input>
+        <sumo-net-file value="nets/single-intersection/single-intersection.net.xml"/>
+    </input>
+
+    <output>
+        <output-file value="/home/lucas/Documents/sumo-rl/nets/single-intersection/single-intersection2.net.xml"/>
+    </output>
+
+    <processing>
+        <no-turnarounds value="true"/>
+        <offset.disable-normalization value="true"/>
+        <lefthand value="false"/>
+        <junctions.corner-detail value="0"/>
+        <rectangular-lane-cut value="false"/>
+        <walkingareas value="false"/>
+    </processing>
+
+</configuration>
+-->
+
+<net version="0.27" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://sumo.dlr.de/xsd/net_file.xsd">
+
+    <location netOffset="150.00,150.00" convBoundary="0.00,0.00,300.00,300.00" origBoundary="-150.00,-150.00,150.00,150.00" projParameter="!"/>
+
+    <edge id=":t_0" function="internal">
+        <lane id=":t_0_0" index="0" speed="13.90" length="9.50" shape="145.05,151.45 145.05,141.95"/>
+        <lane id=":t_0_1" index="1" speed="13.90" length="9.50" shape="148.35,151.45 148.35,141.95"/>
+    </edge>
+    <edge id=":t_2" function="internal">
+        <lane id=":t_2_0" index="0" speed="13.90" length="9.50" shape="141.95,145.05 151.45,145.05"/>
+        <lane id=":t_2_1" index="1" speed="13.90" length="9.50" shape="141.95,148.35 151.45,148.35"/>
+    </edge>
+
+    <edge id="n_t" from="n" to="t" priority="-1">
+        <lane id="n_t_0" index="0" speed="13.90" length="148.55" shape="145.05,300.00 145.05,151.45"/>
+        <lane id="n_t_1" index="1" speed="13.90" length="148.55" shape="148.35,300.00 148.35,151.45"/>
+    </edge>
+    <edge id="t_e" from="t" to="e" priority="-1">
+        <lane id="t_e_0" index="0" speed="13.90" length="148.55" shape="151.45,145.05 300.00,145.05"/>
+        <lane id="t_e_1" index="1" speed="13.90" length="148.55" shape="151.45,148.35 300.00,148.35"/>
+    </edge>
+    <edge id="t_s" from="t" to="s" priority="-1">
+        <lane id="t_s_0" index="0" speed="13.90" length="141.95" shape="145.05,141.95 145.05,0.00"/>
+        <lane id="t_s_1" index="1" speed="13.90" length="141.95" shape="148.35,141.95 148.35,0.00"/>
+    </edge>
+    <edge id="w_t" from="w" to="t" priority="-1">
+        <lane id="w_t_0" index="0" speed="13.90" length="141.95" shape="0.00,145.05 141.95,145.05"/>
+        <lane id="w_t_1" index="1" speed="13.90" length="141.95" shape="0.00,148.35 141.95,148.35"/>
+    </edge>
+
+    <tlLogic id="t" type="static" programID="0" offset="0">
+        <phase duration="42" state="GGrr"/>
+        <phase duration="2" state="yyrr"/>
+        <phase duration="42" state="rrGG"/>
+        <phase duration="2" state="rryy"/>
+    </tlLogic>
+
+    <junction id="e" type="dead_end" x="300.00" y="150.00" incLanes="t_e_0 t_e_1" intLanes="" shape="300.00,143.45 300.00,149.95"/>
+    <junction id="n" type="dead_end" x="150.00" y="300.00" incLanes="" intLanes="" shape="149.95,300.00 143.45,300.00"/>
+    <junction id="s" type="dead_end" x="150.00" y="0.00" incLanes="t_s_0 t_s_1" intLanes="" shape="143.45,0.00 149.95,0.00"/>
+    <junction id="t" type="traffic_light" x="150.00" y="150.00" incLanes="n_t_0 n_t_1 w_t_0 w_t_1" intLanes=":t_0_0 :t_0_1 :t_2_0 :t_2_1" shape="143.45,151.45 149.95,151.45 151.45,149.95 151.45,143.45 149.95,141.95 143.45,141.95 141.95,143.45 141.95,149.95">
+        <request index="0" response="1100" foes="1100" cont="0"/>
+        <request index="1" response="1100" foes="1100" cont="0"/>
+        <request index="2" response="0000" foes="0011" cont="0"/>
+        <request index="3" response="0000" foes="0011" cont="0"/>
+    </junction>
+    <junction id="w" type="dead_end" x="0.00" y="150.00" incLanes="" intLanes="" shape="0.00,149.95 0.00,143.45"/>
+
+    <connection from="n_t" to="t_s" fromLane="0" toLane="0" via=":t_0_0" tl="t" linkIndex="0" dir="s" state="o"/>
+    <connection from="n_t" to="t_s" fromLane="1" toLane="1" via=":t_0_1" tl="t" linkIndex="1" dir="s" state="o"/>
+    <connection from="w_t" to="t_e" fromLane="0" toLane="0" via=":t_2_0" tl="t" linkIndex="2" dir="s" state="o"/>
+    <connection from="w_t" to="t_e" fromLane="1" toLane="1" via=":t_2_1" tl="t" linkIndex="3" dir="s" state="o"/>
+
+    <connection from=":t_0" to="t_s" fromLane="0" toLane="0" dir="s" state="M"/>
+    <connection from=":t_0" to="t_s" fromLane="1" toLane="1" dir="s" state="M"/>
+    <connection from=":t_2" to="t_e" fromLane="0" toLane="0" dir="s" state="M"/>
+    <connection from=":t_2" to="t_e" fromLane="1" toLane="1" dir="s" state="M"/>
+
+</net>
diff --git a/nets/single-intersection/single-intersection.nod.xml b/nets/single-intersection/single-intersection.nod.xml
new file mode 100755
index 00000000..a8b68d54
--- /dev/null
+++ b/nets/single-intersection/single-intersection.nod.xml
@@ -0,0 +1,7 @@
+<nodes>
+    <node id="n" x="0.0" y="150.0" type="priority"/>
+    <node id="s" x="0.0" y="-150.0" type="priority"/>
+    <node id="e" x="150.0" y="0.0" type="priority"/>
+    <node id="w" x="-150.0" y="0.0" type="priority"/>
+	<node id="t" x="0.0" y="0.0" type="priority"/>
+</nodes>
diff --git a/nets/single-intersection/single-intersection.rou.xml b/nets/single-intersection/single-intersection.rou.xml
new file mode 100755
index 00000000..291cdee8
--- /dev/null
+++ b/nets/single-intersection/single-intersection.rou.xml
@@ -0,0 +1,6 @@
+<routes>
+    <route id="route_ns" edges="n_t t_s"/>
+    <route id="route_we" edges="w_t t_e"/>
+    <flow id="flow_ns" route="route_ns" begin="0" end="100000" probability="0.2" departSpeed="max" departPos="base" departLane="best"/>
+    <flow id="flow_we" route="route_we" begin="0" end="100000" probability="0.5" departSpeed="max" departPos="base" departLane="best"/>
+</routes>
diff --git a/nets/single-intersection/single-intersection.sumocfg b/nets/single-intersection/single-intersection.sumocfg
new file mode 100755
index 00000000..035327b7
--- /dev/null
+++ b/nets/single-intersection/single-intersection.sumocfg
@@ -0,0 +1,10 @@
+<configuration>
+    <input>
+        <net-file value="single-intersection.net.xml"/>
+        <route-files value="single-intersection.rou.xml"/>
+    </input>
+    <time>
+        <begin value="0"/>
+        <end value="100000"/>
+    </time>
+</configuration>
diff --git a/src/envs/sumo_rl_env/client.py b/src/envs/sumo_rl_env/client.py
new file mode 100644
index 00000000..deba88fd
--- /dev/null
+++ b/src/envs/sumo_rl_env/client.py
@@ -0,0 +1,145 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+HTTP client for SUMO-RL environment.
+
+This module provides a client to interact with the SUMO traffic signal
+control environment over HTTP.
+"""
+
+from typing import Any, Dict
+
+from core.http_env_client import HTTPEnvClient
+from core.types import StepResult
+
+from .models import SumoAction, SumoObservation, SumoState
+
+
+class SumoRLEnv(HTTPEnvClient[SumoAction, SumoObservation]):
+    """
+    HTTP client for SUMO-RL traffic signal control environment.
+
+    This client communicates with a SUMO environment server to control
+    traffic signals using reinforcement learning.
+
+    Example:
+        >>> # Start container and connect
+        >>> env = SumoRLEnv.from_docker_image("sumo-rl-env:latest")
+        >>>
+        >>> # Reset environment
+        >>> result = env.reset()
+        >>> print(f"Observation shape: {result.observation.observation_shape}")
+        >>> print(f"Action space: {result.observation.action_mask}")
+        >>>
+        >>> # Take action
+        >>> result = env.step(SumoAction(phase_id=1))
+        >>> print(f"Reward: {result.reward}, Done: {result.done}")
+        >>>
+        >>> # Get state
+        >>> state = env.state()
+        >>> print(f"Sim time: {state.sim_time}, Total vehicles: {state.total_vehicles}")
+        >>>
+        >>> # Cleanup
+        >>> env.close()
+
+    Example with custom network:
+        >>> # Use custom SUMO network via volume mount
+        >>> env = SumoRLEnv.from_docker_image(
+        ...     "sumo-rl-env:latest",
+        ...     port=8000,
+        ...     volumes={
+        ...         "/path/to/my/nets": {"bind": "/nets", "mode": "ro"}
+        ...     },
+        ...     environment={
+        ...         "SUMO_NET_FILE": "/nets/my-network.net.xml",
+        ...         "SUMO_ROUTE_FILE": "/nets/my-routes.rou.xml",
+        ...     }
+        ... )
+
+    Example with configuration:
+        >>> # Adjust simulation parameters
+        >>> env = SumoRLEnv.from_docker_image(
+        ...     "sumo-rl-env:latest",
+        ...     environment={
+        ...         "SUMO_NUM_SECONDS": "10000",
+        ...         "SUMO_DELTA_TIME": "10",
+        ...         "SUMO_REWARD_FN": "queue",
+        ...         "SUMO_SEED": "123",
+        ...     }
+        ... )
+    """
+
+    def _step_payload(self, action: SumoAction) -> Dict[str, Any]:
+        """
+        Convert SumoAction to JSON payload for HTTP request.
+
+        Args:
+            action: SumoAction containing phase_id to execute.
+
+        Returns:
+            Dictionary payload for step endpoint.
+        """
+        return {
+            "phase_id": action.phase_id,
+            "ts_id": action.ts_id,
+        }
+
+    def _parse_result(self, payload: Dict[str, Any]) -> StepResult[SumoObservation]:
+        """
+        Parse step result from HTTP response JSON.
+
+        Args:
+            payload: JSON response from step endpoint.
+
+        Returns:
+            StepResult containing SumoObservation.
+        """
+        obs_data = payload.get("observation", {})
+
+        observation = SumoObservation(
+            observation=obs_data.get("observation", []),
+            observation_shape=obs_data.get("observation_shape", []),
+            action_mask=obs_data.get("action_mask", []),
+            sim_time=obs_data.get("sim_time", 0.0),
+            done=obs_data.get("done", False),
+            reward=obs_data.get("reward"),
+            metadata=obs_data.get("metadata", {}),
+        )
+
+        return StepResult(
+            observation=observation,
+            reward=payload.get("reward"),
+            done=payload.get("done", False),
+        )
+
+    def _parse_state(self, payload: Dict[str, Any]) -> SumoState:
+        """
+        Parse state from HTTP response JSON.
+
+        Args:
+            payload: JSON response from state endpoint.
+
+        Returns:
+            SumoState object.
+        """
+        return SumoState(
+            episode_id=payload.get("episode_id", ""),
+            step_count=payload.get("step_count", 0),
+            net_file=payload.get("net_file", ""),
+            route_file=payload.get("route_file", ""),
+            num_seconds=payload.get("num_seconds", 20000),
+            delta_time=payload.get("delta_time", 5),
+            yellow_time=payload.get("yellow_time", 2),
+            min_green=payload.get("min_green", 5),
+            max_green=payload.get("max_green", 50),
+            reward_fn=payload.get("reward_fn", "diff-waiting-time"),
+            sim_time=payload.get("sim_time", 0.0),
+            total_vehicles=payload.get("total_vehicles", 0),
+            total_waiting_time=payload.get("total_waiting_time", 0.0),
+            mean_waiting_time=payload.get("mean_waiting_time", 0.0),
+            mean_speed=payload.get("mean_speed", 0.0),
+        )

From c1b2aa494a95a14ec4a6aa2b2257368892a446f6 Mon Sep 17 00:00:00 2001
From: Sanyam Bhutani <sanyambhutani@meta.com>
Date: Fri, 17 Oct 2025 11:31:28 -0700
Subject: [PATCH 07/13] init

---
 src/envs/sumo_rl_env/__init__.py | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 src/envs/sumo_rl_env/__init__.py

diff --git a/src/envs/sumo_rl_env/__init__.py b/src/envs/sumo_rl_env/__init__.py
new file mode 100644
index 00000000..17aaf2f6
--- /dev/null
+++ b/src/envs/sumo_rl_env/__init__.py
@@ -0,0 +1,31 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+SUMO-RL Environment for OpenEnv.
+
+This module provides OpenEnv integration for traffic signal control using
+SUMO (Simulation of Urban MObility) via the SUMO-RL library.
+
+Example:
+    >>> from envs.sumo_rl_env import SumoRLEnv, SumoAction
+    >>>
+    >>> # Connect to a running server or start via Docker
+    >>> env = SumoRLEnv.from_docker_image("sumo-rl-env:latest")
+    >>>
+    >>> # Reset and interact
+    >>> result = env.reset()
+    >>> result = env.step(SumoAction(phase_id=1))
+    >>> print(result.reward, result.done)
+    >>>
+    >>> # Cleanup
+    >>> env.close()
+"""
+
+from .client import SumoRLEnv
+from .models import SumoAction, SumoObservation, SumoState
+
+__all__ = ["SumoRLEnv", "SumoAction", "SumoObservation", "SumoState"]

From 0df6a874cf9da7826a60dc4157a4556d34292796 Mon Sep 17 00:00:00 2001
From: Sanyam Bhutani <sanyambhutani@meta.com>
Date: Fri, 17 Oct 2025 11:31:41 -0700
Subject: [PATCH 08/13] ReadMe

---
 src/envs/sumo_rl_env/README.md | 341 +++++++++++++++++++++++++++++++++
 1 file changed, 341 insertions(+)
 create mode 100644 src/envs/sumo_rl_env/README.md

diff --git a/src/envs/sumo_rl_env/README.md b/src/envs/sumo_rl_env/README.md
new file mode 100644
index 00000000..e35035ae
--- /dev/null
+++ b/src/envs/sumo_rl_env/README.md
@@ -0,0 +1,341 @@
+# SUMO-RL Environment
+
+Integration of traffic signal control with the OpenEnv framework via SUMO (Simulation of Urban MObility) and SUMO-RL.
+
+## Overview
+
+This environment enables reinforcement learning for **traffic signal control** using SUMO, a microscopic traffic simulation package. Train RL agents to optimize traffic light timing and minimize vehicle delays.
+
+**Key Features**:
+- **Realistic traffic simulation** via SUMO
+- **Single-agent mode** for single intersection control
+- **Configurable rewards** (waiting time, queue, pressure, speed)
+- **Multiple networks** supported (custom .net.xml and .rou.xml files)
+- **Docker-ready** with pre-bundled example network
+
+## Quick Start
+
+### Using Docker (Recommended)
+
+```python
+from envs.sumo_rl_env import SumoRLEnv, SumoAction
+
+# Automatically starts container
+env = SumoRLEnv.from_docker_image("sumo-rl-env:latest")
+
+# Reset environment
+result = env.reset()
+print(f"Observation shape: {result.observation.observation_shape}")
+print(f"Available actions: {result.observation.action_mask}")
+
+# Take action (select next green phase)
+result = env.step(SumoAction(phase_id=1))
+print(f"Reward: {result.reward}, Done: {result.done}")
+
+# Get state
+state = env.state()
+print(f"Simulation time: {state.sim_time}")
+print(f"Total vehicles: {state.total_vehicles}")
+print(f"Mean waiting time: {state.mean_waiting_time}")
+
+# Cleanup
+env.close()
+```
+
+### Building the Docker Image
+
+```bash
+cd OpenEnv
+
+# Build base image first (if not already built)
+docker build -t envtorch-base:latest -f src/core/containers/images/Dockerfile .
+
+# Build SUMO-RL environment
+docker build -f src/envs/sumo_rl_env/server/Dockerfile -t sumo-rl-env:latest .
+```
+
+### Running with Different Configurations
+
+```bash
+# Default: single-intersection
+docker run -p 8000:8000 sumo-rl-env:latest
+
+# Longer simulation
+docker run -p 8000:8000 \
+  -e SUMO_NUM_SECONDS=50000 \
+  sumo-rl-env:latest
+
+# Different reward function
+docker run -p 8000:8000 \
+  -e SUMO_REWARD_FN=queue \
+  sumo-rl-env:latest
+
+# Custom seed for reproducibility
+docker run -p 8000:8000 \
+  -e SUMO_SEED=123 \
+  sumo-rl-env:latest
+```
+
+## Observation
+
+The observation is a vector containing:
+- **Phase one-hot**: Current active green phase (one-hot encoded)
+- **Min green flag**: Binary indicator if minimum green time has passed
+- **Lane densities**: Number of vehicles / lane capacity for each incoming lane
+- **Lane queues**: Number of queued vehicles / lane capacity for each incoming lane
+
+Observation size varies by network topology (depends on number of phases and lanes).
+
+**Default (single-intersection)**:
+- 4 green phases
+- 8 incoming lanes
+- Observation size: ~21 elements
+
+## Action Space
+
+The action space is discrete and represents selecting the next green phase to activate.
+
+- **Action type**: Discrete
+- **Action range**: `[0, num_green_phases - 1]`
+- **Default (single-intersection)**: 4 actions (one per green phase)
+
+When a phase change is requested, SUMO automatically inserts a yellow phase before switching.
+
+## Rewards
+
+Default reward function is **change in cumulative waiting time**:
+```
+reward = -(total_waiting_time_now - total_waiting_time_previous)
+```
+
+Positive rewards indicate waiting time decreased (good).
+
+### Available Reward Functions
+
+Set via `SUMO_REWARD_FN` environment variable:
+
+- **`diff-waiting-time`** (default): Change in cumulative waiting time
+- **`average-speed`**: Average speed of all vehicles
+- **`queue`**: Negative total queue length
+- **`pressure`**: Pressure metric (incoming - outgoing vehicles)
+
+## Configuration
+
+### Environment Variables
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `SUMO_NET_FILE` | `/app/nets/single-intersection.net.xml` | Network topology file |
+| `SUMO_ROUTE_FILE` | `/app/nets/single-intersection.rou.xml` | Vehicle routes file |
+| `SUMO_NUM_SECONDS` | `20000` | Simulation duration (seconds) |
+| `SUMO_DELTA_TIME` | `5` | Seconds between agent actions |
+| `SUMO_YELLOW_TIME` | `2` | Yellow phase duration (seconds) |
+| `SUMO_MIN_GREEN` | `5` | Minimum green time (seconds) |
+| `SUMO_MAX_GREEN` | `50` | Maximum green time (seconds) |
+| `SUMO_REWARD_FN` | `diff-waiting-time` | Reward function name |
+| `SUMO_SEED` | `42` | Random seed (use for reproducibility) |
+
+### Using Custom Networks
+
+To use your own SUMO network:
+
+```python
+from envs.sumo_rl_env import SumoRLEnv
+
+env = SumoRLEnv.from_docker_image(
+    "sumo-rl-env:latest",
+    volumes={
+        "/path/to/your/nets": {"bind": "/nets", "mode": "ro"}
+    },
+    environment={
+        "SUMO_NET_FILE": "/nets/my-network.net.xml",
+        "SUMO_ROUTE_FILE": "/nets/my-routes.rou.xml",
+    }
+)
+```
+
+Your network directory should contain:
+- `.net.xml` - Network topology (roads, junctions, traffic lights)
+- `.rou.xml` - Vehicle routes (trip definitions, flow rates)
+
+## API Reference
+
+### SumoAction
+
+```python
+@dataclass
+class SumoAction(Action):
+    phase_id: int  # Green phase to activate (0 to num_phases-1)
+    ts_id: str = "0"  # Traffic signal ID (for multi-agent)
+```
+
+### SumoObservation
+
+```python
+@dataclass
+class SumoObservation(Observation):
+    observation: List[float]  # Observation vector
+    observation_shape: List[int]  # Shape for reshaping
+    action_mask: List[int]  # Valid action indices
+    sim_time: float  # Current simulation time
+    done: bool  # Episode finished
+    reward: Optional[float]  # Reward from last action
+    metadata: Dict  # System metrics
+```
+
+### SumoState
+
+```python
+@dataclass
+class SumoState(State):
+    episode_id: str  # Unique episode ID
+    step_count: int  # Steps taken
+    net_file: str  # Network file path
+    route_file: str  # Route file path
+    sim_time: float  # Current simulation time
+    total_vehicles: int  # Total vehicles in simulation
+    total_waiting_time: float  # Cumulative waiting time
+    mean_waiting_time: float  # Mean waiting time
+    mean_speed: float  # Mean vehicle speed
+    # ... configuration parameters
+```
+
+## Example Training Loop
+
+```python
+from envs.sumo_rl_env import SumoRLEnv, SumoAction
+import numpy as np
+
+# Start environment
+env = SumoRLEnv.from_docker_image("sumo-rl-env:latest")
+
+# Training loop
+for episode in range(10):
+    result = env.reset()
+    episode_reward = 0
+    steps = 0
+
+    while not result.done and steps < 1000:
+        # Random policy (replace with your RL agent)
+        action_id = np.random.choice(result.observation.action_mask)
+
+        # Take action
+        result = env.step(SumoAction(phase_id=action_id))
+
+        episode_reward += result.reward or 0
+        steps += 1
+
+        # Print progress every 100 steps
+        if steps % 100 == 0:
+            state = env.state()
+            print(f"Step {steps}: "
+                  f"reward={result.reward:.2f}, "
+                  f"vehicles={state.total_vehicles}, "
+                  f"waiting={state.mean_waiting_time:.2f}")
+
+    print(f"Episode {episode}: total_reward={episode_reward:.2f}, steps={steps}")
+
+env.close()
+```
+
+## Performance Notes
+
+### Simulation Speed
+
+- **Reset time**: 1-5 seconds (starts new SUMO simulation)
+- **Step time**: ~50-200ms per step (depends on network size)
+- **Episode duration**: Minutes (20,000 sim seconds with delta_time=5 → ~4,000 steps)
+
+### Optimization
+
+For faster simulation:
+1. Reduce `SUMO_NUM_SECONDS` for shorter episodes
+2. Increase `SUMO_DELTA_TIME` for fewer decisions
+3. Use simpler networks with fewer vehicles
+
+## Architecture
+
+```
+┌─────────────────────────────────┐
+│ Client: SumoRLEnv               │
+│  .step(phase_id=1)              │
+└──────────────┬──────────────────┘
+               │ HTTP
+┌──────────────▼──────────────────┐
+│ FastAPI Server (Docker)         │
+│   SumoEnvironment               │
+│     ├─ Wraps sumo_rl           │
+│     ├─ Single-agent mode       │
+│     └─ No GUI                  │
+└──────────────┬──────────────────┘
+               │
+┌──────────────▼──────────────────┐
+│ SUMO Simulator                  │
+│  - Reads .net.xml (network)     │
+│  - Reads .rou.xml (routes)      │
+│  - Simulates traffic flow       │
+│  - Provides observations        │
+└─────────────────────────────────┘
+```
+
+## Bundled Network
+
+The default `single-intersection` network is a simple 4-way intersection with:
+- **4 incoming roads** (North, South, East, West)
+- **4 green phases** (NS straight, NS left, EW straight, EW left)
+- **Vehicle flow**: Continuous stream with varying rates
+
+## Limitations
+
+- **No GUI in Docker**: SUMO GUI requires X server (not available in containers)
+- **Single-agent only**: Multi-agent (multiple intersections) coming in future version
+- **Fixed network per container**: Each container uses one network topology
+- **Memory usage**: ~500MB for small networks, 2-4GB for large city networks
+
+## Troubleshooting
+
+### Container won't start
+```bash
+# Check logs
+docker logs <container-id>
+
+# Verify network files exist
+docker run sumo-rl-env:latest ls -la /app/nets/
+```
+
+### "SUMO_HOME not set" error
+This should be automatic in Docker. If running locally:
+```bash
+export SUMO_HOME=/usr/share/sumo
+```
+
+### Slow performance
+- Reduce simulation duration: `SUMO_NUM_SECONDS=5000`
+- Increase action interval: `SUMO_DELTA_TIME=10`
+- Use smaller networks with fewer vehicles
+
+## References
+
+- [SUMO Documentation](https://sumo.dlr.de/docs/)
+- [SUMO-RL GitHub](https://github.com/LucasAlegre/sumo-rl)
+- [SUMO-RL Paper](https://peerj.com/articles/cs-575/)
+- [RESCO Benchmarks](https://github.com/jault/RESCO)
+
+## Citation
+
+If you use SUMO-RL in your research, please cite:
+
+```bibtex
+@misc{sumorl,
+    author = {Lucas N. Alegre},
+    title = {{SUMO-RL}},
+    year = {2019},
+    publisher = {GitHub},
+    journal = {GitHub repository},
+    howpublished = {\url{https://github.com/LucasAlegre/sumo-rl}},
+}
+```
+
+## License
+
+This integration is licensed under the BSD-style license. SUMO-RL and SUMO have their own licenses.

From c1651adf8d6fb7266069672be8952ccf5873d72a Mon Sep 17 00:00:00 2001
From: Sanyam Bhutani <sanyambhutani@meta.com>
Date: Fri, 17 Oct 2025 11:32:38 -0700
Subject: [PATCH 09/13] example and docker

---
 .github/workflows/docker-build.yml |   2 +
 examples/sumo_rl_simple.py         | 105 +++++++++++++++++++++++++++++
 2 files changed, 107 insertions(+)
 create mode 100644 examples/sumo_rl_simple.py

diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml
index ef753e07..26934ec9 100644
--- a/.github/workflows/docker-build.yml
+++ b/.github/workflows/docker-build.yml
@@ -73,6 +73,8 @@ jobs:
             dockerfile: src/envs/chat_env/server/Dockerfile
           - name: coding-env
             dockerfile: src/envs/coding_env/server/Dockerfile
+          - name: sumo-rl-env
+            dockerfile: src/envs/sumo_rl_env/server/Dockerfile
 
     steps:
       - name: Checkout code
diff --git a/examples/sumo_rl_simple.py b/examples/sumo_rl_simple.py
new file mode 100644
index 00000000..ec5f08ae
--- /dev/null
+++ b/examples/sumo_rl_simple.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python3
+"""
+Simple example demonstrating SUMO-RL Environment usage.
+
+This example shows how to:
+1. Connect to a SUMO traffic signal control environment
+2. Reset the environment
+3. Take actions (select traffic light phases)
+4. Process observations and rewards
+
+Usage:
+    # Option 1: Start the server manually
+    python -m envs.sumo_rl_env.server.app
+    # Then run: python examples/sumo_rl_simple.py
+
+    # Option 2: Use Docker
+    docker run -p 8000:8000 sumo-rl-env:latest
+    # Then run: python examples/sumo_rl_simple.py
+"""
+
+import numpy as np
+
+from envs.sumo_rl_env import SumoAction, SumoRLEnv
+
+
+def main():
+    """Run a simple SUMO traffic control episode."""
+    # Connect to the SUMO environment server
+    print("Connecting to SUMO-RL environment...")
+    env = SumoRLEnv(base_url="http://localhost:8000")
+
+    try:
+        # Reset the environment
+        print("\nResetting environment...")
+        result = env.reset()
+        print(f"Observation shape: {result.observation.observation_shape}")
+        print(f"Available actions: {result.observation.action_mask}")
+        print(f"Number of green phases: {len(result.observation.action_mask)}")
+
+        # Get initial state
+        state = env.state()
+        print(f"\nSimulation configuration:")
+        print(f"  Network: {state.net_file}")
+        print(f"  Duration: {state.num_seconds} seconds")
+        print(f"  Delta time: {state.delta_time} seconds")
+        print(f"  Reward function: {state.reward_fn}")
+
+        # Run a few steps with random policy
+        print("\nRunning traffic control with random policy...")
+        episode_reward = 0
+        steps = 0
+        max_steps = 100
+
+        for step in range(max_steps):
+            # Random policy: select random green phase
+            action_id = np.random.choice(result.observation.action_mask)
+
+            # Take action
+            result = env.step(SumoAction(phase_id=action_id))
+
+            episode_reward += result.reward or 0
+            steps += 1
+
+            # Print progress every 10 steps
+            if step % 10 == 0:
+                state = env.state()
+                print(
+                    f"Step {step:3d}: "
+                    f"phase={action_id}, "
+                    f"reward={result.reward:6.2f}, "
+                    f"vehicles={state.total_vehicles:3d}, "
+                    f"waiting={state.mean_waiting_time:6.2f}s, "
+                    f"speed={state.mean_speed:5.2f}m/s"
+                )
+
+            if result.done:
+                print(f"\nEpisode finished after {steps} steps!")
+                break
+
+        # Final statistics
+        print(f"\n{'='*60}")
+        print(f"Episode Summary:")
+        print(f"  Total steps: {steps}")
+        print(f"  Total reward: {episode_reward:.2f}")
+        print(f"  Average reward: {episode_reward/steps:.2f}")
+
+        # Get final state
+        state = env.state()
+        print(f"\nFinal State:")
+        print(f"  Simulation time: {state.sim_time:.0f} seconds")
+        print(f"  Total vehicles: {state.total_vehicles}")
+        print(f"  Total waiting time: {state.total_waiting_time:.2f} seconds")
+        print(f"  Mean waiting time: {state.mean_waiting_time:.2f} seconds")
+        print(f"  Mean speed: {state.mean_speed:.2f} m/s")
+        print(f"{'='*60}")
+
+    finally:
+        # Cleanup
+        print("\nClosing environment...")
+        env.close()
+        print("Done!")
+
+
+if __name__ == "__main__":
+    main()

From c19c6f435a2e5bf9d1b03d9e1d57f4bcb7291bc4 Mon Sep 17 00:00:00 2001
From: Sanyam Bhutani <sanyambhutani@meta.com>
Date: Fri, 17 Oct 2025 13:07:51 -0700
Subject: [PATCH 10/13] fix docker

---
 SUMO_RL_INTEGRATION_PLAN.md            | 663 -------------------------
 SUMO_RL_RISK_ANALYSIS.md               | 505 -------------------
 src/envs/sumo_rl_env/models.py         |   4 +-
 src/envs/sumo_rl_env/server/Dockerfile |  10 +-
 src/envs/sumo_rl_env/test_sumo_rl.sh   | 220 ++++++++
 5 files changed, 225 insertions(+), 1177 deletions(-)
 delete mode 100644 SUMO_RL_INTEGRATION_PLAN.md
 delete mode 100644 SUMO_RL_RISK_ANALYSIS.md
 create mode 100755 src/envs/sumo_rl_env/test_sumo_rl.sh

diff --git a/SUMO_RL_INTEGRATION_PLAN.md b/SUMO_RL_INTEGRATION_PLAN.md
deleted file mode 100644
index 47e4d339..00000000
--- a/SUMO_RL_INTEGRATION_PLAN.md
+++ /dev/null
@@ -1,663 +0,0 @@
-# SUMO-RL Integration Plan for OpenEnv
-
-**Date**: 2025-10-17
-**Status**: Design Phase
-**Complexity**: High (Docker + SUMO system dependencies)
-
----
-
-## 🤔 ULTRATHINK ANALYSIS
-
-### What is SUMO-RL?
-
-**SUMO-RL** is a Reinforcement Learning environment for **Traffic Signal Control** using SUMO (Simulation of Urban MObility).
-
-- **Use Case**: Train RL agents to optimize traffic light timing to minimize vehicle delays
-- **Main Class**: `SumoEnvironment` from `sumo_rl.environment.env`
-- **APIs**: Supports both Gymnasium (single-agent) and PettingZoo (multi-agent)
-- **Repository**: https://github.com/LucasAlegre/sumo-rl
-- **Version**: 1.4.5
-
-### How SUMO-RL Works
-
-1. **SUMO Simulator**: Microscopic traffic simulation
-2. **Network Files**: `.net.xml` (road network) + `.rou.xml` (vehicle routes)
-3. **Traffic Signals**: RL agent controls when lights change phases
-4. **Observation**: Lane densities, queues, current phase, min_green flag
-5. **Action**: Select next green phase (discrete action space)
-6. **Reward**: Change in cumulative vehicle delay (default)
-
-### Example Usage
-
-```python
-import gymnasium as gym
-import sumo_rl
-
-env = gym.make('sumo-rl-v0',
-                net_file='nets/single-intersection.net.xml',
-                route_file='nets/single-intersection.rou.xml',
-                use_gui=False,
-                num_seconds=100000)
-
-obs, info = env.reset()
-done = False
-while not done:
-    action = env.action_space.sample()
-    obs, reward, terminated, truncated, info = env.step(action)
-    done = terminated or truncated
-```
-
----
-
-## 🎯 Integration Strategy
-
-### Follow Atari Pattern
-
-Like Atari, we'll create:
-1. **models.py** - Data models
-2. **server/sumo_environment.py** - Environment wrapper
-3. **server/app.py** - FastAPI server
-4. **server/Dockerfile** - Container with SUMO
-5. **client.py** - HTTP client
-
-### Key Differences from Atari
-
-| Aspect | Atari | SUMO-RL |
-|--------|-------|---------|
-| **External Dependency** | ALE (pip installable) | SUMO (system package) |
-| **Configuration** | Game name (simple) | Network + route files (complex) |
-| **Observation** | Image pixels | Traffic metrics (vectors) |
-| **Action** | Joystick actions | Traffic signal phases |
-| **Docker Complexity** | Simple | High (need SUMO system install) |
-| **File Dependencies** | None (ROMs bundled) | Network/route XML files required |
-
----
-
-## 📋 Technical Design
-
-### 1. Data Models (`models.py`)
-
-```python
-from dataclasses import dataclass
-from typing import List, Optional
-from core.env_server import Action, Observation, State
-
-@dataclass
-class SumoAction(Action):
-    """Action for SUMO environment - select next green phase."""
-    phase_id: int  # Which green phase to activate next
-    ts_id: str = "0"  # Traffic signal ID (for multi-agent support later)
-
-@dataclass
-class SumoObservation(Observation):
-    """Observation from SUMO environment."""
-    observation: List[float]  # Full observation vector
-    observation_shape: List[int]  # Shape for reshaping
-
-    # Observation components (for interpretability)
-    current_phase: Optional[int] = None
-    min_green_passed: Optional[bool] = None
-    lane_densities: Optional[List[float]] = None
-    lane_queues: Optional[List[float]] = None
-
-    # Metadata
-    action_mask: Optional[List[int]] = None  # Legal actions
-    sim_time: float = 0.0  # Current simulation time
-
-    done: bool = False
-    reward: Optional[float] = None
-
-@dataclass
-class SumoState(State):
-    """State of SUMO environment."""
-    episode_id: str = ""
-    step_count: int = 0
-
-    # SUMO configuration
-    net_file: str = ""
-    route_file: str = ""
-    num_seconds: int = 20000
-    delta_time: int = 5
-    yellow_time: int = 2
-    min_green: int = 5
-    max_green: int = 50
-
-    # Runtime state
-    sim_time: float = 0.0
-    total_vehicles: int = 0
-    total_waiting_time: float = 0.0
-```
-
-### 2. Environment Wrapper (`server/sumo_environment.py`)
-
-```python
-import uuid
-from typing import Any, Dict, Literal, Optional
-from core.env_server import Action, Environment, Observation
-from ..models import SumoAction, SumoObservation, SumoState
-
-import os
-os.environ.setdefault('SUMO_HOME', '/usr/share/sumo')
-
-from sumo_rl import SumoEnvironment as BaseSumoEnv
-
-class SumoEnvironment(Environment):
-    """
-    SUMO-RL Environment wrapper for OpenEnv.
-
-    Wraps the SUMO traffic signal control environment for single-agent RL.
-
-    Args:
-        net_file: Path to SUMO network file (.net.xml)
-        route_file: Path to SUMO route file (.rou.xml)
-        num_seconds: Simulation duration in seconds
-        delta_time: Seconds between actions
-        yellow_time: Yellow phase duration
-        min_green: Minimum green time
-        max_green: Maximum green time
-        reward_fn: Reward function name
-    """
-
-    def __init__(
-        self,
-        net_file: str,
-        route_file: str,
-        num_seconds: int = 20000,
-        delta_time: int = 5,
-        yellow_time: int = 2,
-        min_green: int = 5,
-        max_green: int = 50,
-        reward_fn: str = "diff-waiting-time",
-    ):
-        super().__init__()
-
-        # Store config
-        self.net_file = net_file
-        self.route_file = route_file
-        self.num_seconds = num_seconds
-        self.delta_time = delta_time
-        self.yellow_time = yellow_time
-        self.min_green = min_green
-        self.max_green = max_green
-        self.reward_fn = reward_fn
-
-        # Create SUMO environment (single-agent mode)
-        self.env = BaseSumoEnv(
-            net_file=net_file,
-            route_file=route_file,
-            use_gui=False,  # No GUI in Docker
-            single_agent=True,  # Single-agent for OpenEnv
-            num_seconds=num_seconds,
-            delta_time=delta_time,
-            yellow_time=yellow_time,
-            min_green=min_green,
-            max_green=max_green,
-            reward_fn=reward_fn,
-            sumo_warnings=False,
-        )
-
-        # Initialize state
-        self._state = SumoState(
-            net_file=net_file,
-            route_file=route_file,
-            num_seconds=num_seconds,
-            delta_time=delta_time,
-            yellow_time=yellow_time,
-            min_green=min_green,
-            max_green=max_green,
-        )
-
-        self._last_obs = None
-        self._last_info = None
-
-    def reset(self) -> Observation:
-        """Reset the environment."""
-        # Reset SUMO
-        obs, info = self.env.reset()
-
-        # Update state
-        self._state.episode_id = str(uuid.uuid4())
-        self._state.step_count = 0
-        self._state.sim_time = 0.0
-
-        # Store for later
-        self._last_obs = obs
-        self._last_info = info
-
-        return self._make_observation(obs, 0.0, False, info)
-
-    def step(self, action: Action) -> Observation:
-        """Execute action."""
-        if not isinstance(action, SumoAction):
-            raise ValueError(f"Expected SumoAction, got {type(action)}")
-
-        # Validate action
-        if action.phase_id < 0 or action.phase_id >= self.env.action_space.n:
-            raise ValueError(
-                f"Invalid phase_id: {action.phase_id}. "
-                f"Valid range: [0, {self.env.action_space.n - 1}]"
-            )
-
-        # Execute in SUMO
-        obs, reward, terminated, truncated, info = self.env.step(action.phase_id)
-        done = terminated or truncated
-
-        # Update state
-        self._state.step_count += 1
-        self._state.sim_time = info.get('step', 0.0)
-        self._state.total_vehicles = info.get('system_total_running', 0)
-        self._state.total_waiting_time = info.get('system_total_waiting_time', 0.0)
-
-        # Store for later
-        self._last_obs = obs
-        self._last_info = info
-
-        return self._make_observation(obs, reward, done, info)
-
-    @property
-    def state(self) -> SumoState:
-        """Get current state."""
-        return self._state
-
-    def _make_observation(
-        self,
-        obs: Any,
-        reward: float,
-        done: bool,
-        info: Dict
-    ) -> SumoObservation:
-        """Create SumoObservation from SUMO env output."""
-        # Convert observation to list
-        if hasattr(obs, 'tolist'):
-            obs_list = obs.tolist()
-        else:
-            obs_list = list(obs)
-
-        # Get action mask (all actions valid in SUMO-RL)
-        action_mask = list(range(self.env.action_space.n))
-
-        # Create observation
-        return SumoObservation(
-            observation=obs_list,
-            observation_shape=[len(obs_list)],
-            action_mask=action_mask,
-            sim_time=info.get('step', 0.0),
-            done=done,
-            reward=reward,
-            metadata={
-                "num_green_phases": self.env.action_space.n,
-                "system_info": {
-                    k: v for k, v in info.items() if k.startswith('system_')
-                },
-            },
-        )
-```
-
-### 3. FastAPI Server (`server/app.py`)
-
-```python
-import os
-from core.env_server import create_fastapi_app
-from ..models import SumoAction, SumoObservation
-from .sumo_environment import SumoEnvironment
-
-# Get configuration from environment
-net_file = os.getenv("SUMO_NET_FILE", "/app/nets/single-intersection.net.xml")
-route_file = os.getenv("SUMO_ROUTE_FILE", "/app/nets/single-intersection.rou.xml")
-num_seconds = int(os.getenv("SUMO_NUM_SECONDS", "20000"))
-delta_time = int(os.getenv("SUMO_DELTA_TIME", "5"))
-yellow_time = int(os.getenv("SUMO_YELLOW_TIME", "2"))
-min_green = int(os.getenv("SUMO_MIN_GREEN", "5"))
-max_green = int(os.getenv("SUMO_MAX_GREEN", "50"))
-reward_fn = os.getenv("SUMO_REWARD_FN", "diff-waiting-time")
-
-# Create environment
-env = SumoEnvironment(
-    net_file=net_file,
-    route_file=route_file,
-    num_seconds=num_seconds,
-    delta_time=delta_time,
-    yellow_time=yellow_time,
-    min_green=min_green,
-    max_green=max_green,
-    reward_fn=reward_fn,
-)
-
-# Create FastAPI app
-app = create_fastapi_app(env, SumoAction, SumoObservation)
-```
-
-### 4. Dockerfile (`server/Dockerfile`)
-
-```dockerfile
-# Configurable base image
-ARG BASE_IMAGE=envtorch-base:latest
-FROM ${BASE_IMAGE}
-
-# Install SUMO
-# SUMO is a microscopic traffic simulation package
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    software-properties-common \
-    && add-apt-repository ppa:sumo/stable \
-    && apt-get update \
-    && apt-get install -y --no-install-recommends \
-        sumo \
-        sumo-tools \
-    && rm -rf /var/lib/apt/lists/*
-
-# Set SUMO_HOME
-ENV SUMO_HOME=/usr/share/sumo
-
-# Install SUMO-RL and dependencies
-RUN pip install --no-cache-dir \
-    gymnasium>=0.28 \
-    pettingzoo>=1.24.3 \
-    numpy>=1.24.0 \
-    pandas>=2.0.0 \
-    sumolib>=1.14.0 \
-    traci>=1.14.0 \
-    sumo-rl>=1.4.5
-
-# Copy OpenEnv core
-COPY src/core/ /app/src/core/
-
-# Copy SUMO-RL environment code
-COPY src/envs/sumo_rl_env/ /app/src/envs/sumo_rl_env/
-
-# Copy example networks
-# We'll bundle a simple single-intersection example
-COPY sumo-rl/sumo_rl/nets/single-intersection/ /app/nets/
-
-# Environment variables (can be overridden at runtime)
-ENV SUMO_NET_FILE=/app/nets/single-intersection.net.xml
-ENV SUMO_ROUTE_FILE=/app/nets/single-intersection.rou.xml
-ENV SUMO_NUM_SECONDS=20000
-ENV SUMO_DELTA_TIME=5
-ENV SUMO_YELLOW_TIME=2
-ENV SUMO_MIN_GREEN=5
-ENV SUMO_MAX_GREEN=50
-ENV SUMO_REWARD_FN=diff-waiting-time
-
-# Expose port
-EXPOSE 8000
-
-# Health check
-HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
-    CMD curl -f http://localhost:8000/health || exit 1
-
-# Run the FastAPI server
-CMD ["uvicorn", "envs.sumo_rl_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"]
-```
-
-### 5. HTTP Client (`client.py`)
-
-```python
-from typing import Any, Dict
-from core.http_env_client import HTTPEnvClient
-from core.types import StepResult
-from .models import SumoAction, SumoObservation, SumoState
-
-class SumoRLEnv(HTTPEnvClient[SumoAction, SumoObservation]):
-    """
-    HTTP client for SUMO-RL environment.
-
-    Example:
-        >>> env = SumoRLEnv.from_docker_image("sumo-rl-env:latest")
-        >>> result = env.reset()
-        >>> result = env.step(SumoAction(phase_id=1))
-        >>> print(f"Reward: {result.reward}, Done: {result.done}")
-        >>> env.close()
-    """
-
-    def _step_payload(self, action: SumoAction) -> Dict[str, Any]:
-        """Convert action to JSON payload."""
-        return {
-            "phase_id": action.phase_id,
-            "ts_id": action.ts_id,
-        }
-
-    def _parse_result(self, payload: Dict[str, Any]) -> StepResult[SumoObservation]:
-        """Parse step result from JSON."""
-        obs_data = payload.get("observation", {})
-
-        observation = SumoObservation(
-            observation=obs_data.get("observation", []),
-            observation_shape=obs_data.get("observation_shape", []),
-            current_phase=obs_data.get("current_phase"),
-            min_green_passed=obs_data.get("min_green_passed"),
-            lane_densities=obs_data.get("lane_densities"),
-            lane_queues=obs_data.get("lane_queues"),
-            action_mask=obs_data.get("action_mask", []),
-            sim_time=obs_data.get("sim_time", 0.0),
-            done=obs_data.get("done", False),
-            reward=obs_data.get("reward"),
-            metadata=obs_data.get("metadata", {}),
-        )
-
-        return StepResult(
-            observation=observation,
-            reward=payload.get("reward"),
-            done=payload.get("done", False),
-        )
-
-    def _parse_state(self, payload: Dict[str, Any]) -> SumoState:
-        """Parse state from JSON."""
-        return SumoState(
-            episode_id=payload.get("episode_id", ""),
-            step_count=payload.get("step_count", 0),
-            net_file=payload.get("net_file", ""),
-            route_file=payload.get("route_file", ""),
-            num_seconds=payload.get("num_seconds", 20000),
-            delta_time=payload.get("delta_time", 5),
-            yellow_time=payload.get("yellow_time", 2),
-            min_green=payload.get("min_green", 5),
-            max_green=payload.get("max_green", 50),
-            sim_time=payload.get("sim_time", 0.0),
-            total_vehicles=payload.get("total_vehicles", 0),
-            total_waiting_time=payload.get("total_waiting_time", 0.0),
-        )
-```
-
----
-
-## ⚠️ Critical Challenges
-
-### 1. SUMO System Dependency
-
-**Challenge**: SUMO must be installed at system level (apt-get), not just pip.
-
-**Solution**:
-```dockerfile
-RUN add-apt-repository ppa:sumo/stable && \
-    apt-get update && \
-    apt-get install -y sumo sumo-tools
-```
-
-### 2. Network Files Required
-
-**Challenge**: SUMO needs `.net.xml` and `.rou.xml` files to run.
-
-**Solutions**:
-- **Bundle examples**: Copy simple networks from sumo-rl repo
-- **Volume mount**: Let users mount their own networks
-- **Default config**: Use single-intersection as default
-
-### 3. No GUI Support
-
-**Challenge**: Docker can't run SUMO GUI.
-
-**Solution**: Always use `use_gui=False` in Docker environment.
-
-### 4. Long Simulation Times
-
-**Challenge**: Traffic simulations can take minutes to complete.
-
-**Solution**:
-- Set reasonable defaults (20000 seconds simulation time)
-- Allow configuration via environment variables
-- Document expected runtimes
-
-### 5. Multi-Agent Complexity
-
-**Challenge**: SUMO-RL supports multi-agent (multiple traffic lights).
-
-**Solution**: Start with single-agent only for OpenEnv integration. Multi-agent can be added later.
-
----
-
-## 📊 Configuration Matrix
-
-| Variable | Default | Description |
-|----------|---------|-------------|
-| `SUMO_NET_FILE` | `/app/nets/single-intersection.net.xml` | Network topology file |
-| `SUMO_ROUTE_FILE` | `/app/nets/single-intersection.rou.xml` | Vehicle routes file |
-| `SUMO_NUM_SECONDS` | `20000` | Simulation duration |
-| `SUMO_DELTA_TIME` | `5` | Seconds between actions |
-| `SUMO_YELLOW_TIME` | `2` | Yellow phase duration |
-| `SUMO_MIN_GREEN` | `5` | Minimum green time |
-| `SUMO_MAX_GREEN` | `50` | Maximum green time |
-| `SUMO_REWARD_FN` | `diff-waiting-time` | Reward function |
-
-### Available Reward Functions
-
-From SUMO-RL source:
-- `diff-waiting-time` (default) - Change in cumulative waiting time
-- `average-speed` - Average speed of vehicles
-- `queue` - Total queue length
-- `pressure` - Pressure (difference between incoming/outgoing vehicles)
-
----
-
-## 🧪 Testing Strategy
-
-### 1. Pre-Flight Checks
-- Verify network files exist
-- Check SUMO installation
-- Validate Dockerfile syntax
-- Test imports
-
-### 2. Docker Build Test
-```bash
-docker build -f src/envs/sumo_rl_env/server/Dockerfile -t sumo-rl-env:latest .
-```
-
-### 3. Runtime Tests
-```bash
-docker run -p 8000:8000 sumo-rl-env:latest
-
-curl http://localhost:8000/health
-curl -X POST http://localhost:8000/reset
-curl -X POST http://localhost:8000/step \
-  -H "Content-Type: application/json" \
-  -d '{"action": {"phase_id": 1, "ts_id": "0"}}'
-```
-
-### 4. Python Client Test
-```python
-from envs.sumo_rl_env import SumoRLEnv, SumoAction
-
-env = SumoRLEnv.from_docker_image("sumo-rl-env:latest")
-result = env.reset()
-result = env.step(SumoAction(phase_id=1))
-print(f"Reward: {result.reward}, Done: {result.done}")
-env.close()
-```
-
----
-
-## 📦 What to Bundle
-
-### Minimal Network Example
-
-Bundle the single-intersection example from sumo-rl:
-```
-sumo-rl/sumo_rl/nets/single-intersection/
-├── single-intersection.net.xml  # Network topology
-├── single-intersection.rou.xml  # Vehicle routes
-```
-
-This provides a working example out-of-the-box.
-
-### Additional Networks (Optional)
-
-Could bundle RESCO benchmarks for research:
-- `grid4x4` - 4×4 grid of intersections
-- `arterial4x4` - Arterial road network
-- `cologne1` - Real-world Cologne network
-
-But start with single-intersection for simplicity.
-
----
-
-## 🎯 Implementation Plan
-
-### Phase 1: Core Implementation (4-6 hours)
-1. Create `models.py` ✓ (designed)
-2. Create `server/sumo_environment.py` ✓ (designed)
-3. Create `server/app.py` ✓ (designed)
-4. Create `server/Dockerfile` ✓ (designed)
-5. Create `client.py` ✓ (designed)
-
-### Phase 2: Testing (2-3 hours)
-1. Build Docker image
-2. Test basic functionality
-3. Test different configurations
-4. Verify reward functions work
-
-### Phase 3: Documentation (1-2 hours)
-1. Write README.md
-2. Create examples
-3. Document network file format
-4. Add to GitHub Actions
-
-### Phase 4: Integration (1 hour)
-1. Add to `.github/workflows/docker-build.yml`
-2. Update main README
-3. Add to environments list
-
-**Total Estimate**: 8-12 hours
-
----
-
-## 🚀 Next Steps
-
-1. **Create file structure** in `/Users/sanyambhutani/GH/OpenEnv/src/envs/sumo_rl_env/`
-2. **Copy network files** from `/Users/sanyambhutani/OpenEnv/sumo-rl/sumo_rl/nets/`
-3. **Implement all files** following the designs above
-4. **Build and test Docker image**
-5. **Create documentation**
-6. **Add to GitHub Actions**
-
----
-
-## 💡 Key Insights
-
-### Why SUMO-RL is Harder Than Atari
-
-1. **System Dependencies**: Atari (ale-py) is pip-installable, SUMO requires apt-get
-2. **Configuration Complexity**: Atari just needs game name, SUMO needs network files
-3. **Runtime**: Atari is fast, SUMO simulations can take minutes
-4. **File Dependencies**: Atari bundles ROMs, SUMO needs user-provided networks
-
-### Why It's Still Doable
-
-1. **Single-Agent Mode**: Simplifies to standard Gymnasium API
-2. **Bundle Example**: Include simple network to start immediately
-3. **Environment Variables**: Easy runtime configuration
-4. **Pattern Reuse**: Follow exact Atari pattern for consistency
-
----
-
-## 📚 References
-
-- [SUMO-RL GitHub](https://github.com/LucasAlegre/sumo-rl)
-- [SUMO Documentation](https://sumo.dlr.de/docs/)
-- [SUMO-RL Docs](https://lucasalegre.github.io/sumo-rl/)
-- [RESCO Benchmarks Paper](https://people.engr.tamu.edu/guni/Papers/NeurIPS-signals.pdf)
-
----
-
-**Status**: Design complete, ready for implementation
-**Complexity**: High (system dependencies + network files)
-**Time Estimate**: 8-12 hours
-**Confidence**: 85% (Dockerfile complexity is main risk)
diff --git a/SUMO_RL_RISK_ANALYSIS.md b/SUMO_RL_RISK_ANALYSIS.md
deleted file mode 100644
index 0f216b39..00000000
--- a/SUMO_RL_RISK_ANALYSIS.md
+++ /dev/null
@@ -1,505 +0,0 @@
-# SUMO-RL Integration: ULTRATHINK Risk Analysis
-
-**Date**: 2025-10-17
-**Status**: Deep Risk Assessment
-
----
-
-## ✅ Docker Eliminates PRIMARY Risk
-
-**YES - Docker solves the hardest problem!**
-
-| Risk | Without Docker | With Docker |
-|------|---------------|-------------|
-| **System Dependencies** | ❌ Nightmare | ✅ Solved |
-| **Cross-platform** | ❌ Linux only | ✅ Works everywhere |
-| **Installation** | ❌ Requires sudo | ✅ Just `docker run` |
-| **Reproducibility** | ❌ "Works on my machine" | ✅ Identical |
-
-**Conclusion**: Docker takes away 80% of the pain. ✨
-
----
-
-## ⚠️ Remaining Risks (Deep Analysis)
-
-### 🔴 HIGH RISK
-
-#### 1. **TraCI Connection Management in HTTP Server**
-
-**Issue**: `SumoEnvironment` uses class variable `CONNECTION_LABEL` that increments globally.
-
-```python
-CONNECTION_LABEL = 0  # For traci multi-client support
-
-def __init__(self):
-    self.label = str(SumoEnvironment.CONNECTION_LABEL)
-    SumoEnvironment.CONNECTION_LABEL += 1
-```
-
-**Risk**: In HTTP server with concurrent requests:
-- Request 1 creates env (label=0)
-- Request 2 creates env (label=1)
-- Request 1 resets → closes connection label=0
-- Request 2 steps → tries to use label=1
-- **Potential conflict if requests overlap**
-
-**Likelihood**: Medium (depends on usage pattern)
-
-**Impact**: High (could cause simulation errors)
-
-**Mitigation**:
-```python
-# Option 1: Single environment instance (RECOMMENDED)
-# Create ONE environment at server startup, reuse for all requests
-env = SumoEnvironment(...)  # Created once
-app = create_fastapi_app(env, ...)  # Reuses same env
-
-# Option 2: Thread-safe connection management
-# Use threading locks around TraCI operations
-```
-
-**Decision**: Use single environment instance per container (same as Atari pattern). Each HTTP request uses the same environment. **SOLVES ISSUE**.
-
----
-
-#### 2. **LIBSUMO vs TraCI Performance Trade-off**
-
-**Background**:
-```python
-LIBSUMO = "LIBSUMO_AS_TRACI" in os.environ
-```
-
-- **TraCI**: Standard, supports GUI, slower (1x speed)
-- **LIBSUMO**: No GUI, no parallel sims, faster (8x speed)
-
-**Risk**: Default TraCI could be too slow for RL training.
-
-**Likelihood**: High (traffic sims are inherently slow)
-
-**Impact**: Medium (training takes longer, not broken)
-
-**Mitigation**:
-```dockerfile
-# Option 1: Use TraCI (default, safer)
-# No env var needed, works out of box
-
-# Option 2: Enable LIBSUMO for speed
-ENV LIBSUMO_AS_TRACI=1
-
-# Recommendation: Start with TraCI, add LIBSUMO as optimization later
-```
-
-**Decision**: Start with TraCI (default), document LIBSUMO option for advanced users.
-
----
-
-### 🟡 MEDIUM RISK
-
-#### 3. **Episode Reset Performance**
-
-**Issue**: Each `reset()` closes and restarts SUMO simulation.
-
-```python
-def reset(self, seed=None, **kwargs):
-    if self.episode != 0:
-        self.close()  # Closes previous simulation
-    self._start_simulation()  # Starts new one
-```
-
-**Risk**: Reset could take 1-5 seconds (slow for RL training loop).
-
-**Likelihood**: High (this is how SUMO works)
-
-**Impact**: Medium (slows training, doesn't break it)
-
-**Mitigation**:
-- Document expected reset time
-- Use long episodes (`num_seconds=20000`)
-- Consider warm-start optimizations later
-
-**Decision**: Accept this limitation, document it. Not a blocker.
-
----
-
-#### 4. **CSV Output Accumulation**
-
-**Issue**: Environment can write CSV metrics to disk.
-
-```python
-def save_csv(self, out_csv_name, episode):
-    df.to_csv(out_csv_name + f"_conn{self.label}_ep{episode}" + ".csv")
-```
-
-**Risk**: In Docker, CSV files accumulate → disk space.
-
-**Likelihood**: Low (only if user enables CSV output)
-
-**Impact**: Low (disk space, not functionality)
-
-**Mitigation**:
-```python
-# In our wrapper, set out_csv_name=None (disables CSV)
-env = SumoEnvironment(
-    ...,
-    out_csv_name=None,  # Disable CSV output
-)
-```
-
-**Decision**: Disable CSV output by default. Users can enable via volume mount if needed.
-
----
-
-#### 5. **Network File Path Resolution**
-
-**Issue**: SUMO needs absolute paths to `.net.xml` and `.rou.xml` files.
-
-**Risk**: If paths are wrong in Docker, simulation fails.
-
-**Likelihood**: Low (we control the paths)
-
-**Impact**: High (breaks everything if wrong)
-
-**Mitigation**:
-```dockerfile
-# Bundle networks at known paths
-COPY sumo-rl/sumo_rl/nets/single-intersection/ /app/nets/
-
-# Set absolute paths as defaults
-ENV SUMO_NET_FILE=/app/nets/single-intersection.net.xml
-ENV SUMO_ROUTE_FILE=/app/nets/single-intersection.rou.xml
-```
-
-**Decision**: Bundle example networks, use absolute paths. Test during build.
-
----
-
-#### 6. **Dynamic Observation/Action Spaces**
-
-**Issue**: Different networks → different action/observation sizes.
-
-```python
-# Action space size = number of traffic signal phases (varies)
-self.action_space = gym.spaces.Discrete(num_green_phases)
-
-# Observation size = depends on number of lanes (varies)
-obs_size = num_green_phases + 1 + 2*num_lanes
-```
-
-**Risk**: OpenEnv expects fixed-size spaces?
-
-**Likelihood**: Low (we use single network by default)
-
-**Impact**: Medium (breaks if user changes network)
-
-**Mitigation**:
-- Use single-intersection as default (fixed sizes)
-- Document that changing networks may change spaces
-- Future: Make spaces configurable
-
-**Decision**: Not a blocker. Start with single network, document clearly.
-
----
-
-### 🟢 LOW RISK
-
-#### 7. **SUMO Version Compatibility**
-
-**Issue**: `ppa:sumo/stable` might update SUMO version over time.
-
-**Risk**: New SUMO version breaks sumo-rl compatibility.
-
-**Likelihood**: Low (SUMO is stable)
-
-**Impact**: Medium (breaks after rebuild)
-
-**Mitigation**:
-```dockerfile
-# Option 1: Pin SUMO version (if available)
-RUN apt-get install -y sumo=1.14.0
-
-# Option 2: Pin sumolib/traci versions
-RUN pip install sumolib==1.14.0 traci==1.14.0
-
-# Option 3: Accept latest (simpler, usually works)
-```
-
-**Decision**: Start with latest, pin if issues arise.
-
----
-
-#### 8. **sumolib/traci vs System SUMO Mismatch**
-
-**Issue**: Pip packages `sumolib` and `traci` should match system SUMO version.
-
-**Risk**: Version mismatch causes compatibility issues.
-
-**Likelihood**: Low (sumo-rl handles this)
-
-**Impact**: Medium (simulation errors)
-
-**Mitigation**:
-```dockerfile
-# Install SUMO first
-RUN apt-get install -y sumo sumo-tools
-
-# Then install matching Python packages
-RUN pip install sumolib>=1.14.0 traci>=1.14.0
-```
-
-**Decision**: Use `>=` versions, should work. Test during build.
-
----
-
-#### 9. **PettingZoo Version Compatibility**
-
-**Issue**: Code has fallback for PettingZoo 1.24 vs 1.25+
-
-```python
-try:
-    from pettingzoo.utils import AgentSelector  # 1.25+
-except ImportError:
-    from pettingzoo.utils import agent_selector as AgentSelector  # 1.24
-```
-
-**Risk**: Version incompatibility breaks import.
-
-**Likelihood**: Low (pyproject.toml specifies `pettingzoo>=1.24.3`)
-
-**Impact**: Low (import error, easy to debug)
-
-**Mitigation**:
-```dockerfile
-RUN pip install pettingzoo>=1.24.3
-```
-
-**Decision**: Use version spec from pyproject.toml.
-
----
-
-#### 10. **Memory Usage with Many Vehicles**
-
-**Issue**: Large traffic networks with thousands of vehicles → high memory.
-
-**Risk**: Container OOM (out of memory).
-
-**Likelihood**: Low (single-intersection is small)
-
-**Impact**: High (container crash)
-
-**Mitigation**:
-- Use small default network (single-intersection)
-- Document memory requirements for large networks
-- Docker memory limits (optional)
-
-**Decision**: Not a blocker. Document memory requirements.
-
----
-
-#### 11. **Simulation Determinism**
-
-**Issue**: Default `sumo_seed="random"` → non-deterministic.
-
-**Risk**: Can't reproduce training runs.
-
-**Likelihood**: High (default is random)
-
-**Impact**: Low (science issue, not functionality)
-
-**Mitigation**:
-```python
-# Allow seed control via environment variable
-sumo_seed = int(os.getenv("SUMO_SEED", "42"))  # Default fixed seed
-
-# Or keep random, document it
-sumo_seed = os.getenv("SUMO_SEED", "random")
-```
-
-**Decision**: Default to fixed seed (42) for reproducibility. Document how to use random.
-
----
-
-#### 12. **Headless Operation (No GUI)**
-
-**Issue**: We force `use_gui=False` in Docker.
-
-**Risk**: Users might want to see simulation GUI.
-
-**Likelihood**: Low (Docker is headless)
-
-**Impact**: Low (convenience feature)
-
-**Mitigation**:
-- Document that GUI is not available in Docker
-- Suggest local development for GUI
-- Future: VNC access to container GUI
-
-**Decision**: Not a blocker. GUI doesn't work in Docker anyway.
-
----
-
-#### 13. **Docker Image Size**
-
-**Issue**: SUMO + dependencies → large image.
-
-**Estimate**:
-- Base: ~200MB
-- SUMO: ~500MB
-- Python packages: ~200MB
-- **Total: ~900MB-1GB**
-
-**Risk**: Large downloads, storage.
-
-**Likelihood**: High (definitely will be large)
-
-**Impact**: Low (acceptable for complex sim)
-
-**Mitigation**:
-- Multi-stage builds (future optimization)
-- Clear documentation of size
-- Accept it (complexity requires space)
-
-**Decision**: Accept ~1GB image size. Not a blocker.
-
----
-
-#### 14. **Long Simulation Times**
-
-**Issue**: Traffic simulations take time (minutes per episode).
-
-**Example**: 20,000 simulated seconds with delta_time=5 → 4,000 steps per episode.
-
-**Risk**: RL training is slow.
-
-**Likelihood**: High (inherent to traffic simulation)
-
-**Impact**: Medium (slower research, not broken)
-
-**Mitigation**:
-- Document expected times
-- Recommend shorter episodes for quick tests
-- Suggest LIBSUMO for speedup
-
-**Decision**: Document clearly. Not a technical blocker.
-
----
-
-## 📊 Risk Summary
-
-| Risk | Severity | Likelihood | Mitigation Status |
-|------|----------|-----------|-------------------|
-| TraCI Connection Management | 🔴 High | Medium | ✅ Solved (single env instance) |
-| LIBSUMO vs TraCI | 🔴 High | High | ✅ Mitigated (default TraCI, doc LIBSUMO) |
-| Episode Reset Performance | 🟡 Medium | High | ✅ Accepted (document) |
-| CSV Output Accumulation | 🟡 Medium | Low | ✅ Solved (disable by default) |
-| Network File Paths | 🟡 Medium | Low | ✅ Solved (bundle at known paths) |
-| Dynamic Spaces | 🟡 Medium | Low | ✅ Accepted (document) |
-| SUMO Version | 🟢 Low | Low | ✅ Accepted (use latest) |
-| sumolib/traci Mismatch | 🟢 Low | Low | ✅ Mitigated (>=1.14.0) |
-| PettingZoo Version | 🟢 Low | Low | ✅ Mitigated (>=1.24.3) |
-| Memory Usage | 🟢 Low | Low | ✅ Accepted (document) |
-| Simulation Determinism | 🟢 Low | High | ✅ Solved (default fixed seed) |
-| No GUI | 🟢 Low | Low | ✅ Accepted (Docker is headless) |
-| Image Size | 🟢 Low | High | ✅ Accepted (~1GB) |
-| Long Sim Times | 🟢 Low | High | ✅ Accepted (document) |
-
----
-
-## ✅ Final Risk Assessment
-
-### Overall Risk Level: **LOW-MEDIUM** ✅
-
-### Key Findings:
-
-1. **Docker solves the hardest problem** (system dependencies) ✅
-2. **No critical blockers** - all risks have mitigations ✅
-3. **Main concerns are performance** (speed, memory) - acceptable for simulation ✅
-4. **Connection management solved** by single env instance pattern ✅
-
-### Recommended Mitigations:
-
-```python
-# 1. Single environment instance per container
-env = SumoEnvironment(
-    net_file="/app/nets/single-intersection.net.xml",
-    route_file="/app/nets/single-intersection.rou.xml",
-    use_gui=False,  # No GUI in Docker
-    single_agent=True,  # Single-agent mode
-    num_seconds=20000,
-    sumo_seed=42,  # Fixed seed for reproducibility
-    out_csv_name=None,  # Disable CSV output
-    sumo_warnings=False,  # Quiet
-)
-
-# 2. Reuse for all HTTP requests
-app = create_fastapi_app(env, SumoAction, SumoObservation)
-```
-
-```dockerfile
-# 3. Bundle network files at known paths
-COPY sumo-rl/sumo_rl/nets/single-intersection/ /app/nets/
-
-# 4. Set SUMO_HOME
-ENV SUMO_HOME=/usr/share/sumo
-
-# 5. Don't enable LIBSUMO by default (safer)
-# ENV LIBSUMO_AS_TRACI=1  # Optional for advanced users
-```
-
----
-
-## 🎯 Confidence Level
-
-**Original**: 85% confident
-**After Deep Analysis**: **95% confident** ✅
-
-**Reasons for Increased Confidence**:
-1. All high-risk items have clear mitigations
-2. Docker architecture naturally solves connection management
-3. Pattern matches Atari (proven to work)
-4. Risks are mostly performance/documentation, not functionality
-5. No unexpected blockers found
-
----
-
-## 🚀 Ready to Implement
-
-**Recommendation**: **PROCEED WITH IMPLEMENTATION** ✅
-
-The risks are manageable and well-understood. Docker makes this integration feasible and clean.
-
-**Estimated Effort**: 8-12 hours (unchanged)
-
-**Success Probability**: 95%
-
----
-
-## 📝 Documentation Requirements
-
-Based on risk analysis, must document:
-
-1. **Performance expectations**:
-   - Reset takes 1-5 seconds
-   - Episodes can take minutes
-   - LIBSUMO option for 8x speedup
-
-2. **Network files**:
-   - Default: single-intersection (bundled)
-   - Custom: mount volume with your .net.xml/.rou.xml
-
-3. **Reproducibility**:
-   - Default seed=42 (deterministic)
-   - Set SUMO_SEED=random for stochastic
-
-4. **Limitations**:
-   - No GUI in Docker
-   - Single-agent only (v1)
-   - Fixed network per container
-
-5. **Memory requirements**:
-   - Small networks: ~500MB
-   - Large networks: 2-4GB
-   - Document scaling
-
----
-
-**Analysis Complete**: All risks identified, mitigated, and documented. ✅
diff --git a/src/envs/sumo_rl_env/models.py b/src/envs/sumo_rl_env/models.py
index 611b0eed..6c73092b 100644
--- a/src/envs/sumo_rl_env/models.py
+++ b/src/envs/sumo_rl_env/models.py
@@ -54,8 +54,8 @@ class SumoObservation(Observation):
         metadata: Additional info (system metrics, etc.)
     """
 
-    observation: List[float]
-    observation_shape: List[int]
+    observation: List[float] = field(default_factory=list)
+    observation_shape: List[int] = field(default_factory=list)
     action_mask: List[int] = field(default_factory=list)
     sim_time: float = 0.0
     done: bool = False
diff --git a/src/envs/sumo_rl_env/server/Dockerfile b/src/envs/sumo_rl_env/server/Dockerfile
index 618ebe88..cb5527bc 100644
--- a/src/envs/sumo_rl_env/server/Dockerfile
+++ b/src/envs/sumo_rl_env/server/Dockerfile
@@ -13,14 +13,10 @@ ARG BASE_IMAGE=envtorch-base:latest
 FROM ${BASE_IMAGE}
 
 # Install SUMO system dependencies
-# SUMO is a microscopic traffic simulation package
+# SUMO is available in Debian repositories
 RUN apt-get update && apt-get install -y --no-install-recommends \
-    software-properties-common \
-    && add-apt-repository ppa:sumo/stable \
-    && apt-get update \
-    && apt-get install -y --no-install-recommends \
-        sumo \
-        sumo-tools \
+    sumo \
+    sumo-tools \
     && rm -rf /var/lib/apt/lists/*
 
 # Set SUMO_HOME environment variable
diff --git a/src/envs/sumo_rl_env/test_sumo_rl.sh b/src/envs/sumo_rl_env/test_sumo_rl.sh
new file mode 100755
index 00000000..61265c73
--- /dev/null
+++ b/src/envs/sumo_rl_env/test_sumo_rl.sh
@@ -0,0 +1,220 @@
+#!/bin/bash
+# Complete SUMO-RL Integration Test Script
+# Run this to verify everything works!
+
+set -e  # Exit on error
+
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo "🚀 SUMO-RL Environment Test Script"
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo ""
+
+# Navigate to repo root
+cd /Users/sanyambhutani/GH/OpenEnv
+
+echo "📁 Working directory: $(pwd)"
+echo ""
+
+# Step 1: Check if base image exists
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo "Step 1: Checking for base image..."
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+
+if docker images | grep -q "envtorch-base.*latest"; then
+    echo "✅ envtorch-base:latest found"
+else
+    echo "⚠️  envtorch-base:latest not found - building it now..."
+    echo ""
+    docker build -t envtorch-base:latest -f src/core/containers/images/Dockerfile .
+    echo ""
+    echo "✅ Base image built successfully"
+fi
+echo ""
+
+# Step 2: Build SUMO-RL environment
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo "Step 2: Building SUMO-RL environment image..."
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo "⏳ This will take 5-10 minutes (installing SUMO)..."
+echo ""
+
+docker build -f src/envs/sumo_rl_env/server/Dockerfile -t sumo-rl-env:latest .
+
+echo ""
+echo "✅ SUMO-RL environment built successfully"
+echo ""
+
+# Check image size
+IMAGE_SIZE=$(docker images sumo-rl-env:latest --format "{{.Size}}")
+echo "📦 Image size: $IMAGE_SIZE"
+echo ""
+
+# Step 3: Start container
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo "Step 3: Starting SUMO-RL container..."
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+
+# Stop any existing container
+docker stop sumo-rl-test 2>/dev/null || true
+docker rm sumo-rl-test 2>/dev/null || true
+
+# Start new container
+docker run -d -p 8000:8000 --name sumo-rl-test sumo-rl-env:latest
+
+echo "⏳ Waiting for container to start..."
+sleep 5
+
+# Check if container is running
+if docker ps | grep -q sumo-rl-test; then
+    echo "✅ Container is running"
+else
+    echo "❌ Container failed to start!"
+    echo "Logs:"
+    docker logs sumo-rl-test
+    exit 1
+fi
+echo ""
+
+# Step 4: Test health endpoint
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo "Step 4: Testing health endpoint..."
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+
+HEALTH_RESPONSE=$(curl -s http://localhost:8000/health)
+echo "Response: $HEALTH_RESPONSE"
+
+if echo "$HEALTH_RESPONSE" | grep -q "healthy"; then
+    echo "✅ Health check passed"
+else
+    echo "❌ Health check failed!"
+    exit 1
+fi
+echo ""
+
+# Step 5: Test reset endpoint
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo "Step 5: Testing reset endpoint..."
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo "⏳ This may take 3-5 seconds (SUMO simulation starting)..."
+
+RESET_RESPONSE=$(curl -s -X POST http://localhost:8000/reset)
+
+if echo "$RESET_RESPONSE" | jq -e '.observation.observation' > /dev/null 2>&1; then
+    echo "✅ Reset successful"
+
+    # Extract observation details
+    OBS_SHAPE=$(echo "$RESET_RESPONSE" | jq '.observation.observation_shape')
+    ACTION_MASK=$(echo "$RESET_RESPONSE" | jq '.observation.action_mask')
+
+    echo "  📊 Observation shape: $OBS_SHAPE"
+    echo "  🎮 Available actions: $ACTION_MASK"
+else
+    echo "❌ Reset failed!"
+    echo "Response: $RESET_RESPONSE"
+    exit 1
+fi
+echo ""
+
+# Step 6: Test step endpoint
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo "Step 6: Testing step endpoint (taking 5 actions)..."
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+
+for i in {1..5}; do
+    # Take action (cycle through phases 0-1)
+    PHASE_ID=$((i % 2))
+
+    STEP_RESPONSE=$(curl -s -X POST http://localhost:8000/step \
+        -H "Content-Type: application/json" \
+        -d "{\"action\": {\"phase_id\": $PHASE_ID, \"ts_id\": \"0\"}}")
+
+    if echo "$STEP_RESPONSE" | jq -e '.reward' > /dev/null 2>&1; then
+        REWARD=$(echo "$STEP_RESPONSE" | jq '.reward')
+        DONE=$(echo "$STEP_RESPONSE" | jq '.done')
+        echo "  Step $i: phase=$PHASE_ID, reward=$REWARD, done=$DONE"
+    else
+        echo "❌ Step $i failed!"
+        echo "Response: $STEP_RESPONSE"
+        exit 1
+    fi
+done
+
+echo "✅ All steps successful"
+echo ""
+
+# Step 7: Test state endpoint
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo "Step 7: Testing state endpoint..."
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+
+STATE_RESPONSE=$(curl -s http://localhost:8000/state)
+
+if echo "$STATE_RESPONSE" | jq -e '.episode_id' > /dev/null 2>&1; then
+    echo "✅ State endpoint working"
+
+    # Extract state details
+    EPISODE_ID=$(echo "$STATE_RESPONSE" | jq -r '.episode_id')
+    STEP_COUNT=$(echo "$STATE_RESPONSE" | jq '.step_count')
+    SIM_TIME=$(echo "$STATE_RESPONSE" | jq '.sim_time')
+    TOTAL_VEHICLES=$(echo "$STATE_RESPONSE" | jq '.total_vehicles')
+
+    echo "  📝 Episode ID: ${EPISODE_ID:0:8}..."
+    echo "  🔢 Step count: $STEP_COUNT"
+    echo "  ⏱️  Simulation time: $SIM_TIME seconds"
+    echo "  🚗 Total vehicles: $TOTAL_VEHICLES"
+else
+    echo "❌ State endpoint failed!"
+    echo "Response: $STATE_RESPONSE"
+    exit 1
+fi
+echo ""
+
+# Step 8: Check logs for errors
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo "Step 8: Checking container logs for errors..."
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+
+LOGS=$(docker logs sumo-rl-test 2>&1)
+
+# Check for Python errors (but ignore LoggerMode.Error which is expected)
+if echo "$LOGS" | grep -i "error\|exception\|traceback" | grep -v "LoggerMode.Error"; then
+    echo "⚠️  Found errors in logs:"
+    echo "$LOGS" | grep -i "error\|exception\|traceback" | grep -v "LoggerMode.Error"
+else
+    echo "✅ No errors found in logs"
+fi
+echo ""
+
+# Step 9: Cleanup
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo "Step 9: Cleanup..."
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+
+echo "🧹 Stopping and removing test container..."
+docker stop sumo-rl-test
+docker rm sumo-rl-test
+
+echo "✅ Cleanup complete"
+echo ""
+
+# Final summary
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo "🎉 ALL TESTS PASSED!"
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo ""
+echo "Summary:"
+echo "  ✅ Docker image built successfully ($IMAGE_SIZE)"
+echo "  ✅ Container started and ran"
+echo "  ✅ Health endpoint working"
+echo "  ✅ Reset endpoint working"
+echo "  ✅ Step endpoint working (5 actions executed)"
+echo "  ✅ State endpoint working"
+echo "  ✅ No errors in logs"
+echo ""
+echo "🎯 SUMO-RL integration is working perfectly!"
+echo ""
+echo "Next steps:"
+echo "  1. Test Python client: python examples/sumo_rl_simple.py"
+echo "  2. Push to GitHub to trigger CI/CD"
+echo "  3. Use for RL training!"
+echo ""

From 99c97100484f35c2919878c83da7278e8b22b1e6 Mon Sep 17 00:00:00 2001
From: Sanyam Bhutani <sanyambhutani@meta.com>
Date: Fri, 17 Oct 2025 13:13:05 -0700
Subject: [PATCH 11/13] mv configs

---
 .../nets}/single-intersection/single-intersection.edg.xml         | 0
 .../nets}/single-intersection/single-intersection.net.xml         | 0
 .../nets}/single-intersection/single-intersection.nod.xml         | 0
 .../nets}/single-intersection/single-intersection.rou.xml         | 0
 .../nets}/single-intersection/single-intersection.sumocfg         | 0
 5 files changed, 0 insertions(+), 0 deletions(-)
 rename {nets => src/envs/sumo_rl_env/nets}/single-intersection/single-intersection.edg.xml (100%)
 rename {nets => src/envs/sumo_rl_env/nets}/single-intersection/single-intersection.net.xml (100%)
 rename {nets => src/envs/sumo_rl_env/nets}/single-intersection/single-intersection.nod.xml (100%)
 rename {nets => src/envs/sumo_rl_env/nets}/single-intersection/single-intersection.rou.xml (100%)
 rename {nets => src/envs/sumo_rl_env/nets}/single-intersection/single-intersection.sumocfg (100%)

diff --git a/nets/single-intersection/single-intersection.edg.xml b/src/envs/sumo_rl_env/nets/single-intersection/single-intersection.edg.xml
similarity index 100%
rename from nets/single-intersection/single-intersection.edg.xml
rename to src/envs/sumo_rl_env/nets/single-intersection/single-intersection.edg.xml
diff --git a/nets/single-intersection/single-intersection.net.xml b/src/envs/sumo_rl_env/nets/single-intersection/single-intersection.net.xml
similarity index 100%
rename from nets/single-intersection/single-intersection.net.xml
rename to src/envs/sumo_rl_env/nets/single-intersection/single-intersection.net.xml
diff --git a/nets/single-intersection/single-intersection.nod.xml b/src/envs/sumo_rl_env/nets/single-intersection/single-intersection.nod.xml
similarity index 100%
rename from nets/single-intersection/single-intersection.nod.xml
rename to src/envs/sumo_rl_env/nets/single-intersection/single-intersection.nod.xml
diff --git a/nets/single-intersection/single-intersection.rou.xml b/src/envs/sumo_rl_env/nets/single-intersection/single-intersection.rou.xml
similarity index 100%
rename from nets/single-intersection/single-intersection.rou.xml
rename to src/envs/sumo_rl_env/nets/single-intersection/single-intersection.rou.xml
diff --git a/nets/single-intersection/single-intersection.sumocfg b/src/envs/sumo_rl_env/nets/single-intersection/single-intersection.sumocfg
similarity index 100%
rename from nets/single-intersection/single-intersection.sumocfg
rename to src/envs/sumo_rl_env/nets/single-intersection/single-intersection.sumocfg

From 475a32fc71711f4ff19a922d24f5b186bdee6936 Mon Sep 17 00:00:00 2001
From: Sanyam Bhutani <sanyambhutani@meta.com>
Date: Fri, 17 Oct 2025 13:13:19 -0700
Subject: [PATCH 12/13] Update Dockerfile

---
 src/envs/sumo_rl_env/server/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/envs/sumo_rl_env/server/Dockerfile b/src/envs/sumo_rl_env/server/Dockerfile
index cb5527bc..161c6e00 100644
--- a/src/envs/sumo_rl_env/server/Dockerfile
+++ b/src/envs/sumo_rl_env/server/Dockerfile
@@ -41,7 +41,7 @@ COPY src/envs/sumo_rl_env/ /app/src/envs/sumo_rl_env/
 
 # Copy example network files
 # Default: single-intersection (simple 4-way intersection)
-COPY nets/single-intersection/ /app/nets/single-intersection/
+COPY src/envs/sumo_rl_env/nets/single-intersection/ /app/nets/single-intersection/
 
 # SUMO environment variables (can be overridden at runtime)
 ENV SUMO_NET_FILE=/app/nets/single-intersection/single-intersection.net.xml

From 17bf4c4af458da2ee35b7c8e58af32dee28b6d0c Mon Sep 17 00:00:00 2001
From: Sanyam Bhutani <sanyambhutani@meta.com>
Date: Fri, 17 Oct 2025 13:13:40 -0700
Subject: [PATCH 13/13] fix Docker

---
 src/envs/sumo_rl_env/server/Dockerfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/envs/sumo_rl_env/server/Dockerfile b/src/envs/sumo_rl_env/server/Dockerfile
index 161c6e00..d1495283 100644
--- a/src/envs/sumo_rl_env/server/Dockerfile
+++ b/src/envs/sumo_rl_env/server/Dockerfile
@@ -36,10 +36,10 @@ RUN pip install --no-cache-dir \
 # Copy OpenEnv core (base image already set WORKDIR=/app)
 COPY src/core/ /app/src/core/
 
-# Copy SUMO-RL environment code
+# Copy SUMO-RL environment code (includes nets/)
 COPY src/envs/sumo_rl_env/ /app/src/envs/sumo_rl_env/
 
-# Copy example network files
+# Copy example network files to expected location
 # Default: single-intersection (simple 4-way intersection)
 COPY src/envs/sumo_rl_env/nets/single-intersection/ /app/nets/single-intersection/