Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions src/core/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,8 @@ class StepResult(Generic[ObsT]):
observation: The environment's observation after the action.
reward: Scalar reward for this step (optional).
done: Whether the episode is finished.
info: Additional metadata (e.g. debug info, latency, etc.).
"""

observation: ObsT
reward: Optional[float] = None
done: bool = False
info: Optional[dict[str, Any]] = None
72 changes: 72 additions & 0 deletions src/envs/coding_env/env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
"""
envs/coding_env/env.py
--------------------------------
Concrete environment implementation using the core BaseEnv.
The POC implementation is meant to run code locally via subprocess; this backend can be swapped out later.
"""

from __future__ import annotations

import subprocess
from typing import Optional

from core.base import BaseEnv
from core.types import StepResult

from .models import CodeAction, CodeObservation


class CodingEnv(BaseEnv[CodeAction, CodeObservation]):
    """
    Minimal Coding Environment.

    Current POC behavior:
      - reset(): returns a fresh, empty observation (no persistent state).
      - step(action): validates the action and returns a placeholder
        observation (empty stdout/stderr, exit code 0). The submitted code
        is NOT executed yet; see the TODO in step().

    Future swap:
      Replace the placeholder in step() with a call to a real executor
      (local subprocess or a Docker/gateway backend) without changing the
      public API.
    """

    def __init__(
        self,
        default_timeout_s: float = 10.0,
        python_executable: str = "python",
    ):
        """
        Args:
            default_timeout_s: Max seconds to allow code execution before timing out.
            python_executable: Interpreter to run (e.g., "python3", a venv path, etc.).
        """
        # Both settings are only stored here; step() does not read them yet
        # because no executor is wired in.
        self._default_timeout_s = float(default_timeout_s)
        self._python = python_executable

    # --- BaseEnv interface ---

    def reset(self) -> CodeObservation:
        """Return the initial observation: empty stdout/stderr and exit code 0."""
        # No state to clear in this POC; return an initial observation.
        return CodeObservation(stdout="", stderr="", exit_code=0)

    def step(self, action: CodeAction) -> StepResult[CodeObservation]:
        """
        Process one action and return the resulting step.

        Args:
            action: The code execution request.

        Returns:
            A StepResult carrying the observation, a reward of 1.0 when the
            observation has exit code 0 and empty stderr (0.0 otherwise), and
            done=False.

        Raises:
            TypeError: If ``action`` is not a CodeAction.
        """
        if not isinstance(action, CodeAction):
            raise TypeError(f"Expected CodeAction, got {type(action)!r}")

        # TODO: replace dummy response with the call to the code executor inside the container
        # NOTE (PR author, from the review thread): this part will be iterated
        # on; the overall layout is what is up for review.
        obs = CodeObservation(stderr="", stdout="", exit_code=0)

        # Simple reward heuristic: success and no stderr -> 1.0 else 0.0
        succeeded = obs.exit_code == 0 and not obs.stderr
        reward: Optional[float] = 1.0 if succeeded else 0.0

        # The former `info` dict (timed_out / interpreter) was built here but
        # never handed to StepResult, so it has been dropped as dead code.
        return StepResult(
            observation=obs,
            reward=reward,
            done=False,  # Coding env is not episodic by default
        )
31 changes: 31 additions & 0 deletions src/envs/coding_env/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""
envs/coding_env/models.py
--------------------------------
Action/Observation types for the Coding environment.
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import Any, Optional


@dataclass
class CodeAction:
    """
    A single request to execute a piece of code.

    The only field today is the source text itself.
    """

    # Source text to be executed; required (no default).
    code: str
    # Optional: future fields like 'lint': bool, 'timeout_s': float, etc.


@dataclass
class CodeObservation:
    """
    Outcome of one code execution in the environment.

    Every field has a default, so ``CodeObservation()`` is an empty result
    with exit code 0.
    """

    # Standard-output text; empty by default.
    stdout: str = ""
    # Standard-error text; empty by default.
    stderr: str = ""
    # Exit status; defaults to 0.
    exit_code: int = 0