Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement docker based command line code executor #1856

Merged
merged 18 commits into from
Mar 7, 2024
5 changes: 5 additions & 0 deletions autogen/coding/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from .base import CodeBlock, CodeExecutor, CodeExtractor, CodeResult
from .factory import CodeExecutorFactory
from .markdown_code_extractor import MarkdownCodeExtractor
from .local_commandline_code_executor import LocalCommandLineCodeExecutor, CommandLineCodeResult
from .docker_commandline_code_executor import DockerCommandLineCodeExecutor

__all__ = (
"CodeBlock",
Expand All @@ -9,4 +11,7 @@
"CodeExecutor",
"CodeExecutorFactory",
"MarkdownCodeExtractor",
"LocalCommandLineCodeExecutor",
"CommandLineCodeResult",
"DockerCommandLineCodeExecutor",
)
231 changes: 231 additions & 0 deletions autogen/coding/docker_commandline_code_executor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
from __future__ import annotations
import atexit
from hashlib import md5
import logging
from pathlib import Path
from time import sleep
from types import TracebackType
import uuid
from typing import List, Optional, Type, Union
import docker
from docker.models.containers import Container
from docker.errors import ImageNotFound

from .local_commandline_code_executor import CommandLineCodeResult

from ..code_utils import TIMEOUT_MSG, _cmd
from .base import CodeBlock, CodeExecutor, CodeExtractor
from .markdown_code_extractor import MarkdownCodeExtractor
import sys

if sys.version_info >= (3, 11):
from typing import Self
else:
from typing_extensions import Self


def _wait_for_ready(container: Container, timeout: int = 60, stop_time: int = 0.1) -> None:
elapsed_time = 0
while container.status != "running" and elapsed_time < timeout:
sleep(stop_time)
elapsed_time += stop_time
container.reload()
continue
if container.status != "running":
raise ValueError("Container failed to start")


# Names exported by a star-import of this module.
__all__ = ("DockerCommandLineCodeExecutor",)


class DockerCommandLineCodeExecutor(CodeExecutor):
    """(Experimental) Execute code blocks on the command line of a Docker container."""

    # Path inside the container where the host working directory is mounted.
    _CONTAINER_WORKSPACE = "/workspace"

    def __init__(
        self,
        image: str = "python:3-slim",
        container_name: Optional[str] = None,
        timeout: int = 60,
        work_dir: Union[Path, str] = Path("."),
        auto_remove: bool = True,
        stop_container: bool = True,
    ) -> None:
        """(Experimental) A code executor class that executes code through
        a command line environment in a Docker container.

        The executor first saves each code block in a file in the working
        directory, and then executes the code file in the container.
        The executor executes the code blocks in the order they are received.
        Currently, the executor only supports Python and shell scripts.
        For Python code, use the language "python" for the code block.
        For shell scripts, use the language "bash", "shell", or "sh" for the code
        block.

        Args:
            image (str, optional): Docker image to use for code execution.
                Defaults to "python:3-slim".
            container_name (Optional[str], optional): Name of the Docker container
                which is created. If None, will autogenerate a name. Defaults to None.
            timeout (int, optional): The timeout for code execution. Defaults to 60.
            work_dir (Union[Path, str], optional): The working directory for the code
                execution. Defaults to Path(".").
            auto_remove (bool, optional): If true, will automatically remove the Docker
                container when it is stopped. Defaults to True.
            stop_container (bool, optional): If true, will automatically stop the
                container when stop is called, when the context manager exits or when
                the Python process exits with atexit. Defaults to True.

        Raises:
            ValueError: On argument error, or if the container fails to start.
        """

        if timeout < 1:
            raise ValueError("Timeout must be greater than or equal to 1.")

        if isinstance(work_dir, str):
            work_dir = Path(work_dir)

        if not work_dir.exists():
            raise ValueError(f"Working directory {work_dir} does not exist.")

        client = docker.from_env()

        # Check if the image exists locally; pull it otherwise.
        try:
            client.images.get(image)
        except ImageNotFound:
            logging.info(f"Pulling image {image}...")
            # Let the docker exception escape if this fails.
            client.images.pull(image)

        if container_name is None:
            container_name = f"autogen-code-exec-{uuid.uuid4()}"

        # Start a container from the image, ready to exec commands later.
        # The host working directory is mounted read-write at the container
        # workspace, which is also the container's working directory.
        self._container = client.containers.create(
            image,
            name=container_name,
            entrypoint="/bin/sh",
            tty=True,
            auto_remove=auto_remove,
            volumes={str(work_dir.resolve()): {"bind": self._CONTAINER_WORKSPACE, "mode": "rw"}},
            working_dir=self._CONTAINER_WORKSPACE,
        )
        self._container.start()

        _wait_for_ready(self._container)

        def cleanup() -> None:
            # Look the container up by name again: it may already be gone.
            try:
                container = client.containers.get(container_name)
                container.stop()
            except docker.errors.NotFound:
                pass
            # Once stopped there is nothing left to do at interpreter exit.
            atexit.unregister(cleanup)

        if stop_container:
            atexit.register(cleanup)

        self._cleanup = cleanup

        # Check if the container is running
        if self._container.status != "running":
            raise ValueError(f"Failed to start container from image {image}. Logs: {self._container.logs()}")

        self._timeout = timeout
        self._work_dir: Path = work_dir

    @property
    def timeout(self) -> int:
        """(Experimental) The timeout for code execution."""
        return self._timeout

    @property
    def work_dir(self) -> Path:
        """(Experimental) The working directory for the code execution."""
        return self._work_dir

    @property
    def code_extractor(self) -> CodeExtractor:
        """(Experimental) Export a code extractor that can be used by an agent."""
        return MarkdownCodeExtractor()

    @classmethod
    def _workspace_relative_filename(cls, filename: str) -> str:
        """Return ``filename`` as a POSIX path relative to the container workspace.

        Relative names are interpreted relative to the workspace; absolute names
        must already point inside it. The check is purely lexical and uses POSIX
        path rules, so it does not depend on the host platform or file system.

        Raises:
            ValueError: If the name is empty, is the workspace itself, or
                points outside the workspace.
        """
        # Local import: ``posixpath`` is not imported at module level.
        import posixpath

        workspace = cls._CONTAINER_WORKSPACE
        normalized = posixpath.normpath(posixpath.join(workspace, filename))
        if not normalized.startswith(workspace + "/"):
            raise ValueError("Filename is not in the workspace")
        return normalized[len(workspace) + 1 :]

    def _host_code_path(self, relative_filename: str) -> Path:
        """Map a workspace-relative POSIX file name to a path under ``work_dir``.

        Raises:
            ValueError: If the resulting path resolves outside of ``work_dir``
                on the host.
        """
        code_path = self._work_dir.joinpath(*relative_filename.split("/"))
        # ``relative_to`` raises ValueError when the resolved path is not
        # located below the resolved working directory.
        code_path.resolve().relative_to(self._work_dir.resolve())
        return code_path

    def execute_code_blocks(self, code_blocks: List[CodeBlock]) -> CommandLineCodeResult:
        """(Experimental) Execute the code blocks and return the result.

        Each block is written to a file in the working directory and executed
        inside the container. Execution stops at the first block that exits
        with a non-zero exit code.

        Args:
            code_blocks (List[CodeBlock]): The code blocks to execute.

        Returns:
            CommandLineCodeResult: The exit code of the last executed block, the
                concatenated output of all executed blocks and the path of the
                first code file. If a requested file name is not inside the
                workspace, a result with exit code 1 is returned immediately.

        Raises:
            ValueError: If ``code_blocks`` is empty.
        """

        if len(code_blocks) == 0:
            raise ValueError("No code blocks to execute.")

        outputs = []
        files = []
        last_exit_code = 0
        for code_block in code_blocks:
            lang = code_block.language
            code = code_block.code

            # A first line of the form "# filename: <name>" selects the file name.
            first_line = code.split("\n")[0]
            try:
                if first_line.startswith("# filename:"):
                    # Split only once so that names containing ":" stay intact.
                    requested = first_line.split(":", 1)[1].strip()
                    filename = self._workspace_relative_filename(requested)
                else:
                    # create a file with an automatically generated name
                    code_hash = md5(code.encode()).hexdigest()
                    filename = f"tmp_code_{code_hash}.{'py' if lang.startswith('python') else lang}"
                # The same workspace-relative name is used on the host (below
                # work_dir) and in the container (relative to its working dir).
                code_path = self._host_code_path(filename)
            except ValueError:
                return CommandLineCodeResult(exit_code=1, output="Filename is not in the workspace")

            # Allow file names that live in not-yet-existing subdirectories.
            code_path.parent.mkdir(parents=True, exist_ok=True)
            with code_path.open("w", encoding="utf-8") as fout:
                fout.write(code)

            command = ["timeout", str(self._timeout), _cmd(lang), filename]

            result = self._container.exec_run(command)
            exit_code = result.exit_code
            output = result.output.decode("utf-8")
            # 124 is the exit status `timeout` uses when the time limit was hit.
            if exit_code == 124:
                output += "\n"
                output += TIMEOUT_MSG

            outputs.append(output)
            files.append(code_path)

            last_exit_code = exit_code
            if exit_code != 0:
                break

        code_file = str(files[0]) if files else None
        return CommandLineCodeResult(exit_code=last_exit_code, output="".join(outputs), code_file=code_file)

    def restart(self) -> None:
        """(Experimental) Restart the code executor.

        Raises:
            ValueError: If the container is not running after the restart.
        """
        self._container.restart()
        # Refresh the container object so that the status check below does not
        # look at the state recorded before the restart.
        self._container.reload()
        if self._container.status != "running":
            raise ValueError(f"Failed to restart container. Logs: {self._container.logs()}")

    def stop(self) -> None:
        """(Experimental) Stop the code executor."""
        self._cleanup()

    def __enter__(self) -> Self:
        return self

    def __exit__(
        self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
    ) -> None:
        self.stop()
18 changes: 4 additions & 14 deletions autogen/coding/jupyter/docker_jupyter_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,17 @@

from pathlib import Path
import sys
from time import sleep
from types import TracebackType
import uuid
from typing import Dict, Optional, Union
from typing import Dict, Optional, Type, Union
import docker
import secrets
import io
import atexit
import logging

from ..docker_commandline_code_executor import _wait_for_ready

if sys.version_info >= (3, 11):
from typing import Self
else:
Expand All @@ -22,17 +23,6 @@
from .base import JupyterConnectable, JupyterConnectionInfo


def _wait_for_ready(container: docker.Container, timeout: int = 60, stop_time: int = 0.1) -> None:
elapsed_time = 0
while container.status != "running" and elapsed_time < timeout:
sleep(stop_time)
elapsed_time += stop_time
container.reload()
continue
if container.status != "running":
raise ValueError("Container failed to start")


class DockerJupyterServer(JupyterConnectable):
DEFAULT_DOCKERFILE = """FROM quay.io/jupyter/docker-stacks-foundation

Expand Down Expand Up @@ -162,6 +152,6 @@ def __enter__(self) -> Self:
return self

def __exit__(
self, exc_type: Optional[type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
) -> None:
self.stop()
4 changes: 2 additions & 2 deletions autogen/coding/jupyter/jupyter_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from dataclasses import dataclass
from types import TracebackType
from typing import Any, Dict, List, Optional, cast
from typing import Any, Dict, List, Optional, Type, cast
import sys

if sys.version_info >= (3, 11):
Expand Down Expand Up @@ -111,7 +111,7 @@ def __enter__(self) -> Self:
return self

def __exit__(
self, exc_type: Optional[type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
) -> None:
self.stop()

Expand Down
4 changes: 2 additions & 2 deletions autogen/coding/jupyter/jupyter_code_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import re
from types import TracebackType
import uuid
from typing import Any, ClassVar, List, Optional, Union
from typing import Any, ClassVar, List, Optional, Type, Union
import sys

if sys.version_info >= (3, 11):
Expand Down Expand Up @@ -201,6 +201,6 @@ def __enter__(self) -> Self:
return self

def __exit__(
self, exc_type: Optional[type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
) -> None:
self.stop()
4 changes: 2 additions & 2 deletions autogen/coding/jupyter/local_jupyter_server.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations
from types import TracebackType

from typing import Optional, Union, cast
from typing import Optional, Type, Union, cast
import subprocess
import signal
import sys
Expand Down Expand Up @@ -157,6 +157,6 @@ def __enter__(self) -> Self:
return self

def __exit__(
self, exc_type: Optional[type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType]
) -> None:
self.stop()
Loading
Loading