From 2bd10556739e2af602ea85371b976390f7c48077 Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Thu, 23 May 2024 14:12:14 +0800 Subject: [PATCH] correctly setup plugins for swebench eval --- evaluation/swe_bench/run_infer.py | 12 +++++++----- evaluation/swe_bench/swe_env_box.py | 18 +++++++++++++----- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/evaluation/swe_bench/run_infer.py b/evaluation/swe_bench/run_infer.py index 352bcb87d0a..3910ed1290e 100644 --- a/evaluation/swe_bench/run_infer.py +++ b/evaluation/swe_bench/run_infer.py @@ -14,6 +14,7 @@ from datasets import load_dataset from tqdm import tqdm +import agenthub from evaluation.swe_bench.swe_env_box import SWEBenchSSHBox from opendevin.controller.state.state import State from opendevin.core.config import args, config, get_llm_config_arg @@ -185,11 +186,11 @@ def get_test_result(instance, sandbox, workspace_dir_name): def process_instance( - instance, - agent_class, - metadata, - skip_workspace_mount, - eval_output_dir, + instance: dict, + agent_class: str, + metadata: dict, + skip_workspace_mount: bool, + eval_output_dir: str, reset_logger: bool = True, ): workspace_mount_path = os.path.join(config.workspace_mount_path, '_eval_workspace') @@ -234,6 +235,7 @@ def process_instance( workspace_dir_name, skip_workspace_mount=skip_workspace_mount, workspace_mount_path=workspace_mount_path, + sandbox_plugins=agenthub.Agent.get_cls(agent_class).sandbox_plugins, ) # Prepare instruction diff --git a/evaluation/swe_bench/swe_env_box.py b/evaluation/swe_bench/swe_env_box.py index f3d217208ac..51fa8ae581c 100644 --- a/evaluation/swe_bench/swe_env_box.py +++ b/evaluation/swe_bench/swe_env_box.py @@ -4,7 +4,11 @@ from opendevin.core.config import config from opendevin.core.logger import opendevin_logger as logger from opendevin.runtime.docker.ssh_box import DockerSSHBox -from opendevin.runtime.plugins import JupyterRequirement, SWEAgentCommandsRequirement +from opendevin.runtime.plugins import ( + AgentSkillsRequirement, + JupyterRequirement, + PluginRequirement, +) SWE_BENCH_CONTAINER_IMAGE = 'ghcr.io/opendevin/eval-swe-bench:full-v1.0' @@ -18,6 +22,7 @@ def __init__( swe_instance_id: str | None = None, swe_instance: dict | None = None, skip_workspace_mount: bool = True, + sandbox_plugins: list[PluginRequirement] = [], # noqa: B006 ): if swe_instance_id is None: raise ValueError('swe_instance_id must be provided!') @@ -31,6 +36,7 @@ def __init__( # Need to run as root to use SWEBench container sid = f'swe_bench_{swe_instance_id}' + str(uuid.uuid4()) super().__init__(container_image, timeout, sid) + self.init_plugins(sandbox_plugins) exit_code, output = self.execute('mv ~/.bashrc ~/.bashrc.bak') assert exit_code == 0, f'Failed to backup ~/.bashrc: {output}' @@ -66,6 +72,7 @@ def get_box_for_instance( n_tries=5, skip_workspace_mount: bool = True, workspace_mount_path: str | None = None, + sandbox_plugins: list[PluginRequirement] = [], # noqa: B006 ) -> 'SWEBenchSSHBox': if workspace_dir_name is None: workspace_dir_name = f"{instance['repo']}__{instance['version']}".replace( @@ -82,6 +89,7 @@ def get_box_for_instance( swe_instance_id=instance['instance_id'], swe_instance=instance, skip_workspace_mount=skip_workspace_mount, + sandbox_plugins=sandbox_plugins, ) logger.info(f"SSH box started for instance {instance['instance_id']}.") @@ -138,10 +146,10 @@ def get_diff_patch(self): 'environment_setup_commit': '419a78300f7cd27611196e1e464d50fd0385ff27', } - sandbox = SWEBenchSSHBox.get_box_for_instance(instance=EXAMPLE_INSTANCE) - - # in actual eval, this will be initialized by the controller - sandbox.init_plugins([JupyterRequirement(), SWEAgentCommandsRequirement()]) + sandbox = SWEBenchSSHBox.get_box_for_instance( + instance=EXAMPLE_INSTANCE, + sandbox_plugins=[AgentSkillsRequirement(), JupyterRequirement()], + ) # PRE TEST exit_code, output = sandbox.execute('cd $REPO_PATH')