Skip to content

Commit

Permalink
correctly set up plugins for swebench eval
Browse files: browse the repository at this point in the history
  • Loading branch information
xingyaoww committed May 23, 2024
1 parent 0eccf31 commit 2bd1055
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 10 deletions.
12 changes: 7 additions & 5 deletions evaluation/swe_bench/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from datasets import load_dataset
from tqdm import tqdm

import agenthub
from evaluation.swe_bench.swe_env_box import SWEBenchSSHBox
from opendevin.controller.state.state import State
from opendevin.core.config import args, config, get_llm_config_arg
Expand Down Expand Up @@ -185,11 +186,11 @@ def get_test_result(instance, sandbox, workspace_dir_name):


def process_instance(
instance,
agent_class,
metadata,
skip_workspace_mount,
eval_output_dir,
instance: dict,
agent_class: str,
metadata: dict,
skip_workspace_mount: bool,
eval_output_dir: str,
reset_logger: bool = True,
):
workspace_mount_path = os.path.join(config.workspace_mount_path, '_eval_workspace')
Expand Down Expand Up @@ -234,6 +235,7 @@ def process_instance(
workspace_dir_name,
skip_workspace_mount=skip_workspace_mount,
workspace_mount_path=workspace_mount_path,
sandbox_plugins=agenthub.Agent.get_cls(agent_class).sandbox_plugins,
)

# Prepare instruction
Expand Down
18 changes: 13 additions & 5 deletions evaluation/swe_bench/swe_env_box.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@
from opendevin.core.config import config
from opendevin.core.logger import opendevin_logger as logger
from opendevin.runtime.docker.ssh_box import DockerSSHBox
from opendevin.runtime.plugins import JupyterRequirement, SWEAgentCommandsRequirement
from opendevin.runtime.plugins import (
AgentSkillsRequirement,
JupyterRequirement,
PluginRequirement,
)

SWE_BENCH_CONTAINER_IMAGE = 'ghcr.io/opendevin/eval-swe-bench:full-v1.0'

Expand All @@ -18,6 +22,7 @@ def __init__(
swe_instance_id: str | None = None,
swe_instance: dict | None = None,
skip_workspace_mount: bool = True,
sandbox_plugins: list[PluginRequirement] = [], # noqa: B006
):
if swe_instance_id is None:
raise ValueError('swe_instance_id must be provided!')
Expand All @@ -31,6 +36,7 @@ def __init__(
# Need to run as root to use SWEBench container
sid = f'swe_bench_{swe_instance_id}' + str(uuid.uuid4())
super().__init__(container_image, timeout, sid)
self.init_plugins(sandbox_plugins)

exit_code, output = self.execute('mv ~/.bashrc ~/.bashrc.bak')
assert exit_code == 0, f'Failed to backup ~/.bashrc: {output}'
Expand Down Expand Up @@ -66,6 +72,7 @@ def get_box_for_instance(
n_tries=5,
skip_workspace_mount: bool = True,
workspace_mount_path: str | None = None,
sandbox_plugins: list[PluginRequirement] = [], # noqa: B006
) -> 'SWEBenchSSHBox':
if workspace_dir_name is None:
workspace_dir_name = f"{instance['repo']}__{instance['version']}".replace(
Expand All @@ -82,6 +89,7 @@ def get_box_for_instance(
swe_instance_id=instance['instance_id'],
swe_instance=instance,
skip_workspace_mount=skip_workspace_mount,
sandbox_plugins=sandbox_plugins,
)
logger.info(f"SSH box started for instance {instance['instance_id']}.")

Expand Down Expand Up @@ -138,10 +146,10 @@ def get_diff_patch(self):
'environment_setup_commit': '419a78300f7cd27611196e1e464d50fd0385ff27',
}

sandbox = SWEBenchSSHBox.get_box_for_instance(instance=EXAMPLE_INSTANCE)

# in actual eval, this will be initialized by the controller
sandbox.init_plugins([JupyterRequirement(), SWEAgentCommandsRequirement()])
sandbox = SWEBenchSSHBox.get_box_for_instance(
instance=EXAMPLE_INSTANCE,
sandbox_plugins=[AgentSkillsRequirement(), JupyterRequirement()],
)

# PRE TEST
exit_code, output = sandbox.execute('cd $REPO_PATH')
Expand Down

0 comments on commit 2bd1055

Please sign in to comment.