# tests/suites/unit/ci/test_fresh_host.py -- macOS teardown and cleanup CLI tests.


def test_deactivate_macos_host_services_limits_teardown_to_active_sidecars_services(
    tmp_path: Path,
    test_context: TestContext,
) -> None:
    """A sidecars-only macOS scenario must never issue a browser-lab teardown command."""
    workspace_dir = tmp_path / "workspace"
    workspace_dir.mkdir()
    test_context.apply_profiles("fresh_host_macos_colima")
    context = fresh_host.prepare_context(
        scenario_id="macos-sidecars",
        repo_root=workspace_dir,
        runner_temp=tmp_path / "runner-temp",
        workspace=workspace_dir,
        github_env_file=tmp_path / "github.env",
    )
    probed_labels: list[str] = []
    teardown_calls: list[list[str]] = []

    def fake_run(
        command: list[str],
        *,
        cwd: Path,
        env: dict[str, str],
        check: bool,
        capture_output: bool,
        text: bool,
        timeout: int,
    ) -> subprocess.CompletedProcess[str]:
        # Only the `launchctl print` probe is expected to reach subprocess.run;
        # every probed label is reported as loaded (exit status 0).
        del cwd, env, check, capture_output, text, timeout
        assert command[:2] == ["launchctl", "print"]
        probed_labels.append(command[2].rsplit("/", maxsplit=1)[1])
        return subprocess.CompletedProcess(command, 0, "", "")

    def fake_best_effort(
        command: list[str],
        *,
        cwd: Path,
        env: dict[str, str],
    ) -> str | None:
        # Record the teardown command and report success (no warning).
        del cwd, env
        teardown_calls.append(command)
        return None

    test_context.patch.patch_object(fresh_host_macos.subprocess, "run", new=fake_run)
    test_context.patch.patch_object(fresh_host_macos, "best_effort", new=fake_best_effort)

    last_command = fresh_host_macos.deactivate_macos_host_services(context)

    assert last_command is not None
    assert probed_labels == ["ai.openclaw.gateway", "ai.openclaw.sidecars"]
    assert len(teardown_calls) == 3
    gateway_bootout, sidecars_bootout, sidecars_down = teardown_calls
    assert gateway_bootout[:2] == ["launchctl", "bootout"]
    assert gateway_bootout[-1].endswith("ai.openclaw.gateway.plist")
    assert sidecars_bootout[:2] == ["launchctl", "bootout"]
    assert sidecars_bootout[-1].endswith("ai.openclaw.sidecars.plist")
    assert sidecars_down[-2:] == ["sidecars", "down"]
    rendered = [" ".join(step) for step in teardown_calls]
    assert not any("browserlab" in line for line in rendered)
    assert not any("browser-lab" in line for line in rendered)


def test_deactivate_macos_host_services_raises_for_active_bootout_failure(
    tmp_path: Path,
    test_context: TestContext,
) -> None:
    """A failing bootout of a loaded launchd service must surface as FreshHostError."""
    workspace_dir = tmp_path / "workspace"
    workspace_dir.mkdir()
    test_context.apply_profiles("fresh_host_macos_colima")
    context = fresh_host.prepare_context(
        scenario_id="macos-sidecars",
        repo_root=workspace_dir,
        runner_temp=tmp_path / "runner-temp",
        workspace=workspace_dir,
        github_env_file=tmp_path / "github.env",
    )
    teardown_calls: list[list[str]] = []

    def is_sidecars_bootout(command: list[str]) -> bool:
        # True for `launchctl bootout ... ai.openclaw.sidecars.plist`.
        return command[:2] == ["launchctl", "bootout"] and command[-1].endswith(
            "ai.openclaw.sidecars.plist"
        )

    def fake_run(
        command: list[str],
        *,
        cwd: Path,
        env: dict[str, str],
        check: bool,
        capture_output: bool,
        text: bool,
        timeout: int,
    ) -> subprocess.CompletedProcess[str]:
        # Every `launchctl print` probe reports the service as loaded.
        del cwd, env, check, capture_output, text, timeout
        assert command[:2] == ["launchctl", "print"]
        return subprocess.CompletedProcess(command, 0, "", "")

    def fake_best_effort(
        command: list[str],
        *,
        cwd: Path,
        env: dict[str, str],
    ) -> str | None:
        # Succeed for everything except the sidecars bootout, which returns a warning.
        del cwd, env
        teardown_calls.append(command)
        if not is_sidecars_bootout(command):
            return None
        return (
            "launchctl bootout gui/501 /tmp/ai.openclaw.sidecars.plist failed: "
            "Boot-out failed: 5: Input/output error"
        )

    test_context.patch.patch_object(fresh_host_macos.subprocess, "run", new=fake_run)
    test_context.patch.patch_object(fresh_host_macos, "best_effort", new=fake_best_effort)

    with pytest.raises(fresh_host.FreshHostError, match="Boot-out failed: 5: Input/output error"):
        fresh_host_macos.deactivate_macos_host_services(context)

    assert any(is_sidecars_bootout(command) for command in teardown_calls)


def test_cleanup_cli_returns_nonzero_and_records_failure_when_cleanup_raises(
    tmp_path: Path,
    test_context: TestContext,
) -> None:
    """A raising cleanup must be written to the report and yield CLI exit code 1."""
    workspace_dir = tmp_path / "workspace"
    workspace_dir.mkdir()
    test_context.apply_profiles("fresh_host_macos_colima")
    context = fresh_host.prepare_context(
        scenario_id="macos-sidecars",
        repo_root=workspace_dir,
        runner_temp=tmp_path / "runner-temp",
        workspace=workspace_dir,
        github_env_file=tmp_path / "github.env",
    )

    def fake_cleanup_macos(_: fresh_host.FreshHostContext) -> object:
        raise fresh_host.FreshHostError("cleanup exploded")

    test_context.patch.patch_object(
        fresh_host_reporting,
        "cleanup_macos",
        new=fake_cleanup_macos,
    )

    exit_code = fresh_host_script.main(["cleanup", "--context", str(context.context_path)])
    report = fresh_host.load_report(Path(context.report_path))
    last_phase = report.phases[-1]

    assert exit_code == 1
    assert report.status == "failure"
    assert report.failure_reason == "cleanup exploded"
    assert last_phase.name == "cleanup"
    assert last_phase.status == "failure"
    assert last_phase.failure_reason == "cleanup exploded"
# tests/utils/helpers/_fresh_host/macos.py -- scenario-aware macOS teardown helpers.
# The two imports below are the ones this change adds. Every other name used
# here (os, subprocess, Path, FreshHostContext, FreshHostError, best_effort,
# ensure_private_dir, phase_env, repo_paths, venv_clawops_command) is expected
# to be imported elsewhere in the module -- TODO confirm against the full file.
from dataclasses import dataclass

from tests.utils.helpers._fresh_host.storage import log

HOSTED_MACOS_SIDECAR_STARTUP_TIMEOUT_SECONDS = 300


@dataclass(frozen=True, slots=True)
class _MacosCleanupResult:
    """Structured cleanup execution result."""

    # Last teardown command that completed without a warning; None when no
    # command ran successfully.
    command: list[str] | None
    # Human-readable notes about skipped steps (for example, services that
    # were not loaded and therefore not booted out).
    notes: list[str]


def _managed_launchd_labels(context: FreshHostContext) -> tuple[str, ...]:
    """Return the launchd labels managed by the scenario.

    Nothing is managed when the scenario does not activate services. Otherwise
    the gateway label is always included, followed by the sidecars and
    browser-lab labels when the scenario exercises them.
    """
    if not context.activate_services:
        return ()
    labels = ["ai.openclaw.gateway"]
    if context.exercise_sidecars:
        labels.append("ai.openclaw.sidecars")
    if context.exercise_browser_lab:
        labels.append("ai.openclaw.browserlab")
    return tuple(labels)


def _repo_local_components(context: FreshHostContext) -> tuple[str, ...]:
    """Return the repo-local stack components for the scenario."""
    components: list[str] = []
    if context.exercise_sidecars:
        components.append("sidecars")
    if context.exercise_browser_lab:
        components.append("browser-lab")
    return tuple(components)


def _managed_host_components(context: FreshHostContext) -> tuple[str, ...]:
    """Return the host-managed stack components for the scenario.

    These are the same components as the repo-local ones, but only when the
    scenario activates host services at all.
    """
    if not context.activate_services:
        return ()
    return _repo_local_components(context)


def _launchd_label_for_component(component: str) -> str:
    """Return the launchd label that owns one component."""
    # The browser-lab component is the only one whose label drops the hyphen.
    return "ai.openclaw.browserlab" if component == "browser-lab" else f"ai.openclaw.{component}"


def _launchd_service_is_loaded(
    *,
    cwd: Path,
    env: dict[str, str],
    domain: str,
    label: str,
) -> bool:
    """Return whether the requested launchd label is currently loaded.

    Runs ``launchctl print <domain>/<label>`` and treats a zero exit status as
    "loaded"; any other exit status is reported as "not loaded".

    Raises:
        FreshHostError: If launchctl cannot be started or does not finish
            within 30 seconds.
    """
    try:
        completed = subprocess.run(
            ["launchctl", "print", f"{domain}/{label}"],
            cwd=cwd,
            env=env,
            check=False,
            capture_output=True,
            text=True,
            timeout=30,
        )
    except (OSError, subprocess.TimeoutExpired) as exc:
        raise FreshHostError(f"launchctl print {domain}/{label} failed: {exc}") from exc
    return completed.returncode == 0


def _run_actionable_command(command: list[str], *, cwd: Path, env: dict[str, str]) -> None:
    """Run one teardown command and raise on actionable failure.

    Raises:
        FreshHostError: If ``best_effort`` returns a warning for the command;
            the warning text becomes the exception message.
    """
    warning = best_effort(command, cwd=cwd, env=env)
    if warning is not None:
        raise FreshHostError(warning)


def _run_macos_teardown(
    context: FreshHostContext,
    *,
    include_repo_local_state: bool,
) -> _MacosCleanupResult:
    """Execute one scenario-aware macOS teardown plan.

    The plan has up to three stages:

    1. Probe every managed launchd label and boot out the ones that are loaded.
    2. Run ``ops <component> down`` for each host-managed component whose
       launchd label was loaded.
    3. When ``include_repo_local_state`` is true, run
       ``ops <component> down --repo-local-state`` for each exercised component.

    Every stage is attempted even when an earlier step fails, so that one
    stuck service does not leave the remaining services and compose stacks
    running. Failures are logged as they happen and reported together at the
    end.

    Args:
        context: Scenario context that selects the labels and components.
        include_repo_local_state: Whether to also tear down repo-local state
            (this additionally prepares and exports ``XDG_RUNTIME_DIR``).

    Returns:
        The last command that completed successfully plus the skip notes.

    Raises:
        FreshHostError: If any step failed; the message joins all failure
            messages with ``"; "`` (a single failure keeps its message as is).
    """
    repo_root, home_dir = repo_paths(context)
    env = phase_env(context)
    if include_repo_local_state:
        runtime_dir = home_dir / ".xdg-runtime"
        ensure_private_dir(runtime_dir)
        env["XDG_RUNTIME_DIR"] = str(runtime_dir)
    domain = f"gui/{os.getuid()}"
    launch_agents = home_dir / "Library" / "LaunchAgents"
    notes: list[str] = []
    failures: list[str] = []
    active_labels: set[str] = set()
    last_command: list[str] | None = None

    def record_failure(exc: FreshHostError) -> None:
        # Log immediately so the failure is visible even if a later step hangs.
        message = str(exc)
        failures.append(message)
        log(message)

    def attempt(command: list[str]) -> None:
        # Run one step without letting its failure abort the remaining plan.
        nonlocal last_command
        try:
            _run_actionable_command(command, cwd=repo_root, env=env)
        except FreshHostError as exc:
            record_failure(exc)
        else:
            last_command = command

    for label in _managed_launchd_labels(context):
        try:
            loaded = _launchd_service_is_loaded(
                cwd=repo_root, env=env, domain=domain, label=label
            )
        except FreshHostError as exc:
            record_failure(exc)
            continue
        if not loaded:
            note = f"Skipping launchctl bootout for {label}: service is not loaded."
            notes.append(note)
            log(note)
            continue
        # The service was observed loaded, so its component is torn down below
        # regardless of whether the bootout itself succeeds.
        active_labels.add(label)
        attempt(["launchctl", "bootout", domain, str(launch_agents / f"{label}.plist")])

    for component in _managed_host_components(context):
        if _launchd_label_for_component(component) not in active_labels:
            continue
        attempt(venv_clawops_command(context, "ops", "--asset-root", ".", component, "down"))

    if include_repo_local_state:
        for component in _repo_local_components(context):
            attempt(
                venv_clawops_command(
                    context,
                    "ops",
                    "--asset-root",
                    ".",
                    component,
                    "down",
                    "--repo-local-state",
                )
            )

    if failures:
        raise FreshHostError("; ".join(failures))
    return _MacosCleanupResult(command=last_command, notes=notes)


def deactivate_macos_host_services(context: FreshHostContext) -> list[str] | None:
    """Stop launchd-managed macOS services before repo-local exercises.

    Returns:
        The last teardown command that ran successfully, or None when the
        scenario had nothing to tear down.

    Raises:
        FreshHostError: If any teardown step failed.
    """
    return _run_macos_teardown(context, include_repo_local_state=False).command


def cleanup_macos(context: FreshHostContext) -> _MacosCleanupResult:
    """Clean up macOS launchd and compose state for the active scenario.

    Raises:
        FreshHostError: If any teardown step failed.
    """
    return _run_macos_teardown(context, include_repo_local_state=True)
b/tests/utils/helpers/_fresh_host/reporting.py @@ -9,7 +9,12 @@ from typing import cast from tests.utils.helpers._fresh_host.macos import cleanup_macos -from tests.utils.helpers._fresh_host.models import FreshHostContext, FreshHostReport, PhaseResult +from tests.utils.helpers._fresh_host.models import ( + FreshHostContext, + FreshHostError, + FreshHostReport, + PhaseResult, +) from tests.utils.helpers._fresh_host.shell import capture_to_file, compose_probe_env, phase_env from tests.utils.helpers._fresh_host.storage import ( context_path, @@ -136,21 +141,45 @@ def collect_diagnostics(context_file: Path) -> FreshHostReport: def cleanup(context_file: Path) -> FreshHostReport: - """Run best-effort scenario cleanup.""" + """Run scenario cleanup and persist the structured result.""" context = load_context(context_file) report_file = context_path(context.report_path) report = load_report(report_file) + started_at = now_iso() started = time.monotonic() - command = cleanup_macos(context) if context.platform == "macos" else None + command: list[str] | None = None + notes: list[str] = [] + try: + if context.platform == "macos": + result = cleanup_macos(context) + command = result.command + notes = result.notes + except Exception as exc: # noqa: BLE001 + report.phases.append( + PhaseResult( + name="cleanup", + status="failure", + duration_seconds=round(time.monotonic() - started, 3), + started_at=started_at, + finished_at=now_iso(), + command=command, + failure_reason=str(exc), + notes=notes, + ) + ) + report.failure_reason = str(exc) + report.status = "failure" + write_report(report, report_file) + raise FreshHostError(str(exc)) from exc report.phases.append( PhaseResult( name="cleanup", status="success", duration_seconds=round(time.monotonic() - started, 3), - started_at=now_iso(), + started_at=started_at, finished_at=now_iso(), command=command, - notes=[], + notes=notes, ) ) write_report(report, report_file)