diff --git a/tests/suites/unit/ci/test_fresh_host.py b/tests/suites/unit/ci/test_fresh_host.py
index 0b7f26e..f1e282e 100644
--- a/tests/suites/unit/ci/test_fresh_host.py
+++ b/tests/suites/unit/ci/test_fresh_host.py
@@ -989,6 +989,173 @@ def fake_verify_sidecars(compose_file: Path, **kwargs: object) -> None:
     assert verify_kwargs["repo_local_state"] is True
 
 
+def test_deactivate_macos_host_services_limits_teardown_to_active_sidecars_services(
+    tmp_path: Path,
+    test_context: TestContext,
+) -> None:
+    """macOS sidecars teardown should touch only gateway/sidecars, never browser-lab."""
+    github_env = tmp_path / "github.env"
+    runner_temp = tmp_path / "runner-temp"
+    workspace = tmp_path / "workspace"
+    workspace.mkdir()
+    test_context.apply_profiles("fresh_host_macos_colima")
+
+    context = fresh_host.prepare_context(
+        scenario_id="macos-sidecars",
+        repo_root=workspace,
+        runner_temp=runner_temp,
+        workspace=workspace,
+        github_env_file=github_env,
+    )
+    inspected_labels: list[str] = []
+    executed_commands: list[list[str]] = []
+
+    def fake_run(
+        command: list[str],
+        *,
+        cwd: Path,
+        env: dict[str, str],
+        check: bool,
+        capture_output: bool,
+        text: bool,
+        timeout: int,
+    ) -> subprocess.CompletedProcess[str]:
+        del cwd, env, check, capture_output, text, timeout
+        assert command[:2] == ["launchctl", "print"]
+        label = command[2].rsplit("/", maxsplit=1)[1]
+        inspected_labels.append(label)
+        return subprocess.CompletedProcess(command, 0, "", "")  # exit 0 => loaded
+
+    def fake_best_effort(
+        command: list[str],
+        *,
+        cwd: Path,
+        env: dict[str, str],
+    ) -> str | None:
+        del cwd, env
+        executed_commands.append(command)
+        return None  # no warning => command succeeded
+
+    test_context.patch.patch_object(fresh_host_macos.subprocess, "run", new=fake_run)
+    test_context.patch.patch_object(fresh_host_macos, "best_effort", new=fake_best_effort)
+
+    command = fresh_host_macos.deactivate_macos_host_services(context)
+
+    assert command is not None
+    assert inspected_labels == ["ai.openclaw.gateway", "ai.openclaw.sidecars"]
+    assert len(executed_commands) == 3  # two bootouts, then "sidecars down"
+    assert executed_commands[0][:2] == ["launchctl", "bootout"]
+    assert executed_commands[0][-1].endswith("ai.openclaw.gateway.plist")
+    assert executed_commands[1][:2] == ["launchctl", "bootout"]
+    assert executed_commands[1][-1].endswith("ai.openclaw.sidecars.plist")
+    assert executed_commands[2][-2:] == ["sidecars", "down"]
+    assert all("browserlab" not in " ".join(step) for step in executed_commands)
+    assert all("browser-lab" not in " ".join(step) for step in executed_commands)
+
+
+def test_deactivate_macos_host_services_raises_for_active_bootout_failure(
+    tmp_path: Path,
+    test_context: TestContext,
+) -> None:
+    """macOS teardown should raise FreshHostError when booting out a loaded agent fails."""
+    github_env = tmp_path / "github.env"
+    runner_temp = tmp_path / "runner-temp"
+    workspace = tmp_path / "workspace"
+    workspace.mkdir()
+    test_context.apply_profiles("fresh_host_macos_colima")
+
+    context = fresh_host.prepare_context(
+        scenario_id="macos-sidecars",
+        repo_root=workspace,
+        runner_temp=runner_temp,
+        workspace=workspace,
+        github_env_file=github_env,
+    )
+    executed_commands: list[list[str]] = []
+
+    def fake_run(
+        command: list[str],
+        *,
+        cwd: Path,
+        env: dict[str, str],
+        check: bool,
+        capture_output: bool,
+        text: bool,
+        timeout: int,
+    ) -> subprocess.CompletedProcess[str]:
+        del cwd, env, check, capture_output, text, timeout
+        assert command[:2] == ["launchctl", "print"]
+        return subprocess.CompletedProcess(command, 0, "", "")  # exit 0 => loaded
+
+    def fake_best_effort(
+        command: list[str],
+        *,
+        cwd: Path,
+        env: dict[str, str],
+    ) -> str | None:
+        del cwd, env
+        executed_commands.append(command)
+        if command[:2] == ["launchctl", "bootout"] and command[-1].endswith(
+            "ai.openclaw.sidecars.plist"
+        ):
+            return (
+                "launchctl bootout gui/501 /tmp/ai.openclaw.sidecars.plist failed: "
+                "Boot-out failed: 5: Input/output error"
+            )
+        return None  # every other command succeeds
+
+    test_context.patch.patch_object(fresh_host_macos.subprocess, "run", new=fake_run)
+    test_context.patch.patch_object(fresh_host_macos, "best_effort", new=fake_best_effort)
+
+    with pytest.raises(fresh_host.FreshHostError, match="Boot-out failed: 5: Input/output error"):
+        fresh_host_macos.deactivate_macos_host_services(context)
+
+    assert any(  # the failing sidecars bootout was actually attempted
+        command[:2] == ["launchctl", "bootout"]
+        and command[-1].endswith("ai.openclaw.sidecars.plist")
+        for command in executed_commands
+    )
+
+
+def test_cleanup_cli_returns_nonzero_and_records_failure_when_cleanup_raises(
+    tmp_path: Path,
+    test_context: TestContext,
+) -> None:
+    """CLI cleanup should record a failed cleanup phase in the report and exit with 1."""
+    github_env = tmp_path / "github.env"
+    runner_temp = tmp_path / "runner-temp"
+    workspace = tmp_path / "workspace"
+    workspace.mkdir()
+    test_context.apply_profiles("fresh_host_macos_colima")
+
+    context = fresh_host.prepare_context(
+        scenario_id="macos-sidecars",
+        repo_root=workspace,
+        runner_temp=runner_temp,
+        workspace=workspace,
+        github_env_file=github_env,
+    )
+
+    def fake_cleanup_macos(_: fresh_host.FreshHostContext) -> object:
+        raise fresh_host.FreshHostError("cleanup exploded")  # simulated teardown failure
+
+    test_context.patch.patch_object(
+        fresh_host_reporting,  # reporting imports cleanup_macos by name, so patch it there
+        "cleanup_macos",
+        new=fake_cleanup_macos,
+    )
+
+    exit_code = fresh_host_script.main(["cleanup", "--context", str(context.context_path)])
+    report = fresh_host.load_report(Path(context.report_path))
+
+    assert exit_code == 1
+    assert report.status == "failure"
+    assert report.failure_reason == "cleanup exploded"
+    assert report.phases[-1].name == "cleanup"  # failure phase is appended last
+    assert report.phases[-1].status == "failure"
+    assert report.phases[-1].failure_reason == "cleanup exploded"
+
+
 def test_collect_diagnostics_uses_compose_probe_env_for_macos_compose_commands(
     tmp_path: Path,
     test_context: TestContext,
diff --git a/tests/utils/helpers/_fresh_host/macos.py b/tests/utils/helpers/_fresh_host/macos.py
index 92c2918..c3f2b39 100644
--- a/tests/utils/helpers/_fresh_host/macos.py
+++ b/tests/utils/helpers/_fresh_host/macos.py
@@ -4,6 +4,7 @@
 
 import os
 import subprocess
+from dataclasses import dataclass
 from pathlib import Path
 
 from tests.utils.helpers._fresh_host.linux import run_clawops_bootstrap
@@ -24,10 +25,140 @@
     verify_sidecar_services_running,
     wait_for_docker_backend,
 )
+from tests.utils.helpers._fresh_host.storage import log
 
 HOSTED_MACOS_SIDECAR_STARTUP_TIMEOUT_SECONDS = 300
 
 
+@dataclass(frozen=True, slots=True)
+class _MacosCleanupResult:
+    """Outcome of one macOS teardown run: the final command plus any skip notes."""
+
+    command: list[str] | None  # last teardown command that ran, or None if none ran
+    notes: list[str]  # one message per launchd label skipped because it was not loaded
+
+
+def _managed_launchd_labels(context: FreshHostContext) -> tuple[str, ...]:
+    """List the launchd labels the scenario manages; empty unless services are activated."""
+    if not context.activate_services:
+        return ()
+    # The gateway label is always included; the other two follow the exercise flags.
+    optional_labels = (
+        ("ai.openclaw.sidecars", context.exercise_sidecars),
+        ("ai.openclaw.browserlab", context.exercise_browser_lab),
+    )
+    return ("ai.openclaw.gateway", *(name for name, wanted in optional_labels if wanted))
+
+
+def _managed_host_components(context: FreshHostContext) -> tuple[str, ...]:
+    """Name the host-managed stack components for the scenario.
+
+    Empty unless ``activate_services`` is set; otherwise "sidecars" and/or
+    "browser-lab", in that order, according to the scenario's exercise flags.
+    """
+    if not context.activate_services:
+        return ()
+    toggles = {"sidecars": context.exercise_sidecars, "browser-lab": context.exercise_browser_lab}
+    return tuple(name for name, enabled in toggles.items() if enabled)
+
+
+def _repo_local_components(context: FreshHostContext) -> tuple[str, ...]:
+    """Name the repo-local stack components for the scenario.
+
+    Consults only ``exercise_sidecars`` and ``exercise_browser_lab`` (in that
+    order); ``activate_services`` plays no part in this selection.
+    """
+    flags = {"sidecars": context.exercise_sidecars, "browser-lab": context.exercise_browser_lab}
+    return tuple(name for name, wanted in flags.items() if wanted)
+
+
+def _launchd_label_for_component(component: str) -> str:
+    """Map a stack component name to the launchd label that owns it."""
+    return f"ai.openclaw.{'browserlab' if component == 'browser-lab' else component}"
+
+
+def _launchd_service_is_loaded(
+    *,
+    cwd: Path,
+    env: dict[str, str],
+    domain: str,
+    label: str,
+) -> bool:
+    """Report whether ``launchctl print <domain>/<label>`` exits with status 0.
+
+    Any non-zero exit status is reported as "not loaded"; captured output is unused.
+    Raises FreshHostError if running launchctl raises OSError or exceeds 30 seconds.
+    """
+    probe = ["launchctl", "print", f"{domain}/{label}"]
+    # check=False: a non-zero exit status is an answer here, not an error.
+    try:
+        finished = subprocess.run(
+            probe, cwd=cwd, env=env, check=False, capture_output=True, text=True, timeout=30
+        )
+    except (OSError, subprocess.TimeoutExpired) as error:
+        raise FreshHostError(f"{' '.join(probe)} failed: {error}") from error
+    return finished.returncode == 0
+
+
+def _run_actionable_command(command: list[str], *, cwd: Path, env: dict[str, str]) -> None:
+    """Run a teardown command via ``best_effort``; raise if it reports a warning."""
+    if (failure := best_effort(command, cwd=cwd, env=env)) is None:
+        return
+    raise FreshHostError(failure)
+
+
+def _run_macos_teardown(
+    context: FreshHostContext,
+    *,
+    include_repo_local_state: bool,
+) -> _MacosCleanupResult:
+    """Tear down the scenario's launchd agents, then its compose stacks.
+
+    Loaded agents are booted out, host stacks whose agent was booted out are brought
+    down, and repo-local stacks follow when requested. The first failing command raises
+    FreshHostError; otherwise the last command run and the skip notes are returned.
+    """
+    repo_root, home_dir = repo_paths(context)
+    env = phase_env(context)
+    if include_repo_local_state:
+        runtime_dir = home_dir / ".xdg-runtime"
+        ensure_private_dir(runtime_dir)
+        env["XDG_RUNTIME_DIR"] = str(runtime_dir)
+    domain = f"gui/{os.getuid()}"
+    plist_dir = home_dir / "Library" / "LaunchAgents"
+    skipped: list[str] = []
+    booted_out: set[str] = set()
+    steps_run: list[list[str]] = []
+
+    def execute(step: list[str]) -> None:
+        _run_actionable_command(step, cwd=repo_root, env=env)
+        steps_run.append(step)
+
+    # Only loaded agents are booted out; unloaded ones are recorded as notes.
+    for label in _managed_launchd_labels(context):
+        if _launchd_service_is_loaded(cwd=repo_root, env=env, domain=domain, label=label):
+            execute(["launchctl", "bootout", domain, str(plist_dir / f"{label}.plist")])
+            booted_out.add(label)
+        else:
+            skipped.append(f"Skipping launchctl bootout for {label}: service is not loaded.")
+            log(skipped[-1])
+
+    # A host stack is brought down only if its owning agent was just booted out.
+    for component in _managed_host_components(context):
+        if _launchd_label_for_component(component) in booted_out:
+            execute(venv_clawops_command(context, "ops", "--asset-root", ".", component, "down"))
+
+    repo_local = _repo_local_components(context) if include_repo_local_state else ()
+    for component in repo_local:
+        execute(
+            venv_clawops_command(
+                context, "ops", "--asset-root", ".", component, "down", "--repo-local-state"
+            )
+        )
+
+    return _MacosCleanupResult(command=steps_run[-1] if steps_run else None, notes=skipped)
+
+
 def normalize_macos_machine_name(_: FreshHostContext) -> list[str]:
     """Normalize the hosted macOS machine name."""
     commands = [
@@ -159,53 +290,11 @@ def exercise_macos_browser_lab(context: FreshHostContext) -> list[str]:
     return _run_repo_local_cycle(context, "browser-lab")
 
 
-def deactivate_macos_host_services(context: FreshHostContext) -> list[str]:
+def deactivate_macos_host_services(context: FreshHostContext) -> list[str] | None:
     """Stop launchd-managed macOS services before repo-local exercises."""
-    repo_root, home_dir = repo_paths(context)
-    env = phase_env(context)
-    domain = f"gui/{os.getuid()}"
-    launch_agents = home_dir / "Library" / "LaunchAgents"
-    commands = [
-        ["launchctl", "bootout", domain, str(launch_agents / "ai.openclaw.gateway.plist")],
-        ["launchctl", "bootout", domain, str(launch_agents / "ai.openclaw.sidecars.plist")],
-        ["launchctl", "bootout", domain, str(launch_agents / "ai.openclaw.browserlab.plist")],
-        venv_clawops_command(context, "ops", "--asset-root", ".", "sidecars", "down"),
-        venv_clawops_command(context, "ops", "--asset-root", ".", "browser-lab", "down"),
-    ]
-    for command in commands:
-        warning = best_effort(command, cwd=repo_root, env=env)
-        if warning is not None:
-            from tests.utils.helpers._fresh_host.storage import log
-
-            log(warning)
-    return commands[-1]
+    return _run_macos_teardown(context, include_repo_local_state=False).command
 
 
-def cleanup_macos(context: FreshHostContext) -> list[str]:
-    """Best-effort cleanup for macOS launchd and compose state."""
-    repo_root, home_dir = repo_paths(context)
-    env = phase_env(context)
-    ensure_private_dir(home_dir / ".xdg-runtime")
-    env["XDG_RUNTIME_DIR"] = str(home_dir / ".xdg-runtime")
-    domain = f"gui/{os.getuid()}"
-    launch_agents = home_dir / "Library" / "LaunchAgents"
-    commands = [
-        ["launchctl", "bootout", domain, str(launch_agents / "ai.openclaw.gateway.plist")],
-        ["launchctl", "bootout", domain, str(launch_agents / "ai.openclaw.sidecars.plist")],
-        ["launchctl", "bootout", domain, str(launch_agents / "ai.openclaw.browserlab.plist")],
-        venv_clawops_command(context, "ops", "--asset-root", ".", "sidecars", "down"),
-        venv_clawops_command(context, "ops", "--asset-root", ".", "browser-lab", "down"),
-        venv_clawops_command(
-            context, "ops", "--asset-root", ".", "sidecars", "down", "--repo-local-state"
-        ),
-        venv_clawops_command(
-            context, "ops", "--asset-root", ".", "browser-lab", "down", "--repo-local-state"
-        ),
-    ]
-    for command in commands:
-        warning = best_effort(command, cwd=repo_root, env=env)
-        if warning is not None:
-            from tests.utils.helpers._fresh_host.storage import log
-
-            log(warning)
-    return commands[-1]
+def cleanup_macos(context: FreshHostContext) -> _MacosCleanupResult:
+    """Run the macOS teardown, including repo-local state, and return its result."""
+    return _run_macos_teardown(context, include_repo_local_state=True)
diff --git a/tests/utils/helpers/_fresh_host/reporting.py b/tests/utils/helpers/_fresh_host/reporting.py
index 95f9129..939425d 100644
--- a/tests/utils/helpers/_fresh_host/reporting.py
+++ b/tests/utils/helpers/_fresh_host/reporting.py
@@ -9,7 +9,12 @@
 from typing import cast
 
 from tests.utils.helpers._fresh_host.macos import cleanup_macos
-from tests.utils.helpers._fresh_host.models import FreshHostContext, FreshHostReport, PhaseResult
+from tests.utils.helpers._fresh_host.models import (
+    FreshHostContext,
+    FreshHostError,
+    FreshHostReport,
+    PhaseResult,
+)
 from tests.utils.helpers._fresh_host.shell import capture_to_file, compose_probe_env, phase_env
 from tests.utils.helpers._fresh_host.storage import (
     context_path,
@@ -136,21 +141,45 @@ def collect_diagnostics(context_file: Path) -> FreshHostReport:
 
 
 def cleanup(context_file: Path) -> FreshHostReport:
-    """Run best-effort scenario cleanup."""
+    """Run scenario cleanup and persist the structured result."""
     context = load_context(context_file)
     report_file = context_path(context.report_path)
     report = load_report(report_file)
+    started_at = now_iso()
     started = time.monotonic()
-    command = cleanup_macos(context) if context.platform == "macos" else None
+    command: list[str] | None = None
+    notes: list[str] = []
+    try:
+        if context.platform == "macos":
+            result = cleanup_macos(context)
+            command = result.command
+            notes = result.notes
+    except Exception as exc:  # noqa: BLE001
+        report.phases.append(
+            PhaseResult(
+                name="cleanup",
+                status="failure",
+                duration_seconds=round(time.monotonic() - started, 3),
+                started_at=started_at,
+                finished_at=now_iso(),
+                command=command,
+                failure_reason=str(exc),
+                notes=notes,
+            )
+        )
+        report.failure_reason = str(exc)
+        report.status = "failure"
+        write_report(report, report_file)
+        raise FreshHostError(str(exc)) from exc
     report.phases.append(
         PhaseResult(
             name="cleanup",
             status="success",
             duration_seconds=round(time.monotonic() - started, 3),
-            started_at=now_iso(),
+            started_at=started_at,
             finished_at=now_iso(),
             command=command,
-            notes=[],
+            notes=notes,
         )
     )
     write_report(report, report_file)