diff --git a/cli/stack/src/flowmesh_cli_stack/stack.py b/cli/stack/src/flowmesh_cli_stack/stack.py index 65e28bd3..81704c98 100644 --- a/cli/stack/src/flowmesh_cli_stack/stack.py +++ b/cli/stack/src/flowmesh_cli_stack/stack.py @@ -114,45 +114,109 @@ def _require_bin(name: str) -> str: return path -def _parse_buildx_driver(output: str) -> str | None: +def _parse_buildx_field(output: str, field: str) -> str | None: + needle = f"{field}:" for line in output.splitlines(): line = line.strip() - if line.startswith("Driver:"): + if line.startswith(needle): return line.split(":", 1)[1].strip() or None return None -def _ensure_multiplatform_builder_support(docker_bin: str) -> None: - inspect = subprocess.run( - [ - docker_bin, - "buildx", - "inspect", - ], # nosec B603: argv list, absolute binary path. +def _inspect_buildx_builder( + docker_bin: str, name: str | None +) -> subprocess.CompletedProcess[str]: + args = [docker_bin, "buildx", "inspect"] + if name is not None: + args.append(name) + return subprocess.run( # nosec B603: argv list, absolute binary path. + args, capture_output=True, text=True, check=False + ) + + +def _get_active_buildx_builder(docker_bin: str) -> str | None: + result = _inspect_buildx_builder(docker_bin, None) + if result.returncode != 0: + return None + return _parse_buildx_field(result.stdout, "Name") + + +def _ensure_buildx_builder_ready( + docker_bin: str, + builder: str, + expected_driver: str, + missing_hint: str | None = None, +) -> None: + """Verify ``builder`` exists and uses ``expected_driver`` before bake runs.""" + result = _inspect_buildx_builder(docker_bin, builder) + if result.returncode != 0: + logging.error(f"Buildx builder '{builder}' is not available.") + if result.stderr: + logging.log(result.stderr.strip(), err=True) + if missing_hint: + logging.log(missing_hint) + raise typer.Exit(code=1) + driver = _parse_buildx_field(result.stdout, "Driver") + if driver != expected_driver: + logging.error( + f"Buildx builder '{builder}' uses driver '{driver or 'unknown'}'; " + f"'{expected_driver}' is required." + ) + raise typer.Exit(code=1) + + +def _switch_active_buildx_builder(docker_bin: str, target: str, force: bool) -> None: + """If the active buildx builder differs from ``target``, switch to it. + + Prompts for confirmation unless ``force`` is true; aborts the command on + decline so the user never silently builds against an unintended builder. + """ + active = _get_active_buildx_builder(docker_bin) + if active == target: + return + if not force: + prompt = ( + f"Active buildx builder is '{active or 'unknown'}'; " + f"switch to '{target}'?" + ) + if not typer.confirm(prompt, default=False): + logging.error(f"Aborted; '{target}' is not the active buildx builder.") + raise typer.Exit(code=1) + result = subprocess.run( # nosec B603: argv list, absolute binary path. + [docker_bin, "buildx", "use", target], capture_output=True, text=True, check=False, ) - if inspect.returncode != 0: - return - driver = _parse_buildx_driver(inspect.stdout) - if driver != "docker": - return - logging.error("The active buildx builder uses the 'docker' driver.") - logging.error( - "Multi-platform push requires either the containerd image store or a " - "buildx builder that uses the 'docker-container' driver." - ) - logging.log( - "Create and select a compatible builder, then retry:\n" - "docker buildx create --name flowmesh-multiarch " - "--driver docker-container --bootstrap --use" - ) - raise typer.Exit(code=1) + if result.returncode != 0: + if result.stdout: + logging.log(result.stdout) + if result.stderr: + logging.log(result.stderr, err=True) + logging.error(f"Failed to switch active buildx builder to '{target}'.") + raise typer.Exit(code=result.returncode) + logging.info(f"Switched active buildx builder to '{target}'.") + + +# Driver split: load uses the native docker driver (local cache, image goes +# straight into the daemon's image store); push uses docker-container (registry +# cache in/out, multi-platform). +_BUILD_DEFAULT_BUILDER = "default" +_PUSH_DEFAULT_BUILDER = "flowmesh-multiarch" +_PUSH_BUILDER_MISSING_HINT = ( + "Create the builder, then retry:\n" + f"docker buildx create --name {_PUSH_DEFAULT_BUILDER} " + "--driver docker-container --bootstrap" +) def _run_bake( - mode: str, targets: list[str] | None, env_file: Path, no_builder: bool = False + mode: str, + targets: list[str] | None, + env_file: Path, + builder: str, + force: bool, + no_builder: bool = False, ) -> None: ensure_env_file(env_file, stack_env_example()) load_env(env_file, base_dir=Path.cwd(), path_keys=STACK_PATH_KEYS) @@ -186,8 +250,17 @@ def _run_bake( if not bake_file.exists(): logging.error(f"Bake file not found: {bake_file}") raise typer.Exit(code=1) + if mode == "push": - _ensure_multiplatform_builder_support(docker_bin) + _ensure_buildx_builder_ready( + docker_bin, + builder, + "docker-container", + missing_hint=_PUSH_BUILDER_MISSING_HINT, + ) + else: + _ensure_buildx_builder_ready(docker_bin, builder, "docker") + _switch_active_buildx_builder(docker_bin, builder, force) build_created = datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%SZ") registry = os.getenv("FLOWMESH_REGISTRY", "ghcr.io/mlsys-io") @@ -201,7 +274,15 @@ def _run_bake( } for batch_targets in _resolve_bake_batches(targets, no_builder=no_builder): - args = [docker_bin, "buildx", "bake", "-f", str(bake_file)] + args = [ + docker_bin, + "buildx", + "bake", + "-f", + str(bake_file), + "--builder", + builder, + ] if mode == "push": args.append("--push") else: @@ -209,11 +290,12 @@ def _run_bake( args.extend(batch_targets) selected_targets = _resolve_build_targets(batch_targets) - for target in selected_targets: - cache_ref = get_cache_ref(registry, cache_version, target) - args += ["--set", f"{target}.cache-from=type=registry,ref={cache_ref}"] - if mode == "push": + if mode == "push": + for target in selected_targets: + cache_ref = get_cache_ref(registry, cache_version, target) args += [ + "--set", + f"{target}.cache-from=type=registry,ref={cache_ref}", "--set", f"{target}.cache-to=type=registry,ref={cache_ref},mode=max", ] @@ -253,9 +335,22 @@ def build( "--no-builder", help="Skip exporting the standalone GPU builder image.", ), + builder: str = typer.Option( + _BUILD_DEFAULT_BUILDER, + "--builder", + help="Buildx builder to use; must use the native 'docker' driver.", + ), + force: bool = typer.Option( + False, + "-f", + "--force", + help="Skip the confirmation prompt when switching the active buildx builder.", + ), ) -> None: """Build FlowMesh Docker images locally using buildx.""" - _run_bake("load", targets, env_file, no_builder=no_builder) + _run_bake( + "load", targets, env_file, builder=builder, force=force, no_builder=no_builder + ) logging.success("Images built locally.") @@ -272,9 +367,22 @@ def push( "--no-builder", help="Skip publishing the standalone GPU builder image.", ), + builder: str = typer.Option( + _PUSH_DEFAULT_BUILDER, + "--builder", + help="Buildx builder to use; must use the 'docker-container' driver.", + ), + force: bool = typer.Option( + False, + "-f", + "--force", + help="Skip the confirmation prompt when switching the active buildx builder.", + ), ) -> None: """Build FlowMesh Docker images and push them to the container registry.""" - _run_bake("push", targets, env_file, no_builder=no_builder) + _run_bake( + "push", targets, env_file, builder=builder, force=force, no_builder=no_builder + ) logging.success("Images pushed.") diff --git a/docs/CLI.md b/docs/CLI.md index f87415ca..108ebac9 100644 --- a/docs/CLI.md +++ b/docs/CLI.md @@ -97,9 +97,13 @@ GPU builder image while still building GPU runtime images. subsequent multi-platform pushes can reuse `arm64` and multi-stage layers. Set `FLOWMESH_CACHE_VERSION` only when you want to intentionally start a new remote cache lineage. -When pushing multi-platform images from Docker Engine, use either the -containerd image store or a `buildx` builder with the `docker-container` -driver. +`flowmesh stack build` runs on the native `docker` driver and reuses +the local layer cache for fast iteration. `flowmesh stack push` +requires a `buildx` builder with the `docker-container` driver so it +can build multi-platform images and share the registry cache across +machines. Pass `--builder ` to either command to use a builder +other than the default, and `-f`/`--force` to skip the confirmation +prompt when the active `buildx` builder needs to switch. ## SSH tasks diff --git a/tests/cli/test_stack_build.py b/tests/cli/test_stack_build.py index c352649b..f1a44c96 100644 --- a/tests/cli/test_stack_build.py +++ b/tests/cli/test_stack_build.py @@ -1,10 +1,17 @@ +from subprocess import CompletedProcess +from typing import Any +from unittest.mock import patch + import pytest import typer +from flowmesh_cli_stack import stack as stack_module from flowmesh_cli_stack.stack import ( - _parse_buildx_driver, + _ensure_buildx_builder_ready, + _parse_buildx_field, _platform_overrides, _resolve_bake_batches, _resolve_build_targets, + _switch_active_buildx_builder, ) from flowmesh_stack.images import get_cache_ref @@ -57,13 +64,191 @@ def test_platform_overrides_use_multiarch_for_push_mode() -> None: ] -def test_parse_buildx_driver_returns_named_driver() -> None: +def test_parse_buildx_field_returns_named_value() -> None: output = """Name: default Driver: docker-container Nodes: Name: default0 """ - assert _parse_buildx_driver(output) == "docker-container" + assert _parse_buildx_field(output, "Driver") == "docker-container" + assert _parse_buildx_field(output, "Name") == "default" + assert _parse_buildx_field(output, "Missing") is None + + +def _ok(stdout: str) -> CompletedProcess[str]: + return CompletedProcess(args=[], returncode=0, stdout=stdout, stderr="") + + +def _err(returncode: int = 1, stderr: str = "not found") -> CompletedProcess[str]: + return CompletedProcess(args=[], returncode=returncode, stdout="", stderr=stderr) + + +def test_ensure_buildx_builder_ready_accepts_matching_driver() -> None: + inspect = _ok("Name: default\nDriver: docker\n") + with patch.object(stack_module, "_inspect_buildx_builder", return_value=inspect): + _ensure_buildx_builder_ready("/usr/bin/docker", "default", "docker") + + +def test_ensure_buildx_builder_ready_rejects_driver_mismatch() -> None: + inspect = _ok("Name: default\nDriver: docker-container\n") + with patch.object(stack_module, "_inspect_buildx_builder", return_value=inspect): + with pytest.raises(typer.Exit): + _ensure_buildx_builder_ready("/usr/bin/docker", "default", "docker") + + +def test_ensure_buildx_builder_ready_errors_when_builder_missing() -> None: + with patch.object(stack_module, "_inspect_buildx_builder", return_value=_err()): + with pytest.raises(typer.Exit): + _ensure_buildx_builder_ready( + "/usr/bin/docker", + "flowmesh-multiarch", + "docker-container", + missing_hint="hint", + ) + + +def test_switch_active_buildx_builder_noop_when_already_active() -> None: + inspect = _ok("Name: default\nDriver: docker\n") + with ( + patch.object(stack_module, "_inspect_buildx_builder", return_value=inspect), + patch.object(stack_module.subprocess, "run") as run_mock, + ): + _switch_active_buildx_builder("/usr/bin/docker", "default", force=False) + run_mock.assert_not_called() + + +def test_switch_active_buildx_builder_runs_use_when_force() -> None: + inspect = _ok("Name: other\nDriver: docker-container\n") + use_result = CompletedProcess(args=[], returncode=0, stdout="", stderr="") + with ( + patch.object(stack_module, "_inspect_buildx_builder", return_value=inspect), + patch.object( + stack_module.subprocess, "run", return_value=use_result + ) as run_mock, + patch.object(stack_module.typer, "confirm") as confirm_mock, + ): + _switch_active_buildx_builder("/usr/bin/docker", "default", force=True) + confirm_mock.assert_not_called() + assert run_mock.call_args.args[0][:4] == [ + "/usr/bin/docker", + "buildx", + "use", + "default", + ] + + +def test_switch_active_buildx_builder_prompts_when_not_forced() -> None: + inspect = _ok("Name: other\nDriver: docker-container\n") + use_result = CompletedProcess(args=[], returncode=0, stdout="", stderr="") + with ( + patch.object(stack_module, "_inspect_buildx_builder", return_value=inspect), + patch.object(stack_module.subprocess, "run", return_value=use_result), + patch.object(stack_module.typer, "confirm", return_value=True) as confirm_mock, + ): + _switch_active_buildx_builder("/usr/bin/docker", "default", force=False) + confirm_mock.assert_called_once() + + +def test_switch_active_buildx_builder_aborts_on_decline() -> None: + inspect = _ok("Name: other\nDriver: docker-container\n") + with ( + patch.object(stack_module, "_inspect_buildx_builder", return_value=inspect), + patch.object(stack_module.subprocess, "run") as run_mock, + patch.object(stack_module.typer, "confirm", return_value=False), + ): + with pytest.raises(typer.Exit): + _switch_active_buildx_builder("/usr/bin/docker", "default", force=False) + run_mock.assert_not_called() + + +def _capture_bake_args( + monkeypatch: pytest.MonkeyPatch, tmp_path: Any +) -> list[list[str]]: + """Stub out the bake subprocess and capture the argv each call would launch.""" + captured: list[list[str]] = [] + + def fake_run(args: list[str], **_: Any) -> CompletedProcess[str]: + captured.append(args) + return CompletedProcess(args=args, returncode=0, stdout="", stderr="") + + monkeypatch.setattr(stack_module.subprocess, "run", fake_run) + monkeypatch.setattr(stack_module, "ensure_docker_available", lambda: None) + monkeypatch.setattr(stack_module, "_require_bin", lambda _name: "/usr/bin/docker") + monkeypatch.setattr( + stack_module, + "_inspect_buildx_builder", + lambda _b, name: _ok(f"Name: {name or 'default'}\nDriver: docker\n"), + ) + monkeypatch.setattr( + stack_module, "_get_active_buildx_builder", lambda _b: "default" + ) + monkeypatch.setattr( + stack_module, "_resolve_bake_batches", lambda _t, no_builder=False: [["server"]] + ) + monkeypatch.setattr( + stack_module, "_resolve_build_targets", lambda batch: ["flowmesh_server"] + ) + monkeypatch.setattr(stack_module, "_platform_overrides", lambda _m, _t: []) + monkeypatch.setattr(stack_module, "ensure_env_file", lambda *_a, **_k: None) + monkeypatch.setattr(stack_module, "load_env", lambda *_a, **_k: None) + bake_file = tmp_path / "bake.hcl" + bake_file.write_text("") + monkeypatch.setattr(stack_module, "stack_bake_file", lambda: bake_file) + monkeypatch.setattr( + stack_module, "stack_env_example", lambda: tmp_path / ".env.example" + ) + # buildx version check returns success. + return captured + + +def _flatten(args: list[str]) -> str: + return " ".join(args) + + +def test_run_bake_load_uses_local_cache_only( + monkeypatch: pytest.MonkeyPatch, tmp_path: Any +) -> None: + captured = _capture_bake_args(monkeypatch, tmp_path) + stack_module._run_bake( + "load", None, tmp_path / ".env", builder="default", force=True + ) + bake_calls = [a for a in captured if "bake" in a] + assert bake_calls, "expected a bake invocation" + cmd = _flatten(bake_calls[0]) + assert "--builder default" in cmd + assert "--load" in cmd + assert "cache-from=type=registry" not in cmd + assert "cache-to=type=registry" not in cmd + + +def test_run_bake_push_uses_registry_cache( + monkeypatch: pytest.MonkeyPatch, tmp_path: Any +) -> None: + captured = _capture_bake_args(monkeypatch, tmp_path) + monkeypatch.setattr( + stack_module, + "_inspect_buildx_builder", + lambda _b, name: _ok( + f"Name: {name or 'flowmesh-multiarch'}\nDriver: docker-container\n" + ), + ) + monkeypatch.setattr( + stack_module, "_get_active_buildx_builder", lambda _b: "flowmesh-multiarch" + ) + stack_module._run_bake( + "push", + None, + tmp_path / ".env", + builder="flowmesh-multiarch", + force=True, + ) + bake_calls = [a for a in captured if "bake" in a] + assert bake_calls, "expected a bake invocation" + cmd = _flatten(bake_calls[0]) + assert "--builder flowmesh-multiarch" in cmd + assert "--push" in cmd + assert "cache-from=type=registry" in cmd + assert "cache-to=type=registry" in cmd def test_get_cache_ref_uses_stable_target_specific_tags() -> None: