diff --git a/cli/stack/pyproject.toml b/cli/stack/pyproject.toml index 04656e73..70bfb04e 100644 --- a/cli/stack/pyproject.toml +++ b/cli/stack/pyproject.toml @@ -12,6 +12,7 @@ dependencies = [ "flowmesh-cli==0.1.0", "flowmesh-sdk-stack==0.1.0", "docker>=7.1.0", + "packaging>=24", "pyyaml>=6.0.2", ] diff --git a/cli/stack/src/flowmesh_cli_stack/assets/compose.yml b/cli/stack/src/flowmesh_cli_stack/assets/compose.yml index 3ff1fd62..9bc07dfb 100644 --- a/cli/stack/src/flowmesh_cli_stack/assets/compose.yml +++ b/cli/stack/src/flowmesh_cli_stack/assets/compose.yml @@ -1,6 +1,7 @@ services: redis_control: image: redis:7-alpine + profiles: [root] container_name: ${FLOWMESH_STACK_SLUG:-flowmesh_node}_redis_control environment: REDIS_ACL_ENABLED: ${REDIS_ACL_ENABLED:-0} @@ -68,6 +69,7 @@ services: redis_telemetry: image: redis:7-alpine + profiles: [root] container_name: ${FLOWMESH_STACK_SLUG:-flowmesh_node}_redis_telemetry environment: REDIS_ACL_ENABLED: ${REDIS_ACL_ENABLED:-0} @@ -139,8 +141,10 @@ services: depends_on: redis_control: condition: service_healthy + required: false redis_telemetry: condition: service_healthy + required: false env_file: - ${STACK_ENV_FILE:-./.env} environment: diff --git a/cli/stack/src/flowmesh_cli_stack/bundle.py b/cli/stack/src/flowmesh_cli_stack/bundle.py index 5c1b0fea..a6de196a 100644 --- a/cli/stack/src/flowmesh_cli_stack/bundle.py +++ b/cli/stack/src/flowmesh_cli_stack/bundle.py @@ -5,11 +5,13 @@ import sys import tarfile import tempfile +from importlib.metadata import PackageNotFoundError, version from pathlib import Path import typer from flowmesh_cli.core import logging from flowmesh_cli.core.typer import get_typer +from packaging.version import InvalidVersion, Version app = get_typer( help="Package FlowMesh deployments into portable bundles for distribution." @@ -111,22 +113,72 @@ def _build_cli_wheels(wheel_dir: Path) -> None: raise typer.Exit(code=result.returncode) -def _write_install_script(dest: Path) -> None: - """Write an install.sh script to set up a venv and install bundled wheels.""" +def _published_cli_spec() -> str: + """Return the published FlowMesh CLI package spec for this release.""" + try: + package_version = version("flowmesh-cli-stack") + except PackageNotFoundError: + logging.error("Unable to resolve installed flowmesh-cli-stack version.") + raise typer.Exit(code=1) from None + try: + parsed = Version(package_version) + except InvalidVersion: + logging.error( + f"Installed flowmesh-cli-stack version {package_version!r} is not a " + "valid PEP 440 version." + ) + raise typer.Exit(code=1) from None + if parsed.is_prerelease or parsed.is_devrelease or parsed.local is not None: + logging.error( + f"Installed flowmesh-cli-stack version {package_version!r} is not a " + "published release; the bundle's install.sh would fail on PyPI. " + "Install a release of flowmesh-cli-stack first, or pass " + "--include-wheels to bundle local wheels instead." + ) + raise typer.Exit(code=1) + # Workspace versions are kept in lock-step by scripts/dev/bump_version.py, + # so flowmesh-cli-stack's version is also the matching flowmesh-metapackage + # version on PyPI. + return f"flowmesh[cli]=={package_version}" + + +def _write_install_script( + dest: Path, *, package_spec: str | None = None, include_wheels: bool = False +) -> None: + """Write an install.sh script to set up a venv and install FlowMesh CLI.""" script_path = dest / "install.sh" - script = """#!/usr/bin/env bash + if include_wheels: + install_block = '"$UV_BIN" pip install ./wheels/*.whl' + else: + assert package_spec is not None + install_block = f"""\ +FLOWMESH_PACKAGE_SPEC="${{FLOWMESH_PACKAGE_SPEC:-{package_spec}}}" +FLOWMESH_INDEX_URL="${{FLOWMESH_INDEX_URL:-}}" +FLOWMESH_EXTRA_INDEX_URL="${{FLOWMESH_EXTRA_INDEX_URL:-}}" + +INSTALL_ARGS=("$FLOWMESH_PACKAGE_SPEC") +if [ -n "$FLOWMESH_INDEX_URL" ]; then + INSTALL_ARGS=(--index-url "$FLOWMESH_INDEX_URL" "${{INSTALL_ARGS[@]}}") +fi +if [ -n "$FLOWMESH_EXTRA_INDEX_URL" ]; then + INSTALL_ARGS=(--extra-index-url "$FLOWMESH_EXTRA_INDEX_URL" "${{INSTALL_ARGS[@]}}") +fi +"$UV_BIN" pip install "${{INSTALL_ARGS[@]}}" +""" + + script = f"""#!/usr/bin/env bash set -euo pipefail -VENV_DIR="${VENV_DIR:-.venv}" -UV_BIN="${UV_BIN:-uv}" -PYTHON_REQ="${FLOWMESH_PYTHON:-3.12}" +VENV_DIR="${{VENV_DIR:-.venv}}" +UV_BIN="${{UV_BIN:-uv}}" +PYTHON_REQ="${{FLOWMESH_PYTHON:-3.12}}" ENV_FILE=".env" if ! command -v "$UV_BIN" >/dev/null 2>&1; then echo "uv not found; installing..." curl -LsSf https://astral.sh/uv/install.sh | sh export PATH="$HOME/.local/bin:$PATH" - UV_BIN="${UV_BIN:-uv}" + UV_BIN="${{UV_BIN:-uv}}" fi if ! command -v "$UV_BIN" >/dev/null 2>&1; then echo "uv install failed or not found in PATH." >&2 @@ -141,7 +193,7 @@ def _write_install_script(dest: Path) -> None: source "$VENV_DIR/bin/activate" "$UV_BIN" pip install --upgrade pip -"$UV_BIN" pip install ./wheels/*.whl +{install_block} echo "Installed flowmesh CLI into $VENV_DIR." echo "Activate it with 'source $VENV_DIR/bin/activate'." if [ ! -f "$ENV_FILE" ]; then @@ -158,7 +210,9 @@ def _write_install_script(dest: Path) -> None: script_path.chmod(script_path.stat().st_mode | 0o111) -def _create_bundle_tarball(tar_path: Path, include_tls: bool) -> None: +def _create_bundle_tarball( + tar_path: Path, include_tls: bool, *, include_wheels: bool +) -> None: """Create a deployable bundle as a tar.gz with a top-level prefix.""" tar_path.parent.mkdir(parents=True, exist_ok=True) prefix = "flowmesh_server_bundle" @@ -166,9 +220,16 @@ def _create_bundle_tarball(tar_path: Path, include_tls: bool) -> None: staging_root = Path(tmp) / prefix staging_root.mkdir(parents=True, exist_ok=True) _copy_server_assets(staging_root, include_tls=include_tls) - wheel_dir = staging_root / "wheels" - _build_cli_wheels(wheel_dir) - _write_install_script(staging_root) + if include_wheels: + wheel_dir = staging_root / "wheels" + _build_cli_wheels(wheel_dir) + _write_install_script(staging_root, include_wheels=True) + else: + _write_install_script( + staging_root, + package_spec=_published_cli_spec(), + include_wheels=False, + ) with tarfile.open(tar_path, mode="w:gz") as tf: # Ensure we archive the top-level prefix directory. tf.add(staging_root, arcname=prefix) @@ -183,12 +244,22 @@ def bundle_export( help="Output tar.gz path (default: ./dist/flowmesh_server_bundle.tar.gz)", ), no_tls: bool = typer.Option(False, "--no-tls", help="Exclude TLS assets"), + include_wheels: bool = typer.Option( + False, + "--include-wheels", + help=( + "Bundle local CLI/SDK wheels instead of installing " + "published flowmesh[cli]." + ), + ), ) -> None: - """Create a self-contained deployment bundle for the server.""" + """Create a deployment bundle for the server.""" logging.info("Creating server bundle...") tar_path = output if tar_path is None: tar_path = Path("./dist/flowmesh_server_bundle.tar.gz") tar_path.parent.mkdir(parents=True, exist_ok=True) - _create_bundle_tarball(tar_path, include_tls=not no_tls) + _create_bundle_tarball( + tar_path, include_tls=not no_tls, include_wheels=include_wheels + ) logging.success(f"Bundle created at {tar_path}") diff --git a/cli/stack/src/flowmesh_cli_stack/stack.py b/cli/stack/src/flowmesh_cli_stack/stack.py index 81704c98..9985d29e 100644 --- a/cli/stack/src/flowmesh_cli_stack/stack.py +++ b/cli/stack/src/flowmesh_cli_stack/stack.py @@ -19,7 +19,7 @@ remove_image, ) from flowmesh_stack.doctor import DoctorFinding, run_doctor_checks -from flowmesh_stack.env import ensure_env_file, load_env +from flowmesh_stack.env import ensure_env_file, load_env, parse_env_file from flowmesh_stack.env_schema import render_env_example from flowmesh_stack.images import ( BUILD_GROUPS, @@ -65,14 +65,25 @@ def _load(env_file: Path) -> None: def _compose( - args: list[str], env_file: Path, env: dict[str, str] | None, to_deploy: bool = False + args: list[str], + env_file: Path, + env: dict[str, str] | None, + to_deploy: bool = False, + profile: str | None = None, ) -> None: ensure_env_file(env_file, stack_env_example()) - result = _stack().run(args, env_file=env_file, env=env, to_deploy=to_deploy) + full_args = (["--profile", profile] if profile else []) + args + result = _stack().run(full_args, env_file=env_file, env=env, to_deploy=to_deploy) if result.returncode != 0: raise typer.Exit(code=result.returncode) +def _node_role(env_file: Path) -> str: + """Return the configured NODE_ROLE (root | worker), defaulting to root.""" + role = parse_env_file(env_file).get("NODE_ROLE", "root").strip().lower() + return role if role in ("root", "worker") else "root" + + def _resolve_build_targets(batch_targets: list[str]) -> list[str]: resolved: list[str] = [] for target in batch_targets: @@ -400,7 +411,10 @@ def pull( ) -> None: """Pull Docker images for stack services from the registry.""" args = ["pull"] + (services or []) - _compose(args, env_file=env_file, env=image_env_overrides(image_tag)) + profile = "root" if _node_role(env_file) == "root" else None + _compose( + args, env_file=env_file, env=image_env_overrides(image_tag), profile=profile + ) @app.command() @@ -427,12 +441,20 @@ def up( None, "--image-tag", help="Override FLOWMESH_VERSION" ), ) -> None: - """Start the stack including server and Redis.""" + """Start the stack. + + On root nodes (NODE_ROLE=root, the default), the local Redis services are + started alongside the server. On worker nodes (NODE_ROLE=worker), Redis + services are skipped — the worker is expected to connect to the root + node's Redis via REDIS_CONTROL_URL / REDIS_TELEMETRY_URL. + """ + profile = "root" if _node_role(env_file) == "root" else None _compose( ["up", "-d", "--wait"], env_file=env_file, env=image_env_overrides(image_tag), to_deploy=True, + profile=profile, ) logging.success("FlowMesh stack is up.") @@ -459,7 +481,12 @@ def down( logging.info("Draining workers...") _drain_workers(env_file) logging.info("Shutting down the FlowMesh stack...") - _compose(["down"], env_file=env_file, env=image_env_overrides(image_tag)) + _compose( + ["down"], + env_file=env_file, + env=image_env_overrides(image_tag), + profile="root", + ) logging.success("FlowMesh stack stopped.") @@ -475,12 +502,19 @@ def restart( """Drain workers and restart the stack.""" logging.info("Draining workers...") _drain_workers(env_file) - _compose(["down"], env_file=env_file, env=image_env_overrides(image_tag)) + _compose( + ["down"], + env_file=env_file, + env=image_env_overrides(image_tag), + profile="root", + ) + profile = "root" if _node_role(env_file) == "root" else None _compose( ["up", "-d", "--wait"], env_file=env_file, env=image_env_overrides(image_tag), to_deploy=True, + profile=profile, ) logging.success("FlowMesh stack is up.") @@ -493,7 +527,7 @@ def logs( ), ) -> None: """Stream logs from stack services or a specific service container.""" - code = _stack().stream_logs(env_file=env_file, service=service) + code = _stack().stream_logs(env_file=env_file, service=service, profile="root") if code != 0: raise typer.Exit(code=code) @@ -505,7 +539,7 @@ def ps( ), ) -> None: """Display running status of stack containers and worker containers.""" - _compose(["ps"], env_file=env_file, env=None) + _compose(["ps"], env_file=env_file, env=None, profile="root") logging.log("\nWorkers:") docker_bin = _require_bin("docker") subprocess.run( @@ -545,7 +579,12 @@ def clean( logging.info("Draining workers...") _drain_workers(env_file) logging.info("Removing stack containers and volumes...") - _compose(["down", "-v"], env_file=env_file, env=image_env_overrides(image_tag)) + _compose( + ["down", "-v"], + env_file=env_file, + env=image_env_overrides(image_tag), + profile="root", + ) logging.success("FlowMesh stack cleaned.") diff --git a/docs/CLI.md b/docs/CLI.md index 108ebac9..5a57a3a7 100644 --- a/docs/CLI.md +++ b/docs/CLI.md @@ -20,6 +20,7 @@ flowmesh result {fetch, download} flowmesh trace {fetch, analyze} flowmesh system {metrics} flowmesh stack {build, push, pull, pullall, up, down, restart, ps, logs} +flowmesh stack bundle export flowmesh stack worker {up, start, stop, down, list, pull} ``` @@ -73,12 +74,19 @@ When multiple deployments share one host, give each stack its own The suffix isolates Docker object names (including containers, volumes, and networks); the ports isolate host bindings. ```bash -flowmesh stack up # Server + Redis + Supervisor +flowmesh stack up # Server + Redis + Supervisor (root) flowmesh stack worker up cpu 1 # 1 CPU worker flowmesh stack worker up gpu --targets 0 # 1 GPU worker pinned to GPU 0 flowmesh stack down ``` +`flowmesh stack up` reads `NODE_ROLE` from the env file (default `root`). On a +root node, both local Redis services are deployed alongside the server. On a +worker node (`NODE_ROLE=worker`), Redis services are skipped — the worker +server connects to the root node's Redis via `REDIS_CONTROL_URL` and +`REDIS_TELEMETRY_URL`, which must be set in the worker's `.env` to reachable +endpoints on the root node. + After changing executor code, rebuild the affected image before bringing the stack back up — running containers don't pick up source changes: @@ -105,6 +113,17 @@ machines. Pass `--builder ` to either command to use a builder other than the default, and `-f`/`--force` to skip the confirmation prompt when the active `buildx` builder needs to switch. +To hand off a deployment bundle with bootstrap/config assets: + +```bash +flowmesh stack bundle export +flowmesh stack bundle export --include-wheels +``` + +By default, the bundle's `install.sh` installs the published +`flowmesh[cli]` package for the current release. Use `--include-wheels` +when you need the archive to carry locally-built CLI/SDK wheels instead. + ## SSH tasks ```bash diff --git a/docs/ENV.md b/docs/ENV.md index 804b765e..a6d613fc 100644 --- a/docs/ENV.md +++ b/docs/ENV.md @@ -12,8 +12,9 @@ listed here is in `.env.example`. | Variable | Default | Description | |----------|---------|-------------| -| `REDIS_CONTROL_URL` | `redis://localhost:6379/0` | Redis control channel | -| `REDIS_TELEMETRY_URL` | `redis://localhost:6380/0` | Redis telemetry channel | +| `NODE_ROLE` | `root` | `root` deploys local Redis; `worker` skips it and connects to the root's Redis via the URLs below | +| `REDIS_CONTROL_URL` | `redis://localhost:6379/0` | Redis control channel. On worker nodes, must point at the root node's reachable Redis endpoint | +| `REDIS_TELEMETRY_URL` | `redis://localhost:6380/0` | Redis telemetry channel. On worker nodes, must point at the root node's reachable Redis endpoint | | `DATABASE_URL` | – | Postgres connection string | | `RESULTS_DIR` | `./results` | Server-side results directory | | `SERVER_RESULTS_DIR` | `flowmesh_results` | Host-side directory/docker volume to mount at `RESULTS_DIR` in the server container | diff --git a/sdk/stack/src/flowmesh_stack/docker.py b/sdk/stack/src/flowmesh_stack/docker.py index 47e36896..bb85fd8c 100644 --- a/sdk/stack/src/flowmesh_stack/docker.py +++ b/sdk/stack/src/flowmesh_stack/docker.py @@ -50,9 +50,13 @@ def compose_logs( env: Mapping[str, str] | None = None, service: str | None = None, capture_output: bool = False, + profile: str | None = None, ) -> subprocess.CompletedProcess[str]: """Stream compose logs, optionally for a specific service.""" - args = ["logs", "-f"] + args: list[str] = [] + if profile: + args += ["--profile", profile] + args += ["logs", "-f"] if service: args.append(service) return compose( @@ -225,7 +229,12 @@ def run( env=compose_env, ) - def stream_logs(self, env_file: Path, service: str | None = None) -> int: + def stream_logs( + self, + env_file: Path, + service: str | None = None, + profile: str | None = None, + ) -> int: """Stream stack logs and fall back to container logs when needed.""" self.load_env(env_file) compose_env = {self.env_file_var: str(env_file.resolve())} @@ -236,6 +245,7 @@ def stream_logs(self, env_file: Path, service: str | None = None) -> int: env=compose_env, service=service, capture_output=False, + profile=profile, ) if result.returncode == 0: return 0 @@ -247,5 +257,6 @@ def stream_logs(self, env_file: Path, service: str | None = None) -> int: env_file=env_file, env=compose_env, capture_output=False, + profile=profile, ) return result.returncode diff --git a/uv.lock b/uv.lock index bbb346d3..d96559d5 100644 --- a/uv.lock +++ b/uv.lock @@ -2316,6 +2316,7 @@ dependencies = [ { name = "docker" }, { name = "flowmesh-cli" }, { name = "flowmesh-sdk-stack" }, + { name = "packaging" }, { name = "pyyaml" }, ] @@ -2324,6 +2325,7 @@ requires-dist = [ { name = "docker", specifier = ">=7.1.0" }, { name = "flowmesh-cli", specifier = "==0.1.0" }, { name = "flowmesh-sdk-stack", specifier = "==0.1.0" }, + { name = "packaging", specifier = ">=24" }, { name = "pyyaml", specifier = ">=6.0.2" }, ]