From a4b372d022c276df20f24963b527fdd53f0fd601 Mon Sep 17 00:00:00 2001 From: Jim Pudar Date: Sat, 23 May 2026 12:47:50 -0400 Subject: [PATCH] Use graceful Lima stop for VM restart lifecycle --- src/rootcell/process.ts | 2 + src/rootcell/providers/aws-ec2.ts | 4 + src/rootcell/providers/lima.ts | 24 ++++- src/rootcell/providers/types.ts | 1 + src/rootcell/rootcell.test.ts | 151 +++++++++++++++++++++++++++++- src/rootcell/rootcell.ts | 4 +- 6 files changed, 182 insertions(+), 4 deletions(-) diff --git a/src/rootcell/process.ts b/src/rootcell/process.ts index f1b5f5c..bbcf876 100644 --- a/src/rootcell/process.ts +++ b/src/rootcell/process.ts @@ -20,6 +20,7 @@ export interface InheritOptions { readonly env?: NodeJS.ProcessEnv; readonly allowFailure?: boolean; readonly ignoredOutput?: boolean; + readonly timeoutMs?: number; } function statusFromSignal(signal: NodeJS.Signals | null): number { @@ -62,6 +63,7 @@ export function runInherited(command: string, args: readonly string[], options: env: options.env ?? process.env, stdio: options.ignoredOutput ? "ignore" : "inherit", encoding: "utf8", + timeout: options.timeoutMs, }); const status = result.status ?? statusFromSignal(result.signal); if (!options.allowFailure && status !== 0) { diff --git a/src/rootcell/providers/aws-ec2.ts b/src/rootcell/providers/aws-ec2.ts index d0a60a1..7a9a4ea 100644 --- a/src/rootcell/providers/aws-ec2.ts +++ b/src/rootcell/providers/aws-ec2.ts @@ -32,6 +32,10 @@ export class AwsEc2VmProvider implements VmProvider { } async forceStopIfRunning(name: string): Promise { + await this.stopIfRunning(name); + } + + async stopIfRunning(name: string): Promise { if ((await this.status(name)).state !== "running") { return; } diff --git a/src/rootcell/providers/lima.ts b/src/rootcell/providers/lima.ts index dfb5216..ddc2d92 100644 --- a/src/rootcell/providers/lima.ts +++ b/src/rootcell/providers/lima.ts @@ -34,6 +34,8 @@ const LimaVmStateSchema = z.object({ type LimaVmState = Readonly>; +const LIMA_GRACEFUL_STOP_TIMEOUT_MS = 180_000; + export const NIXOS_LIMA_AARCH64_IMAGE = { location: "https://github.com/nixos-lima/nixos-lima/releases/download/v0.0.5/nixos-lima-v0.0.5-aarch64.qcow2", arch: "aarch64", @@ -104,11 +106,31 @@ export class LimaVmProvider implements VmProvider { return Promise.resolve({ state: "unexpected", detail: status.length === 0 ? "unknown Lima status" : status }); } - async forceStopIfRunning(name: string): Promise { + async stopIfRunning(name: string): Promise { if ((await this.status(name)).state !== "running") { return; } this.log(`stopping ${name} Lima VM...`); + const result = runInherited(this.ensureLimactl(), ["--tty=false", "stop", name], { + allowFailure: true, + timeoutMs: LIMA_GRACEFUL_STOP_TIMEOUT_MS, + }); + if (result.status === 0 || (await this.status(name)).state !== "running") { + return; + } + this.log(`graceful stop for ${name} did not complete; force-stopping Lima VM...`); + this.forceStopRunning(name); + } + + async forceStopIfRunning(name: string): Promise { + if ((await this.status(name)).state !== "running") { + return; + } + this.forceStopRunning(name); + } + + private forceStopRunning(name: string): void { + this.log(`force-stopping ${name} Lima VM...`); runInherited(this.ensureLimactl(), ["--tty=false", "stop", "--force", name], { allowFailure: true, }); diff --git a/src/rootcell/providers/types.ts b/src/rootcell/providers/types.ts index 07605ba..147b2cd 100644 --- a/src/rootcell/providers/types.ts +++ b/src/rootcell/providers/types.ts @@ -57,6 +57,7 @@ export interface CopyToGuestOptions { export interface VmProvider { readonly id: string; status(name: string): Promise; + stopIfRunning(name: string): Promise; forceStopIfRunning(name: string): Promise; remove(name: string): Promise; assertCompatible(name: string, network: TAttachment): Promise; diff --git a/src/rootcell/rootcell.test.ts b/src/rootcell/rootcell.test.ts index f18dc2e..e419e1b 100644 --- a/src/rootcell/rootcell.test.ts +++ b/src/rootcell/rootcell.test.ts @@ -5,7 +5,7 @@ import { ROOTCELL_SUBCOMMANDS } from "./metadata.ts"; import { loadDotEnv, parseSecretMappings } from "./env.ts"; import { resolveHostTool } from "./host-tools.ts"; import { initRootcellInstanceEnv } from "./init-env.ts"; -import { buildConfig, formatVmList, rootcellMain } from "./rootcell.ts"; +import { buildConfig, formatVmList, rootcellMain, RootcellApp } from "./rootcell.ts"; import { deriveVmNames, instancePaths, listRootcellVmInstanceNames, loadRootcellInstance, seedRootcellInstanceFiles } from "./instance.ts"; import { runCapture } from "./process.ts"; import { parseAwsEc2Config } from "./providers/aws-ec2-config.ts"; @@ -17,6 +17,7 @@ import { type TerraformRunner, } from "./providers/aws-ec2-terraform.ts"; import type { AwsEc2Api, AwsS3ObjectRef } from "./providers/aws-ec2-aws.ts"; +import type { ProviderBundle, VmNetworkAttachment } from "./providers/types.ts"; import { createProviderBundle } from "./providers/factory.ts"; import { limaNetworkListIncludes, @@ -762,6 +763,72 @@ describe("VM and network providers", () => { expect(providers.secrets.ids).toEqual(["macos-keychain", "aws-prod", "aws-dev"]); }); + test("app lifecycle stop uses graceful VM stop before stopping networks", async () => { + const config = buildConfig("/repo", {}, fakeInstance("dev")); + const calls: string[] = []; + const attachment: VmNetworkAttachment = { kind: "fake" }; + const providers: ProviderBundle = { + network: { + id: "fake-network", + plan: () => ({ + provider: "fake-network", + guest: { + firewallIp: config.firewallIp, + agentIp: config.agentIp, + networkPrefix: 24, + agentPrivateInterface: "agent0", + firewallPrivateInterface: "firewall0", + firewallEgressInterface: "egress0", + }, + vms: { + agent: attachment, + firewall: attachment, + }, + }), + preflight: () => Promise.resolve(), + stop: () => { + calls.push("network:stop"); + return Promise.resolve(); + }, + remove: () => Promise.resolve(), + ensureReady: () => Promise.resolve(), + }, + vm: { + id: "fake-vm", + status: (name) => { + calls.push(`status:${name}`); + return Promise.resolve({ state: "stopped" }); + }, + stopIfRunning: (name) => { + calls.push(`stop:${name}`); + return Promise.resolve(); + }, + forceStopIfRunning: (name) => { + calls.push(`force:${name}`); + return Promise.resolve(); + }, + remove: () => Promise.resolve(), + assertCompatible: () => Promise.resolve(), + ensureRunning: () => Promise.resolve({ created: false }), + exec: () => Promise.resolve({ status: 0 }), + execCapture: () => Promise.resolve({ status: 0, stdout: "", stderr: "" }), + execInteractive: () => Promise.resolve(0), + copyToGuest: () => Promise.resolve(), + }, + secrets: new StaticSecretProviderRegistry([]), + }; + + await new RootcellApp(config, providers).stopVms(); + + expect(calls).toEqual([ + "stop:agent-dev", + "stop:firewall-dev", + "status:agent-dev", + "status:firewall-dev", + "network:stop", + ]); + }); + test("macOS Lima user-v2 provider exposes egress firewall and private-only agent attachments", () => { const config = buildConfig("/repo", {}, fakeInstance("dev")); const plan = new MacOsLimaUserV2NetworkProvider(config, ignoreLog).plan(); @@ -1311,6 +1378,64 @@ describe("VM and network providers", () => { expect(() => parseLimaVmState({ provider: "unknown" })).toThrow("provider mismatch"); }); + test("Lima lifecycle stop requests graceful stop without force", async () => { + const dir = mkdtempSync(join(tmpdir(), "rootcell-lima-stop-test-")); + const oldLimactl = process.env.ROOTCELL_LIMACTL; + const oldCalls = process.env.ROOTCELL_LIMACTL_CALLS; + try { + const callsPath = join(dir, "calls.txt"); + const limactl = join(dir, "limactl"); + writeFileSync(limactl, fakeLimactlStopScript({ gracefulStatus: 0 }), "utf8"); + chmodSync(limactl, 0o755); + process.env.ROOTCELL_LIMACTL = limactl; + process.env.ROOTCELL_LIMACTL_CALLS = callsPath; + + const config = buildConfig(dir, {}, fakeInstance("dev", dir)); + const provider = new LimaVmProvider(config, ignoreLog); + + await provider.stopIfRunning(config.agentVm); + + expect(readLines(callsPath)).toEqual([ + "list --format json agent-dev", + "--tty=false stop agent-dev", + ]); + } finally { + restoreEnv("ROOTCELL_LIMACTL", oldLimactl); + restoreEnv("ROOTCELL_LIMACTL_CALLS", oldCalls); + rmSync(dir, { recursive: true, force: true }); + } + }); + + test("Lima lifecycle stop falls back to force when graceful stop fails", async () => { + const dir = mkdtempSync(join(tmpdir(), "rootcell-lima-stop-fallback-test-")); + const oldLimactl = process.env.ROOTCELL_LIMACTL; + const oldCalls = process.env.ROOTCELL_LIMACTL_CALLS; + try { + const callsPath = join(dir, "calls.txt"); + const limactl = join(dir, "limactl"); + writeFileSync(limactl, fakeLimactlStopScript({ gracefulStatus: 1 }), "utf8"); + chmodSync(limactl, 0o755); + process.env.ROOTCELL_LIMACTL = limactl; + process.env.ROOTCELL_LIMACTL_CALLS = callsPath; + + const config = buildConfig(dir, {}, fakeInstance("dev", dir)); + const provider = new LimaVmProvider(config, ignoreLog); + + await provider.stopIfRunning(config.agentVm); + + expect(readLines(callsPath)).toEqual([ + "list --format json agent-dev", + "--tty=false stop agent-dev", + "list --format json agent-dev", + "--tty=false stop --force agent-dev", + ]); + } finally { + restoreEnv("ROOTCELL_LIMACTL", oldLimactl); + restoreEnv("ROOTCELL_LIMACTL_CALLS", oldCalls); + rmSync(dir, { recursive: true, force: true }); + } + }); + test("Lima transport refreshes stale firewall SSH local ports", async () => { const dir = mkdtempSync(join(tmpdir(), "rootcell-lima-port-test-")); const oldPath = process.env.PATH; @@ -1734,6 +1859,30 @@ function removedRuntimeNames(): readonly string[] { return [["vf", "kit"].join(""), ["socket", "_vmnet"].join("")]; } +function fakeLimactlStopScript(input: { readonly gracefulStatus: number }): string { + return [ + "#!/bin/sh", + "printf '%s\\n' \"$*\" >> \"$ROOTCELL_LIMACTL_CALLS\"", + "if [ \"$1\" = \"list\" ] && [ \"$2\" = \"--format\" ] && [ \"$3\" = \"json\" ] && [ \"$4\" = \"agent-dev\" ]; then", + " printf '[{\"name\":\"agent-dev\",\"status\":\"running\"}]\\n'", + " exit 0", + "fi", + "if [ \"$1\" = \"--tty=false\" ] && [ \"$2\" = \"stop\" ] && [ \"$3\" = \"agent-dev\" ]; then", + ` exit ${String(input.gracefulStatus)}`, + "fi", + "if [ \"$1\" = \"--tty=false\" ] && [ \"$2\" = \"stop\" ] && [ \"$3\" = \"--force\" ] && [ \"$4\" = \"agent-dev\" ]; then", + " exit 0", + "fi", + "echo unexpected limactl \"$@\" >&2", + "exit 1", + "", + ].join("\n"); +} + +function readLines(path: string): readonly string[] { + return readFileSync(path, "utf8").trim().split("\n"); +} + function fakeInstance(name: string, repo = "/repo", env: NodeJS.ProcessEnv = {}): RootcellInstance { const paths = instancePaths(repo, name, env); return { diff --git a/src/rootcell/rootcell.ts b/src/rootcell/rootcell.ts index dd7672f..83ae49f 100644 --- a/src/rootcell/rootcell.ts +++ b/src/rootcell/rootcell.ts @@ -229,7 +229,7 @@ export class RootcellApp { async stopVms(): Promise { for (const entry of this.vmEntries()) { - await this.providers.vm.forceStopIfRunning(entry.name); + await this.providers.vm.stopIfRunning(entry.name); } await this.waitForVmsStopped(); await this.providers.network.stop(); @@ -777,7 +777,7 @@ Run \`./rootcell pubkey\` to print it again. private async restartAgentVm(message: string): Promise { log(message); - await this.providers.vm.forceStopIfRunning(this.config.agentVm); + await this.providers.vm.stopIfRunning(this.config.agentVm); await this.providers.vm.ensureRunning({ role: "agent", name: this.config.agentVm,