Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
b951a75
test: add test-e2e-fixture preset for VM e2e tests
fullstackjam May 18, 2026
26e7c38
test(vm): use test-e2e-fixture preset in FullSetupConfiguresEverythin…
fullstackjam May 18, 2026
704f671
test(vm): move FullSetupConfiguresEverything to VM_B parallel slot
fullstackjam May 18, 2026
1aa5feb
docs: update L4 canonical command to test-vm-parallel (~14 min)
fullstackjam May 18, 2026
2ca965d
docs: update stale make test-vm references to test-vm-parallel
fullstackjam May 18, 2026
407a821
docs: fix missed test-vm reference in HARNESS.md auto-release row
fullstackjam May 18, 2026
b01c55e
docs: fix remaining stale test-vm references in auto-release.yml and …
fullstackjam May 18, 2026
5ee3c9b
chore(vm): rename base image from macos-tahoe-base to tahoe-base
fullstackjam May 18, 2026
4ad1f49
test(vm): remove redundant VM e2e tests
fullstackjam May 19, 2026
2ba31b4
ci: spike — run TestVM_Infra on macos-14 GitHub runner
fullstackjam May 19, 2026
0585c5e
ci: spike — full parallel vm-e2e on macos-14 (group-a + group-b)
fullstackjam May 19, 2026
c9453df
fix: dry-run must not write packages cache; adapt vm e2e for pre-rele…
fullstackjam May 19, 2026
3816385
fix: remove unused fmt import from vm_user_journey_test
fullstackjam May 19, 2026
719de10
chore: remove tart-based test-vm targets; L4 now runs on github actions
fullstackjam May 19, 2026
793b72b
chore: delete scripts/vm/ and remove stale run.sh references
fullstackjam May 19, 2026
db7d890
chore: update archtest no-raw-http baseline after loadRemotePackages …
fullstackjam May 19, 2026
0587f57
style: remove stray blank line in macos_defaults_e2e_test.go
fullstackjam May 19, 2026
98508d3
refactor: remove test-e2e-fixture preset; use minimal in e2e tests
fullstackjam May 19, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/auto-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -176,13 +176,13 @@ jobs:
**not** auto-tag. Run the destructive e2e suite locally, then
cut the release manually:

- [ ] \`make test-vm\` passes (Apple Silicon + Tart required — see scripts/vm/README.md)
- [ ] L4 CI (\`vm-e2e-spike.yml\`) is green on the latest commit on \`main\`
- [ ] sanity-check the curl|bash smoke and cli-compat results in the most recent test.yml run on main
- [ ] \`git tag -a ${NEW_TAG} -m "..."\` and \`git push origin ${NEW_TAG}\`
- [ ] close this issue

Skipping \`make test-vm\` is allowed (it is not a hard gate),
but \`feat:\` changes carry more risk than \`fix:\` patches.
L4 CI is not yet a hard merge gate, but \`feat:\` changes carry
more risk than \`fix:\` patches — verify it before tagging.
EOF
)

Expand Down
49 changes: 49 additions & 0 deletions .github/workflows/vm-e2e-spike.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
name: vm-e2e-spike

# L4 destructive e2e suite (build tags `e2e,vm`). The tests install packages
# and modify system state, so they run on GitHub-hosted macos-14 runners
# rather than on a developer machine.
on:
  # The contributor docs and the release checklist describe this workflow as
  # running on every PR and being green on the latest commit on `main`, so it
  # must trigger on both — not only on the original spike branch.
  pull_request:
  push:
    branches: ["main", "test/vm-e2e-speed"]
  workflow_dispatch:

# The jobs only need to read the checked-out source.
permissions:
  contents: read

# Cancel superseded runs for the same PR to save macOS runner minutes; never
# cancel push runs, so every commit on `main` gets its own result.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}

jobs:
  # Group A: long-running journey tests that install packages and modify system state.
  group-a:
    runs-on: macos-14
    timeout-minutes: 60
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-go@v5
        with:
          go-version-file: go.mod
          cache: true

      - name: Build binary
        run: make build

      # The go test timeout (55m) is kept below the job timeout (60m) so a
      # hung test is reported by `go test` instead of the job being killed.
      - name: Run group-a tests
        run: |
          go test -v -timeout 55m -tags="e2e,vm" \
            -run 'TestVM_Journey_FirstTimeUser|TestVM_Journey_DryRunIsCompletelySafe|TestVM_Interactive_InstallScript' \
            ./test/e2e/...

  # Group B: dotfiles, macOS defaults, edge cases, sync, and non-destructive e2e.
  group-b:
    runs-on: macos-14
    timeout-minutes: 60
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-go@v5
        with:
          go-version-file: go.mod
          cache: true

      - name: Build binary
        run: make build

      # Same timeout relationship as group-a: 55m test timeout inside a 60m job.
      - name: Run group-b tests
        run: |
          go test -v -timeout 55m -tags="e2e,vm" \
            -run 'TestVM_Journey_Dotfiles|TestVM_Journey_MacOS|TestVM_Journey_FullSetupConfiguresEverything|TestVM_Edge_|TestSmoke_|TestE2E_' \
            ./test/e2e/...
4 changes: 2 additions & 2 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ These are loaded automatically when Claude runs in this repo.
- `git push --force` against `main` or release tags.
- `git commit --amend` on commits already pushed.
- `git reset --hard` discarding uncommitted work.
- Running `make test-vm` (or any other `test-vm-*` target) outside an ephemeral
VM — these install real packages.
- Running `make test-vm-inner` (or `test-vm-inner-run`) outside a throwaway
machine — these install real packages onto the current host.
- Anything that modifies the user's `~/.zshrc`, Homebrew install, or
macOS `defaults`.

Expand Down
3 changes: 2 additions & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ make build-release VERSION=0.25.0 # optimized + UPX
# Test — full tier table in CONTRIBUTING.md
make test-unit # L1 (~75s) — unit + integration + contract; pre-push hook
make test-e2e # L3 compiled binary
make test-vm # L4 (~30m) — destructive e2e in a local Tart VM; before tagging
make test-vm-inner # L4 — full destructive e2e suite (runs in CI on macos-14; locally on a spare machine only)
make test-vm-inner-run TEST=Foo # L4 — single test
make test-coverage # coverage.out + coverage.html

# Single test
Expand Down
26 changes: 7 additions & 19 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,36 +37,24 @@ Tests are split across four tiers. Which one runs where:
| **L1 Unit + Integration + Contract** | Pure-Go logic with faked `Runner` *plus* real `brew` / `git` / `npm` against temp dirs and real `httptest` servers | `make test-unit` (~75s) | Every push (pre-push hook); CI on push/PR |
| **L2 Contract schema** | JSON schema validation against [openboot-contract](https://github.com/openbootdotdev/openboot-contract) | (runs in CI only) | CI on push/PR |
| **L3 E2E binary** | Compiled binary driven by scripts; `-tags=e2e` | `make test-e2e` | CI on release |
| **L4 VM e2e** | Full destructive suite (`-tags="e2e,vm"`) runs inside an ephemeral Tart VM provisioned by `scripts/vm/run.sh`. Installs real packages, modifies `~/.zshrc`, writes `defaults` — all contained to the throwaway VM. | `make test-vm` (~30 min, Apple Silicon + Tart required) | **Local only** — convention is to run before tagging a release. No CI gate. |
| **L4 VM e2e** | Full destructive suite (`-tags="e2e,vm"`). Installs real packages, modifies `~/.zshrc`, writes `defaults`. Each run requires a clean macOS host (Apple Silicon). | `make test-vm-inner` (or single test: `make test-vm-inner-run TEST=Foo`) | **CI** — runs on GitHub Actions `macos-14` runner (every PR via `vm-e2e-spike.yml`). Locally only on a throwaway machine. |

Rules of thumb:

- **Local dev:** run nothing manually if hooks are installed. `make test-unit` on demand when you want a sanity check. Skip L2+ unless you're cutting a release.
- **Before pushing:** `make test-unit` (the pre-push hook does this automatically). Requires `brew` / `git` / `npm` on PATH — they are queried read-only against temp dirs, no real installs.
- **Before tagging a release (convention, not enforced):** `make test-vm` on an Apple Silicon Mac with Tart installed. See [VM E2E setup](#vm-e2e-setup) below. `auto-release.yml` opens a `release-ready` issue on `feat:` thresholds to nudge you here.
- **Before tagging a release:** check that the L4 CI job (`vm-e2e-spike.yml`) is green on the latest commit on `main`. `auto-release.yml` opens a `release-ready` issue on `feat:` thresholds to nudge you here.

## VM E2E setup
## VM E2E

Destructive tests (L4) run inside an ephemeral Tart VM. One-time setup
on an Apple Silicon Mac:
L4 tests run on GitHub Actions (`macos-14` runner, Apple Silicon). Each job
gets a fresh macOS VM — no local setup required.

```bash
brew install cirruslabs/cli/tart
tart pull ghcr.nju.edu.cn/cirruslabs/macos-tahoe-base:latest
tart clone ghcr.nju.edu.cn/cirruslabs/macos-tahoe-base:latest macos-tahoe-base
make test-vm-inner # full suite (use on a throwaway machine only)
make test-vm-inner-run TEST=TestVM_Journey_FirstTimeUser # single test
```

Then:

```bash
make test-vm # full suite (~30 min)
make test-vm-run TEST=TestVM_Journey_FirstTimeUser # one test
OPENBOOT_VM_KEEP=1 make test-vm # don't destroy VM at exit (debug)
```

See `scripts/vm/README.md` for full environment-variable docs and
troubleshooting.

## Git Hooks

`make install-hooks` symlinks two hooks from `scripts/hooks/` into `.git/hooks/`:
Expand Down
40 changes: 7 additions & 33 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
.PHONY: test-unit test-e2e test-coverage test-all \
test-vm test-vm-run test-vm-parallel test-vm-inner test-vm-inner-run \
test-vm-inner test-vm-inner-run \
install-hooks uninstall-hooks

# VM A: install/journey tests that touch real system state (longest-running).
VM_A_TESTS := TestVM_Journey_FirstTimeUser|TestVM_Journey_DryRunIsCompletelySafe|TestVM_Journey_FullSetupConfiguresEverything|TestVM_Interactive_InstallScript
# VM B: all other VM tests — dotfiles, macOS, edge cases, smoke, real-install, sync.
VM_B_TESTS := TestVM_Journey_Dotfiles|TestVM_Journey_MacOS|TestVM_Edge_|TestSmoke_|TestE2E_

BINARY_NAME=openboot
BINARY_PATH=./$(BINARY_NAME)
VERSION ?= dev
Expand Down Expand Up @@ -34,38 +29,17 @@ test-all:
$(MAKE) test-coverage

# =============================================================================
# Tart VM e2e — destructive tests run inside a throwaway Tart VM provisioned
# by scripts/vm/run.sh. Files tagged `e2e,vm` run via `make test-vm-inner`;
# files tagged `e2e && !vm` (auth, snapshot_api) run as L3 on the host.
#
# Requires Apple Silicon + Tart installed locally. See scripts/vm/README.md
# for one-time setup. The relevant targets are defined immediately below
# this header: test-vm, test-vm-run, test-vm-inner, test-vm-inner-run.
# L4 VM e2e — destructive tests tagged `e2e,vm`. Run directly on any clean
# macOS host (Apple Silicon). In CI this is a GitHub Actions macos-14 runner
# (see .github/workflows/vm-e2e-spike.yml). Locally, run on a throwaway
# machine or a Tart VM — do NOT run on your primary dev machine.
# =============================================================================

# Developer-facing: provisions a Tart VM and runs the full e2e suite inside.
test-vm: build
scripts/vm/run.sh test-vm-inner

# Developer-facing: runs one named test inside a Tart VM.
test-vm-run: build
scripts/vm/run.sh "test-vm-inner-run TEST=$(TEST)"

# Developer-facing: runs e2e in two parallel VMs — VM A (system tests) and
# VM B (mock-server tests). Requires ~16 GB RAM and 8 cores free.
# Exit code is non-zero if either VM fails.
test-vm-parallel: build
@OPENBOOT_VM_TEST='$(VM_A_TESTS)' scripts/vm/run.sh test-vm-inner & PID_A=$$!; \
OPENBOOT_VM_TEST='$(VM_B_TESTS)' scripts/vm/run.sh test-vm-inner & PID_B=$$!; \
A_EXIT=0; B_EXIT=0; \
wait $$PID_A || A_EXIT=$$?; \
wait $$PID_B || B_EXIT=$$?; \
[ $$A_EXIT -eq 0 ] && [ $$B_EXIT -eq 0 ]

# In-VM: invoked over SSH by run.sh — not called by developers directly.
# Run the full L4 suite in a single process. CI runs L4 as two parallel jobs
# with -run filters instead; see .github/workflows/vm-e2e-spike.yml.
test-vm-inner:
go test -v -timeout 60m -tags="e2e,vm" ./test/e2e/...

# Run a single L4 test by name: make test-vm-inner-run TEST=TestVM_Journey_FirstTimeUser
test-vm-inner-run:
go test -v -timeout 45m -tags="e2e,vm" -run '$(TEST)' ./test/e2e/...

Expand Down
17 changes: 8 additions & 9 deletions docs/HARNESS.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@ Three regulation categories:
| Behav. | L1 unit + integration + contract (faked runners *and* real brew/git/npm in temp dirs) | pre-push, CI | `make test-unit` |
| Behav. | L2 contract schema (against openboot-contract repo) | CI | `.github/workflows/test.yml` `contract` job |
| Behav. | L3 e2e binary | release | `make test-e2e` |
| Behav. | L4 VM e2e (`vm`) — runs full destructive suite in a local Tart VM | local only (convention is pre-release; no CI gate) | `make test-vm` (driver: `scripts/vm/run.sh`) |
| Behav. | L4 VM e2e (`vm`) — full destructive suite on a clean macOS host | every PR | `.github/workflows/vm-e2e-spike.yml` (macos-14 runner, two parallel jobs); `make test-vm-inner` for local runs |
| Behav. | curl\|bash smoke (install.sh + mock server) | every PR | `.github/workflows/test.yml` `curl-bash-smoke` job |
| Behav. | Auto-release sensor — patch fast lane (`fix:`-only) auto-tags + dispatches `release.yml`; feat threshold opens a `release-ready` issue with a `make test-vm` checklist instead | push to `main` | `.github/workflows/auto-release.yml` |
| Behav. | Auto-release sensor — patch fast lane (`fix:`-only) auto-tags + dispatches `release.yml`; feat threshold opens a `release-ready` issue (check L4 CI green, then tag manually) | push to `main` | `.github/workflows/auto-release.yml` |
| Behav. | Release notes — Conventional Commits since previous tag, grouped by type (Features / Bug Fixes / etc) + Full Changelog link, appended to the install-instructions template | tag push or `workflow_dispatch` | `.github/workflows/release.yml` (`Write release notes` step) |
| Behav. | Old-CLI compat (previous release × current mock server) | every PR | `.github/workflows/test.yml` `cli-compat` job |
| Feedfwd. | Agent conventions | every AI turn | `CLAUDE.md`, `AGENTS.md` |
Expand Down Expand Up @@ -110,13 +110,12 @@ it survives doc rot.
to the inline `\r\033[K` renderer when unavailable. A static rule can't
see runtime terminal capabilities, so this stays a runtime concern. The
fallback is covered by `TestStickyProgressFallsBackWhenScrollRegionUnsupported`.
- **No CI gate for VM e2e.** Apple Silicon Tart VMs don't run on
GitHub-hosted `macos-latest` runners (no nested virt, wrong arch
guarantees), and we declined to set up a self-hosted runner. L4 is
local-only. Running `make test-vm` before tagging is convention,
encoded as a `release-ready` issue opened by `auto-release.yml`
on `feat:` thresholds — not a hard gate. A human can release without
it.
- **L4 runs on GitHub Actions, not a self-hosted runner.** `macos-14`
runners are Apple Silicon VMs — each job gets a fresh clean macOS
environment, which is exactly what L4 needs. Tart is no longer required.
The L4 workflow (`vm-e2e-spike.yml`) is not yet a hard merge gate (not in
`required-checks.txt`); it runs on every PR. Promoting it to a required
check is the next step once the workflow has proven stable.

## How agents should think about this file

Expand Down
4 changes: 2 additions & 2 deletions internal/archtest/baseline/no-raw-http.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
internal/auth/login.go:106
internal/cli/snapshot_import.go:70
internal/cli/snapshot_publish.go:142
internal/config/packages_remote.go:69
internal/config/packages_remote.go:74
internal/config/packages_remote.go:82
internal/config/packages_remote.go:87
internal/config/remote.go:56
internal/config/remote.go:75
internal/config/remote.go:233
Expand Down
6 changes: 5 additions & 1 deletion internal/cli/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,11 @@ shell configuration, and macOS preferences.`,
// network overhead.
if cmd.Name() == "install" {
updater.AutoUpgrade(version)
config.RefreshPackagesFromRemote()
if installCfg.DryRun {
config.RefreshPackagesFromRemoteDryRun()
} else {
config.RefreshPackagesFromRemote()
}
}

return nil
Expand Down
21 changes: 17 additions & 4 deletions internal/config/packages_remote.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,18 @@ type packagesCacheEntry struct {
// into the global Categories slice. Safe to call multiple times; it is a no-op
// if the cache is fresh. Falls back to the embedded packages.yaml silently.
func RefreshPackagesFromRemote() {
pkgs, err := loadRemotePackages()
refreshPackages(false)
}

// RefreshPackagesFromRemoteDryRun behaves like RefreshPackagesFromRemote
// except that it never writes the on-disk packages cache. An existing cache
// may still be read, and the package list may still be fetched over the
// network when no usable cache is found. Use it for --dry-run so the command
// has zero side effects on ~/.openboot/.
func RefreshPackagesFromRemoteDryRun() {
refreshPackages(true)
}

func refreshPackages(dryRun bool) {
pkgs, err := loadRemotePackages(dryRun)
if err != nil || len(pkgs) == 0 {
return // keep embedded fallback
}
Expand All @@ -47,7 +58,7 @@ func RefreshPackagesFromRemote() {
mergeRemotePackages(pkgs)
}

func loadRemotePackages() ([]remotePackage, error) {
func loadRemotePackages(dryRun bool) ([]remotePackage, error) {
// Try disk cache first.
if pkgs, err := readPackagesCache(); err == nil {
return pkgs, nil
Expand All @@ -59,8 +70,10 @@ func loadRemotePackages() ([]remotePackage, error) {
return nil, err
}

// Write cache (best-effort).
_ = writePackagesCache(pkgs)
// Write cache (best-effort) — skip during dry-run to avoid disk side effects.
if !dryRun {
_ = writePackagesCache(pkgs)
}
return pkgs, nil
}

Expand Down
4 changes: 2 additions & 2 deletions internal/config/packages_remote_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ func TestLoadRemotePackages_UsesCacheThenFallsToNetwork(t *testing.T) {

// No cache, no server → error.
t.Setenv("OPENBOOT_API_URL", "http://localhost:1")
_, err := loadRemotePackages()
_, err := loadRemotePackages(false)
assert.Error(t, err)

// Write fresh cache.
Expand All @@ -303,7 +303,7 @@ func TestLoadRemotePackages_UsesCacheThenFallsToNetwork(t *testing.T) {
os.WriteFile(filepath.Join(dir, packagesCacheFile), data, 0600)

// Cache hit → no network call needed.
pkgs, err := loadRemotePackages()
pkgs, err := loadRemotePackages(false)
require.NoError(t, err)
assert.Equal(t, "cached-pkg", pkgs[0].Name)
}
Expand Down
80 changes: 0 additions & 80 deletions scripts/vm/README.md

This file was deleted.

Loading
Loading