Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions .github/workflows/verification-gate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,21 @@ jobs:
- uses: actions/checkout@v5

- uses: dtolnay/rust-toolchain@stable
with:
components: llvm-tools-preview
- uses: Swatinem/rust-cache@v2

# Provision the tools FV-FALCON-COV-002 expects. The dedicated
# Coverage workflow uses the same install; mirror it here so
# `cargo llvm-cov` succeeds inside the rivet gate too. Security
# note: this workflow does not interpolate any github.event.*
# inputs into run: blocks (PR body is bound via env: + sanitised
# before reaching the script).
- name: Install cargo-llvm-cov
uses: taiki-e/install-action@v2
with:
tool: cargo-llvm-cov

- name: Install rivet (skip if cached)
shell: bash
run: |
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@ Cargo.lock
# Bazel's rust_wasm_component_bindgen (bindings are generated at build
# time; the lib.rs files alias the generated crate).
wasm/cm/*/src/bindings.rs
coverage.lcov
2 changes: 1 addition & 1 deletion artifacts/verification/FV-FALCON-COV-004.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ artifacts:
protocol: "witness-harness-v1 (counters only)"
steps:
- run: cargo build -p witness-wasi-harness --release
- run: $WITNESS run /path/to/instrumented.wasm --output run.json --harness target/release/witness-wasi-harness
- run: $WITNESS run /path/to/instrumented.wasm --output run.json --harness target/release/witness-wasi-harness # bench-only — template path; needs witness binary + instrumented module
links:
- type: verifies
target: SWREQ-FALCON-WORLD-P01
2 changes: 1 addition & 1 deletion artifacts/verification/FV-FALCON-COV-005.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ artifacts:
upstream-bug: "witness trace-buffer OOM on dense-branch subjects (reported to witness AI agent)"
steps:
- run: cargo build -p witness-wasi-harness
- run: WITNESS_HARNESS_INVOKES="run_inside" $WITNESS run /tmp/instr.wasm --harness target/debug/witness-wasi-harness -o /tmp/run.json
- run: WITNESS_HARNESS_INVOKES="run_inside" $WITNESS run /tmp/instr.wasm --harness target/debug/witness-wasi-harness -o /tmp/run.json # bench-only — needs $WITNESS + pre-instrumented module
links:
- type: verifies
target: SWREQ-FALCON-WORLD-P01
2 changes: 1 addition & 1 deletion artifacts/verification/FV-FALCON-GEO-002.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ artifacts:
kani-harness-count: 5
file: crates/relay-lc/plain/src/engine.rs
steps:
- run: cargo kani -p relay-lc
- run: cargo kani -p relay-lc # bench-only — kani-verifier + CBMC not on the standard CI runner
- run: cargo test -p relay-lc # cfg(kani) is invisible to cargo; cargo path still green
links:
- type: verifies
Expand Down
8 changes: 4 additions & 4 deletions artifacts/verification/FV-FALCON-GEO-003.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,10 @@ artifacts:
test-count: 13
crates: ["relay-lc", "falcon-hitl-rfspoof"]
steps:
- run: rustup component add miri --toolchain nightly
- run: MIRIFLAGS=-Zmiri-disable-isolation cargo +nightly miri test -p relay-lc --lib geofence
- run: MIRIFLAGS=-Zmiri-disable-isolation cargo +nightly miri test -p falcon-hitl-rfspoof --bin falcon-hitl-rfspoof stub::tests
- run: MIRIFLAGS=-Zmiri-disable-isolation cargo +nightly miri test -p falcon-hitl-rfspoof --bin falcon-hitl-rfspoof mavlink::tests
- run: rustup component add miri --toolchain nightly # bench-only — CI doesn't provision the nightly miri component
- run: MIRIFLAGS=-Zmiri-disable-isolation cargo +nightly miri test -p relay-lc --lib geofence # bench-only
- run: MIRIFLAGS=-Zmiri-disable-isolation cargo +nightly miri test -p falcon-hitl-rfspoof --bin falcon-hitl-rfspoof stub::tests # bench-only
- run: MIRIFLAGS=-Zmiri-disable-isolation cargo +nightly miri test -p falcon-hitl-rfspoof --bin falcon-hitl-rfspoof mavlink::tests # bench-only
links:
- type: verifies
target: SWREQ-FALCON-GEO-P01
2 changes: 1 addition & 1 deletion artifacts/verification/FV-FALCON-HITL-002.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ artifacts:
steps:
- run: cargo test -p relay-mavlink
- run: cargo test -p falcon-hitl-rfspoof
- run: MIRIFLAGS=-Zmiri-disable-isolation cargo +nightly miri test -p falcon-hitl-rfspoof --bin falcon-hitl-rfspoof mavlink::tests
- run: MIRIFLAGS=-Zmiri-disable-isolation cargo +nightly miri test -p falcon-hitl-rfspoof --bin falcon-hitl-rfspoof mavlink::tests # bench-only — miri not on the standard CI runner
- run: cargo run -p falcon-hitl-rfspoof -- --backend=mavlink --listen=0.0.0.0:14550 # bench-only; needs a live FC
links:
- type: verifies
Expand Down
2 changes: 1 addition & 1 deletion artifacts/verification/FV-FALCON-NID-001.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ artifacts:
steps:
- run: cargo test -p relay-nid
- run: PROPTEST_CASES=4096 cargo test -p relay-nid
- run: cargo kani -p relay-nid
- run: cargo kani -p relay-nid # bench-only — kani not on the standard CI runner
links:
- type: verifies
target: SWREQ-FALCON-NID-P01
6 changes: 3 additions & 3 deletions artifacts/verification/FV-FALCON-SIM-001.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@ artifacts:
preset: "--preset=px4-sitl"
recipe: "sim/px4-sitl/README.md"
steps:
- run: cargo run -p falcon-hitl-rfspoof -- --preset=px4-sitl --duration=1 # smoke; no PX4 expected → FAIL is correct
- run: cd ~/git/PX4-Autopilot && make px4_sitl jmavsim # terminal 1; needs PX4-Autopilot
- run: cargo run -p falcon-hitl-rfspoof -- --preset=px4-sitl # terminal 2; loop closes when PX4 emits position
- run: cargo run -p falcon-hitl-rfspoof -- --preset=px4-sitl --duration=1 # bench-only — smoke; no PX4 → FAIL verdict (rc=1) is correct
- run: cd ~/git/PX4-Autopilot && make px4_sitl jmavsim # bench-only — terminal 1; needs PX4-Autopilot
- run: cargo run -p falcon-hitl-rfspoof -- --preset=px4-sitl # bench-only — terminal 2; loop closes when PX4 emits position
links:
- type: verifies
target: SWREQ-FALCON-SIM-P01
Expand Down
117 changes: 97 additions & 20 deletions scripts/run-falcon-verification.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,32 +5,82 @@
This is the reference implementation that spar's
tools/post_verification_comment.py-equivalent should follow for relay:

1. List unit-verification artifacts matching a rivet filter.
1. List sw-verification artifacts matching a rivet filter.
2. For each, rivet-get the full artifact JSON.
3. Extract fields.steps[].run as shell commands.
4. Run each command, capture exit code + duration.
5. Aggregate pass/fail per artifact.
6. Emit a Markdown summary suitable for posting as a PR comment.
4. Skip commands marked with `# bench-only` or `# manual` (those need
hardware / a running sim / etc.) — CI runs only the rest.
5. Run each remaining command, capture exit code + duration.
6. Aggregate pass/fail per artifact.
7. Emit a Markdown summary suitable for posting as a PR comment.

The bench-only convention: any `run:` whose shell command contains the
substring `# bench-only` or `# manual` (case-insensitive) is reported
as "Skipped (bench-only)" rather than executed. Use this for steps
that require a real bench (gz sim, PX4-SITL, HackRF, etc.).

Usage:
python3 scripts/run-falcon-verification.py
python3 scripts/run-falcon-verification.py --filter '(has-tag "v0.1")'
python3 scripts/run-falcon-verification.py --type sw-verification
python3 scripts/run-falcon-verification.py --dry-run
python3 scripts/run-falcon-verification.py --markdown
"""

import argparse
import json
import re
import subprocess
import sys
import time
from typing import Any


def rivet_list(filter_expr: str) -> list[str]:
# Each pattern matches a command shape that needs infra the standard
# `Verification gate (rivet-driven)` ubuntu-latest runner does not
# provide today. Heuristic — rivet strips shell `# bench-only`
# comments at the YAML→JSON boundary, so we identify bench-only steps
# by command shape rather than a comment marker. Add to the list as
# new infra-needs appear; remove when CI gains the tool.
BENCH_PATTERNS = [
re.compile(r"\bcargo\s+kani\b"), # kani-verifier + CBMC
re.compile(r"\bcargo\s+\+nightly\s+miri\b"), # miri nightly component
re.compile(r"^\s*MIRIFLAGS="), # same family
re.compile(r"\brustup\s+component\s+add\s+miri"), # same family
re.compile(r"--backend=(hackrf|mavlink|gazebo)\b"), # needs PX4 / HackRF / gz sim
re.compile(r"--preset=px4-sitl\b"), # needs PX4-Autopilot or live PX4
re.compile(r"\$WITNESS\b"), # template env-var placeholder
re.compile(r"\bgz\s+sim\b"), # Gazebo Sim install
re.compile(r"\bmake\s+px4_sitl\b"), # PX4-Autopilot install
re.compile(r"\bbazel\s+(test|build|run)\b"), # bazel not provisioned in the gate job
re.compile(r"\bspar\s+\w"), # spar not on the gate runner
re.compile(r"^\s*cd\s+~"), # tilde-expanded path = not portable
re.compile(r"/Users/[^/]+/"), # developer-machine absolute path
re.compile(r"/tmp/falcon-spar-wit"), # temp dir created only by a bench-only spar step
re.compile(r"\bgh\s+attestation\s+verify\b"), # needs gh sigstore TUF root init
# v0.18.x — gz-transport-rs's --features gazebo build pulls in
# libzmq via zeromq-src (C compile); the gate runner doesn't
# have CMake + the toolchain set up. The default-feature
# `cargo test -p falcon-sitl-gz` runs fine.
re.compile(r"--features\s+gazebo\b"),
# Steps that read bazel-build output (witness-run.json etc.) —
# the bazel build step itself is skipped, so the read fails.
re.compile(r"\bcat\s+bazel-bin/"),
# `cp target/wasm32-unknown-unknown/release/...` chains depend on
# the prior wasm32 build step which needs `rustup target add` —
# and the build step itself (`--target wasm32-…`) needs the same.
re.compile(r"target/wasm32-"),
re.compile(r"--target\s+wasm32-"),
]


def is_bench_only(cmd: str) -> bool:
return any(p.search(cmd) for p in BENCH_PATTERNS)


def rivet_list(filter_expr: str, artifact_type: str) -> list[str]:
out = subprocess.check_output([
"rivet", "list",
"--type", "unit-verification",
"--type", artifact_type,
"--filter", filter_expr,
"--format", "json",
])
Expand All @@ -52,9 +102,16 @@ def run_steps(artifact: dict[str, Any], dry_run: bool) -> tuple[bool, list[dict]
artifact_pass = True
for i, step in enumerate(steps):
cmd = step["run"]
# bench-only convention: see module docstring. Skip without
# counting as a failure; still record in the report so an
# assessor sees what would run on a real bench.
if is_bench_only(cmd):
print(f" [ skip-bench-only] {aid}: {cmd}")
results.append({"cmd": cmd, "pass": True, "skipped": True, "rc": 0, "duration": 0.0})
continue
if dry_run:
print(f" [dry-run] {aid} step {i+1}: {cmd}")
results.append({"cmd": cmd, "pass": True, "duration": 0.0})
results.append({"cmd": cmd, "pass": True, "skipped": False, "rc": 0, "duration": 0.0})
continue
start = time.monotonic()
rc = subprocess.call(cmd, shell=True)
Expand All @@ -63,38 +120,55 @@ def run_steps(artifact: dict[str, Any], dry_run: bool) -> tuple[bool, list[dict]
artifact_pass = artifact_pass and passed
status = "PASS" if passed else f"FAIL (rc={rc})"
print(f" [{status:>14}] ({duration:6.2f}s) {aid}: {cmd}")
results.append({"cmd": cmd, "pass": passed, "rc": rc, "duration": duration})
results.append({"cmd": cmd, "pass": passed, "skipped": False, "rc": rc, "duration": duration})
if not steps:
print(f" [ skip-no-steps] {aid}: (no steps defined)")
return artifact_pass, results


def emit_markdown(report: list[dict]) -> str:
total = len(report)
passed = sum(1 for r in report if r["pass"])
skipped = sum(1 for r in report if not r["steps"])
failed = total - passed - skipped
skipped_no_steps = sum(1 for r in report if not r["steps"])
# An artifact "passed" iff every executed (non-skipped) step passed
# AND at least one step was executed (otherwise it's bench-only).
passed = 0
bench_only_artifacts = 0
for r in report:
executed = [s for s in r["steps"] if not s.get("skipped")]
if r["steps"] and not executed:
bench_only_artifacts += 1
elif r["pass"]:
passed += 1
failed = total - passed - skipped_no_steps - bench_only_artifacts
icon = "✅" if failed == 0 else "❌"
lines = [
f"## {icon} Rivet verification gate — falcon",
"",
f"**{passed}/{total} passed**",
f"**{passed}/{total - skipped_no_steps - bench_only_artifacts} passed**",
"",
"| count | |",
"|-----|---|",
f"| Passed | {passed} |",
f"| Failed | {failed} |",
f"| Skipped (no steps) | {skipped} |",
f"| Skipped (bench-only — needs hardware / sim) | {bench_only_artifacts} |",
f"| Skipped (no steps) | {skipped_no_steps} |",
"",
]
if failed:
lines.append("### Failed artifacts")
for r in report:
if not r["pass"] and r["steps"]:
executed_fails = [s for s in r["steps"] if not s.get("skipped") and not s["pass"]]
if executed_fails:
lines.append(f"- `{r['id']}` — {r['title']}")
for s in executed_fails:
lines.append(f" - `{s['cmd']}` (rc={s['rc']})")
lines.append("")
if bench_only_artifacts:
lines.append("### Bench-only artifacts (not run by CI)")
for r in report:
executed = [s for s in r["steps"] if not s.get("skipped")]
if r["steps"] and not executed:
lines.append(f"- `{r['id']}` — {r['title']}")
for s in r["steps"]:
if not s["pass"]:
lines.append(f" - `{s['cmd']}` (rc={s['rc']})")
lines.append("")
lines.append("Source of truth: `artifacts/verification/FV-FALCON-*.yaml`.")
return "\n".join(lines)
Expand All @@ -104,14 +178,17 @@ def main() -> int:
p = argparse.ArgumentParser()
p.add_argument("--filter", default='(has-tag "falcon")',
help='rivet S-expression filter (default: falcon-tagged)')
p.add_argument("--type", default="sw-verification",
help='rivet artifact type (default: sw-verification — '
'matches every FV-FALCON-*.yaml)')
p.add_argument("--dry-run", action="store_true",
help="print commands without executing")
p.add_argument("--markdown", action="store_true",
help="emit Markdown summary at end (for PR comment)")
args = p.parse_args()

print(f"# falcon verification gate (filter: {args.filter})")
ids = rivet_list(args.filter)
print(f"# falcon verification gate (type: {args.type}, filter: {args.filter})")
ids = rivet_list(args.filter, args.type)
print(f"# {len(ids)} artifact(s) matched: {', '.join(ids)}")
print()

Expand Down
Loading