From 8738aad727014e9539ddcba12af112989606ebaf Mon Sep 17 00:00:00 2001
From: Ralf Anton Beier <ralf_beier@me.com>
Date: Tue, 21 Apr 2026 19:08:22 +0200
Subject: [PATCH] test(fuzz): YAML footgun + CLI argv + artifact-id fuzzers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds three libfuzzer-based targets under fuzz/ that empirically measure
the arxiv:2604.13108 "YAML silently corrupts ~50% of structural errors"
claim against rivet's actual artifact-ingest pipeline, plus a CLI argv
target and an id-roundtrip target.

Targets:
  * yaml_footguns  — Arbitrary-driven adversarial mutations of a known
    valid seed YAML (Norway, version-coercion, leading-zero-id,
    unquoted-date, duplicate-key, tab-indent, multi-doc, null-shorthand
    link, unknown top-level key, anchor cycle, deep nesting, control
    chars in id).  Five oracles: source-substring invariant for ids /
    types / link targets, phantom-link detection, null-ish target
    detection, serde-rejected-but-hir-accepted detection, and
    multi-document truncation detection.
  * cli_argv      — structured argv for rivet-cli subprocess; oracle
    fails on signal-death or when --format json returns success with
    non-JSON stdout.  Gated on $RIVET_BIN env var so it skips silently
    if no binary is configured.
  * artifact_ids  — arbitrary bytes as id: scalar; oracle requires
    Store::insert → Store::get to round-trip byte-exact.

Also adds fuzz/examples/oracle_smoke.rs — a non-libfuzzer harness that
runs the same oracle logic against a fixed set of Mythos-predicted
footgun inputs.  Running `cargo run --release --example oracle_smoke`
(before cargo-fuzz is available in CI) produces five findings on
current main, empirically confirming:
  - null / tilde / empty-string link targets produce phantom links
    (yaml_hir.rs:530-549 bug class)
  - multi-document YAML is silently truncated by the HIR path
    (yaml_cst.rs:517 bug class)
  - renaming `artifacts:` to a sibling key causes the HIR path to
    return Ok(vec![]) with zero diagnostics (formats/generic.rs:138)

CI: .github/workflows/fuzz.yml runs each target for 15 min on push to
main and nightly at 06:17 UTC.  continue-on-error so new crashes do
not block merges; crashes upload as workflow artifacts and the evolved
corpus is cached between runs.

REQ-052 is scoped to variant-solver fuzzing; these YAML/CLI fuzzers
verify the broader parser surface (REQ-028) and CLI surface (REQ-007).

Verifies: REQ-028, REQ-007
Refs: REQ-052

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/fuzz.yml         |  92 ++++++
 fuzz/Cargo.lock                    | 430 ++++++++++++++++++++++++++++-
 fuzz/Cargo.toml                    |  33 +++
 fuzz/README.md                     | 104 +++++++
 fuzz/examples/oracle_smoke.rs      | 349 +++++++++++++++++++++++
 fuzz/fuzz_targets/artifact_ids.rs  |  97 +++++++
 fuzz/fuzz_targets/cli_argv.rs      | 238 ++++++++++++++++
 fuzz/fuzz_targets/yaml_footguns.rs | 385 ++++++++++++++++++++++++++
 8 files changed, 1722 insertions(+), 6 deletions(-)
 create mode 100644 .github/workflows/fuzz.yml
 create mode 100644 fuzz/README.md
 create mode 100644 fuzz/examples/oracle_smoke.rs
 create mode 100644 fuzz/fuzz_targets/artifact_ids.rs
 create mode 100644 fuzz/fuzz_targets/cli_argv.rs
 create mode 100644 fuzz/fuzz_targets/yaml_footguns.rs

diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml
new file mode 100644
index 0000000..8c274fb
--- /dev/null
+++ b/.github/workflows/fuzz.yml
@@ -0,0 +1,92 @@
+name: fuzz
+
+# YAML-footgun and CLI fuzz targets.  Separate workflow from `ci.yml` because
+# fuzz budgets are measured in minutes, not seconds.
+#
+# We run:
+#   * on every push to main (post-merge confirmation)
+#   * on a nightly schedule (06:17 UTC) so corpus growth is cumulative
+# We do NOT run on PRs — too expensive for the critical path.
+#
+# Each target gets a 15-minute time budget.  The job is marked
+# `continue-on-error: true` so a single crash does not block other work;
+# crashes are surfaced as artifact uploads.
+
+on:
+  push:
+    branches: [main]
+  schedule:
+    # Daily at 06:17 UTC.  Offset from round hour to spread CI load.
+    - cron: "17 6 * * *"
+  workflow_dispatch:
+
+concurrency:
+  group: fuzz-${{ github.ref }}
+  cancel-in-progress: false
+
+jobs:
+  fuzz:
+    name: fuzz ${{ matrix.target }}
+    runs-on: ubuntu-latest
+    continue-on-error: true
+    strategy:
+      fail-fast: false
+      matrix:
+        target:
+          - yaml_footguns
+          - cli_argv
+          - artifact_ids
+    timeout-minutes: 25
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install nightly toolchain
+        uses: dtolnay/rust-toolchain@nightly
+
+      - name: Install cargo-fuzz
+        run: cargo install cargo-fuzz --locked
+
+      - name: Build rivet binary (for cli_argv)
+        if: matrix.target == 'cli_argv'
+        run: cargo build --release --bin rivet
+
+      - name: Cache fuzz corpora
+        uses: actions/cache@v4
+        with:
+          path: |
+            fuzz/corpus/${{ matrix.target }}
+            fuzz/artifacts/${{ matrix.target }}
+          key: fuzz-corpus-${{ matrix.target }}-${{ github.run_id }}-${{ github.run_attempt }} # unique per run: an exact hit on a per-SHA key would skip the save and drop nightly corpus growth
+          restore-keys: |
+            fuzz-corpus-${{ matrix.target }}-
+
+      - name: Run fuzz target for 15 minutes
+        env:
+          TARGET: ${{ matrix.target }}
+          RIVET_BIN: ${{ github.workspace }}/target/release/rivet
+        run: |
+          cd fuzz
+          cargo +nightly fuzz run "$TARGET" -- \
+            -max_total_time=900 \
+            -timeout=30 \
+            -rss_limit_mb=2048
+
+      - name: Upload crash artifacts
+        if: failure() || cancelled()
+        uses: actions/upload-artifact@v4
+        with:
+          name: fuzz-crashes-${{ matrix.target }}
+          path: |
+            fuzz/artifacts/${{ matrix.target }}/
+          if-no-files-found: ignore
+          retention-days: 30
+
+      - name: Upload corpus snapshot
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: fuzz-corpus-${{ matrix.target }}
+          path: fuzz/corpus/${{ matrix.target }}/
+          if-no-files-found: ignore
+          retention-days: 14
diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock
index c185d6a..20a2bff 100644
--- a/fuzz/Cargo.lock
+++ b/fuzz/Cargo.lock
@@ -2,6 +2,21 @@
 # It is not intended for manual editing.
 version = 4
 
+[[package]]
+name = "aho-corasick"
+version = "1.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "allocator-api2"
+version = "0.2.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
+
 [[package]]
 name = "anyhow"
 version = "1.0.102"
@@ -13,6 +28,27 @@ name = "arbitrary"
 version = "1.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1"
+dependencies = [
+ "derive_arbitrary",
+]
+
+[[package]]
+name = "autocfg"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
+
+[[package]]
+name = "bitflags"
+version = "2.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3"
+
+[[package]]
+name = "boxcar"
+version = "0.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "36f64beae40a84da1b4b26ff2761a5b895c12adc41dc25aaee1c4f2bbfe97a6e"
 
 [[package]]
 name = "cc"
@@ -32,12 +68,76 @@ version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
 
+[[package]]
+name = "countme"
+version = "3.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7704b5fdd17b18ae31c4c1da5a2e0305a2bf17b5249300a9ee9ed7b72114c636"
+
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
+dependencies = [
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-queue"
+version = "0.3.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
+
+[[package]]
+name = "derive_arbitrary"
+version = "1.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "either"
+version = "1.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
+
 [[package]]
 name = "equivalent"
 version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
 
+[[package]]
+name = "etch"
+version = "0.4.0"
+dependencies = [
+ "petgraph",
+]
+
 [[package]]
 name = "find-msvc-tools"
 version = "0.1.9"
@@ -46,9 +146,15 @@ checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
 
 [[package]]
 name = "fixedbitset"
-version = "0.4.2"
+version = "0.5.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"
+checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"
+
+[[package]]
+name = "foldhash"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
 
 [[package]]
 name = "getrandom"
@@ -62,12 +168,32 @@ dependencies = [
  "wasip2",
 ]
 
+[[package]]
+name = "hashbrown"
+version = "0.15.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
+dependencies = [
+ "allocator-api2",
+ "equivalent",
+ "foldhash",
+]
+
 [[package]]
 name = "hashbrown"
 version = "0.16.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
 
+[[package]]
+name = "hashlink"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1"
+dependencies = [
+ "hashbrown 0.15.5",
+]
+
 [[package]]
 name = "indexmap"
 version = "2.13.0"
@@ -75,7 +201,25 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017"
 dependencies = [
  "equivalent",
- "hashbrown",
+ "hashbrown 0.16.1",
+]
+
+[[package]]
+name = "intrusive-collections"
+version = "0.9.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "189d0897e4cbe8c75efedf3502c18c887b05046e59d28404d4d8e46cbc4d1e86"
+dependencies = [
+ "memoffset",
+]
+
+[[package]]
+name = "inventory"
+version = "0.3.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4f0c30c76f2f4ccee3fe55a2435f691ca00c0e4bd87abe4f4a851b1d4dac39b"
+dependencies = [
+ "rustversion",
 ]
 
 [[package]]
@@ -110,6 +254,15 @@ dependencies = [
  "cc",
 ]
 
+[[package]]
+name = "lock_api"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
+dependencies = [
+ "scopeguard",
+]
+
 [[package]]
 name = "log"
 version = "0.4.29"
@@ -122,16 +275,66 @@ version = "2.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
 
+[[package]]
+name = "memoffset"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.21.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
+
+[[package]]
+name = "parking_lot"
+version = "0.12.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a"
+dependencies = [
+ "lock_api",
+ "parking_lot_core",
+]
+
+[[package]]
+name = "parking_lot_core"
+version = "0.9.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "redox_syscall",
+ "smallvec",
+ "windows-link",
+]
+
 [[package]]
 name = "petgraph"
-version = "0.6.5"
+version = "0.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db"
+checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772"
 dependencies = [
  "fixedbitset",
  "indexmap",
 ]
 
+[[package]]
+name = "pin-project-lite"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
+
+[[package]]
+name = "portable-atomic"
+version = "1.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
+
 [[package]]
 name = "proc-macro2"
 version = "1.0.106"
@@ -141,6 +344,24 @@ dependencies = [
  "unicode-ident",
 ]
 
+[[package]]
+name = "pulldown-cmark"
+version = "0.12.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f86ba2052aebccc42cbbb3ed234b8b13ce76f75c3551a303cb2bcffcff12bb14"
+dependencies = [
+ "bitflags",
+ "memchr",
+ "pulldown-cmark-escape",
+ "unicase",
+]
+
+[[package]]
+name = "pulldown-cmark-escape"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "007d8adb5ddab6f8e3f491ac63566a7d5002cc7ed73901f72057943fa71ae1ae"
+
 [[package]]
 name = "quick-xml"
 version = "0.37.5"
@@ -166,14 +387,77 @@ version = "5.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
 
+[[package]]
+name = "rayon"
+version = "1.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d"
+dependencies = [
+ "either",
+ "rayon-core",
+]
+
+[[package]]
+name = "rayon-core"
+version = "1.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
+dependencies = [
+ "crossbeam-deque",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "redox_syscall"
+version = "0.5.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
+dependencies = [
+ "bitflags",
+]
+
+[[package]]
+name = "regex"
+version = "1.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.8.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
+
 [[package]]
 name = "rivet-core"
-version = "0.1.0"
+version = "0.4.0"
 dependencies = [
  "anyhow",
+ "etch",
  "log",
  "petgraph",
+ "pulldown-cmark",
  "quick-xml",
+ "regex",
+ "rowan",
+ "salsa",
  "serde",
  "serde_json",
  "serde_yaml",
@@ -184,17 +468,91 @@ dependencies = [
 name = "rivet-fuzz"
 version = "0.0.0"
 dependencies = [
+ "arbitrary",
  "libfuzzer-sys",
  "rivet-core",
+ "serde_json",
  "serde_yaml",
 ]
 
+[[package]]
+name = "rowan"
+version = "0.16.2"
+source = "git+https://github.com/pulseengine/rowan.git?branch=fix%2Fmiri-soundness-v2#dcbece400019397b97764070435eba62c7aa5336"
+dependencies = [
+ "countme",
+ "hashbrown 0.15.5",
+ "rustc-hash",
+ "text-size",
+]
+
+[[package]]
+name = "rustc-hash"
+version = "2.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe"
+
+[[package]]
+name = "rustversion"
+version = "1.0.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
+
 [[package]]
 name = "ryu"
 version = "1.0.23"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f"
 
+[[package]]
+name = "salsa"
+version = "0.26.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a07bc2a7df3f8e2306434a172a694d44d14fda738d08aad5f2f7f747d2f06fdc"
+dependencies = [
+ "boxcar",
+ "crossbeam-queue",
+ "crossbeam-utils",
+ "hashbrown 0.15.5",
+ "hashlink",
+ "indexmap",
+ "intrusive-collections",
+ "inventory",
+ "parking_lot",
+ "portable-atomic",
+ "rayon",
+ "rustc-hash",
+ "salsa-macro-rules",
+ "salsa-macros",
+ "smallvec",
+ "thin-vec",
+ "tracing",
+]
+
+[[package]]
+name = "salsa-macro-rules"
+version = "0.26.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec256ece77895f4a8d624cecc133dd798c7961a861439740b1c7410a613ee7ba"
+
+[[package]]
+name = "salsa-macros"
+version = "0.26.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "978e5d5c9533ce19b6a58ad91024e1d136f6eec83c4ba98b5ce94c87986c41d8"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "synstructure",
+]
+
+[[package]]
+name = "scopeguard"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
+
 [[package]]
 name = "serde"
 version = "1.0.228"
@@ -257,6 +615,12 @@ version = "1.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
 
+[[package]]
+name = "smallvec"
+version = "1.15.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
+
 [[package]]
 name = "syn"
 version = "2.0.117"
@@ -268,6 +632,29 @@ dependencies = [
  "unicode-ident",
 ]
 
+[[package]]
+name = "synstructure"
+version = "0.13.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "text-size"
+version = "1.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f18aa187839b2bdb1ad2fa35ead8c4c2976b64e4363c386d45ac0f7ee85c9233"
+
+[[package]]
+name = "thin-vec"
+version = "0.2.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "259cdf8ed4e4aca6f1e9d011e10bd53f524a2d0637d7b28450f6c64ac298c4c6"
+
 [[package]]
 name = "thiserror"
 version = "2.0.18"
@@ -288,6 +675,31 @@ dependencies = [
  "syn",
 ]
 
+[[package]]
+name = "tracing"
+version = "0.1.44"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100"
+dependencies = [
+ "pin-project-lite",
+ "tracing-core",
+]
+
+[[package]]
+name = "tracing-core"
+version = "0.1.36"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a"
+dependencies = [
+ "once_cell",
+]
+
+[[package]]
+name = "unicase"
+version = "2.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142"
+
 [[package]]
 name = "unicode-ident"
 version = "1.0.24"
@@ -309,6 +721,12 @@ dependencies = [
  "wit-bindgen",
 ]
 
+[[package]]
+name = "windows-link"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
+
 [[package]]
 name = "wit-bindgen"
 version = "0.51.0"
diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
index 2a5dfd3..0ddc00a 100644
--- a/fuzz/Cargo.toml
+++ b/fuzz/Cargo.toml
@@ -11,6 +11,8 @@ cargo-fuzz = true
 libfuzzer-sys = "0.4"
 rivet-core = { path = "../rivet-core", default-features = false }
 serde_yaml = "0.9"
+serde_json = "1"
+arbitrary = { version = "1.4", features = ["derive"] }
 
 # Prevent this from being included in workspace
 [workspace]
@@ -39,3 +41,34 @@ doc = false
 name = "fuzz_needs_json_import"
 path = "fuzz_targets/fuzz_needs_json_import.rs"
 doc = false
+
+[[bin]]
+name = "yaml_footguns"
+path = "fuzz_targets/yaml_footguns.rs"
+doc = false
+test = false
+bench = false
+
+[[bin]]
+name = "cli_argv"
+path = "fuzz_targets/cli_argv.rs"
+doc = false
+test = false
+bench = false
+
+[[bin]]
+name = "artifact_ids"
+path = "fuzz_targets/artifact_ids.rs"
+doc = false
+test = false
+bench = false
+
+# Non-fuzz smoke harness: runs the same oracle invariants against a
+# fixed set of Mythos-predicted footgun inputs.  Useful when libfuzzer
+# is unavailable and as a reproducibility anchor for findings.
+[[example]]
+name = "oracle_smoke"
+path = "examples/oracle_smoke.rs"
+doc = false
+test = false
+bench = false
diff --git a/fuzz/README.md b/fuzz/README.md
new file mode 100644
index 0000000..d1a19f6
--- /dev/null
+++ b/fuzz/README.md
@@ -0,0 +1,104 @@
+# rivet fuzz targets
+
+Fuzz targets for the rivet artifact-ingest pipeline.  Built on
+[`cargo-fuzz`](https://rust-fuzz.github.io/book/cargo-fuzz.html) +
+`libfuzzer-sys`.
+
+## What each target checks
+
+- **`yaml_footguns`** — adversarial mutations of a known-valid artifact YAML;
+  oracle fails when rivet silently coerces / drops / synthesizes a changed
+  value instead of rejecting with an error.
+- **`cli_argv`** — structurally-generated argv for `rivet-cli`; oracle fails
+  on signal-death, or when `--format json` produces invalid JSON on stdout.
+- **`artifact_ids`** — arbitrary bytes as an `id:` scalar; oracle fails when
+  `Store::insert` → `Store::get` does not round-trip the parsed id byte-exact.
+- **`fuzz_yaml_artifact`** / **`fuzz_schema_merge`** / **`fuzz_reqif_import`**
+  / **`fuzz_document_parse`** / **`fuzz_needs_json_import`** — pre-existing
+  smoke fuzzers that only check for panics in low-level parse paths.
+
+## How to run locally
+
+```bash
+# Once, install the driver.  Requires a nightly toolchain for sanitizer flags.
+cargo install cargo-fuzz --locked
+rustup install nightly
+
+# YAML footgun fuzzer — priority target.
+cargo +nightly fuzz run yaml_footguns -- -max_total_time=60
+
+# Artifact-ID round-trip fuzzer.
+cargo +nightly fuzz run artifact_ids -- -max_total_time=60
+
+# CLI argv fuzzer.  Requires a rivet binary, built in the parent workspace, exposed via $RIVET_BIN.
+(cd .. && cargo build --release --bin rivet)
+RIVET_BIN="$PWD/../target/release/rivet" \
+    cargo +nightly fuzz run cli_argv -- -max_total_time=60
+```
+
+All commands are run from the `fuzz/` directory.  Crashes land in
+`fuzz/artifacts/<target>/` and the evolved corpus in `fuzz/corpus/<target>/`.
+
+## What the oracle considers a failure
+
+The oracle is intentionally conservative: we only flag behavior we can prove
+is wrong from the input text alone.
+
+### `yaml_footguns`
+
+- **panic**  — any `unwrap`, `expect`, arithmetic overflow, or explicit panic
+  in `rivet_core::formats::generic::parse_generic_yaml` or
+  `rivet_core::yaml_hir::extract_generic_artifacts`.
+- **silent-accept** — parse returned `Ok(artifacts)` but at least one of
+  `Artifact::id`, `Artifact::artifact_type`, or `Link::target` is
+  (a) empty, or (b) not a substring of the source YAML.  The substring check
+  is cheap but catches Norway-problem coercions, duplicate-key merges, and
+  null-shorthand phantom links.
+
+### `cli_argv`
+
+- **panic** — the subprocess died from `SIGSEGV`, `SIGABRT`, `SIGILL`, or
+  any other signal.  Non-zero exit codes are NOT failures; rivet is
+  expected to reject malformed argv with a non-zero status.
+- **silent-accept** — `--format json` returned exit 0 with non-empty stdout
+  that does not parse as JSON.  CI pipelines pipe that to `jq`.
+
+### `artifact_ids`
+
+- **panic** — any panic from `parse_generic_yaml` or `Store::insert`.
+- **roundtrip mismatch** — `Store::get(parsed_id)` either returned `None`,
+  or returned an artifact whose `.id` byte-differs from the id we stored.
+  Both indicate silent normalization (whitespace, Unicode, case) in the
+  id handling code path.
+
+## How to classify a finding
+
+When a crash reproducer lands in `fuzz/artifacts/<target>/crash-*`, run:
+
+```bash
+cargo +nightly fuzz fmt <target> artifacts/<target>/crash-<hash>
+```
+
+to pretty-print the structured input.  Classify as follows:
+
+| Symptom in panic message | Class | Likely root cause |
+|---|---|---|
+| `silent-accept: … not present in source` | silent-accept | serde Value coerced YAML 1.1 bool/null/version into a different Rust string |
+| `silent-accept: phantom link` | silent-accept | `yaml_hir.rs` extracted a link target from a `null`/`~`/`""` scalar |
+| `id-roundtrip: … returned None` | roundtrip-bug | `Store` insert-key and lookup-key differ (normalization mismatch) |
+| `rivet-cli died from signal …` | panic | CLI path hit an uncaught assertion or stack overflow |
+| `--format json returned success but stdout is not JSON` | silent-accept | JSON path printed a human-readable error on stdout |
+| plain Rust panic stack | panic | investigate directly; often `unwrap()` on schema lookup |
+
+If the finding reproduces a bug documented in the Mythos pass (e.g.,
+`yaml_hir.rs:530-549` phantom-link, `yaml_cst.rs:517` multi-doc truncation,
+`formats/generic.rs:138` unknown-top-level-key acceptance), that's
+empirical confirmation — file the minimal reproducer as a regression test
+under `rivet-core/tests/yaml_edge_cases.rs`.
+
+## CI
+
+`.github/workflows/fuzz.yml` runs each target for 15 minutes on push-to-main
+and nightly at 06:17 UTC.  Fuzz runs are `continue-on-error: true` so a new
+crash does not block main; crashes upload as workflow artifacts.  The
+evolved corpus is uploaded as an artifact and cached between runs.
diff --git a/fuzz/examples/oracle_smoke.rs b/fuzz/examples/oracle_smoke.rs
new file mode 100644
index 0000000..d0636ee
--- /dev/null
+++ b/fuzz/examples/oracle_smoke.rs
@@ -0,0 +1,349 @@
+//! Standalone smoke test for the yaml_footguns oracle.
+//!
+//! Runs the same `probe()` logic the fuzzer uses against a hand-picked
+//! set of known-footgun YAML inputs.  Intended as a reproducibility
+//! harness — if any invariant fires here, the fuzzer will surface the
+//! same finding in under a second.
+//!
+//! Run with:
+//!   cargo run --release --example oracle_smoke -p rivet-fuzz
+//!
+//! Exit codes:
+//!   0  — no silent-accept bugs triggered in the fixed corpus
+//!   1  — at least one case produced a finding (an oracle fired or `probe()` panicked); see stderr
+//!
+//! NOTE: this is NOT a replacement for `cargo fuzz run yaml_footguns`.
+//! It only exercises the hand-picked Mythos-predicted patterns.
+
+use rivet_core::formats::generic::parse_generic_yaml;
+use rivet_core::model::Artifact;
+
+fn main() {
+    // Names of the cases for which `check` reported a finding.  The probe
+    // count is one per fixed case, so it is just the length of the table.
+    let probes = cases().len();
+    let mut findings: Vec<String> = Vec::new();
+
+    for (name, yaml) in cases() {
+        // Run both parse paths up front and dump what each returned, so a
+        // reader of stderr can tell "silently dropped" from "returned wrong
+        // value" from "correctly rejected".
+        let serde_res = parse_generic_yaml(yaml, None);
+        let hir = rivet_core::yaml_hir::extract_generic_artifacts(yaml);
+        // serde path: the error, or an (id, type) summary plus non-empty links.
+        if let Err(e) = &serde_res {
+            eprintln!("[{name}] serde_err = {e}");
+        }
+        if let Ok(artifacts) = &serde_res {
+            let summary: Vec<_> = artifacts.iter().map(|a| (&a.id, &a.artifact_type)).collect();
+            eprintln!(
+                "[{name}] serde_ok={} artifact(s): {:?}",
+                artifacts.len(),
+                summary
+            );
+            for (i, a) in artifacts.iter().enumerate() {
+                if a.links.is_empty() {
+                    continue;
+                }
+                let links: Vec<_> = a.links.iter().map(|l| (&l.link_type, &l.target)).collect();
+                eprintln!("    a[{i}].links = {links:?}");
+            }
+        }
+
+        // HIR path: counts first, then the links of each artifact that has any.
+        eprintln!(
+            "[{name}] hir artifacts={}  diagnostics={}",
+            hir.artifacts.len(),
+            hir.diagnostics.len()
+        );
+        for sa in &hir.artifacts {
+            if sa.artifact.links.is_empty() {
+                continue;
+            }
+            let links: Vec<_> = sa
+                .artifact
+                .links
+                .iter()
+                .map(|l| (&l.link_type, &l.target))
+                .collect();
+            eprintln!("    hir links = {links:?}");
+        }
+
+        // Finally run the oracle proper and remember the case if it fired.
+        if let Some(msg) = check(name, yaml) {
+            eprintln!("FINDING [{name}]: {msg}");
+            findings.push(name.to_string());
+        }
+    }
+
+    println!("smoke probes: {probes}");
+    println!("silent-accept findings: {}", findings.len());
+    for f in &findings {
+        println!("  - {f}");
+    }
+    if !findings.is_empty() {
+        std::process::exit(1);
+    }
+}
+
+fn cases() -> &'static [(&'static str, &'static str)] {
+    &[
+        (
+            "null-shorthand-link",
+            concat!(
+                "artifacts:\n",
+                "  - id: REQ-001\n",
+                "    type: requirement\n",
+                "    title: seed\n",
+                "    links:\n",
+                "      - type: derives-from\n",
+                "        target: null\n",
+            ),
+        ),
+        (
+            "tilde-shorthand-link",
+            concat!(
+                "artifacts:\n",
+                "  - id: REQ-001\n",
+                "    type: requirement\n",
+                "    title: seed\n",
+                "    links:\n",
+                "      - type: derives-from\n",
+                "        target: ~\n",
+            ),
+        ),
+        (
+            "empty-string-link-target",
+            concat!(
+                "artifacts:\n",
+                "  - id: REQ-001\n",
+                "    type: requirement\n",
+                "    title: seed\n",
+                "    links:\n",
+                "      - type: derives-from\n",
+                "        target: \"\"\n",
+            ),
+        ),
+        (
+            "multi-document",
+            concat!(
+                "artifacts:\n",
+                "  - id: REQ-001\n",
+                "    type: requirement\n",
+                "    title: seed\n",
+                "---\n",
+                "artifacts:\n",
+                "  - id: REQ-999\n",
+                "    type: requirement\n",
+                "    title: second\n",
+            ),
+        ),
+        (
+            "norway-problem-status",
+            concat!(
+                "artifacts:\n",
+                "  - id: REQ-001\n",
+                "    type: requirement\n",
+                "    title: seed\n",
+                "    status: NO\n",
+            ),
+        ),
+        (
+            "norway-problem-id",
+            concat!(
+                "artifacts:\n",
+                "  - id: NO\n",
+                "    type: requirement\n",
+                "    title: seed\n",
+            ),
+        ),
+        (
+            "unknown-top-level-key",
+            concat!(
+                "artifact:\n",
+                "  - id: REQ-001\n",
+                "    type: requirement\n",
+                "    title: seed\n",
+            ),
+        ),
+        (
+            "duplicate-id-key",
+            concat!(
+                "artifacts:\n",
+                "  - id: REQ-001\n",
+                "    id: REQ-XXX\n",
+                "    type: requirement\n",
+                "    title: seed\n",
+            ),
+        ),
+        (
+            "unquoted-date-title",
+            concat!(
+                "artifacts:\n",
+                "  - id: REQ-001\n",
+                "    type: requirement\n",
+                "    title: 2026-04-21\n",
+            ),
+        ),
+        (
+            "unquoted-version-baseline",
+            concat!(
+                "artifacts:\n",
+                "  - id: REQ-001\n",
+                "    type: requirement\n",
+                "    title: seed\n",
+                "    fields:\n",
+                "      baseline: 1.0\n",
+            ),
+        ),
+        (
+            "leading-zero-id",
+            concat!(
+                "artifacts:\n",
+                "  - id: REQ-0001\n",
+                "    type: requirement\n",
+                "    title: seed\n",
+            ),
+        ),
+        (
+            "soft-hyphen-in-id",
+            "artifacts:\n  - id: \"REQ-\u{00AD}001\"\n    type: requirement\n    title: seed\n",
+        ),
+    ]
+}
+
+/// Runs `probe` on one case; a panic inside it is reported as a finding
+/// (with its message) instead of aborting the whole smoke run.
+fn check(_name: &str, yaml: &str) -> Option<String> {
+    std::panic::catch_unwind(|| probe(yaml)).unwrap_or_else(|payload| {
+        let detail = match payload.downcast_ref::<&str>() {
+            Some(msg) => (*msg).to_string(),
+            None => payload.downcast_ref::<String>().cloned().unwrap_or_default(),
+        };
+        Some(format!("probe panicked: {detail}"))
+    })
+}
+
+fn probe(yaml: &str) -> Option<String> {
+    let serde_result = parse_generic_yaml(yaml, None);
+    let hir = rivet_core::yaml_hir::extract_generic_artifacts(yaml);
+
+    // Plain serde entry points: run only so that a panic in them surfaces.
+    let _ = serde_yaml::from_str::<Artifact>(yaml);
+    let _ = serde_yaml::from_str::<Vec<Artifact>>(yaml);
+
+    // Oracle: serde path.  Ids, types and link targets must be non-empty and
+    // occur verbatim in the source.  An Err result contributes no artifacts.
+    for a in serde_result.iter().flatten() {
+        if a.id.is_empty() {
+            return Some(format!(
+                "serde: empty id returned (silent-accept)\nYAML:\n{yaml}"
+            ));
+        }
+        if !yaml.contains(&a.id) {
+            return Some(format!(
+                "serde: id {:?} not present in source (silent-accept / coercion)\nYAML:\n{yaml}",
+                a.id
+            ));
+        }
+        if a.artifact_type.is_empty() {
+            return Some(format!(
+                "serde: empty type returned (silent-accept)\nYAML:\n{yaml}"
+            ));
+        }
+        if !yaml.contains(&a.artifact_type) {
+            return Some(format!(
+                "serde: type {:?} not present in source\nYAML:\n{yaml}",
+                a.artifact_type
+            ));
+        }
+        for l in &a.links {
+            if l.target.is_empty() {
+                return Some(format!(
+                    "serde: phantom link (empty target)\nYAML:\n{yaml}"
+                ));
+            }
+            if !yaml.contains(&l.target) {
+                return Some(format!(
+                    "serde: link target {:?} not present in source\nYAML:\n{yaml}",
+                    l.target
+                ));
+            }
+        }
+    }
+
+    // Oracle: HIR path, same substring invariant (an empty id is tolerated).
+    for sa in &hir.artifacts {
+        let id = &sa.artifact.id;
+        if !(id.is_empty() || yaml.contains(id)) {
+            return Some(format!(
+                "hir: id {:?} not present in source\nYAML:\n{yaml}",
+                id
+            ));
+        }
+        for l in &sa.artifact.links {
+            if l.target.is_empty() {
+                return Some(format!(
+                    "hir: phantom link (empty target — yaml_hir.rs:530 bug class)\nYAML:\n{yaml}"
+                ));
+            }
+            if !yaml.contains(&l.target) {
+                return Some(format!(
+                    "hir: link target {:?} not present in source\nYAML:\n{yaml}",
+                    l.target
+                ));
+            }
+        }
+    }
+
+    // Oracle: null-ish link targets, serde path first and then the HIR path.
+    let null_ish = |t: &str| matches!(t.trim(), "null" | "NULL" | "Null" | "~");
+    let nullish_serde_link = serde_result
+        .iter()
+        .flatten()
+        .flat_map(|a| a.links.iter())
+        .find(|l| null_ish(&l.target));
+    if let Some(l) = nullish_serde_link {
+        return Some(format!(
+            "serde: link target coerced from YAML null: {:?} (yaml_hir.rs:530 class)\nYAML:\n{yaml}",
+            l.target
+        ));
+    }
+    for sa in &hir.artifacts {
+        for l in &sa.artifact.links {
+            if !null_ish(&l.target) {
+                continue;
+            }
+            return Some(format!(
+                "hir: link target coerced from YAML null: {:?}\nYAML:\n{yaml}",
+                l.target
+            ));
+        }
+    }
+
+    // Oracle: serde rejected the input, yet HIR reported neither artifacts
+    // nor diagnostics although the text mentions `id:`.
+    let hir_silent = hir.artifacts.is_empty() && hir.diagnostics.is_empty();
+    if serde_result.is_err() && hir_silent && yaml.contains("id:") {
+        return Some(format!(
+            "hir: serde rejected but HIR returned 0 artifacts / 0 diagnostics (formats/generic.rs:138 class)\nYAML:\n{yaml}"
+        ));
+    }
+
+    // Oracle: multi-document silent truncation.  Only relevant when the text
+    // contains a `---` separator on a line of its own.
+    if !yaml.contains("\n---\n") {
+        return None;
+    }
+    let declared = yaml
+        .lines()
+        .filter(|l| l.trim_start().starts_with("- id:"))
+        .count();
+    if declared > hir.artifacts.len() && hir.diagnostics.is_empty() {
+        return Some(format!(
+            "hir: multi-document truncation — source declares {declared} artifacts, HIR returned {} (yaml_cst.rs:517 class)\nYAML:\n{yaml}",
+            hir.artifacts.len()
+        ));
+    }
+    None
+}
diff --git a/fuzz/fuzz_targets/artifact_ids.rs b/fuzz/fuzz_targets/artifact_ids.rs
new file mode 100644
index 0000000..b68edaf
--- /dev/null
+++ b/fuzz/fuzz_targets/artifact_ids.rs
@@ -0,0 +1,97 @@
+#![no_main]
+//! Artifact-ID round-trip fuzzer.
+//!
+//! Feeds arbitrary byte sequences as `id:` values inside an otherwise-valid
+//! artifact YAML document.  Oracle: if the YAML parses, the parsed id must
+//! round-trip through the `Store` — `insert` then `get` returns an artifact
+//! whose id is byte-identical to the parsed one (the raw input is not compared).
+//!
+//! This catches silent normalization in the `Store` (whitespace stripping,
+//! unicode canonicalization, case folding) and insert/get key mismatches.
+
+use libfuzzer_sys::fuzz_target;
+use rivet_core::formats::generic::parse_generic_yaml;
+use rivet_core::store::Store;
+
+fuzz_target!(|data: &[u8]| {
+    // Only valid UTF-8 can be embedded in the YAML text built below.
+    let Ok(raw) = std::str::from_utf8(data) else {
+        return;
+    };
+
+    // Build the candidate id.  Newlines, carriage returns and NULs are
+    // dropped because they would break the surrounding YAML grammar itself
+    // (not rivet's fault); the length is capped at 128 chars.  Exotic
+    // unicode is kept on purpose, since that is part of what we probe.
+    let mut id_raw = String::new();
+    for c in raw.chars().filter(|c| !matches!(*c, '\n' | '\r' | '\0')).take(128) {
+        id_raw.push(c);
+    }
+
+    if id_raw.is_empty() {
+        return;
+    }
+
+    // Emit the id as a double-quoted YAML scalar so that even `:` and `#`
+    // survive into the scalar; `yaml_double_quote` does the escaping.
+    let quoted = yaml_double_quote(&id_raw);
+    let yaml = format!(
+        "artifacts:\n  - id: {quoted}\n    type: requirement\n    title: Fuzz\n"
+    );
+
+    // A parse error or an empty artifact list leaves nothing to round-trip,
+    // so such inputs are skipped.
+    let artifacts = match parse_generic_yaml(&yaml, None) {
+        Ok(list) if !list.is_empty() => list,
+        _ => return,
+    };
+
+    // There must be exactly one artifact returned.  Anything else is a bug.
+    let count = artifacts.len();
+    assert_eq!(
+        count,
+        1,
+        "id-roundtrip: expected 1 artifact, got {} for id={id_raw:?}",
+        count
+    );
+
+    // Take the single artifact, remember the id the parser produced, and
+    // hand the artifact to a fresh store.
+    let artifact = artifacts.into_iter().next().unwrap();
+    let parsed_id = artifact.id.clone();
+    let mut store = Store::new();
+    store.insert(artifact).expect("first insert cannot fail");
+
+    // Look the artifact up again under the parsed id; a miss right after a
+    // successful insert is a finding.
+    let Some(fetched) = store.get(&parsed_id) else {
+        panic!(
+            "id-roundtrip: Store::insert succeeded but Store::get({parsed_id:?}) returned None"
+        )
+    };
+
+    // The stored artifact must carry exactly the id it was inserted under.
+    assert_eq!(
+        fetched.id, parsed_id,
+        "id-roundtrip: fetched id differs from inserted id\n  inserted={parsed_id:?}\n  fetched={:?}",
+        fetched.id
+    );
+});
+
+/// Renders `s` as a YAML double-quoted scalar with every unsafe char escaped.
+fn yaml_double_quote(s: &str) -> String {
+    let mut out = String::with_capacity(s.len() + 2);
+    out.push('"');
+    for c in s.chars() {
+        match c {
+            '"' => out.push_str("\\\""),
+            '\\' => out.push_str("\\\\"),
+            // Control chars other than tab, plus U+FFFE/U+FFFF, go out as \uXXXX.
+            c if (c.is_control() && c != '\t') || matches!(c, '\u{FFFE}' | '\u{FFFF}') => {
+                out.push_str(&format!("\\u{:04X}", c as u32));
+            }
+            c => out.push(c),
+        }
+    }
+    out + "\""
+}
diff --git a/fuzz/fuzz_targets/cli_argv.rs b/fuzz/fuzz_targets/cli_argv.rs
new file mode 100644
index 0000000..0d8b8d5
--- /dev/null
+++ b/fuzz/fuzz_targets/cli_argv.rs
@@ -0,0 +1,238 @@
+#![no_main]
+//! CLI argv fuzzer.
+//!
+//! Drives `rivet-cli` with structurally-generated argv sequences.  Oracle:
+//!   * process must terminate normally — death by signal (SIGSEGV, SIGABRT,
+//!     ...) is reported as a crash,
+//!   * when `--format json` is requested and the process exits successfully,
+//!     stdout must be parseable JSON *or* empty,
+//!   * NOT YET CHECKED: path arguments containing `../` or absolute `/etc/`
+//!     should be rejected; this target has no oracle for that so far.
+//!
+//! IMPORTANT: this target spawns `rivet` as a subprocess per iteration.  That
+//! is 10^4-10^5 x slower than an in-process fuzzer and will not produce
+//! millions of execs/sec.  It is still useful for hitting clap parsing paths
+//! and panic-surface in argument validation.  The env var `RIVET_BIN` must
+//! point at a pre-built rivet binary; we skip the target if unset, so the
+//! fuzzer does not crash-loop on a missing binary.
+//!
+//! To run this target locally after building:
+//!   cargo build --release --bin rivet
+//!   RIVET_BIN=$PWD/target/release/rivet \
+//!       cargo +nightly fuzz run cli_argv -- -max_total_time=60
+
+use arbitrary::{Arbitrary, Unstructured};
+use libfuzzer_sys::fuzz_target;
+use std::process::{Command, Stdio};
+use std::time::Duration;
+
+#[derive(Debug, Arbitrary)]
+enum Subcommand {
+    Validate,
+    List,
+    ListJson,
+    Coverage,
+    Stats,
+    Commits,
+    Add,
+    Modify,
+    Stamp,
+    Query,
+    Variant,
+    Help,
+}
+
+#[derive(Debug, Arbitrary)]
+enum Flag {
+    FormatJson,
+    FormatYaml,
+    FormatText,
+    Type(String),
+    Baseline(String),
+    Path(String),
+    Unknown(String),
+}
+
+#[derive(Debug, Arbitrary)]
+struct ArgvInput {
+    subcommand: Subcommand,
+    flags: Vec<Flag>,
+    positional: Vec<String>,
+}
+
+fn subcommand_name(s: &Subcommand) -> Option<&'static [&'static str]> {
+    Some(match s {
+        Subcommand::Validate => &["validate"],
+        Subcommand::List => &["list"],
+        Subcommand::ListJson => &["list", "--format", "json"],
+        Subcommand::Coverage => &["coverage"],
+        Subcommand::Stats => &["stats"],
+        Subcommand::Commits => &["commits"],
+        Subcommand::Add => &["add"],
+        Subcommand::Modify => &["modify"],
+        Subcommand::Stamp => &["stamp"],
+        Subcommand::Query => &["query"],
+        Subcommand::Variant => &["variant"],
+        Subcommand::Help => &["--help"],
+    })
+}
+
+/// Drops NUL characters (rejected by `std::process` on unix) and keeps at most 64 chars.
+fn sanitize(s: &str) -> String {
+    let mut cleaned = String::new();
+    for ch in s.chars().filter(|ch| *ch != '\0').take(64) {
+        cleaned.push(ch);
+    }
+    cleaned
+}
+
+/// Flattens a fuzz input into the argument vector handed to the binary.
+///
+/// Order: the subcommand's words, then at most six flags, then at most four
+/// positionals.  Every fuzzer-supplied string goes through `sanitize`.
+fn build_argv(input: &ArgvInput) -> Vec<String> {
+    // `subcommand_name` yields an optional slice of words; flatten it.
+    let mut argv: Vec<String> = subcommand_name(&input.subcommand)
+        .into_iter()
+        .flatten()
+        .map(|word| word.to_string())
+        .collect();
+
+    for flag in input.flags.iter().take(6) {
+        // Every variant except `Unknown` expands to a `<switch> <value>` pair.
+        let (switch, value) = match flag {
+            Flag::FormatJson => ("--format", "json".to_string()),
+            Flag::FormatYaml => ("--format", "yaml".to_string()),
+            Flag::FormatText => ("--format", "text".to_string()),
+            Flag::Type(t) => ("--type", sanitize(t)),
+            Flag::Baseline(b) => ("--baseline", sanitize(b)),
+            Flag::Path(p) => ("-p", sanitize(p)),
+            Flag::Unknown(u) => {
+                // Free-form token: pushed on its own, and only when something
+                // is left after sanitizing.
+                let cleaned = sanitize(u);
+                if !cleaned.is_empty() {
+                    argv.push(cleaned);
+                }
+                continue;
+            }
+        };
+        // The value is pushed even when it sanitized down to an empty string.
+        argv.push(switch.to_string());
+        argv.push(value);
+    }
+
+    // Positionals come last; those that sanitize to the empty string are
+    // dropped.
+    argv.extend(
+        input
+            .positional
+            .iter()
+            .take(4)
+            .map(|p| sanitize(p))
+            .filter(|cleaned| !cleaned.is_empty()),
+    );
+
+    argv
+}
+
+/// Returns true if the argv requested JSON output.
+///
+/// JSON is considered requested when some element equal to `--format` is
+/// immediately followed by an element equal to `json`; any other spelling
+/// is not recognized.
+fn is_json_format(argv: &[String]) -> bool {
+    // `windows(2)` visits every adjacent pair and yields nothing for fewer
+    // than two elements.
+    argv.windows(2)
+        .any(|pair| pair[0] == "--format" && pair[1] == "json")
+}
+
+fuzz_target!(|input: ArgvInput| {
+    let Ok(bin) = std::env::var("RIVET_BIN") else {
+        // No binary configured → skip quietly.  We don't want the fuzzer to
+        // treat a missing binary as a crash.
+        return;
+    };
+    let argv = build_argv(&input);
+    let json_mode = is_json_format(&argv);
+
+    let mut cmd = Command::new(&bin);
+    cmd.args(&argv)
+        .stdin(Stdio::null())
+        .stdout(Stdio::piped())
+        .stderr(Stdio::piped())
+        // Start from an empty environment; only PATH and HOME pass through.
+        .env_clear()
+        .env("PATH", std::env::var_os("PATH").unwrap_or_default())
+        .env("HOME", std::env::var_os("HOME").unwrap_or_default())
+        // Prevent update check from making network calls.
+        .env("RIVET_NO_UPDATE_CHECK", "1");
+
+    let Ok(mut child) = cmd.spawn() else {
+        return;
+    };
+
+    // Poor-man's 5-second timeout: poll `try_wait` every 20 ms (std has no
+    // wait-with-timeout).  The pipes are not drained while polling, so a
+    // child that fills a pipe buffer stalls until the timeout kills it.
+    let start = std::time::Instant::now();
+    loop {
+        match child.try_wait() {
+            Ok(Some(status)) => {
+                // Oracle: the child must not be killed by a signal (SIGSEGV, ...).
+                if let Some(sig) = status_signal(&status) {
+                    panic!("rivet-cli died from signal {sig} on argv {argv:?}");
+                }
+                // The child has exited; this only collects what is left in the pipes.
+                let output = child
+                    .wait_with_output()
+                    .unwrap_or_else(|_| std::process::Output {
+                        status,
+                        stdout: Vec::new(),
+                        stderr: Vec::new(),
+                    });
+                // Oracle: a Rust panic under the default unwind strategy
+                // exits with code 101 instead of dying from a signal.
+                if status.code() == Some(101) {
+                    let stderr = String::from_utf8_lossy(&output.stderr);
+                    panic!("rivet-cli panicked (exit code 101) on argv {argv:?}\n{stderr}");
+                }
+                if json_mode && status.success() && !output.stdout.is_empty() {
+                    let stdout = std::str::from_utf8(&output.stdout).unwrap_or("");
+                    if serde_json::from_str::<serde_json::Value>(stdout).is_err() {
+                        // Oracle: a successful `--format json` run prints JSON or
+                        // nothing; subcommands without JSON support must fail.
+                        panic!(
+                            "--format json returned success but stdout is not JSON\nargv={argv:?}\nstdout={stdout:?}"
+                        );
+                    }
+                }
+                return;
+            }
+            Ok(None) => {
+                if start.elapsed() > Duration::from_secs(5) {
+                    let _ = child.kill();
+                    let _ = child.wait();
+                    return;
+                }
+                std::thread::sleep(Duration::from_millis(20));
+            }
+            Err(_) => return,
+        }
+    }
+});
+
+/// Signal that terminated the child, if any; always `None` off unix.
+#[cfg(unix)]
+fn status_signal(status: &std::process::ExitStatus) -> Option<i32> {
+    std::os::unix::process::ExitStatusExt::signal(status)
+}
+
+#[cfg(not(unix))]
+fn status_signal(_status: &std::process::ExitStatus) -> Option<i32> {
+    None
+}
+
+// Keeps the `Unstructured` import used; not a re-export.  NOTE(review): likely removable with it.
+#[allow(dead_code)]
+fn _unstructured_marker(_u: Unstructured<'_>) {}
diff --git a/fuzz/fuzz_targets/yaml_footguns.rs b/fuzz/fuzz_targets/yaml_footguns.rs
new file mode 100644
index 0000000..04566c2
--- /dev/null
+++ b/fuzz/fuzz_targets/yaml_footguns.rs
@@ -0,0 +1,385 @@
+#![no_main]
+//! YAML-footguns fuzzer.
+//!
+//! Empirically measures how often rivet's artifact-ingest pipeline silently
+//! corrupts structurally-wrong YAML inputs (arxiv:2604.13108 claim:
+//! "YAML silently corrupts ~50% of structural errors").
+//!
+//! Oracle: for each adversarial mutation of a *known-valid* artifact YAML,
+//! rivet must either
+//!   (a) reject the input with an Error-severity diagnostic, or
+//!   (b) preserve the intended value exactly
+//! but never silently coerce/drop/synthesize a changed artifact without error.
+//!
+//! Complements `rivet-core/tests/differential_yaml.rs` (which catches cases
+//! where rowan and serde_yaml *disagree*).  This target catches cases where
+//! they *agree on a wrong AST* — the silent-accept class.
+//!
+//! Classification of findings (see `fuzz/README.md`):
+//!   * panic          — any target panic (double fault)
+//!   * silent-accept  — YAML parses to Ok(artifacts), but the resulting id /
+//!                      link / field differs from the textually-present value
+//!   * coercion       — scalar value (YAML 1.1 "Norway", version, date, etc.)
+//!                      was silently re-typed
+
+use arbitrary::Arbitrary;
+use libfuzzer_sys::fuzz_target;
+use rivet_core::formats::generic::parse_generic_yaml;
+use rivet_core::model::Artifact;
+
+/// A single adversarial mutation applied to a seed artifact-YAML document.
+#[derive(Debug, Clone, Arbitrary)]
+enum Footgun {
+    /// YAML 1.1 Norway problem: replace a scalar with an unquoted boolean/null.
+    Norway { which_field: u8, variant: u8 },
+    /// Strip quotes from a version-like string.
+    VersionCoercion { which_field: u8 },
+    /// Prepend `0` to the integer suffix of the id.
+    LeadingZeroId,
+    /// Replace the title scalar with an unquoted date.
+    UnquotedDate,
+    /// Duplicate the `id:` or `type:` key.
+    DuplicateKey { which: u8 },
+    /// Replace a space-indent line with tab indent.
+    TabIndent { line_offset: u8 },
+    /// Inject a second `---\nartifacts: [...]\n` document.
+    MultiDocument,
+    /// Set a shorthand-link-style field to null / ~ / "".
+    NullShorthandLink { variant: u8 },
+    /// Rename the top-level `artifacts:` key.
+    UnknownTopLevelKey { variant: u8 },
+    /// Emit an anchor / alias cycle inside a field value.
+    AnchorCycle,
+    /// Deeply nest a list inside the `fields:` map.
+    DeepNesting { depth: u8 },
+    /// Insert a NUL/soft-hyphen/trailing-space into the id value.
+    ControlCharInId { variant: u8 },
+}
+
+/// Wrapper to drive multiple footgun mutations per input.
+#[derive(Debug, Arbitrary)]
+struct FuzzInput {
+    footguns: Vec<Footgun>,
+}
+
+const SEED_YAML: &str = "artifacts:\n  - id: REQ-001\n    type: requirement\n    title: Seed requirement\n    status: draft\n    tags: [safety]\n    links:\n      - type: derives-from\n        target: REQ-000\n    fields:\n      priority: must\n      baseline: v0.1.0\n";
+
+fuzz_target!(|input: FuzzInput| {
+    // Start from the known-valid seed and layer at most 3 mutations on top,
+    // in input order (more than that often yields invalid YAML that just
+    // errors out — not interesting).
+    let mutations = input.footguns.iter().take(3);
+    let yaml = mutations.fold(SEED_YAML.to_string(), |doc, f| apply_footgun(&doc, f));
+
+    probe(&yaml);
+});
+
+/// Raw-bytes entry point: runs `probe` on `data` when it is valid UTF-8 and
+/// ignores it otherwise.  No caller is visible in this part of the file.
+#[allow(dead_code)]
+fn probe_raw(data: &[u8]) {
+    if let Ok(s) = std::str::from_utf8(data) {
+        probe(s);
+    }
+}
+
+/// Runs the oracles on one YAML document, panicking on the first violation.
+/// The text is parsed through both the serde path and the rowan (HIR) path;
+/// any panic — from rivet itself or from an assertion below — fails the target.
+fn probe(yaml: &str) {
+    // 1. Direct serde parse (`formats::generic::parse_generic_yaml`).
+    let serde_result = parse_generic_yaml(yaml, None);
+
+    // 2. Rowan HIR extraction, the path the LSP uses.
+    let hir = rivet_core::yaml_hir::extract_generic_artifacts(yaml);
+
+    // 3. Full artifact-level deserialize (some adapters use this).
+    let _ = serde_yaml::from_str::<Artifact>(yaml);
+    let _ = serde_yaml::from_str::<Vec<Artifact>>(yaml);
+
+    // Oracle 1: if parse_generic_yaml returned Ok, every returned id must
+    // literally appear in the source text.  A returned id that is NOT a
+    // substring of the source is a silent-synthesis bug (Norway coercion,
+    // duplicate key merge, etc.).
+    if let Ok(artifacts) = &serde_result {
+        for a in artifacts {
+            // An empty id passes all substring checks but is itself a silent
+            // acceptance bug — every artifact must have a non-empty id.
+            assert!(
+                !a.id.is_empty(),
+                "silent-accept: empty id returned by parse_generic_yaml\nYAML:\n{yaml}"
+            );
+            // Exact substring match: the id must appear as-is in the source.
+            // This catches Norway-problem coercions (e.g., `NO` being turned
+            // into `false` and re-serialized as the string `"false"`).
+            assert!(
+                yaml.contains(&a.id),
+                "silent-accept: parse_generic_yaml returned id {:?} not present in source\nYAML:\n{yaml}",
+                a.id
+            );
+            // Same oracle for artifact_type.
+            assert!(
+                !a.artifact_type.is_empty(),
+                "silent-accept: empty type returned by parse_generic_yaml\nYAML:\n{yaml}"
+            );
+            assert!(
+                yaml.contains(&a.artifact_type),
+                "silent-accept: parse_generic_yaml returned type {:?} not present in source\nYAML:\n{yaml}",
+                a.artifact_type
+            );
+            // Link targets must also be source-present substrings.
+            for l in &a.links {
+                assert!(
+                    !l.target.is_empty(),
+                    "silent-accept: link with empty target (phantom link)\nYAML:\n{yaml}"
+                );
+                assert!(
+                    yaml.contains(&l.target),
+                    "silent-accept: link target {:?} not present in source\nYAML:\n{yaml}",
+                    l.target
+                );
+            }
+        }
+    }
+
+    // Oracle 2: HIR path.  Same substring invariant (empty ids are skipped).
+    for sa in &hir.artifacts {
+        let a = &sa.artifact;
+        if !a.id.is_empty() {
+            assert!(
+                yaml.contains(&a.id),
+                "silent-accept: yaml_hir returned id {:?} not present in source\nYAML:\n{yaml}",
+                a.id
+            );
+        }
+        for l in &a.links {
+            assert!(
+                !l.target.is_empty(),
+                "silent-accept: yaml_hir phantom link (empty target)\nYAML:\n{yaml}"
+            );
+            assert!(
+                yaml.contains(&l.target),
+                "silent-accept: yaml_hir link target {:?} not present in source\nYAML:\n{yaml}",
+                l.target
+            );
+        }
+    }
+
+    // Oracle 3: "null-ish" link targets are always a phantom link.
+    // A link whose target text is `null` / `NULL` / `Null` / `~` (after
+    // trimming) is not a real artifact id; it was materialized from a YAML
+    // null.  This is the `yaml_hir.rs:530-549` bug class.  The `target: ""`
+    // spelling is covered by the empty-target asserts in Oracles 1 and 2.
+    // The serde path is checked first, then the HIR path.
+    if let Ok(list) = &serde_result {
+        for a in list {
+            for l in &a.links {
+                let t = l.target.trim();
+                assert!(
+                    !matches!(t, "null" | "~" | "NULL" | "Null"),
+                    "silent-accept: link target coerced from YAML null: {:?}\nYAML:\n{yaml}",
+                    l.target
+                );
+            }
+        }
+    }
+    for sa in &hir.artifacts {
+        for l in &sa.artifact.links {
+            let t = l.target.trim();
+            assert!(
+                !matches!(t, "null" | "~" | "NULL" | "Null"),
+                "silent-accept: hir link target coerced from YAML null: {:?}\nYAML:\n{yaml}",
+                l.target
+            );
+        }
+    }
+
+    // Oracle 4: HIR+serde disagree on parse outcome for the top-level
+    // `artifacts:` key.  If serde rejects the input (for any reason, e.g.
+    // "missing field `artifacts`") and HIR returns 0 artifacts with 0
+    // diagnostics, that is the `formats/generic.rs:138` Ok(vec![])
+    // silent-accept.  Only this exact shape is flagged.
+    if serde_result.is_err() && hir.artifacts.is_empty() && hir.diagnostics.is_empty() {
+        // If the source text contains NO mention of any artifact id shape,
+        // zero artifacts is the correct outcome.  We only panic when the
+        // source clearly intended to declare artifacts but HIR dropped them
+        // silently.  Heuristic: the source contains the substring `id:`.
+        if yaml.contains("id:") {
+            panic!(
+                "silent-accept: serde rejected input but yaml_hir returned 0 artifacts / 0 diagnostics (formats/generic.rs:138 class)\nYAML:\n{yaml}"
+            );
+        }
+    }
+
+    // Oracle 5: multi-document silent truncation (`yaml_cst.rs:517`).
+    // If the source contains a `---` separator line, HIR will often keep
+    // only the first document.  We compare the number of lines that start
+    // (after any indentation) with `- id:` against the number of artifacts
+    // HIR returned and flag a shortfall that came with no diagnostics.
+    // This is heuristic but empirically catches the known multi-doc bug.
+    if yaml.contains("\n---\n") {
+        // Count approximate declared artifacts: every line whose first
+        // non-whitespace text is `- id:`, whatever its indentation.
+        let declared: usize = yaml
+            .lines()
+            .filter(|l| l.trim_start().starts_with("- id:"))
+            .count();
+        if declared > hir.artifacts.len() && hir.diagnostics.is_empty() {
+            panic!(
+                "silent-accept: multi-document truncation — source declares {declared} artifacts but HIR returned {} with no diagnostics (yaml_cst.rs:517 class)\nYAML:\n{yaml}",
+                hir.artifacts.len()
+            );
+        }
+    }
+}
+
+// ── Mutation machinery ────────────────────────────────────────────────────
+
+/// Apply one adversarial mutation `f` to the seed document `yaml`.
+///
+/// Each arm is a textual rewrite that returns a new `String`.  Arms keyed on
+/// seed-specific text (`REQ-001`, `target: REQ-000`, ...) return an unchanged
+/// copy when that text is absent.  Selector numbers are reduced with `%` (here
+/// or in the helper they are handed to), so any value picks an alternative.
+fn apply_footgun(yaml: &str, f: &Footgun) -> String {
+    match f {
+        // Overwrite one field's value with a bool/null look-alike scalar.
+        Footgun::Norway { which_field, variant } => {
+            replace_field_value(yaml, pick_field(*which_field), norway_variant(*variant))
+        }
+        // Overwrite one field's value with the unquoted scalar `1.0`.
+        Footgun::VersionCoercion { which_field } => {
+            replace_field_value(yaml, pick_field(*which_field), "1.0")
+        }
+        // Widen every `REQ-001` to `REQ-0001`.
+        Footgun::LeadingZeroId => yaml.replace("REQ-001", "REQ-0001"),
+        // Put a bare date-shaped scalar where the first `title:` value was.
+        Footgun::UnquotedDate => replace_field_value(yaml, "title", "2026-04-21"),
+        // Insert a second `id:` (even selector) or `type:` (odd) entry, with a
+        // different value, ahead of every four-space-prefixed occurrence.
+        // NOTE(review): a `  - id:` list-item line does not contain `    id:`,
+        // so the `id` case may never fire on the seed -- confirm.
+        Footgun::DuplicateKey { which } => {
+            let key = if *which % 2 == 0 { "id" } else { "type" };
+            let original = format!("    {key}:");
+            let doubled = format!("    {key}: DUPLICATE-VAL\n    {key}:");
+            yaml.replace(&original, &doubled)
+        }
+        // Swap the first run of four spaces on one line (`line_offset` modulo
+        // the line count) for a tab.  Lines are split with `str::lines` and
+        // re-joined with `\n`, and the output always ends with a newline.
+        Footgun::TabIndent { line_offset } => {
+            let victim = (*line_offset as usize).checked_rem(yaml.lines().count());
+            let retabbed = |(n, line): (usize, &str)| match victim {
+                Some(hit) if hit == n => line.replacen("    ", "\t", 1),
+                _ => line.to_owned(),
+            };
+            let lines: Vec<String> = yaml.lines().enumerate().map(retabbed).collect();
+            lines.join("\n") + "\n"
+        }
+        // Append a second YAML document holding one extra artifact.
+        Footgun::MultiDocument => {
+            let mut doc = yaml.to_owned();
+            doc.push_str("\n---\nartifacts:\n  - id: REQ-999\n    type: requirement\n    title: Second doc\n");
+            doc
+        }
+        // Replace the seed's `target: REQ-000` with `null`, `~`, or an empty
+        // double-quoted string (selector modulo 3).
+        Footgun::NullShorthandLink { variant } => {
+            let nullish = match variant % 3 {
+                0 => "null",
+                1 => "~",
+                _ => "\"\"",
+            };
+            yaml.replace("target: REQ-000", &["target: ", nullish].concat())
+        }
+        // Rename the first `artifacts:` to a near-miss key (selector modulo 3).
+        Footgun::UnknownTopLevelKey { variant } => {
+            let near_miss = match variant % 3 {
+                0 => "artifact:",  // singular
+                1 => "Artifacts:", // capitalised
+                _ => "artifcats:", // transposed letters
+            };
+            yaml.replacen("artifacts:", near_miss, 1)
+        }
+        // Under every `    fields:` line, add a `cycle` mapping whose `self`
+        // value is an alias to the mapping's own anchor.
+        Footgun::AnchorCycle => {
+            let header = "    fields:\n";
+            let with_cycle = "    fields:\n      cycle: &x\n        self: *x\n";
+            yaml.replace(header, with_cycle)
+        }
+        // After every `      priority: must` line, add a `deep:` flow sequence
+        // nested `max(depth % 40, 2) + 1` brackets deep around `inner`.
+        Footgun::DeepNesting { depth } => {
+            let brackets = (*depth % 40).max(2) as usize + 1;
+            let nested = format!("{}inner{}", "[".repeat(brackets), "]".repeat(brackets));
+            let anchor = "      priority: must\n";
+            yaml.replace(anchor, &format!("{anchor}      deep: {nested}\n"))
+        }
+        // Taint every `REQ-001` with NUL, a soft hyphen, or a trailing space.
+        Footgun::ControlCharInId { variant } => {
+            let tainted = match variant % 3 {
+                0 => "REQ-\u{0000}001", // NUL
+                1 => "REQ-\u{00AD}001", // soft hyphen (U+00AD)
+                _ => "REQ-001 ",        // trailing space
+            };
+            yaml.replace("REQ-001", tainted)
+        }
+    }
+}
+
+/// Pick the scalar payload for a `Footgun::Norway` mutation.
+///
+/// `v` is reduced modulo the table length (12), so any `u8` selects an entry.
+/// Entries 0..=8 are yes/no/on-off/true/false-style words in assorted casings;
+/// entries 9..=11 are the null spellings `~`, `null` and `NULL`.
+/// The text is returned bare (no YAML quoting is added here).
+fn norway_variant(v: u8) -> &'static str {
+    // Order is significant: the index is the variant number after `% 12`.
+    const PAYLOADS: [&str; 12] = [
+        "NO", "no", "Off", "off", // 0..=3
+        "yes", "YES", "true", "TRUE", // 4..=7
+        "FALSE", "~", "null", "NULL", // 8..=11
+    ];
+    let slot = usize::from(v) % PAYLOADS.len();
+    PAYLOADS[slot]
+}
+
+/// Map a selector byte onto the name of the YAML key to mutate.
+///
+/// The selector wraps modulo the number of candidates (6), so every `u8` is
+/// valid: 0 is `id`, 1 `title`, 2 `status`, 3 `target`, 4 `priority` and 5
+/// `baseline`.
+fn pick_field(idx: u8) -> &'static str {
+    const FIELDS: [&str; 6] = ["id", "title", "status", "target", "priority", "baseline"];
+    let slot = usize::from(idx) % FIELDS.len();
+    FIELDS[slot]
+}
+
+/// Replace the value of the first `<field>:` key in `yaml` with `new_value`.
+/// The key must open its line after indentation, optionally behind a `- `
+/// sequence marker (`- id: X`), so list-item keys such as artifact ids match.
+/// Indentation, marker and line ending (`\n`, `\r\n` or none) are preserved;
+/// `yaml` is returned unchanged when no line carries the key.
+fn replace_field_value(yaml: &str, field: &str, new_value: &str) -> String {
+    let mut out = String::with_capacity(yaml.len() + new_value.len());
+    let mut lines = yaml.split_inclusive('\n');
+    for line in lines.by_ref() {
+        let body = line.trim_start();
+        let key = body.strip_prefix("- ").map_or(body, str::trim_start);
+        match key.strip_prefix(field).and_then(|rest| rest.strip_prefix(':')) {
+            Some(old) => {
+                // Keep everything through the colon; reuse the old terminator.
+                out.push_str(&line[..line.len() - old.len()]);
+                out.push(' ');
+                out.push_str(new_value);
+                out.push_str(&old[old.trim_end_matches(|c: char| c == '\r' || c == '\n').len()..]);
+                break;
+            }
+            None => out.push_str(line),
+        }
+    }
+    out.extend(lines);
+    out
+}