From 8738aad727014e9539ddcba12af112989606ebaf Mon Sep 17 00:00:00 2001 From: Ralf Anton Beier Date: Tue, 21 Apr 2026 19:08:22 +0200 Subject: [PATCH] test(fuzz): YAML footgun + CLI argv + artifact-id fuzzers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds three libfuzzer-based targets under fuzz/ that empirically measure the arxiv:2604.13108 "YAML silently corrupts ~50% of structural errors" claim against rivet's actual artifact-ingest pipeline, plus a CLI argv target and an id-roundtrip target. Targets: * yaml_footguns — Arbitrary-driven adversarial mutations of a known valid seed YAML (Norway, version-coercion, leading-zero-id, unquoted-date, duplicate-key, tab-indent, multi-doc, null-shorthand link, unknown top-level key, anchor cycle, deep nesting, control chars in id). Five oracles: source-substring invariant for ids / types / link targets, phantom-link detection, null-ish target detection, serde-rejected-but-hir-accepted detection, and multi-document truncation detection. * cli_argv — structured argv for rivet-cli subprocess; oracle fails on signal-death or when --format json returns success with non-JSON stdout. Gated on $RIVET_BIN env var so it skips silently if no binary is configured. * artifact_ids — arbitrary bytes as id: scalar; oracle requires Store::insert → Store::get to round-trip byte-exact. Also adds fuzz/examples/oracle_smoke.rs — a non-libfuzzer harness that runs the same oracle logic against a fixed set of Mythos-predicted footgun inputs. 
Running `cargo run --release --example oracle_smoke` (before cargo-fuzz is available in CI) produces five findings on current main, empirically confirming: - null / tilde / empty-string link targets produce phantom links (yaml_hir.rs:530-549 bug class) - multi-document YAML is silently truncated by the HIR path (yaml_cst.rs:517 bug class) - renaming `artifacts:` to a sibling key causes the HIR path to return Ok(vec![]) with zero diagnostics (formats/generic.rs:138) CI: .github/workflows/fuzz.yml runs each target for 15 min on push to main and nightly at 06:17 UTC. continue-on-error so new crashes do not block merges; crashes upload as workflow artifacts and the evolved corpus is cached between runs. REQ-052 is scoped to variant-solver fuzzing; these YAML/CLI fuzzers verify the broader parser surface (REQ-028) and CLI surface (REQ-007). Verifies: REQ-028, REQ-007 Refs: REQ-052 Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/fuzz.yml | 92 ++++++ fuzz/Cargo.lock | 430 ++++++++++++++++++++++++++++- fuzz/Cargo.toml | 33 +++ fuzz/README.md | 104 +++++++ fuzz/examples/oracle_smoke.rs | 349 +++++++++++++++++++++++ fuzz/fuzz_targets/artifact_ids.rs | 97 +++++++ fuzz/fuzz_targets/cli_argv.rs | 238 ++++++++++++++++ fuzz/fuzz_targets/yaml_footguns.rs | 385 ++++++++++++++++++++++++++ 8 files changed, 1722 insertions(+), 6 deletions(-) create mode 100644 .github/workflows/fuzz.yml create mode 100644 fuzz/README.md create mode 100644 fuzz/examples/oracle_smoke.rs create mode 100644 fuzz/fuzz_targets/artifact_ids.rs create mode 100644 fuzz/fuzz_targets/cli_argv.rs create mode 100644 fuzz/fuzz_targets/yaml_footguns.rs diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml new file mode 100644 index 0000000..8c274fb --- /dev/null +++ b/.github/workflows/fuzz.yml @@ -0,0 +1,92 @@ +name: fuzz + +# YAML-footgun and CLI fuzz targets. Separate workflow from `ci.yml` because +# fuzz budgets are measured in minutes, not seconds. 
+# +# We run: +# * on every push to main (post-merge confirmation) +# * on a nightly schedule (06:17 UTC) so corpus growth is cumulative +# We do NOT run on PRs — too expensive for the critical path. +# +# Each target gets a 15-minute time budget. The job is marked +# `continue-on-error: true` so a single crash does not block other work; +# crashes are surfaced as artifact uploads. + +on: + push: + branches: [main] + schedule: + # Daily at 06:17 UTC. Offset from round hour to spread CI load. + - cron: "17 6 * * *" + workflow_dispatch: + +concurrency: + group: fuzz-${{ github.ref }} + cancel-in-progress: false + +jobs: + fuzz: + name: fuzz ${{ matrix.target }} + runs-on: ubuntu-latest + continue-on-error: true + strategy: + fail-fast: false + matrix: + target: + - yaml_footguns + - cli_argv + - artifact_ids + timeout-minutes: 25 + + steps: + - uses: actions/checkout@v4 + + - name: Install nightly toolchain + uses: dtolnay/rust-toolchain@nightly + + - name: Install cargo-fuzz + run: cargo install cargo-fuzz --locked + + - name: Build rivet binary (for cli_argv) + if: matrix.target == 'cli_argv' + run: cargo build --release --bin rivet + + - name: Cache fuzz corpora + uses: actions/cache@v4 + with: + path: | + fuzz/corpus/${{ matrix.target }} + fuzz/artifacts/${{ matrix.target }} + key: fuzz-corpus-${{ matrix.target }}-${{ github.sha }} + restore-keys: | + fuzz-corpus-${{ matrix.target }}- + + - name: Run fuzz target for 15 minutes + env: + TARGET: ${{ matrix.target }} + RIVET_BIN: ${{ github.workspace }}/target/release/rivet + run: | + cd fuzz + cargo +nightly fuzz run "$TARGET" -- \ + -max_total_time=900 \ + -timeout=30 \ + -rss_limit_mb=2048 + + - name: Upload crash artifacts + if: failure() || cancelled() + uses: actions/upload-artifact@v4 + with: + name: fuzz-crashes-${{ matrix.target }} + path: | + fuzz/artifacts/${{ matrix.target }}/ + if-no-files-found: ignore + retention-days: 30 + + - name: Upload corpus snapshot + if: always() + uses: 
actions/upload-artifact@v4 + with: + name: fuzz-corpus-${{ matrix.target }} + path: fuzz/corpus/${{ matrix.target }}/ + if-no-files-found: ignore + retention-days: 14 diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index c185d6a..20a2bff 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -2,6 +2,21 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "anyhow" version = "1.0.102" @@ -13,6 +28,27 @@ name = "arbitrary" version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" +dependencies = [ + "derive_arbitrary", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "bitflags" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" + +[[package]] +name = "boxcar" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36f64beae40a84da1b4b26ff2761a5b895c12adc41dc25aaee1c4f2bbfe97a6e" [[package]] name = "cc" @@ -32,12 +68,76 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "countme" +version = "3.0.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7704b5fdd17b18ae31c4c1da5a2e0305a2bf17b5249300a9ee9ed7b72114c636" + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "derive_arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "equivalent" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" +[[package]] +name = "etch" +version = "0.4.0" +dependencies = [ + "petgraph", +] + [[package]] name = "find-msvc-tools" version = "0.1.9" @@ -46,9 +146,15 @@ checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" 
[[package]] name = "fixedbitset" -version = "0.4.2" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" [[package]] name = "getrandom" @@ -62,12 +168,32 @@ dependencies = [ "wasip2", ] +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + [[package]] name = "hashbrown" version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +[[package]] +name = "hashlink" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" +dependencies = [ + "hashbrown 0.15.5", +] + [[package]] name = "indexmap" version = "2.13.0" @@ -75,7 +201,25 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.16.1", +] + +[[package]] +name = "intrusive-collections" +version = "0.9.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "189d0897e4cbe8c75efedf3502c18c887b05046e59d28404d4d8e46cbc4d1e86" +dependencies = [ + "memoffset", +] + +[[package]] +name = "inventory" +version = "0.3.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a4f0c30c76f2f4ccee3fe55a2435f691ca00c0e4bd87abe4f4a851b1d4dac39b" +dependencies = [ + "rustversion", ] [[package]] @@ -110,6 +254,15 @@ dependencies = [ "cc", ] +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + [[package]] name = "log" version = "0.4.29" @@ -122,16 +275,66 @@ version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + [[package]] name = "petgraph" -version = "0.6.5" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ "fixedbitset", "indexmap", ] +[[package]] +name = "pin-project-lite" 
+version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + [[package]] name = "proc-macro2" version = "1.0.106" @@ -141,6 +344,24 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "pulldown-cmark" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f86ba2052aebccc42cbbb3ed234b8b13ce76f75c3551a303cb2bcffcff12bb14" +dependencies = [ + "bitflags", + "memchr", + "pulldown-cmark-escape", + "unicase", +] + +[[package]] +name = "pulldown-cmark-escape" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "007d8adb5ddab6f8e3f491ac63566a7d5002cc7ed73901f72057943fa71ae1ae" + [[package]] name = "quick-xml" version = "0.37.5" @@ -166,14 +387,77 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "rayon" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" 
+version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + [[package]] name = "rivet-core" -version = "0.1.0" +version = "0.4.0" dependencies = [ "anyhow", + "etch", "log", "petgraph", + "pulldown-cmark", "quick-xml", + "regex", + "rowan", + "salsa", "serde", "serde_json", "serde_yaml", @@ -184,17 +468,91 @@ dependencies = [ name = "rivet-fuzz" version = "0.0.0" dependencies = [ + "arbitrary", "libfuzzer-sys", "rivet-core", + "serde_json", "serde_yaml", ] +[[package]] +name = "rowan" +version = "0.16.2" +source = "git+https://github.com/pulseengine/rowan.git?branch=fix%2Fmiri-soundness-v2#dcbece400019397b97764070435eba62c7aa5336" +dependencies = [ + "countme", + "hashbrown 0.15.5", + "rustc-hash", + "text-size", +] + +[[package]] +name = "rustc-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + [[package]] name = "ryu" version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" 
+[[package]] +name = "salsa" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a07bc2a7df3f8e2306434a172a694d44d14fda738d08aad5f2f7f747d2f06fdc" +dependencies = [ + "boxcar", + "crossbeam-queue", + "crossbeam-utils", + "hashbrown 0.15.5", + "hashlink", + "indexmap", + "intrusive-collections", + "inventory", + "parking_lot", + "portable-atomic", + "rayon", + "rustc-hash", + "salsa-macro-rules", + "salsa-macros", + "smallvec", + "thin-vec", + "tracing", +] + +[[package]] +name = "salsa-macro-rules" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec256ece77895f4a8d624cecc133dd798c7961a861439740b1c7410a613ee7ba" + +[[package]] +name = "salsa-macros" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978e5d5c9533ce19b6a58ad91024e1d136f6eec83c4ba98b5ce94c87986c41d8" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + [[package]] name = "serde" version = "1.0.228" @@ -257,6 +615,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + [[package]] name = "syn" version = "2.0.117" @@ -268,6 +632,29 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + 
+[[package]] +name = "text-size" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f18aa187839b2bdb1ad2fa35ead8c4c2976b64e4363c386d45ac0f7ee85c9233" + +[[package]] +name = "thin-vec" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "259cdf8ed4e4aca6f1e9d011e10bd53f524a2d0637d7b28450f6c64ac298c4c6" + [[package]] name = "thiserror" version = "2.0.18" @@ -288,6 +675,31 @@ dependencies = [ "syn", ] +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", +] + +[[package]] +name = "unicase" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" + [[package]] name = "unicode-ident" version = "1.0.24" @@ -309,6 +721,12 @@ dependencies = [ "wit-bindgen", ] +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + [[package]] name = "wit-bindgen" version = "0.51.0" diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 2a5dfd3..0ddc00a 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -11,6 +11,8 @@ cargo-fuzz = true libfuzzer-sys = "0.4" rivet-core = { path = "../rivet-core", default-features = false } serde_yaml = "0.9" +serde_json = "1" +arbitrary = { version = "1.4", features = ["derive"] } # Prevent this from being included in workspace [workspace] @@ -39,3 +41,34 @@ doc = false name = 
"fuzz_needs_json_import" path = "fuzz_targets/fuzz_needs_json_import.rs" doc = false + +[[bin]] +name = "yaml_footguns" +path = "fuzz_targets/yaml_footguns.rs" +doc = false +test = false +bench = false + +[[bin]] +name = "cli_argv" +path = "fuzz_targets/cli_argv.rs" +doc = false +test = false +bench = false + +[[bin]] +name = "artifact_ids" +path = "fuzz_targets/artifact_ids.rs" +doc = false +test = false +bench = false + +# Non-fuzz smoke harness: runs the same oracle invariants against a +# fixed set of Mythos-predicted footgun inputs. Useful when libfuzzer +# is unavailable and as a reproducibility anchor for findings. +[[example]] +name = "oracle_smoke" +path = "examples/oracle_smoke.rs" +doc = false +test = false +bench = false diff --git a/fuzz/README.md b/fuzz/README.md new file mode 100644 index 0000000..d1a19f6 --- /dev/null +++ b/fuzz/README.md @@ -0,0 +1,104 @@ +# rivet fuzz targets + +Fuzz targets for the rivet artifact-ingest pipeline. Built on +[`cargo-fuzz`](https://rust-fuzz.github.io/book/cargo-fuzz.html) + +`libfuzzer-sys`. + +## What each target checks + +- **`yaml_footguns`** — adversarial mutations of a known-valid artifact YAML; + oracle fails when rivet silently coerces / drops / synthesizes a changed + value instead of rejecting with an error. +- **`cli_argv`** — structurally-generated argv for `rivet-cli`; oracle fails + on signal-death, or when `--format json` produces invalid JSON on stdout. +- **`artifact_ids`** — arbitrary bytes as an `id:` scalar; oracle fails when + `Store::insert` → `Store::get` does not round-trip the parsed id byte-exact. +- **`fuzz_yaml_artifact`** / **`fuzz_schema_merge`** / **`fuzz_reqif_import`** + / **`fuzz_document_parse`** / **`fuzz_needs_json_import`** — pre-existing + smoke fuzzers that only check for panics in low-level parse paths. + +## How to run locally + +```bash +# Once, install the driver. Requires a nightly toolchain for sanitizer flags. 
+cargo install cargo-fuzz --locked +rustup install nightly + +# YAML footgun fuzzer — priority target. +cargo +nightly fuzz run yaml_footguns -- -max_total_time=60 + +# Artifact-ID round-trip fuzzer. +cargo +nightly fuzz run artifact_ids -- -max_total_time=60 + +# CLI argv fuzzer. Requires a pre-built rivet binary exposed via $RIVET_BIN. +(cd .. && cargo build --release --bin rivet) +RIVET_BIN="$PWD/../target/release/rivet" \ + cargo +nightly fuzz run cli_argv -- -max_total_time=60 +``` + +All commands are run from the `fuzz/` directory. Crashes land in +`fuzz/artifacts/<target>/` and the evolved corpus in `fuzz/corpus/<target>/`. + +## What the oracle considers a failure + +The oracle is intentionally conservative: we only flag behavior we can prove +is wrong from the input text alone. + +### `yaml_footguns` + +- **panic** — any `unwrap`, `expect`, arithmetic overflow, or explicit panic + in `rivet_core::formats::generic::parse_generic_yaml` or + `rivet_core::yaml_hir::extract_generic_artifacts`. +- **silent-accept** — parse returned `Ok(artifacts)` but at least one of + `Artifact::id`, `Artifact::artifact_type`, or `Link::target` is + (a) empty, or (b) not a substring of the source YAML. The substring check + is cheap but catches Norway-problem coercions, duplicate-key merges, and + null-shorthand phantom links. + +### `cli_argv` + +- **panic** — the subprocess died from `SIGSEGV`, `SIGABRT`, `SIGILL`, or + any other signal. Non-zero exit codes are NOT failures; rivet is + expected to reject malformed argv with a non-zero status. +- **silent-accept** — `--format json` returned exit 0 with non-empty stdout + that does not parse as JSON. CI pipelines pipe that to `jq`. + +### `artifact_ids` + +- **panic** — any panic from `parse_generic_yaml` or `Store::insert`. +- **roundtrip mismatch** — `Store::get(parsed_id)` either returned `None`, + or returned an artifact whose `.id` byte-differs from the id we stored. 
+ Both indicate silent normalization (whitespace, Unicode, case) in the + id handling code path. + +## How to classify a finding + +When a crash reproducer lands in `fuzz/artifacts/<target>/crash-*`, run: + +```bash +cargo +nightly fuzz fmt <target> fuzz/artifacts/<target>/crash-<hash> +``` + +to pretty-print the structured input. Classify as follows: + +| Symptom in panic message | Class | Likely root cause | +|---|---|---| +| `silent-accept: … not present in source` | silent-accept | serde Value coerced YAML 1.1 bool/null/version into a different Rust string | +| `silent-accept: phantom link` | silent-accept | `yaml_hir.rs` extracted a link target from a `null`/`~`/`""` scalar | +| `id-roundtrip: … returned None` | roundtrip-bug | `Store` insert-key and lookup-key differ (normalization mismatch) | +| `rivet-cli died from signal …` | panic | CLI path hit an uncaught assertion or stack overflow | +| `--format json returned success but stdout is not JSON` | silent-accept | JSON path printed a human-readable error on stdout | +| plain Rust panic stack | panic | investigate directly; often `unwrap()` on schema lookup | + +If the finding reproduces a bug documented in the Mythos pass (e.g., +`yaml_hir.rs:530-549` phantom-link, `yaml_cst.rs:517` multi-doc truncation, +`formats/generic.rs:138` unknown-top-level-key acceptance), that's +empirical confirmation — file the minimal reproducer as a regression test +under `rivet-core/tests/yaml_edge_cases.rs`. + +## CI + +`.github/workflows/fuzz.yml` runs each target for 15 minutes on push-to-main +and nightly at 06:17 UTC. Fuzz runs are `continue-on-error: true` so a new +crash does not block main; crashes upload as workflow artifacts. The +evolved corpus is uploaded as an artifact and cached between runs. diff --git a/fuzz/examples/oracle_smoke.rs b/fuzz/examples/oracle_smoke.rs new file mode 100644 index 0000000..d0636ee --- /dev/null +++ b/fuzz/examples/oracle_smoke.rs @@ -0,0 +1,349 @@ +//! Standalone smoke test for the yaml_footguns oracle. +//! +//! 
Runs the same `probe()` logic the fuzzer uses against a hand-picked
+//! set of known-footgun YAML inputs. Intended as a reproducibility
+//! harness — if any invariant fires here, the fuzzer will surface the
+//! same finding in under a second.
+//!
+//! Run with:
+//!     cargo run --release --example oracle_smoke -p rivet-fuzz
+//!
+//! Exit codes:
+//!     0 — no silent-accept bugs triggered in the fixed corpus
+//!     1 — at least one probe reported a finding or panicked; see stderr for details
+//!
+//! NOTE: this is NOT a replacement for `cargo fuzz run yaml_footguns`.
+//! It only exercises the hand-picked Mythos-predicted patterns.
+
+use rivet_core::formats::generic::parse_generic_yaml;
+use rivet_core::model::Artifact;
+
+fn main() {
+    // Count the number of probes and the number of silent-accept findings.
+    let mut probes = 0usize;
+    let mut findings: Vec<String> = Vec::new();
+
+    for (name, yaml) in cases() {
+        probes += 1;
+        // Diagnostic dump: show what the two parse paths return so a reader
+        // can classify "silently dropped" vs "returned wrong value" vs
+        // "correctly rejected".
+        let serde_res = parse_generic_yaml(yaml, None);
+        let hir = rivet_core::yaml_hir::extract_generic_artifacts(yaml);
+        match &serde_res {
+            Ok(artifacts) => {
+                eprintln!(
+                    "[{name}] serde_ok={} artifact(s): {:?}",
+                    artifacts.len(),
+                    artifacts
+                        .iter()
+                        .map(|a| (&a.id, &a.artifact_type))
+                        .collect::<Vec<_>>()
+                );
+                for (i, a) in artifacts.iter().enumerate() {
+                    if !a.links.is_empty() {
+                        eprintln!(
+                            "  a[{i}].links = {:?}",
+                            a.links.iter().map(|l| (&l.link_type, &l.target)).collect::<Vec<_>>()
+                        );
+                    }
+                }
+            }
+            Err(e) => eprintln!("[{name}] serde_err = {e}"),
+        }
+        eprintln!(
+            "[{name}] hir artifacts={} diagnostics={}",
+            hir.artifacts.len(),
+            hir.diagnostics.len()
+        );
+        for sa in &hir.artifacts {
+            if !sa.artifact.links.is_empty() {
+                eprintln!(
+                    "  hir links = {:?}",
+                    sa.artifact
+                        .links
+                        .iter()
+                        .map(|l| (&l.link_type, &l.target))
+                        .collect::<Vec<_>>()
+                );
+            }
+        }
+
+        let finding = check(name, yaml);
+        if let Some(msg) = finding {
+            eprintln!("FINDING [{name}]: {msg}");
+            findings.push(name.to_string());
+        }
+    }
+
+    println!("smoke probes: {probes}");
+    println!("silent-accept findings: {}", findings.len());
+    for f in &findings {
+        println!("  - {f}");
+    }
+    if !findings.is_empty() {
+        std::process::exit(1);
+    }
+}
+
+fn cases() -> &'static [(&'static str, &'static str)] {
+    &[
+        (
+            "null-shorthand-link",
+            concat!(
+                "artifacts:\n",
+                "  - id: REQ-001\n",
+                "    type: requirement\n",
+                "    title: seed\n",
+                "    links:\n",
+                "      - type: derives-from\n",
+                "        target: null\n",
+            ),
+        ),
+        (
+            "tilde-shorthand-link",
+            concat!(
+                "artifacts:\n",
+                "  - id: REQ-001\n",
+                "    type: requirement\n",
+                "    title: seed\n",
+                "    links:\n",
+                "      - type: derives-from\n",
+                "        target: ~\n",
+            ),
+        ),
+        (
+            "empty-string-link-target",
+            concat!(
+                "artifacts:\n",
+                "  - id: REQ-001\n",
+                "    type: requirement\n",
+                "    title: seed\n",
+                "    links:\n",
+                "      - type: derives-from\n",
+                "        target: \"\"\n",
+            ),
+        ),
+        (
+            "multi-document",
+            concat!(
+                "artifacts:\n",
+                "  - id: REQ-001\n",
+                "    type: requirement\n",
+                "    title: seed\n",
+                "---\n",
+                "artifacts:\n",
+                "  - id: REQ-999\n",
+                "    type: requirement\n",
+                "    title: second\n",
+            ),
+        ),
+        (
+            "norway-problem-status",
+            concat!(
+                "artifacts:\n",
+                "  - id: REQ-001\n",
+                "    type: requirement\n",
+                "    title: seed\n",
+                "    status: NO\n",
+            ),
+        ),
+        (
+            "norway-problem-id",
+            concat!(
+                "artifacts:\n",
+                "  - id: NO\n",
+                "    type: requirement\n",
+                "    title: seed\n",
+            ),
+        ),
+        (
+            "unknown-top-level-key",
+            concat!(
+                "artifact:\n",
+                "  - id: REQ-001\n",
+                "    type: requirement\n",
+                "    title: seed\n",
+            ),
+        ),
+        (
+            "duplicate-id-key",
+            concat!(
+                "artifacts:\n",
+                "  - id: REQ-001\n",
+                "    id: REQ-XXX\n",
+                "    type: requirement\n",
+                "    title: seed\n",
+            ),
+        ),
+        (
+            "unquoted-date-title",
+            concat!(
+                "artifacts:\n",
+                "  - id: REQ-001\n",
+                "    type: requirement\n",
+                "    title: 2026-04-21\n",
+            ),
+        ),
+        (
+            "unquoted-version-baseline",
+            concat!(
+                "artifacts:\n",
+                "  - id: REQ-001\n",
+                "    type: requirement\n",
+                "    title: seed\n",
+                "    fields:\n",
+                "      baseline: 1.0\n",
+            ),
+        ),
+        (
+            "leading-zero-id",
+            concat!(
+                "artifacts:\n",
+                "  - id: REQ-0001\n",
+                "    type: requirement\n",
+                "    title: seed\n",
+            ),
+        ),
+        (
+            "soft-hyphen-in-id",
+            "artifacts:\n  - id: \"REQ-\u{00AD}001\"\n    type: requirement\n    title: seed\n",
+        ),
+    ]
+}
+
+fn check(_name: &str, yaml: &str) -> Option<String> {
+    // Use catch_unwind so a panic in probe() becomes a reported finding
+    // instead of aborting the whole smoke run.
+    let yaml_string = yaml.to_string();
+    let res = std::panic::catch_unwind(std::panic::AssertUnwindSafe(move || probe(&yaml_string)));
+    match res {
+        Ok(None) => None,
+        Ok(Some(msg)) => Some(msg),
+        Err(_) => Some("probe panicked".to_string()),
+    }
+}
+
+fn probe(yaml: &str) -> Option<String> {
+    let serde_result = parse_generic_yaml(yaml, None);
+    let hir = rivet_core::yaml_hir::extract_generic_artifacts(yaml);
+
+    // Also run the plain serde paths to catch panics.
+    let _ = serde_yaml::from_str::<serde_yaml::Value>(yaml);
+    let _ = serde_yaml::from_str::<Vec<Artifact>>(yaml);
+
+    if let Ok(artifacts) = &serde_result {
+        for a in artifacts {
+            if a.id.is_empty() {
+                return Some(format!(
+                    "serde: empty id returned (silent-accept)\nYAML:\n{yaml}"
+                ));
+            }
+            if !yaml.contains(&a.id) {
+                return Some(format!(
+                    "serde: id {:?} not present in source (silent-accept / coercion)\nYAML:\n{yaml}",
+                    a.id
+                ));
+            }
+            if a.artifact_type.is_empty() {
+                return Some(format!(
+                    "serde: empty type returned (silent-accept)\nYAML:\n{yaml}"
+                ));
+            }
+            if !yaml.contains(&a.artifact_type) {
+                return Some(format!(
+                    "serde: type {:?} not present in source\nYAML:\n{yaml}",
+                    a.artifact_type
+                ));
+            }
+            for l in &a.links {
+                if l.target.is_empty() {
+                    return Some(format!(
+                        "serde: phantom link (empty target)\nYAML:\n{yaml}"
+                    ));
+                }
+                if !yaml.contains(&l.target) {
+                    return Some(format!(
+                        "serde: link target {:?} not present in source\nYAML:\n{yaml}",
+                        l.target
+                    ));
+                }
+            }
+        }
+    }
+
+    for sa in &hir.artifacts {
+        let a = &sa.artifact;
+        if !a.id.is_empty() && !yaml.contains(&a.id) {
+            return Some(format!(
+                "hir: id {:?} not present in source\nYAML:\n{yaml}",
+                a.id
+            ));
+        }
+        for l in &a.links {
+            if l.target.is_empty() {
+                return Some(format!(
+                    "hir: phantom link (empty target — yaml_hir.rs:530 bug class)\nYAML:\n{yaml}"
+                ));
+            }
+            if !yaml.contains(&l.target) {
+                return Some(format!(
+                    "hir: link target {:?} not present in source\nYAML:\n{yaml}",
+                    l.target
+                ));
+            }
+        }
+    }
+
+    // Oracle: null-ish link targets.
+    let null_ish = |t: &str| matches!(t.trim(), "null" | "NULL" | "Null" | "~");
+    if let Ok(arts) = &serde_result {
+        for a in arts {
+            for l in &a.links {
+                if null_ish(&l.target) {
+                    return Some(format!(
+                        "serde: link target coerced from YAML null: {:?} (yaml_hir.rs:530 class)\nYAML:\n{yaml}",
+                        l.target
+                    ));
+                }
+            }
+        }
+    }
+    for sa in &hir.artifacts {
+        for l in &sa.artifact.links {
+            if null_ish(&l.target) {
+                return Some(format!(
+                    "hir: link target coerced from YAML null: {:?}\nYAML:\n{yaml}",
+                    l.target
+                ));
+            }
+        }
+    }
+
+    // Oracle: serde rejected but hir silently accepted 0 artifacts.
+    if serde_result.is_err()
+        && hir.artifacts.is_empty()
+        && hir.diagnostics.is_empty()
+        && yaml.contains("id:")
+    {
+        return Some(format!(
+            "hir: serde rejected but HIR returned 0 artifacts / 0 diagnostics (formats/generic.rs:138 class)\nYAML:\n{yaml}"
+        ));
+    }
+
+    // Oracle: multi-document silent truncation.
+    if yaml.contains("\n---\n") {
+        let declared: usize = yaml
+            .lines()
+            .filter(|l| l.trim_start().starts_with("- id:"))
+            .count();
+        if declared > hir.artifacts.len() && hir.diagnostics.is_empty() {
+            return Some(format!(
+                "hir: multi-document truncation — source declares {declared} artifacts, HIR returned {} (yaml_cst.rs:517 class)\nYAML:\n{yaml}",
+                hir.artifacts.len()
+            ));
+        }
+    }
+
+    None
+}
diff --git a/fuzz/fuzz_targets/artifact_ids.rs b/fuzz/fuzz_targets/artifact_ids.rs
new file mode 100644
index 0000000..b68edaf
--- /dev/null
+++ b/fuzz/fuzz_targets/artifact_ids.rs
@@ -0,0 +1,97 @@
+#![no_main]
+//! Artifact-ID round-trip fuzzer.
+//!
+//! Feeds arbitrary byte sequences as `id:` values inside an otherwise-valid
+//! artifact YAML document. Oracle: if the YAML parses at all, the id string
+//! must round-trip through the `Store` — `insert` then `get` returns an
+//! artifact whose id is byte-identical to the one we fed in.
+//!
+//! This catches silent normalization (whitespace stripping, unicode
+//! 
canonicalization, case folding) and insert/get key mismatches. + +use libfuzzer_sys::fuzz_target; +use rivet_core::formats::generic::parse_generic_yaml; +use rivet_core::store::Store; + +fuzz_target!(|data: &[u8]| { + let Ok(raw) = std::str::from_utf8(data) else { + return; + }; + + // Sanitize the candidate id so it is embeddable as a YAML plain scalar + // on the id: line. We intentionally DO allow exotic unicode, since that + // is part of what we want to probe. We DO strip newlines and NULs + // because those would break the surrounding YAML grammar itself (not + // rivet's fault). + let id_raw: String = raw + .chars() + .filter(|&c| c != '\n' && c != '\r' && c != '\0') + .take(128) + .collect(); + + if id_raw.is_empty() { + return; + } + + // YAML-quote the id so even `:` and `#` survive into the scalar. Double + // quotes with escaping handle everything except a stray `"` or `\` — we + // escape those. + let quoted = yaml_double_quote(&id_raw); + + let yaml = format!( + "artifacts:\n - id: {quoted}\n type: requirement\n title: Fuzz\n" + ); + + let Ok(artifacts) = parse_generic_yaml(&yaml, None) else { + return; + }; + if artifacts.is_empty() { + return; + } + + // There must be exactly one artifact returned. Anything else is a bug. + assert_eq!( + artifacts.len(), + 1, + "id-roundtrip: expected 1 artifact, got {} for id={id_raw:?}", + artifacts.len() + ); + + let parsed_id = artifacts[0].id.clone(); + + // Round-trip through the store. + let mut store = Store::new(); + let artifact = artifacts.into_iter().next().unwrap(); + store.insert(artifact).expect("first insert cannot fail"); + + // Lookup by the id returned from the parser. 
/// Renders `s` as a YAML double-quoted scalar.
///
/// `"` and `\` are backslash-escaped and tab is kept literally. Every other
/// control character (C0, DEL and C1, which includes NEL), the Unicode
/// line/paragraph separators U+2028/U+2029, and the non-characters
/// U+FFFE/U+FFFF are written as `\uXXXX`. Those code points are either
/// outside YAML's printable set or treated as line breaks by YAML 1.1
/// parsers, so emitting them raw would make the document invalid or let the
/// parser fold them before the id ever reaches the code under test.
/// All remaining characters are copied through unchanged.
fn yaml_double_quote(s: &str) -> String {
    use std::fmt::Write as _;

    let mut out = String::with_capacity(s.len() + 2);
    out.push('"');
    for c in s.chars() {
        match c {
            '"' => out.push_str("\\\""),
            '\\' => out.push_str("\\\\"),
            '\t' => out.push('\t'),
            // Every character matched here is below U+10000, so the
            // four-digit `\u` escape is always sufficient.
            c if c.is_control()
                || matches!(c, '\u{2028}' | '\u{2029}' | '\u{FFFE}' | '\u{FFFF}') =>
            {
                write!(out, "\\u{:04X}", c as u32).expect("writing to a String cannot fail");
            }
            c => out.push(c),
        }
    }
    out.push('"');
    out
}
cargo +nightly fuzz run cli_argv -- -max_total_time=60 + +use arbitrary::{Arbitrary, Unstructured}; +use libfuzzer_sys::fuzz_target; +use std::process::{Command, Stdio}; +use std::time::Duration; + +#[derive(Debug, Arbitrary)] +enum Subcommand { + Validate, + List, + ListJson, + Coverage, + Stats, + Commits, + Add, + Modify, + Stamp, + Query, + Variant, + Help, +} + +#[derive(Debug, Arbitrary)] +enum Flag { + FormatJson, + FormatYaml, + FormatText, + Type(String), + Baseline(String), + Path(String), + Unknown(String), +} + +#[derive(Debug, Arbitrary)] +struct ArgvInput { + subcommand: Subcommand, + flags: Vec, + positional: Vec, +} + +fn subcommand_name(s: &Subcommand) -> Option<&'static [&'static str]> { + match s { + Subcommand::Validate => Some(&["validate"]), + Subcommand::List => Some(&["list"]), + Subcommand::ListJson => Some(&["list", "--format", "json"]), + Subcommand::Coverage => Some(&["coverage"]), + Subcommand::Stats => Some(&["stats"]), + Subcommand::Commits => Some(&["commits"]), + Subcommand::Add => Some(&["add"]), + Subcommand::Modify => Some(&["modify"]), + Subcommand::Stamp => Some(&["stamp"]), + Subcommand::Query => Some(&["query"]), + Subcommand::Variant => Some(&["variant"]), + Subcommand::Help => Some(&["--help"]), + } +} + +fn sanitize(s: &str) -> String { + // Remove NULs (which std::process rejects on unix) and bound length. 
/// Returns true if the argv requested JSON output.
///
/// Both spellings of the option are recognised: the two-token form
/// `--format json` (the tokens must be adjacent and in that order) and the
/// single-token form `--format=json`. Any other format value, or a trailing
/// `--format` with no value, does not count.
fn is_json_format(argv: &[String]) -> bool {
    argv.iter().any(|arg| arg == "--format=json")
        || argv
            .windows(2)
            .any(|pair| matches!(pair, [flag, value] if flag == "--format" && value == "json"))
}
+ .env("RIVET_NO_UPDATE_CHECK", "1"); + + let Ok(mut child) = cmd.spawn() else { + return; + }; + + // Poor-man's 5-second timeout: spawn a reaper thread. We cannot use + // `std::process::Child::wait_timeout` without adding a dep. + let start = std::time::Instant::now(); + loop { + match child.try_wait() { + Ok(Some(status)) => { + // Oracle: exit code must be in {0, 1, 2, 64..}. A SIGSEGV + // (signal 11) or SIGABRT (signal 6) surfaces as a panic. + if let Some(sig) = status_signal(&status) { + panic!("rivet-cli died from signal {sig} on argv {argv:?}"); + } + // Read stdout/stderr for the JSON oracle. If stdout is + // supposed to be JSON, it must parse OR be empty. + let output = child + .wait_with_output() + .ok() + .or_else(|| Some(std::process::Output { + status, + stdout: Vec::new(), + stderr: Vec::new(), + })) + .unwrap(); + if json_mode && status.success() && !output.stdout.is_empty() { + let stdout = std::str::from_utf8(&output.stdout).unwrap_or(""); + if serde_json::from_str::(stdout).is_err() { + // Not a panic — surface it as a finding. We keep + // the target lenient because some subcommands may + // not support --format json and should error out. + // Require a non-zero exit in that case, which is + // caught by status.success() above. + panic!( + "--format json returned success but stdout is not JSON\nargv={argv:?}\nstdout={stdout:?}" + ); + } + } + return; + } + Ok(None) => { + if start.elapsed() > Duration::from_secs(5) { + let _ = child.kill(); + let _ = child.wait(); + return; + } + std::thread::sleep(Duration::from_millis(20)); + } + Err(_) => return, + } + } +}); + +#[cfg(unix)] +fn status_signal(status: &std::process::ExitStatus) -> Option { + use std::os::unix::process::ExitStatusExt; + status.signal() +} + +#[cfg(not(unix))] +fn status_signal(_status: &std::process::ExitStatus) -> Option { + None +} + +// Re-export of arbitrary so the derive sees `Unstructured` at expected path. 
// NOTE(review): nothing visible in this target calls this function. It names
// `Unstructured` in its signature, which presumably exists only to keep the
// `use arbitrary::{Arbitrary, Unstructured}` import from being flagged as
// unused — confirm before removing either.
#[allow(dead_code)]
fn _unstructured_marker(_u: Unstructured<'_>) {}
+ UnquotedDate, + /// Duplicate the `id:` or `type:` key. + DuplicateKey { which: u8 }, + /// Replace a space-indent line with tab indent. + TabIndent { line_offset: u8 }, + /// Inject a second `---\nartifacts: [...]\n` document. + MultiDocument, + /// Set a shorthand-link-style field to null / ~ / "". + NullShorthandLink { variant: u8 }, + /// Rename the top-level `artifacts:` key. + UnknownTopLevelKey { variant: u8 }, + /// Emit an anchor / alias cycle inside a field value. + AnchorCycle, + /// Deeply nest a list inside the `fields:` map. + DeepNesting { depth: u8 }, + /// Insert a NUL/soft-hyphen/trailing-space into the id value. + ControlCharInId { variant: u8 }, +} + +/// Wrapper to drive multiple footgun mutations per input. +#[derive(Debug, Arbitrary)] +struct FuzzInput { + footguns: Vec, +} + +const SEED_YAML: &str = "artifacts:\n - id: REQ-001\n type: requirement\n title: Seed requirement\n status: draft\n tags: [safety]\n links:\n - type: derives-from\n target: REQ-000\n fields:\n priority: must\n baseline: v0.1.0\n"; + +fuzz_target!(|input: FuzzInput| { + let mut yaml = SEED_YAML.to_string(); + // Apply up to 3 footgun mutations (more than that often yields invalid YAML that + // just errors out — not interesting). + for f in input.footguns.iter().take(3) { + yaml = apply_footgun(&yaml, f); + } + probe(&yaml); +}); + +/// Feed an empty-Unstructured fallback path so cargo-fuzz can also consume raw +/// bytes when it wants to. Not the primary oracle path. +#[allow(dead_code)] +fn probe_raw(data: &[u8]) { + if let Ok(s) = std::str::from_utf8(data) { + probe(s); + } +} + +/// Runs the oracle: parse through both the serde path and the rowan path. +/// Any panic fails the target automatically. Any *semantic discrepancy* +/// between returned artifacts and text-present values flags a silent bug. +fn probe(yaml: &str) { + // 1. Direct serde parse (`formats::generic::parse_generic_yaml`). + let serde_result = parse_generic_yaml(yaml, None); + + // 2. 
Rowan HIR extraction, the path the LSP uses. + let hir = rivet_core::yaml_hir::extract_generic_artifacts(yaml); + + // 3. Full artifact-level deserialize (some adapters use this). + let _ = serde_yaml::from_str::(yaml); + let _ = serde_yaml::from_str::>(yaml); + + // Oracle 1: if parse_generic_yaml returned Ok, every returned id must + // literally appear in the source text. A returned id that is NOT a + // substring of the source is a silent-synthesis bug (Norway coercion, + // duplicate key merge, etc.). + if let Ok(artifacts) = &serde_result { + for a in artifacts { + // An empty id passes all substring checks but is itself a silent + // acceptance bug — every artifact must have a non-empty id. + assert!( + !a.id.is_empty(), + "silent-accept: empty id returned by parse_generic_yaml\nYAML:\n{yaml}" + ); + // Exact substring match: the id must appear as-is in the source. + // This catches Norway-problem coercions (e.g., `NO` being turned + // into `false` and re-serialized as the string `"false"`). + assert!( + yaml.contains(&a.id), + "silent-accept: parse_generic_yaml returned id {:?} not present in source\nYAML:\n{yaml}", + a.id + ); + // Same oracle for artifact_type. + assert!( + !a.artifact_type.is_empty(), + "silent-accept: empty type returned by parse_generic_yaml\nYAML:\n{yaml}" + ); + assert!( + yaml.contains(&a.artifact_type), + "silent-accept: parse_generic_yaml returned type {:?} not present in source\nYAML:\n{yaml}", + a.artifact_type + ); + // Link targets must also be source-present substrings. + for l in &a.links { + assert!( + !l.target.is_empty(), + "silent-accept: link with empty target (phantom link)\nYAML:\n{yaml}" + ); + assert!( + yaml.contains(&l.target), + "silent-accept: link target {:?} not present in source\nYAML:\n{yaml}", + l.target + ); + } + } + } + + // Oracle 2: HIR path. Same substring invariant. 
+ for sa in &hir.artifacts { + let a = &sa.artifact; + if !a.id.is_empty() { + assert!( + yaml.contains(&a.id), + "silent-accept: yaml_hir returned id {:?} not present in source\nYAML:\n{yaml}", + a.id + ); + } + for l in &a.links { + assert!( + !l.target.is_empty(), + "silent-accept: yaml_hir phantom link (empty target)\nYAML:\n{yaml}" + ); + assert!( + yaml.contains(&l.target), + "silent-accept: yaml_hir link target {:?} not present in source\nYAML:\n{yaml}", + l.target + ); + } + } + + // Oracle 3: "null-ish" link targets are always a phantom link. + // serde_yaml happily materializes `target: null`, `target: ~`, and + // `target: ""` as a link with a string-ish target that is not a real + // artifact id. This is the `yaml_hir.rs:530-549` bug class. + for list in [ + serde_result.as_ref().ok().map(|v| v.as_slice()).unwrap_or(&[]), + ] { + for a in list { + for l in &a.links { + let t = l.target.trim(); + assert!( + t != "null" && t != "~" && t != "NULL" && t != "Null", + "silent-accept: link target coerced from YAML null: {:?}\nYAML:\n{yaml}", + l.target + ); + } + } + } + for sa in &hir.artifacts { + for l in &sa.artifact.links { + let t = l.target.trim(); + assert!( + t != "null" && t != "~" && t != "NULL" && t != "Null", + "silent-accept: hir link target coerced from YAML null: {:?}\nYAML:\n{yaml}", + l.target + ); + } + } + + // Oracle 4: HIR+serde disagree on parse outcome for the top-level + // `artifacts:` key. If serde rejects with "missing field `artifacts`" + // and HIR returns 0 artifacts with 0 diagnostics, that is the + // `formats/generic.rs:138` Ok(vec![]) silent-accept. We only flag the + // specific 0-artifacts / 0-diagnostics / serde-error shape. + if serde_result.is_err() && hir.artifacts.is_empty() && hir.diagnostics.is_empty() { + // If the source text contains NO mention of any artifact id shape, + // zero artifacts is the correct outcome. 
We only panic when the + // source clearly intended to declare artifacts but HIR dropped them + // silently. Heuristic: the source contains `id:` or `- id:`. + if yaml.contains("id:") { + panic!( + "silent-accept: serde rejected input but yaml_hir returned 0 artifacts / 0 diagnostics (formats/generic.rs:138 class)\nYAML:\n{yaml}" + ); + } + } + + // Oracle 5: multi-document silent truncation (`yaml_cst.rs:517`). + // If the source contains a literal `---` document separator preceded + // by an `artifacts:` block, HIR will often keep only the first doc. + // We compare the number of declared `- id:` occurrences on top-level + // artifact-list lines against the number of artifacts HIR returned. + // This is heuristic but empirically catches the known multi-doc bug. + if yaml.contains("\n---\n") { + // Count approximate declared artifacts. We count lines matching + // `^ - id:` (the canonical list-item indent for artifacts:). + let declared: usize = yaml + .lines() + .filter(|l| l.trim_start().starts_with("- id:")) + .count(); + if declared > hir.artifacts.len() && hir.diagnostics.is_empty() { + panic!( + "silent-accept: multi-document truncation — source declares {declared} artifacts but HIR returned {} with no diagnostics (yaml_cst.rs:517 class)\nYAML:\n{yaml}", + hir.artifacts.len() + ); + } + } +} + +// ── Mutation machinery ──────────────────────────────────────────────────── + +fn apply_footgun(yaml: &str, f: &Footgun) -> String { + match f { + Footgun::Norway { which_field, variant } => { + let payload = norway_variant(*variant); + // Replace the first scalar value at column 4+ that matches the + // chosen field. Keep it simple: pick one of id/title/status/ + // target/priority/baseline. 
+ let field = pick_field(*which_field); + replace_field_value(yaml, field, payload) + } + Footgun::VersionCoercion { which_field } => { + let field = pick_field(*which_field); + // Baseline values are quoted in the seed; swap `"v0.1.0"` for + // `v0.1.0` and also handle the 1.0 -> no quotes case. + replace_field_value(yaml, field, "1.0") + } + Footgun::LeadingZeroId => yaml.replace("REQ-001", "REQ-0001"), + Footgun::UnquotedDate => replace_field_value(yaml, "title", "2026-04-21"), + Footgun::DuplicateKey { which } => { + let key = if *which % 2 == 0 { "id" } else { "type" }; + // Duplicate the key on the same artifact with a different value. + yaml.replace( + &format!(" {key}:"), + &format!(" {key}: DUPLICATE-VAL\n {key}:"), + ) + } + Footgun::TabIndent { line_offset } => { + // Convert one of the 4-space-indent lines to a tab. + let mut lines: Vec = yaml.lines().map(|s| s.to_string()).collect(); + if !lines.is_empty() { + let idx = (*line_offset as usize) % lines.len(); + lines[idx] = lines[idx].replacen(" ", "\t", 1); + } + lines.join("\n") + "\n" + } + Footgun::MultiDocument => { + format!("{yaml}\n---\nartifacts:\n - id: REQ-999\n type: requirement\n title: Second doc\n") + } + Footgun::NullShorthandLink { variant } => { + let value = match variant % 3 { + 0 => "null", + 1 => "~", + _ => "\"\"", + }; + // Overwrite the `target:` scalar with a null form. The seed has + // `target: REQ-000`; this exercises the phantom-link bug at + // yaml_hir.rs:530. + yaml.replace("target: REQ-000", &format!("target: {value}")) + } + Footgun::UnknownTopLevelKey { variant } => { + let key = match variant % 3 { + 0 => "artifact:", // singular typo + 1 => "Artifacts:", // case + _ => "artifcats:", // misspelling + }; + yaml.replacen("artifacts:", key, 1) + } + Footgun::AnchorCycle => { + // Insert an anchor/alias cycle inside the fields: block. 
/// Replaces the value on the first `<field>: <old>` line with `new_value`.
///
/// A line matches when, after its indentation and an optional `- `
/// sequence-item marker, it starts with `<field>:`. Recognising the marker is
/// what lets keys that open a list item (such as `- id: REQ-001`) be mutated
/// at all. The indentation and marker are kept, everything after the colon is
/// replaced by a single space plus `new_value`, and the line keeps a trailing
/// newline only if it had one. Only the first matching line is changed; when
/// nothing matches the input is returned unchanged.
fn replace_field_value(yaml: &str, field: &str, new_value: &str) -> String {
    // Built once instead of once per line.
    let key = format!("{field}:");
    let mut out = String::with_capacity(yaml.len() + new_value.len());
    let mut replaced = false;

    for line in yaml.split_inclusive('\n') {
        if !replaced {
            let body = line.trim_start();
            // Look past a list-item marker so `- key:` is treated like `key:`.
            let after_marker = body.strip_prefix("- ").map_or(body, str::trim_start);
            if after_marker.starts_with(&key) {
                // Everything before the key (indentation and marker) is kept.
                let prefix_len = line.len() - after_marker.len();
                out.push_str(&line[..prefix_len]);
                out.push_str(&key);
                out.push(' ');
                out.push_str(new_value);
                if line.ends_with('\n') {
                    out.push('\n');
                }
                replaced = true;
                continue;
            }
        }
        out.push_str(line);
    }
    out
}