From a8df04b1dab6e0ddae7417aafc86c5b211319678 Mon Sep 17 00:00:00 2001 From: Carlos Scheidegger Date: Fri, 8 May 2026 09:13:02 -0500 Subject: [PATCH] Scrub all S-suffix source-info keys in round-trip JSON comparison (bd-j9wp) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit remove_location_fields in crates/pampa/tests/test.rs (used by test_qmd_roundtrip_consistency to compare JSON1 vs JSON3 modulo source info) was scrubbing only attrS, targetS, citationIdS — but the JSON writer emits seven more S-suffix keys for table internals: bodiesS, bodyS, captionS, cellsS, footS, headS, rowsS. captionS in particular is a scalar foreign-key into astContext.sourceInfoPool that sits directly on the Table object, so it survived the existing scrub as a dangling integer reference and could fail an otherwise content-stable round trip. This was deterministic, not flaky: same input always produced the same captionS:N on JSON1 and the same captionS:M on JSON3, because the two parses build differently-sized sourceInfoPools (the regenerated qmd has different source positions from the original) and the traversal-order IDs into each pool are stable. After scrubbing astContext (which removes the pools themselves), the bare IDs are references to nothing, but the test compared them numerically anyway. Add the missing seven keys to the scrub list, plus a regression fixture (table_with_inline_nbsp_in_cell.qmd) that exercises the gap — the fixture was originally written for #162 / bd-1aip but had to be dropped from that PR because of this scrub gap; it's reinstated here. Closes bd-j9wp. --- .../table_with_inline_nbsp_in_cell.qmd | 3 ++ crates/pampa/tests/test.rs | 31 +++++++++++++++---- 2 files changed, 28 insertions(+), 6 deletions(-) create mode 100644 crates/pampa/tests/roundtrip_tests/qmd-json-qmd/table_with_inline_nbsp_in_cell.qmd diff --git a/crates/pampa/tests/roundtrip_tests/qmd-json-qmd/table_with_inline_nbsp_in_cell.qmd b/crates/pampa/tests/roundtrip_tests/qmd-json-qmd/table_with_inline_nbsp_in_cell.qmd new file mode 100644 index 00000000..f73abc93 --- /dev/null +++ b/crates/pampa/tests/roundtrip_tests/qmd-json-qmd/table_with_inline_nbsp_in_cell.qmd @@ -0,0 +1,3 @@ +| col | other | +| ----- | ----- | +| x \ y | z | diff --git a/crates/pampa/tests/test.rs b/crates/pampa/tests/test.rs index e2da0377..b271f3c3 100644 --- a/crates/pampa/tests/test.rs +++ b/crates/pampa/tests/test.rs @@ -392,14 +392,33 @@ fn normalize_api_version(pandoc_json: &mut serde_json::Value, our_json: &serde_j } } +/// Strip every field that is part of the source-location side of the JSON +/// shape. After scrubbing, the remaining structure is the pure-content view +/// of the AST and can be compared by equality across two parses of the +/// same document, even if the two parses produced differently-sized +/// `astContext.sourceInfoPool`s. +/// +/// The S-suffix convention (`attrS`, `captionS`, `bodiesS`, ...) is used +/// throughout `crates/pampa/src/writers/json.rs` for foreign keys into +/// `astContext.sourceInfoPool` (or for source-info envelopes containing +/// such keys). Every S-suffix key emitted by the JSON writer is listed +/// here; if a new one is added, this list must be extended too. fn remove_location_fields(json: &mut serde_json::Value) { if let Some(obj) = json.as_object_mut() { - obj.remove("l"); // Remove the "l" field (old SourceInfo) - obj.remove("s"); // Remove the "s" field (new quarto_source_map::SourceInfo) - obj.remove("astContext"); // Remove the astContext field (includes metaTopLevelKeySources) - obj.remove("attrS"); // Remove the "attrS" field (AttrSourceInfo) - obj.remove("targetS"); // Remove the "targetS" field (TargetSourceInfo) - obj.remove("citationIdS"); // Remove the "citationIdS" field (Citation id source) + obj.remove("l"); // old SourceInfo + obj.remove("s"); // quarto_source_map::SourceInfo foreign key + obj.remove("astContext"); // pool itself + metaTopLevelKeySources + // S-suffix source-info foreign keys / envelopes: + obj.remove("attrS"); + obj.remove("bodiesS"); + obj.remove("bodyS"); + obj.remove("captionS"); + obj.remove("cellsS"); + obj.remove("citationIdS"); + obj.remove("footS"); + obj.remove("headS"); + obj.remove("rowsS"); + obj.remove("targetS"); for value in obj.values_mut() { remove_location_fields(value); }