Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 17 additions & 4 deletions crates/quarto-markdown-pandoc/src/pandoc/meta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -635,8 +635,9 @@ pub fn rawblock_to_meta_with_source_info(
let content = extract_between_delimiters(&block.text).unwrap();

// Calculate offsets within RawBlock.text
// The text is "---\n<content>\n---", so content starts at index 4
let yaml_start = block.text.find("---\n").unwrap() + 4;
// Find the actual position of the trimmed content in the original text
// extract_between_delimiters trims the content, so we need to find where it actually starts
let yaml_start = block.text.find(content).unwrap();

// block.source_info is already quarto_source_map::SourceInfo
let parent = block.source_info.clone();
Expand All @@ -646,7 +647,7 @@ pub fn rawblock_to_meta_with_source_info(
quarto_source_map::SourceInfo::substring(parent, yaml_start, yaml_start + content.len());

// Parse YAML with source tracking
let yaml = match quarto_yaml::parse_with_parent(content, yaml_parent) {
let yaml = match quarto_yaml::parse_with_parent(content, yaml_parent.clone()) {
Ok(y) => y,
Err(e) => panic!(
"(unimplemented syntax error - this is a bug!) Failed to parse metadata block as YAML: {}",
Expand All @@ -656,7 +657,19 @@ pub fn rawblock_to_meta_with_source_info(

// Transform YamlWithSourceInfo to MetaValueWithSourceInfo
// Pass by value since yaml is no longer needed
yaml_to_meta_with_source_info(yaml, context, diagnostics)
let mut result = yaml_to_meta_with_source_info(yaml, context, diagnostics);

// For the top-level metadata, replace the source_info with yaml_parent
// to ensure it spans the entire YAML content, not just where the mapping starts
if let MetaValueWithSourceInfo::MetaMap {
ref mut source_info,
..
} = result
{
*source_info = yaml_parent;
}

result
}

/// Legacy version: Convert RawBlock to Meta (old implementation)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,15 @@ pub fn process_document(
PandocNativeIntermediate::IntermediateSection(section) => {
blocks.extend(section);
}
PandocNativeIntermediate::IntermediateMetadataString(text, _range) => {
PandocNativeIntermediate::IntermediateMetadataString(text, range) => {
// for now we assume it's metadata and emit it as a rawblock
blocks.push(Block::RawBlock(RawBlock {
format: "quarto_minus_metadata".to_string(),
text,
source_info: node_source_info_with_context(node, context),
source_info: quarto_source_map::SourceInfo::from_range(
context.current_file_id(),
range,
),
}));
}
_ => panic!("Expected Block or Section, got {:?}", child),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,15 @@ pub fn process_section(
PandocNativeIntermediate::IntermediateSection(section) => {
blocks.extend(section);
}
PandocNativeIntermediate::IntermediateMetadataString(text, _range) => {
PandocNativeIntermediate::IntermediateMetadataString(text, range) => {
// for now we assume it's metadata and emit it as a rawblock
blocks.push(Block::RawBlock(RawBlock {
format: "quarto_minus_metadata".to_string(),
text,
source_info: node_source_info_with_context(section_node, context),
source_info: quarto_source_map::SourceInfo::from_range(
context.current_file_id(),
range,
),
}));
}
_ => panic!("Expected Block or Section, got {:?}", child),
Expand Down
22 changes: 20 additions & 2 deletions crates/quarto-markdown-pandoc/src/readers/qmd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,8 @@ pub fn read<T: Write>(
};
// Store complete MetaMapEntry objects to preserve key_source information
let mut meta_from_parses: Vec<crate::pandoc::meta::MetaMapEntry> = Vec::new();
// Track the source_info of the metadata block (for simple case with single block)
let mut meta_source_info: Option<quarto_source_map::SourceInfo> = None;
// Create a separate diagnostic collector for metadata parsing warnings
let mut meta_diagnostics = DiagnosticCollector::new();

Expand Down Expand Up @@ -222,7 +224,15 @@ pub fn read<T: Write>(
);

// Extract MetaMapEntry objects (preserving key_source) and store them
if let MetaValueWithSourceInfo::MetaMap { entries, .. } = parsed_meta {
if let MetaValueWithSourceInfo::MetaMap {
entries,
source_info,
} = parsed_meta
{
// Store the source_info (for simple case with single metadata block)
if meta_source_info.is_none() {
meta_source_info = Some(source_info);
}
for entry in entries {
meta_from_parses.push(entry);
}
Expand All @@ -240,10 +250,18 @@ pub fn read<T: Write>(
// Merge meta_from_parses into result.meta
// result.meta is MetaValueWithSourceInfo::MetaMap, so we need to append entries
// Now meta_from_parses contains complete MetaMapEntry objects with key_source preserved
if let MetaValueWithSourceInfo::MetaMap { entries, .. } = &mut result.meta {
if let MetaValueWithSourceInfo::MetaMap {
entries,
source_info,
} = &mut result.meta
{
for entry in meta_from_parses.into_iter() {
entries.push(entry);
}
// Update the overall metadata source_info if we captured one
if let Some(captured_source_info) = meta_source_info {
*source_info = captured_source_info;
}
}

// Merge metadata diagnostics into main error_collector
Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"astContext":{"files":[{"line_breaks":[3,20,24,25,35,36,40,53,57,58,62],"name":"tests/snapshots/json/002.qmd","total_length":63}],"metaTopLevelKeySources":{"nested":14,"title":12},"sourceInfoPool":[{"d":0,"r":[0,8],"t":0},{"d":0,"r":[8,9],"t":0},{"d":[[0,0,8],[1,8,1]],"r":[0,9],"t":2},{"d":0,"r":[0,63],"t":0},{"d":3,"r":[4,20],"t":1},{"d":4,"r":[7,16],"t":1},{"d":0,"r":[0,4],"t":0},{"d":0,"r":[37,58],"t":0},{"d":7,"r":[4,16],"t":1},{"d":8,"r":[8,12],"t":1},{"d":0,"r":[26,63],"t":0},{"d":3,"r":[4,20],"t":1},{"d":11,"r":[0,5],"t":1},{"d":7,"r":[4,16],"t":1},{"d":13,"r":[0,6],"t":1}]},"blocks":[{"c":[["",["hello"],[]],[]],"s":10,"t":"Div"}],"meta":{"nested":{"c":[{"c":"meta","s":6,"t":"Str"}],"s":9,"t":"MetaInlines"},"title":{"c":[{"c":"metadata1","s":2,"t":"Str"}],"s":5,"t":"MetaInlines"}},"pandoc-api-version":[1,23,1]}
{"astContext":{"files":[{"line_breaks":[3,20,24,25,35,36,40,53,57,58,62],"name":"tests/snapshots/json/002.qmd","total_length":63}],"metaTopLevelKeySources":{"nested":14,"title":12},"sourceInfoPool":[{"d":0,"r":[0,8],"t":0},{"d":0,"r":[8,9],"t":0},{"d":[[0,0,8],[1,8,1]],"r":[0,9],"t":2},{"d":0,"r":[0,25],"t":0},{"d":3,"r":[4,20],"t":1},{"d":4,"r":[7,16],"t":1},{"d":0,"r":[0,4],"t":0},{"d":0,"r":[37,58],"t":0},{"d":7,"r":[4,16],"t":1},{"d":8,"r":[8,12],"t":1},{"d":0,"r":[26,63],"t":0},{"d":3,"r":[4,20],"t":1},{"d":11,"r":[0,5],"t":1},{"d":7,"r":[4,16],"t":1},{"d":13,"r":[0,6],"t":1}]},"blocks":[{"c":[["",["hello"],[]],[]],"s":10,"t":"Div"}],"meta":{"nested":{"c":[{"c":"meta","s":6,"t":"Str"}],"s":9,"t":"MetaInlines"},"title":{"c":[{"c":"metadata1","s":2,"t":"Str"}],"s":5,"t":"MetaInlines"}},"pandoc-api-version":[1,23,1]}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"astContext":{"files":[{"line_breaks":[3,20,24,25,35,36,40,56,69,73,74,78],"name":"tests/snapshots/json/003.qmd","total_length":79}],"metaTopLevelKeySources":{"title":22},"sourceInfoPool":[{"d":0,"r":[0,8],"t":0},{"d":0,"r":[8,9],"t":0},{"d":[[0,0,8],[1,8,1]],"r":[0,9],"t":2},{"d":0,"r":[0,79],"t":0},{"d":3,"r":[4,20],"t":1},{"d":4,"r":[7,16],"t":1},{"d":0,"r":[37,74],"t":0},{"d":6,"r":[4,32],"t":1},{"d":7,"r":[0,6],"t":1},{"d":0,"r":[0,7],"t":0},{"d":6,"r":[4,32],"t":1},{"d":10,"r":[8,15],"t":1},{"d":6,"r":[4,32],"t":1},{"d":12,"r":[16,22],"t":1},{"d":0,"r":[0,4],"t":0},{"d":6,"r":[4,32],"t":1},{"d":15,"r":[24,28],"t":1},{"d":6,"r":[4,32],"t":1},{"d":17,"r":[6,28],"t":1},{"d":0,"r":[37,74],"t":0},{"d":0,"r":[26,79],"t":0},{"d":3,"r":[4,20],"t":1},{"d":21,"r":[0,5],"t":1}]},"blocks":[{"c":[["",["hello"],[]],[{"c":{"c":[{"key":"_scope","key_source":8,"value":{"c":[{"c":"lexical","s":9,"t":"Str"}],"s":11,"t":"MetaInlines"}},{"key":"nested","key_source":13,"value":{"c":[{"c":"meta","s":14,"t":"Str"}],"s":16,"t":"MetaInlines"}}],"s":18,"t":"MetaMap"},"s":19,"t":"BlockMetadata"}]],"s":20,"t":"Div"}],"meta":{"title":{"c":[{"c":"metadata1","s":2,"t":"Str"}],"s":5,"t":"MetaInlines"}},"pandoc-api-version":[1,23,1]}
{"astContext":{"files":[{"line_breaks":[3,20,24,25,35,36,40,56,69,73,74,78],"name":"tests/snapshots/json/003.qmd","total_length":79}],"metaTopLevelKeySources":{"title":21},"sourceInfoPool":[{"d":0,"r":[0,8],"t":0},{"d":0,"r":[8,9],"t":0},{"d":[[0,0,8],[1,8,1]],"r":[0,9],"t":2},{"d":0,"r":[0,25],"t":0},{"d":3,"r":[4,20],"t":1},{"d":4,"r":[7,16],"t":1},{"d":0,"r":[37,74],"t":0},{"d":6,"r":[4,32],"t":1},{"d":7,"r":[0,6],"t":1},{"d":0,"r":[0,7],"t":0},{"d":6,"r":[4,32],"t":1},{"d":10,"r":[8,15],"t":1},{"d":6,"r":[4,32],"t":1},{"d":12,"r":[16,22],"t":1},{"d":0,"r":[0,4],"t":0},{"d":6,"r":[4,32],"t":1},{"d":15,"r":[24,28],"t":1},{"d":6,"r":[4,32],"t":1},{"d":0,"r":[37,74],"t":0},{"d":0,"r":[26,79],"t":0},{"d":3,"r":[4,20],"t":1},{"d":20,"r":[0,5],"t":1}]},"blocks":[{"c":[["",["hello"],[]],[{"c":{"c":[{"key":"_scope","key_source":8,"value":{"c":[{"c":"lexical","s":9,"t":"Str"}],"s":11,"t":"MetaInlines"}},{"key":"nested","key_source":13,"value":{"c":[{"c":"meta","s":14,"t":"Str"}],"s":16,"t":"MetaInlines"}}],"s":17,"t":"MetaMap"},"s":18,"t":"BlockMetadata"}]],"s":19,"t":"Div"}],"meta":{"title":{"c":[{"c":"metadata1","s":2,"t":"Str"}],"s":5,"t":"MetaInlines"}},"pandoc-api-version":[1,23,1]}
Original file line number Diff line number Diff line change
Expand Up @@ -227,3 +227,69 @@ description: This is a description
);
eprintln!("✓ LinkedHashMap fix working - key source information preserved!");
}

/// Checks where the top-level metadata map's source info resolves to for a
/// document with a single YAML front-matter block.
///
/// The input is:
///
/// ```text
/// ---
/// title: Test Document
/// author: Test Author
/// ---
///
/// Some content here.
/// ```
///
/// The test asserts that the resolved offset of the `MetaMap`'s `source_info`
/// is 4, i.e. the position right after the opening `---\n` line, and that the
/// map holds exactly the `title` and `author` entries.
///
/// NOTE(review): only the resolved *start* offset is asserted here; the end of
/// the span is not checked by this test.
#[test]
fn test_metadata_block_overall_source_info() {
    let input = r#"---
title: Test Document
author: Test Author
---

Some content here.
"#;

    // Parse the document; the output writer is a sink because only the
    // returned AST matters for this test.
    let (document, _ast_context, _diagnostics) =
        readers::qmd::read(input.as_bytes(), false, "test.qmd", &mut std::io::sink())
            .expect("Failed to parse");

    // The document-level metadata must be a map; anything else is a failure.
    let (entries, source_info) = match document.meta {
        MetaValueWithSourceInfo::MetaMap {
            entries,
            source_info,
        } => (entries, source_info),
        _ => panic!("Expected MetaMap"),
    };

    // `resolve_source_offset` is a helper defined elsewhere in this test file;
    // its result is treated below as the start offset of the metadata content.
    let meta_offset = resolve_source_offset(&source_info);

    eprintln!("\nMetadata block resolved offset: {}", meta_offset);
    eprintln!("Metadata entries count: {}", entries.len());

    // "---\n" occupies bytes 0..4, so the YAML content begins at offset 4.
    assert_eq!(
        meta_offset, 4,
        "Metadata block should start at offset 4 (after opening '---\\n'), got {}",
        meta_offset
    );

    // Both front-matter keys must have been collected, and nothing else.
    assert_eq!(entries.len(), 2, "Should have 2 metadata entries");
    assert!(
        entries.iter().any(|entry| entry.key == "title"),
        "Should have 'title' entry"
    );
    assert!(
        entries.iter().any(|entry| entry.key == "author"),
        "Should have 'author' entry"
    );

    eprintln!("\n✅ Metadata block overall source info test passed!");
    eprintln!(
        "✓ Metadata block source points to correct offset ({})",
        meta_offset
    );
}