diff --git a/Cargo.lock b/Cargo.lock index 6fcaa4b..e785fbd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -63,9 +63,9 @@ dependencies = [ [[package]] name = "bitflags" -version = "2.9.3" +version = "2.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34efbcccd345379ca2868b2b2c9d3782e9cc58ba87bc7d79d5b53d9c9ae6f25d" +checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" [[package]] name = "cfg-if" @@ -75,18 +75,18 @@ checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" [[package]] name = "clap" -version = "4.5.46" +version = "4.5.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c5e4fcf9c21d2e544ca1ee9d8552de13019a42aa7dbf32747fa7aaf1df76e57" +checksum = "7eac00902d9d136acd712710d71823fb8ac8004ca445a89e73a41d45aa712931" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.46" +version = "4.5.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fecb53a0e6fcfb055f686001bc2e2592fa527efaf38dbe81a6a9563562e57d41" +checksum = "2ad9bbf750e73b5884fb8a211a9424a1906c1e156724260fdae972f31d70e1d6" dependencies = [ "anstream", "anstyle", @@ -149,9 +149,9 @@ checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" [[package]] name = "log" -version = "0.4.27" +version = "0.4.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" +checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" [[package]] name = "matchers" @@ -333,7 +333,7 @@ dependencies = [ [[package]] name = "technique" -version = "0.4.0" +version = "0.4.1" dependencies = [ "clap", "owo-colors", diff --git a/Cargo.toml b/Cargo.toml index a17720a..71d614c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "technique" -version = "0.4.0" +version = "0.4.1" edition = "2021" description = "A domain specific language for procedures." authors = [ "Andrew Cowie" ] diff --git a/src/main.rs b/src/main.rs index f7ea8ef..928ef46 100644 --- a/src/main.rs +++ b/src/main.rs @@ -160,16 +160,26 @@ fn main() { std::process::exit(1); } }; + let technique = match parsing::parse(&filename, &content) { Ok(document) => document, - Err(error) => { - eprintln!( - "{}", - problem::full_parsing_error(&error, &filename, &content, &Terminal) - ); + Err(errors) => { + for (i, error) in errors + .iter() + .enumerate() + { + if i > 0 { + eprintln!(); + } + eprintln!( + "{}", + problem::full_parsing_error(&error, &filename, &content, &Terminal) + ); + } std::process::exit(1); } }; + // TODO continue with validation of the returned technique eprintln!("{}", "ok".bright_green()); @@ -205,12 +215,26 @@ fn main() { std::process::exit(1); } }; + let technique = match parsing::parse(&filename, &content) { Ok(document) => document, - Err(error) => { + Err(errors) => { + for (i, error) in errors + .iter() + .enumerate() + { + if i > 0 { + eprintln!(); + } + eprintln!( + "{}", + problem::concise_parsing_error(&error, &filename, &content, &Terminal) + ); + } + eprintln!( - "{}", - problem::concise_parsing_error(&error, &filename, &content, &Terminal) + "\nUnable to parse input file. Try `technique check {}` for details.", + &filename.to_string_lossy() ); std::process::exit(1); } @@ -251,16 +275,27 @@ fn main() { std::process::exit(1); } }; + let technique = match parsing::parse(&filename, &content) { Ok(document) => document, - Err(error) => { + Err(errors) => { // It is possible that we will want to render the error // into the PDF document rather than crashing here. We'll // see in the future. - eprintln!( - "{}", - problem::full_parsing_error(&error, &filename, &content, &Terminal) - ); + + for (i, error) in errors + .iter() + .enumerate() + { + if i > 0 { + eprintln!(); + } + + eprintln!( + "{}", + problem::concise_parsing_error(&error, &filename, &content, &Terminal) + ); + } std::process::exit(1); } }; diff --git a/src/parsing/mod.rs b/src/parsing/mod.rs index 5f5bd1e..2b04538 100644 --- a/src/parsing/mod.rs +++ b/src/parsing/mod.rs @@ -35,9 +35,10 @@ pub fn load(filename: &Path) -> Result { } } -/// Parse text into a Technique object, or error out. -pub fn parse<'i>(filename: &'i Path, content: &'i str) -> Result, ParsingError<'i>> { - let result = parser::parse_via_taking(filename, content); +/// Parse text into a Document object, or return the list of errors +/// encountered. +pub fn parse<'i>(filename: &'i Path, content: &'i str) -> Result, Vec> { + let result = parser::parse_with_recovery(filename, content); match result { Ok(document) => { @@ -66,9 +67,9 @@ pub fn parse<'i>(filename: &'i Path, content: &'i str) -> Result, P } Ok(document) } - Err(error) => { - debug!("error: {:?}", error); - Err(error) + Err(errors) => { + debug!("errors: {}", errors.len()); + Err(errors) } } } diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index ae01eb8..b597daf 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -3,28 +3,39 @@ use std::path::Path; use crate::language::*; use crate::regex::*; -pub fn parse_via_taking<'i>( +// This could be adapted to return both the partial document and the errors. +// But for our purposes if the parse fails then there's no point trying to do +// deeper validation or analysis; the input syntax is broken and the user +// needs to fix it. Should a partial parse turn out have meaning then the +// return type of this can change to ParseResult<'i> but for now it is fine +// to use Result. +pub fn parse_with_recovery<'i>( path: &'i Path, content: &'i str, -) -> Result, ParsingError<'i>> { +) -> Result, Vec> { let mut input = Parser::new(); input.filename(path); input.initialize(content); - input.parse_from_start() + input.parse_collecting_errors() } -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum ParsingError<'i> { +// Most general errors first, most specific last (when removing redundant +// errors, we prefer being able to give a more specific error message to the +// user) +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum ParsingError { + // lowest priority IllegalParserState(usize), Unimplemented(usize), Unrecognized(usize), // improve this + UnexpectedEndOfInput(usize), Expected(usize, &'static str), ExpectedMatchingChar(usize, &'static str, char, char), - InvalidHeader(usize), + // more specific errors InvalidCharacter(usize, char), - UnexpectedEndOfInput(usize), - InvalidIdentifier(usize, &'i str), + InvalidHeader(usize), + InvalidIdentifier(usize, String), InvalidForma(usize), InvalidGenus(usize), InvalidSignature(usize), @@ -33,20 +44,22 @@ pub enum ParsingError<'i> { InvalidInvocation(usize), InvalidFunction(usize), InvalidCodeBlock(usize), - InvalidMultiline(usize), InvalidStep(usize), InvalidSubstep(usize), - InvalidForeach(usize), InvalidResponse(usize), + InvalidMultiline(usize), + InvalidForeach(usize), InvalidIntegral(usize), InvalidQuantity(usize), InvalidQuantityDecimal(usize), InvalidQuantityUncertainty(usize), InvalidQuantityMagnitude(usize), InvalidQuantitySymbol(usize), + // highest priority + UnclosedInterpolation(usize), } -impl<'i> ParsingError<'i> { +impl ParsingError { pub fn offset(&self) -> usize { match self { ParsingError::IllegalParserState(offset) => *offset, @@ -54,6 +67,7 @@ impl<'i> ParsingError<'i> { ParsingError::Unrecognized(offset) => *offset, ParsingError::Expected(offset, _) => *offset, ParsingError::ExpectedMatchingChar(offset, _, _, _) => *offset, + ParsingError::UnclosedInterpolation(offset) => *offset, ParsingError::InvalidHeader(offset) => *offset, ParsingError::InvalidCharacter(offset, _) => *offset, ParsingError::UnexpectedEndOfInput(offset) => *offset, @@ -81,12 +95,42 @@ impl<'i> ParsingError<'i> { } } +/// Remove redundant errors, keeping only the most specific error at each offset. +/// When multiple errors occur at the same offset, keep only the most specific one. +/// Since ParsingError derives Ord with general errors first and specific errors last, +/// we use > to prefer the higher Ord value (more specific) errors. +fn remove_redundant_errors(errors: Vec) -> Vec { + let mut deduped = Vec::new(); + + for error in errors { + let error_offset = error.offset(); + + // Check if we have an existing error at this offset + if let Some(existing_idx) = deduped + .iter() + .position(|e: &ParsingError| e.offset() == error_offset) + { + // Keep the more specific error + if error > deduped[existing_idx] { + deduped[existing_idx] = error; + } + // Otherwise, keep the existing error + } else { + // No error at this offset yet, add it + deduped.push(error); + } + } + + deduped +} + #[derive(Debug)] pub struct Parser<'i> { filename: &'i Path, original: &'i str, source: &'i str, offset: usize, + problems: Vec, } impl<'i> Parser<'i> { @@ -96,6 +140,7 @@ impl<'i> Parser<'i> { original: "", source: "", offset: 0, + problems: Vec::new(), } } @@ -106,6 +151,8 @@ impl<'i> Parser<'i> { self.original = content; self.source = content; self.offset = 0; + self.problems + .clear(); } fn advance(&mut self, width: usize) { @@ -114,10 +161,38 @@ impl<'i> Parser<'i> { self.offset += width; } - fn parse_from_start(&mut self) -> Result, ParsingError<'i>> { - // Check if header is present by looking for magic line + /// Skip to the beginning of the next line (or end of input). This is used + /// when an error is encountered; we attempt to recover back to a newline + /// as that may well be in a parent scope and we can continue. + fn skip_to_next_line(&mut self) { + if let Some(pos) = self + .source + .find('\n') + { + self.advance(pos + 1); + } else { + self.advance( + self.source + .len(), + ); + } + } + + fn parse_collecting_errors(&mut self) -> Result, Vec> { + // Clear any existing errors + self.problems + .clear(); + + // Parse header, collecting errors if encountered let header = if is_magic_line(self.source) { - Some(self.read_technique_header()?) + match self.read_technique_header() { + Ok(header) => Some(header), + Err(error) => { + self.problems + .push(error); + None + } + } } else { None }; @@ -133,9 +208,8 @@ impl<'i> Parser<'i> { break; } - // Check if this Technique is a a single set of one or more + // Check if this Technique is a single set of one or more // top-level Scope::SectionChunk - if is_section(self.source) && procedures.is_empty() { while !self.is_finished() { self.trim_whitespace(); @@ -144,47 +218,69 @@ impl<'i> Parser<'i> { } if is_section(self.source) { - let section = self.read_section()?; - sections.push(section); + match self.read_section() { + Ok(section) => sections.push(section), + Err(error) => { + self.problems + .push(error); + self.skip_to_next_line(); + } + } } else { - return Err(ParsingError::Unrecognized(self.offset)); + self.problems + .push(ParsingError::Unrecognized(self.offset)); + self.skip_to_next_line(); } } break; } else if is_procedure_declaration(self.source) { - let mut procedure = self.take_block_lines( + match self.take_block_lines( is_procedure_declaration, - |line| is_section(line) || is_procedure_declaration(line), + |line| is_section(line) || potential_procedure_declaration(line), |inner| inner.read_procedure(), - )?; - - // Check if there are sections following this procedure - while !self.is_finished() { - self.trim_whitespace(); - if self.is_finished() { - break; - } + ) { + Ok(mut procedure) => { + // Check if there are sections following this procedure + while !self.is_finished() { + self.trim_whitespace(); + if self.is_finished() { + break; + } - if is_section(self.source) { - let section = self.read_section()?; - if let Some(Element::Steps(ref mut steps)) = procedure - .elements - .last_mut() - { - steps.push(section); - } else { - // Create a new Steps element if one doesn't exist - procedure - .elements - .push(Element::Steps(vec![section])); + if is_section(self.source) { + match self.read_section() { + Ok(section) => { + if let Some(Element::Steps(ref mut steps)) = procedure + .elements + .last_mut() + { + steps.push(section); + } else { + // Create a new Steps element if one doesn't exist + procedure + .elements + .push(Element::Steps(vec![section])); + } + } + Err(error) => { + self.problems + .push(error); + break; + } + } + } else { + // If we hit something that's not a section, stop parsing sections + break; + } } - } else { - // If we hit something that's not a section, stop parsing sections - break; + + procedures.push(procedure); + } + Err(error) => { + self.problems + .push(error); } } - - procedures.push(procedure); } else if self .source .contains(':') @@ -192,16 +288,23 @@ impl<'i> Parser<'i> { // It might be that we've encountered a malformed procedure // declaration, so we try parsing it anyway to get a more // specific error message. - let _procedure = self.take_block_lines( + match self.take_block_lines( |_| true, // Accept the line regardless - |line| is_section(line) || is_procedure_declaration(line), + |line| is_section(line) || potential_procedure_declaration(line), |inner| inner.read_procedure(), - )?; - // If we reach here, read_procedure() succeeded but - // is_procedure_declaration() failed, which is undefined. - return Err(ParsingError::IllegalParserState(self.offset)); + ) { + Ok(procedure) => { + procedures.push(procedure); + } + Err(error) => { + self.problems + .push(error); + } + } } else { - return Err(ParsingError::Unrecognized(self.offset)); + self.problems + .push(ParsingError::Unrecognized(self.offset)); + self.skip_to_next_line(); } } @@ -213,13 +316,23 @@ impl<'i> Parser<'i> { None }; - Ok(Document { header, body }) + let document = Document { header, body }; + let errors = std::mem::take(&mut self.problems); + + if errors.is_empty() { + Ok(document) + } else { + // Remove redundant errors, keeping only the most specific error + // at each offset + let errors = remove_redundant_errors(errors); + Err(errors) + } } /// consume up to but not including newline (or end), then take newline - fn take_line(&mut self, f: F) -> Result> + fn take_line(&mut self, f: F) -> Result where - F: Fn(&mut Parser<'i>) -> Result>, + F: Fn(&mut Parser<'i>) -> Result, { let result = self.take_until(&['\n'], f)?; self.require_newline()?; @@ -236,9 +349,9 @@ impl<'i> Parser<'i> { start_predicate: P1, end_predicate: P2, function: F, - ) -> Result> + ) -> Result where - F: Fn(&mut Parser<'i>) -> Result>, + F: Fn(&mut Parser<'i>) -> Result, P1: Fn(&str) -> bool, P2: Fn(&str) -> bool, { @@ -274,13 +387,16 @@ impl<'i> Parser<'i> { let mut parser = self.subparser(0, block); // Pass to closure for processing - let result = function(&mut parser)?; + let result = function(&mut parser); + + self.problems + .extend(parser.problems); // Advance parser state self.source = &self.source[i..]; self.offset += i; - Ok(result) + result } fn take_block_chars( @@ -290,9 +406,9 @@ impl<'i> Parser<'i> { end_char: char, skip_string_content: bool, function: F, - ) -> Result> + ) -> Result where - F: Fn(&mut Parser<'i>) -> Result>, + F: Fn(&mut Parser<'i>) -> Result, { let mut l = 0; let mut begun = false; @@ -359,6 +475,9 @@ impl<'i> Parser<'i> { // Pass to closure for processing let result = function(&mut parser)?; + self.problems + .extend(parser.problems); + // Advance parser state self.source = &self.source[l..]; self.offset += l; @@ -370,9 +489,9 @@ impl<'i> Parser<'i> { &mut self, delimiter: &str, function: F, - ) -> Result> + ) -> Result where - F: Fn(&mut Parser<'i>) -> Result>, + F: Fn(&mut Parser<'i>) -> Result, { let width = delimiter.len(); @@ -402,6 +521,9 @@ impl<'i> Parser<'i> { // Pass to closure for processing let result = function(&mut parser)?; + self.problems + .extend(parser.problems); + // Advance parser state past the entire delimited block let end = end + width; self.source = &self.source[end..]; @@ -410,9 +532,9 @@ impl<'i> Parser<'i> { Ok(result) } - fn take_until(&mut self, pattern: &[char], function: F) -> Result> + fn take_until(&mut self, pattern: &[char], function: F) -> Result where - F: Fn(&mut Parser<'i>) -> Result>, + F: Fn(&mut Parser<'i>) -> Result, { let content = self.source; let end_pos = content @@ -425,6 +547,9 @@ impl<'i> Parser<'i> { // Pass to closure for processing let result = function(&mut parser)?; + self.problems + .extend(parser.problems); + // Advance parser state self.source = &self.source[end_pos..]; self.offset += end_pos; @@ -432,13 +557,9 @@ impl<'i> Parser<'i> { Ok(result) } - fn take_split_by( - &mut self, - delimiter: char, - function: F, - ) -> Result, ParsingError<'i>> + fn take_split_by(&mut self, delimiter: char, function: F) -> Result, ParsingError> where - F: Fn(&mut Parser<'i>) -> Result>, + F: Fn(&mut Parser<'i>) -> Result, { let content = self.source; let mut results = Vec::new(); @@ -453,6 +574,8 @@ impl<'i> Parser<'i> { } let mut parser = self.subparser(0, trimmed); results.push(function(&mut parser)?); + self.problems + .extend(parser.problems); } // Advance parser past all consumed content @@ -461,9 +584,9 @@ impl<'i> Parser<'i> { Ok(results) } - fn take_paragraph(&mut self, function: F) -> Result> + fn take_paragraph(&mut self, function: F) -> Result where - F: Fn(&mut Parser<'i>) -> Result>, + F: Fn(&mut Parser<'i>) -> Result, { // Find the end of this paragraph (\n\n or end of input) let content = self.source; @@ -475,6 +598,9 @@ impl<'i> Parser<'i> { let mut parser = self.subparser(0, paragraph); let result = function(&mut parser)?; + self.problems + .extend(parser.problems); + // Advance past this paragraph and the \n\n delimiter if present if i < content.len() { i += 2; @@ -499,6 +625,7 @@ impl<'i> Parser<'i> { original: self.original, source: content, offset: indent + self.offset, + problems: Vec::new(), }; // and return @@ -507,7 +634,7 @@ impl<'i> Parser<'i> { // because test cases and trivial single-line examples might omit an // ending newline, this also returns Ok if end of input is reached. - fn require_newline(&mut self) -> Result<(), ParsingError<'i>> { + fn require_newline(&mut self) -> Result<(), ParsingError> { for (i, c) in self .source .char_indices() @@ -537,7 +664,7 @@ impl<'i> Parser<'i> { // hard wire the version for now. If we ever grow to supporting multiple major // versions then this will be a lot more complicated than just dealing with a // different natural number here. - fn read_magic_line(&mut self) -> Result> { + fn read_magic_line(&mut self) -> Result { self.take_until(&['\n'], |inner| { let re = regex!(r"%\s*technique\s+v1\s*$"); @@ -552,7 +679,7 @@ impl<'i> Parser<'i> { // This one is awkward because if a SPDX line is present, then it really needs // to have a license, whereas the copyright part is optional. - fn read_spdx_line(&mut self) -> Result<(Option<&'i str>, Option<&'i str>), ParsingError<'i>> { + fn read_spdx_line(&mut self) -> Result<(Option<&'i str>, Option<&'i str>), ParsingError> { self.take_until(&['\n'], |inner| { let re = regex!(r"^!\s*([^;]+)(?:;\s*(?:\(c\)|\(C\)|©)\s*(.+))?$"); @@ -588,7 +715,7 @@ impl<'i> Parser<'i> { }) } - fn read_template_line(&mut self) -> Result, ParsingError<'i>> { + fn read_template_line(&mut self) -> Result, ParsingError> { self.take_until(&['\n'], |inner| { let re = regex!(r"^&\s*(.+)$"); @@ -606,7 +733,7 @@ impl<'i> Parser<'i> { }) } - pub fn read_technique_header(&mut self) -> Result, ParsingError<'i>> { + pub fn read_technique_header(&mut self) -> Result, ParsingError> { // Process magic line let version = if is_magic_line(self.source) { let result = self.read_magic_line()?; @@ -642,7 +769,7 @@ impl<'i> Parser<'i> { }) } - fn read_signature(&mut self) -> Result, ParsingError<'i>> { + fn read_signature(&mut self) -> Result, ParsingError> { let re = regex!(r"\s*(.+?)\s*->\s*(.+?)\s*$"); let cap = match re.captures(self.source) { @@ -678,7 +805,7 @@ impl<'i> Parser<'i> { Option>>, Option>, ), - ParsingError<'i>, + ParsingError, > { // These capture groups use .+? to make "match more than one, but // lazily" so that the subsequent grabs of whitespace and the all @@ -698,8 +825,11 @@ impl<'i> Parser<'i> { let text = one.as_str(); let (name, parameters) = if let Some((before, list)) = text.split_once('(') { - let name = validate_identifier(before) - .ok_or(ParsingError::InvalidIdentifier(self.offset, before))?; + let before = before.trim(); + let name = validate_identifier(before).ok_or(ParsingError::InvalidIdentifier( + self.offset, + before.to_string(), + ))?; // Extract parameters from parentheses if !list.ends_with(')') { @@ -713,7 +843,11 @@ impl<'i> Parser<'i> { let mut params = Vec::new(); for item in list.split(',') { let param = validate_identifier(item.trim_ascii()).ok_or( - ParsingError::InvalidIdentifier(self.offset, item.trim_ascii()), + ParsingError::InvalidIdentifier( + self.offset, + item.trim_ascii() + .to_string(), + ), )?; params.push(param); } @@ -722,8 +856,10 @@ impl<'i> Parser<'i> { (name, parameters) } else { - let name = validate_identifier(text) - .ok_or(ParsingError::InvalidIdentifier(self.offset, text))?; + let name = validate_identifier(text).ok_or(ParsingError::InvalidIdentifier( + self.offset, + text.to_string(), + ))?; (name, None) }; @@ -740,7 +876,7 @@ impl<'i> Parser<'i> { } // assumes we've set up the precondition that indeed there is a # present. - fn parse_procedure_title(&mut self) -> Result<&'i str, ParsingError<'i>> { + fn parse_procedure_title(&mut self) -> Result<&'i str, ParsingError> { self.trim_whitespace(); if self.peek_next_char() == Some('#') { @@ -752,108 +888,250 @@ impl<'i> Parser<'i> { } } - pub fn read_procedure(&mut self) -> Result, ParsingError<'i>> { - let procedure = self.take_block_lines( - is_procedure_declaration, - is_procedure_declaration, - |outer| { - // Extract the declaration, and parse it - let declaration = outer.take_block_lines( - is_procedure_declaration, - |line| is_procedure_body(line), - |inner| inner.parse_procedure_declaration(), - )?; + /// Parse a procedure with error recovery - collects multiple errors instead of stopping at the first one + pub fn read_procedure(&mut self) -> Result, ParsingError> { + // Find the procedure block boundaries + let mut i = 0; + let mut begun = false; - // Parse procedure elements in order - let mut elements = vec![]; + for line in self + .source + .lines() + { + if !begun && is_procedure_declaration(line) { + begun = true; + i += line.len() + 1; + continue; + } else if begun && is_procedure_declaration(line) { + // don't include this line + break; + } - while !outer.is_finished() { - let content = outer.source; + i += line.len() + 1; + } - if is_procedure_title(content) { - let title = outer.take_block_lines( - |line| { - line.trim_ascii_start() - .starts_with('#') - }, - |line| !line.starts_with('#'), - |inner| { - let text = inner.parse_procedure_title()?; - Ok(text) - }, - )?; - elements.push(Element::Title(title)); - } else if is_code_block(content) { - let expression = outer.read_code_block()?; - elements.push(Element::CodeBlock(expression)); - } else if is_attribute_assignment(content) { - let attribute_block = outer.read_attribute_scope()?; - elements.push(Element::Steps(vec![attribute_block])); - } else if is_step(content) { - let mut steps = vec![]; - while !outer.is_finished() && is_step(outer.source) { - let content = outer.source; - if is_step_dependent(content) { - let step = outer.read_step_dependent()?; - steps.push(step); - } else if is_step_parallel(content) { - let step = outer.read_step_parallel()?; - steps.push(step); - } else { + if i > self + .source + .len() + { + i -= 1; + } + + // Extract the procedure block + let block = &self.source[..i]; + let mut parser = self.subparser(0, block); + + // Parse the procedure with recovery + let result = self.parse_procedure_block(&mut parser); + + // Collect errors from the subparser + self.problems + .extend(std::mem::take(&mut parser.problems)); + + // Advance main parser state + self.source = &self.source[i..]; + self.offset += i; + + result + } + + fn parse_procedure_block( + &mut self, + parser: &mut Parser<'i>, + ) -> Result, ParsingError> { + // Extract the declaration with recovery + let declaration = match self.parse_declaration(parser) { + Ok(decl) => decl, + Err(err) => return Err(err), + }; + + // Parse procedure elements in order with recovery + let mut elements = vec![]; + + while !parser.is_finished() { + let content = parser.source; + + if is_procedure_title(content) { + match parser.take_block_lines( + |line| { + line.trim_ascii_start() + .starts_with('#') + }, + |line| !line.starts_with('#'), + |inner| { + let text = inner.parse_procedure_title()?; + Ok(text) + }, + ) { + Ok(title) => elements.push(Element::Title(title)), + Err(error) => { + self.problems + .push(error); + parser.skip_to_next_line(); + } + } + } else if is_code_block(content) { + match parser.read_code_block() { + Ok(expression) => elements.push(Element::CodeBlock(expression)), + Err(error) => { + self.problems + .push(error); + parser.skip_to_next_line(); + } + } + } else if is_attribute_assignment(content) { + match parser.read_attribute_scope() { + Ok(attribute_block) => elements.push(Element::Steps(vec![attribute_block])), + Err(error) => { + self.problems + .push(error); + parser.skip_to_next_line(); + } + } + } else if is_step(content) { + let mut steps = vec![]; + while !parser.is_finished() && is_step(parser.source) { + let content = parser.source; + if is_step_dependent(content) { + match parser.read_step_dependent() { + Ok(step) => steps.push(step), + Err(error) => { + self.problems + .push(error); + parser.skip_to_next_line(); break; } } - if !steps.is_empty() { - elements.push(Element::Steps(steps)); + } else if is_step_parallel(content) { + match parser.read_step_parallel() { + Ok(step) => steps.push(step), + Err(error) => { + self.problems + .push(error); + parser.skip_to_next_line(); + break; + } } - } else if malformed_step_pattern(content) { - // Detect and reject invalid step patterns - return Err(ParsingError::InvalidStep(outer.offset)); } else { - // Handle descriptive text - let description = outer.take_block_lines( - |line| { - !is_step(line) - && !is_procedure_title(line) - && !is_code_block(line) - && !malformed_step_pattern(line) - && !is_attribute_assignment(line) - }, - |line| { - is_step(line) - || is_procedure_title(line) - || is_code_block(line) - || malformed_step_pattern(line) - || is_attribute_assignment(line) - }, - |inner| { - let content = inner.source; - if !content.is_empty() { - inner.read_descriptive() - } else { - Ok(vec![]) - } - }, - )?; + break; + } + } + if !steps.is_empty() { + elements.push(Element::Steps(steps)); + } + } else if malformed_step_pattern(content) { + // Store error but continue parsing + self.problems + .push(ParsingError::InvalidStep(parser.offset)); + parser.skip_to_next_line(); + } else { + match parser.take_block_lines( + |line| { + !is_step(line) + && !is_procedure_title(line) + && !is_code_block(line) + && !malformed_step_pattern(line) + && !is_attribute_assignment(line) + }, + |line| { + is_step(line) + || is_procedure_title(line) + || is_code_block(line) + || malformed_step_pattern(line) + || is_attribute_assignment(line) + }, + |inner| { + let content = inner.source; + if !content.is_empty() { + inner.read_descriptive() + } else { + Ok(vec![]) + } + }, + ) { + Ok(description) => { if !description.is_empty() { elements.push(Element::Description(description)); } } + Err(error) => { + self.problems + .push(error); + parser.skip_to_next_line(); + } } + } + } - Ok(Procedure { - name: declaration.0, - parameters: declaration.1, - signature: declaration.2, - elements, - }) - }, - )?; + Ok(Procedure { + name: declaration.0, + parameters: declaration.1, + signature: declaration.2, + elements, + }) + } + + fn parse_declaration( + &mut self, + parser: &mut Parser<'i>, + ) -> Result< + ( + Identifier<'i>, + Option>>, + Option>, + ), + ParsingError, + > { + // Find declaration block boundaries + let mut i = 0; + let mut begun = false; + + for line in parser + .source + .lines() + { + if !begun && is_procedure_declaration(line) { + begun = true; + i += line.len() + 1; + continue; + } else if begun && is_procedure_body(line) { + // don't include this line + break; + } - Ok(procedure) + i += line.len() + 1; + } + + if i > parser + .source + .len() + { + i -= 1; + } + + // Extract declaration block + let block = &parser.source[..i]; + let mut inner = parser.subparser(0, block); + + // Try to parse declaration + match inner.parse_procedure_declaration() { + Ok(decl) => { + // Advance parser past declaration + parser.source = &parser.source[i..]; + parser.offset += i; + Ok(decl) + } + Err(err) => { + // Advance parser past declaration anyway + parser.source = &parser.source[i..]; + parser.offset += i; + // Return the error + Err(err) + } + } } - fn read_section(&mut self) -> Result, ParsingError<'i>> { + fn read_section(&mut self) -> Result, ParsingError> { self.take_block_lines(is_section, is_section, |outer| { // Parse the section header first let (numeral, title) = outer.parse_section_header()?; @@ -878,22 +1156,16 @@ impl<'i> Parser<'i> { break; } if is_procedure_declaration(outer.source) { - let procedure = outer.read_procedure()?; - procedures.push(procedure); + match outer.read_procedure() { + Ok(procedure) => procedures.push(procedure), + Err(_err) => { + // Error is already collected in outer.problems + // Just skip adding this procedure and continue + } + } } else { // Skip non-procedure content line by line - if let Some(newline_pos) = outer - .source - .find('\n') - { - outer.advance(newline_pos + 1); - } else { - outer.advance( - outer - .source - .len(), - ); - } + outer.skip_to_next_line(); } } Ok(Scope::SectionChunk { @@ -919,18 +1191,7 @@ impl<'i> Parser<'i> { steps.push(step); } else { // Skip unrecognized content line by line - if let Some(newline_pos) = outer - .source - .find('\n') - { - outer.advance(newline_pos + 1); - } else { - outer.advance( - outer - .source - .len(), - ); - } + outer.skip_to_next_line(); } } Ok(Scope::SectionChunk { @@ -942,9 +1203,7 @@ impl<'i> Parser<'i> { }) } - fn parse_section_header( - &mut self, - ) -> Result<(&'i str, Option>), ParsingError<'i>> { + fn parse_section_header(&mut self) -> Result<(&'i str, Option>), ParsingError> { self.trim_whitespace(); // Get the current line (up to newline or end) @@ -1000,13 +1259,13 @@ impl<'i> Parser<'i> { Ok((numeral, title)) } - fn read_code_block(&mut self) -> Result, ParsingError<'i>> { + fn read_code_block(&mut self) -> Result, ParsingError> { self.take_block_chars("a code block", '{', '}', true, |outer| { outer.read_expression() }) } - fn read_expression(&mut self) -> Result, ParsingError<'i>> { + fn read_expression(&mut self) -> Result, ParsingError> { self.trim_whitespace(); let content = self .source @@ -1046,7 +1305,7 @@ impl<'i> Parser<'i> { } } - fn read_foreach_expression(&mut self) -> Result, ParsingError<'i>> { + fn read_foreach_expression(&mut self) -> Result, ParsingError> { // Parse "foreach in " where pattern is either: // - identifier // - (identifier, identifier, ...) @@ -1075,7 +1334,7 @@ impl<'i> Parser<'i> { Ok(Expression::Foreach(identifiers, Box::new(expression))) } - fn read_identifiers(&mut self) -> Result>, ParsingError<'i>> { + fn read_identifiers(&mut self) -> Result>, ParsingError> { if self .source .starts_with('(') @@ -1122,7 +1381,7 @@ impl<'i> Parser<'i> { } } - fn read_repeat_expression(&mut self) -> Result, ParsingError<'i>> { + fn read_repeat_expression(&mut self) -> Result, ParsingError> { // Parse "repeat " self.advance(6); self.trim_whitespace(); @@ -1135,7 +1394,7 @@ impl<'i> Parser<'i> { Ok(Expression::Repeat(Box::new(expression))) } - fn read_binding_expression(&mut self) -> Result, ParsingError<'i>> { + fn read_binding_expression(&mut self) -> Result, ParsingError> { // Parse the expression before the ~ operator let expression = self.take_until(&['~'], |inner| inner.read_expression())?; @@ -1148,7 +1407,7 @@ impl<'i> Parser<'i> { Ok(Expression::Binding(Box::new(expression), identifiers)) } - fn read_tablet_expression(&mut self) -> Result, ParsingError<'i>> { + fn read_tablet_expression(&mut self) -> Result, ParsingError> { self.take_block_chars("a tablet", '[', ']', true, |outer| { let mut pairs = Vec::new(); @@ -1212,7 +1471,7 @@ impl<'i> Parser<'i> { }) } - fn parse_string_pieces(&mut self, raw: &'i str) -> Result>, ParsingError<'i>> { + fn parse_string_pieces(&mut self, raw: &'i str) -> Result>, ParsingError> { // Quick check: if no braces, just return a single text piece if !raw.contains('{') { return Ok(vec![Piece::Text(raw)]); @@ -1260,12 +1519,9 @@ impl<'i> Parser<'i> { current_pos = end_pos + 1; } None => { - // Unmatched brace - point to end of string content (at closing quote) - return Err(ParsingError::ExpectedMatchingChar( - self.offset + raw.len(), - "an interpolation", - '{', - '}', + // Unmatched brace - point to the opening brace position + return Err(ParsingError::UnclosedInterpolation( + self.offset + absolute_brace_start, )); } } @@ -1284,7 +1540,7 @@ impl<'i> Parser<'i> { /// Consume an identifier. As with the other smaller read methods, we do a /// general scan of the range here to get the relevant, then call the more /// detailed validation function to actually determine if it's a match. - fn read_identifier(&mut self) -> Result, ParsingError<'i>> { + fn read_identifier(&mut self) -> Result, ParsingError> { self.trim_whitespace(); let content = self.source; @@ -1294,8 +1550,10 @@ impl<'i> Parser<'i> { Some(i) => &content[0..i], }; - let identifier = validate_identifier(possible) - .ok_or(ParsingError::InvalidIdentifier(self.offset, possible))?; + let identifier = validate_identifier(possible).ok_or(ParsingError::InvalidIdentifier( + self.offset, + possible.to_string(), + ))?; self.advance(possible.len()); @@ -1303,7 +1561,7 @@ impl<'i> Parser<'i> { } /// Parse a numeric literal (integer or quantity) - fn read_numeric(&mut self) -> Result, ParsingError<'i>> { + fn read_numeric(&mut self) -> Result, ParsingError> { self.trim_whitespace(); let content = self.source; @@ -1318,7 +1576,7 @@ impl<'i> Parser<'i> { } /// Parse a simple integral number - fn read_numeric_integral(&mut self) -> Result, ParsingError<'i>> { + fn read_numeric_integral(&mut self) -> Result, ParsingError> { let content = self.source; if let Ok(amount) = content @@ -1333,7 +1591,7 @@ impl<'i> Parser<'i> { } /// Parse a scientific quantity with units - fn read_numeric_quantity(&mut self) -> Result, ParsingError<'i>> { + fn read_numeric_quantity(&mut self) -> Result, ParsingError> { self.trim_whitespace(); // Parse mantissa (required) @@ -1420,7 +1678,7 @@ impl<'i> Parser<'i> { Ok(Numeric::Scientific(quantity)) } - fn read_decimal_part(&mut self) -> Result> { + fn read_decimal_part(&mut self) -> Result { use crate::regex::*; let re = regex!(r"^-?[0-9]+(\.[0-9]+)?"); @@ -1437,7 +1695,7 @@ impl<'i> Parser<'i> { } } - fn read_uncertainty_part(&mut self) -> Result> { + fn read_uncertainty_part(&mut self) -> Result { use crate::regex::*; let re = regex!(r"^-?[0-9]+(\.[0-9]+)?"); @@ -1454,7 +1712,7 @@ impl<'i> Parser<'i> { } } - fn read_exponent_ascii(&mut self) -> Result> { + fn read_exponent_ascii(&mut self) -> Result { use crate::regex::*; let re = regex!(r"^-?[0-9]+"); @@ -1489,7 +1747,7 @@ impl<'i> Parser<'i> { } } - fn read_units_symbol(&mut self) -> Result<&'i str, ParsingError<'i>> { + fn read_units_symbol(&mut self) -> Result<&'i str, ParsingError> { // Scan through each character and find the first invalid one let mut valid_end = 0; @@ -1521,7 +1779,7 @@ impl<'i> Parser<'i> { } /// Parse a target like or - fn read_target(&mut self) -> Result, ParsingError<'i>> { + fn read_target(&mut self) -> Result, ParsingError> { self.take_block_chars("an invocation", '<', '>', true, |inner| { let content = inner.source; if content.starts_with("https://") { @@ -1534,7 +1792,7 @@ impl<'i> Parser<'i> { } /// Parse a complete invocation like (params) - fn read_invocation(&mut self) -> Result, ParsingError<'i>> { + fn read_invocation(&mut self) -> Result, ParsingError> { let target = self.read_target()?; let parameters = if self.peek_next_char() == Some('(') { Some(self.read_parameters()?) @@ -1545,7 +1803,7 @@ impl<'i> Parser<'i> { } /// Parse top-level ordered step - pub fn read_step_dependent(&mut self) -> Result, ParsingError<'i>> { + pub fn read_step_dependent(&mut self) -> Result, ParsingError> { self.take_block_lines(is_step_dependent, is_step_dependent, |outer| { outer.trim_whitespace(); @@ -1584,7 +1842,7 @@ impl<'i> Parser<'i> { } /// Parse a top-level concurrent step - pub fn read_step_parallel(&mut self) -> Result, ParsingError<'i>> { + pub fn read_step_parallel(&mut self) -> Result, ParsingError> { self.take_block_lines(is_step_parallel, is_step_parallel, |outer| { outer.trim_whitespace(); @@ -1612,7 +1870,7 @@ impl<'i> Parser<'i> { } /// Parse a dependent substep (a., b., c., etc.) - fn read_substep_dependent(&mut self) -> Result, ParsingError<'i>> { + fn read_substep_dependent(&mut self) -> Result, ParsingError> { self.take_block_lines( is_substep_dependent, |line| is_substep_dependent(line), @@ -1655,7 +1913,7 @@ impl<'i> Parser<'i> { } /// Parse a parallel substep (-) - fn read_substep_parallel(&mut self) -> Result, ParsingError<'i>> { + fn read_substep_parallel(&mut self) -> Result, ParsingError> { self.take_block_lines( is_substep_parallel, |line| is_substep_parallel(line), @@ -1685,7 +1943,7 @@ impl<'i> Parser<'i> { ) } - pub fn read_descriptive(&mut self) -> Result>, ParsingError<'i>> { + pub fn read_descriptive(&mut self) -> Result>, ParsingError> { self.take_block_lines( |_| true, |line| { @@ -1793,15 +2051,13 @@ impl<'i> Parser<'i> { } /// Parse enum responses like 'Yes' | 'No' | 'Not Applicable' - fn read_responses(&mut self) -> Result>, ParsingError<'i>> { + fn read_responses(&mut self) -> Result>, ParsingError> { self.take_split_by('|', |inner| { validate_response(inner.source).ok_or(ParsingError::InvalidResponse(inner.offset)) }) } - fn parse_multiline_content( - &mut self, - ) -> Result<(Option<&'i str>, Vec<&'i str>), ParsingError<'i>> { + fn parse_multiline_content(&mut self) -> Result<(Option<&'i str>, Vec<&'i str>), ParsingError> { let mut lines: Vec<&str> = self .source .lines() @@ -1868,7 +2124,7 @@ impl<'i> Parser<'i> { /// /// ( ```lang some content``` ) /// - fn read_parameters(&mut self) -> Result>, ParsingError<'i>> { + fn read_parameters(&mut self) -> Result>, ParsingError> { self.take_block_chars("parameters for a function", '(', ')', true, |outer| { let mut params = Vec::new(); @@ -1949,7 +2205,7 @@ impl<'i> Parser<'i> { } /// Parse attributes (roles and/or places) like @surgeon, ^kitchen, or @chef + ^bathroom - fn read_attributes(&mut self) -> Result>, ParsingError<'i>> { + fn read_attributes(&mut self) -> Result>, ParsingError> { self.take_line(|inner| { let mut attributes = Vec::new(); @@ -1969,8 +2225,9 @@ impl<'i> Parser<'i> { .get(1) .ok_or(ParsingError::Expected(inner.offset, "role name after @"))? .as_str(); - let identifier = validate_identifier(role_name) - .ok_or(ParsingError::InvalidIdentifier(inner.offset, role_name))?; + let identifier = validate_identifier(role_name).ok_or( + ParsingError::InvalidIdentifier(inner.offset, role_name.to_string()), + )?; attributes.push(Attribute::Role(identifier)); } // Check if it's a place '^' @@ -1979,8 +2236,9 @@ impl<'i> Parser<'i> { .get(1) .ok_or(ParsingError::Expected(inner.offset, "place name after ^"))? .as_str(); - let identifier = validate_identifier(place_name) - .ok_or(ParsingError::InvalidIdentifier(inner.offset, place_name))?; + let identifier = validate_identifier(place_name).ok_or( + ParsingError::InvalidIdentifier(inner.offset, place_name.to_string()), + )?; attributes.push(Attribute::Place(identifier)); } else { return Err(ParsingError::InvalidStep(inner.offset)); @@ -1993,7 +2251,7 @@ impl<'i> Parser<'i> { /// Parse role assignments, substeps, and code blocks, crucially with all /// of their subscopes also parsed. - fn read_scopes(&mut self) -> Result>, ParsingError<'i>> { + fn read_scopes(&mut self) -> Result>, ParsingError> { let mut scopes = Vec::new(); while !self.is_finished() { @@ -2037,7 +2295,7 @@ impl<'i> Parser<'i> { } /// Parse an attribute block (role or place assignment) with its subscopes - fn read_attribute_scope(&mut self) -> Result, ParsingError<'i>> { + fn read_attribute_scope(&mut self) -> Result, ParsingError> { self.take_block_lines(is_attribute_assignment, is_attribute_assignment, |outer| { let attributes = outer.read_attributes()?; let subscopes = outer.read_scopes()?; @@ -2050,7 +2308,7 @@ impl<'i> Parser<'i> { } /// Parse a code block scope with its subscopes (if any) - fn read_code_scope(&mut self) -> Result, ParsingError<'i>> { + fn read_code_scope(&mut self) -> Result, ParsingError> { self.take_block_lines( is_code_block, |_line| { @@ -2287,6 +2545,43 @@ fn is_procedure_declaration(content: &str) -> bool { } } +/// Detects any line that could potentially be a procedure declaration, +/// including malformed ones. Used for detecting the end-boundary of a +/// procedure. +/// +/// The specific motivating case for using this instead of the strict +/// is_procedure_declaration() is that a malformed attempted declaration like +/// +/// MyProcedure : +/// +/// would be consumed as part of the previous procedure's body, +/// preventing us from attempting to parse it as a separate procedure and +/// reporting what turns out to be a better error. +fn potential_procedure_declaration(content: &str) -> bool { + match content.split_once(':') { + Some((before, _after)) => { + let before = before.trim_ascii(); + // Check if it looks like an identifier (possibly with parameters) + // Accept any single token that could be an attempted identifier + if let Some((name, params)) = before.split_once('(') { + // Has parameters: check if params end with ')' + !name + .trim_ascii() + .is_empty() + && params.ends_with(')') + } else { + // No parameters: must be a single token (no spaces) that + // looks identifier-ish This excludes sentences like "Ask + // these questions: ..." + !before.is_empty() && + !before.contains(' ') && // Single token only + before.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') + } + } + None => false, + } +} + fn is_procedure_body(content: &str) -> bool { let line = content.trim_ascii(); @@ -3436,6 +3731,72 @@ This is the first one. } } + #[test] + fn test_potential_procedure_declaration_is_superset() { + // All valid procedure declarations must be matched by potential_procedure_declaration + + // Valid simple declarations + assert!(is_procedure_declaration("foo : A -> B")); + assert!(potential_procedure_declaration("foo : A -> B")); + + assert!(is_procedure_declaration("my_proc :")); + assert!(potential_procedure_declaration("my_proc :")); + + assert!(is_procedure_declaration("step123 : Input -> Output")); + assert!(potential_procedure_declaration("step123 : Input -> Output")); + + // Valid with parameters + assert!(is_procedure_declaration("process(a, b) : X -> Y")); + assert!(potential_procedure_declaration("process(a, b) : X -> Y")); + + assert!(is_procedure_declaration("calc(x) :")); + assert!(potential_procedure_declaration("calc(x) :")); + + // Invalid that should only match potential_ + assert!(!is_procedure_declaration("MyProcedure :")); // Capital letter + assert!(potential_procedure_declaration("MyProcedure :")); + + assert!(!is_procedure_declaration("123foo :")); // Starts with digit + assert!(potential_procedure_declaration("123foo :")); + + // Neither should match sentences with spaces + assert!(!is_procedure_declaration("Ask these questions :")); + assert!(!potential_procedure_declaration("Ask these questions :")); + + // Edge cases with whitespace + assert!(!is_procedure_declaration(" :")); // No name + assert!(!potential_procedure_declaration(" :")); + + assert!(is_procedure_declaration(" foo : ")); // Whitespace around + assert!(potential_procedure_declaration(" foo : ")); + + // Verify the superset property systematically + let test_cases = vec![ + "a :", + "z :", + "abc :", + "test_123 :", + "foo_bar_baz :", + "x() :", + "func(a) :", + "proc(a, b, c) :", + "test(x,y,z) :", + "a_1 :", + "test_ :", + "_test :", // Underscores + ]; + + for case in test_cases { + if is_procedure_declaration(case) { + assert!( + potential_procedure_declaration(case), + "potential_procedure_declaration must match all valid declarations: {}", + case + ); + } + } + } + #[test] fn test_take_block_lines_procedure_wrapper() { let mut input = Parser::new(); @@ -4394,6 +4755,143 @@ echo test ); } + #[test] + fn parse_collecting_errors_basic() { + let mut input = Parser::new(); + + // Test with valid content - should have no errors + input.initialize("% technique v1\nvalid_proc : A -> B\n# Title\nDescription"); + let result = input.parse_collecting_errors(); + match result { + Ok(document) => { + assert!(document + .header + .is_some()); + assert!(document + .body + .is_some()); + } + Err(_) => panic!("Expected successful parse for valid content"), + } + + // Test with invalid header - should collect header error + input.initialize("% wrong v1"); + let result = input.parse_collecting_errors(); + match result { + Ok(_) => panic!("Expected errors for invalid content"), + Err(errors) => { + assert!(errors.len() > 0); + assert!(errors + .iter() + .any(|e| matches!(e, ParsingError::InvalidHeader(_)))); + } + } + + // Test that the method returns Result instead of ParseResult + input.initialize("some content"); + let _result: Result> = input.parse_collecting_errors(); + // If this compiles, the method signature is correct + } + + #[test] + fn test_multiple_error_collection() { + use std::path::Path; + + // Create a string with 3 procedures: 2 with errors and 1 valid + let content = r#" +broken_proc1 : A -> + # This procedure has incomplete signature + + 1. Do something + +valid_proc : A -> B + # This is a valid procedure + + 1. Valid step + 2. Another valid step + +broken_proc2 : -> B + # This procedure has incomplete signature (missing domain) + + 1. Do something else + "#; + + let result = parse_with_recovery(Path::new("test.t"), content); + + // Assert that there are at least 2 errors (from the broken procedures) + match result { + Ok(_) => panic!("Result should have errors"), + Err(errors) => { + let l = errors.len(); + assert!(l >= 2, "Should have at least 2 errors, got {}", l) + } + }; + } + + #[test] + fn test_redundant_error_removal_needed() { + use std::path::Path; + + // Create a malformed procedure that could generate multiple errors at the same offset + let content = r#" +% technique v1 + +broken : + This is not a valid signature line + + 1. Step one + "#; + + let result = parse_with_recovery(Path::new("test.tq"), content); + + // Check that we get an error about the invalid signature + match result { + Err(errors) => { + // Debug: print what errors we actually get + eprintln!("Errors: {:?}", errors); + + // Verify no redundant errors at the same offset + let mut offsets = errors + .iter() + .map(|e| e.offset()) + .collect::>(); + offsets.sort(); + let original_len = offsets.len(); + offsets.dedup(); + assert_eq!( + offsets.len(), + original_len, + "Found redundant errors at same offset" + ); + } + Ok(_) => panic!("Expected errors for malformed content"), + } + } + + #[test] + fn test_redundant_error_removal_unclosed_interpolation() { + let mut input = Parser::new(); + + // Test that UnclosedInterpolation error takes precedence over generic + // ExpectedMatchingChar + input.initialize(r#"{ "string with {unclosed interpolation" }"#); + let result = input.read_code_block(); + + // Should get the specific UnclosedInterpolation error, not a generic + // one + match result { + Err(ParsingError::UnclosedInterpolation(_)) => { + // Good - we got the specific error + } + Err(other) => { + panic!("Expected UnclosedInterpolation error, got: {:?}", other); + } + Ok(_) => { + panic!("Expected error for unclosed interpolation, but parsing succeeded"); + } + } + } + #[test] fn multiline_code_inline() { let mut input = Parser::new(); diff --git a/src/problem/format.rs b/src/problem/format.rs index 281df1e..617fd18 100644 --- a/src/problem/format.rs +++ b/src/problem/format.rs @@ -5,7 +5,7 @@ use technique::{formatting::Render, language::LoadingError, parsing::parser::Par /// Format a parsing error with full details including source code context pub fn full_parsing_error<'i>( - error: &ParsingError<'i>, + error: &ParsingError, filename: &'i Path, source: &'i str, renderer: &impl Render, @@ -58,7 +58,7 @@ pub fn full_parsing_error<'i>( /// Format a parsing error with concise single-line output pub fn concise_parsing_error<'i>( - error: &ParsingError<'i>, + error: &ParsingError, filename: &'i Path, source: &'i str, renderer: &impl Render, diff --git a/src/problem/messages.rs b/src/problem/messages.rs index d38fbd4..c605028 100644 --- a/src/problem/messages.rs +++ b/src/problem/messages.rs @@ -2,10 +2,7 @@ use crate::problem::Present; use technique::{formatting::Render, language::*, parsing::parser::ParsingError}; /// Generate problem and detail messages for parsing errors using AST construction -pub fn generate_error_message<'i>( - error: &ParsingError<'i>, - renderer: &dyn Render, -) -> (String, String) { +pub fn generate_error_message<'i>(error: &ParsingError, renderer: &dyn Render) -> (String, String) { match error { ParsingError::IllegalParserState(_) => ( "Illegal parser state".to_string(), @@ -38,6 +35,16 @@ there was no more input remaining in the current scope. .trim_ascii() .to_string(), ), + ParsingError::UnclosedInterpolation(_) => ( + "Unclosed string interpolation".to_string(), + r#" +Every '{' that starts an interpolation within a string must have a +corresponding '}' to mark where the interpolation ends and the string +literal resumes. + "# + .trim_ascii() + .to_string(), + ), ParsingError::InvalidHeader(_) => { // Format the sample metadata using the same code as the formatter let mut formatted_example = String::new(); @@ -126,6 +133,7 @@ Technique. Common templates include {}, {}, and r#" Identifiers must start with a lowercase letter and contain only lower case letters, numbers, and underscores. Valid examples include: + {} {} {} diff --git a/tests/broken/BadDeclaration.tq b/tests/broken/BadDeclaration.tq index 0a0617d..0cdcff2 100644 --- a/tests/broken/BadDeclaration.tq +++ b/tests/broken/BadDeclaration.tq @@ -1 +1,3 @@ making_coffee : Ingredients Coffee + +makingCoffee : diff --git a/tests/parsing/errors.rs b/tests/parsing/errors.rs index 31fa9ae..88c7da5 100644 --- a/tests/parsing/errors.rs +++ b/tests/parsing/errors.rs @@ -1,24 +1,28 @@ #[cfg(test)] mod syntax { - use technique::parsing::parser::{Parser, ParsingError}; + use std::path::Path; + use technique::parsing::parser::{parse_with_recovery, ParsingError}; /// Helper function to check if parsing produces the expected error type fn expect_error(content: &str, expected: ParsingError) { - let mut input = Parser::new(); - input.initialize(content); - - let result = input.read_procedure(); + let result = parse_with_recovery(Path::new("test.tq"), content); match result { Ok(_) => panic!( "Expected parsing to fail, but it succeeded for input: {}", content ), - Err(error) => { - // Compare error types by discriminant - if std::mem::discriminant(&error) != std::mem::discriminant(&expected) { + Err(errors) => { + // Check if any error matches the expected type + let found_expected = errors + .iter() + .any(|error| { + std::mem::discriminant(error) == std::mem::discriminant(&expected) + }); + + if !found_expected { panic!( "Expected error type like {:?} but got: {:?} for input '{}'", - expected, error, content + expected, errors, content ); } } @@ -32,7 +36,7 @@ mod syntax { Making_Coffee : Ingredients -> Coffee "# .trim_ascii(), - ParsingError::InvalidIdentifier(0, ""), + ParsingError::InvalidIdentifier(0, "".to_string()), ); } @@ -43,7 +47,7 @@ Making_Coffee : Ingredients -> Coffee makeCoffee : Ingredients -> Coffee "# .trim_ascii(), - ParsingError::InvalidIdentifier(0, ""), + ParsingError::InvalidIdentifier(0, "".to_string()), ); } @@ -54,7 +58,7 @@ makeCoffee : Ingredients -> Coffee make-coffee : Ingredients -> Coffee "# .trim_ascii(), - ParsingError::InvalidIdentifier(0, ""), + ParsingError::InvalidIdentifier(0, "".to_string()), ); } @@ -65,7 +69,7 @@ make-coffee : Ingredients -> Coffee make coffee : Ingredients -> Coffee "# .trim_ascii(), - ParsingError::InvalidIdentifier(0, ""), + ParsingError::InvalidIdentifier(0, "".to_string()), ); } @@ -120,7 +124,7 @@ making_coffee : Ingredients Coffee making_coffee Ingredients -> Coffee "# .trim_ascii(), - ParsingError::InvalidDeclaration(0), + ParsingError::Unrecognized(0), ); } @@ -131,7 +135,7 @@ making_coffee Ingredients -> Coffee making_coffee(BadParam) : Ingredients -> Coffee "# .trim_ascii(), - ParsingError::InvalidIdentifier(14, ""), + ParsingError::InvalidIdentifier(14, "".to_string()), ); } diff --git a/tests/parsing/parser.rs b/tests/parsing/parser.rs index 23543ca..90ee577 100644 --- a/tests/parsing/parser.rs +++ b/tests/parsing/parser.rs @@ -4,7 +4,7 @@ mod verify { use std::vec; use technique::language::*; - use technique::parsing::parser::{self, Parser}; + use technique::parsing::parser::Parser; fn trim(s: &str) -> &str { s.strip_prefix('\n') @@ -1089,7 +1089,7 @@ before_leaving : #[test] fn section_parsing() { - let result = technique::parsing::parser::parse_via_taking( + let result = technique::parsing::parser::parse_with_recovery( Path::new(""), trim( r#" @@ -1177,7 +1177,7 @@ second_section_second_procedure : #[test] fn section_with_procedures_only() { - let result = technique::parsing::parser::parse_via_taking( + let result = technique::parsing::parser::parse_with_recovery( Path::new(""), trim( r#" @@ -1248,7 +1248,7 @@ procedure_four : Concept -> Architecture #[test] fn section_with_procedures() { - let result = parser::parse_via_taking( + let result = technique::parsing::parser::parse_with_recovery( Path::new(""), trim( r#"