From e31217e473c0907cc0c12c076159f25bd1ee1613 Mon Sep 17 00:00:00 2001 From: Jeffrey Crocker Date: Tue, 13 Sep 2022 12:14:41 -0400 Subject: [PATCH] Earley predict on complete input An original (maybe overeager!) optimization was included with Earley parsing. If there was no remaining input text, then no more predictions from the grammar were attempted. In most cases, this saves Earley some work. But the BNF crate supports empty production rules. This invalidates this optimization, because an empty production may still be successful even with no remaining input text. This commit only removes this optimization built on that false assumption. It would be possible to reintroduce this improvement, but *only* for grammars without any empty productions. --- src/earley.rs | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/src/earley.rs b/src/earley.rs index ec1fcb6..b227cba 100644 --- a/src/earley.rs +++ b/src/earley.rs @@ -502,10 +502,6 @@ impl<'gram> Iterator for ParseIter<'gram> { match matching { // predict Some(matching @ Term::Nonterminal(_)) => { - // no need to predict for more input if input is complete - if input_range.is_complete() { - break; - } let predictions = predict(matching, &input_range, &self.grammar); self.state_arena.alloc_extend(predictions); } @@ -590,6 +586,29 @@ mod tests { assert_eq!(parses.count(), 2); } + #[test] + fn parse_complete_empty() { + let grammar: Grammar = " ::= \"hi\" + ::= \"\"" + .parse() + .unwrap(); + + let input = "hi"; + + let parses = parse(&grammar, input); + assert_eq!(parses.count(), 1); + } + + #[test] + fn parse_empty() { + let grammar: Grammar = " ::= \"\"".parse().unwrap(); + + let input = ""; + + let parses = parse(&grammar, input); + assert_eq!(parses.count(), 1); + } + // (source: ) // Sum -> Sum [+-] Product // Sum -> Product