Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Traversal Trees #120

Merged
merged 31 commits into from
Feb 11, 2023
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
31 commits
Select commit. Hold shift + click to select a range.
cf2a121
test example from issue #118
CrockAgile Dec 22, 2022
b9d6c51
tracing events earley traversals
CrockAgile Dec 22, 2022
bc640e1
stash
CrockAgile Dec 22, 2022
933c12b
minimal reproduction
CrockAgile Dec 22, 2022
50085e5
working?
CrockAgile Dec 23, 2022
cebffbf
test nullable productions issue #117
CrockAgile Dec 23, 2022
3ad8993
remove test logging
CrockAgile Dec 23, 2022
c811a86
repro wip test
CrockAgile Jan 19, 2023
4beef46
traversal tree wip
CrockAgile Jan 25, 2023
df28ade
wip, completion ownership hard
CrockAgile Jan 26, 2023
926547c
borrow checked
CrockAgile Jan 26, 2023
7d57827
infinite recursion
CrockAgile Jan 27, 2023
7a98438
ughhhh
CrockAgile Jan 29, 2023
f7fc5ba
reverse matching iter walk
CrockAgile Jan 29, 2023
f5f5795
cleanup unused
CrockAgile Jan 29, 2023
20f3abe
hmmm
CrockAgile Jan 30, 2023
b904b98
hmmmmmm
CrockAgile Jan 30, 2023
35add76
rename
CrockAgile Jan 31, 2023
0fc9c89
maybe working?
CrockAgile Jan 31, 2023
99eb081
tomorrow
CrockAgile Feb 1, 2023
bb34b9a
limit nullable hack
CrockAgile Feb 1, 2023
9b0d22b
all passing
CrockAgile Feb 1, 2023
38a69e3
remove nullability detection
CrockAgile Feb 1, 2023
d2c76d5
tracing instead of prints
CrockAgile Feb 1, 2023
b193163
log parse trees
CrockAgile Feb 1, 2023
b674cd3
infinite parse benchmark
CrockAgile Feb 2, 2023
c23801e
polish
CrockAgile Feb 4, 2023
a734d6c
clippy
CrockAgile Feb 4, 2023
e4c21b1
snapshot testing
CrockAgile Feb 8, 2023
692ac13
btree for ordered term completions
CrockAgile Feb 11, 2023
baf5a09
snapshot test bugs
CrockAgile Feb 11, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ license = "MIT"
[dependencies]
stacker = { version = "0.1.2", optional = true }
tracing = { version = "0.1.37", optional = true }
tracing-subscriber = { version = "0.3.16", optional = true }
tracing-subscriber = { version = "0.3.16", optional = true, features = ["env-filter"] }
tracing-flame = { version = "0.2.0", optional = true }

[dependencies.rand]
Expand Down
4 changes: 2 additions & 2 deletions benches/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ These benchmarks are not run during continuous integration testing. But if a dev

#### Tracing

BNF has an optional "tracing" feature which will provide tracing spans during parsing.
BNF has an optional "tracing" feature which will provide tracing spans during benchmarking.

The benchmarks are enabled to write these tracing spans to `tracing.folded`. This data can then be parsed to provide a flamegraph.

> RUST_LOG=TRACE cargo criterion --features "tracing" && cat tracing.folded | inferno-flamegraph > flamegraph.svg
> RUST_LOG=DEBUG cargo criterion --features "tracing" && cat tracing.folded | inferno-flamegraph > flamegraph.svg
31 changes: 28 additions & 3 deletions benches/bnf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@ use rand::seq::SliceRandom;
fn init_tracing() -> impl Drop {
use tracing_flame::FlameLayer;
use tracing_subscriber::{fmt, prelude::*};
let filter_layer = tracing_subscriber::EnvFilter::from_default_env();
let fmt_layer = fmt::Layer::default();

let (flame_layer, _guard) = FlameLayer::with_file("./tracing.folded").unwrap();

tracing_subscriber::registry()
.with(filter_layer)
.with(fmt_layer)
.with(flame_layer)
.init();
Expand All @@ -22,10 +23,10 @@ fn init_tracing() -> impl Drop {
fn init_tracing() {}

fn examples(c: &mut Criterion) {
let _tracing = init_tracing();
init_tracing();

#[cfg(feature = "tracing")]
let _span = tracing::span!(tracing::Level::TRACE, "BENCH ITER").entered();
let _span = tracing::span!(tracing::Level::DEBUG, "BENCH EXAMPLES").entered();

c.bench_function("parse postal", |b| {
let input = std::include_str!("../tests/fixtures/postal_address.terminated.input.bnf");
Expand Down Expand Up @@ -57,6 +58,30 @@ fn examples(c: &mut Criterion) {
let _: Vec<_> = polish_calc_grammar.parse_input(input).collect();
})
});

let infinite_grammar: Grammar = "
<a> ::= '' | <b>
<b> ::= <a>"
.parse()
.unwrap();

let input = "";
let mut group = c.benchmark_group("parse infinite nullable grammar");
for parse_count in (0usize..=100).step_by(25) {
group.throughput(criterion::Throughput::Elements(parse_count as u64));
group.bench_with_input(
criterion::BenchmarkId::from_parameter(parse_count),
&parse_count,
|b, &parse_count| {
b.iter(|| {
let _: Vec<_> = infinite_grammar
.parse_input(input)
.take(parse_count)
.collect();
})
},
);
}
}

criterion_group!(benches, examples);
Expand Down
3 changes: 2 additions & 1 deletion src/append_vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
/// Example usage: `append_only_vec_id!(pub(crate) ProductionId)`;
macro_rules! append_only_vec_id {
($visible:vis $id:ident) => {
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
$visible struct $id(usize);

impl From<usize> for $id {
Expand Down Expand Up @@ -59,6 +59,7 @@ where
pub fn get(&self, id: I) -> Option<&T> {
self.vec.get::<usize>(id.into())
}
#[cfg(test)]
pub fn iter(&self) -> impl Iterator<Item = &T> {
self.vec.iter()
}
Expand Down
138 changes: 8 additions & 130 deletions src/earley/grammar.rs
Original file line number Diff line number Diff line change
@@ -1,153 +1,30 @@
use crate::append_vec::{append_only_vec_id, AppendOnlyVec};
use crate::tracing;
use std::rc::Rc;

append_only_vec_id!(pub(crate) ProductionId);

/// A [`crate::Term`] which has been "matched" while parsing input.
#[derive(Debug, Clone)]
pub(crate) enum TermMatch<'gram> {
/// [`crate::Term::Terminal`] which matched with a string literal
Terminal(&'gram str),
/// [`crate::Term::Nonterminal`] which was matched with a fully completed production.
/// Reference-counted so a completed match can be shared cheaply rather than deep-cloned.
Nonterminal(Rc<ProductionMatch<'gram>>),
}

/// A `Term` to be "matched" with input.
/// Each right-hand-side term is either still pending or already resolved to a [`TermMatch`].
#[derive(Debug, Clone)]
pub(crate) enum TermMatching<'gram> {
/// A [`crate::Term`] which has not yet been matched (still borrowing the grammar's term)
Unmatched(&'gram crate::Term),
/// A [`crate::Term`] which has been matched
Matched(TermMatch<'gram>),
}

/// [`crate::Production`] offers multiple possible "right hand side" [`crate::Expression`]s, which is overly flexible for Earley parsing.
/// [`Production`] is a one-to-one relationship of [`crate::Term`] -> [`crate::Expression`].
#[derive(Debug)]
pub(crate) struct Production<'gram> {
/// Identifier assigned when the production is added to the grammar's arena
pub id: ProductionId,
/// "left hand side" term this production derives
pub lhs: &'gram crate::Term,
/// single "right hand side" expression; private — matching goes through `start_matching`
rhs: &'gram crate::Expression,
}

impl<'gram> Production<'gram> {
pub fn start_matching(&self) -> ProductionMatching<'gram> {
let prod_id = self.id;
let lhs = self.lhs;
let rhs = self.rhs.terms_iter().map(TermMatching::Unmatched).collect();
ProductionMatching {
prod_id,
lhs,
rhs,
matched_count: 0,
}
}
}

/// An attempt at matching a [`Production`]'s "right hand side" [`crate::Term`]s.
#[derive(Debug, Clone)]
pub(crate) struct ProductionMatching<'gram> {
/// Identifier of the [`Production`] this matching attempt came from
pub prod_id: ProductionId,
/// "left hand side" term of the production being matched
pub lhs: &'gram crate::Term,
/// "right hand side" [`TermMatching`]s which are partitioned by the matched and unmatched.
/// For example: [Matched, Matched, Matched, Unmatched, Unmatched]
rhs: Vec<TermMatching<'gram>>,
/// The progress cursor used to separate [`TermMatching`]s in the "right hand side"
matched_count: usize,
}

impl<'gram> ProductionMatching<'gram> {
    /// Attempt to "complete" the production, by having no unmatched terms remaining.
    /// Returns `None` as soon as any right-hand-side term is still unmatched.
    pub fn complete(&self) -> Option<ProductionMatch<'gram>> {
        let mut rhs: Vec<TermMatch> = Vec::with_capacity(self.rhs.len());
        for matching in &self.rhs {
            match matching {
                // a single unmatched term means this production is not yet complete
                TermMatching::Unmatched(_) => return None,
                TermMatching::Matched(matched) => rhs.push(matched.clone()),
            }
        }

        // total matched input length: terminals contribute their literal length,
        // nonterminals contribute the length of their completed sub-match
        let input_len = rhs
            .iter()
            .map(|matched| match matched {
                TermMatch::Terminal(terminal) => terminal.len(),
                TermMatch::Nonterminal(prod) => prod.input_len,
            })
            .sum();

        Some(ProductionMatch {
            lhs: self.lhs,
            rhs,
            input_len,
        })
    }
    /// Get the next unmatched [`crate::Term`], if any remain past the cursor.
    pub fn next(&self) -> Option<&'gram crate::Term> {
        match self.rhs.get(self.matched_count)? {
            TermMatching::Unmatched(term) => Some(*term),
            TermMatching::Matched(_) => {
                unreachable!("terms ahead of matching cursor cannot already be matched")
            }
        }
    }
    /// Get how many [`crate::Term`] have been matched so far.
    pub fn matched_count(&self) -> usize {
        self.matched_count
    }
    /// Add a [`TermMatch`], producing a new matching with the cursor advanced by one.
    /// Does **not** check if the added term is a valid match. That responsibility is on the caller,
    /// which likely has more context for faster matching of terms.
    pub fn add_term_match(&self, term_match: TermMatch<'gram>) -> Option<Self> {
        // bail out when there is no unmatched term left at the cursor
        self.next()?;

        let mut rhs = self.rhs.clone();
        rhs[self.matched_count] = TermMatching::Matched(term_match);

        Some(Self {
            lhs: self.lhs,
            prod_id: self.prod_id,
            rhs,
            matched_count: self.matched_count + 1,
        })
    }
}

/// A fully complete [`ProductionMatching`].
/// Created via [`ProductionMatching::complete`]
#[derive(Debug, Clone)]
pub(crate) struct ProductionMatch<'gram> {
pub lhs: &'gram crate::Term,
pub rhs: Vec<TermMatch<'gram>>,
pub input_len: usize,
pub rhs: &'gram crate::Expression,
}

type ProdArena<'gram> = AppendOnlyVec<Production<'gram>, ProductionId>;
type ProdTermMap<'gram> = std::collections::HashMap<&'gram crate::Term, Vec<ProductionId>>;

/// Similar to [`crate::Grammar`], but using [`Production`] and tables useful for parsing.
#[derive(Debug)]
pub(crate) struct GrammarMatching<'gram> {
pub(crate) struct ParseGrammar<'gram> {
productions: ProdArena<'gram>,
prods_by_lhs: ProdTermMap<'gram>,
}

impl<'gram, 'a> GrammarMatching<'gram> {
impl<'gram, 'a> ParseGrammar<'gram> {
pub fn new(grammar: &'gram crate::Grammar) -> Self {
let _span = tracing::span!(tracing::Level::TRACE, "GrammarMatching::new").entered();
let _span = tracing::span!(tracing::Level::DEBUG, "ParseGrammar_new").entered();

let mut productions = AppendOnlyVec::<Production, ProductionId>::new();
let mut prods_by_lhs = ProdTermMap::new();
Expand All @@ -173,8 +50,8 @@ impl<'gram, 'a> GrammarMatching<'gram> {
productions,
}
}
pub fn get_production_by_id(&'a self, prod_id: ProductionId) -> Option<&'a Production<'gram>> {
self.productions.get(prod_id)
pub fn get_production_by_id(&'a self, prod_id: ProductionId) -> &'a Production<'gram> {
self.productions.get(prod_id).expect("valid production ID")
}
pub fn get_productions_by_lhs(
&self,
Expand All @@ -184,8 +61,9 @@ impl<'gram, 'a> GrammarMatching<'gram> {
.get(lhs)
.into_iter()
.flatten()
.filter_map(|prod_id| self.get_production_by_id(*prod_id))
.map(|prod_id| self.get_production_by_id(*prod_id))
}
#[cfg(test)]
pub fn productions_iter(&self) -> impl Iterator<Item = &Production<'gram>> {
self.productions.iter()
}
Expand Down
2 changes: 1 addition & 1 deletion src/earley/input_range.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) struct InputRangeOffset {
pub start: usize,
pub len: usize,
Expand Down
Loading