Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Traversal Trees #120

Merged
merged 31 commits into from
Feb 11, 2023
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
31 commits
Select commit. Hold shift + click to select a range.
cf2a121
test example from issue #118
CrockAgile Dec 22, 2022
b9d6c51
tracing events earley traversals
CrockAgile Dec 22, 2022
bc640e1
stash
CrockAgile Dec 22, 2022
933c12b
minimal reproduction
CrockAgile Dec 22, 2022
50085e5
working?
CrockAgile Dec 23, 2022
cebffbf
test nullable productions issue #117
CrockAgile Dec 23, 2022
3ad8993
remove test logging
CrockAgile Dec 23, 2022
c811a86
repro wip test
CrockAgile Jan 19, 2023
4beef46
traversal tree wip
CrockAgile Jan 25, 2023
df28ade
wip, completion ownership hard
CrockAgile Jan 26, 2023
926547c
borrow checked
CrockAgile Jan 26, 2023
7d57827
infinite recursion
CrockAgile Jan 27, 2023
7a98438
ughhhh
CrockAgile Jan 29, 2023
f7fc5ba
reverse matching iter walk
CrockAgile Jan 29, 2023
f5f5795
cleanup unused
CrockAgile Jan 29, 2023
20f3abe
hmmm
CrockAgile Jan 30, 2023
b904b98
hmmmmmm
CrockAgile Jan 30, 2023
35add76
rename
CrockAgile Jan 31, 2023
0fc9c89
maybe working?
CrockAgile Jan 31, 2023
99eb081
tomorrow
CrockAgile Feb 1, 2023
bb34b9a
limit nullable hack
CrockAgile Feb 1, 2023
9b0d22b
all passing
CrockAgile Feb 1, 2023
38a69e3
remove nullability detection
CrockAgile Feb 1, 2023
d2c76d5
tracing instead of prints
CrockAgile Feb 1, 2023
b193163
log parse trees
CrockAgile Feb 1, 2023
b674cd3
infinite parse benchmark
CrockAgile Feb 2, 2023
c23801e
polish
CrockAgile Feb 4, 2023
a734d6c
clippy
CrockAgile Feb 4, 2023
e4c21b1
snapshot testing
CrockAgile Feb 8, 2023
692ac13
btree for ordered term completions
CrockAgile Feb 11, 2023
baf5a09
snapshot test bugs
CrockAgile Feb 11, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ license = "MIT"
[dependencies]
stacker = { version = "0.1.2", optional = true }
tracing = { version = "0.1.37", optional = true }
tracing-subscriber = { version = "0.3.16", optional = true }
tracing-subscriber = { version = "0.3.16", optional = true, features = ["env-filter"] }
tracing-flame = { version = "0.2.0", optional = true }

[dependencies.rand]
Expand Down
4 changes: 2 additions & 2 deletions benches/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ These benchmarks are not run during continuous integration testing. But if a dev

#### Tracing

BNF has an optional "tracing" feature which will provide tracing spans during parsing.
BNF has an optional "tracing" feature which will provide tracing spans during benchmarking.

The benchmarks are enabled to write these tracing spans to `tracing.folded`. This data can then be parsed to provide a flamegraph.

> RUST_LOG=TRACE cargo criterion --features "tracing" && cat tracing.folded | inferno-flamegraph > flamegraph.svg
> RUST_LOG=DEBUG cargo criterion --features "tracing" && cat tracing.folded | inferno-flamegraph > flamegraph.svg
31 changes: 28 additions & 3 deletions benches/bnf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@ use rand::seq::SliceRandom;
fn init_tracing() -> impl Drop {
use tracing_flame::FlameLayer;
use tracing_subscriber::{fmt, prelude::*};
let filter_layer = tracing_subscriber::EnvFilter::from_default_env();
let fmt_layer = fmt::Layer::default();

let (flame_layer, _guard) = FlameLayer::with_file("./tracing.folded").unwrap();

tracing_subscriber::registry()
.with(filter_layer)
.with(fmt_layer)
.with(flame_layer)
.init();
Expand All @@ -22,10 +23,10 @@ fn init_tracing() -> impl Drop {
fn init_tracing() {}

fn examples(c: &mut Criterion) {
let _tracing = init_tracing();
init_tracing();

#[cfg(feature = "tracing")]
let _span = tracing::span!(tracing::Level::TRACE, "BENCH ITER").entered();
let _span = tracing::span!(tracing::Level::DEBUG, "BENCH EXAMPLES").entered();

c.bench_function("parse postal", |b| {
let input = std::include_str!("../tests/fixtures/postal_address.terminated.input.bnf");
Expand Down Expand Up @@ -57,6 +58,30 @@ fn examples(c: &mut Criterion) {
let _: Vec<_> = polish_calc_grammar.parse_input(input).collect();
})
});

let infinite_grammar: Grammar = "
<a> ::= '' | <b>
<b> ::= <a>"
.parse()
.unwrap();

let input = "";
let mut group = c.benchmark_group("parse infinite nullable grammar");
for parse_count in (0usize..=100).step_by(25) {
group.throughput(criterion::Throughput::Elements(parse_count as u64));
group.bench_with_input(
criterion::BenchmarkId::from_parameter(parse_count),
&parse_count,
|b, &parse_count| {
b.iter(|| {
let _: Vec<_> = infinite_grammar
.parse_input(input)
.take(parse_count)
.collect();
})
},
);
}
}

criterion_group!(benches, examples);
Expand Down
3 changes: 2 additions & 1 deletion src/append_vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
/// Example usage: `append_only_vec_id!(pub(crate) ProductionId)`;
macro_rules! append_only_vec_id {
($visible:vis $id:ident) => {
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
$visible struct $id(usize);

impl From<usize> for $id {
Expand Down Expand Up @@ -59,6 +59,7 @@ where
pub fn get(&self, id: I) -> Option<&T> {
self.vec.get::<usize>(id.into())
}
#[cfg(test)]
pub fn iter(&self) -> impl Iterator<Item = &T> {
self.vec.iter()
}
Expand Down
138 changes: 8 additions & 130 deletions src/earley/grammar.rs
Original file line number Diff line number Diff line change
@@ -1,153 +1,30 @@
use crate::append_vec::{append_only_vec_id, AppendOnlyVec};
use crate::tracing;
use std::rc::Rc;

append_only_vec_id!(pub(crate) ProductionId);

/// A [`crate::Term`] which has been "matched" while parsing input.
#[derive(Debug, Clone)]
pub(crate) enum TermMatch<'gram> {
/// [`crate::Term::Terminal`] which matched with a string literal
Terminal(&'gram str),
/// [`crate::Term::Nonterminal`] which was matched with a fully completed production.
/// Reference-counted so a completed match can be shared cheaply rather than deep-cloned.
Nonterminal(Rc<ProductionMatch<'gram>>),
}

/// A `Term` to be "matched" with input.
/// Each right-hand-side term is either still pending or already resolved to a [`TermMatch`].
#[derive(Debug, Clone)]
pub(crate) enum TermMatching<'gram> {
/// A [`crate::Term`] which has not yet been matched (still borrowing the grammar's term)
Unmatched(&'gram crate::Term),
/// A [`crate::Term`] which has been matched
Matched(TermMatch<'gram>),
}

/// [`crate::Production`] offers multiple possible "right hand side" [`crate::Expression`]s, which is overly flexible for Earley parsing.
/// [`Production`] is a one-to-one relationship of [`crate::Term`] -> [`crate::Expression`].
#[derive(Debug)]
pub(crate) struct Production<'gram> {
/// Identifier assigned when the production is added to the grammar's arena
pub id: ProductionId,
/// "left hand side" term this production derives
pub lhs: &'gram crate::Term,
/// single "right hand side" expression; private — matching goes through `start_matching`
rhs: &'gram crate::Expression,
}

impl<'gram> Production<'gram> {
pub fn start_matching(&self) -> ProductionMatching<'gram> {
let prod_id = self.id;
let lhs = self.lhs;
let rhs = self.rhs.terms_iter().map(TermMatching::Unmatched).collect();
ProductionMatching {
prod_id,
lhs,
rhs,
matched_count: 0,
}
}
}

/// An attempt at matching a [`Production`]'s "right hand side" [`crate::Term`]s.
#[derive(Debug, Clone)]
pub(crate) struct ProductionMatching<'gram> {
/// Identifier of the [`Production`] this matching attempt came from
pub prod_id: ProductionId,
/// "left hand side" term of the production being matched
pub lhs: &'gram crate::Term,
/// "right hand side" [`TermMatching`]s which are partitioned by the matched and unmatched.
/// For example: [Matched, Matched, Matched, Unmatched, Unmatched]
rhs: Vec<TermMatching<'gram>>,
/// The progress cursor used to separate [`TermMatching`]s in the "right hand side"
matched_count: usize,
}

impl<'gram> ProductionMatching<'gram> {
    /// Attempt to "complete" the production, by having no unmatched terms remaining.
    /// Returns `None` as soon as any right-hand-side term is still unmatched.
    pub fn complete(&self) -> Option<ProductionMatch<'gram>> {
        let mut rhs: Vec<TermMatch> = Vec::with_capacity(self.rhs.len());
        for matching in &self.rhs {
            match matching {
                // a single unmatched term means this production is not yet complete
                TermMatching::Unmatched(_) => return None,
                TermMatching::Matched(matched) => rhs.push(matched.clone()),
            }
        }

        // total matched input length: terminals contribute their literal length,
        // nonterminals contribute the length of their completed sub-match
        let input_len = rhs
            .iter()
            .map(|matched| match matched {
                TermMatch::Terminal(terminal) => terminal.len(),
                TermMatch::Nonterminal(prod) => prod.input_len,
            })
            .sum();

        Some(ProductionMatch {
            lhs: self.lhs,
            rhs,
            input_len,
        })
    }
    /// Get the next unmatched [`crate::Term`], if any remain past the cursor.
    pub fn next(&self) -> Option<&'gram crate::Term> {
        match self.rhs.get(self.matched_count)? {
            TermMatching::Unmatched(term) => Some(*term),
            TermMatching::Matched(_) => {
                unreachable!("terms ahead of matching cursor cannot already be matched")
            }
        }
    }
    /// Get how many [`crate::Term`] have been matched so far.
    pub fn matched_count(&self) -> usize {
        self.matched_count
    }
    /// Add a [`TermMatch`], producing a new matching with the cursor advanced by one.
    /// Does **not** check if the added term is a valid match. That responsibility is on the caller,
    /// which likely has more context for faster matching of terms.
    pub fn add_term_match(&self, term_match: TermMatch<'gram>) -> Option<Self> {
        // bail out when there is no unmatched term left at the cursor
        self.next()?;

        let mut rhs = self.rhs.clone();
        rhs[self.matched_count] = TermMatching::Matched(term_match);

        Some(Self {
            lhs: self.lhs,
            prod_id: self.prod_id,
            rhs,
            matched_count: self.matched_count + 1,
        })
    }
}

/// A fully complete [`ProductionMatching`].
/// Created via [`ProductionMatching::complete`]
#[derive(Debug, Clone)]
pub(crate) struct ProductionMatch<'gram> {
pub lhs: &'gram crate::Term,
pub rhs: Vec<TermMatch<'gram>>,
pub input_len: usize,
pub rhs: &'gram crate::Expression,
}

type ProdArena<'gram> = AppendOnlyVec<Production<'gram>, ProductionId>;
type ProdTermMap<'gram> = std::collections::HashMap<&'gram crate::Term, Vec<ProductionId>>;

/// Similar to [`crate::Grammar`], but using [`Production`] and tables useful for parsing.
#[derive(Debug)]
pub(crate) struct GrammarMatching<'gram> {
pub(crate) struct ParseGrammar<'gram> {
productions: ProdArena<'gram>,
prods_by_lhs: ProdTermMap<'gram>,
}

impl<'gram, 'a> GrammarMatching<'gram> {
impl<'gram, 'a> ParseGrammar<'gram> {
pub fn new(grammar: &'gram crate::Grammar) -> Self {
let _span = tracing::span!(tracing::Level::TRACE, "GrammarMatching::new").entered();
let _span = tracing::span!(tracing::Level::DEBUG, "ParseGrammar_new").entered();

let mut productions = AppendOnlyVec::<Production, ProductionId>::new();
let mut prods_by_lhs = ProdTermMap::new();
Expand All @@ -173,8 +50,8 @@ impl<'gram, 'a> GrammarMatching<'gram> {
productions,
}
}
pub fn get_production_by_id(&'a self, prod_id: ProductionId) -> Option<&'a Production<'gram>> {
self.productions.get(prod_id)
pub fn get_production_by_id(&'a self, prod_id: ProductionId) -> &'a Production<'gram> {
self.productions.get(prod_id).expect("valid production ID")
}
pub fn get_productions_by_lhs(
&self,
Expand All @@ -184,8 +61,9 @@ impl<'gram, 'a> GrammarMatching<'gram> {
.get(lhs)
.into_iter()
.flatten()
.filter_map(|prod_id| self.get_production_by_id(*prod_id))
.map(|prod_id| self.get_production_by_id(*prod_id))
}
#[cfg(test)]
pub fn productions_iter(&self) -> impl Iterator<Item = &Production<'gram>> {
self.productions.iter()
}
Expand Down
2 changes: 1 addition & 1 deletion src/earley/input_range.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) struct InputRangeOffset {
pub start: usize,
pub len: usize,
Expand Down
Loading