Skip to content

Commit

Permalink
Implement ExactSizeIterator for Pair iterators (pest-parser#833)
Browse files Browse the repository at this point in the history
* ExactSizeIterator impl for pair iterators

Signed-off-by: MucTepDayH16 <denisdrozhzhin1999@gmail.com>

* fmt

Signed-off-by: MucTepDayH16 <denisdrozhzhin1999@gmail.com>

* Unit, Style, and Lint Testing

Signed-off-by: MucTepDayH16 <denisdrozhzhin1999@gmail.com>

* Evaluate iter len in linear time

Signed-off-by: MucTepDayH16 <denisdrozhzhin1999@gmail.com>

---------

Signed-off-by: MucTepDayH16 <denisdrozhzhin1999@gmail.com>
  • Loading branch information
MucTepDayH16 committed Apr 17, 2023
1 parent 5a05e69 commit 20f0842
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 0 deletions.
20 changes: 20 additions & 0 deletions grammars/benches/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ fn bench_line_col(c: &mut Criterion) {
// pairs nested iter (with LineIndex) time: [14.716 µs 14.822 µs 14.964 µs]
// pairs flatten iter (v2.5.2) time: [1.1230 µs 1.1309 µs 1.1428 µs]
// pairs flatten iter (with LineIndex) time: [5.4637 µs 5.6061 µs 5.7886 µs]
// pairs nested collect (v2.5.7) time: [8.4609 µs 8.4644 µs 8.4680 µs]
// pairs nested collect (ExactSize) time: [7.9492 µs 7.9604 µs 7.9751 µs]
// pairs flatten collect (v2.5.7) time: [11.471 µs 11.475 µs 11.480 µs]
// pairs flatten collect (ExactSize) time: [11.058 µs 11.062 µs 11.066 µs]
fn bench_pairs_iter(c: &mut Criterion) {
let data = include_str!("data.json");

Expand All @@ -111,6 +115,22 @@ fn bench_pairs_iter(c: &mut Criterion) {
}
});
});

// Benchmark: gather the top-level pairs of the parsed JSON document into a `Vec`.
c.bench_function("pairs nested collect", |b| {
// Parsing is done once here, outside the closure handed to `b.iter`.
let pairs = autocorrect::JsonParser::parse(autocorrect::Rule::item, data).unwrap();

b.iter(move || {
// `collect` consumes its iterator, so a clone is taken and the parsed pairs stay reusable.
let _pairs = pairs.clone().collect::<Vec<_>>();
});
});

// Benchmark: flatten the parsed JSON pairs and gather every flattened pair into a `Vec`.
c.bench_function("pairs flatten collect", |b| {
// Parsing is done once here, outside the closure handed to `b.iter`.
let pairs = autocorrect::JsonParser::parse(autocorrect::Rule::item, data).unwrap();

b.iter(move || {
// `flatten` and `collect` consume the iterator, so each run works on a fresh clone.
let _pairs = pairs.clone().flatten().collect::<Vec<_>>();
});
});
}

criterion_group!(benches, bench_json_parse, bench_line_col, bench_pairs_iter);
Expand Down
30 changes: 30 additions & 0 deletions pest/src/iterators/flat_pairs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,13 @@ impl<'i, R: RuleType> FlatPairs<'i, R> {
}
}

impl<'i, R: RuleType> ExactSizeIterator for FlatPairs<'i, R> {
    /// Returns the number of pairs this iterator has left to yield.
    ///
    /// The count is half the width of the `start..end` token window, on the
    /// basis that the token count is exactly twice the flattened pair count.
    fn len(&self) -> usize {
        // NOTE(review): halving is exact only while `start..end` covers matched
        // start/end tokens; confirm this still holds after `next`/`next_back`
        // have moved the cursors on deeply nested input.
        let token_span = self.end - self.start;
        token_span / 2
    }
}

impl<'i, R: RuleType> Iterator for FlatPairs<'i, R> {
type Item = Pair<'i, R>;

Expand All @@ -122,6 +129,11 @@ impl<'i, R: RuleType> Iterator for FlatPairs<'i, R> {

Some(pair)
}

fn size_hint(&self) -> (usize, Option<usize>) {
let len = <Self as ExactSizeIterator>::len(self);
(len, Some(len))
}
}

impl<'i, R: RuleType> DoubleEndedIterator for FlatPairs<'i, R> {
Expand Down Expand Up @@ -214,4 +226,22 @@ mod tests {
assert_eq!(pair.line_col(), (1, 5));
assert_eq!(pair.line_col(), pair.as_span().start_pos().line_col());
}

#[test]
fn exact_size_iter_for_pairs() {
    // Forward iteration over ASCII input: `len` must agree with `count`.
    let flat = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().flatten();
    let reported = flat.len();
    assert_eq!(reported, flat.count());

    // Same check with multi-byte characters in the input.
    let flat = AbcParser::parse(Rule::a, "我很漂亮efgh").unwrap().flatten();
    let reported = flat.len();
    assert_eq!(reported, flat.count());

    // Reversed iteration.
    let reversed = AbcParser::parse(Rule::a, "abc\nefgh")
        .unwrap()
        .flatten()
        .rev();
    let reported = reversed.len();
    assert_eq!(reported, reversed.count());

    // Taking one item leaves exactly one fewer than the initial `len`.
    let mut partial = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().flatten();
    let initial_len = partial.len();
    let _ = partial.next().unwrap();
    assert_eq!(partial.count() + 1, initial_len);
}
}
42 changes: 42 additions & 0 deletions pest/src/iterators/pairs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ pub struct Pairs<'i, R> {
input: &'i str,
start: usize,
end: usize,
pairs_count: usize,
line_index: Rc<LineIndex>,
}

Expand All @@ -53,11 +54,24 @@ pub fn new<'i, R: RuleType>(
None => Rc::new(LineIndex::new(input)),
};

// Count the top-level pairs in `queue[start..end]` up front; the total is
// stored in the `pairs_count` field of the returned `Pairs`.
let mut pairs_count = 0;
let mut cursor = start;
while cursor < end {
// `cursor` is expected to sit on a `Start` token. Jump to the index right
// after its matching end token (`end_token_index + 1`), which skips every
// token nested inside this pair.
cursor = match queue[cursor] {
QueueableToken::Start {
end_token_index, ..
} => end_token_index,
// Any other token at this position is treated as impossible and panics.
_ => unreachable!(),
} + 1;
pairs_count += 1;
}

Pairs {
queue,
input,
start,
end,
pairs_count,
line_index,
}
}
Expand Down Expand Up @@ -346,15 +360,28 @@ impl<'i, R: RuleType> Pairs<'i, R> {
}
}

impl<'i, R: RuleType> ExactSizeIterator for Pairs<'i, R> {
/// Returns the number of pairs left in this iterator.
///
/// This only reads the stored `pairs_count` field; no tokens are walked.
#[inline]
fn len(&self) -> usize {
self.pairs_count
}
}

impl<'i, R: RuleType> Iterator for Pairs<'i, R> {
    type Item = Pair<'i, R>;

    /// Yields the pair returned by `peek`, then advances past it.
    ///
    /// Returns `None`, leaving the iterator untouched, when `peek` has
    /// nothing to offer.
    fn next(&mut self) -> Option<Self::Item> {
        match self.peek() {
            None => None,
            Some(head) => {
                // Move `start` just past the index reported by `self.pair()`
                // and keep the cached remaining count in step.
                self.start = self.pair() + 1;
                self.pairs_count -= 1;
                Some(head)
            }
        }
    }

    /// Reports the exact remaining length as both the lower and upper bound.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = ExactSizeIterator::len(self);
        (remaining, Some(remaining))
    }
}

impl<'i, R: RuleType> DoubleEndedIterator for Pairs<'i, R> {
Expand All @@ -364,6 +391,7 @@ impl<'i, R: RuleType> DoubleEndedIterator for Pairs<'i, R> {
}

self.end = self.pair_from_end();
self.pairs_count -= 1;

let pair = unsafe {
pair::new(
Expand Down Expand Up @@ -640,4 +668,18 @@ mod tests {
assert_eq!(right_numbers.next().unwrap().as_str(), "2");
assert_eq!(right_numbers.next(), None);
}

#[test]
fn exact_size_iter_for_pairs() {
    // Forward iteration: `len` must agree with `count`.
    let forward = AbcParser::parse(Rule::a, "abc\nefgh").unwrap();
    let reported = forward.len();
    assert_eq!(reported, forward.count());

    // Reversed iteration.
    let reversed = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().rev();
    let reported = reversed.len();
    assert_eq!(reported, reversed.count());

    // Taking one item leaves exactly one fewer than the initial `len`.
    let mut partial = AbcParser::parse(Rule::a, "abc\nefgh").unwrap();
    let initial_len = partial.len();
    let _ = partial.next().unwrap();
    assert_eq!(partial.count() + 1, initial_len);
}
}
28 changes: 28 additions & 0 deletions pest/src/iterators/tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,12 @@ impl<'i, R: RuleType> Tokens<'i, R> {
}
}

impl<'i, R: RuleType> ExactSizeIterator for Tokens<'i, R> {
/// Returns the number of tokens left in this iterator.
///
/// Computed as the width of the `start..end` index window.
fn len(&self) -> usize {
self.end - self.start
}
}

impl<'i, R: RuleType> Iterator for Tokens<'i, R> {
type Item = Token<'i, R>;

Expand All @@ -106,6 +112,11 @@ impl<'i, R: RuleType> Iterator for Tokens<'i, R> {

Some(token)
}

fn size_hint(&self) -> (usize, Option<usize>) {
let len = <Self as ExactSizeIterator>::len(self);
(len, Some(len))
}
}

impl<'i, R: RuleType> DoubleEndedIterator for Tokens<'i, R> {
Expand Down Expand Up @@ -143,4 +154,21 @@ mod tests {
let reverse_tokens = pairs.tokens().rev().collect::<Vec<Token<'_, Rule>>>();
assert_eq!(tokens, reverse_tokens);
}

#[test]
fn exact_size_iter_for_tokens() {
    // Forward iteration over ASCII input: `len` must agree with `count`.
    let forward = AbcParser::parse(Rule::a, "abcde").unwrap().tokens();
    let reported = forward.len();
    assert_eq!(reported, forward.count());

    // Same check with multi-byte characters in the input.
    let forward = AbcParser::parse(Rule::a, "我很漂亮e").unwrap().tokens();
    let reported = forward.len();
    assert_eq!(reported, forward.count());

    // Reversed iteration.
    let reversed = AbcParser::parse(Rule::a, "abcde").unwrap().tokens().rev();
    let reported = reversed.len();
    assert_eq!(reported, reversed.count());

    // Taking one item leaves exactly one fewer than the initial `len`.
    let mut partial = AbcParser::parse(Rule::a, "abcde").unwrap().tokens();
    let initial_len = partial.len();
    let _ = partial.next().unwrap();
    assert_eq!(partial.count() + 1, initial_len);
}
}

0 comments on commit 20f0842

Please sign in to comment.