Skip to content

Commit

Permalink
A new stack implementation that should be faster and less space-consu…
Browse files Browse the repository at this point in the history
…ming (#905)

* Benchmark and test Stack

* A new implementation of Stack.

* Implement Default for Stack.

To make clippy happy.

* Remove redundant clone.
  • Loading branch information
TheVeryDarkness committed Aug 2, 2023
1 parent fe53557 commit 734ba66
Show file tree
Hide file tree
Showing 4 changed files with 233 additions and 38 deletions.
9 changes: 8 additions & 1 deletion pest/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@ const_prec_climber = []
[dependencies]
ucd-trie = { version = "0.1.5", default-features = false }
serde = { version = "1.0.145", optional = true }
serde_json = { version = "1.0.85", optional = true}
serde_json = { version = "1.0.85", optional = true }
thiserror = { version = "1.0.37", optional = true }
memchr = { version = "2", optional = true }

[dev-dependencies]
criterion = { version = "0.5.1", features = ["html_reports"] }

[[bench]]
name = "stack"
harness = false
88 changes: 88 additions & 0 deletions pest/benches/stack.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.

use criterion::{criterion_group, criterion_main, Criterion};
use pest::Stack;

fn snapshot_push_restore<T: Clone>(elements: impl Iterator<Item = T> + Clone) {
let mut stack = Stack::<T>::new();
for elem in elements {
stack.snapshot();
stack.push(elem.clone());
stack.restore();
stack.push(elem);
}
}

fn snapshot_push_clear_snapshot<T: Clone>(elements: impl Iterator<Item = T> + Clone) {
let mut stack = Stack::<T>::new();
for elem in elements {
stack.snapshot();
stack.push(elem);
stack.clear_snapshot();
}
}

fn snapshot_pop_restore<T: Clone>(elements: impl Iterator<Item = T>) {
let mut stack = Stack::<T>::new();
for elem in elements {
stack.push(elem);
}
while !stack.is_empty() {
stack.snapshot();
stack.pop();
stack.restore();
stack.pop();
}
}

fn snapshot_pop_clear<T: Clone>(elements: impl Iterator<Item = T>) {
let mut stack = Stack::<T>::new();
for elem in elements {
stack.push(elem);
}
while !stack.is_empty() {
stack.snapshot();
stack.pop();
stack.clear_snapshot();
}
}

fn benchmark(b: &mut Criterion) {
use core::iter::repeat;
// use criterion::black_box;
let times = 10000usize;
let small = 0..times;
let medium = ("", 0usize, 1usize);
let medium = repeat(medium).take(times);
let large = [""; 64];
let large = repeat(large).take(times);
macro_rules! test_series {
($kind:ident) => {
b.bench_function(stringify!(push - restore - $kind), |b| {
b.iter(|| snapshot_push_restore($kind.clone()))
})
.bench_function(stringify!(push - clear - $kind), |b| {
b.iter(|| snapshot_push_clear_snapshot($kind.clone()))
})
.bench_function(stringify!(pop - restore - $kind), |b| {
b.iter(|| snapshot_pop_restore($kind.clone()))
})
.bench_function(stringify!(pop - clear - $kind), |b| {
b.iter(|| snapshot_pop_clear($kind.clone()))
})
};
}
test_series!(small);
test_series!(medium);
test_series!(large);
}

criterion_group!(benchmarks, benchmark);
criterion_main!(benchmarks);
1 change: 1 addition & 0 deletions pest/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@ pub use crate::parser_state::{
};
pub use crate::position::Position;
pub use crate::span::{merge_spans, Lines, LinesSpan, Span};
pub use crate::stack::Stack;
pub use crate::token::Token;
use core::fmt::Debug;
use core::hash::Hash;
Expand Down
173 changes: 136 additions & 37 deletions pest/src/stack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,48 @@ use alloc::vec;
use alloc::vec::Vec;
use core::ops::{Index, Range};

/// Implementation of a `Stack` which maintains an log of `StackOp`s in order to rewind the stack
/// to a previous state.
/// Implementation of a `Stack` which maintains popped elements and length of previous states
/// in order to rewind the stack to a previous state.
#[derive(Debug)]
pub struct Stack<T: Clone> {
ops: Vec<StackOp<T>>,
/// All elements in the stack.
cache: Vec<T>,
snapshots: Vec<usize>,
/// All elements that are in previous snapshots but may not be in the next state.
/// They will be pushed back to `cache` if the snapshot is restored,
/// otherwise be dropped if the snapshot is cleared.
///
/// Those elements from a sequence of snapshots are stacked in one [`Vec`], and
/// `popped.len() == lengths.iter().map(|(len, remained)| len - remained).sum()`
popped: Vec<T>,
/// Every element corresponds to a snapshot, and each element has two fields:
/// - Length of `cache` when corresponding snapshot is taken (AKA `len`).
/// - Count of elements that come from corresponding snapshot
/// and are still in next snapshot or current state (AKA `remained`).
///
/// And `len` is never less than `remained`.
///
/// On restoring, the `cache` can be divided into two parts:
/// - `0..remained` are untouched since the snapshot is taken.
///
/// There's nothing to do with those elements. Just let them stay where they are.
///
/// - `remained..cache.len()` are pushed after the snapshot is taken.
lengths: Vec<(usize, usize)>,
}

impl<T: Clone> Default for Stack<T> {
fn default() -> Self {
Self::new()
}
}

impl<T: Clone> Stack<T> {
/// Creates a new `Stack`.
pub fn new() -> Self {
Stack {
ops: vec![],
cache: vec![],
snapshots: vec![],
popped: vec![],
lengths: vec![],
}
}

Expand All @@ -43,15 +69,21 @@ impl<T: Clone> Stack<T> {

/// Pushes a `T` onto the `Stack`.
pub fn push(&mut self, elem: T) {
self.ops.push(StackOp::Push(elem.clone()));
self.cache.push(elem);
}

/// Pops the top-most `T` from the `Stack`.
pub fn pop(&mut self) -> Option<T> {
let len = self.cache.len();
let popped = self.cache.pop();
if let Some(ref val) = popped {
self.ops.push(StackOp::Pop(val.clone()));
if let Some(popped) = &popped {
if let Some((_, remained_count)) = self.lengths.last_mut() {
// `len >= *unpopped_count`
if len == *remained_count {
*remained_count -= 1;
self.popped.push(popped.clone());
}
}
}
popped
}
Expand All @@ -63,40 +95,40 @@ impl<T: Clone> Stack<T> {

/// Takes a snapshot of the current `Stack`.
pub fn snapshot(&mut self) {
self.snapshots.push(self.ops.len());
self.lengths.push((self.cache.len(), self.cache.len()))
}

/// The parsing after the last snapshot was successful so clearing it.
pub fn clear_snapshot(&mut self) {
self.snapshots.pop();
if let Some((len, unpopped)) = self.lengths.pop() {
// Popped elements from previous state are no longer needed.
self.popped.truncate(self.popped.len() - (len - unpopped));
}
}

/// Rewinds the `Stack` to the most recent `snapshot()`. If no `snapshot()` has been taken, this
/// function return the stack to its initial state.
pub fn restore(&mut self) {
match self.snapshots.pop() {
Some(ops_index) => {
self.rewind_to(ops_index);
self.ops.truncate(ops_index);
match self.lengths.pop() {
Some((len_stack, remained)) => {
if remained < self.cache.len() {
// Remove those elements that are pushed after the snapshot.
self.cache.truncate(remained);
}
if len_stack > remained {
let rewind_count = len_stack - remained;
let new_len = self.popped.len() - rewind_count;
let recovered_elements = self.popped.drain(new_len..);
self.cache.extend(recovered_elements.rev());
debug_assert_eq!(self.popped.len(), new_len);
}
}
None => {
self.cache.clear();
self.ops.clear();
}
}
}

// Rewind the stack to a particular index
fn rewind_to(&mut self, index: usize) {
let ops_to_rewind = &self.ops[index..];
for op in ops_to_rewind.iter().rev() {
match *op {
StackOp::Push(_) => {
self.cache.pop();
}
StackOp::Pop(ref elem) => {
self.cache.push(elem.clone());
}
// As `self.popped` and `self.lengths` should already be empty,
// there is no need to clear it.
debug_assert!(self.popped.is_empty());
debug_assert!(self.lengths.is_empty());
}
}
}
Expand All @@ -110,12 +142,6 @@ impl<T: Clone> Index<Range<usize>> for Stack<T> {
}
}

#[derive(Debug)]
enum StackOp<T> {
Push(T),
Pop(T),
}

#[cfg(test)]
mod test {
use super::Stack;
Expand Down Expand Up @@ -146,6 +172,79 @@ mod test {

assert_eq!(stack[0..stack.len()], [0]);
}
#[test]
fn restore_without_snapshot() {
let mut stack = Stack::new();

stack.push(0);
stack.restore();

assert_eq!(stack[0..stack.len()], [0; 0]);
}

#[test]
fn snapshot_pop_restore() {
let mut stack = Stack::new();

stack.push(0);
stack.snapshot();
stack.pop();
stack.restore();

assert_eq!(stack[0..stack.len()], [0]);
}

#[test]
fn snapshot_pop_push_restore() {
let mut stack = Stack::new();

stack.push(0);
stack.snapshot();
stack.pop();
stack.push(1);
stack.restore();

assert_eq!(stack[0..stack.len()], [0]);
}

#[test]
fn snapshot_push_pop_restore() {
let mut stack = Stack::new();

stack.push(0);
stack.snapshot();
stack.push(1);
stack.push(2);
stack.pop();
stack.restore();

assert_eq!(stack[0..stack.len()], [0]);
}

#[test]
fn snapshot_push_clear() {
let mut stack = Stack::new();

stack.push(0);
stack.snapshot();
stack.push(1);
stack.clear_snapshot();

assert_eq!(stack[0..stack.len()], [0, 1]);
}

#[test]
fn snapshot_pop_clear() {
let mut stack = Stack::new();

stack.push(0);
stack.push(1);
stack.snapshot();
stack.pop();
stack.clear_snapshot();

assert_eq!(stack[0..stack.len()], [0]);
}

#[test]
fn stack_ops() {
Expand Down

0 comments on commit 734ba66

Please sign in to comment.