Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: speed up Position::line_col for large inputs using SIMD #707

Merged
merged 1 commit into from
Sep 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions derive/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "pest_derive"
description = "pest's derive macro"
version = "2.3.0"
version = "2.3.1"
edition = "2018"
authors = ["Dragoș Tiselice <dragostiselice@gmail.com>"]
homepage = "https://pest-parser.github.io/"
Expand All @@ -23,5 +23,5 @@ std = ["pest/std", "pest_generator/std"]

[dependencies]
# for tests, included transitively anyway
pest = { path = "../pest", version = "2.3.0", default-features = false }
pest_generator = { path = "../generator", version = "2.3.0", default-features = false }
pest = { path = "../pest", version = "2.3.1", default-features = false }
pest_generator = { path = "../generator", version = "2.3.1", default-features = false }
6 changes: 3 additions & 3 deletions generator/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "pest_generator"
description = "pest code generator"
version = "2.3.0"
version = "2.3.1"
edition = "2018"
authors = ["Dragoș Tiselice <dragostiselice@gmail.com>"]
homepage = "https://pest-parser.github.io/"
Expand All @@ -18,8 +18,8 @@ default = ["std"]
std = ["pest/std"]

[dependencies]
pest = { path = "../pest", version = "2.3.0", default-features = false }
pest_meta = { path = "../meta", version = "2.3.0" }
pest = { path = "../pest", version = "2.3.1", default-features = false }
pest_meta = { path = "../meta", version = "2.3.1" }
proc-macro2 = "1.0"
quote = "1.0"
syn = "1.0"
6 changes: 3 additions & 3 deletions grammars/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "pest_grammars"
description = "pest popular grammar implementations"
version = "2.3.0"
version = "2.3.1"
edition = "2018"
authors = ["Dragoș Tiselice <dragostiselice@gmail.com>"]
homepage = "https://pest-parser.github.io/"
Expand All @@ -14,8 +14,8 @@ readme = "_README.md"
rust-version = "1.56"

[dependencies]
pest = { path = "../pest", version = "2.3.0" }
pest_derive = { path = "../derive", version = "2.3.0" }
pest = { path = "../pest", version = "2.3.1" }
pest_derive = { path = "../derive", version = "2.3.1" }

[dev-dependencies]
criterion = "0.3"
Expand Down
42 changes: 41 additions & 1 deletion grammars/benches/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,45 @@ fn criterion_benchmark(c: &mut Criterion) {
});
}

criterion_group!(benches, criterion_benchmark);
// Parser used by `line_col_benchmark`. The inline grammar below is a loose,
// line-oriented tokenizer for JSON-like text: quoted `"key": "string"` pairs,
// `//` and `/* */` comments, runs of spaces, newlines, and a catch-all `other`
// rule for any remaining character. It is not a full JSON grammar.
mod autocorrect {
use pest_derive::Parser;

// The derive builds the parser (and the `Rule` enum referenced by the
// benchmark as `autocorrect::Rule::item`) from the grammar string.
// `item` is the entry rule: the whole input from SOI to EOI as a sequence of `line`s.
#[derive(Parser)]
#[grammar_inline = r#"
newline = ${ "\n" | "\r" }
space = ${ " "+ }

other = ${ !(pair) ~ ANY }
comment = ${ single_line_comment | multiline_comment }
single_line_comment = _{ "//" ~ (!(newline) ~ ANY)* }
multiline_comment = _{ "/*" ~ (!("*/") ~ ANY)* ~ "*/"}

string_type = _{
("\"" ~ (!(newline | "\"") ~ ANY)* ~ "\"")
}
key = ${ string_type ~ (" ")* ~ ":" ~ (" ")* }
string = ${ string_type }
pair = _{ key ~ string }

line = _{ pair | comment | space | other | newline }
item = _{ SOI ~ line* ~ EOI }
"#]
pub struct JsonParser;
}

/// Benchmarks `Position::line_col` on the start position of the last pair
/// parsed from the `benches/main.i18n.json` fixture.
///
/// # Panics
///
/// Panics if the fixture cannot be opened or read, if it fails to parse with
/// `autocorrect::Rule::item`, or if parsing yields no pairs.
fn line_col_benchmark(c: &mut Criterion) {
    let mut file = File::open("benches/main.i18n.json").unwrap();
    let mut data = String::new();

    file.read_to_string(&mut data).unwrap();
    let pairs = autocorrect::JsonParser::parse(autocorrect::Rule::item, &data).unwrap();
    // Use the last top-level pair; presumably it lies near the end of the
    // input, which is the expensive case for `line_col`.
    let last_pair = pairs.last().unwrap();
    c.bench_function("line col", |b| {
        // Return the result instead of discarding it with `let _ = ...;` so
        // that criterion passes it through `black_box` and the optimizer
        // cannot elide the call being measured.
        b.iter(|| last_pair.as_span().start_pos().line_col());
    });
}

// Register both benchmark functions under the `benches` group and generate
// the benchmark binary's entry point from it.
criterion_group!(benches, criterion_benchmark, line_col_benchmark,);
criterion_main!(benches);
10,128 changes: 10,128 additions & 0 deletions grammars/benches/main.i18n.json

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions meta/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "pest_meta"
description = "pest meta language parser and validator"
version = "2.3.0"
version = "2.3.1"
edition = "2018"
authors = ["Dragoș Tiselice <dragostiselice@gmail.com>"]
homepage = "https://pest-parser.github.io/"
Expand All @@ -16,7 +16,7 @@ include = ["Cargo.toml", "src/**/*", "src/grammar.rs", "_README.md", "LICENSE-*"
rust-version = "1.56"

[dependencies]
pest = { path = "../pest", version = "2.3.0" }
pest = { path = "../pest", version = "2.3.1" }
once_cell = "1.8.0"

[build-dependencies]
Expand Down
7 changes: 6 additions & 1 deletion pest/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "pest"
description = "The Elegant Parser"
version = "2.3.0"
version = "2.3.1"
edition = "2018"
authors = ["Dragoș Tiselice <dragostiselice@gmail.com>"]
homepage = "https://pest-parser.github.io/"
Expand All @@ -21,9 +21,14 @@ std = ["ucd-trie/std", "thiserror"]
pretty-print = ["serde", "serde_json"]
# Enable const fn constructor for `PrecClimber`
const_prec_climber = []
# Enable faster `Position::line_col` calculation using SIMD
# (note that this may have extra overhead for small inputs)
fast-line-col = ["memchr", "bytecount"]

[dependencies]
ucd-trie = { version = "0.1.1", default-features = false }
serde = { version = "1.0.89", optional = true }
serde_json = { version = "1.0.39", optional = true}
thiserror = { version = "1.0.31", optional = true }
memchr = { version = "2", optional = true }
bytecount = { version = "0.6", optional = true }
104 changes: 66 additions & 38 deletions pest/src/position.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,45 +135,14 @@ impl<'i> Position<'i> {
if self.pos > self.input.len() {
panic!("position out of bounds");
}

let mut pos = self.pos;
// Position's pos is always a UTF-8 border.
let slice = &self.input[..pos];
let mut chars = slice.chars().peekable();

let mut line_col = (1, 1);

while pos != 0 {
match chars.next() {
Some('\r') => {
if let Some(&'\n') = chars.peek() {
chars.next();

if pos == 1 {
pos -= 1;
} else {
pos -= 2;
}

line_col = (line_col.0 + 1, 1);
} else {
pos -= 1;
line_col = (line_col.0, line_col.1 + 1);
}
}
Some('\n') => {
pos -= 1;
line_col = (line_col.0 + 1, 1);
}
Some(c) => {
pos -= c.len_utf8();
line_col = (line_col.0, line_col.1 + 1);
}
None => unreachable!(),
}
#[cfg(feature = "fast-line-col")]
{
fast_line_col(self.input, self.pos)
}
#[cfg(not(feature = "fast-line-col"))]
{
original_line_col(self.input, self.pos)
}

line_col
}

/// Returns the entire line of the input that contains this `Position`.
Expand Down Expand Up @@ -432,6 +401,63 @@ impl<'i> Hash for Position<'i> {
}
}

/// Computes the 1-based `(line, column)` of byte offset `pos` in `input` by
/// walking every character that precedes it.
///
/// A `"\r\n"` pair or a lone `'\n'` starts a new line and resets the column
/// to 1. Any other character, including a `'\r'` that is not immediately
/// followed by `'\n'`, advances the column by one. Columns count characters,
/// not bytes.
///
/// # Panics
///
/// Panics if `pos` is past the end of `input` or is not on a UTF-8 character
/// boundary (the prefix slice cannot be taken).
#[inline]
#[cfg(not(feature = "fast-line-col"))]
fn original_line_col(input: &str, pos: usize) -> (usize, usize) {
    let mut line = 1;
    let mut column = 1;

    // Only the text before `pos` influences the result.
    let mut preceding = input[..pos].chars().peekable();

    while let Some(ch) = preceding.next() {
        match ch {
            // Treat "\r\n" as a single line terminator: swallow the '\n' too.
            '\r' if preceding.peek() == Some(&'\n') => {
                preceding.next();
                line += 1;
                column = 1;
            }
            '\n' => {
                line += 1;
                column = 1;
            }
            // Everything else (a bare '\r' included) occupies one column.
            _ => column += 1,
        }
    }

    (line, column)
}

/// Computes the 1-based `(line, column)` of byte offset `pos` in `input`
/// using `memchr`/`bytecount` byte searches instead of a per-character walk.
///
/// The line number is one more than the number of `'\n'` bytes before `pos`;
/// the column is the number of characters since the last `'\n'`, 1-based.
///
/// # Panics
///
/// Panics if `pos` is past the end of `input` or is not on a UTF-8 character
/// boundary (the prefix slice cannot be taken).
#[inline]
#[cfg(feature = "fast-line-col")]
fn fast_line_col(input: &str, pos: usize) -> (usize, usize) {
    let prefix = &input[..pos];
    let bytes = prefix.as_bytes();

    match memchr::memrchr(b'\n', bytes) {
        // No line feed before `pos`: still on line 1, column is chars seen + 1.
        None => (1, prefix.chars().count() + 1),
        Some(last_nl) => {
            let line = bytecount::count(&bytes[..=last_nl], b'\n') + 1;
            // The counted slice starts at the '\n' itself, so the character
            // count is already the 1-based column.
            let column = prefix[last_nl..].chars().count();
            (line, column)
        }
    }
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -465,6 +491,8 @@ mod tests {
assert_eq!(Position::new(input, 7).unwrap().line_col(), (3, 1));
assert_eq!(Position::new(input, 8).unwrap().line_col(), (3, 2));
assert_eq!(Position::new(input, 11).unwrap().line_col(), (3, 3));
let input = "abcd嗨";
assert_eq!(Position::new(input, 7).unwrap().line_col(), (1, 6));
}

#[test]
Expand Down
6 changes: 3 additions & 3 deletions vm/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "pest_vm"
description = "pest grammar virtual machine"
version = "2.3.0"
version = "2.3.1"
edition = "2018"
authors = ["Dragoș Tiselice <dragostiselice@gmail.com>"]
homepage = "https://pest-parser.github.io/"
Expand All @@ -14,5 +14,5 @@ readme = "_README.md"
rust-version = "1.56"

[dependencies]
pest = { path = "../pest", version = "2.3.0" }
pest_meta = { path = "../meta", version = "2.3.0" }
pest = { path = "../pest", version = "2.3.1" }
pest_meta = { path = "../meta", version = "2.3.1" }