Skip to content

Commit

Permalink
perf(parser): handle unescaping of escaped markers in parser code dir…
Browse files Browse the repository at this point in the history
…ectly

The changes in #247 and #249 enabled some improvements to the string parsing
code, which this change implements.

Namely, escaped template markers (`$${` -> `${` and `%%{` -> `%{`) are now
unescaped directly in the parser at the point where they are encountered,
instead of in a separate pass afterwards. This speeds up common cases quite a bit.

Before:

```
parse/hcl-edit/deeply_nested.tf
                        time:   [19.631 µs 19.647 µs 19.668 µs]
                        thrpt:  [35.929 MiB/s 35.968 MiB/s 35.997 MiB/s]
parse/hcl-edit/large.tf time:   [2.1344 ms 2.1389 ms 2.1445 ms]
                        thrpt:  [38.015 MiB/s 38.114 MiB/s 38.194 MiB/s]
parse/hcl-edit/medium.tf
                        time:   [449.06 µs 451.22 µs 453.42 µs]
                        thrpt:  [31.667 MiB/s 31.821 MiB/s 31.975 MiB/s]
parse/hcl-edit/small.tf time:   [27.485 µs 27.600 µs 27.744 µs]
                        thrpt:  [34.133 MiB/s 34.311 MiB/s 34.456 MiB/s]
```

After:

```
parse/hcl-edit/deeply_nested.tf
                        time:   [18.461 µs 18.488 µs 18.526 µs]
                        thrpt:  [38.146 MiB/s 38.223 MiB/s 38.280 MiB/s]
                 change:
                        time:   [-5.8990% -4.6557% -2.0981%] (p = 0.00 < 0.05)
                        thrpt:  [+2.1431% +4.8830% +6.2688%]
                        Performance has improved.
parse/hcl-edit/large.tf time:   [1.7640 ms 1.7699 ms 1.7787 ms]
                        thrpt:  [45.833 MiB/s 46.061 MiB/s 46.216 MiB/s]
                 change:
                        time:   [-17.613% -17.253% -16.777%] (p = 0.00 < 0.05)
                        thrpt:  [+20.160% +20.850% +21.379%]
                        Performance has improved.
parse/hcl-edit/medium.tf
                        time:   [407.78 µs 408.77 µs 409.99 µs]
                        thrpt:  [35.022 MiB/s 35.127 MiB/s 35.212 MiB/s]
                 change:
                        time:   [-9.9006% -9.4090% -8.9185%] (p = 0.00 < 0.05)
                        thrpt:  [+9.7918% +10.386% +10.988%]
                        Performance has improved.
parse/hcl-edit/small.tf time:   [24.139 µs 24.249 µs 24.385 µs]
                        thrpt:  [38.835 MiB/s 39.053 MiB/s 39.230 MiB/s]
                 change:
                        time:   [-12.375% -12.060% -11.751%] (p = 0.00 < 0.05)
                        thrpt:  [+13.316% +13.713% +14.123%]
                        Performance has improved.
```
  • Loading branch information
martinohmann committed Jun 18, 2023
1 parent e0c86f1 commit ff0de24
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 48 deletions.
127 changes: 85 additions & 42 deletions crates/hcl-edit/src/parser/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,90 +5,133 @@ use super::{
IResult, Input,
};
use crate::{Decorated, Ident, RawString};
use hcl_primitives::template::unescape_markers;
use std::borrow::Cow;
use winnow::{
combinator::{alt, cut_err, delimited, fail, not, opt, preceded, repeat, success},
dispatch,
stream::AsChar,
token::{any, one_of, tag, take_while},
token::{any, one_of, take, take_while},
Parser,
};

pub(super) fn string(input: Input) -> IResult<Input, String> {
delimited(b'"', opt(build_string), b'"')
delimited(b'"', opt(build_string(quoted_string_fragment)), b'"')
.map(Option::unwrap_or_default)
.map(|s| unescape_markers(&s).into())
.output_into()
.parse_next(input)
}

pub(super) fn build_string(input: Input) -> IResult<Input, Cow<str>> {
let (mut input, mut string) = match string_fragment(input) {
Ok((input, fragment)) => match fragment {
StringFragment::Literal(s) => (input, Cow::Borrowed(s)),
StringFragment::EscapedChar(c) => (input, Cow::Owned(String::from(c))),
},
Err(err) => return Err(err),
};

loop {
match string_fragment(input) {
Ok((rest, fragment)) => {
match fragment {
StringFragment::Literal(s) => string.to_mut().push_str(s),
StringFragment::EscapedChar(c) => string.to_mut().push(c),
};
input = rest;
pub(super) fn build_string<'a, F>(
mut fragment_parser: F,
) -> impl Parser<Input<'a>, Cow<'a, str>, ParseError<Input<'a>>>
where
F: Parser<Input<'a>, StringFragment<'a>, ParseError<Input<'a>>>,
{
move |input: Input<'a>| {
let (mut input, mut string) = match fragment_parser.parse_next(input) {
Ok((input, fragment)) => match fragment {
StringFragment::Literal(s) => (input, Cow::Borrowed(s)),
StringFragment::EscapedChar(c) => (input, Cow::Owned(String::from(c))),
StringFragment::EscapedMarker(m) => (input, Cow::Borrowed(m.unescape())),
},
Err(err) => return Err(err),
};

loop {
match fragment_parser.parse_next(input) {
Ok((rest, fragment)) => {
match fragment {
StringFragment::Literal(s) => string.to_mut().push_str(s),
StringFragment::EscapedChar(c) => string.to_mut().push(c),
StringFragment::EscapedMarker(m) => string.to_mut().push_str(m.unescape()),
};
input = rest;
}
Err(_) => return Ok((input, string)),
}
Err(_) => return Ok((input, string)),
}
}
}

/// A string fragment contains a fragment of a string being parsed: either
/// a non-empty Literal (a series of non-escaped characters) or a single
/// parsed escaped character.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum StringFragment<'a> {
/// a non-empty Literal (a series of non-escaped characters), a single
/// parsed escaped character or an escaped template start marker.
#[derive(Clone)]
pub(super) enum StringFragment<'a> {
Literal(&'a str),
EscapedChar(char),
EscapedMarker(EscapedMarker),
}

fn string_fragment(input: Input) -> IResult<Input, StringFragment> {
/// An escaped marker which would start a template interpolation or directive if unescaped.
#[derive(Clone)]
pub(super) enum EscapedMarker {
/// The escaped interpolation start marker `$${`, which unescapes to `${`.
Interpolation,
/// The escaped directive start marker `%%{`, which unescapes to `%{`.
Directive,
}

impl EscapedMarker {
/// Returns the unescaped form of the escaped marker.
///
/// `Interpolation` yields `"${"` and `Directive` yields `"%{"`. The result is a
/// `&'static str`, so it can be borrowed or appended without allocating a new string.
fn unescape(&self) -> &'static str {
match self {
EscapedMarker::Interpolation => "${",
EscapedMarker::Directive => "%{",
}
}
}

/// Parse a single fragment of a quoted string.
///
/// The alternatives are tried in order: an escaped template marker (`$${` or `%%{`), a
/// non-empty run of literal text, and finally an escaped character.
pub(super) fn quoted_string_fragment(input: Input) -> IResult<Input, StringFragment> {
alt((
// Tried before `string_literal`: the literal parser only stops at `${`/`%{`, so it
// would otherwise consume the first `$` or `%` of an escaped marker as plain text.
escaped_marker.map(StringFragment::EscapedMarker),
string_literal.map(StringFragment::Literal),
escaped_char.map(StringFragment::EscapedChar),
))
.parse_next(input)
}

/// Build a parser for a single fragment of template text.
///
/// The returned parser first tries an escaped template marker (`$${` or `%%{`) and
/// otherwise takes a non-empty run of input up to, but not including, the position
/// where `literal_end` matches. Unlike `quoted_string_fragment`, it has no alternative
/// for backslash-escaped characters.
pub(super) fn template_string_fragment<'a, F, T>(
mut literal_end: F,
) -> impl Parser<Input<'a>, StringFragment<'a>, ParseError<Input<'a>>>
where
F: Parser<Input<'a>, T, ParseError<Input<'a>>>,
{
move |input: Input<'a>| {
alt((
escaped_marker.map(StringFragment::EscapedMarker),
// `by_ref` hands `any_until` a borrow of `literal_end`, so the parser stays owned
// by this closure and is available again on the next invocation.
any_until(literal_end.by_ref()).map(StringFragment::Literal),
))
.parse_next(input)
}
}

/// Parse a non-empty block of text that doesn't include `\`, `"` or non-escaped template
/// interpolation/directive start markers.
fn string_literal(input: Input) -> IResult<Input, &str> {
let literal_end = dispatch! {any;
b'\"' | b'\\' => success(true),
b'$' | b'%' => b'{'.value(true),
_ => fail,
};
literal_until(literal_end).parse_next(input)
any_until(literal_end).parse_next(input)
}

pub(super) fn literal_until<'a, F, T>(
literal_end: F,
) -> impl Parser<Input<'a>, &'a str, ParseError<Input<'a>>>
fn any_until<'a, F, T>(literal_end: F) -> impl Parser<Input<'a>, &'a str, ParseError<Input<'a>>>
where
F: Parser<Input<'a>, T, ParseError<Input<'a>>>,
{
void(repeat(
1..,
alt((
tag("$${"),
tag("%%{"),
preceded(not(literal_end), any).recognize(),
)),
))
.recognize()
.try_map(std::str::from_utf8)
void(repeat(1.., preceded(not(literal_end), any)))
.recognize()
.try_map(std::str::from_utf8)
}

/// Parse an escaped start marker for a template interpolation or directive.
///
/// Takes the next three input tokens and dispatches on them: `$${` produces
/// `EscapedMarker::Interpolation`, `%%{` produces `EscapedMarker::Directive`, and any
/// other sequence makes the parser fail.
fn escaped_marker(input: Input) -> IResult<Input, EscapedMarker> {
dispatch! {take::<_, Input, _>(3usize);
b"$${" => success(EscapedMarker::Interpolation),
b"%%{" => success(EscapedMarker::Directive),
_ => fail,
}
.parse_next(input)
}

/// Parse an escaped character: `\n`, `\t`, `\r`, `\u00AC`, etc.
Expand Down
24 changes: 18 additions & 6 deletions crates/hcl-edit/src/parser/template.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@ use super::{
error::ParseError,
expr::expr,
repr::{decorated, spanned},
string::{build_string, from_utf8_unchecked, literal_until, raw_string},
string::{
build_string, from_utf8_unchecked, quoted_string_fragment, raw_string,
template_string_fragment,
},
trivia::ws,
IResult, Input,
};
Expand All @@ -15,7 +18,6 @@ use crate::{
},
SetSpan, Span, Spanned,
};
use hcl_primitives::template::unescape_markers;
use std::borrow::Cow;
use winnow::{
ascii::{line_ending, space0},
Expand All @@ -24,14 +26,14 @@ use winnow::{
};

pub(super) fn string_template(input: Input) -> IResult<Input, StringTemplate> {
delimited(b'"', elements(build_string), b'"')
delimited(b'"', elements(build_string(quoted_string_fragment)), b'"')
.output_into()
.parse_next(input)
}

pub(super) fn template(input: Input) -> IResult<Input, Template> {
let literal_end = alt((b"${", b"%{"));
let literal = literal_until(literal_end).output_into();
let literal = template_literal(literal_end);
elements(literal).output_into().parse_next(input)
}

Expand All @@ -51,7 +53,7 @@ pub(super) fn heredoc_template<'a>(
// the line ending to the last template element below.
let heredoc_end = (line_ending, space0, delim).recognize();
let literal_end = alt((b"${", b"%{", heredoc_end));
let literal = literal_until(literal_end).output_into();
let literal = template_literal(literal_end);

// Use `opt` to handle an empty template.
opt((elements(literal), line_ending.with_span()).map(
Expand Down Expand Up @@ -79,14 +81,24 @@ pub(super) fn heredoc_template<'a>(
}
}

/// Build a parser for a template literal that ends where `literal_end` matches.
///
/// Feeds `template_string_fragment(literal_end)` into `build_string`, so the resulting
/// parser yields the literal as a `Cow<str>`.
#[inline]
fn template_literal<'a, F, T>(
literal_end: F,
) -> impl Parser<Input<'a>, Cow<'a, str>, ParseError<Input<'a>>>
where
F: Parser<Input<'a>, T, ParseError<Input<'a>>>,
{
build_string(template_string_fragment(literal_end))
}

fn elements<'a, P>(literal: P) -> impl Parser<Input<'a>, Vec<Element>, ParseError<Input<'a>>>
where
P: Parser<Input<'a>, Cow<'a, str>, ParseError<Input<'a>>>,
{
repeat(
0..,
spanned(alt((
literal.map(|s| Element::Literal(Spanned::new(unescape_markers(&s).into()))),
literal.map(|s| Element::Literal(Spanned::new(s.into()))),
interpolation.map(Element::Interpolation),
directive.map(Element::Directive),
))),
Expand Down

0 comments on commit ff0de24

Please sign in to comment.