Skip to content

Commit

Permalink
syntax: accept {,n} as an equivalent to {0,n}
Browse files Browse the repository at this point in the history
Most regular expression engines don't accept the `{,n}` syntax, but
some others do (namely Python's `re` library). This introduces a new
parser configuration option that enables the `{,n}` syntax.

PR #1086
  • Loading branch information
plusvic committed Mar 26, 2024
1 parent aa2d8bd commit f5d0b69
Showing 1 changed file with 61 additions and 8 deletions.
69 changes: 61 additions & 8 deletions regex-syntax/src/ast/parse.rs
Expand Up @@ -124,6 +124,7 @@ pub struct ParserBuilder {
ignore_whitespace: bool,
nest_limit: u32,
octal: bool,
empty_min_range: bool,
}

impl Default for ParserBuilder {
Expand All @@ -139,6 +140,7 @@ impl ParserBuilder {
ignore_whitespace: false,
nest_limit: 250,
octal: false,
empty_min_range: false,
}
}

Expand All @@ -149,6 +151,7 @@ impl ParserBuilder {
capture_index: Cell::new(0),
nest_limit: self.nest_limit,
octal: self.octal,
empty_min_range: self.empty_min_range,
initial_ignore_whitespace: self.ignore_whitespace,
ignore_whitespace: Cell::new(self.ignore_whitespace),
comments: RefCell::new(vec![]),
Expand Down Expand Up @@ -221,6 +224,18 @@ impl ParserBuilder {
self.ignore_whitespace = yes;
self
}

/// Enable or disable support for `{,n}` as shorthand for `{0,n}`.
///
/// When enabled, a counted repetition whose lower bound is omitted is
/// parsed as if the lower bound were `0`; for example, `a{,9}` is
/// equivalent to `a{0,9}`. Most regular expression engines don't support
/// the `{,n}` syntax, but some others do, namely Python's `re` library.
///
/// This is disabled by default.
pub fn empty_min_range(&mut self, yes: bool) -> &mut ParserBuilder {
self.empty_min_range = yes;
self
}
}

/// A regular expression parser.
Expand All @@ -246,6 +261,9 @@ pub struct Parser {
/// The initial setting for `ignore_whitespace` as provided by
/// `ParserBuilder`. It is used when resetting the parser's state.
initial_ignore_whitespace: bool,
/// Whether the parser supports `{,n}` repetitions as an equivalent to
/// `{0,n}`.
empty_min_range: bool,
/// Whether whitespace should be ignored. When enabled, comments are
/// also permitted.
ignore_whitespace: Cell<bool>,
Expand Down Expand Up @@ -1114,32 +1132,48 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
self.parse_decimal(),
ast::ErrorKind::DecimalEmpty,
ast::ErrorKind::RepetitionCountDecimalEmpty,
)?;
let mut range = ast::RepetitionRange::Exactly(count_start);
);
if self.is_eof() {
return Err(self.error(
Span::new(start, self.pos()),
ast::ErrorKind::RepetitionCountUnclosed,
));
}
if self.char() == ',' {
let range = if self.char() == ',' {
if !self.bump_and_bump_space() {
return Err(self.error(
Span::new(start, self.pos()),
ast::ErrorKind::RepetitionCountUnclosed,
));
}
if self.char() != '}' {
let count_start = match count_start {
Ok(c) => c,
Err(err)
if err.kind
== ast::ErrorKind::RepetitionCountDecimalEmpty =>
{
if self.parser().empty_min_range {
0
} else {
return Err(err);
}
}
err => err?,
};
let count_end = specialize_err(
self.parse_decimal(),
ast::ErrorKind::DecimalEmpty,
ast::ErrorKind::RepetitionCountDecimalEmpty,
)?;
range = ast::RepetitionRange::Bounded(count_start, count_end);
ast::RepetitionRange::Bounded(count_start, count_end)
} else {
range = ast::RepetitionRange::AtLeast(count_start);
ast::RepetitionRange::AtLeast(count_start?)
}
}
} else {
ast::RepetitionRange::Exactly(count_start?)
};

if self.is_eof() || self.char() != '}' {
return Err(self.error(
Span::new(start, self.pos()),
Expand Down Expand Up @@ -2459,6 +2493,11 @@ mod tests {
ParserI::new(parser, pattern)
}

/// Builds a test parser over `pattern` with the `empty_min_range` option
/// turned on, so that `{,n}` is accepted as `{0,n}`.
fn parser_empty_min_range(pattern: &str) -> ParserI<'_, Parser> {
    let mut builder = ParserBuilder::new();
    builder.empty_min_range(true);
    ParserI::new(builder.build(), pattern)
}

fn parser_nest_limit(
pattern: &str,
nest_limit: u32,
Expand Down Expand Up @@ -3376,6 +3415,20 @@ bar
ast: Box::new(lit('a', 0)),
}))
);
assert_eq!(
parser_empty_min_range(r"a{,9}").parse(),
Ok(Ast::repetition(ast::Repetition {
span: span(0..5),
op: ast::RepetitionOp {
span: span(1..5),
kind: ast::RepetitionKind::Range(
ast::RepetitionRange::Bounded(0, 9)
),
},
greedy: true,
ast: Box::new(lit('a', 0)),
}))
);
assert_eq!(
parser_ignore_whitespace(r"a{5,9} ?").parse(),
Ok(Ast::repetition(ast::Repetition {
Expand Down Expand Up @@ -4596,8 +4649,8 @@ bar
assert_eq!(
parser(r"\b{ ").parse().unwrap_err(),
TestError {
span: span(4..4),
kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
span: span(2..4),
kind: ast::ErrorKind::RepetitionCountUnclosed,
}
);
// In this case, we got some valid chars that make it look like the
Expand Down

0 comments on commit f5d0b69

Please sign in to comment.