Skip to content

Commit

Permalink
automata/meta: revert broadening of reverse suffix optimization
Browse files Browse the repository at this point in the history
This reverts commit 8a8d599 and
includes a regression test, as well as a tweak to a log message.

Essentially, the broadening was improper. We have to be careful when
dealing with suffixes as opposed to prefixes. Namely, my logic
previously was that the broadening was okay because we were already
doing it for the reverse inner optimization. But the reverse inner
optimization works with prefixes, not suffixes. So the comparison wasn't
quite correct.

This goes back to only applying the reverse suffix optimization when
there is a non-empty single common suffix.

Fixes #1110
Ref astral-sh/ruff#7980
  • Loading branch information
BurntSushi committed Oct 16, 2023
1 parent e7bd19d commit 7eefc5d
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 13 deletions.
39 changes: 26 additions & 13 deletions regex-automata/src/meta/strategy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1167,21 +1167,34 @@ impl ReverseSuffix {
return Err(core);
}
let kind = core.info.config().get_match_kind();
let suffixseq = crate::util::prefilter::suffixes(kind, hirs);
let Some(suffixes) = suffixseq.literals() else {
debug!(
"skipping reverse suffix optimization because \
the extract suffix sequence is not finite",
);
return Err(core);
let suffixes = crate::util::prefilter::suffixes(kind, hirs);
let lcs = match suffixes.longest_common_suffix() {
None => {
debug!(
"skipping reverse suffix optimization because \
a longest common suffix could not be found",
);
return Err(core);
}
Some(lcs) if lcs.is_empty() => {
debug!(
"skipping reverse suffix optimization because \
the longest common suffix is the empty string",
);
return Err(core);
}
Some(lcs) => lcs,
};
let Some(pre) = Prefilter::new(kind, suffixes) else {
debug!(
"skipping reverse suffix optimization because \
let pre = match Prefilter::new(kind, &[lcs]) {
Some(pre) => pre,
None => {
debug!(
"skipping reverse suffix optimization because \
a prefilter could not be constructed from the \
longest common suffix",
);
return Err(core);
);
return Err(core);
}
};
if !pre.is_fast() {
debug!(
Expand Down Expand Up @@ -1268,7 +1281,7 @@ impl ReverseSuffix {
e.try_search_half_rev_limited(&input, min_start)
} else if let Some(e) = self.core.hybrid.get(&input) {
trace!(
"using lazy DFA for reverse inner search at {:?}, \
"using lazy DFA for reverse suffix search at {:?}, \
but will be stopped at {} to avoid quadratic behavior",
input.get_span(),
min_start,
Expand Down
15 changes: 15 additions & 0 deletions testdata/regression.toml
Original file line number Diff line number Diff line change
Expand Up @@ -813,3 +813,18 @@ name = "hir-optimization-out-of-order-class"
regex = '^[[:alnum:]./-]+$'
haystack = "a-b"
matches = [[0, 3]]

# This is a regression test for an improper reverse suffix optimization. This
# occurred when I "broadened" the applicability of the optimization to include
# multiple possible literal suffixes instead of only sticking to a non-empty
# longest common suffix. It turns out that, at least given how the reverse
# suffix optimization works, we need to stick to the longest common suffix for
# now.
#
# See: https://github.com/rust-lang/regex/issues/1110
# See also: https://github.com/astral-sh/ruff/pull/7980
[[test]]
name = 'improper-reverse-suffix-optimization'
regex = '(\\N\{[^}]+})|([{}])'
haystack = 'hiya \N{snowman} bye'
matches = [[[5, 16], [5, 16], []]]

0 comments on commit 7eefc5d

Please sign in to comment.