Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

automata/meta: revert broadening of reverse suffix optimization #1111

Merged
merged 1 commit into from
Oct 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
39 changes: 26 additions & 13 deletions regex-automata/src/meta/strategy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1167,21 +1167,34 @@ impl ReverseSuffix {
return Err(core);
}
let kind = core.info.config().get_match_kind();
let suffixseq = crate::util::prefilter::suffixes(kind, hirs);
let Some(suffixes) = suffixseq.literals() else {
debug!(
"skipping reverse suffix optimization because \
the extract suffix sequence is not finite",
);
return Err(core);
let suffixes = crate::util::prefilter::suffixes(kind, hirs);
let lcs = match suffixes.longest_common_suffix() {
None => {
debug!(
"skipping reverse suffix optimization because \
a longest common suffix could not be found",
);
return Err(core);
}
Some(lcs) if lcs.is_empty() => {
debug!(
"skipping reverse suffix optimization because \
the longest common suffix is the empty string",
);
return Err(core);
}
Some(lcs) => lcs,
};
let Some(pre) = Prefilter::new(kind, suffixes) else {
debug!(
"skipping reverse suffix optimization because \
let pre = match Prefilter::new(kind, &[lcs]) {
Some(pre) => pre,
None => {
debug!(
"skipping reverse suffix optimization because \
a prefilter could not be constructed from the \
longest common suffix",
);
return Err(core);
);
return Err(core);
}
};
if !pre.is_fast() {
debug!(
Expand Down Expand Up @@ -1268,7 +1281,7 @@ impl ReverseSuffix {
e.try_search_half_rev_limited(&input, min_start)
} else if let Some(e) = self.core.hybrid.get(&input) {
trace!(
"using lazy DFA for reverse inner search at {:?}, \
"using lazy DFA for reverse suffix search at {:?}, \
but will be stopped at {} to avoid quadratic behavior",
input.get_span(),
min_start,
Expand Down
15 changes: 15 additions & 0 deletions testdata/regression.toml
Original file line number Diff line number Diff line change
Expand Up @@ -813,3 +813,18 @@ name = "hir-optimization-out-of-order-class"
regex = '^[[:alnum:]./-]+$'
haystack = "a-b"
matches = [[0, 3]]

# This is a regression test for an improper reverse suffix optimization. This
# occurred when I "broadened" the applicability of the optimization to include
# multiple possible literal suffixes instead of only sticking to a non-empty
# longest common suffix. It turns out that, at least given how the reverse
# suffix optimization works, we need to stick to the longest common suffix for
# now.
#
# See: https://github.com/rust-lang/regex/issues/1110
# See also: https://github.com/astral-sh/ruff/pull/7980
[[test]]
name = 'improper-reverse-suffix-optimization'
regex = '(\\N\{[^}]+})|([{}])'
haystack = 'hiya \N{snowman} bye'
matches = [[[5, 16], [5, 16], []]]