Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions regex-automata/src/meta/limited.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,41 @@ pub(crate) fn dfa_try_search_half_rev(
return Err(RetryError::Quadratic(RetryQuadraticError::new()));
}
}
let was_dead = dfa.is_dead_state(sid);
dfa_eoi_rev(dfa, input, &mut sid, &mut mat)?;
// If we reach the beginning of the search and we could otherwise still
// potentially keep matching if there was more to match, then we actually
// return an error to indicate giving up on this optimization. Why? Because
// we can't prove that the real match begins at where we would report it.
//
// This only happens when all of the following are true:
//
// 1) We reach the starting point of our search span.
// 2) The match we found has an offset strictly greater than the starting
//    point (i.e., the reverse scan reported it before reaching the start).
// 3) The FSM reports we could possibly find a longer match.
//
// We need (1) because otherwise the search stopped before the starting
// point and there is no possible way to find a more leftmost position.
//
// We need (2) because if the match found has an offset equal to the minimum
// possible offset, then there is no possible more leftmost match.
//
// We need (3) because if the FSM couldn't continue anyway (i.e., it's in
// a dead state), then we know we couldn't find anything more leftmost
// than what we have. (We have to check the state we were in prior to the
// EOI transition since the EOI transition will usually bring us to a dead
// state by virtue of it representing the end-of-input.)
if at == input.start()
&& mat.map_or(false, |m| m.offset() > input.start())
&& !was_dead
{
trace!(
"reached beginning of search at offset {} without hitting \
a dead state, quitting to avoid potential false positive match",
at,
);
return Err(RetryError::Quadratic(RetryQuadraticError::new()));
}
Ok(mat)
}

Expand Down Expand Up @@ -140,7 +174,20 @@ pub(crate) fn hybrid_try_search_half_rev(
return Err(RetryError::Quadratic(RetryQuadraticError::new()));
}
}
let was_dead = sid.is_dead();
hybrid_eoi_rev(dfa, cache, input, &mut sid, &mut mat)?;
// See the comments in the full DFA routine above for why we need this.
if at == input.start()
&& mat.map_or(false, |m| m.offset() > input.start())
&& !was_dead
{
trace!(
"reached beginning of search at offset {} without hitting \
a dead state, quitting to avoid potential false positive match",
at,
);
return Err(RetryError::Quadratic(RetryQuadraticError::new()));
}
Ok(mat)
}

Expand Down
17 changes: 17 additions & 0 deletions testdata/regression.toml
Original file line number Diff line number Diff line change
Expand Up @@ -739,3 +739,20 @@ matches = [[0, 9]]
utf8 = false
match-kind = "all"
search-kind = "overlapping"

# See: https://github.com/rust-lang/regex/issues/1060
[[test]]
name = "reverse-inner-plus-shorter-than-expected"
regex = '(?:(\d+)[:.])?(\d{1,2})[:.](\d{2})'
haystack = '102:12:39'
matches = [[[0, 9], [0, 3], [4, 6], [7, 9]]]

# Like reverse-inner-plus-shorter-than-expected, but using a far simpler regex
# to demonstrate the extent of the rot. Sigh.
#
# See: https://github.com/rust-lang/regex/issues/1060
[[test]]
name = "reverse-inner-short"
regex = '(?:([0-9][0-9][0-9]):)?([0-9][0-9]):([0-9][0-9])'
haystack = '102:12:39'
matches = [[[0, 9], [0, 3], [4, 6], [7, 9]]]