Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 18 additions & 25 deletions regex-automata/src/meta/strategy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,7 @@ impl Pre<()> {
// strategy when len(patterns)==1 if the number of literals is large. In that
// case, literal extraction gives up and will return an infinite set.)
impl<P: PrefilterI> Strategy for Pre<P> {
/// Returns a reference to this strategy's `group_info` field.
///
/// Marked `inline(always)` when the `perf-inline` feature is enabled.
#[cfg_attr(feature = "perf-inline", inline(always))]
fn group_info(&self) -> &GroupInfo {
&self.group_info
}
Expand All @@ -378,6 +379,7 @@ impl<P: PrefilterI> Strategy for Pre<P> {
self.pre.memory_usage()
}

#[cfg_attr(feature = "perf-inline", inline(always))]
fn search(&self, _cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
if input.is_done() {
return None;
Expand All @@ -393,6 +395,7 @@ impl<P: PrefilterI> Strategy for Pre<P> {
.map(|sp| Match::new(PatternID::ZERO, sp))
}

#[cfg_attr(feature = "perf-inline", inline(always))]
fn search_half(
&self,
cache: &mut Cache,
Expand All @@ -401,10 +404,12 @@ impl<P: PrefilterI> Strategy for Pre<P> {
self.search(cache, input).map(|m| HalfMatch::new(m.pattern(), m.end()))
}

/// Reports whether `input` contains a match.
///
/// Delegates to `self.search` with the same `cache` and `input` and returns
/// `true` exactly when that call yields `Some`; the match itself is
/// discarded.
///
/// Marked `inline(always)` when the `perf-inline` feature is enabled.
#[cfg_attr(feature = "perf-inline", inline(always))]
fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
self.search(cache, input).is_some()
}

#[cfg_attr(feature = "perf-inline", inline(always))]
fn search_slots(
&self,
cache: &mut Cache,
Expand All @@ -421,6 +426,7 @@ impl<P: PrefilterI> Strategy for Pre<P> {
Some(m.pattern())
}

#[cfg_attr(feature = "perf-inline", inline(always))]
fn which_overlapping_matches(
&self,
cache: &mut Cache,
Expand Down Expand Up @@ -1161,34 +1167,21 @@ impl ReverseSuffix {
return Err(core);
}
let kind = core.info.config().get_match_kind();
let suffixes = crate::util::prefilter::suffixes(kind, hirs);
let lcs = match suffixes.longest_common_suffix() {
None => {
debug!(
"skipping reverse suffix optimization because \
a longest common suffix could not be found",
);
return Err(core);
}
Some(lcs) if lcs.is_empty() => {
debug!(
"skipping reverse suffix optimization because \
the longest common suffix is the empty string",
);
return Err(core);
}
Some(lcs) => lcs,
let suffixseq = crate::util::prefilter::suffixes(kind, hirs);
let Some(suffixes) = suffixseq.literals() else {
debug!(
"skipping reverse suffix optimization because \
the extract suffix sequence is not finite",
);
return Err(core);
};
let pre = match Prefilter::new(kind, &[lcs]) {
Some(pre) => pre,
None => {
debug!(
"skipping reverse suffix optimization because \
let Some(pre) = Prefilter::new(kind, suffixes) else {
debug!(
"skipping reverse suffix optimization because \
a prefilter could not be constructed from the \
longest common suffix",
);
return Err(core);
}
);
return Err(core);
};
if !pre.is_fast() {
debug!(
Expand Down
46 changes: 10 additions & 36 deletions regex-syntax/src/hir/translate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -388,17 +388,10 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
}
Ast::Literal(ref x) => match self.ast_literal_to_scalar(x)? {
Either::Right(byte) => self.push_byte(byte),
Either::Left(ch) => {
if !self.flags().unicode() && ch.len_utf8() > 1 {
return Err(
self.error(x.span, ErrorKind::UnicodeNotAllowed)
);
}
match self.case_fold_char(x.span, ch)? {
None => self.push_char(ch),
Some(expr) => self.push(HirFrame::Expr(expr)),
}
}
Either::Left(ch) => match self.case_fold_char(x.span, ch)? {
None => self.push_char(ch),
Some(expr) => self.push(HirFrame::Expr(expr)),
},
},
Ast::Dot(ref span) => {
self.push(HirFrame::Expr(self.hir_dot(**span)?));
Expand Down Expand Up @@ -872,8 +865,8 @@ impl<'t, 'p> TranslatorI<'t, 'p> {
})?;
Ok(Some(Hir::class(hir::Class::Unicode(cls))))
} else {
if c.len_utf8() > 1 {
return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
if !c.is_ascii() {
return Ok(None);
}
// If case folding won't do anything, then don't bother trying.
match c {
Expand Down Expand Up @@ -1211,9 +1204,8 @@ impl<'t, 'p> TranslatorI<'t, 'p> {
match self.ast_literal_to_scalar(ast)? {
Either::Right(byte) => Ok(byte),
Either::Left(ch) => {
let cp = u32::from(ch);
if cp <= 0x7F {
Ok(u8::try_from(cp).unwrap())
if ch.is_ascii() {
Ok(u8::try_from(ch).unwrap())
} else {
// We can't feasibly support Unicode in
// byte oriented classes. Byte classes don't
Expand Down Expand Up @@ -1661,16 +1653,7 @@ mod tests {
assert_eq!(t_bytes(r"(?-u)\x61"), hir_lit("a"));
assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF"));

assert_eq!(
t_err("(?-u)☃"),
TestError {
kind: hir::ErrorKind::UnicodeNotAllowed,
span: Span::new(
Position::new(5, 1, 6),
Position::new(8, 1, 7)
),
}
);
assert_eq!(t("(?-u)☃"), hir_lit("☃"));
assert_eq!(
t_err(r"(?-u)\xFF"),
TestError {
Expand Down Expand Up @@ -1748,16 +1731,7 @@ mod tests {
);
assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));

assert_eq!(
t_err("(?i-u)β"),
TestError {
kind: hir::ErrorKind::UnicodeNotAllowed,
span: Span::new(
Position::new(6, 1, 7),
Position::new(8, 1, 8),
),
}
);
assert_eq!(t("(?i-u)β"), hir_lit("β"),);
}

#[test]
Expand Down
4 changes: 2 additions & 2 deletions src/bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ bytes:
1. The `u` flag can be disabled even when disabling it might cause the regex to
match invalid UTF-8. When the `u` flag is disabled, the regex is said to be in
"ASCII compatible" mode.
2. In ASCII compatible mode, neither Unicode scalar values nor Unicode
character classes are allowed.
2. In ASCII compatible mode, Unicode character classes are not allowed. Literal
Unicode scalar values outside of character classes are allowed.
3. In ASCII compatible mode, Perl character classes (`\w`, `\d` and `\s`)
revert to their typical ASCII definition. `\w` maps to `[[:word:]]`, `\d` maps
to `[[:digit:]]` and `\s` maps to `[[:space:]]`.
Expand Down