rust-lang · BurntSushi · Oct 14, 2023 · Oct 12, 2023 · Oct 12, 2023 · Oct 13, 2023
diff --git a/regex-automata/src/meta/strategy.rs b/regex-automata/src/meta/strategy.rs
@@ -353,6 +353,7 @@ impl Pre<()> {
 // strategy when len(patterns)==1 if the number of literals is large. In that
 // case, literal extraction gives up and will return an infinite set.)
 impl<P: PrefilterI> Strategy for Pre<P> {
+    #[cfg_attr(feature = "perf-inline", inline(always))]
     fn group_info(&self) -> &GroupInfo {
         &self.group_info
     }
@@ -378,6 +379,7 @@ impl<P: PrefilterI> Strategy for Pre<P> {
         self.pre.memory_usage()
     }
 
+    #[cfg_attr(feature = "perf-inline", inline(always))]
     fn search(&self, _cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
         if input.is_done() {
             return None;
@@ -393,6 +395,7 @@ impl<P: PrefilterI> Strategy for Pre<P> {
             .map(|sp| Match::new(PatternID::ZERO, sp))
     }
 
+    #[cfg_attr(feature = "perf-inline", inline(always))]
     fn search_half(
         &self,
         cache: &mut Cache,
@@ -401,10 +404,12 @@ impl<P: PrefilterI> Strategy for Pre<P> {
         self.search(cache, input).map(|m| HalfMatch::new(m.pattern(), m.end()))
     }
 
+    #[cfg_attr(feature = "perf-inline", inline(always))]
     fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
         self.search(cache, input).is_some()
     }
 
+    #[cfg_attr(feature = "perf-inline", inline(always))]
     fn search_slots(
         &self,
         cache: &mut Cache,
@@ -421,6 +426,7 @@ impl<P: PrefilterI> Strategy for Pre<P> {
         Some(m.pattern())
     }
 
+    #[cfg_attr(feature = "perf-inline", inline(always))]
     fn which_overlapping_matches(
         &self,
         cache: &mut Cache,
@@ -1161,34 +1167,21 @@ impl ReverseSuffix {
             return Err(core);
         }
         let kind = core.info.config().get_match_kind();
-        let suffixes = crate::util::prefilter::suffixes(kind, hirs);
-        let lcs = match suffixes.longest_common_suffix() {
-            None => {
-                debug!(
-                    "skipping reverse suffix optimization because \
-                     a longest common suffix could not be found",
-                );
-                return Err(core);
-            }
-            Some(lcs) if lcs.is_empty() => {
-                debug!(
-                    "skipping reverse suffix optimization because \
-                     the longest common suffix is the empty string",
-                );
-                return Err(core);
-            }
-            Some(lcs) => lcs,
+        let suffixseq = crate::util::prefilter::suffixes(kind, hirs);
+        let Some(suffixes) = suffixseq.literals() else {
+            debug!(
+                "skipping reverse suffix optimization because \
+                 the extract suffix sequence is not finite",
+            );
+            return Err(core);
         };
-        let pre = match Prefilter::new(kind, &[lcs]) {
-            Some(pre) => pre,
-            None => {
-                debug!(
-                    "skipping reverse suffix optimization because \
+        let Some(pre) = Prefilter::new(kind, suffixes) else {
+            debug!(
+                "skipping reverse suffix optimization because \
                      a prefilter could not be constructed from the \
                      longest common suffix",
-                );
-                return Err(core);
-            }
+            );
+            return Err(core);
         };
         if !pre.is_fast() {
             debug!(

diff --git a/regex-syntax/src/hir/translate.rs b/regex-syntax/src/hir/translate.rs
@@ -388,17 +388,10 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
             }
             Ast::Literal(ref x) => match self.ast_literal_to_scalar(x)? {
                 Either::Right(byte) => self.push_byte(byte),
-                Either::Left(ch) => {
-                    if !self.flags().unicode() && ch.len_utf8() > 1 {
-                        return Err(
-                            self.error(x.span, ErrorKind::UnicodeNotAllowed)
-                        );
-                    }
-                    match self.case_fold_char(x.span, ch)? {
-                        None => self.push_char(ch),
-                        Some(expr) => self.push(HirFrame::Expr(expr)),
-                    }
-                }
+                Either::Left(ch) => match self.case_fold_char(x.span, ch)? {
+                    None => self.push_char(ch),
+                    Some(expr) => self.push(HirFrame::Expr(expr)),
+                },
             },
             Ast::Dot(ref span) => {
                 self.push(HirFrame::Expr(self.hir_dot(**span)?));
@@ -872,8 +865,8 @@ impl<'t, 'p> TranslatorI<'t, 'p> {
             })?;
             Ok(Some(Hir::class(hir::Class::Unicode(cls))))
         } else {
-            if c.len_utf8() > 1 {
-                return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
+            if !c.is_ascii() {
+                return Ok(None);
             }
             // If case folding won't do anything, then don't bother trying.
             match c {
@@ -1211,9 +1204,8 @@ impl<'t, 'p> TranslatorI<'t, 'p> {
         match self.ast_literal_to_scalar(ast)? {
             Either::Right(byte) => Ok(byte),
             Either::Left(ch) => {
-                let cp = u32::from(ch);
-                if cp <= 0x7F {
-                    Ok(u8::try_from(cp).unwrap())
+                if ch.is_ascii() {
+                    Ok(u8::try_from(ch).unwrap())
                 } else {
                     // We can't feasibly support Unicode in
                     // byte oriented classes. Byte classes don't
@@ -1661,16 +1653,7 @@ mod tests {
         assert_eq!(t_bytes(r"(?-u)\x61"), hir_lit("a"));
         assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF"));
 
-        assert_eq!(
-            t_err("(?-u)☃"),
-            TestError {
-                kind: hir::ErrorKind::UnicodeNotAllowed,
-                span: Span::new(
-                    Position::new(5, 1, 6),
-                    Position::new(8, 1, 7)
-                ),
-            }
-        );
+        assert_eq!(t("(?-u)☃"), hir_lit("☃"));
         assert_eq!(
             t_err(r"(?-u)\xFF"),
             TestError {
@@ -1748,16 +1731,7 @@ mod tests {
         );
         assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));
 
-        assert_eq!(
-            t_err("(?i-u)β"),
-            TestError {
-                kind: hir::ErrorKind::UnicodeNotAllowed,
-                span: Span::new(
-                    Position::new(6, 1, 7),
-                    Position::new(8, 1, 8),
-                ),
-            }
-        );
+        assert_eq!(t("(?i-u)β"), hir_lit("β"),);
     }
 
     #[test]

diff --git a/src/bytes.rs b/src/bytes.rs
@@ -68,8 +68,8 @@ bytes:
 1. The `u` flag can be disabled even when disabling it might cause the regex to
 match invalid UTF-8. When the `u` flag is disabled, the regex is said to be in
 "ASCII compatible" mode.
-2. In ASCII compatible mode, neither Unicode scalar values nor Unicode
-character classes are allowed.
+2. In ASCII compatible mode, Unicode character classes are not allowed. Literal
+Unicode scalar values outside of character classes are allowed.
 3. In ASCII compatible mode, Perl character classes (`\w`, `\d` and `\s`)
 revert to their typical ASCII definition. `\w` maps to `[[:word:]]`, `\d` maps
 to `[[:digit:]]` and `\s` maps to `[[:space:]]`.