rust-lang · BurntSushi · Oct 9, 2025 · Sep 28, 2025 · Sep 12, 2025 · Sep 12, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,13 +1,20 @@
-1.11.4 (TBD)
+1.12.0 (TBD)
 ============
 TODO
 
+Improvements:
+
+* [FEATURE #1146](https://github.com/rust-lang/regex/issues/1146):
+Add `Captures::get_match` for returning the overall match without `unwrap()`.
+
 Bug fixes:
 
 * [BUG #1116](https://github.com/rust-lang/regex/issues/1116):
 Fixes a memory usage regression for large regexes (introduced in `regex 1.9`).
 * [BUG #1165](https://github.com/rust-lang/regex/issues/1083):
 Fixes a panic in the lazy DFA (can only occur for especially large regexes).
+* [BUG #1295](https://github.com/rust-lang/regex/pull/1295):
+Fixes a panic when deserializing a corrupted dense DFA.
 
 
 1.11.3 (2025-09-25)

diff --git a/regex-automata/src/dfa/dense.rs b/regex-automata/src/dfa/dense.rs
@@ -2344,10 +2344,17 @@ impl<'a> DFA<&'a [u32]> {
         // table, match states and accelerators below. If any validation fails,
         // then we return an error.
         let (dfa, nread) = unsafe { DFA::from_bytes_unchecked(slice)? };
+        // Note that validation order is important here:
+        //
+        // * `MatchState::validate` can be called with an untrusted DFA.
+        // * `TransitionTable::validate` uses `dfa.ms` through `match_len`.
+        // * `StartTable::validate` needs a valid transition table.
+        //
+        // So... validate match states, then transitions, then start states.
+        dfa.accels.validate()?;
+        dfa.ms.validate(&dfa)?;
         dfa.tt.validate(&dfa)?;
         dfa.st.validate(&dfa)?;
-        dfa.ms.validate(&dfa)?;
-        dfa.accels.validate()?;
         // N.B. dfa.special doesn't have a way to do unchecked deserialization,
         // so it has already been validated.
         for state in dfa.states() {
@@ -5234,4 +5241,20 @@ mod tests {
         let got = dfa.try_search_rev(&input);
         assert_eq!(Err(expected), got);
     }
+
+    // This panics in `TransitionTable::validate` if the match states are not
+    // validated first.
+    //
+    // See: https://github.com/rust-lang/regex/pull/1295
+    #[test]
+    fn regression_validation_order() {
+        let mut dfa = DFA::new("abc").unwrap();
+        dfa.ms = MatchStates {
+            slices: vec![],
+            pattern_ids: vec![],
+            pattern_len: 1,
+        };
+        let (buf, _) = dfa.to_bytes_native_endian();
+        DFA::from_bytes(&buf).unwrap_err();
+    }
 }
diff --git a/regex-automata/src/dfa/sparse.rs b/regex-automata/src/dfa/sparse.rs
@@ -1860,6 +1860,12 @@ impl StartTable<Vec<u8>> {
             let new_start_id = remap[dfa.to_index(old_start_id)];
             sl.set_start(anchored, sty, new_start_id);
         }
+        if let Some(ref mut id) = sl.universal_start_anchored {
+            *id = remap[dfa.to_index(*id)];
+        }
+        if let Some(ref mut id) = sl.universal_start_unanchored {
+            *id = remap[dfa.to_index(*id)];
+        }
         Ok(sl)
     }
 }

diff --git a/regex-automata/tests/dfa/api.rs b/regex-automata/tests/dfa/api.rs
@@ -3,7 +3,7 @@ use std::error::Error;
 use regex_automata::{
     dfa::{dense, Automaton, OverlappingState},
     nfa::thompson,
-    HalfMatch, Input, MatchError,
+    Anchored, HalfMatch, Input, MatchError,
 };
 
 // Tests that quit bytes in the forward direction work correctly.
@@ -67,3 +67,96 @@ fn unicode_word_implicitly_works() -> Result<(), Box<dyn Error>> {
     assert_eq!(Ok(Some(expected)), dfa.try_search_fwd(&Input::new(b" a")));
     Ok(())
 }
+
+// A variant of [`Automaton::is_special_state`]'s doctest, but with universal
+// start states.
+//
+// See: https://github.com/rust-lang/regex/pull/1195
+#[test]
+fn universal_start_search() -> Result<(), Box<dyn Error>> {
+    fn find<A: Automaton>(
+        dfa: &A,
+        haystack: &[u8],
+    ) -> Result<Option<HalfMatch>, MatchError> {
+        let mut state = dfa
+            .universal_start_state(Anchored::No)
+            .expect("regex should not require lookbehind");
+        let mut last_match = None;
+        // Walk all the bytes in the haystack. We can quit early if we see
+        // a dead or a quit state. The former means the automaton will
+        // never transition to any other state. The latter means that the
+        // automaton entered a condition in which its search failed.
+        for (i, &b) in haystack.iter().enumerate() {
+            state = dfa.next_state(state, b);
+            if dfa.is_special_state(state) {
+                if dfa.is_match_state(state) {
+                    last_match =
+                        Some(HalfMatch::new(dfa.match_pattern(state, 0), i));
+                } else if dfa.is_dead_state(state) {
+                    return Ok(last_match);
+                } else if dfa.is_quit_state(state) {
+                    // It is possible to enter into a quit state after
+                    // observing a match has occurred. In that case, we
+                    // should return the match instead of an error.
+                    if last_match.is_some() {
+                        return Ok(last_match);
+                    }
+                    return Err(MatchError::quit(b, i));
+                }
+                // Implementors may also want to check for start or accel
+                // states and handle them differently for performance
+                // reasons. But it is not necessary for correctness.
+            }
+        }
+        // Matches are always delayed by 1 byte, so we must explicitly walk
+        // the special "EOI" transition at the end of the search.
+        state = dfa.next_eoi_state(state);
+        if dfa.is_match_state(state) {
+            last_match = Some(HalfMatch::new(
+                dfa.match_pattern(state, 0),
+                haystack.len(),
+            ));
+        }
+        Ok(last_match)
+    }
+
+    fn check_impl(
+        dfa: impl Automaton,
+        haystack: &str,
+        pat: usize,
+        offset: usize,
+    ) -> Result<(), Box<dyn Error>> {
+        let haystack = haystack.as_bytes();
+        let mat = find(&dfa, haystack)?.unwrap();
+        assert_eq!(mat.pattern().as_usize(), pat);
+        assert_eq!(mat.offset(), offset);
+        Ok(())
+    }
+
+    fn check(
+        dfa: &dense::DFA<Vec<u32>>,
+        haystack: &str,
+        pat: usize,
+        offset: usize,
+    ) -> Result<(), Box<dyn Error>> {
+        check_impl(dfa, haystack, pat, offset)?;
+        check_impl(dfa.to_sparse()?, haystack, pat, offset)?;
+        Ok(())
+    }
+
+    let dfa = dense::DFA::new(r"[a-z]+")?;
+    let haystack = "123 foobar 4567";
+    check(&dfa, haystack, 0, 10)?;
+
+    let dfa = dense::DFA::new(r"[0-9]{4}")?;
+    let haystack = "123 foobar 4567";
+    check(&dfa, haystack, 0, 15)?;
+
+    let dfa = dense::DFA::new_many(&[r"[a-z]+", r"[0-9]+"])?;
+    let haystack = "123 foobar 4567";
+    check(&dfa, haystack, 1, 3)?;
+    check(&dfa, &haystack[3..], 0, 7)?;
+    check(&dfa, &haystack[10..], 1, 5)?;
+
+    Ok(())
+}
diff --git a/regex-capi/Cargo.toml b/regex-capi/Cargo.toml
@@ -11,7 +11,8 @@ description = """
 A C API for Rust's regular expression library.
 """
 workspace = ".."
-edition = "2018"
+edition = "2021"
+rust-version = "1.65"
 
 [lib]
 name = "rure"

diff --git a/regex-capi/src/error.rs b/regex-capi/src/error.rs
@@ -60,7 +60,7 @@ ffi_fn! {
 ffi_fn! {
     fn rure_error_message(err: *mut Error) -> *const c_char {
         let err = unsafe { &mut *err };
-        let cmsg = match CString::new(format!("{}", err)) {
+        let cmsg = match CString::new(format!("{err}")) {
             Ok(msg) => msg,
             Err(err) => {
                 // I guess this can probably happen if the regex itself has a

diff --git a/regex-capi/src/macros.rs b/regex-capi/src/macros.rs
@@ -20,8 +20,8 @@ macro_rules! ffi_fn {
                     };
                     let _ = writeln!(
                         &mut io::stderr(),
-                        "panic unwind caught, aborting: {:?}",
-                        msg);
+                        "panic unwind caught, aborting: {msg:?}"
+                    );
                     unsafe { abort() }
                 }
             }

diff --git a/regex-capi/src/rure.rs b/regex-capi/src/rure.rs
@@ -82,7 +82,7 @@ ffi_fn! {
         let re = rure_compile(
             pat, len, RURE_DEFAULT_FLAGS, ptr::null(), &mut err);
         if err.is_err() {
-            let _ = writeln!(&mut io::stderr(), "{}", err);
+            let _ = writeln!(&mut io::stderr(), "{err}");
             let _ = writeln!(
                 &mut io::stderr(), "aborting from rure_compile_must");
             unsafe { abort() }
@@ -579,7 +579,7 @@ ffi_fn! {
         let mut err = Error::new(ErrorKind::None);
         let esc = rure_escape(pat, len, &mut err);
         if err.is_err() {
-            let _ = writeln!(&mut io::stderr(), "{}", err);
+            let _ = writeln!(&mut io::stderr(), "{err}");
             let _ = writeln!(
                 &mut io::stderr(), "aborting from rure_escape_must");
             unsafe { abort() }

diff --git a/regex-cli/args/flags.rs b/regex-cli/args/flags.rs
@@ -50,9 +50,7 @@ impl std::str::FromStr for ByteSet {
         for &byte in Vec::unescape_bytes(s).iter() {
             anyhow::ensure!(
                 !seen[usize::from(byte)],
-                "saw duplicate byte 0x{:2X} in '{}'",
-                byte,
-                s,
+                "saw duplicate byte 0x{byte:2X} in '{s}'",
             );
             seen[usize::from(byte)] = true;
             set.push(byte);
@@ -96,7 +94,7 @@ impl std::str::FromStr for StartKind {
             "both" => regex_automata::dfa::StartKind::Both,
             "unanchored" => regex_automata::dfa::StartKind::Unanchored,
             "anchored" => regex_automata::dfa::StartKind::Anchored,
-            unk => anyhow::bail!("unrecognized start kind '{}'", unk),
+            unk => anyhow::bail!("unrecognized start kind '{unk}'"),
         };
         Ok(StartKind { kind })
     }
@@ -147,7 +145,7 @@ impl std::str::FromStr for MatchKind {
         let kind = match s {
             "leftmost-first" => regex_automata::MatchKind::LeftmostFirst,
             "all" => regex_automata::MatchKind::All,
-            unk => anyhow::bail!("unrecognized match kind '{}'", unk),
+            unk => anyhow::bail!("unrecognized match kind '{unk}'"),
         };
         Ok(MatchKind { kind })
     }

diff --git a/regex-cli/args/mod.rs b/regex-cli/args/mod.rs
@@ -143,11 +143,11 @@ pub fn next_as_command(usage: &str, p: &mut Parser) -> anyhow::Result<String> {
     let usage = usage.trim();
     let arg = match p.next()? {
         Some(arg) => arg,
-        None => anyhow::bail!("{}", usage),
+        None => anyhow::bail!("{usage}"),
     };
     let cmd = match arg {
         Arg::Value(cmd) => cmd.string()?,
-        Arg::Short('h') | Arg::Long("help") => anyhow::bail!("{}", usage),
+        Arg::Short('h') | Arg::Long("help") => anyhow::bail!("{usage}"),
         arg => return Err(arg.unexpected().into()),
     };
     Ok(cmd)

diff --git a/regex-cli/args/syntax.rs b/regex-cli/args/syntax.rs
@@ -45,7 +45,7 @@ impl Config {
             .map(|(i, p)| {
                 let p = p.as_ref();
                 self.ast(p).with_context(|| {
-                    format!("failed to parse pattern {} to AST: '{}'", i, p,)
+                    format!("failed to parse pattern {i} to AST: '{p}'",)
                 })
             })
             .collect()
@@ -80,10 +80,7 @@ impl Config {
             .map(|(i, (pat, ast))| {
                 let (pat, ast) = (pat.as_ref(), ast.borrow());
                 self.hir(pat, ast).with_context(|| {
-                    format!(
-                        "failed to translate pattern {} to HIR: '{}'",
-                        i, pat,
-                    )
+                    format!("failed to translate pattern {i} to HIR: '{pat}'")
                 })
             })
             .collect()

diff --git a/regex-cli/cmd/compile_test.rs b/regex-cli/cmd/compile_test.rs
@@ -127,11 +127,11 @@ OPTIONS:
         } else {
             write!(wtr, "regex,")?;
         }
-        write!(wtr, "{},", revision)?;
+        write!(wtr, "{revision},")?;
         write!(wtr, "{},", tdir.test.profile.as_str())?;
         write!(wtr, "{:?},", m.duration)?;
         write!(wtr, "{:?},", m.size)?;
-        write!(wtr, "{:?}", relative_size)?;
+        write!(wtr, "{relative_size:?}")?;
         write!(wtr, "\n")?;
     }
     Ok(())
@@ -439,7 +439,7 @@ impl Test {
         let features = self
             .features
             .iter()
-            .map(|f| format!(r#""{}""#, f))
+            .map(|f| format!(r#""{f}""#))
             .collect::<Vec<String>>()
             .join(", ");
         format!(
@@ -480,7 +480,7 @@ strip = "symbols"
         let features = self
             .features
             .iter()
-            .map(|f| format!(r#""{}""#, f))
+            .map(|f| format!(r#""{f}""#))
             .collect::<Vec<String>>()
             .join(", ");
         format!(
@@ -521,7 +521,7 @@ strip = "symbols"
         let features = self
             .features
             .iter()
-            .map(|f| format!(r#""{}""#, f))
+            .map(|f| format!(r#""{f}""#))
             .collect::<Vec<String>>()
             .join(", ");
         format!(
@@ -800,8 +800,7 @@ fn baseline_size(parent_dir: &Path, profile: Profile) -> anyhow::Result<u64> {
         .with_context(|| format!("'cargo clean' failed for baseline"))?;
     anyhow::ensure!(
         status.success(),
-        "'cargo clean' got an error exit code of {:?} for baseline",
-        status,
+        "'cargo clean' got an error exit code of {status:?} for baseline",
     );
     let status = Command::new("cargo")
         .arg("build")
@@ -814,8 +813,7 @@ fn baseline_size(parent_dir: &Path, profile: Profile) -> anyhow::Result<u64> {
         .with_context(|| format!("'cargo build' failed for baseline"))?;
     anyhow::ensure!(
         status.success(),
-        "'cargo build' got an error exit code of {:?} for baseline",
-        status,
+        "'cargo build' got an error exit code of {status:?} for baseline",
     );
     let bin = dir
         .join("target")