From 4b3e43466ebef394a32772ff04b8ed46427f462d Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Tue, 23 Sep 2025 18:29:12 -0400 Subject: [PATCH] automata: call `Vec::shrink_to_fit` in a few strategic places Other parts of `regex-automata` do this implicitly by using `Box<[T]>`, but it's not always straight-forward to use `Box<[T]>`. (Or, at least, non-annoying.) In some of those cases here, we call `Vec::shrink_to_fit` to decrease memory usage. These are probably the biggest offenders, but I didn't do a thorough investigation here. Fixes #1297 --- CHANGELOG.md | 10 ++++++++++ regex-automata/src/dfa/dense.rs | 4 ++++ regex-automata/src/dfa/onepass.rs | 2 ++ regex-automata/src/dfa/sparse.rs | 2 ++ regex-automata/src/nfa/thompson/nfa.rs | 2 ++ regex-automata/src/util/captures.rs | 3 +++ 6 files changed, 23 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f6348b51f..bbbbc1fbc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,13 @@ +1.11.3 (TBD) +============ +TODO + +Improvements: + +* [BUG #1297](https://github.com/rust-lang/regex/issues/1297): +Improve memory usage by trimming excess memory capacity in some spots. + + 1.11.2 (2025-08-24) =================== This is a new patch release of `regex` with some minor fixes. A larger number diff --git a/regex-automata/src/dfa/dense.rs b/regex-automata/src/dfa/dense.rs index 057536303..456cd386b 100644 --- a/regex-automata/src/dfa/dense.rs +++ b/regex-automata/src/dfa/dense.rs @@ -1274,6 +1274,10 @@ impl Builder { } // Look for and set the universal starting states. 
dfa.set_universal_starts(); + dfa.tt.table.shrink_to_fit(); + dfa.st.table.shrink_to_fit(); + dfa.ms.slices.shrink_to_fit(); + dfa.ms.pattern_ids.shrink_to_fit(); Ok(dfa) } diff --git a/regex-automata/src/dfa/onepass.rs b/regex-automata/src/dfa/onepass.rs index 700f2b18b..85f820ef5 100644 --- a/regex-automata/src/dfa/onepass.rs +++ b/regex-automata/src/dfa/onepass.rs @@ -722,6 +722,8 @@ impl<'a> InternalBuilder<'a> { } } self.shuffle_states(); + self.dfa.starts.shrink_to_fit(); + self.dfa.table.shrink_to_fit(); Ok(self.dfa) } diff --git a/regex-automata/src/dfa/sparse.rs b/regex-automata/src/dfa/sparse.rs index c03d02c86..13c0b6edc 100644 --- a/regex-automata/src/dfa/sparse.rs +++ b/regex-automata/src/dfa/sparse.rs @@ -393,6 +393,8 @@ impl DFA> { new_state.set_next_at(i, next); } } + new.tt.sparse.shrink_to_fit(); + new.st.table.shrink_to_fit(); debug!( "created sparse DFA, memory usage: {} (dense memory usage: {})", new.memory_usage(), diff --git a/regex-automata/src/nfa/thompson/nfa.rs b/regex-automata/src/nfa/thompson/nfa.rs index 2a0cc9c16..405aa7533 100644 --- a/regex-automata/src/nfa/thompson/nfa.rs +++ b/regex-automata/src/nfa/thompson/nfa.rs @@ -1338,6 +1338,8 @@ impl Inner { self.look_set_prefix_any = self.look_set_prefix_any.union(prefix_any); } + self.states.shrink_to_fit(); + self.start_pattern.shrink_to_fit(); NFA(Arc::new(self)) } diff --git a/regex-automata/src/util/captures.rs b/regex-automata/src/util/captures.rs index 8e6e5aa8b..5376f348d 100644 --- a/regex-automata/src/util/captures.rs +++ b/regex-automata/src/util/captures.rs @@ -1606,6 +1606,9 @@ impl GroupInfo { } } group_info.fixup_slot_ranges()?; + group_info.slot_ranges.shrink_to_fit(); + group_info.name_to_index.shrink_to_fit(); + group_info.index_to_name.shrink_to_fit(); Ok(GroupInfo(Arc::new(group_info))) }