From ae0497a733462dbcef913699ebdf0b3fa015b44a Mon Sep 17 00:00:00 2001 From: Rain Date: Fri, 22 May 2026 12:57:32 -0700 Subject: [PATCH 1/6] [spr] initial version Created using spr 1.3.6-beta.1 --- crates/iddqd/Cargo.toml | 1 + crates/iddqd/src/bi_hash_map/imp.rs | 46 ++--- crates/iddqd/src/id_hash_map/imp.rs | 26 +-- crates/iddqd/src/support/hash_table.rs | 175 +++++++++++++----- crates/iddqd/src/tri_hash_map/imp.rs | 64 ++----- crates/iddqd/tests/integration/bi_hash_map.rs | 9 +- crates/iddqd/tests/integration/id_hash_map.rs | 14 +- crates/iddqd/tests/integration/id_ord_map.rs | 5 +- .../iddqd/tests/integration/panic_safety.rs | 4 - .../iddqd/tests/integration/pathological.rs | 58 +++++- .../iddqd/tests/integration/tri_hash_map.rs | 9 +- 11 files changed, 236 insertions(+), 175 deletions(-) diff --git a/crates/iddqd/Cargo.toml b/crates/iddqd/Cargo.toml index 39a85fa..cc0a4d9 100644 --- a/crates/iddqd/Cargo.toml +++ b/crates/iddqd/Cargo.toml @@ -36,6 +36,7 @@ proptest = { workspace = true, optional = true } [dev-dependencies] expectorate.workspace = true +foldhash.workspace = true iddqd-test-utils.workspace = true proptest.workspace = true serde.workspace = true diff --git a/crates/iddqd/src/bi_hash_map/imp.rs b/crates/iddqd/src/bi_hash_map/imp.rs index f40a1f1..df512eb 100644 --- a/crates/iddqd/src/bi_hash_map/imp.rs +++ b/crates/iddqd/src/bi_hash_map/imp.rs @@ -11,9 +11,10 @@ use crate::{ internal::{ValidateCompact, ValidationError}, support::{ ItemIndex, - alloc::{AllocWrapper, Allocator, Global, global_alloc}, + alloc::{Allocator, Global, global_alloc}, borrow::DormantMutRef, fmt_utils::StrDisplayAsDebug, + hash_table, item_set::ItemSet, map_hash::MapHash, }, @@ -24,7 +25,6 @@ use core::{ hash::{BuildHasher, Hash}, }; use equivalent::Equivalent; -use hashbrown::hash_table; /// A 1:1 (bijective) map for two keys and a value. /// @@ -759,14 +759,8 @@ impl BiHashMap { /// ``` pub fn reserve(&mut self, additional: usize) { self.items.reserve(additional); - let items = &self.items; - let state = &self.tables.state; - self.tables - .k1_to_item - .reserve(additional, |ix| state.hash_one(items[*ix].key1())); - self.tables - .k2_to_item - .reserve(additional, |ix| state.hash_one(items[*ix].key2())); + self.tables.k1_to_item.reserve(additional); + self.tables.k2_to_item.reserve(additional); } /// Tries to reserve capacity for at least `additional` more elements to be @@ -820,15 +814,13 @@ impl BiHashMap { additional: usize, ) -> Result<(), crate::errors::TryReserveError> { self.items.try_reserve(additional)?; - let items = &self.items; - let state = &self.tables.state; self.tables .k1_to_item - .try_reserve(additional, |ix| state.hash_one(items[*ix].key1())) + .try_reserve(additional) .map_err(crate::errors::TryReserveError::from_hashbrown)?; self.tables .k2_to_item - .try_reserve(additional, |ix| state.hash_one(items[*ix].key2())) + .try_reserve(additional) .map_err(crate::errors::TryReserveError::from_hashbrown)?; Ok(()) } @@ -875,14 +867,8 @@ impl BiHashMap { self.tables.k1_to_item.remap_indexes(&remap); self.tables.k2_to_item.remap_indexes(&remap); } - let items = &self.items; - let state = &self.tables.state; - self.tables - .k1_to_item - .shrink_to_fit(|ix| state.hash_one(items[*ix].key1())); - self.tables - .k2_to_item - .shrink_to_fit(|ix| state.hash_one(items[*ix].key2())); + self.tables.k1_to_item.shrink_to_fit(); + self.tables.k2_to_item.shrink_to_fit(); } /// Shrinks the capacity of the map with a lower limit. It will drop @@ -932,14 +918,8 @@ impl BiHashMap { self.tables.k1_to_item.remap_indexes(&remap); self.tables.k2_to_item.remap_indexes(&remap); } - let items = &self.items; - let state = &self.tables.state; - self.tables - .k1_to_item - .shrink_to(min_capacity, |ix| state.hash_one(items[*ix].key1())); - self.tables - .k2_to_item - .shrink_to(min_capacity, |ix| state.hash_one(items[*ix].key2())); + self.tables.k1_to_item.shrink_to(min_capacity); + self.tables.k2_to_item.shrink_to(min_capacity); } /// Returns an iterator over all items in the map. @@ -2436,13 +2416,13 @@ impl Eq } fn detect_dup_or_insert<'a, A: Allocator>( - item: hash_table::Entry<'a, ItemIndex, AllocWrapper>, + item: hash_table::Entry<'a, A>, duplicates: &mut BTreeSet, -) -> Option>> { +) -> Option> { match item { hash_table::Entry::Vacant(slot) => Some(slot), hash_table::Entry::Occupied(slot) => { - duplicates.insert(*slot.get()); + duplicates.insert(slot.get()); None } } diff --git a/crates/iddqd/src/id_hash_map/imp.rs b/crates/iddqd/src/id_hash_map/imp.rs index 29317c2..c34ac26 100644 --- a/crates/iddqd/src/id_hash_map/imp.rs +++ b/crates/iddqd/src/id_hash_map/imp.rs @@ -10,6 +10,7 @@ use crate::{ ItemIndex, alloc::{Allocator, Global, global_alloc}, borrow::DormantMutRef, + hash_table, item_set::ItemSet, map_hash::MapHash, }, @@ -20,7 +21,6 @@ use core::{ hash::{BuildHasher, Hash}, }; use equivalent::Equivalent; -use hashbrown::hash_table; /// A hash map where the key is part of the value. /// @@ -642,11 +642,7 @@ impl IdHashMap { /// ``` pub fn reserve(&mut self, additional: usize) { self.items.reserve(additional); - let items = &self.items; - let state = &self.tables.state; - self.tables - .key_to_item - .reserve(additional, |ix| state.hash_one(items[*ix].key())); + self.tables.key_to_item.reserve(additional); } /// Tries to reserve capacity for at least `additional` more elements to be @@ -696,11 +692,9 @@ impl IdHashMap { additional: usize, ) -> Result<(), crate::errors::TryReserveError> { self.items.try_reserve(additional)?; - let items = &self.items; - let state = &self.tables.state; self.tables .key_to_item - .try_reserve(additional, |ix| state.hash_one(items[*ix].key())) + .try_reserve(additional) .map_err(crate::errors::TryReserveError::from_hashbrown)?; Ok(()) } @@ -742,11 +736,7 @@ impl IdHashMap { if !remap.is_identity() { self.tables.key_to_item.remap_indexes(&remap); } - let items = &self.items; - let state = &self.tables.state; - self.tables - .key_to_item - .shrink_to_fit(|ix| state.hash_one(items[*ix].key())); + self.tables.key_to_item.shrink_to_fit(); } /// Shrinks the capacity of the map with a lower limit. It will drop @@ -791,11 +781,7 @@ impl IdHashMap { if !remap.is_identity() { self.tables.key_to_item.remap_indexes(&remap); } - let items = &self.items; - let state = &self.tables.state; - self.tables - .key_to_item - .shrink_to(min_capacity, |ix| state.hash_one(items[*ix].key())); + self.tables.key_to_item.shrink_to(min_capacity); } /// Iterates over the items in the map. @@ -1403,7 +1389,7 @@ impl IdHashMap { .entry(state, key, |index| self.items[index].key()) { hash_table::Entry::Occupied(slot) => { - duplicates.insert(*slot.get()); + duplicates.insert(slot.get()); None } hash_table::Entry::Vacant(slot) => Some(slot), diff --git a/crates/iddqd/src/support/hash_table.rs b/crates/iddqd/src/support/hash_table.rs index 90245b6..9e5762d 100644 --- a/crates/iddqd/src/support/hash_table.rs +++ b/crates/iddqd/src/support/hash_table.rs @@ -1,4 +1,24 @@ -//! A wrapper around a hash table with some random state. +//! A wrapper around a hash table that caches each entry's key hash. +//! +//! # Why we cache the hash +//! +//! Hashbrown's `RawTable::reserve_rehash` may choose to rehash in plae when a +//! reserve is requested on a tombstone-heavy table. That path invokes the +//! caller-supplied rehash hasher on every surviving entry, and hashbrown +//! documents it as not panic-safe (a panic mid-rehash can leave the table with +//! duplicate stored values). +//! +//! Duplicate indexes are quite bad! +//! +//! * For ordered maps they immediately lead to mutable aliasing during `iter_mut`. +//! * For hash maps, we don't walk the item set in hash order today, so this +//! isn't a soundness issue. But this is a very thin guarantee -- a future change +//! to walk the item set in hash order would easily result in mutable aliasing. +//! +//! By storing the per-entry hash alongside its [`ItemIndex`], we can supply a +//! rehash hasher of the form `|stored| stored.hash` that reads the cached +//! value and never invokes user `Hash`. Rehash is then panic-free by +//! construction. use super::{ ItemIndex, @@ -13,14 +33,35 @@ use core::{ hash::{BuildHasher, Hash}, }; use equivalent::Equivalent; -use hashbrown::{ - HashTable, - hash_table::{AbsentEntry, Entry, OccupiedEntry}, -}; +use hashbrown::{HashTable, hash_table}; + +/// An [`ItemIndex`] stored in [`MapHashTable`] together with the cached hash +/// of its key. +/// +/// See the module docs for why we cache the hash. +#[derive(Clone, Debug)] +pub(crate) struct HashedIndex { + pub(crate) ix: ItemIndex, + pub(crate) hash: u64, +} + +impl HashedIndex { + #[inline] + fn new(ix: ItemIndex, hash: u64) -> Self { + Self { ix, hash } + } +} + +/// Panic-free rehash hasher. Used everywhere we hand a hasher closure to +/// hashbrown for reserve / shrink / rehash paths. +#[inline] +fn cached_hasher(stored: &HashedIndex) -> u64 { + stored.hash +} #[derive(Clone, Default)] pub(crate) struct MapHashTable { - pub(super) items: HashTable>, + pub(super) items: HashTable>, } impl fmt::Debug for MapHashTable { @@ -60,7 +101,8 @@ impl MapHashTable { ValidateCompact::Compact => { // All items between 0 (inclusive) and self.len() (exclusive) // are expected to be present, and there are no duplicates. - let mut values: Vec<_> = self.items.iter().copied().collect(); + let mut values: Vec<_> = + self.items.iter().map(|h| h.ix).collect(); values.sort_unstable(); for (i, value) in values.iter().enumerate() { if value.as_u32() as usize != i { @@ -72,7 +114,7 @@ impl MapHashTable { } ValidateCompact::NonCompact => { // There should be no duplicates. - let values: Vec<_> = self.items.iter().copied().collect(); + let values: Vec<_> = self.items.iter().map(|h| h.ix).collect(); let value_set: BTreeSet<_> = values.iter().copied().collect(); if value_set.len() != values.len() { return Err(TableValidationError::new(format!( @@ -108,7 +150,9 @@ impl MapHashTable { Q: ?Sized + Hash + Equivalent, { let hash = state.hash_one(key); - self.items.find(hash, |index| key.equivalent(&lookup(*index))).copied() + self.items + .find(hash, |stored| key.equivalent(&lookup(stored.ix))) + .map(|stored| stored.ix) } pub(crate) fn entry( @@ -116,37 +160,44 @@ impl MapHashTable { state: &S, key: K, lookup: F, - ) -> Entry<'_, ItemIndex, AllocWrapper> + ) -> Entry<'_, A> where F: Fn(ItemIndex) -> K, { let hash = state.hash_one(&key); - self.items.entry( + match self.items.entry( hash, - |index| lookup(*index) == key, - |v| state.hash_one(lookup(*v)), - ) + |stored| lookup(stored.ix) == key, + cached_hasher, + ) { + hash_table::Entry::Occupied(inner) => { + Entry::Occupied(OccupiedEntry { inner }) + } + hash_table::Entry::Vacant(inner) => { + Entry::Vacant(VacantEntry { inner, hash }) + } + } } pub(crate) fn find_entry_by_hash( &mut self, hash: u64, mut f: F, - ) -> Result< - OccupiedEntry<'_, ItemIndex, AllocWrapper>, - AbsentEntry<'_, ItemIndex, AllocWrapper>, - > + ) -> Result, ()> where F: FnMut(ItemIndex) -> bool, { - self.items.find_entry(hash, |index| f(*index)) + match self.items.find_entry(hash, |stored| f(stored.ix)) { + Ok(inner) => Ok(OccupiedEntry { inner }), + Err(_) => Err(()), + } } pub(crate) fn retain(&mut self, mut f: F) where F: FnMut(ItemIndex) -> bool, { - self.items.retain(|index| f(*index)); + self.items.retain(|stored| f(stored.ix)); } /// Clears the hash table, removing all items. @@ -157,18 +208,13 @@ impl MapHashTable { /// Reserves capacity for at least `additional` more items. /// - /// `hasher` is invoked for every entry that hashbrown has to re-slot during - /// a growth rehash, and must return the same hash that was used to insert - /// that entry originally. Anything else silently corrupts the table: all - /// surviving entries land in the same bucket, and subsequent lookups — - /// which probe using the real key hash — miss. + /// The rehash closure reads the cached hash on each stored entry (see + /// the module docs), so this call never invokes user `Hash` even when + /// hashbrown falls into `rehash_in_place`. That makes `reserve` + /// panic-safe by construction. #[inline] - pub(crate) fn reserve( - &mut self, - additional: usize, - hasher: impl Fn(&ItemIndex) -> u64, - ) { - self.items.reserve(additional, hasher); + pub(crate) fn reserve(&mut self, additional: usize) { + self.items.reserve(additional, cached_hasher); } /// Rewrites every stored index via `remap`. @@ -181,40 +227,77 @@ impl MapHashTable { /// [`ItemSet::shrink_to_fit`]: super::item_set::ItemSet::shrink_to_fit /// [`ItemSet::shrink_to`]: super::item_set::ItemSet::shrink_to pub(crate) fn remap_indexes(&mut self, remap: &IndexRemap) { - for slot in self.items.iter_mut() { - *slot = remap.remap(*slot); + for stored in self.items.iter_mut() { + stored.ix = remap.remap(stored.ix); } } /// Shrinks the capacity of the hash table as much as possible. /// - /// See [`Self::reserve`] for the contract `hasher` must satisfy. + /// See [`Self::reserve`] for why the rehash closure is panic-free. #[inline] - pub(crate) fn shrink_to_fit(&mut self, hasher: impl Fn(&ItemIndex) -> u64) { - self.items.shrink_to_fit(hasher); + pub(crate) fn shrink_to_fit(&mut self) { + self.items.shrink_to_fit(cached_hasher); } /// Shrinks the capacity of the hash table with a lower limit. /// - /// See [`Self::reserve`] for the contract `hasher` must satisfy. + /// See [`Self::reserve`] for why the rehash closure is panic-free. #[inline] - pub(crate) fn shrink_to( - &mut self, - min_capacity: usize, - hasher: impl Fn(&ItemIndex) -> u64, - ) { - self.items.shrink_to(min_capacity, hasher); + pub(crate) fn shrink_to(&mut self, min_capacity: usize) { + self.items.shrink_to(min_capacity, cached_hasher); } /// Tries to reserve capacity for at least `additional` more items. /// - /// See [`Self::reserve`] for the contract `hasher` must satisfy. + /// See [`Self::reserve`] for why the rehash closure is panic-free. #[inline] pub(crate) fn try_reserve( &mut self, additional: usize, - hasher: impl Fn(&ItemIndex) -> u64, ) -> Result<(), hashbrown::TryReserveError> { - self.items.try_reserve(additional, hasher) + self.items.try_reserve(additional, cached_hasher) + } +} + +/// An entry in [`MapHashTable`]. +/// +/// Wraps hashbrown's `hash_table::Entry` to keep the cached-hash bookkeeping +/// inside this module: callers see [`ItemIndex`] only. +pub(crate) enum Entry<'a, A: Allocator> { + Occupied(OccupiedEntry<'a, A>), + Vacant(VacantEntry<'a, A>), +} + +pub(crate) struct OccupiedEntry<'a, A: Allocator> { + inner: hash_table::OccupiedEntry<'a, HashedIndex, AllocWrapper>, +} + +impl<'a, A: Allocator> OccupiedEntry<'a, A> { + /// Returns the [`ItemIndex`] stored in this entry. + #[inline] + pub(crate) fn get(&self) -> ItemIndex { + self.inner.get().ix + } + + /// Removes this entry from the table. + #[inline] + pub(crate) fn remove(self) { + let _ = self.inner.remove(); + } +} + +pub(crate) struct VacantEntry<'a, A: Allocator> { + inner: hash_table::VacantEntry<'a, HashedIndex, AllocWrapper>, + /// The hash used to obtain this `VacantEntry`. + hash: u64, +} + +impl<'a, A: Allocator> VacantEntry<'a, A> { + /// Inserts the given index with the hash captured when this entry was + /// obtained. + #[inline] + pub(crate) fn insert(self, ix: ItemIndex) { + let _ = self.inner.insert(HashedIndex::new(ix, self.hash)); } } diff --git a/crates/iddqd/src/tri_hash_map/imp.rs b/crates/iddqd/src/tri_hash_map/imp.rs index e6eb09b..99693e6 100644 --- a/crates/iddqd/src/tri_hash_map/imp.rs +++ b/crates/iddqd/src/tri_hash_map/imp.rs @@ -5,9 +5,10 @@ use crate::{ internal::ValidationError, support::{ ItemIndex, - alloc::{AllocWrapper, Allocator, Global, global_alloc}, + alloc::{Allocator, Global, global_alloc}, borrow::DormantMutRef, fmt_utils::StrDisplayAsDebug, + hash_table, item_set::ItemSet, map_hash::MapHash, }, @@ -18,7 +19,6 @@ use core::{ hash::{BuildHasher, Hash}, }; use equivalent::Equivalent; -use hashbrown::hash_table::{Entry, VacantEntry}; /// A 1:1:1 (trijective) map for three keys and a value. /// @@ -855,17 +855,9 @@ impl TriHashMap { /// ``` pub fn reserve(&mut self, additional: usize) { self.items.reserve(additional); - let items = &self.items; - let state = &self.tables.state; - self.tables - .k1_to_item - .reserve(additional, |ix| state.hash_one(items[*ix].key1())); - self.tables - .k2_to_item - .reserve(additional, |ix| state.hash_one(items[*ix].key2())); - self.tables - .k3_to_item - .reserve(additional, |ix| state.hash_one(items[*ix].key3())); + self.tables.k1_to_item.reserve(additional); + self.tables.k2_to_item.reserve(additional); + self.tables.k3_to_item.reserve(additional); } /// Tries to reserve capacity for at least `additional` more elements to be @@ -924,19 +916,17 @@ impl TriHashMap { additional: usize, ) -> Result<(), crate::errors::TryReserveError> { self.items.try_reserve(additional)?; - let items = &self.items; - let state = &self.tables.state; self.tables .k1_to_item - .try_reserve(additional, |ix| state.hash_one(items[*ix].key1())) + .try_reserve(additional) .map_err(crate::errors::TryReserveError::from_hashbrown)?; self.tables .k2_to_item - .try_reserve(additional, |ix| state.hash_one(items[*ix].key2())) + .try_reserve(additional) .map_err(crate::errors::TryReserveError::from_hashbrown)?; self.tables .k3_to_item - .try_reserve(additional, |ix| state.hash_one(items[*ix].key3())) + .try_reserve(additional) .map_err(crate::errors::TryReserveError::from_hashbrown)?; Ok(()) } @@ -999,17 +989,9 @@ impl TriHashMap { self.tables.k2_to_item.remap_indexes(&remap); self.tables.k3_to_item.remap_indexes(&remap); } - let items = &self.items; - let state = &self.tables.state; - self.tables - .k1_to_item - .shrink_to_fit(|ix| state.hash_one(items[*ix].key1())); - self.tables - .k2_to_item - .shrink_to_fit(|ix| state.hash_one(items[*ix].key2())); - self.tables - .k3_to_item - .shrink_to_fit(|ix| state.hash_one(items[*ix].key3())); + self.tables.k1_to_item.shrink_to_fit(); + self.tables.k2_to_item.shrink_to_fit(); + self.tables.k3_to_item.shrink_to_fit(); } /// Shrinks the capacity of the map with a lower limit. It will drop @@ -1075,17 +1057,9 @@ impl TriHashMap { self.tables.k2_to_item.remap_indexes(&remap); self.tables.k3_to_item.remap_indexes(&remap); } - let items = &self.items; - let state = &self.tables.state; - self.tables - .k1_to_item - .shrink_to(min_capacity, |ix| state.hash_one(items[*ix].key1())); - self.tables - .k2_to_item - .shrink_to(min_capacity, |ix| state.hash_one(items[*ix].key2())); - self.tables - .k3_to_item - .shrink_to(min_capacity, |ix| state.hash_one(items[*ix].key3())); + self.tables.k1_to_item.shrink_to(min_capacity); + self.tables.k2_to_item.shrink_to(min_capacity); + self.tables.k3_to_item.shrink_to(min_capacity); } /// Iterates over the items in the map. @@ -2924,13 +2898,13 @@ impl Extend } fn detect_dup_or_insert<'a, A: Allocator>( - item: Entry<'a, ItemIndex, AllocWrapper>, + item: hash_table::Entry<'a, A>, duplicates: &mut BTreeSet, -) -> Option>> { +) -> Option> { match item { - Entry::Vacant(slot) => Some(slot), - Entry::Occupied(slot) => { - duplicates.insert(*slot.get()); + hash_table::Entry::Vacant(slot) => Some(slot), + hash_table::Entry::Occupied(slot) => { + duplicates.insert(slot.get()); None } } diff --git a/crates/iddqd/tests/integration/bi_hash_map.rs b/crates/iddqd/tests/integration/bi_hash_map.rs index 1303b3a..4ab169d 100644 --- a/crates/iddqd/tests/integration/bi_hash_map.rs +++ b/crates/iddqd/tests/integration/bi_hash_map.rs @@ -1304,8 +1304,8 @@ mod proptest_panic_safety { | PanickyAction::Get2(_) | PanickyAction::ContainsKey1(_) | PanickyAction::ContainsKey2(_) => PanicSafety::Atomic, - PanickyAction::RetainModulo(_, _, _) => PanicSafety::StepAtomic, - PanickyAction::Extend(_) => PanicSafety::MayCorruptOnPanic, + PanickyAction::RetainModulo(_, _, _) + | PanickyAction::Extend(_) => PanicSafety::StepAtomic, PanickyAction::Clear => PanicSafety::Atomic, } } @@ -1368,10 +1368,7 @@ mod proptest_panic_safety { let action = op.action; let action_label = format!("{action:?}"); let panic_safety = action.panic_safety(); - let armed = match panic_safety { - PanicSafety::MayCorruptOnPanic => None, - PanicSafety::Atomic | PanicSafety::StepAtomic => op.armed, - }; + let armed = op.armed; let pre_state = sorted_keys(&map, |item| (item.key1, item.key2)); let (panicked, ops) = run_armed(armed, || action.run(&mut map)); diff --git a/crates/iddqd/tests/integration/id_hash_map.rs b/crates/iddqd/tests/integration/id_hash_map.rs index 8ca05b9..13340c9 100644 --- a/crates/iddqd/tests/integration/id_hash_map.rs +++ b/crates/iddqd/tests/integration/id_hash_map.rs @@ -944,10 +944,7 @@ mod proptest_panic_safety { impl PanickyAction { /// Classify panic safety for this action. /// - /// * `RetainModulo` loops over per-step atomic mutations. - /// * `Extend` calls `HashTable::reserve` up front, which on a - /// tombstone-heavy map drops into hashbrown's `rehash_in_place`. - /// Hashbrown documents `rehash_in_place` as not panic-safe. + /// * `RetainModulo` and `Extend` loop over per-step atomic mutations. fn panic_safety(&self) -> PanicSafety { match self { PanickyAction::InsertUnique(_) @@ -955,8 +952,8 @@ mod proptest_panic_safety { | PanickyAction::Remove(_) | PanickyAction::Get(_) | PanickyAction::ContainsKey(_) => PanicSafety::Atomic, - PanickyAction::RetainModulo(_, _, _) => PanicSafety::StepAtomic, - PanickyAction::Extend(_) => PanicSafety::MayCorruptOnPanic, + PanickyAction::RetainModulo(_, _, _) + | PanickyAction::Extend(_) => PanicSafety::StepAtomic, PanickyAction::Clear => PanicSafety::Atomic, } } @@ -1007,10 +1004,7 @@ mod proptest_panic_safety { let action = op.action; let action_label = format!("{action:?}"); let panic_safety = action.panic_safety(); - let armed = match panic_safety { - PanicSafety::MayCorruptOnPanic => None, - PanicSafety::Atomic | PanicSafety::StepAtomic => op.armed, - }; + let armed = op.armed; let pre_state = sorted_keys(&map, |item| item.key); let (panicked, ops) = run_armed(armed, || action.run(&mut map)); diff --git a/crates/iddqd/tests/integration/id_ord_map.rs b/crates/iddqd/tests/integration/id_ord_map.rs index de2a3e0..fb9056d 100644 --- a/crates/iddqd/tests/integration/id_ord_map.rs +++ b/crates/iddqd/tests/integration/id_ord_map.rs @@ -1213,10 +1213,7 @@ mod proptest_panic_safety { let action = op.action; let action_label = format!("{action:?}"); let panic_safety = action.panic_safety(); - let armed = match panic_safety { - PanicSafety::MayCorruptOnPanic => None, - PanicSafety::Atomic | PanicSafety::StepAtomic => op.armed, - }; + let armed = op.armed; let pre_state = sorted_keys(&map, |item| item.key); let (panicked, ops) = run_armed(armed, || action.run(&mut map)); diff --git a/crates/iddqd/tests/integration/panic_safety.rs b/crates/iddqd/tests/integration/panic_safety.rs index 324609c..4557ad4 100644 --- a/crates/iddqd/tests/integration/panic_safety.rs +++ b/crates/iddqd/tests/integration/panic_safety.rs @@ -234,10 +234,6 @@ pub(crate) enum PanicSafety { /// Composed of atomic sub-steps; a panic may leave the map in a different /// but still-valid state. StepAtomic, - /// May corrupt the underlying table. Callers must skip arming a panic for - /// this op. - #[allow(dead_code)] // unused without `default-hasher` - MayCorruptOnPanic, } /// Asserts that every surviving item is findable, and (for atomic ops diff --git a/crates/iddqd/tests/integration/pathological.rs b/crates/iddqd/tests/integration/pathological.rs index fdc16d8..4ca0688 100644 --- a/crates/iddqd/tests/integration/pathological.rs +++ b/crates/iddqd/tests/integration/pathological.rs @@ -4,7 +4,9 @@ //! The tests in this file might leave maps in an inconsistent state, but not in //! a way that should cause UB. -use crate::panic_safety::{PanickyKey, arm_panic_after, disarm_panic}; +use crate::panic_safety::{ + PanickyKey, arm_panic_after, disarm_panic, run_armed, +}; use core::cell::Cell; use iddqd::{ BiHashItem, BiHashMap, Comparable, Equivalent, IdHashItem, IdHashMap, @@ -990,3 +992,57 @@ fn pop_after_chaos_no_ub() { while catch_panic(|| map.pop_last()).flatten().is_some() {} LIE_ORD.with(|c| c.set(None)); } + +// Test: pre-cached-hash, `reserve` on a tombstone-heavy table dropped into +// hashbrown's `rehash_in_place`, which calls the caller-supplied rehash hasher +// on every surviving entry. Hashbrown documents that path as not panic-safe. +// +// With hashes cached alongside each `ItemIndex`, the rehash hasher never +// touches user code. This test asserts that property end-to-end. +// +// The constants here are tuned against +// `foldhash::fast::FixedState::with_seed(0)` to bias the next `reserve` toward +// `rehash_in_place` rather than a fresh-allocation grow. The capacity-stability +// assertion ensures that we're actually doing a rehash_in_place; if it fires, +// retune the constants. + +#[test] +fn reserve_into_tombstone_heavy_map_skips_user_hash() { + let mut map = IdHashMap::::with_hasher( + foldhash::fast::FixedState::with_seed(0), + ); + for &id in &[ + 0u32, 17, 1, 12, 5, 21, 8, 10, 18, 4, 16, 22, 9, 24, 23, 13, 7, 25, 26, + 20, 31, 11, 14, 2, 6, + ] { + let _ = map.insert_overwrite(PanickyItem { id }); + } + // These retains create many tombstones, biasing the next reserve toward + // `rehash_in_place` over a true grow. + map.retain(|item| item.id % 2 == 1); + map.retain(|item| item.id % 3 != 0); + let _ = map.insert_overwrite(PanickyItem { id: 15 }); + + let cap_before = map.capacity(); + let (panicked, ops) = run_armed(Some(0), || { + let _ = catch_panic(|| map.reserve(3)); + }); + let cap_after = map.capacity(); + + assert!(!panicked, "reserve must not panic with cached hashes, but did"); + assert_eq!( + ops, 0, + "reserve must not invoke any PanickyKey method with cached hashes, \ + but invoked {ops}", + ); + assert_eq!( + cap_before, cap_after, + "expected rehash_in_place (capacity unchanged); a capacity change \ + means hashbrown chose a fresh-allocation grow instead, so this test \ + no longer exercises rehash_in_place. Retune the constants against \ + the current hashbrown.", + ); + + map.validate(ValidateCompact::NonCompact) + .expect("map remains valid after reserve"); +} diff --git a/crates/iddqd/tests/integration/tri_hash_map.rs b/crates/iddqd/tests/integration/tri_hash_map.rs index 85146d5..43d8469 100644 --- a/crates/iddqd/tests/integration/tri_hash_map.rs +++ b/crates/iddqd/tests/integration/tri_hash_map.rs @@ -1196,8 +1196,8 @@ mod proptest_panic_safety { | PanickyAction::Get1(_) | PanickyAction::Get2(_) | PanickyAction::Get3(_) => PanicSafety::Atomic, - PanickyAction::RetainModulo(_, _, _) => PanicSafety::StepAtomic, - PanickyAction::Extend(_) => PanicSafety::MayCorruptOnPanic, + PanickyAction::RetainModulo(_, _, _) + | PanickyAction::Extend(_) => PanicSafety::StepAtomic, PanickyAction::Clear => PanicSafety::Atomic, } } @@ -1266,10 +1266,7 @@ mod proptest_panic_safety { let action = op.action; let action_label = format!("{action:?}"); let panic_safety = action.panic_safety(); - let armed = match panic_safety { - PanicSafety::MayCorruptOnPanic => None, - PanicSafety::Atomic | PanicSafety::StepAtomic => op.armed, - }; + let armed = op.armed; let pre_state = sorted_keys(&map, |item| (item.key1, item.key2, item.key3)); From 7b2f4874a2b1538aadbc32ebe01b378e4e6ec582 Mon Sep 17 00:00:00 2001 From: Rain Date: Fri, 22 May 2026 13:05:28 -0700 Subject: [PATCH 2/6] typo Created using spr 1.3.6-beta.1 --- crates/iddqd/src/support/hash_table.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/crates/iddqd/src/support/hash_table.rs b/crates/iddqd/src/support/hash_table.rs index 9e5762d..f2f6248 100644 --- a/crates/iddqd/src/support/hash_table.rs +++ b/crates/iddqd/src/support/hash_table.rs @@ -2,7 +2,7 @@ //! //! # Why we cache the hash //! -//! Hashbrown's `RawTable::reserve_rehash` may choose to rehash in plae when a +//! Hashbrown's `RawTable::reserve_rehash` may choose to rehash in place when a //! reserve is requested on a tombstone-heavy table. That path invokes the //! caller-supplied rehash hasher on every surviving entry, and hashbrown //! documents it as not panic-safe (a panic mid-rehash can leave the table with @@ -19,6 +19,9 @@ //! rehash hasher of the form `|stored| stored.hash` that reads the cached //! value and never invokes user `Hash`. Rehash is then panic-free by //! construction. +//! +//! This does add a u64 to every entry, but for the kinds of items iddqd is +//! targeting (fat database records) the overhead is ideally minimal. use super::{ ItemIndex, From 490d37eddff8194c17c1cd32bdae89840e2fc869 Mon Sep 17 00:00:00 2001 From: Rain Date: Fri, 22 May 2026 13:21:28 -0700 Subject: [PATCH 3/6] switch to a unit test Created using spr 1.3.6-beta.1 --- crates/iddqd/src/id_hash_map/imp.rs | 81 +++++++++++++++++++ crates/iddqd/src/support/hash_table.rs | 21 +++++ .../iddqd/tests/integration/pathological.rs | 57 +------------ 3 files changed, 103 insertions(+), 56 deletions(-) diff --git a/crates/iddqd/src/id_hash_map/imp.rs b/crates/iddqd/src/id_hash_map/imp.rs index c34ac26..7b28492 100644 --- a/crates/iddqd/src/id_hash_map/imp.rs +++ b/crates/iddqd/src/id_hash_map/imp.rs @@ -1775,3 +1775,84 @@ impl map } } + +#[cfg(all(test, feature = "default-hasher", feature = "std"))] +mod cached_rehash_tests { + use super::*; + use core::cell::Cell; + use core::hash::Hasher; + + std::thread_local! { + static USER_HASH_CALLS: Cell = const { Cell::new(0) }; + } + + #[derive(Debug)] + struct CountedKey(u32); + + impl Hash for CountedKey { + fn hash(&self, state: &mut H) { + USER_HASH_CALLS.with(|c| c.set(c.get() + 1)); + self.0.hash(state); + } + } + + impl PartialEq for CountedKey { + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } + } + impl Eq for CountedKey {} + + #[derive(Debug)] + struct CountedItem { + id: u32, + } + + impl IdHashItem for CountedItem { + type Key<'a> = CountedKey; + fn key(&self) -> Self::Key<'_> { + CountedKey(self.id) + } + id_upcast!(); + } + + // This is a unit test and not an integration test to ensure rehashing + // actually happens. (Rehashing is not externally observable in integration + // tests.) + #[test] + fn reserve_rehash_uses_cached_hash() { + let mut map = IdHashMap::::with_hasher( + foldhash::fast::FixedState::with_seed(0), + ); + // Insert items (legitimately calls user Hash). + for id in [ + 0u32, 17, 1, 12, 5, 21, 8, 10, 18, 4, 16, 22, 9, 24, 23, 13, 7, 25, + 26, 20, 31, 11, 14, 2, 6, + ] { + let _ = map.insert_overwrite(CountedItem { id }); + } + // Drop most entries, leaving the table heavy with tombstones so the + // next reserve favors `rehash_in_place` over a fresh-allocation grow. + map.retain(|item| item.id % 2 == 1 && item.id % 3 != 0); + + USER_HASH_CALLS.with(|c| c.set(0)); + let rehash_calls = map.tables.key_to_item.reserve_counting_rehash(10); + let user_calls = USER_HASH_CALLS.with(Cell::get); + + assert!( + rehash_calls > 0, + "expected reserve to invoke the rehash callback at least once \ + (setup did not actually trigger a rehash; if hashbrown's \ + growth/rehash heuristic changed, retune the constants)", + ); + assert_eq!( + user_calls, 0, + "reserve must not invoke user `Hash` during rehash; got \ + {user_calls} call(s) for {rehash_calls} rehash-callback \ + invocation(s)", + ); + + map.validate(ValidateCompact::NonCompact) + .expect("map remains valid after reserve"); + } +} diff --git a/crates/iddqd/src/support/hash_table.rs b/crates/iddqd/src/support/hash_table.rs index f2f6248..7dfd19a 100644 --- a/crates/iddqd/src/support/hash_table.rs +++ b/crates/iddqd/src/support/hash_table.rs @@ -261,6 +261,27 @@ impl MapHashTable { ) -> Result<(), hashbrown::TryReserveError> { self.items.try_reserve(additional, cached_hasher) } + + /// Test-only variant of [`Self::reserve`] that returns how many times the + /// rehash closure was invoked. + /// + /// A non-zero return value proves the rehash callback fired, so the + /// caller's setup actually exercises a rehash path rather than landing + /// in hashbrown's "no-op, growth_left already sufficient" branch. The + /// closure delegates to [`cached_hasher`], so this exercises the real + /// production hasher. + #[cfg(test)] + pub(crate) fn reserve_counting_rehash( + &mut self, + additional: usize, + ) -> usize { + let count = core::cell::Cell::new(0usize); + self.items.reserve(additional, |stored| { + count.set(count.get() + 1); + cached_hasher(stored) + }); + count.into_inner() + } } /// An entry in [`MapHashTable`]. diff --git a/crates/iddqd/tests/integration/pathological.rs b/crates/iddqd/tests/integration/pathological.rs index 4ca0688..52fdb9a 100644 --- a/crates/iddqd/tests/integration/pathological.rs +++ b/crates/iddqd/tests/integration/pathological.rs @@ -4,9 +4,7 @@ //! The tests in this file might leave maps in an inconsistent state, but not in //! a way that should cause UB. -use crate::panic_safety::{ - PanickyKey, arm_panic_after, disarm_panic, run_armed, -}; +use crate::panic_safety::{PanickyKey, arm_panic_after, disarm_panic}; use core::cell::Cell; use iddqd::{ BiHashItem, BiHashMap, Comparable, Equivalent, IdHashItem, IdHashMap, @@ -993,56 +991,3 @@ fn pop_after_chaos_no_ub() { LIE_ORD.with(|c| c.set(None)); } -// Test: pre-cached-hash, `reserve` on a tombstone-heavy table dropped into -// hashbrown's `rehash_in_place`, which calls the caller-supplied rehash hasher -// on every surviving entry. Hashbrown documents that path as not panic-safe. -// -// With hashes cached alongside each `ItemIndex`, the rehash hasher never -// touches user code. This test asserts that property end-to-end. -// -// The constants here are tuned against -// `foldhash::fast::FixedState::with_seed(0)` to bias the next `reserve` toward -// `rehash_in_place` rather than a fresh-allocation grow. The capacity-stability -// assertion ensures that we're actually doing a rehash_in_place; if it fires, -// retune the constants. - -#[test] -fn reserve_into_tombstone_heavy_map_skips_user_hash() { - let mut map = IdHashMap::::with_hasher( - foldhash::fast::FixedState::with_seed(0), - ); - for &id in &[ - 0u32, 17, 1, 12, 5, 21, 8, 10, 18, 4, 16, 22, 9, 24, 23, 13, 7, 25, 26, - 20, 31, 11, 14, 2, 6, - ] { - let _ = map.insert_overwrite(PanickyItem { id }); - } - // These retains create many tombstones, biasing the next reserve toward - // `rehash_in_place` over a true grow. - map.retain(|item| item.id % 2 == 1); - map.retain(|item| item.id % 3 != 0); - let _ = map.insert_overwrite(PanickyItem { id: 15 }); - - let cap_before = map.capacity(); - let (panicked, ops) = run_armed(Some(0), || { - let _ = catch_panic(|| map.reserve(3)); - }); - let cap_after = map.capacity(); - - assert!(!panicked, "reserve must not panic with cached hashes, but did"); - assert_eq!( - ops, 0, - "reserve must not invoke any PanickyKey method with cached hashes, \ - but invoked {ops}", - ); - assert_eq!( - cap_before, cap_after, - "expected rehash_in_place (capacity unchanged); a capacity change \ - means hashbrown chose a fresh-allocation grow instead, so this test \ - no longer exercises rehash_in_place. Retune the constants against \ - the current hashbrown.", - ); - - map.validate(ValidateCompact::NonCompact) - .expect("map remains valid after reserve"); -} From 8bac9dd4e596462f1c5ea8c1fabfcad5b81630f0 Mon Sep 17 00:00:00 2001 From: Rain Date: Fri, 22 May 2026 13:21:44 -0700 Subject: [PATCH 4/6] rename module Created using spr 1.3.6-beta.1 --- crates/iddqd/src/id_hash_map/imp.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/iddqd/src/id_hash_map/imp.rs b/crates/iddqd/src/id_hash_map/imp.rs index 7b28492..9758621 100644 --- a/crates/iddqd/src/id_hash_map/imp.rs +++ b/crates/iddqd/src/id_hash_map/imp.rs @@ -1777,7 +1777,7 @@ impl } #[cfg(all(test, feature = "default-hasher", feature = "std"))] -mod cached_rehash_tests { +mod tests { use super::*; use core::cell::Cell; use core::hash::Hasher; From f96a9e7c90e56222bad8d44ab26695efb561b85a Mon Sep 17 00:00:00 2001 From: Rain Date: Fri, 22 May 2026 13:24:32 -0700 Subject: [PATCH 5/6] fix cfg() Created using spr 1.3.6-beta.1 --- crates/iddqd/src/id_hash_map/imp.rs | 2 +- crates/iddqd/src/support/hash_table.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/iddqd/src/id_hash_map/imp.rs b/crates/iddqd/src/id_hash_map/imp.rs index 9758621..af7f674 100644 --- a/crates/iddqd/src/id_hash_map/imp.rs +++ b/crates/iddqd/src/id_hash_map/imp.rs @@ -1776,7 +1776,7 @@ impl } } -#[cfg(all(test, feature = "default-hasher", feature = "std"))] +#[cfg(all(test, feature = "std"))] mod tests { use super::*; use core::cell::Cell; diff --git a/crates/iddqd/src/support/hash_table.rs b/crates/iddqd/src/support/hash_table.rs index 7dfd19a..1384048 100644 --- a/crates/iddqd/src/support/hash_table.rs +++ b/crates/iddqd/src/support/hash_table.rs @@ -270,7 +270,7 @@ impl MapHashTable { /// in hashbrown's "no-op, growth_left already sufficient" branch. The /// closure delegates to [`cached_hasher`], so this exercises the real /// production hasher. - #[cfg(test)] + #[cfg(all(test, feature = "std"))] pub(crate) fn reserve_counting_rehash( &mut self, additional: usize, From fb717458774adc72d9fbde498b1a6e7925c3145a Mon Sep 17 00:00:00 2001 From: Rain Date: Fri, 22 May 2026 13:30:42 -0700 Subject: [PATCH 6/6] rustfmt Created using spr 1.3.6-beta.1 --- crates/iddqd/src/id_hash_map/imp.rs | 3 +-- crates/iddqd/tests/integration/pathological.rs | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/crates/iddqd/src/id_hash_map/imp.rs b/crates/iddqd/src/id_hash_map/imp.rs index af7f674..e8c9352 100644 --- a/crates/iddqd/src/id_hash_map/imp.rs +++ b/crates/iddqd/src/id_hash_map/imp.rs @@ -1779,8 +1779,7 @@ impl #[cfg(all(test, feature = "std"))] mod tests { use super::*; - use core::cell::Cell; - use core::hash::Hasher; + use core::{cell::Cell, hash::Hasher}; std::thread_local! { static USER_HASH_CALLS: Cell = const { Cell::new(0) }; diff --git a/crates/iddqd/tests/integration/pathological.rs b/crates/iddqd/tests/integration/pathological.rs index 52fdb9a..fdc16d8 100644 --- a/crates/iddqd/tests/integration/pathological.rs +++ b/crates/iddqd/tests/integration/pathological.rs @@ -990,4 +990,3 @@ fn pop_after_chaos_no_ub() { while catch_panic(|| map.pop_last()).flatten().is_some() {} LIE_ORD.with(|c| c.set(None)); } -