From 52ef1334ce356f66741a96602e900f40777b6e87 Mon Sep 17 00:00:00 2001 From: ltdk Date: Fri, 8 Sep 2023 23:57:15 -0400 Subject: [PATCH] Move RandomState and DefaultHasher into std::hash, but in a sneaky way --- library/std/src/collections/hash/map.rs | 150 +--------------- library/std/src/collections/hash/map/tests.rs | 2 +- library/std/src/collections/hash/set.rs | 4 +- library/std/src/collections/hash/set/tests.rs | 2 +- library/std/src/collections/mod.rs | 5 + library/std/src/hash/mod.rs | 90 ++++++++++ library/std/src/hash/private.rs | 161 ++++++++++++++++++ library/std/src/lib.rs | 3 +- 8 files changed, 262 insertions(+), 155 deletions(-) create mode 100644 library/std/src/hash/mod.rs create mode 100644 library/std/src/hash/private.rs diff --git a/library/std/src/collections/hash/map.rs b/library/std/src/collections/hash/map.rs index be173a7ace6d8..d9efb64e92955 100644 --- a/library/std/src/collections/hash/map.rs +++ b/library/std/src/collections/hash/map.rs @@ -6,16 +6,14 @@ use self::Entry::*; use hashbrown::hash_map as base; use crate::borrow::Borrow; -use crate::cell::Cell; use crate::collections::TryReserveError; use crate::collections::TryReserveErrorKind; use crate::error::Error; use crate::fmt::{self, Debug}; #[allow(deprecated)] -use crate::hash::{BuildHasher, Hash, Hasher, SipHasher13}; +use crate::hash::{private::RandomState, BuildHasher, Hash}; use crate::iter::FusedIterator; use crate::ops::Index; -use crate::sys; /// A [hash map] implemented with quadratic probing and SIMD lookup. /// @@ -3072,152 +3070,6 @@ where } } -/// `RandomState` is the default state for [`HashMap`] types. -/// -/// A particular instance `RandomState` will create the same instances of -/// [`Hasher`], but the hashers created by two different `RandomState` -/// instances are unlikely to produce the same result for the same values. -/// -/// # Examples -/// -/// ``` -/// use std::collections::HashMap; -/// use std::collections::hash_map::RandomState; -/// -/// let s = RandomState::new(); -/// let mut map = HashMap::with_hasher(s); -/// map.insert(1, 2); -/// ``` -#[derive(Clone)] -#[stable(feature = "hashmap_build_hasher", since = "1.7.0")] -pub struct RandomState { - k0: u64, - k1: u64, -} - -impl RandomState { - /// Constructs a new `RandomState` that is initialized with random keys. - /// - /// # Examples - /// - /// ``` - /// use std::collections::hash_map::RandomState; - /// - /// let s = RandomState::new(); - /// ``` - #[inline] - #[allow(deprecated)] - // rand - #[must_use] - #[stable(feature = "hashmap_build_hasher", since = "1.7.0")] - pub fn new() -> RandomState { - // Historically this function did not cache keys from the OS and instead - // simply always called `rand::thread_rng().gen()` twice. In #31356 it - // was discovered, however, that because we re-seed the thread-local RNG - // from the OS periodically that this can cause excessive slowdown when - // many hash maps are created on a thread. To solve this performance - // trap we cache the first set of randomly generated keys per-thread. - // - // Later in #36481 it was discovered that exposing a deterministic - // iteration order allows a form of DOS attack. To counter that we - // increment one of the seeds on every RandomState creation, giving - // every corresponding HashMap a different iteration order. - thread_local!(static KEYS: Cell<(u64, u64)> = { - Cell::new(sys::hashmap_random_keys()) - }); - - KEYS.with(|keys| { - let (k0, k1) = keys.get(); - keys.set((k0.wrapping_add(1), k1)); - RandomState { k0, k1 } - }) - } -} - -#[stable(feature = "hashmap_build_hasher", since = "1.7.0")] -impl BuildHasher for RandomState { - type Hasher = DefaultHasher; - #[inline] - #[allow(deprecated)] - fn build_hasher(&self) -> DefaultHasher { - DefaultHasher(SipHasher13::new_with_keys(self.k0, self.k1)) - } -} - -/// The default [`Hasher`] used by [`RandomState`]. -/// -/// The internal algorithm is not specified, and so it and its hashes should -/// not be relied upon over releases. -#[stable(feature = "hashmap_default_hasher", since = "1.13.0")] -#[allow(deprecated)] -#[derive(Clone, Debug)] -pub struct DefaultHasher(SipHasher13); - -impl DefaultHasher { - /// Creates a new `DefaultHasher`. - /// - /// This hasher is not guaranteed to be the same as all other - /// `DefaultHasher` instances, but is the same as all other `DefaultHasher` - /// instances created through `new` or `default`. - #[stable(feature = "hashmap_default_hasher", since = "1.13.0")] - #[inline] - #[allow(deprecated)] - #[rustc_const_unstable(feature = "const_hash", issue = "104061")] - #[must_use] - pub const fn new() -> DefaultHasher { - DefaultHasher(SipHasher13::new_with_keys(0, 0)) - } -} - -#[stable(feature = "hashmap_default_hasher", since = "1.13.0")] -impl Default for DefaultHasher { - /// Creates a new `DefaultHasher` using [`new`]. - /// See its documentation for more. - /// - /// [`new`]: DefaultHasher::new - #[inline] - fn default() -> DefaultHasher { - DefaultHasher::new() - } -} - -#[stable(feature = "hashmap_default_hasher", since = "1.13.0")] -impl Hasher for DefaultHasher { - // The underlying `SipHasher13` doesn't override the other - // `write_*` methods, so it's ok not to forward them here. - - #[inline] - fn write(&mut self, msg: &[u8]) { - self.0.write(msg) - } - - #[inline] - fn write_str(&mut self, s: &str) { - self.0.write_str(s); - } - - #[inline] - fn finish(&self) -> u64 { - self.0.finish() - } -} - -#[stable(feature = "hashmap_build_hasher", since = "1.7.0")] -impl Default for RandomState { - /// Constructs a new `RandomState`. - #[inline] - fn default() -> RandomState { - RandomState::new() - } -} - -#[stable(feature = "std_debug", since = "1.16.0")] -impl fmt::Debug for RandomState { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("RandomState").finish_non_exhaustive() - } -} - #[inline] fn map_entry<'a, K: 'a, V: 'a>(raw: base::RustcEntry<'a, K, V>) -> Entry<'a, K, V> { match raw { diff --git a/library/std/src/collections/hash/map/tests.rs b/library/std/src/collections/hash/map/tests.rs index 91a3776e7be84..b6e6f5522f9a3 100644 --- a/library/std/src/collections/hash/map/tests.rs +++ b/library/std/src/collections/hash/map/tests.rs @@ -1,8 +1,8 @@ use super::Entry::{Occupied, Vacant}; use super::HashMap; -use super::RandomState; use crate::assert_matches::assert_matches; use crate::cell::RefCell; +use crate::hash::private::RandomState; use crate::test_helpers::test_rng; use rand::Rng; use realstd::collections::TryReserveErrorKind::*; diff --git a/library/std/src/collections/hash/set.rs b/library/std/src/collections/hash/set.rs index 6d85b26af5fa2..fa9a3a8d1c437 100644 --- a/library/std/src/collections/hash/set.rs +++ b/library/std/src/collections/hash/set.rs @@ -6,11 +6,11 @@ use hashbrown::hash_set as base; use crate::borrow::Borrow; use crate::collections::TryReserveError; use crate::fmt; -use crate::hash::{BuildHasher, Hash}; +use crate::hash::{private::RandomState, BuildHasher, Hash}; use crate::iter::{Chain, FusedIterator}; use crate::ops::{BitAnd, BitOr, BitXor, Sub}; -use super::map::{map_try_reserve_error, RandomState}; +use super::map::map_try_reserve_error; /// A [hash set] implemented as a `HashMap` where the value is `()`. /// diff --git a/library/std/src/collections/hash/set/tests.rs b/library/std/src/collections/hash/set/tests.rs index e0cd80b44f8e6..251229cee2f4d 100644 --- a/library/std/src/collections/hash/set/tests.rs +++ b/library/std/src/collections/hash/set/tests.rs @@ -1,6 +1,6 @@ -use super::super::map::RandomState; use super::HashSet; +use crate::hash::private::RandomState; use crate::panic::{catch_unwind, AssertUnwindSafe}; use crate::sync::atomic::{AtomicU32, Ordering}; use crate::sync::Arc; diff --git a/library/std/src/collections/mod.rs b/library/std/src/collections/mod.rs index 42f738acb9f7b..6ce5bb6ae240f 100644 --- a/library/std/src/collections/mod.rs +++ b/library/std/src/collections/mod.rs @@ -439,6 +439,11 @@ pub mod hash_map { //! A hash map implemented with quadratic probing and SIMD lookup. #[stable(feature = "rust1", since = "1.0.0")] pub use super::hash::map::*; + + #[stable(feature = "hashmap_build_hasher", since = "1.7.0")] + pub use crate::hash::private::DefaultHasher; + #[stable(feature = "hashmap_build_hasher", since = "1.7.0")] + pub use crate::hash::private::RandomState; } #[stable(feature = "rust1", since = "1.0.0")] diff --git a/library/std/src/hash/mod.rs b/library/std/src/hash/mod.rs new file mode 100644 index 0000000000000..f5528000111ff --- /dev/null +++ b/library/std/src/hash/mod.rs @@ -0,0 +1,90 @@ +//! Generic hashing support. +//! +//! This module provides a generic way to compute the [hash] of a value. +//! Hashes are most commonly used with [`HashMap`] and [`HashSet`]. +//! +//! [hash]: https://en.wikipedia.org/wiki/Hash_function +//! [`HashMap`]: ../../std/collections/struct.HashMap.html +//! [`HashSet`]: ../../std/collections/struct.HashSet.html +//! +//! The simplest way to make a type hashable is to use `#[derive(Hash)]`: +//! +//! # Examples +//! +//! ```rust +//! use std::collections::hash_map::DefaultHasher; +//! use std::hash::{Hash, Hasher}; +//! +//! #[derive(Hash)] +//! struct Person { +//! id: u32, +//! name: String, +//! phone: u64, +//! } +//! +//! let person1 = Person { +//! id: 5, +//! name: "Janet".to_string(), +//! phone: 555_666_7777, +//! }; +//! let person2 = Person { +//! id: 5, +//! name: "Bob".to_string(), +//! phone: 555_666_7777, +//! }; +//! +//! assert!(calculate_hash(&person1) != calculate_hash(&person2)); +//! +//! fn calculate_hash(t: &T) -> u64 { +//! let mut s = DefaultHasher::new(); +//! t.hash(&mut s); +//! s.finish() +//! } +//! ``` +//! +//! If you need more control over how a value is hashed, you need to implement +//! the [`Hash`] trait: +//! +//! ```rust +//! use std::collections::hash_map::DefaultHasher; +//! use std::hash::{Hash, Hasher}; +//! +//! struct Person { +//! id: u32, +//! # #[allow(dead_code)] +//! name: String, +//! phone: u64, +//! } +//! +//! impl Hash for Person { +//! fn hash(&self, state: &mut H) { +//! self.id.hash(state); +//! self.phone.hash(state); +//! } +//! } +//! +//! let person1 = Person { +//! id: 5, +//! name: "Janet".to_string(), +//! phone: 555_666_7777, +//! }; +//! let person2 = Person { +//! id: 5, +//! name: "Bob".to_string(), +//! phone: 555_666_7777, +//! }; +//! +//! assert_eq!(calculate_hash(&person1), calculate_hash(&person2)); +//! +//! fn calculate_hash(t: &T) -> u64 { +//! let mut s = DefaultHasher::new(); +//! t.hash(&mut s); +//! s.finish() +//! } +//! ``` +#![stable(feature = "rust1", since = "1.0.0")] + +pub(crate) mod private; + +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::hash::*; diff --git a/library/std/src/hash/private.rs b/library/std/src/hash/private.rs new file mode 100644 index 0000000000000..0b1c52c0e6151 --- /dev/null +++ b/library/std/src/hash/private.rs @@ -0,0 +1,161 @@ +//! This module exists to isolate [`RandomState`] and [`DefaultHasher`] outside of the +//! [`collections`] module without actually publicly exporting them, so that parts of that +//! implementation can more easily be moved to the [`alloc`] crate. +//! +//! Although its items are public and contain stability attributes, they can't actually be accessed +//! outside this crate. +//! +//! [`collections`]: crate::collections +#[allow(deprecated)] +use super::{BuildHasher, Hasher, SipHasher13}; +use crate::cell::Cell; +use crate::fmt; +use crate::sys; + +/// `RandomState` is the default state for [`HashMap`] types. +/// +/// A particular instance `RandomState` will create the same instances of +/// [`Hasher`], but the hashers created by two different `RandomState` +/// instances are unlikely to produce the same result for the same values. +/// +/// [`HashMap`]: crate::collections::HashMap +/// +/// # Examples +/// +/// ``` +/// use std::collections::HashMap; +/// use std::collections::hash_map::RandomState; +/// +/// let s = RandomState::new(); +/// let mut map = HashMap::with_hasher(s); +/// map.insert(1, 2); +/// ``` +#[stable(feature = "hashmap_build_hasher", since = "1.7.0")] +#[derive(Clone)] +pub struct RandomState { + k0: u64, + k1: u64, +} + +impl RandomState { + /// Constructs a new `RandomState` that is initialized with random keys. + /// + /// # Examples + /// + /// ``` + /// use std::collections::hash_map::RandomState; + /// + /// let s = RandomState::new(); + /// ``` + #[inline] + #[allow(deprecated)] + // rand + #[must_use] + #[stable(feature = "hashmap_build_hasher", since = "1.7.0")] + pub fn new() -> RandomState { + // Historically this function did not cache keys from the OS and instead + // simply always called `rand::thread_rng().gen()` twice. In #31356 it + // was discovered, however, that because we re-seed the thread-local RNG + // from the OS periodically that this can cause excessive slowdown when + // many hash maps are created on a thread. To solve this performance + // trap we cache the first set of randomly generated keys per-thread. + // + // Later in #36481 it was discovered that exposing a deterministic + // iteration order allows a form of DOS attack. To counter that we + // increment one of the seeds on every RandomState creation, giving + // every corresponding HashMap a different iteration order. + thread_local!(static KEYS: Cell<(u64, u64)> = { + Cell::new(sys::hashmap_random_keys()) + }); + + KEYS.with(|keys| { + let (k0, k1) = keys.get(); + keys.set((k0.wrapping_add(1), k1)); + RandomState { k0, k1 } + }) + } +} + +#[stable(feature = "hashmap_build_hasher", since = "1.7.0")] +impl BuildHasher for RandomState { + type Hasher = DefaultHasher; + #[inline] + #[allow(deprecated)] + fn build_hasher(&self) -> DefaultHasher { + DefaultHasher(SipHasher13::new_with_keys(self.k0, self.k1)) + } +} + +/// The default [`Hasher`] used by [`RandomState`]. +/// +/// The internal algorithm is not specified, and so it and its hashes should +/// not be relied upon over releases. +#[allow(deprecated)] +#[derive(Clone, Debug)] +#[stable(feature = "hashmap_build_hasher", since = "1.7.0")] +pub struct DefaultHasher(SipHasher13); + +impl DefaultHasher { + /// Creates a new `DefaultHasher`. + /// + /// This hasher is not guaranteed to be the same as all other + /// `DefaultHasher` instances, but is the same as all other `DefaultHasher` + /// instances created through `new` or `default`. + #[stable(feature = "hashmap_default_hasher", since = "1.13.0")] + #[inline] + #[allow(deprecated)] + #[rustc_const_unstable(feature = "const_hash", issue = "104061")] + #[must_use] + pub const fn new() -> DefaultHasher { + DefaultHasher(SipHasher13::new_with_keys(0, 0)) + } +} + +#[stable(feature = "hashmap_default_hasher", since = "1.13.0")] +impl Default for DefaultHasher { + /// Creates a new `DefaultHasher` using [`new`]. + /// See its documentation for more. + /// + /// [`new`]: DefaultHasher::new + #[inline] + fn default() -> DefaultHasher { + DefaultHasher::new() + } +} + +#[stable(feature = "hashmap_default_hasher", since = "1.13.0")] +impl Hasher for DefaultHasher { + // The underlying `SipHasher13` doesn't override the other + // `write_*` methods, so it's ok not to forward them here. + + #[inline] + fn write(&mut self, msg: &[u8]) { + self.0.write(msg) + } + + #[inline] + fn write_str(&mut self, s: &str) { + self.0.write_str(s); + } + + #[inline] + fn finish(&self) -> u64 { + self.0.finish() + } +} + +#[stable(feature = "hashmap_build_hasher", since = "1.7.0")] +impl Default for RandomState { + /// Constructs a new `RandomState`. + #[inline] + fn default() -> RandomState { + RandomState::new() + } +} + +#[stable(feature = "std_debug", since = "1.16.0")] +impl fmt::Debug for RandomState { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("RandomState").finish_non_exhaustive() + } +} diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs index 1955ef815ff80..a51366a9d87df 100644 --- a/library/std/src/lib.rs +++ b/library/std/src/lib.rs @@ -467,8 +467,6 @@ pub use core::convert; pub use core::default; #[stable(feature = "futures_api", since = "1.36.0")] pub use core::future; -#[stable(feature = "rust1", since = "1.0.0")] -pub use core::hash; #[stable(feature = "core_hint", since = "1.27.0")] pub use core::hint; #[stable(feature = "i128", since = "1.26.0")] @@ -538,6 +536,7 @@ pub mod env; pub mod error; pub mod ffi; pub mod fs; +pub mod hash; pub mod io; pub mod net; pub mod num;