feat: deterministic pseudorandom operations (#140)

* Deterministic randomization * Update documentation
tari-project · Dec 22, 2022 · 306cf1b · 306cf1b
1 parent 656fe7a
commit 306cf1b
Show file tree

Hide file tree

Showing 3 changed files with 192 additions and 0 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -26,6 +26,8 @@ log = "0.4.0"
 merlin = { version = "2.0.1", default-features = false }
 once_cell = "1.8.0"
 rand = { version = "0.7.3", default-features = false }
+rand_chacha = "0.3.1"
+rand_core = "0.6.4"
 serde = "1.0.89"
 serde_json = "1.0"
 serde-wasm-bindgen = { version = "0.4", optional = true }
@@ -37,6 +39,7 @@ zeroize = "1"
 [dev-dependencies]
 bincode = "1.1.4"
 criterion = "0.3.4"
+rand_chacha = "0.3.1"
 sha2 = "0.9.5"
 wasm-bindgen-test = "0.3.24"
 

diff --git a/src/deterministic_randomizer.rs b/src/deterministic_randomizer.rs
@@ -0,0 +1,188 @@
+// Copyright 2022 The Tari Project
+// SPDX-License-Identifier: BSD-3-Clause
+
+//! A deterministic randomizer with utility functions for operating on numbers and arrays in a reproducible and
+//! platform-indepdent way.
+
+use std::convert::TryFrom;
+
+use rand_core::{CryptoRng, RngCore, SeedableRng};
+use zeroize::Zeroize;
+
+/// Error representing a failed shuffle
+#[derive(Debug)]
+pub struct RandomizerError;
+
+/// A `DeterministicRandomizer` contains utility functions for working with pseudorandom number generators (PRNGs) where
+/// reproducibility is important. While the `rand` crate has functionality for things like uniform sampling and
+/// shuffling that can be used with PRNGs, there is no guarantee that these functions will always be implemented in the
+/// same way across versions, which could lead to incompatibility or dependency troubles. This library has no specific
+/// `rand` dependency, and implements deterministic sampling and shuffling that are implemented in straightforward ways
+/// that will work identically across platforms. If you provide the same seed to the PRNG, you'll get the same results.
+/// This can be useful for cases like consensus applications, when a set of clients need to agree on pseudorandom
+/// results.
+///
+/// To avoid certain pitfalls, you need to choose a cryptographically-secure pseudorandom number generator (CSPRNG) that
+/// implements `CryptoRng + RngCore + SeedableRng`; a good choice is something like `ChaCha12Rng` from the `rand_chacha`
+/// crate (which happens to be the current `rand` default).
+///
+/// Once you instantiate the `DeterministicRandomizer` with your CSPRNG and a starting seed, you can start generating
+/// bounded pseudorandom numbers or sampling or shuffling arrays deterministically. You can always reseed the randomizer
+/// with a different seed, or reset it to the initial seed.
+///
+/// ```edition2018
+/// # use tari_crypto::deterministic_randomizer::DeterministicRandomizer;
+/// use rand_chacha::ChaCha12Rng;
+///
+/// // In consensus-type applications, the seed might come from a hash output whose input is some kind of state
+/// // We'll just make one arbitrarily; it must be `[u8; N]` with `N <= 32`
+/// let seed = [1u8; 32];
+/// let mut randomizer = DeterministicRandomizer::<ChaCha12Rng>::new(seed);
+///
+/// // Generate bounded numbers in the range `[0, 1000)`
+/// // Because it's deterministic, we know exactly what the results should be!
+/// assert_eq!(randomizer.next_bounded_u64(1000).unwrap(), 573);
+/// assert_eq!(randomizer.next_bounded_u64(1000).unwrap(), 786);
+///
+/// // Reset the generator, which uses the original seed we provided
+/// randomizer.reset();
+/// assert_eq!(randomizer.next_bounded_u64(1000).unwrap(), 573);
+/// assert_eq!(randomizer.next_bounded_u64(1000).unwrap(), 786);
+///
+/// // We can also reseed using a new seed
+/// // This has the same effect as creating a fresh `DeterministicRandomizer` with the new seed
+/// let new_seed = [2u8; 32];
+/// randomizer.reseed(new_seed);
+///
+/// // Shuffle an array in place (which may be of any type or length)
+/// // Because it's still deterministic, we know exactly what the result should be!
+/// let mut data = [0u32, 1u32, 2u32, 3u32, 4u32];
+/// randomizer.shuffle(&mut data).unwrap();
+/// assert_eq!(&data, &[2u32, 0u32, 4u32, 3u32, 1u32]);
+///
+/// // Get a shuffled sample of an array, leaving the original unchanged
+/// let data = [0u32, 1u32, 2u32, 3u32, 4u32];
+/// let sample = randomizer.sample(&data, 3).unwrap();
+/// assert_eq!(&sample, &[3u32, 0u32, 4u32]);
+/// ```
+///
+/// **WARNING**: While `DeterministicRandomizer` requires a cryptographically-secure pseudorandom number generator
+/// (CSPRNG), it is _not_ suitable for non-deterministic use cases like key or nonce generation. If you aren't
+/// absolutely sure that you need the functionality provided here, you should use a high-entropy non-deterministic
+/// generator instead.
+#[derive(Zeroize)]
+pub struct DeterministicRandomizer<R>
+where R: SeedableRng
+{
+    prng: R,
+    seed: <R as SeedableRng>::Seed,
+}
+
+impl<R> DeterministicRandomizer<R>
+where
+    R: CryptoRng + RngCore + SeedableRng,
+    <R as SeedableRng>::Seed: Clone,
+{
+    /// Initialize the randomizer with a seed
+    pub fn new(seed: <R as SeedableRng>::Seed) -> Self {
+        Self {
+            prng: R::from_seed(seed.clone()),
+            seed,
+        }
+    }
+
+    /// Reseed the randomizer with a new seed
+    pub fn reseed(&mut self, seed: <R as SeedableRng>::Seed) {
+        self.prng = R::from_seed(seed.clone());
+        self.seed = seed;
+    }
+
+    /// Reset the randomizer using the seed last provided by either the constructor or reseeding
+    pub fn reset(&mut self) {
+        self.reseed(self.seed.clone());
+    }
+
+    /// Sample elements without replacement, shuffling their order
+    pub fn sample<T>(&mut self, data: &[T], k: usize) -> Result<Vec<T>, RandomizerError>
+    where T: Clone {
+        // This currently reallocates; it could be made more efficient
+        let mut result = data.to_vec();
+        self.partial_shuffle(&mut result, k)?;
+
+        Ok(result[(data.len() - k)..].to_vec())
+    }
+
+    /// Shuffle an array in place
+    pub fn shuffle<T>(&mut self, data: &mut [T]) -> Result<(), RandomizerError> {
+        self.partial_shuffle(data, data.len())?;
+
+        Ok(())
+    }
+
+    /// Utility function for a partial in-place Fisher-Yates shuffle
+    /// The last `n` items are fully shuffled, while the remaining items are not!
+    #[allow(clippy::cast_possible_truncation)]
+    fn partial_shuffle<T>(&mut self, data: &mut [T], n: usize) -> Result<(), RandomizerError> {
+        if n > data.len() {
+            return Err(RandomizerError);
+        }
+        let low = (data.len() - n) as u64;
+        let high = data.len() as u64;
+
+        // Perform the Durstenfeld variant of the Fisher-Yates shuffle
+        for i in (low..high).rev() {
+            // Note that we have to cast from `u64` to `usize`, so we need to ignore the truncation warning (we're fine
+            // on 64-bit targets)
+            let j = self.next_bounded_u64(i + 1).map(|j| j as usize)?;
+            data.swap(i as usize, j);
+        }
+
+        Ok(())
+    }
+
+    /// Choose a random bounded 64-bit unsigned integer with exclusive upper bound
+    pub fn next_bounded_u64(&mut self, upper: u64) -> Result<u64, RandomizerError> {
+        // We can't get a `u128` directly from the generator
+        let x = u128::from(self.prng.next_u64()) << 64 | u128::from(self.prng.next_u64());
+
+        u64::try_from(x % u128::from(upper)).map_err(|_| RandomizerError)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use rand_chacha::ChaCha12Rng;
+
+    use super::DeterministicRandomizer;
+
+    type R = DeterministicRandomizer<ChaCha12Rng>;
+
+    #[test]
+    fn test_shuffle() {
+        let seed = [1u8; 32];
+        let mut data = [0u8, 1u8, 2u8, 3u8];
+        let mut randomizer = R::new(seed);
+
+        randomizer.shuffle(&mut data).unwrap();
+        assert_eq!(&data, &[0u8, 2u8, 3u8, 1u8]);
+    }
+
+    #[test]
+    fn test_bounded_u64() {
+        let seed = [1u8; 32];
+        let mut randomizer = R::new(seed);
+
+        assert_eq!(randomizer.next_bounded_u64(1000).unwrap(), 573);
+        assert_eq!(randomizer.next_bounded_u64(1000).unwrap(), 786);
+    }
+
+    #[test]
+    fn test_sample() {
+        let seed = [1u8; 32];
+        let data = [0u8, 1u8, 2u8, 3u8];
+        let mut randomizer = R::new(seed);
+
+        let sample = randomizer.sample(&data, 3).unwrap();
+        assert_eq!(&sample, &[2u8, 3u8, 1u8]);
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
@@ -9,6 +9,7 @@ extern crate lazy_static;
 #[macro_use]
 mod macros;
 pub mod commitment;
+pub mod deterministic_randomizer;
 pub mod dhke;
 pub mod hash;
 pub mod hashing;