diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e4549e6b..129ff0b1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,4 +1,4 @@ -name: Main tests +name: Tests on: push: @@ -44,7 +44,7 @@ jobs: with: toolchain: nightly - name: rand_core - run: cargo doc --all-features --no-deps + run: cargo doc --no-deps test: runs-on: ${{ matrix.os }} @@ -92,12 +92,9 @@ jobs: - run: ${{ matrix.deps }} - name: Maybe minimal versions if: ${{ matrix.variant == 'minimal_versions' }} - run: | - cargo generate-lockfile -Z minimal-versions + run: cargo generate-lockfile -Z minimal-versions - name: Test rand_core - run: | - cargo test --target ${{ matrix.target }} --no-default-features - cargo test --target ${{ matrix.target }} --features serde + run: cargo test --target ${{ matrix.target }} test-cross: runs-on: ${{ matrix.os }} @@ -124,9 +121,7 @@ jobs: - name: Install cross run: cargo install cross || true - name: Test - run: | - # all stable features: - cross test --no-fail-fast --target ${{ matrix.target }} + run: cross test --no-fail-fast --target ${{ matrix.target }} test-miri: runs-on: ubuntu-latest @@ -138,10 +133,7 @@ jobs: rustup override set nightly cargo miri setup - name: Test rand - run: | - cargo miri test - cargo miri test --features=serde - cargo miri test --no-default-features + run: cargo miri test test-no-std: runs-on: ubuntu-latest @@ -152,7 +144,7 @@ jobs: with: target: thumbv6m-none-eabi - name: Build top-level only - run: cargo build --target=thumbv6m-none-eabi --no-default-features + run: cargo build --target=thumbv6m-none-eabi test-ios: runs-on: macos-latest diff --git a/Cargo.toml b/Cargo.toml index 22f66d0a..9bca7ff8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,18 +15,3 @@ categories = ["algorithms", "no-std"] edition = "2024" rust-version = "1.85" exclude = ["/.github"] - -[package.metadata.docs.rs] -# To build locally: -# RUSTDOCFLAGS="--cfg docsrs" cargo +nightly doc --all-features --no-deps --open -all-features = true -rustdoc-args = ["--generate-link-to-definition"] - -[package.metadata.playground] -all-features = true - -[features] -serde = ["dep:serde"] # enables serde for BlockRng wrapper - -[dependencies] -serde = { version = "1.0.103", features = ["derive"], optional = true } diff --git a/src/block.rs b/src/block.rs deleted file mode 100644 index b9c722ab..00000000 --- a/src/block.rs +++ /dev/null @@ -1,555 +0,0 @@ -//! The `BlockRngCore` trait and implementation helpers -//! -//! The [`BlockRngCore`] trait exists to assist in the implementation of RNGs -//! which generate a block of data in a cache instead of returning generated -//! values directly. -//! -//! Usage of this trait is optional, but provides two advantages: -//! implementations only need to concern themselves with generation of the -//! block, not the various [`RngCore`] methods (especially [`fill_bytes`], where -//! the optimal implementations are not trivial), and this allows -//! `ReseedingRng` (see [`rand`](https://docs.rs/rand) crate) perform periodic -//! reseeding with very low overhead. -//! -//! # Example -//! -//! ```no_run -//! use rand_core::{RngCore, SeedableRng}; -//! use rand_core::block::{BlockRngCore, BlockRng}; -//! -//! struct MyRngCore; -//! -//! impl BlockRngCore for MyRngCore { -//! type Item = u32; -//! type Results = [u32; 16]; -//! -//! fn generate(&mut self, results: &mut Self::Results) { -//! unimplemented!() -//! } -//! } -//! -//! impl SeedableRng for MyRngCore { -//! type Seed = [u8; 32]; -//! 
    fn from_seed(seed: Self::Seed) -> Self {
-//!         unimplemented!()
-//!     }
-//! }
-//!
-//! // optionally, also implement CryptoBlockRng for MyRngCore
-//!
-//! // Final RNG.
-//! let mut rng = BlockRng::<MyRngCore>::seed_from_u64(0);
-//! println!("First value: {}", rng.next_u32());
-//! ```
-//!
-//! [`BlockRngCore`]: crate::block::BlockRngCore
-//! [`fill_bytes`]: RngCore::fill_bytes
-
-use crate::le::fill_via_chunks;
-use crate::{CryptoRng, RngCore, SeedableRng, TryRngCore};
-use core::fmt;
-#[cfg(feature = "serde")]
-use serde::{Deserialize, Serialize};
-
-/// A trait for RNGs which do not generate random numbers individually, but in
-/// blocks (typically `[u32; N]`). This technique is commonly used by
-/// cryptographic RNGs to improve performance.
-///
-/// See the [module][crate::block] documentation for details.
-pub trait BlockRngCore {
-    /// Results element type, e.g. `u32`.
-    type Item;
-
-    /// Results type. This is the 'block' an RNG implementing `BlockRngCore`
-    /// generates, which will usually be an array like `[u32; 16]`.
-    type Results: AsRef<[Self::Item]> + AsMut<[Self::Item]> + Default;
-
-    /// Generate a new block of results.
-    fn generate(&mut self, results: &mut Self::Results);
-}
-
-/// A marker trait used to indicate that an [`RngCore`] implementation is
-/// supposed to be cryptographically secure.
-///
-/// See [`CryptoRng`] docs for more information.
-pub trait CryptoBlockRng: BlockRngCore {}
-
-/// A wrapper type implementing [`RngCore`] for some type implementing
-/// [`BlockRngCore`] with `u32` array buffer; i.e. this can be used to implement
-/// a full RNG from just a `generate` function.
-///
-/// The `core` field may be accessed directly but the results buffer may not.
-/// PRNG implementations can simply use a type alias
-/// (`pub type MyRng = BlockRng<MyRngCore>;`) but might prefer to use a
-/// wrapper type (`pub struct MyRng(BlockRng<MyRngCore>);`); the latter must
-/// re-implement `RngCore` but hides the implementation details and allows
-/// extra functionality to be defined on the RNG
-/// (e.g. `impl MyRng { fn set_stream(...){...} }`).
-///
-/// `BlockRng` has heavily optimized implementations of the [`RngCore`] methods
-/// reading values from the results buffer, as well as
-/// calling [`BlockRngCore::generate`] directly on the output array when
-/// [`fill_bytes`] is called on a large array. These methods also handle
-/// the bookkeeping of when to generate a new batch of values.
-///
-/// No whole generated `u32` values are thrown away and all values are consumed
-/// in-order. [`next_u32`] simply takes the next available `u32` value.
-/// [`next_u64`] is implemented by combining two `u32` values, least
-/// significant first. [`fill_bytes`] consume a whole number of `u32` values,
-/// converting each `u32` to a byte slice in little-endian order. If the requested byte
-/// length is not a multiple of 4, some bytes will be discarded.
-///
-/// See also [`BlockRng64`] which uses `u64` array buffers. Currently there is
-/// no direct support for other buffer types.
-///
-/// For easy initialization `BlockRng` also implements [`SeedableRng`].
-///
-/// [`next_u32`]: RngCore::next_u32
-/// [`next_u64`]: RngCore::next_u64
-/// [`fill_bytes`]: RngCore::fill_bytes
-#[derive(Clone)]
-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
-#[cfg_attr(
-    feature = "serde",
-    serde(
-        bound = "for<'x> R: Serialize + Deserialize<'x>, for<'x> R::Results: Serialize + Deserialize<'x>"
-    )
-)]
-pub struct BlockRng<R: BlockRngCore + ?Sized> {
-    results: R::Results,
-    index: usize,
-    /// The *core* part of the RNG, implementing the `generate` function.
-    pub core: R,
-}
-
-// Custom Debug implementation that does not expose the contents of `results`.
-impl<R: BlockRngCore + fmt::Debug> fmt::Debug for BlockRng<R> {
-    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
-        fmt.debug_struct("BlockRng")
-            .field("core", &self.core)
-            .field("result_len", &self.results.as_ref().len())
-            .field("index", &self.index)
-            .finish()
-    }
-}
-
-impl<R: BlockRngCore> BlockRng<R> {
-    /// Create a new `BlockRng` from an existing RNG implementing
-    /// `BlockRngCore`. Results will be generated on first use.
-    #[inline]
-    pub fn new(core: R) -> BlockRng<R> {
-        let results_empty = R::Results::default();
-        BlockRng {
-            core,
-            index: results_empty.as_ref().len(),
-            results: results_empty,
-        }
-    }
-
-    /// Get the index into the result buffer.
-    ///
-    /// If this is equal to or larger than the size of the result buffer then
-    /// the buffer is "empty" and `generate()` must be called to produce new
-    /// results.
-    #[inline(always)]
-    pub fn index(&self) -> usize {
-        self.index
-    }
-
-    /// Reset the number of available results.
-    /// This will force a new set of results to be generated on next use.
-    #[inline]
-    pub fn reset(&mut self) {
-        self.index = self.results.as_ref().len();
-    }
-
-    /// Generate a new set of results immediately, setting the index to the
-    /// given value.
-    #[inline]
-    pub fn generate_and_set(&mut self, index: usize) {
-        assert!(index < self.results.as_ref().len());
-        self.core.generate(&mut self.results);
-        self.index = index;
-    }
-}
-
-impl<R: BlockRngCore<Item = u32>> RngCore for BlockRng<R> {
-    #[inline]
-    fn next_u32(&mut self) -> u32 {
-        if self.index >= self.results.as_ref().len() {
-            self.generate_and_set(0);
-        }
-
-        let value = self.results.as_ref()[self.index];
-        self.index += 1;
-        value
-    }
-
-    #[inline]
-    fn next_u64(&mut self) -> u64 {
-        let read_u64 = |results: &[u32], index| {
-            let data = &results[index..=index + 1];
-            (u64::from(data[1]) << 32) | u64::from(data[0])
-        };
-
-        let len = self.results.as_ref().len();
-
-        let index = self.index;
-        if index < len - 1 {
-            self.index += 2;
-            // Read an u64 from the current index
-            read_u64(self.results.as_ref(), index)
-        } else if index >= len {
-            self.generate_and_set(2);
-            read_u64(self.results.as_ref(), 0)
-        } else {
-            let x = u64::from(self.results.as_ref()[len - 1]);
-            self.generate_and_set(1);
-            let y = u64::from(self.results.as_ref()[0]);
-            (y << 32) | x
-        }
-    }
-
-    #[inline]
-    fn fill_bytes(&mut self, dest: &mut [u8]) {
-        let mut read_len = 0;
-        while read_len < dest.len() {
-            if self.index >= self.results.as_ref().len() {
-                self.generate_and_set(0);
-            }
-            let (consumed_u32, filled_u8) =
-                fill_via_chunks(&self.results.as_mut()[self.index..], &mut dest[read_len..]);
-
-            self.index += consumed_u32;
-            read_len += filled_u8;
-        }
-    }
-}
-
-impl<R: BlockRngCore + SeedableRng> SeedableRng for BlockRng<R> {
-    type Seed = R::Seed;
-
-    #[inline(always)]
-    fn from_seed(seed: Self::Seed) -> Self {
-        Self::new(R::from_seed(seed))
-    }
-
-    #[inline(always)]
-    fn seed_from_u64(seed: u64) -> Self {
-        Self::new(R::seed_from_u64(seed))
-    }
-
-    #[inline(always)]
-    fn from_rng<S: RngCore + ?Sized>(rng: &mut S) -> Self {
-        Self::new(R::from_rng(rng))
-    }
-
-    #[inline(always)]
-    fn try_from_rng<S: TryRngCore + ?Sized>(rng: &mut S) -> Result<Self, S::Error> {
-        R::try_from_rng(rng).map(Self::new)
-    }
-}
-
-impl<R: CryptoBlockRng + BlockRngCore<Item = u32>> CryptoRng for BlockRng<R> {}
-
-/// A wrapper type implementing [`RngCore`] for some type implementing
-/// [`BlockRngCore`] with `u64` array buffer; i.e. this can be used to implement
-/// a full RNG from just a `generate` function.
-///
-/// This is similar to [`BlockRng`], but specialized for algorithms that operate
-/// on `u64` values.
-///
-/// No whole generated `u64` values are thrown away and all values are consumed
-/// in-order. [`next_u64`] simply takes the next available `u64` value.
-/// [`next_u32`] is however a bit special: half of a `u64` is consumed, leaving
-/// the other half in the buffer. If the next function called is [`next_u32`]
-/// then the other half is then consumed, however both [`next_u64`] and
-/// [`fill_bytes`] discard the rest of any half-consumed `u64`s when called.
-///
-/// [`fill_bytes`] consumes a whole number of `u64` values. If the requested length
-/// is not a multiple of 8, some bytes will be discarded.
-///
-/// [`next_u32`]: RngCore::next_u32
-/// [`next_u64`]: RngCore::next_u64
-/// [`fill_bytes`]: RngCore::fill_bytes
-#[derive(Clone)]
-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
-pub struct BlockRng64<R: BlockRngCore + ?Sized> {
-    results: R::Results,
-    index: usize,
-    half_used: bool, // true if only half of the previous result is used
-    /// The *core* part of the RNG, implementing the `generate` function.
-    pub core: R,
-}
-
-// Custom Debug implementation that does not expose the contents of `results`.
-impl<R: BlockRngCore + fmt::Debug> fmt::Debug for BlockRng64<R> {
-    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
-        fmt.debug_struct("BlockRng64")
-            .field("core", &self.core)
-            .field("result_len", &self.results.as_ref().len())
-            .field("index", &self.index)
-            .field("half_used", &self.half_used)
-            .finish()
-    }
-}
-
-impl<R: BlockRngCore> BlockRng64<R> {
-    /// Create a new `BlockRng` from an existing RNG implementing
-    /// `BlockRngCore`. Results will be generated on first use.
-    #[inline]
-    pub fn new(core: R) -> BlockRng64<R> {
-        let results_empty = R::Results::default();
-        BlockRng64 {
-            core,
-            index: results_empty.as_ref().len(),
-            half_used: false,
-            results: results_empty,
-        }
-    }
-
-    /// Get the index into the result buffer.
-    ///
-    /// If this is equal to or larger than the size of the result buffer then
-    /// the buffer is "empty" and `generate()` must be called to produce new
-    /// results.
-    #[inline(always)]
-    pub fn index(&self) -> usize {
-        self.index
-    }
-
-    /// Reset the number of available results.
-    /// This will force a new set of results to be generated on next use.
-    #[inline]
-    pub fn reset(&mut self) {
-        self.index = self.results.as_ref().len();
-        self.half_used = false;
-    }
-
-    /// Generate a new set of results immediately, setting the index to the
-    /// given value.
-    #[inline]
-    pub fn generate_and_set(&mut self, index: usize) {
-        assert!(index < self.results.as_ref().len());
-        self.core.generate(&mut self.results);
-        self.index = index;
-        self.half_used = false;
-    }
-}
-
-impl<R: BlockRngCore<Item = u64>> RngCore for BlockRng64<R> {
-    #[inline]
-    fn next_u32(&mut self) -> u32 {
-        let mut index = self.index - self.half_used as usize;
-        if index >= self.results.as_ref().len() {
-            self.core.generate(&mut self.results);
-            self.index = 0;
-            index = 0;
-            // `self.half_used` is by definition `false`
-            self.half_used = false;
-        }
-
-        let shift = 32 * (self.half_used as usize);
-
-        self.half_used = !self.half_used;
-        self.index += self.half_used as usize;
-
-        (self.results.as_ref()[index] >> shift) as u32
-    }
-
-    #[inline]
-    fn next_u64(&mut self) -> u64 {
-        if self.index >= self.results.as_ref().len() {
-            self.core.generate(&mut self.results);
-            self.index = 0;
-        }
-
-        let value = self.results.as_ref()[self.index];
-        self.index += 1;
-        self.half_used = false;
-        value
-    }
-
-    #[inline]
-    fn fill_bytes(&mut self, dest: &mut [u8]) {
-        let mut read_len = 0;
-        self.half_used = false;
-        while read_len < dest.len() {
-            if self.index >= self.results.as_ref().len() {
-                self.core.generate(&mut self.results);
-                self.index = 0;
-            }
-
-            let (consumed_u64, filled_u8) =
-                fill_via_chunks(&self.results.as_mut()[self.index..], &mut dest[read_len..]);
-
-            self.index += consumed_u64;
-            read_len += filled_u8;
-        }
-    }
-}
-
-impl<R: BlockRngCore + SeedableRng> SeedableRng for BlockRng64<R> {
-    type Seed = R::Seed;
-
-    #[inline(always)]
-    fn from_seed(seed: Self::Seed) -> Self {
-        Self::new(R::from_seed(seed))
-    }
-
-    #[inline(always)]
-    fn seed_from_u64(seed: u64) -> Self {
-        Self::new(R::seed_from_u64(seed))
-    }
-
-    #[inline(always)]
-    fn from_rng<S: RngCore + ?Sized>(rng: &mut S) -> Self {
-        Self::new(R::from_rng(rng))
-    }
-
-    #[inline(always)]
-    fn try_from_rng<S: TryRngCore + ?Sized>(rng: &mut S) -> Result<Self, S::Error> {
-        R::try_from_rng(rng).map(Self::new)
-    }
-}
-
-impl<R: CryptoBlockRng + BlockRngCore<Item = u64>> CryptoRng for BlockRng64<R> {}
-
-#[cfg(test)]
-mod test {
-    use crate::block::{BlockRng, BlockRng64, BlockRngCore};
-    use crate::{RngCore, SeedableRng};
-
-    #[derive(Debug, Clone)]
-    struct DummyRng {
-        counter: u32,
-    }
-
-    impl BlockRngCore for DummyRng {
-        type Item = u32;
-        type Results = [u32; 16];
-
-        fn generate(&mut self, results: &mut Self::Results) {
-            for r in results {
-                *r = self.counter;
-                self.counter = self.counter.wrapping_add(3511615421);
-            }
-        }
-    }
-
-    impl SeedableRng for DummyRng {
-        type Seed = [u8; 4];
-
-        fn from_seed(seed: Self::Seed) -> Self {
-            DummyRng {
-                counter: u32::from_le_bytes(seed),
-            }
-        }
-    }
-
-    #[test]
-    fn blockrng_next_u32_vs_next_u64() {
-        let mut rng1 = BlockRng::<DummyRng>::from_seed([1, 2, 3, 4]);
-        let mut rng2 = rng1.clone();
-        let mut rng3 = rng1.clone();
-
-        let mut a = [0; 16];
-        a[..4].copy_from_slice(&rng1.next_u32().to_le_bytes());
-        a[4..12].copy_from_slice(&rng1.next_u64().to_le_bytes());
-        a[12..].copy_from_slice(&rng1.next_u32().to_le_bytes());
-
-        let mut b = [0; 16];
-        b[..4].copy_from_slice(&rng2.next_u32().to_le_bytes());
-        b[4..8].copy_from_slice(&rng2.next_u32().to_le_bytes());
-        b[8..].copy_from_slice(&rng2.next_u64().to_le_bytes());
-        assert_eq!(a, b);
-
-        let mut c = [0; 16];
-        c[..8].copy_from_slice(&rng3.next_u64().to_le_bytes());
-        c[8..12].copy_from_slice(&rng3.next_u32().to_le_bytes());
-        c[12..].copy_from_slice(&rng3.next_u32().to_le_bytes());
-        assert_eq!(a, c);
-    }
-
-    #[derive(Debug, Clone)]
-    struct DummyRng64 {
-        counter: u64,
-    }
-
-    impl BlockRngCore for DummyRng64 {
-        type Item = u64;
-        type Results = [u64; 8];
-
-        fn generate(&mut self, results: &mut 
Self::Results) {
-            for r in results {
-                *r = self.counter;
-                self.counter = self.counter.wrapping_add(2781463553396133981);
-            }
-        }
-    }
-
-    impl SeedableRng for DummyRng64 {
-        type Seed = [u8; 8];
-
-        fn from_seed(seed: Self::Seed) -> Self {
-            DummyRng64 {
-                counter: u64::from_le_bytes(seed),
-            }
-        }
-    }
-
-    #[test]
-    fn blockrng64_next_u32_vs_next_u64() {
-        let mut rng1 = BlockRng64::<DummyRng64>::from_seed([1, 2, 3, 4, 5, 6, 7, 8]);
-        let mut rng2 = rng1.clone();
-        let mut rng3 = rng1.clone();
-
-        let mut a = [0; 16];
-        a[..4].copy_from_slice(&rng1.next_u32().to_le_bytes());
-        a[4..12].copy_from_slice(&rng1.next_u64().to_le_bytes());
-        a[12..].copy_from_slice(&rng1.next_u32().to_le_bytes());
-
-        let mut b = [0; 16];
-        b[..4].copy_from_slice(&rng2.next_u32().to_le_bytes());
-        b[4..8].copy_from_slice(&rng2.next_u32().to_le_bytes());
-        b[8..].copy_from_slice(&rng2.next_u64().to_le_bytes());
-        assert_ne!(a, b);
-        assert_eq!(&a[..4], &b[..4]);
-        assert_eq!(&a[4..12], &b[8..]);
-
-        let mut c = [0; 16];
-        c[..8].copy_from_slice(&rng3.next_u64().to_le_bytes());
-        c[8..12].copy_from_slice(&rng3.next_u32().to_le_bytes());
-        c[12..].copy_from_slice(&rng3.next_u32().to_le_bytes());
-        assert_eq!(b, c);
-    }
-
-    #[test]
-    fn blockrng64_generate_and_set() {
-        let mut rng = BlockRng64::<DummyRng64>::from_seed([1, 2, 3, 4, 5, 6, 7, 8]);
-        assert_eq!(rng.index(), rng.results.as_ref().len());
-
-        rng.generate_and_set(5);
-        assert_eq!(rng.index(), 5);
-    }
-
-    #[test]
-    #[should_panic(expected = "index < self.results.as_ref().len()")]
-    fn blockrng64_generate_and_set_panic() {
-        let mut rng = BlockRng64::<DummyRng64>::from_seed([1, 2, 3, 4, 5, 6, 7, 8]);
-        rng.generate_and_set(rng.results.as_ref().len());
-    }
-
-    #[test]
-    fn blockrng_next_u64() {
-        let mut rng = BlockRng::<DummyRng>::from_seed([1, 2, 3, 4]);
-        let result_size = rng.results.as_ref().len();
-        for _i in 0..result_size / 2 - 1 {
-            rng.next_u64();
-        }
-        rng.next_u32();
-
-        let _ = rng.next_u64();
-        assert_eq!(rng.index(), 1);
-    }
-}
diff --git a/src/le.rs b/src/le.rs
index e42c572d..b0e116ea 100644
--- a/src/le.rs
+++ b/src/le.rs
@@ -1,45 +1,307 @@
-//! # Little-Endian utilities
+//! Helper utilities.
 //!
 //! For cross-platform reproducibility, Little-Endian order (least-significant
 //! part first) has been chosen as the standard for inter-type conversion.
-//! For example, [`next_u64_via_u32`] takes `u32`
-//! values `x, y`, then outputs `(y << 32) | x`.
+//! For example, [`next_u64_via_u32`] generates two `u32` values `x, y`,
+//! then outputs `(y << 32) | x`.
 //!
 //! Byte-swapping (like the std `to_le` functions) is only needed to convert
 //! to/from byte sequences, and since its purpose is reproducibility,
 //! non-reproducible sources (e.g. `OsRng`) need not bother with it.
 //!
-//! ### Implementing [`RngCore`]
+//! # Implementing [`SeedableRng`]
 //!
-//! Usually an implementation of [`RngCore`] will implement one of the three
-//! methods over its internal source. The following helpers are provided for
-//! the remaining implementations.
+//! In many cases, [`SeedableRng::Seed`] must be converted to `[u32]` or `[u64]`.
+//! We provide the [`read_words`] helper function for this. The examples below
+//! demonstrate how it can be used in practice.
 //!
-//! **`fn next_u32`:**
-//! - `self.next_u64() as u32`
-//! - `(self.next_u64() >> 32) as u32`
-//! - [next_u32_via_fill][](self)
+//! [`SeedableRng`]: crate::SeedableRng
+//! [`SeedableRng::Seed`]: crate::SeedableRng::Seed
 //!
-//! **`fn next_u64`:**
-//! - [next_u64_via_u32][](self)
-//! - [next_u64_via_fill][](self)
+//! 
# Implementing [`RngCore`]
 //!
-//! **`fn fill_bytes`:**
-//! - [fill_bytes_via_next][](self, dest)
+//! Usually an implementation of [`RngCore`] will implement one of the three methods
+//! over its internal source, while the remaining methods are implemented on top of it.
 //!
-//! ### Implementing [`SeedableRng`]
+//! Some RNGs instead generate fixed-size blocks of data. In this case, the implementation must
+//! handle buffering of the generated blocks.
 //!
-//! In many cases, [`SeedableRng::Seed`] must be converted to `[u32]` or
-//! `[u64]`. The following helpers are provided:
+//! If an implementation can generate several blocks simultaneously (e.g. using SIMD), we recommend
+//! treating the generated blocks as a single large block (i.e. treat
+//! `[[u32; N]; M]` as `[u32; N * M]`). If the number of simultaneously generated blocks depends
+//! on CPU target features, we recommend using the largest supported number of blocks
+//! for all target features.
 //!
-//! - [`read_u32_into`]
-//! - [`read_u64_into`]
+//! # Examples
+//!
+//! The examples below demonstrate how functions in this module can be used to implement
+//! [`RngCore`] and [`SeedableRng`] for common RNG algorithm classes.
+//!
+//! ## RNG outputs `u32`
+//!
+//! ```
+//! use rand_core::{RngCore, SeedableRng, le};
+//!
+//! pub struct Step32Rng {
+//!     state: u32
+//! }
+//!
+//! impl SeedableRng for Step32Rng {
+//!     type Seed = [u8; 4];
+//!
+//!     fn from_seed(seed: Self::Seed) -> Self {
+//!         // Always use little-endian byte order to ensure portable results
+//!         let state = u32::from_le_bytes(seed);
+//!         Self { state }
+//!     }
+//! }
+//!
+//! impl RngCore for Step32Rng {
+//!     fn next_u32(&mut self) -> u32 {
+//!         // ...
+//!         # let val = self.state;
+//!         # self.state = val + 1;
+//!         # val
+//!     }
+//!
+//!     fn next_u64(&mut self) -> u64 {
+//!         le::next_u64_via_u32(self)
+//!     }
+//!
+//!     fn fill_bytes(&mut self, dst: &mut [u8]) {
+//!         le::fill_bytes_via_next_word(dst, || self.next_u32());
+//!     }
+//! }
+//!
+//! # let mut rng = Step32Rng::seed_from_u64(42);
+//! # assert_eq!(rng.next_u32(), 0x7ba1_8fa4);
+//! # assert_eq!(rng.next_u64(), 0x7ba1_8fa6_7ba1_8fa5);
+//! # let mut buf = [0u8; 5];
+//! # rng.fill_bytes(&mut buf);
+//! # assert_eq!(buf, [0xa7, 0x8f, 0xa1, 0x7b, 0xa8]);
+//! ```
+//!
+//! ## RNG outputs `u64`
+//!
+//! ```
+//! use rand_core::{RngCore, SeedableRng, le};
+//!
+//! pub struct Step64Rng {
+//!     state: u64
+//! }
+//!
+//! impl SeedableRng for Step64Rng {
+//!     type Seed = [u8; 8];
+//!
+//!     fn from_seed(seed: Self::Seed) -> Self {
+//!         // Always use little-endian byte order to ensure portable results
+//!         let state = u64::from_le_bytes(seed);
+//!         Self { state }
+//!     }
+//! }
+//!
+//! impl RngCore for Step64Rng {
+//!     fn next_u32(&mut self) -> u32 {
+//!         self.next_u64() as u32
+//!     }
+//!
+//!     fn next_u64(&mut self) -> u64 {
+//!         // ...
+//!         # let val = self.state;
+//!         # self.state = val + 1;
+//!         # val
+//!     }
+//!
+//!     fn fill_bytes(&mut self, dst: &mut [u8]) {
+//!         le::fill_bytes_via_next_word(dst, || self.next_u64());
+//!     }
+//! }
+//!
+//! # let mut rng = Step64Rng::seed_from_u64(42);
+//! # assert_eq!(rng.next_u32(), 0x7ba1_8fa4);
+//! # assert_eq!(rng.next_u64(), 0x0a3d_3258_7ba1_8fa5);
+//! # let mut buf = [0u8; 5];
+//! # rng.fill_bytes(&mut buf);
+//! # assert_eq!(buf, [0xa6, 0x8f, 0xa1, 0x7b, 0x58]);
+//! ```
+//!
+//! ## RNG outputs `[u32; N]`
+//!
+//! ```
+//! use rand_core::{RngCore, SeedableRng, le};
+//!
+//! struct Block8x32RngInner {
+//!     // ...
+//! 
# state: [u32; 8] +//! } +//! +//! impl Block8x32RngInner { +//! fn new(seed: [u32; 8]) -> Self { +//! // ... +//! # Self { state: seed } +//! } +//! +//! fn next_block(&mut self, block: &mut [u32; 8]) { +//! // ... +//! # *block = self.state; +//! # self.state.iter_mut().for_each(|v| *v += 1); +//! } +//! } +//! +//! pub struct Block8x32Rng { +//! inner: Block8x32RngInner, +//! buffer: [u32; 8], +//! } +//! +//! impl SeedableRng for Block8x32Rng { +//! type Seed = [u8; 32]; +//! +//! fn from_seed(seed: Self::Seed) -> Self { +//! let seed: [u32; 8] = le::read_words(&seed); +//! Self { +//! inner: Block8x32RngInner::new(seed), +//! buffer: le::new_buffer(), +//! } +//! } +//! } +//! +//! impl RngCore for Block8x32Rng { +//! fn next_u32(&mut self) -> u32 { +//! let Self { inner, buffer } = self; +//! le::next_word_via_gen_block(buffer, |block| inner.next_block(block)) +//! } +//! +//! fn next_u64(&mut self) -> u64 { +//! let Self { inner, buffer } = self; +//! le::next_u64_via_gen_block(buffer, |block| inner.next_block(block)) +//! } +//! +//! fn fill_bytes(&mut self, dst: &mut [u8]) { +//! let Self { inner, buffer } = self; +//! le::fill_bytes_via_gen_block(dst, buffer, |block| inner.next_block(block)); +//! } +//! } +//! +//! # let mut rng = Block8x32Rng::seed_from_u64(42); +//! # assert_eq!(rng.next_u32(), 0x7ba1_8fa4); +//! # assert_eq!(rng.next_u64(), 0xcca1_b8ea_0a3d_3258); +//! # let mut buf = [0u8; 5]; +//! # rng.fill_bytes(&mut buf); +//! # assert_eq!(buf, [0x69, 0x01, 0x14, 0xb8, 0x2b]); +//! ``` +//! +//! ## RNG outputs `[u64; N]` +//! +//! ``` +//! use rand_core::{RngCore, SeedableRng, le}; +//! +//! struct Block4x64RngInner { +//! // ... +//! # state: [u64; 4], +//! } +//! +//! impl Block4x64RngInner { +//! fn new(seed: [u64; 4]) -> Self { +//! // ... +//! # Self { state: seed } +//! } +//! +//! fn next_block(&mut self, block: &mut [u64; 4]) { +//! // ... +//! # *block = self.state; +//! # self.state.iter_mut().for_each(|v| *v += 1); +//! } +//! } +//! +//! pub struct Block4x64Rng { +//! inner: Block4x64RngInner, +//! buffer: [u64; 4], +//! } +//! +//! impl SeedableRng for Block4x64Rng { +//! type Seed = [u8; 32]; +//! +//! fn from_seed(seed: Self::Seed) -> Self { +//! let seed: [u64; 4] = le::read_words(&seed); +//! Self { +//! inner: Block4x64RngInner::new(seed), +//! buffer: le::new_buffer(), +//! } +//! } +//! } +//! +//! impl RngCore for Block4x64Rng { +//! fn next_u32(&mut self) -> u32 { +//! self.next_u64() as u32 +//! } +//! +//! fn next_u64(&mut self) -> u64 { +//! let Self { inner, buffer } = self; +//! le::next_word_via_gen_block(buffer, |block| inner.next_block(block)) +//! } +//! +//! fn fill_bytes(&mut self, dst: &mut [u8]) { +//! let Self { inner, buffer } = self; +//! le::fill_bytes_via_gen_block(dst, buffer, |block| inner.next_block(block)); +//! } +//! } +//! +//! # let mut rng = Block4x64Rng::seed_from_u64(42); +//! # assert_eq!(rng.next_u32(), 0x7ba1_8fa4); +//! # assert_eq!(rng.next_u64(), 0xb814_0169_cca1_b8ea); +//! # let mut buf = [0u8; 5]; +//! # rng.fill_bytes(&mut buf); +//! # assert_eq!(buf, [0x2b, 0x8c, 0xc8, 0x75, 0x18]); +//! ``` +//! +//! ## RNG outputs bytes +//! +//! ``` +//! use rand_core::RngCore; +//! +//! pub struct FillRng { +//! // ... +//! # state: u8, +//! } +//! +//! impl RngCore for FillRng { +//! fn next_u32(&mut self) -> u32 { +//! let mut buf = [0; 4]; +//! self.fill_bytes(&mut buf); +//! u32::from_le_bytes(buf) +//! } +//! +//! fn next_u64(&mut self) -> u64 { +//! let mut buf = [0; 8]; +//! self.fill_bytes(&mut buf); +//! 
u64::from_le_bytes(buf)
+//!     }
+//!
+//!     fn fill_bytes(&mut self, dst: &mut [u8]) {
+//!         // ...
+//!         # for byte in dst {
+//!         #     let val = self.state;
+//!         #     self.state = val + 1;
+//!         #     *byte = val;
+//!         # }
+//!     }
+//! }
+//!
+//! # let mut rng = FillRng { state: 0 };
+//! # assert_eq!(rng.next_u32(), 0x03_020100);
+//! # assert_eq!(rng.next_u64(), 0x0b0a_0908_0706_0504);
+//! # let mut buf = [0u8; 5];
+//! # rng.fill_bytes(&mut buf);
+//! # assert_eq!(buf, [0x0c, 0x0d, 0x0e, 0x0f, 0x10]);
+//! ```
+//!
+//! Note that you can use `from_ne_bytes` instead of `from_le_bytes`
+//! if your `fill_bytes` implementation is not reproducible.
 
 use crate::RngCore;
-#[allow(unused)]
-use crate::SeedableRng;
 
-/// Implement `next_u64` via `next_u32`, little-endian order.
+/// Implement `next_u64` via `next_u32` using little-endian order.
+#[inline]
 pub fn next_u64_via_u32<R: RngCore + ?Sized>(rng: &mut R) -> u64 {
     // Use LE; we explicitly generate one value before the next.
     let x = u64::from(rng.next_u32());
     let y = u64::from(rng.next_u32());
     (y << 32) | x
 }
 
-/// Implement `fill_bytes` via `next_u64` and `next_u32`, little-endian order.
-///
-/// The fastest way to fill a slice is usually to work as long as possible with
-/// integers. That is why this method mostly uses `next_u64`, and only when
-/// there are 4 or less bytes remaining at the end of the slice it uses
-/// `next_u32` once.
-pub fn fill_bytes_via_next<R: RngCore + ?Sized>(rng: &mut R, dest: &mut [u8]) {
-    let mut left = dest;
-    while left.len() >= 8 {
-        let (l, r) = { left }.split_at_mut(8);
-        left = r;
-        let chunk: [u8; 8] = rng.next_u64().to_le_bytes();
-        l.copy_from_slice(&chunk);
+/// Implement `fill_bytes` via a `next_word` closure using little-endian order.
+#[inline]
+pub fn fill_bytes_via_next_word<W: Word>(dst: &mut [u8], mut next_word: impl FnMut() -> W) {
+    let mut chunks = dst.chunks_exact_mut(size_of::<W>());
+    for chunk in &mut chunks {
+        let val = next_word();
+        chunk.copy_from_slice(val.to_le_bytes().as_ref());
     }
-    let n = left.len();
-    if n > 4 {
-        let chunk: [u8; 8] = rng.next_u64().to_le_bytes();
-        left.copy_from_slice(&chunk[..n]);
-    } else if n > 0 {
-        let chunk: [u8; 4] = rng.next_u32().to_le_bytes();
-        left.copy_from_slice(&chunk[..n]);
+    let rem = chunks.into_remainder();
+    if !rem.is_empty() {
+        let val = next_word().to_le_bytes();
+        rem.copy_from_slice(&val.as_ref()[..rem.len()]);
     }
 }
 
-pub(crate) trait Observable: Copy {
-    type Bytes: Sized + AsRef<[u8]>;
-    fn to_le_bytes(self) -> Self::Bytes;
-}
-impl Observable for u32 {
-    type Bytes = [u8; 4];
-
-    fn to_le_bytes(self) -> Self::Bytes {
-        Self::to_le_bytes(self)
+/// Reads an array of words from the byte slice `src` using little-endian order.
+///
+/// # Panics
+/// If `size_of_val(src) != size_of::<[W; N]>()`.
+#[inline]
+pub fn read_words<W: Word, const N: usize>(src: &[u8]) -> [W; N] {
+    assert_eq!(size_of_val(src), size_of::<[W; N]>());
+    let mut dst = [W::from_usize(0); N];
+    let chunks = src.chunks_exact(size_of::<W>());
+    for (out, chunk) in dst.iter_mut().zip(chunks) {
+        let Ok(bytes) = chunk.try_into() else {
+            unreachable!()
+        };
+        *out = W::from_le_bytes(bytes);
     }
+    dst
 }
-impl Observable for u64 {
-    type Bytes = [u8; 8];
 
-    fn to_le_bytes(self) -> Self::Bytes {
-        Self::to_le_bytes(self)
-    }
+/// Create a new block buffer.
+///
+/// # Panics
+/// If `N` is equal to 0 or cannot be represented as `W`.
+#[inline]
+pub fn new_buffer<W: Word, const N: usize>() -> [W; N] {
+    let mut res = [W::from_usize(0); N];
+    res[0] = W::from_usize(N);
+    res
 }
 
-/// Fill dest from src
-///
-/// Returns `(n, byte_len)`. 
`src[..n]` is consumed,
-/// `dest[..byte_len]` is filled. `src[n..]` and `dest[byte_len..]` are left
-/// unaltered.
-pub(crate) fn fill_via_chunks<T: Observable>(src: &[T], dest: &mut [u8]) -> (usize, usize) {
-    let size = core::mem::size_of::<T>();
-
-    // Always use little endian for portability of results.
-
-    let mut dest = dest.chunks_exact_mut(size);
-    let mut src = src.iter();
-
-    let zipped = dest.by_ref().zip(src.by_ref());
-    let num_chunks = zipped.len();
-    zipped.for_each(|(dest, src)| dest.copy_from_slice(src.to_le_bytes().as_ref()));
-
-    let byte_len = num_chunks * size;
-    if let Some(src) = src.next() {
-        // We have consumed all full chunks of dest, but not src.
-        let dest = dest.into_remainder();
-        let n = dest.len();
-        if n > 0 {
-            dest.copy_from_slice(&src.to_le_bytes().as_ref()[..n]);
-            return (num_chunks + 1, byte_len + n);
+/// Implement `next_u32`/`next_u64` using a buffer and a block generation closure.
+#[inline]
+pub fn next_word_via_gen_block<W: Word, const N: usize>(
+    buf: &mut [W; N],
+    mut generate_block: impl FnMut(&mut [W; N]),
+) -> W {
+    let pos = buf[0].into_usize();
+    debug_assert_ne!(pos, 0, "cursor position should not be zero");
+    match buf.get(pos) {
+        Some(&val) => {
+            buf[0].increment();
+            val
+        }
+        None => {
+            generate_block(buf);
+            core::mem::replace(&mut buf[0], W::from_usize(1))
         }
     }
-    (num_chunks, byte_len)
 }
 
-/// Implement `next_u32` via `fill_bytes`, little-endian order.
-pub fn next_u32_via_fill<R: RngCore + ?Sized>(rng: &mut R) -> u32 {
-    let mut buf = [0; 4];
-    rng.fill_bytes(&mut buf);
-    u32::from_le_bytes(buf)
-}
+/// Implement `next_u64` using a `u32` buffer and a block generation closure.
+#[inline]
+pub fn next_u64_via_gen_block<const N: usize>(
+    buf: &mut [u32; N],
+    mut generate_block: impl FnMut(&mut [u32; N]),
+) -> u64 {
+    use core::mem::replace;
+    let pos = usize::try_from(buf[0]).unwrap();
 
-/// Implement `next_u64` via `fill_bytes`, little-endian order.
-pub fn next_u64_via_fill<R: RngCore + ?Sized>(rng: &mut R) -> u64 {
-    let mut buf = [0; 8];
-    rng.fill_bytes(&mut buf);
-    u64::from_le_bytes(buf)
+    let (x, y) = if pos < N - 1 {
+        let xy = (buf[pos], buf[pos + 1]);
+        buf[0] += 2;
+        xy
+    } else if pos == N - 1 {
+        let x = buf[pos];
+        generate_block(buf);
+        let y = replace(&mut buf[0], 1);
+        (x, y)
+    } else {
+        generate_block(buf);
+        let x = replace(&mut buf[0], 2);
+        let y = buf[1];
+        (x, y)
+    };
+
+    u64::from(y) << 32 | u64::from(x)
 }
 
-/// Fills `dst: &mut [u32]` from `src`
-///
-/// Reads use Little-Endian byte order, allowing portable reproduction of `dst`
-/// from a byte slice.
-///
-/// # Panics
-///
-/// If `src` has insufficient length (if `src.len() < 4*dst.len()`).
+/// Implement `fill_bytes` using a buffer and a block generation closure.
 #[inline]
-#[track_caller]
-pub fn read_u32_into(src: &[u8], dst: &mut [u32]) {
-    assert!(src.len() >= 4 * dst.len());
-    for (out, chunk) in dst.iter_mut().zip(src.chunks_exact(4)) {
-        *out = u32::from_le_bytes(chunk.try_into().unwrap());
+pub fn fill_bytes_via_gen_block<W: Word, const N: usize>(
+    mut dst: &mut [u8],
+    buf: &mut [W; N],
+    mut generate_block: impl FnMut(&mut [W; N]),
+) {
+    let word_size = size_of::<W>();
+
+    let pos = buf[0];
+    let pos_usize = pos.into_usize();
+    debug_assert_ne!(pos_usize, 0, "cursor position should not be zero");
+    if pos_usize < buf.len() {
+        let buf_tail = &buf[pos_usize..];
+        let buf_rem = size_of_val(buf_tail);
+
+        if buf_rem >= dst.len() {
+            let new_pos = read_bytes(buf, dst, pos);
+            buf[0] = new_pos;
+            return;
+        }
+
+        let (l, r) = dst.split_at_mut(buf_rem);
+        read_bytes(buf, l, pos);
+        dst = r;
+    }
+
+    let mut blocks = dst.chunks_exact_mut(N * word_size);
+    let zero = W::from_usize(0);
+    for block in &mut blocks {
+        // We intentionally use the temporary buffer to prevent unnecessary writes
+        // to the original `buf` and to enable potential optimization of writing
+        // generated data directly into `block`.
+        let mut buf = [zero; N];
+        generate_block(&mut buf);
+        read_bytes(&buf, block, zero);
     }
+
+    let rem = blocks.into_remainder();
+    let new_pos = if rem.is_empty() {
+        W::from_usize(N)
+    } else {
+        generate_block(buf);
+        read_bytes::<W, N>(buf, rem, zero)
+    };
+    buf[0] = new_pos;
 }
 
-/// Fills `dst: &mut [u64]` from `src`
-///
-/// # Panics
+/// Reads bytes from `&block[pos..new_pos]` into `dst` using little-endian byte
+/// order, ignoring the tail bytes if necessary, and returns `new_pos`.
 ///
-/// If `src` has insufficient length (if `src.len() < 8*dst.len()`).
+/// This function is written in a way that helps the compiler compile it down
+/// to a single `memcpy`. The temporary buffer gets eliminated by the compiler, see:
+/// <https://rust.godbolt.org/z/T8f77KjGc>
 #[inline]
-#[track_caller]
-pub fn read_u64_into(src: &[u8], dst: &mut [u64]) {
-    assert!(src.len() >= 8 * dst.len());
-    for (out, chunk) in dst.iter_mut().zip(src.chunks_exact(8)) {
-        *out = u64::from_le_bytes(chunk.try_into().unwrap());
+fn read_bytes<W: Word, const N: usize>(block: &[W; N], dst: &mut [u8], pos: W) -> W {
+    let word_size = size_of::<W>();
+    let pos = pos.into_usize();
+    assert!(size_of_val(&block[pos..]) >= size_of_val(dst));
+
+    // TODO: replace with `[0u8; { size_of::<W>() * N }]` on
+    // stabilization of `generic_const_exprs`
+    let mut buf = [W::from_usize(0); N];
+    // SAFETY: it's safe to reference `[u32/u64; N]` as `&mut [u8]`
+    // with length equal to `size_of::<W>() * N`
+    let buf: &mut [u8] = unsafe {
+        let p: *mut u8 = buf.as_mut_ptr().cast();
+        let len = word_size * N;
+        core::slice::from_raw_parts_mut(p, len)
+    };
+
+    for (src, dst) in block.iter().zip(buf.chunks_exact_mut(word_size)) {
+        let val = src.to_le_bytes();
+        dst.copy_from_slice(val.as_ref())
     }
+
+    let offset = pos * word_size;
+    dst.copy_from_slice(&buf[offset..][..dst.len()]);
+    let read_words = dst.len().div_ceil(word_size);
+    W::from_usize(pos + read_words)
 }
 
+/// Sealed trait implemented for `u32` and `u64`.
+pub trait Word: crate::sealed::Sealed {} + +impl Word for u32 {} +impl Word for u64 {} + #[cfg(test)] mod test { use super::*; - #[test] - fn test_fill_via_u32_chunks() { - let src_orig = [1u32, 2, 3]; - - let src = src_orig; - let mut dst = [0u8; 11]; - assert_eq!(fill_via_chunks(&src, &mut dst), (3, 11)); - assert_eq!(dst, [1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0]); - - let src = src_orig; - let mut dst = [0u8; 13]; - assert_eq!(fill_via_chunks(&src, &mut dst), (3, 12)); - assert_eq!(dst, [1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0]); - - let src = src_orig; - let mut dst = [0u8; 5]; - assert_eq!(fill_via_chunks(&src, &mut dst), (2, 5)); - assert_eq!(dst, [1, 0, 0, 0, 2]); - } - - #[test] - fn test_fill_via_u64_chunks() { - let src_orig = [1u64, 2]; - - let src = src_orig; - let mut dst = [0u8; 11]; - assert_eq!(fill_via_chunks(&src, &mut dst), (2, 11)); - assert_eq!(dst, [1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0]); - - let src = src_orig; - let mut dst = [0u8; 17]; - assert_eq!(fill_via_chunks(&src, &mut dst), (2, 16)); - assert_eq!(dst, [1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0]); - - let src = src_orig; - let mut dst = [0u8; 5]; - assert_eq!(fill_via_chunks(&src, &mut dst), (1, 5)); - assert_eq!(dst, [1, 0, 0, 0, 0]); - } - #[test] fn test_read() { let bytes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; - let mut buf = [0u32; 4]; - read_u32_into(&bytes, &mut buf); + let buf: [u32; 4] = read_words(&bytes); assert_eq!(buf[0], 0x04030201); assert_eq!(buf[3], 0x100F0E0D); - let mut buf = [0u32; 3]; - read_u32_into(&bytes[1..13], &mut buf); // unaligned + let buf: [u32; 3] = read_words(&bytes[1..13]); // unaligned assert_eq!(buf[0], 0x05040302); assert_eq!(buf[2], 0x0D0C0B0A); - let mut buf = [0u64; 2]; - read_u64_into(&bytes, &mut buf); + let buf: [u64; 2] = read_words(&bytes); assert_eq!(buf[0], 0x0807060504030201); assert_eq!(buf[1], 0x100F0E0D0C0B0A09); - let mut buf = [0u64; 1]; - read_u64_into(&bytes[7..15], &mut buf); // unaligned + let buf: [u64; 1] = read_words(&bytes[7..15]); // unaligned assert_eq!(buf[0], 0x0F0E0D0C0B0A0908); } } diff --git a/src/lib.rs b/src/lib.rs index a73dadd1..05690a05 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,8 +29,8 @@ use core::{fmt, ops::DerefMut}; -pub mod block; pub mod le; +mod sealed; /// Implementation-level interface for RNGs /// @@ -64,7 +64,7 @@ pub mod le; /// /// Typically an RNG will implement only one of the methods available /// in this trait directly, then use the helper functions from the -/// [`le` module](crate::le) to implement the other methods. +/// [`le`] module to implement the other methods. /// /// Note that implementors of [`RngCore`] also automatically implement /// the [`TryRngCore`] trait with the `Error` associated type being @@ -520,8 +520,7 @@ mod test { type Seed = [u8; 8]; fn from_seed(seed: Self::Seed) -> Self { - let mut x = [0u64; 1]; - le::read_u64_into(&seed, &mut x); + let x: [u64; 1] = le::read_words(&seed); SeedableNum(x[0]) } } diff --git a/src/sealed.rs b/src/sealed.rs new file mode 100644 index 00000000..aa1746b1 --- /dev/null +++ b/src/sealed.rs @@ -0,0 +1,60 @@ +/// Sealed trait implemented for `u32` and `u64`. 
+pub trait Sealed: Default + Copy + TryFrom<usize> {
+    type Bytes: Sized + AsRef<[u8]> + for<'a> TryFrom<&'a [u8]>;
+
+    fn from_usize(val: usize) -> Self;
+    fn into_usize(self) -> usize;
+    fn to_le_bytes(self) -> Self::Bytes;
+    fn from_le_bytes(bytes: Self::Bytes) -> Self;
+    fn increment(&mut self);
+}
+
+impl Sealed for u32 {
+    type Bytes = [u8; 4];
+
+    #[inline(always)]
+    fn from_usize(val: usize) -> Self {
+        val.try_into().unwrap()
+    }
+    #[inline(always)]
+    fn into_usize(self) -> usize {
+        self.try_into().unwrap()
+    }
+    #[inline(always)]
+    fn to_le_bytes(self) -> Self::Bytes {
+        u32::to_le_bytes(self)
+    }
+    #[inline(always)]
+    fn from_le_bytes(bytes: Self::Bytes) -> Self {
+        u32::from_le_bytes(bytes)
+    }
+    #[inline(always)]
+    fn increment(&mut self) {
+        *self += 1;
+    }
+}
+
+impl Sealed for u64 {
+    type Bytes = [u8; 8];
+
+    #[inline(always)]
+    fn from_usize(val: usize) -> Self {
+        val.try_into().unwrap()
+    }
+    #[inline(always)]
+    fn into_usize(self) -> usize {
+        self.try_into().unwrap()
+    }
+    #[inline(always)]
+    fn to_le_bytes(self) -> Self::Bytes {
+        u64::to_le_bytes(self)
+    }
+    #[inline(always)]
+    fn from_le_bytes(bytes: Self::Bytes) -> Self {
+        u64::from_le_bytes(bytes)
+    }
+    #[inline(always)]
+    fn increment(&mut self) {
+        *self += 1;
+    }
+}
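
A minimal sketch (not part of the patch) of how the new `le` buffer helpers fit together: `buf[0]` doubles as the cursor position, `new_buffer` marks a fresh buffer as empty by storing `N` there, and `next_word_via_gen_block` regenerates on demand, returning word 0 of a fresh block before overwriting it with the cursor. The block-generator closure and all concrete values below are hypothetical; the assertions follow from the helper implementations above, assuming the patch is applied.

```rust
use rand_core::le;

fn main() {
    // Hypothetical block generator: fills the block with consecutive values.
    let mut counter = 100u32;
    let mut generate = |block: &mut [u32; 4]| {
        for word in block.iter_mut() {
            *word = counter;
            counter += 1;
        }
    };

    // `new_buffer` stores the cursor in `buf[0]`; a value of N (here 4)
    // means "empty", so the first use triggers `generate`.
    let mut buf: [u32; 4] = le::new_buffer();
    assert_eq!(buf[0], 4);

    // Generates [100, 101, 102, 103], returns word 0, sets the cursor to 1.
    assert_eq!(le::next_word_via_gen_block(&mut buf, &mut generate), 100);
    assert_eq!(buf[0], 1); // word 0 now holds the cursor, not RNG output

    // The remaining words are consumed in order without regeneration...
    assert_eq!(le::next_word_via_gen_block(&mut buf, &mut generate), 101);
    assert_eq!(le::next_word_via_gen_block(&mut buf, &mut generate), 102);
    assert_eq!(le::next_word_via_gen_block(&mut buf, &mut generate), 103);

    // ...and the next call transparently generates a fresh block.
    assert_eq!(le::next_word_via_gen_block(&mut buf, &mut generate), 104);

    // `read_words` replaces the removed `read_u32_into`/`read_u64_into`.
    let words: [u32; 2] = le::read_words(&[1, 0, 0, 0, 2, 0, 0, 0]);
    assert_eq!(words, [1, 2]);
}
```

This also shows why `next_u64_via_gen_block` special-cases `pos == N - 1`: the last word of one block must be paired with the first word of the next block, which lands in `buf[0]` and is immediately displaced by the cursor.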