Skip to content

Commit

Permalink
Add IndexedRandom::choose_multiple_array, index::sample_array (#1453)
Browse files Browse the repository at this point in the history
* New private module rand::seq::iterator
* New private module rand::seq::slice
* Add index::sample_array and IndexedRandom::choose_multiple_array
  • Loading branch information
dhardy committed Jun 4, 2024
1 parent ef75e56 commit ca9e119
Show file tree
Hide file tree
Showing 5 changed files with 1,484 additions and 1,389 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ You may also find the [Upgrade Guide](https://rust-random.github.io/book/update.

## [Unreleased]
- Add `rand::distributions::WeightedIndex::{weight, weights, total_weight}` (#1420)
- Add `IndexedRandom::choose_multiple_array`, `index::sample_array` (#1453)
- Bump the MSRV to 1.61.0
- Rename `Rng::gen` to `Rng::random` to avoid conflict with the new `gen` keyword in Rust 2024 (#1435)
- Move all benchmarks to new `benches` crate (#1439)
Expand Down
72 changes: 53 additions & 19 deletions src/seq/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,35 +7,29 @@
// except according to those terms.

//! Low-level API for sampling indices
use core::{cmp::Ordering, hash::Hash, ops::AddAssign};

#[cfg(feature = "alloc")]
use core::slice;

#[cfg(feature = "alloc")]
use alloc::vec::{self, Vec};
use core::slice;
use core::{hash::Hash, ops::AddAssign};
// BTreeSet is not as fast in tests, but better than nothing.
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::collections::BTreeSet;
#[cfg(feature = "std")]
use std::collections::HashSet;

#[cfg(feature = "std")]
use super::WeightError;

use crate::distributions::uniform::SampleUniform;
#[cfg(feature = "alloc")]
use crate::{
distributions::{uniform::SampleUniform, Distribution, Uniform},
Rng,
};

use crate::distributions::{Distribution, Uniform};
use crate::Rng;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::collections::BTreeSet;
#[cfg(feature = "serde1")]
use serde::{Deserialize, Serialize};
#[cfg(feature = "std")]
use std::collections::HashSet;

/// A vector of indices.
///
/// Multiple internal representations are possible.
#[derive(Clone, Debug)]
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
pub enum IndexVec {
#[doc(hidden)]
Expand All @@ -44,6 +38,7 @@ pub enum IndexVec {
USize(Vec<usize>),
}

#[cfg(feature = "alloc")]
impl IndexVec {
/// Returns the number of indices
#[inline]
Expand Down Expand Up @@ -94,6 +89,7 @@ impl IndexVec {
}
}

#[cfg(feature = "alloc")]
impl IntoIterator for IndexVec {
type IntoIter = IndexVecIntoIter;
type Item = usize;
Expand All @@ -108,6 +104,7 @@ impl IntoIterator for IndexVec {
}
}

#[cfg(feature = "alloc")]
impl PartialEq for IndexVec {
fn eq(&self, other: &IndexVec) -> bool {
use self::IndexVec::*;
Expand All @@ -124,13 +121,15 @@ impl PartialEq for IndexVec {
}
}

#[cfg(feature = "alloc")]
impl From<Vec<u32>> for IndexVec {
    /// Moves a vector of `u32` indices into the `U32` variant.
    #[inline]
    fn from(indices: Vec<u32>) -> Self {
        Self::U32(indices)
    }
}

#[cfg(feature = "alloc")]
impl From<Vec<usize>> for IndexVec {
#[inline]
fn from(v: Vec<usize>) -> Self {
Expand Down Expand Up @@ -171,6 +170,7 @@ impl<'a> Iterator for IndexVecIter<'a> {
// Marker impl with no method overrides. NOTE(review): this relies on the
// `Iterator` impl for `IndexVecIter` (not fully visible here) reporting an
// exact `size_hint` — confirm.
impl<'a> ExactSizeIterator for IndexVecIter<'a> {}

/// Return type of `IndexVec::into_iter`.
#[cfg(feature = "alloc")]
#[derive(Clone, Debug)]
pub enum IndexVecIntoIter {
#[doc(hidden)]
Expand All @@ -179,6 +179,7 @@ pub enum IndexVecIntoIter {
USize(vec::IntoIter<usize>),
}

#[cfg(feature = "alloc")]
impl Iterator for IndexVecIntoIter {
type Item = usize;

Expand All @@ -201,6 +202,7 @@ impl Iterator for IndexVecIntoIter {
}
}

// Marker impl with no method overrides, compiled only with the `alloc`
// feature. NOTE(review): this relies on the `Iterator` impl for
// `IndexVecIntoIter` (not fully visible here) reporting an exact
// `size_hint` — confirm.
#[cfg(feature = "alloc")]
impl ExactSizeIterator for IndexVecIntoIter {}

/// Randomly sample exactly `amount` distinct indices from `0..length`, and
Expand All @@ -225,6 +227,7 @@ impl ExactSizeIterator for IndexVecIntoIter {}
/// to adapt the internal `sample_floyd` implementation.
///
/// Panics if `amount > length`.
#[cfg(feature = "alloc")]
#[track_caller]
pub fn sample<R>(rng: &mut R, length: usize, amount: usize) -> IndexVec
where
Expand Down Expand Up @@ -267,6 +270,33 @@ where
}
}

/// Draw exactly `N` distinct indices from `0..len` and return them as a
/// fixed-size array in random (fully shuffled) order.
///
/// The selection uses Floyd's algorithm, which needs `O(N^2)` time and
/// `O(N)` memory.
///
/// Returns `None` if (and only if) `N > len`.
pub fn sample_array<R, const N: usize>(rng: &mut R, len: usize) -> Option<[usize; N]>
where
    R: Rng + ?Sized,
{
    // `checked_sub` fails precisely when `N > len`, the one case in which no
    // sample of `N` distinct indices exists.
    let first = len.checked_sub(N)?;

    let mut picked = [0usize; N];
    for filled in 0..N {
        // Step `filled` of Floyd's algorithm draws from `0..=upper`, where
        // `upper` walks through `len - N .. len`.
        let upper = first + filled;
        let candidate = rng.gen_range(0..=upper);

        // If `candidate` was already picked, its old slot takes `upper`
        // (which cannot have been drawn before); `candidate` itself is then
        // written at the end of the filled prefix either way.
        if let Some(slot) = picked[..filled].iter().position(|&p| p == candidate) {
            picked[slot] = upper;
        }
        picked[filled] = candidate;
    }
    Some(picked)
}

/// Randomly sample exactly `amount` distinct indices from `0..length`, and
/// return them in an arbitrary order (there is no guarantee of shuffling or
/// ordering). The weights are to be provided by the input function `weights`,
Expand Down Expand Up @@ -329,6 +359,8 @@ where
N: UInt,
IndexVec: From<Vec<N>>,
{
use std::cmp::Ordering;

if amount == N::zero() {
return Ok(IndexVec::U32(Vec::new()));
}
Expand Down Expand Up @@ -399,6 +431,7 @@ where
/// The output values are fully shuffled. (Overhead is under 50%.)
///
/// This implementation uses `O(amount)` memory and `O(amount^2)` time.
#[cfg(feature = "alloc")]
fn sample_floyd<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec
where
R: Rng + ?Sized,
Expand Down Expand Up @@ -430,6 +463,7 @@ where
/// performance in all cases).
///
/// Set-up is `O(length)` time and memory and shuffling is `O(amount)` time.
#[cfg(feature = "alloc")]
fn sample_inplace<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec
where
R: Rng + ?Sized,
Expand Down Expand Up @@ -495,6 +529,7 @@ impl UInt for usize {
///
/// This function is generic over X primarily so that results are value-stable
/// over 32-bit and 64-bit platforms.
#[cfg(feature = "alloc")]
fn sample_rejection<X: UInt, R>(rng: &mut R, length: X, amount: X) -> IndexVec
where
R: Rng + ?Sized,
Expand All @@ -519,9 +554,11 @@ where
IndexVec::from(indices)
}

#[cfg(feature = "alloc")]
#[cfg(test)]
mod test {
use super::*;
use alloc::vec;

#[test]
#[cfg(feature = "serde1")]
Expand All @@ -542,9 +579,6 @@ mod test {
}
}

#[cfg(feature = "alloc")]
use alloc::vec;

#[test]
fn test_sample_boundaries() {
let mut r = crate::test::rng(404);
Expand Down

0 comments on commit ca9e119

Please sign in to comment.