Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add IndexedRandom::choose_multiple_array, index::sample_array #1453

Merged
merged 7 commits into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ You may also find the [Upgrade Guide](https://rust-random.github.io/book/update.

## [Unreleased]
- Add `rand::distributions::WeightedIndex::{weight, weights, total_weight}` (#1420)
- Add `IndexedRandom::choose_multiple_array`, `index::sample_array` (#1453)
- Bump the MSRV to 1.61.0
- Rename `Rng::gen` to `Rng::random` to avoid conflict with the new `gen` keyword in Rust 2024 (#1435)
- Move all benchmarks to new `benches` crate (#1439)
Expand Down
72 changes: 53 additions & 19 deletions src/seq/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,35 +7,29 @@
// except according to those terms.

//! Low-level API for sampling indices
use core::{cmp::Ordering, hash::Hash, ops::AddAssign};

#[cfg(feature = "alloc")]
use core::slice;

#[cfg(feature = "alloc")]
use alloc::vec::{self, Vec};
use core::slice;
use core::{hash::Hash, ops::AddAssign};
// BTreeMap is not as fast in tests, but better than nothing.
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::collections::BTreeSet;
#[cfg(feature = "std")]
use std::collections::HashSet;

#[cfg(feature = "std")]
use super::WeightError;

use crate::distributions::uniform::SampleUniform;
#[cfg(feature = "alloc")]
use crate::{
distributions::{uniform::SampleUniform, Distribution, Uniform},
Rng,
};

use crate::distributions::{Distribution, Uniform};
use crate::Rng;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::collections::BTreeSet;
#[cfg(feature = "serde1")]
use serde::{Deserialize, Serialize};
#[cfg(feature = "std")]
use std::collections::HashSet;

/// A vector of indices.
///
/// Multiple internal representations are possible.
#[derive(Clone, Debug)]
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
pub enum IndexVec {
#[doc(hidden)]
Expand All @@ -44,6 +38,7 @@ pub enum IndexVec {
USize(Vec<usize>),
}

#[cfg(feature = "alloc")]
impl IndexVec {
/// Returns the number of indices
#[inline]
Expand Down Expand Up @@ -94,6 +89,7 @@ impl IndexVec {
}
}

#[cfg(feature = "alloc")]
impl IntoIterator for IndexVec {
type IntoIter = IndexVecIntoIter;
type Item = usize;
Expand All @@ -108,6 +104,7 @@ impl IntoIterator for IndexVec {
}
}

#[cfg(feature = "alloc")]
impl PartialEq for IndexVec {
fn eq(&self, other: &IndexVec) -> bool {
use self::IndexVec::*;
Expand All @@ -124,13 +121,15 @@ impl PartialEq for IndexVec {
}
}

#[cfg(feature = "alloc")]
impl From<Vec<u32>> for IndexVec {
#[inline]
fn from(v: Vec<u32>) -> Self {
IndexVec::U32(v)
}
}

#[cfg(feature = "alloc")]
impl From<Vec<usize>> for IndexVec {
#[inline]
fn from(v: Vec<usize>) -> Self {
Expand Down Expand Up @@ -171,6 +170,7 @@ impl<'a> Iterator for IndexVecIter<'a> {
impl<'a> ExactSizeIterator for IndexVecIter<'a> {}

/// Return type of `IndexVec::into_iter`.
#[cfg(feature = "alloc")]
#[derive(Clone, Debug)]
pub enum IndexVecIntoIter {
#[doc(hidden)]
Expand All @@ -179,6 +179,7 @@ pub enum IndexVecIntoIter {
USize(vec::IntoIter<usize>),
}

#[cfg(feature = "alloc")]
impl Iterator for IndexVecIntoIter {
type Item = usize;

Expand All @@ -201,6 +202,7 @@ impl Iterator for IndexVecIntoIter {
}
}

#[cfg(feature = "alloc")]
impl ExactSizeIterator for IndexVecIntoIter {}

/// Randomly sample exactly `amount` distinct indices from `0..length`, and
Expand All @@ -225,6 +227,7 @@ impl ExactSizeIterator for IndexVecIntoIter {}
/// to adapt the internal `sample_floyd` implementation.
///
/// Panics if `amount > length`.
#[cfg(feature = "alloc")]
#[track_caller]
pub fn sample<R>(rng: &mut R, length: usize, amount: usize) -> IndexVec
where
Expand Down Expand Up @@ -267,6 +270,33 @@ where
}
}

/// Randomly sample exactly `N` distinct indices from `0..len`, and
/// return them in random order (fully shuffled).
///
/// This is implemented via Floyd's algorithm. Time complexity is `O(N^2)`
/// and memory complexity is `O(N)`.
///
/// Returns `None` if (and only if) `N > len`.
pub fn sample_array<R, const N: usize>(rng: &mut R, len: usize) -> Option<[usize; N]>
where
R: Rng + ?Sized,
{
if N > len {
return None;
}

// Floyd's algorithm
let mut indices = [0; N];
for (i, j) in (len - N..len).enumerate() {
let t = rng.gen_range(0..=j);
if let Some(pos) = indices[0..i].iter().position(|&x| x == t) {
indices[pos] = j;
}
indices[i] = t;
}
Some(indices)
}

/// Randomly sample exactly `amount` distinct indices from `0..length`, and
/// return them in an arbitrary order (there is no guarantee of shuffling or
/// ordering). The weights are to be provided by the input function `weights`,
Expand Down Expand Up @@ -329,6 +359,8 @@ where
N: UInt,
IndexVec: From<Vec<N>>,
{
use std::cmp::Ordering;

if amount == N::zero() {
return Ok(IndexVec::U32(Vec::new()));
}
Expand Down Expand Up @@ -399,6 +431,7 @@ where
/// The output values are fully shuffled. (Overhead is under 50%.)
///
/// This implementation uses `O(amount)` memory and `O(amount^2)` time.
#[cfg(feature = "alloc")]
fn sample_floyd<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec
where
R: Rng + ?Sized,
Expand Down Expand Up @@ -430,6 +463,7 @@ where
/// performance in all cases).
///
/// Set-up is `O(length)` time and memory and shuffling is `O(amount)` time.
#[cfg(feature = "alloc")]
fn sample_inplace<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec
where
R: Rng + ?Sized,
Expand Down Expand Up @@ -495,6 +529,7 @@ impl UInt for usize {
///
/// This function is generic over X primarily so that results are value-stable
/// over 32-bit and 64-bit platforms.
#[cfg(feature = "alloc")]
fn sample_rejection<X: UInt, R>(rng: &mut R, length: X, amount: X) -> IndexVec
where
R: Rng + ?Sized,
Expand All @@ -519,9 +554,11 @@ where
IndexVec::from(indices)
}

#[cfg(feature = "alloc")]
#[cfg(test)]
mod test {
use super::*;
use alloc::vec;

#[test]
#[cfg(feature = "serde1")]
Expand All @@ -542,9 +579,6 @@ mod test {
}
}

#[cfg(feature = "alloc")]
use alloc::vec;

#[test]
fn test_sample_boundaries() {
let mut r = crate::test::rng(404);
Expand Down
Loading