Skip to content

Commit

Permalink
Add initial implementation of 'sort_at_index' for slices -- analog to…
Browse files Browse the repository at this point in the history
… C++'s std::nth_element (a.k.a. quickselect)

Add some more notes to the documentation:

- Mention that the median can be found by using `len() / 2` as the index.
- Mention that this function is usually called "kth element" in other libraries.

Address some comments in PR:

- Change wording on some of the documentation
- Change recursive function into a loop

Update name to `partition_at_index` and add convenience return values.

Address reviewer comments:

- Don't swap on each iteration when searching for min/max element.
- Add some docs about when we panic.
- Test that the sum of the lengths of the output matches the length of the input.
- Style fix for for-loop.

Address more reviewer comments

Fix Rng stuff for test

Fix doc test build

Don't run the partition_at_index test on wasm targets

Miri does not support entropy for test partition_at_index
  • Loading branch information
Mokosha committed Mar 11, 2019
1 parent 88f755f commit 3f306db
Show file tree
Hide file tree
Showing 4 changed files with 355 additions and 0 deletions.
147 changes: 147 additions & 0 deletions src/libcore/slice/mod.rs
Expand Up @@ -1585,6 +1585,153 @@ impl<T> [T] {
sort::quicksort(self, |a, b| f(a).lt(&f(b)));
}

/// Reorder the slice such that the element at `index` is at its final sorted position.
///
/// This reordering has the additional property that any value at position `i < index` will be
/// less than or equal to any value at a position `j > index`. Additionally, this reordering is
/// unstable (i.e. any of several equal elements may end up at position `index`), in-place
/// (i.e. does not allocate), and `O(n)` worst-case. This function is also known as "kth
/// element" in other libraries. It returns a triplet of the following values: all elements less
/// than or equal to the one at the given index, the value at the given index, and all elements
/// greater than or equal to the one at the given index.
///
/// # Current implementation
///
/// The current algorithm is based on the quickselect portion of the same quicksort algorithm
/// used for [`sort_unstable`].
///
/// [`sort_unstable`]: #method.sort_unstable
///
/// # Panics
///
/// Panics when `index >= len()`, meaning it always panics on empty slices.
///
/// # Examples
///
/// ```
/// #![feature(slice_partition_at_index)]
///
/// let mut v = [-5i32, 4, 1, -3, 2];
///
/// // Find the median
/// v.partition_at_index(2);
///
/// // We are only guaranteed the slice will be one of the following, based on the way we sort
/// // about the specified index.
/// assert!(v == [-3, -5, 1, 2, 4] ||
///         v == [-5, -3, 1, 2, 4] ||
///         v == [-3, -5, 1, 4, 2] ||
///         v == [-5, -3, 1, 4, 2]);
/// ```
#[unstable(feature = "slice_partition_at_index", issue = "55300")]
#[inline]
pub fn partition_at_index(&mut self, index: usize) -> (&mut [T], &mut T, &mut [T])
    where T: Ord
{
    // Hand the "strictly less than" predicate straight to the shared implementation, the same
    // way the `sort_unstable*` methods hand theirs to `sort::quicksort`.
    sort::partition_at_index(self, index, |a: &T, b: &T| a.lt(b))
}

/// Reorder the slice with a comparator function such that the element at `index` is at its
/// final sorted position.
///
/// This reordering has the additional property that any value at position `i < index` will be
/// less than or equal to any value at a position `j > index` using the comparator function.
/// Additionally, this reordering is unstable (i.e. any of several equal elements may end up at
/// position `index`), in-place (i.e. does not allocate), and `O(n)` worst-case. This function
/// is also known as "kth element" in other libraries. It returns a triplet of the following
/// values: all elements less than or equal to the one at the given index, the value at the
/// given index, and all elements greater than or equal to the one at the given index, using
/// the provided comparator function.
///
/// # Current implementation
///
/// The current algorithm is based on the quickselect portion of the same quicksort algorithm
/// used for [`sort_unstable`].
///
/// [`sort_unstable`]: #method.sort_unstable
///
/// # Panics
///
/// Panics when `index >= len()`, meaning it always panics on empty slices.
///
/// # Examples
///
/// ```
/// #![feature(slice_partition_at_index)]
///
/// let mut v = [-5i32, 4, 1, -3, 2];
///
/// // Find the median as if the slice were sorted in descending order.
/// v.partition_at_index_by(2, |a, b| b.cmp(a));
///
/// // We are only guaranteed the slice will be one of the following, based on the way we sort
/// // about the specified index.
/// assert!(v == [2, 4, 1, -5, -3] ||
///         v == [2, 4, 1, -3, -5] ||
///         v == [4, 2, 1, -5, -3] ||
///         v == [4, 2, 1, -3, -5]);
/// ```
#[unstable(feature = "slice_partition_at_index", issue = "55300")]
#[inline]
pub fn partition_at_index_by<F>(&mut self, index: usize, mut compare: F)
                                -> (&mut [T], &mut T, &mut [T])
    where F: FnMut(&T, &T) -> Ordering
{
    // The shared implementation only needs a "strictly less than" predicate, so collapse the
    // three-way comparator down to a `Less` test.
    sort::partition_at_index(self, index, |a: &T, b: &T| compare(a, b) == Less)
}

/// Reorder the slice with a key extraction function such that the element at `index` is at its
/// final sorted position.
///
/// This reordering has the additional property that any value at position `i < index` will be
/// less than or equal to any value at a position `j > index` using the key extraction function.
/// Additionally, this reordering is unstable (i.e. any of several equal elements may end up at
/// position `index`), in-place (i.e. does not allocate), and `O(n)` worst-case. This function
/// is also known as "kth element" in other libraries. It returns a triplet of the following
/// values: all elements less than or equal to the one at the given index, the value at the
/// given index, and all elements greater than or equal to the one at the given index, using
/// the provided key extraction function.
///
/// # Current implementation
///
/// The current algorithm is based on the quickselect portion of the same quicksort algorithm
/// used for [`sort_unstable`].
///
/// [`sort_unstable`]: #method.sort_unstable
///
/// # Panics
///
/// Panics when `index >= len()`, meaning it always panics on empty slices.
///
/// # Examples
///
/// ```
/// #![feature(slice_partition_at_index)]
///
/// let mut v = [-5i32, 4, 1, -3, 2];
///
/// // Return the median as if the array were sorted according to absolute value.
/// v.partition_at_index_by_key(2, |a| a.abs());
///
/// // We are only guaranteed the slice will be one of the following, based on the way we sort
/// // about the specified index.
/// assert!(v == [1, 2, -3, 4, -5] ||
///         v == [1, 2, -3, -5, 4] ||
///         v == [2, 1, -3, 4, -5] ||
///         v == [2, 1, -3, -5, 4]);
/// ```
#[unstable(feature = "slice_partition_at_index", issue = "55300")]
#[inline]
pub fn partition_at_index_by_key<K, F>(&mut self, index: usize, mut f: F)
                                       -> (&mut [T], &mut T, &mut [T])
    where F: FnMut(&T) -> K, K: Ord
{
    // Elements are compared through their extracted keys; the key function is invoked on both
    // operands of every comparison.
    sort::partition_at_index(self, index, |a: &T, b: &T| f(a).lt(&f(b)))
}

/// Moves all consecutive repeated elements to the end of the slice according to the
/// [`PartialEq`] trait implementation.
///
Expand Down
89 changes: 89 additions & 0 deletions src/libcore/slice/sort.rs
Expand Up @@ -691,3 +691,92 @@ pub fn quicksort<T, F>(v: &mut [T], mut is_less: F)

recurse(v, &mut is_less, None, limit);
}

/// Iterative quickselect core behind `partition_at_index`.
///
/// Repeatedly picks a pivot in the current sub-slice `v`, partitions `v` around it, and then
/// narrows `v` (adjusting `index` so it stays relative to the new `v`) to the side that still
/// contains `index`. The loop ends once the pivot lands exactly on `index`, once `index` falls
/// inside a run of elements equal to the pivot, or once `v` is short enough to be sorted
/// outright with insertion sort.
///
/// `is_less` is the "strictly less than" predicate used for every comparison. `pred`, when
/// present, is the pivot of an earlier round that sits immediately before `v`; it is used to
/// detect sub-slices that start with many duplicates of that pivot.
///
/// NOTE(review): unlike `quicksort`, which hands a `limit` to `recurse`, this loop places no
/// bound on the number of partitioning rounds. Confirm that this is compatible with the
/// `O(n)` worst-case claim in the public `partition_at_index*` documentation.
fn partition_at_index_loop<'a, T, F>(
    mut v: &'a mut [T],
    mut index: usize,
    is_less: &mut F,
    mut pred: Option<&'a T>,
) where
    F: FnMut(&T, &T) -> bool,
{
    loop {
        // For slices of up to this length it's probably faster to simply sort them.
        const MAX_INSERTION: usize = 10;
        if v.len() <= MAX_INSERTION {
            insertion_sort(v, is_less);
            return;
        }

        // Choose a pivot.
        let (pivot, _) = choose_pivot(v, is_less);

        // If the chosen pivot is equal to the predecessor, then it's the smallest element in the
        // slice. Partition the slice into elements equal to and elements greater than the pivot.
        // This case is usually hit when the slice contains many duplicate elements.
        if let Some(p) = pred {
            if !is_less(p, &v[pivot]) {
                let mid = partition_equal(v, pivot, is_less);

                // If `index` falls inside the run of elements equal to the pivot, then the
                // element at `index` is already in its final position.
                if mid > index {
                    return;
                }

                // Otherwise, continue with the elements greater than the pivot.
                v = &mut v[mid..];
                index -= mid;
                pred = None;
                continue;
            }
        }

        let (mid, _) = partition(v, pivot, is_less);

        // Split the slice into `left`, `pivot`, and `right`. The `{v}` moves `v` out so that the
        // resulting sub-slices keep the full `'a` lifetime and can be assigned back to `v`.
        let (left, right) = {v}.split_at_mut(mid);
        let (pivot, right) = right.split_at_mut(1);
        let pivot = &pivot[0];

        if mid < index {
            // The target lies to the right of the pivot; remember the pivot as predecessor.
            v = right;
            index = index - mid - 1;
            pred = Some(pivot);
        } else if mid > index {
            // The target lies to the left of the pivot; `index` is already relative to `left`.
            v = left;
        } else {
            // If mid == index, then we're done, since partition() guaranteed that all elements
            // after position `mid` are greater than or equal to the element at `mid`.
            return;
        }
    }
}

/// Reorders `v` so that the element at `index` is in its final sorted position according to
/// `is_less`, then returns the slice split around that position.
///
/// The returned triplet is the sub-slice before `index`, a mutable reference to the element at
/// `index`, and the sub-slice after `index`.
///
/// Selecting the first or last position is handled by a single scan for the minimum or maximum
/// element; every other position goes through `partition_at_index_loop`. Slices of zero-sized
/// types are left untouched.
///
/// # Panics
///
/// Panics if `index >= v.len()`.
pub fn partition_at_index<T, F>(v: &mut [T], index: usize, mut is_less: F)
    -> (&mut [T], &mut T, &mut [T]) where F: FnMut(&T, &T) -> bool
{
    use cmp::Ordering::{Greater, Less};

    let len = v.len();
    if index >= len {
        panic!("partition_at_index index {} greater than length of slice {}", index, len);
    }

    // `index < len` holds from here on, so `len` is at least 1 and this cannot underflow.
    let last = len - 1;

    // Sorting has no meaningful behavior on zero-sized types, so they skip the reordering.
    if mem::size_of::<T>() != 0 {
        if index == last {
            // Find max element and place it in the last position of the array. We're free to
            // use `unwrap()` here because we know v must not be empty.
            let (max_index, _) = v
                .iter()
                .enumerate()
                .max_by(|&(_, x), &(_, y)| if is_less(x, y) { Less } else { Greater })
                .unwrap();
            v.swap(max_index, index);
        } else if index == 0 {
            // Find min element and place it in the first position of the array. We're free to
            // use `unwrap()` here because we know v must not be empty.
            let (min_index, _) = v
                .iter()
                .enumerate()
                .min_by(|&(_, x), &(_, y)| if is_less(x, y) { Less } else { Greater })
                .unwrap();
            v.swap(min_index, index);
        } else {
            partition_at_index_loop(v, index, &mut is_less, None);
        }
    }

    // Carve the slice into the part before `index`, the selected element, and the part after.
    let (before, rest) = v.split_at_mut(index);
    let (selected, after) = rest.split_at_mut(1);
    (before, &mut selected[0], after)
}
1 change: 1 addition & 0 deletions src/libcore/tests/lib.rs
Expand Up @@ -21,6 +21,7 @@
#![feature(refcell_replace_swap)]
#![feature(slice_patterns)]
#![feature(sort_internals)]
#![feature(slice_partition_at_index)]
#![feature(specialization)]
#![feature(step_trait)]
#![feature(str_internals)]
Expand Down
118 changes: 118 additions & 0 deletions src/libcore/tests/slice.rs
Expand Up @@ -1084,6 +1084,124 @@ fn sort_unstable() {
assert!(v == [0xDEADBEEF]);
}

// Exercises `partition_at_index`, `partition_at_index_by` and the returned triplet on random
// inputs of various lengths and value ranges, then on a random comparator and on zero-sized
// and single-element slices.
#[test]
#[cfg(not(target_arch = "wasm32"))]
#[cfg(not(miri))] // Miri does not support entropy
fn partition_at_index() {
    use core::cmp::Ordering::{Equal, Greater, Less};
    use rand::rngs::SmallRng;
    use rand::seq::SliceRandom;
    use rand::{FromEntropy, Rng};

    let mut rng = SmallRng::from_entropy();

    for len in (2..21).chain(500..501) {
        let mut orig = vec![0; len];

        for &modulus in &[5, 10, 1000] {
            for _ in 0..10 {
                for i in 0..len {
                    orig[i] = rng.gen::<i32>() % modulus;
                }

                let v_sorted = {
                    let mut v = orig.clone();
                    v.sort();
                    v
                };

                // Sort in default order.
                for pivot in 0..len {
                    let mut v = orig.clone();
                    v.partition_at_index(pivot);

                    assert_eq!(v_sorted[pivot], v[pivot]);
                    for i in 0..pivot {
                        for j in pivot..len {
                            assert!(v[i] <= v[j]);
                        }
                    }
                }

                // Sort in ascending order and check the returned triplet.
                for index in 0..len {
                    let mut v = orig.clone();
                    let (left, pivot, right) = v.partition_at_index_by(index, |a, b| a.cmp(b));

                    assert_eq!(left.len(), index);
                    assert_eq!(left.len() + right.len(), len - 1);

                    // Check the pivot against the right-hand side on its own, so that it is
                    // still verified when `left` is empty (i.e. `index == 0`).
                    for r in right.iter() {
                        assert!(*pivot <= *r);
                    }

                    for l in left.iter() {
                        assert!(*l <= *pivot);
                        for r in right.iter() {
                            assert!(*l <= *r);
                        }
                    }
                }

                // Sort in descending order.
                let sort_descending_comparator = |a: &i32, b: &i32| b.cmp(a);
                let v_sorted_descending = {
                    let mut v = orig.clone();
                    v.sort_by(sort_descending_comparator);
                    v
                };

                for pivot in 0..len {
                    let mut v = orig.clone();
                    v.partition_at_index_by(pivot, sort_descending_comparator);

                    assert_eq!(v_sorted_descending[pivot], v[pivot]);
                    for i in 0..pivot {
                        for j in pivot..len {
                            assert!(v[j] <= v[i]);
                        }
                    }
                }
            }
        }
    }

    // Sort at index using a completely random comparison function.
    // This will reorder the elements *somehow*, but won't panic.
    let mut v = [0; 500];
    for i in 0..v.len() {
        v[i] = i as i32;
    }

    for pivot in 0..v.len() {
        v.partition_at_index_by(pivot, |_, _| *[Less, Equal, Greater].choose(&mut rng).unwrap());
        // Whatever order resulted, no element may have been lost or duplicated.
        v.sort();
        for i in 0..v.len() {
            assert_eq!(v[i], i as i32);
        }
    }

    // Should not panic.
    [(); 10].partition_at_index(0);
    [(); 10].partition_at_index(5);
    [(); 10].partition_at_index(9);
    [(); 100].partition_at_index(0);
    [(); 100].partition_at_index(50);
    [(); 100].partition_at_index(99);

    let mut v = [0xDEADBEEFu64];
    v.partition_at_index(0);
    assert!(v == [0xDEADBEEF]);
}

// Every index is out of bounds for an empty slice, so even index 0 must panic.
#[test]
#[should_panic(expected = "index 0 greater than length of slice")]
fn partition_at_index_zero_length() {
    let mut empty: [i32; 0] = [];
    empty.partition_at_index(0);
}

// An index beyond the end of a non-empty slice must panic and report that index.
#[test]
#[should_panic(expected = "index 20 greater than length of slice")]
fn partition_at_index_past_length() {
    let mut ten_zeros = [0i32; 10];
    ten_zeros.partition_at_index(20);
}

pub mod memchr {
use core::slice::memchr::{memchr, memrchr};

Expand Down

0 comments on commit 3f306db

Please sign in to comment.