Using alloc in k_smallest to retain performance
ejmount committed Feb 19, 2023
1 parent 5d2c698 commit ec6924d
Showing 1 changed file with 29 additions and 6 deletions.
35 changes: 29 additions & 6 deletions src/lib.rs
@@ -2732,19 +2732,40 @@ pub trait Itertools : Iterator {
     /// itertools::assert_equal(five_smallest, 0..5);
     /// ```
     #[cfg(feature = "use_alloc")]
-    fn k_smallest(self, k: usize) -> VecIntoIter<Self::Item>
+    fn k_smallest(mut self, k: usize) -> VecIntoIter<Self::Item>
     where
         Self: Sized,
         Self::Item: Ord,
     {
-        crate::k_smallest::k_smallest_general(self, k, Self::Item::cmp)
+        // The stdlib heap has optimised handling of "holes", which is not included in our heap implementation in k_smallest_general.
+        // While the difference is unlikely to have practical impact unless `T` is very large, this method uses the stdlib structure
+        // to maintain performance compared to previous versions of the crate.
+        use alloc::collections::BinaryHeap;
+
+        if k == 0 {
+            return vec![].into_iter();
+        }
+
+        let mut heap = self.by_ref().take(k).collect::<BinaryHeap<_>>();
+
+        self.for_each(|i| {
+            debug_assert_eq!(heap.len(), k);
+            // Equivalent to heap.push(min(i, heap.pop())) but more efficient.
+            // This should be done with a single `.peek_mut().unwrap()` but
+            // `PeekMut` sifts-down unconditionally on Rust 1.46.0 and prior.
+            if *heap.peek().unwrap() > i {
+                *heap.peek_mut().unwrap() = i;
+            }
+        });
+
+        heap.into_sorted_vec().into_iter()
     }
 
     /// Sort the k smallest elements into a new iterator using the provided comparison.
     ///
     /// This corresponds to `self.sorted_by(cmp).take(k)` in the same way that
     /// [Itertools::k_smallest] corresponds to `self.sorted().take(k)`, in both semantics and complexity.
-    /// Particularly, the comparison is not cloned.
+    /// Particularly, a custom heap implementation ensures the comparison is not cloned.
     #[cfg(feature = "use_alloc")]
     fn k_smallest_by<F>(self, k: usize, cmp: F) -> VecIntoIter<Self::Item>
     where
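The replacement body above boils down to a reusable pattern: keep a max-heap of at most k candidates and evict the root whenever a smaller element arrives. Here is a minimal standalone sketch of that pattern using `std`'s `BinaryHeap` (the committed code uses the `alloc` equivalent inside the trait); `k_smallest_sketch` is a hypothetical name for illustration only.

```rust
use std::collections::BinaryHeap;

// Hypothetical helper sketching the bounded max-heap pattern; not itertools API.
fn k_smallest_sketch<I>(mut iter: I, k: usize) -> Vec<I::Item>
where
    I: Iterator,
    I::Item: Ord,
{
    if k == 0 {
        return Vec::new();
    }
    // Seed a max-heap with the first k elements; its root is the largest
    // candidate currently retained.
    let mut heap: BinaryHeap<_> = iter.by_ref().take(k).collect();
    for item in iter {
        // Overwrite the root only when a strictly smaller element arrives;
        // the `PeekMut` guard sifts the new value down when it is dropped.
        if heap.peek().map_or(false, |max| *max > item) {
            *heap.peek_mut().unwrap() = item;
        }
    }
    // `into_sorted_vec` yields ascending order, matching `k_smallest`.
    heap.into_sorted_vec()
}
```

For example, `k_smallest_sketch(vec![5, 1, 4, 2, 3].into_iter(), 3)` returns `vec![1, 2, 3]`.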
@@ -2766,11 +2787,13 @@ pub trait Itertools : Iterator {
         F: Fn(&Self::Item) -> K,
         K: Ord,
     {
-        self.k_smallest_by(k, |a,b| key(&a).cmp(&key(&b)))
+        self.k_smallest_by(k, |a, b| key(a).cmp(&key(b)))
     }
 
     /// Sort the k largest elements into a new iterator, in descending order.
-    /// Functionally equivalent to `k_smallest` with a reversed `Ord`
+    /// Semantically equivalent to `k_smallest` with a reversed `Ord`.
+    /// However, this is implemented by way of a custom binary heap,
+    /// which does not have the same performance characteristics for very large `T`.
     /// ```
     /// use itertools::Itertools;
     ///
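To make the doc text above concrete, here is a hedged usage sketch assuming the API in this diff; the expected values follow from "descending order" and the reversed-`Ord` equivalence rather than from a committed doc-test.

```rust
use itertools::Itertools;
use std::cmp::Reverse;

// The three largest of 0..10, in descending order per the doc above.
let biggest = (0..10).k_largest(3).collect_vec();
assert_eq!(biggest, vec![9, 8, 7]);

// Semantically the same as k_smallest over the reversed ordering.
let via_reverse: Vec<_> = (0..10)
    .map(Reverse)
    .k_smallest(3)
    .map(|r| r.0)
    .collect();
assert_eq!(biggest, via_reverse);
```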
@@ -2793,7 +2816,7 @@ pub trait Itertools : Iterator {
     }
 
     /// Sort the k largest elements into a new iterator using the provided comparison.
-    /// Functionally equivalent to `k_smallest` with a reversed `Ord`
+    /// Functionally equivalent to `k_smallest_by` with a reversed `Ord`.
    #[cfg(feature = "use_alloc")]
     fn k_largest_by<F>(self, k: usize, cmp: F) -> VecIntoIter<Self::Item>
     where
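The comparator variant admits the same spelled-out check; a sketch under the signatures shown in this diff, with `by_abs` as a made-up comparator for illustration.

```rust
use itertools::Itertools;
use std::cmp::Ordering;

// Made-up comparator: order integers by absolute value.
fn by_abs(a: &i32, b: &i32) -> Ordering {
    a.abs().cmp(&b.abs())
}

let data = vec![-7, 3, -1, 9, -4];
// `k_largest_by(k, cmp)` should agree with `k_smallest_by` under the
// flipped comparator, which is what "reversed `Ord`" amounts to here.
let largest = data.clone().into_iter().k_largest_by(2, by_abs).collect_vec();
let flipped = data.into_iter().k_smallest_by(2, |a, b| by_abs(b, a)).collect_vec();
assert_eq!(largest, flipped); // both should yield [9, -7]
```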