diff --git a/src/lib.rs b/src/lib.rs index 878ce3aeb..13a2028e2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2732,19 +2732,40 @@ pub trait Itertools : Iterator { /// itertools::assert_equal(five_smallest, 0..5); /// ``` #[cfg(feature = "use_alloc")] - fn k_smallest(self, k: usize) -> VecIntoIter<Self::Item> + fn k_smallest(mut self, k: usize) -> VecIntoIter<Self::Item> where Self: Sized, Self::Item: Ord, { - crate::k_smallest::k_smallest_general(self, k, Self::Item::cmp) + // The stdlib heap has optimised handling of "holes", which is not included in our heap implementation in k_smallest_general. + // While the difference is unlikely to have practical impact unless `T` is very large, this method uses the stdlib structure + // to maintain performance compared to previous versions of the crate. + use alloc::collections::BinaryHeap; + + if k == 0 { + return vec![].into_iter(); + } + + let mut heap = self.by_ref().take(k).collect::<BinaryHeap<_>>(); + + self.for_each(|i| { + debug_assert_eq!(heap.len(), k); + // Equivalent to heap.push(min(i, heap.pop())) but more efficient. + // This should be done with a single `.peek_mut().unwrap()` but + // `PeekMut` sifts-down unconditionally on Rust 1.46.0 and prior. + if *heap.peek().unwrap() > i { + *heap.peek_mut().unwrap() = i; + } + }); + + heap.into_sorted_vec().into_iter() } /// Sort the k smallest elements into a new iterator using the provided comparison. /// /// This corresponds to `self.sorted_by(cmp).take(k)` in the same way that /// [Itertools::k_smallest] corresponds to `self.sorted().take(k)`, in both semantics and complexity. - /// Particularly, the comparison is not cloned. + /// Particularly, a custom heap implementation ensures the comparison is not cloned. 
#[cfg(feature = "use_alloc")] fn k_smallest_by<F>(self, k: usize, cmp: F) -> VecIntoIter<Self::Item> where @@ -2766,11 +2787,13 @@ pub trait Itertools : Iterator { F: Fn(&Self::Item) -> K, K: Ord, { - self.k_smallest_by(k, |a,b| key(&a).cmp(&key(&b))) + self.k_smallest_by(k, |a, b| key(a).cmp(&key(b))) } /// Sort the k largest elements into a new iterator, in descending order. - /// Functionally equivalent to `k_smallest` with a reversed `Ord` + /// Semantically equivalent to `k_smallest` with a reversed `Ord`. + /// However, this is implemented by way of a custom binary heap + /// which does not have the same performance characteristics for very large `T`. /// ``` /// use itertools::Itertools; /// @@ -2793,7 +2816,7 @@ pub trait Itertools : Iterator { } /// Sort the k largest elements into a new iterator using the provided comparison. - /// Functionally equivalent to `k_smallest` with a reversed `Ord` + /// Functionally equivalent to `k_smallest_by` with a reversed `Ord`. #[cfg(feature = "use_alloc")] fn k_largest_by<F>(self, k: usize, cmp: F) -> VecIntoIter<Self::Item> where