Using alloc in k_smallest to retain performance
ejmount committed Feb 19, 2023
1 parent 5d2c698 commit ec6924d
Showing 1 changed file with 29 additions and 6 deletions.
35 changes: 29 additions & 6 deletions src/lib.rs
@@ -2732,19 +2732,40 @@ pub trait Itertools : Iterator {
     /// itertools::assert_equal(five_smallest, 0..5);
     /// ```
     #[cfg(feature = "use_alloc")]
-    fn k_smallest(self, k: usize) -> VecIntoIter<Self::Item>
+    fn k_smallest(mut self, k: usize) -> VecIntoIter<Self::Item>
     where
         Self: Sized,
         Self::Item: Ord,
     {
-        crate::k_smallest::k_smallest_general(self, k, Self::Item::cmp)
+        // The stdlib heap has optimised handling of "holes", which is not included in our heap implementation in k_smallest_general.
+        // While the difference is unlikely to have practical impact unless `T` is very large, this method uses the stdlib structure
+        // to maintain performance compared to previous versions of the crate.
+        use alloc::collections::BinaryHeap;
+
+        if k == 0 {
+            return vec![].into_iter();
+        }
+
+        let mut heap = self.by_ref().take(k).collect::<BinaryHeap<_>>();
+
+        self.for_each(|i| {
+            debug_assert_eq!(heap.len(), k);
+            // Equivalent to heap.push(min(i, heap.pop())) but more efficient.
+            // This should be done with a single `.peek_mut().unwrap()` but
+            // `PeekMut` sifts-down unconditionally on Rust 1.46.0 and prior.
+            if *heap.peek().unwrap() > i {
+                *heap.peek_mut().unwrap() = i;
+            }
+        });
+
+        heap.into_sorted_vec().into_iter()
     }
 
     /// Sort the k smallest elements into a new iterator using the provided comparison.
     ///
     /// This corresponds to `self.sorted_by(cmp).take(k)` in the same way that
     /// [Itertools::k_smallest] corresponds to `self.sorted().take(k)`, in both semantics and complexity.
-    /// Particularly, the comparison is not cloned.
+    /// Particularly, a custom heap implementation ensures the comparison is not cloned.
     #[cfg(feature = "use_alloc")]
     fn k_smallest_by<F>(self, k: usize, cmp: F) -> VecIntoIter<Self::Item>
     where
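The replacement body above boils down to a reusable pattern: keep a max-heap of at most k candidates and evict the root whenever a smaller element arrives. Here is a minimal standalone sketch of that pattern using `std`'s `BinaryHeap` (the committed code uses the `alloc` equivalent inside the trait); `k_smallest_sketch` is a hypothetical name for illustration only.

```rust
use std::collections::BinaryHeap;

// Hypothetical helper sketching the bounded max-heap pattern; not itertools API.
fn k_smallest_sketch<I>(mut iter: I, k: usize) -> Vec<I::Item>
where
    I: Iterator,
    I::Item: Ord,
{
    if k == 0 {
        return Vec::new();
    }
    // Seed a max-heap with the first k elements; its root is the largest
    // candidate currently retained.
    let mut heap: BinaryHeap<_> = iter.by_ref().take(k).collect();
    for item in iter {
        // Overwrite the root only when a strictly smaller element arrives;
        // the `PeekMut` guard sifts the new value down when it is dropped.
        if heap.peek().map_or(false, |max| *max > item) {
            *heap.peek_mut().unwrap() = item;
        }
    }
    // `into_sorted_vec` yields ascending order, matching `k_smallest`.
    heap.into_sorted_vec()
}
```

For example, `k_smallest_sketch(vec![5, 1, 4, 2, 3].into_iter(), 3)` returns `vec![1, 2, 3]`.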
@@ -2766,11 +2787,13 @@ pub trait Itertools : Iterator {
         F: Fn(&Self::Item) -> K,
         K: Ord,
     {
-        self.k_smallest_by(k, |a,b| key(&a).cmp(&key(&b)))
+        self.k_smallest_by(k, |a, b| key(a).cmp(&key(b)))
     }
 
     /// Sort the k largest elements into a new iterator, in descending order.
-    /// Functionally equivalent to `k_smallest` with a reversed `Ord`
+    /// Semantically equivalent to `k_smallest` with a reversed `Ord`.
+    /// However, this is implemented by way of a custom binary heap,
+    /// which does not have the same performance characteristics for very large `T`.
     /// ```
     /// use itertools::Itertools;
     ///
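To make the doc text above concrete, here is a hedged usage sketch assuming the API in this diff; the expected values follow from "descending order" and the reversed-`Ord` equivalence rather than from a committed doc-test.

```rust
use itertools::Itertools;
use std::cmp::Reverse;

// The three largest of 0..10, in descending order per the doc above.
let biggest = (0..10).k_largest(3).collect_vec();
assert_eq!(biggest, vec![9, 8, 7]);

// Semantically the same as k_smallest over the reversed ordering.
let via_reverse: Vec<_> = (0..10)
    .map(Reverse)
    .k_smallest(3)
    .map(|r| r.0)
    .collect();
assert_eq!(biggest, via_reverse);
```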
@@ -2793,7 +2816,7 @@ pub trait Itertools : Iterator {
     }
 
     /// Sort the k largest elements into a new iterator using the provided comparison.
-    /// Functionally equivalent to `k_smallest` with a reversed `Ord`
+    /// Functionally equivalent to `k_smallest_by` with a reversed `Ord`.
    #[cfg(feature = "use_alloc")]
     fn k_largest_by<F>(self, k: usize, cmp: F) -> VecIntoIter<Self::Item>
     where
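The comparator variant admits the same spelled-out check; a sketch under the signatures shown in this diff, with `by_abs` as a made-up comparator for illustration.

```rust
use itertools::Itertools;
use std::cmp::Ordering;

// Made-up comparator: order integers by absolute value.
fn by_abs(a: &i32, b: &i32) -> Ordering {
    a.abs().cmp(&b.abs())
}

let data = vec![-7, 3, -1, 9, -4];
// `k_largest_by(k, cmp)` should agree with `k_smallest_by` under the
// flipped comparator, which is what "reversed `Ord`" amounts to here.
let largest = data.clone().into_iter().k_largest_by(2, by_abs).collect_vec();
let flipped = data.into_iter().k_smallest_by(2, |a, b| by_abs(b, a)).collect_vec();
assert_eq!(largest, flipped); // both should yield [9, -7]
```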