From b996caa9d3dd172bb5aae000bad195938d726306 Mon Sep 17 00:00:00 2001 From: Khashayar Fereidani Date: Mon, 8 Dec 2025 23:02:27 +0330 Subject: [PATCH 1/8] Improve retain_mut implementation performance --- library/alloc/src/vec/mod.rs | 66 +++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 27 deletions(-) diff --git a/library/alloc/src/vec/mod.rs b/library/alloc/src/vec/mod.rs index d2bc5d9bce39d..4fae6271a6d6e 100644 --- a/library/alloc/src/vec/mod.rs +++ b/library/alloc/src/vec/mod.rs @@ -2297,8 +2297,7 @@ impl Vec { unsafe { self.set_len(0) }; // Vec: [Kept, Kept, Hole, Hole, Hole, Hole, Unchecked, Unchecked] - // |<- processed len ->| ^- next to check - // |<- deleted cnt ->| + // | ^- write_index ^- read_index // |<- original_len ->| // Kept: Elements which predicate returns true on. // Hole: Moved or dropped element slot. @@ -2309,31 +2308,31 @@ impl Vec { // In cases when predicate and `drop` never panick, it will be optimized out. struct BackshiftOnDrop<'a, T, A: Allocator> { v: &'a mut Vec, - processed_len: usize, - deleted_cnt: usize, + read_index: usize, + write_index: usize, original_len: usize, } impl Drop for BackshiftOnDrop<'_, T, A> { fn drop(&mut self) { - if self.deleted_cnt > 0 { + if self.write_index < self.read_index { // SAFETY: Trailing unchecked items must be valid since we never touch them. unsafe { ptr::copy( - self.v.as_ptr().add(self.processed_len), - self.v.as_mut_ptr().add(self.processed_len - self.deleted_cnt), - self.original_len - self.processed_len, + self.v.as_ptr().add(self.read_index), + self.v.as_mut_ptr().add(self.write_index), + self.original_len - self.read_index, ); } } // SAFETY: After filling holes, all items are in contiguous memory. unsafe { - self.v.set_len(self.original_len - self.deleted_cnt); + self.v.set_len(self.original_len - (self.read_index - self.write_index)); } } } - let mut g = BackshiftOnDrop { v: self, processed_len: 0, deleted_cnt: 0, original_len }; + let mut g = BackshiftOnDrop { v: self, read_index: 0, write_index: 0, original_len }; fn process_loop( original_len: usize, @@ -2342,31 +2341,44 @@ impl Vec { ) where F: FnMut(&mut T) -> bool, { - while g.processed_len != original_len { + while g.read_index < original_len { // SAFETY: Unchecked element must be valid. - let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.processed_len) }; + let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.read_index) }; if !f(cur) { - // Advance early to avoid double drop if `drop_in_place` panicked. - g.processed_len += 1; - g.deleted_cnt += 1; + if !DELETED { + // We advance all at once to previous g.read_index. + g.write_index = g.read_index; + } + // Advance read_index early to avoid double drop if `drop_in_place` panicked. + g.read_index += 1; // SAFETY: We never touch this element again after dropped. unsafe { ptr::drop_in_place(cur) }; // We already advanced the counter. - if DELETED { - continue; - } else { + + if !DELETED { + // We found the first deleted element. + // Switch to the second stage. + // read_index is now always greater than write_index. break; } - } - if DELETED { - // SAFETY: `deleted_cnt` > 0, so the hole slot must not overlap with current element. - // We use copy for move, and never touch this element again. - unsafe { - let hole_slot = g.v.as_mut_ptr().add(g.processed_len - g.deleted_cnt); - ptr::copy_nonoverlapping(cur, hole_slot, 1); + } else { + if DELETED { + // SAFETY: `read_index`` > `write_index``, so the slots don't overlap. + // We use copy for move, and never touch the source element again. + unsafe { + let hole_slot = g.v.as_mut_ptr().add(g.write_index); + ptr::copy_nonoverlapping(cur, hole_slot, 1); + } + // We can skip advancing write_index in the first stage and do it all at once. + g.write_index += 1; } + g.read_index += 1; } - g.processed_len += 1; + } + if !DELETED && g.read_index == original_len { + // Finished the first stage without finding any deleted element. + // Set write_index to read_index for the drop stage. + g.write_index = g.read_index; } } @@ -2376,7 +2388,7 @@ impl Vec { // Stage 2: Some elements were deleted. process_loop::(original_len, &mut f, &mut g); - // All item are processed. This can be optimized to `set_len` by LLVM. + // All items are processed. This can be optimized to `set_len` by LLVM. drop(g); } From 9a9f58ddb0d1378abfaf7524447730fb29dfca4f Mon Sep 17 00:00:00 2001 From: Khashayar Fereidani Date: Mon, 8 Dec 2025 23:56:07 +0330 Subject: [PATCH 2/8] Fix Drop logic in BackshiftOnDrop --- library/alloc/src/vec/mod.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/library/alloc/src/vec/mod.rs b/library/alloc/src/vec/mod.rs index 4fae6271a6d6e..4cfcc75f2bd46 100644 --- a/library/alloc/src/vec/mod.rs +++ b/library/alloc/src/vec/mod.rs @@ -2315,7 +2315,7 @@ impl Vec { impl Drop for BackshiftOnDrop<'_, T, A> { fn drop(&mut self) { - if self.write_index < self.read_index { + if self.read_index < self.original_len { // SAFETY: Trailing unchecked items must be valid since we never touch them. unsafe { ptr::copy( @@ -2363,7 +2363,7 @@ impl Vec { } } else { if DELETED { - // SAFETY: `read_index`` > `write_index``, so the slots don't overlap. + // SAFETY: `read_index` > `write_index`, so the slots don't overlap. // We use copy for move, and never touch the source element again. unsafe { let hole_slot = g.v.as_mut_ptr().add(g.write_index); @@ -2375,11 +2375,6 @@ impl Vec { g.read_index += 1; } } - if !DELETED && g.read_index == original_len { - // Finished the first stage without finding any deleted element. - // Set write_index to read_index for the drop stage. - g.write_index = g.read_index; - } } // Stage 1: Nothing was deleted. From fa2ae99d03e4bc4c24b63ef3ebc33c1bada3a79e Mon Sep 17 00:00:00 2001 From: Khashayar Fereidani Date: Tue, 9 Dec 2025 00:29:24 +0330 Subject: [PATCH 3/8] Fix logic in BackshiftOnDrop drop implementation --- library/alloc/src/vec/mod.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/library/alloc/src/vec/mod.rs b/library/alloc/src/vec/mod.rs index 4cfcc75f2bd46..d74c5ed3137c8 100644 --- a/library/alloc/src/vec/mod.rs +++ b/library/alloc/src/vec/mod.rs @@ -2315,7 +2315,7 @@ impl Vec { impl Drop for BackshiftOnDrop<'_, T, A> { fn drop(&mut self) { - if self.read_index < self.original_len { + if self.write_index < self.read_index { // SAFETY: Trailing unchecked items must be valid since we never touch them. unsafe { ptr::copy( @@ -2345,10 +2345,6 @@ impl Vec { // SAFETY: Unchecked element must be valid. let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.read_index) }; if !f(cur) { - if !DELETED { - // We advance all at once to previous g.read_index. - g.write_index = g.read_index; - } // Advance read_index early to avoid double drop if `drop_in_place` panicked. g.read_index += 1; // SAFETY: We never touch this element again after dropped. @@ -2369,9 +2365,8 @@ impl Vec { let hole_slot = g.v.as_mut_ptr().add(g.write_index); ptr::copy_nonoverlapping(cur, hole_slot, 1); } - // We can skip advancing write_index in the first stage and do it all at once. - g.write_index += 1; } + g.write_index += 1; g.read_index += 1; } } From 6184c1a9ebd85ece2df4ee8484e74da651d5ee2a Mon Sep 17 00:00:00 2001 From: Khashayar Fereidani Date: Tue, 9 Dec 2025 12:27:13 +0330 Subject: [PATCH 4/8] Add more optimization for retain_mut by improving fast paths --- library/alloc/src/vec/mod.rs | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/library/alloc/src/vec/mod.rs b/library/alloc/src/vec/mod.rs index d74c5ed3137c8..db037a73b4bac 100644 --- a/library/alloc/src/vec/mod.rs +++ b/library/alloc/src/vec/mod.rs @@ -2315,24 +2315,32 @@ impl Vec { impl Drop for BackshiftOnDrop<'_, T, A> { fn drop(&mut self) { - if self.write_index < self.read_index { - // SAFETY: Trailing unchecked items must be valid since we never touch them. + if self.read_index < self.original_len { + if self.write_index < self.read_index { + // SAFETY: Trailing unchecked items must be valid since we never touch them. + unsafe { + ptr::copy( + self.v.as_ptr().add(self.read_index), + self.v.as_mut_ptr().add(self.write_index), + self.original_len - self.read_index, + ); + } + } + // SAFETY: After filling holes, all items are in contiguous memory. unsafe { - ptr::copy( - self.v.as_ptr().add(self.read_index), - self.v.as_mut_ptr().add(self.write_index), - self.original_len - self.read_index, - ); + self.v.set_len(self.original_len - (self.read_index - self.write_index)); } + return; } // SAFETY: After filling holes, all items are in contiguous memory. unsafe { - self.v.set_len(self.original_len - (self.read_index - self.write_index)); + self.v.set_len(self.write_index.min(self.read_index)); } } } - let mut g = BackshiftOnDrop { v: self, read_index: 0, write_index: 0, original_len }; + let mut g = + BackshiftOnDrop { v: self, read_index: 0, write_index: usize::MAX, original_len }; fn process_loop( original_len: usize, @@ -2345,12 +2353,15 @@ impl Vec { // SAFETY: Unchecked element must be valid. let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.read_index) }; if !f(cur) { + if !DELETED { + // We set g.write_index only once when we found the first deleted element. + g.write_index = g.read_index; + } // Advance read_index early to avoid double drop if `drop_in_place` panicked. g.read_index += 1; // SAFETY: We never touch this element again after dropped. unsafe { ptr::drop_in_place(cur) }; // We already advanced the counter. - if !DELETED { // We found the first deleted element. // Switch to the second stage. @@ -2365,8 +2376,8 @@ impl Vec { let hole_slot = g.v.as_mut_ptr().add(g.write_index); ptr::copy_nonoverlapping(cur, hole_slot, 1); } + g.write_index += 1; } - g.write_index += 1; g.read_index += 1; } } From 5d55a0f712260c2827a4cf8e59fb3893cf1c9a18 Mon Sep 17 00:00:00 2001 From: Khashayar Fereidani Date: Tue, 9 Dec 2025 13:59:12 +0330 Subject: [PATCH 5/8] Fix Drop implementation for BackshiftOnDrop --- library/alloc/src/vec/mod.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/library/alloc/src/vec/mod.rs b/library/alloc/src/vec/mod.rs index db037a73b4bac..2255e842cc5ce 100644 --- a/library/alloc/src/vec/mod.rs +++ b/library/alloc/src/vec/mod.rs @@ -2316,23 +2316,23 @@ impl Vec { impl Drop for BackshiftOnDrop<'_, T, A> { fn drop(&mut self) { if self.read_index < self.original_len { - if self.write_index < self.read_index { - // SAFETY: Trailing unchecked items must be valid since we never touch them. - unsafe { - ptr::copy( - self.v.as_ptr().add(self.read_index), - self.v.as_mut_ptr().add(self.write_index), - self.original_len - self.read_index, - ); - } + self.write_index = self.write_index.min(self.read_index); + let remaining = self.original_len - self.read_index; + // SAFETY: Trailing unchecked items must be valid since we never touch them. + unsafe { + ptr::copy( + self.v.as_ptr().add(self.read_index), + self.v.as_mut_ptr().add(self.write_index), + remaining, + ); } // SAFETY: After filling holes, all items are in contiguous memory. unsafe { - self.v.set_len(self.original_len - (self.read_index - self.write_index)); + self.v.set_len(self.write_index + remaining); } return; } - // SAFETY: After filling holes, all items are in contiguous memory. + // SAFETY: no panic happened, so all items are in contiguous memory. unsafe { self.v.set_len(self.write_index.min(self.read_index)); } From a503e899f1152f590afa76a7d7b3b59d8c819ef2 Mon Sep 17 00:00:00 2001 From: Khashayar Fereidani Date: Tue, 16 Dec 2025 11:39:29 +0330 Subject: [PATCH 6/8] optimize and simplify `retain_mut` implementation --- library/alloc/src/vec/mod.rs | 83 +++++++++++++++--------------------- 1 file changed, 34 insertions(+), 49 deletions(-) diff --git a/library/alloc/src/vec/mod.rs b/library/alloc/src/vec/mod.rs index 2255e842cc5ce..9ca903bb87450 100644 --- a/library/alloc/src/vec/mod.rs +++ b/library/alloc/src/vec/mod.rs @@ -2315,8 +2315,7 @@ impl Vec { impl Drop for BackshiftOnDrop<'_, T, A> { fn drop(&mut self) { - if self.read_index < self.original_len { - self.write_index = self.write_index.min(self.read_index); + if core::intrinsics::unlikely(self.read_index < self.original_len) { let remaining = self.original_len - self.read_index; // SAFETY: Trailing unchecked items must be valid since we never touch them. unsafe { @@ -2332,63 +2331,49 @@ impl Vec { } return; } - // SAFETY: no panic happened, so all items are in contiguous memory. + // SAFETY: no panic happened, length is just write_index. unsafe { - self.v.set_len(self.write_index.min(self.read_index)); + self.v.set_len(self.write_index); } } } - let mut g = - BackshiftOnDrop { v: self, read_index: 0, write_index: usize::MAX, original_len }; + let mut i = 0; + loop { + // SAFETY: i < original_len + if !f(unsafe { &mut *self.as_mut_ptr().add(i) }) { + break; + } + i += 1; + if i == original_len { + // SAFETY: All elements are kept, set length back to original and return. + unsafe { self.set_len(original_len) }; + return; + } + } - fn process_loop( - original_len: usize, - f: &mut F, - g: &mut BackshiftOnDrop<'_, T, A>, - ) where - F: FnMut(&mut T) -> bool, - { - while g.read_index < original_len { - // SAFETY: Unchecked element must be valid. - let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.read_index) }; - if !f(cur) { - if !DELETED { - // We set g.write_index only once when we found the first deleted element. - g.write_index = g.read_index; - } - // Advance read_index early to avoid double drop if `drop_in_place` panicked. - g.read_index += 1; - // SAFETY: We never touch this element again after dropped. - unsafe { ptr::drop_in_place(cur) }; - // We already advanced the counter. - if !DELETED { - // We found the first deleted element. - // Switch to the second stage. - // read_index is now always greater than write_index. - break; - } - } else { - if DELETED { - // SAFETY: `read_index` > `write_index`, so the slots don't overlap. - // We use copy for move, and never touch the source element again. - unsafe { - let hole_slot = g.v.as_mut_ptr().add(g.write_index); - ptr::copy_nonoverlapping(cur, hole_slot, 1); - } - g.write_index += 1; - } - g.read_index += 1; + // We have found the first deleted element. we need to use guard from this point. + let mut g = BackshiftOnDrop { v: self, read_index: i + 1, write_index: i, original_len }; + while g.read_index < g.original_lens { + // SAFETY: read_index is always less than original_len re. + let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.read_index) }; + if !f(cur) { + // Advance read_index early to avoid double drop if `drop_in_place` panicked. + g.read_index += 1; + // SAFETY: We never touch this element again after dropped. + unsafe { ptr::drop_in_place(cur) }; + } else { + // SAFETY: `read_index` > `write_index`, so the slots don't overlap. + // We use copy for move, and never touch the source element again. + unsafe { + let hole = g.v.as_mut_ptr().add(g.write_index); + ptr::copy_nonoverlapping(cur, hole, 1); } + g.write_index += 1; + g.read_index += 1; } } - // Stage 1: Nothing was deleted. - process_loop::(original_len, &mut f, &mut g); - - // Stage 2: Some elements were deleted. - process_loop::(original_len, &mut f, &mut g); - // All items are processed. This can be optimized to `set_len` by LLVM. drop(g); } From 212983d7ddc9a0d7efe9390cc65ff27aabde8169 Mon Sep 17 00:00:00 2001 From: Khashayar Fereidani Date: Tue, 16 Dec 2025 11:49:45 +0330 Subject: [PATCH 7/8] Fix typo in original_len variable usage --- library/alloc/src/vec/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/alloc/src/vec/mod.rs b/library/alloc/src/vec/mod.rs index 9ca903bb87450..42016b772ed6b 100644 --- a/library/alloc/src/vec/mod.rs +++ b/library/alloc/src/vec/mod.rs @@ -2354,7 +2354,7 @@ impl Vec { // We have found the first deleted element. we need to use guard from this point. let mut g = BackshiftOnDrop { v: self, read_index: i + 1, write_index: i, original_len }; - while g.read_index < g.original_lens { + while g.read_index < g.original_len { // SAFETY: read_index is always less than original_len re. let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.read_index) }; if !f(cur) { From b61ad78922bc907d08065caf5ab2b82ca6dadb75 Mon Sep 17 00:00:00 2001 From: Khashayar Fereidani Date: Tue, 16 Dec 2025 16:44:32 +0330 Subject: [PATCH 8/8] improve and optimize `retain_mut` implementation. --- library/alloc/src/vec/mod.rs | 67 +++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 28 deletions(-) diff --git a/library/alloc/src/vec/mod.rs b/library/alloc/src/vec/mod.rs index 42016b772ed6b..d8cba4e04ce8a 100644 --- a/library/alloc/src/vec/mod.rs +++ b/library/alloc/src/vec/mod.rs @@ -2313,49 +2313,59 @@ impl Vec { original_len: usize, } + impl BackshiftOnDrop<'_, T, A> { + unsafe fn commit(self, new_len: usize) { + // SAFETY: Caller guarantees new_len <= capacity. + unsafe { self.v.set_len(new_len) }; + core::mem::forget(self); + } + } + impl Drop for BackshiftOnDrop<'_, T, A> { + #[cold] fn drop(&mut self) { - if core::intrinsics::unlikely(self.read_index < self.original_len) { - let remaining = self.original_len - self.read_index; - // SAFETY: Trailing unchecked items must be valid since we never touch them. - unsafe { - ptr::copy( - self.v.as_ptr().add(self.read_index), - self.v.as_mut_ptr().add(self.write_index), - remaining, - ); - } - // SAFETY: After filling holes, all items are in contiguous memory. - unsafe { - self.v.set_len(self.write_index + remaining); - } - return; + // Panic happened, we need to backshift unchecked elements to cover holes. + if self.write_index >= self.read_index { + // SAFETY: No elements were removed. + return unsafe { self.v.set_len(self.original_len) }; + } + let remaining = self.original_len - self.read_index; + // SAFETY: Trailing unchecked items must be valid since we never touch them. + unsafe { + ptr::copy( + self.v.as_ptr().add(self.read_index), + self.v.as_mut_ptr().add(self.write_index), + remaining, + ); } - // SAFETY: no panic happened, length is just write_index. + // SAFETY: After filling holes, all items are in contiguous memory. unsafe { - self.v.set_len(self.write_index); + self.v.set_len(self.write_index + remaining); } } } - let mut i = 0; + let mut g = + BackshiftOnDrop { v: self, read_index: 0, write_index: usize::MAX, original_len }; loop { // SAFETY: i < original_len - if !f(unsafe { &mut *self.as_mut_ptr().add(i) }) { + let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.read_index) }; + if core::intrinsics::unlikely(!f(cur)) { + g.write_index = g.read_index; + // Advance read_index early to avoid double drop if `drop_in_place` panicked. + g.read_index += 1; + unsafe { ptr::drop_in_place(cur) }; break; } - i += 1; - if i == original_len { + g.read_index += 1; + if g.read_index == original_len { // SAFETY: All elements are kept, set length back to original and return. - unsafe { self.set_len(original_len) }; - return; + return unsafe { g.commit(original_len) }; } } - // We have found the first deleted element. we need to use guard from this point. - let mut g = BackshiftOnDrop { v: self, read_index: i + 1, write_index: i, original_len }; while g.read_index < g.original_len { - // SAFETY: read_index is always less than original_len re. + // SAFETY: read_index is always less than original_len. let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.read_index) }; if !f(cur) { // Advance read_index early to avoid double drop if `drop_in_place` panicked. @@ -2374,8 +2384,9 @@ impl Vec { } } - // All items are processed. This can be optimized to `set_len` by LLVM. - drop(g); + let new_len = g.write_index; + // SAFETY: After the loop, all kept elements are moved behind write_index. + unsafe { g.commit(new_len) } } /// Removes all but the first of consecutive elements in the vector that resolve to the same