Skip to content

Commit 3ec64be

Browse files
Auto merge of #149784 - fereidani:retain_mut, r=<try>
Improve alloc `Vec::retain_mut` performance
2 parents d5525a7 + 5d55a0f commit 3ec64be

File tree

1 file changed

+41
-28
lines changed

1 file changed

+41
-28
lines changed

library/alloc/src/vec/mod.rs

Lines changed: 41 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2297,8 +2297,7 @@ impl<T, A: Allocator> Vec<T, A> {
22972297
unsafe { self.set_len(0) };
22982298

22992299
// Vec: [Kept, Kept, Hole, Hole, Hole, Hole, Unchecked, Unchecked]
2300-
// |<- processed len ->| ^- next to check
2301-
// |<- deleted cnt ->|
2300+
// | ^- write_index ^- read_index
23022301
// |<- original_len ->|
23032302
// Kept: Elements which predicate returns true on.
23042303
// Hole: Moved or dropped element slot.
@@ -2309,31 +2308,39 @@ impl<T, A: Allocator> Vec<T, A> {
23092308
// In cases when predicate and `drop` never panic, it will be optimized out.
23102309
struct BackshiftOnDrop<'a, T, A: Allocator> {
23112310
v: &'a mut Vec<T, A>,
2312-
processed_len: usize,
2313-
deleted_cnt: usize,
2311+
read_index: usize,
2312+
write_index: usize,
23142313
original_len: usize,
23152314
}
23162315

23172316
impl<T, A: Allocator> Drop for BackshiftOnDrop<'_, T, A> {
23182317
fn drop(&mut self) {
2319-
if self.deleted_cnt > 0 {
2318+
if self.read_index < self.original_len {
2319+
self.write_index = self.write_index.min(self.read_index);
2320+
let remaining = self.original_len - self.read_index;
23202321
// SAFETY: Trailing unchecked items must be valid since we never touch them.
23212322
unsafe {
23222323
ptr::copy(
2323-
self.v.as_ptr().add(self.processed_len),
2324-
self.v.as_mut_ptr().add(self.processed_len - self.deleted_cnt),
2325-
self.original_len - self.processed_len,
2324+
self.v.as_ptr().add(self.read_index),
2325+
self.v.as_mut_ptr().add(self.write_index),
2326+
remaining,
23262327
);
23272328
}
2329+
// SAFETY: After filling holes, all items are in contiguous memory.
2330+
unsafe {
2331+
self.v.set_len(self.write_index + remaining);
2332+
}
2333+
return;
23282334
}
2329-
// SAFETY: After filling holes, all items are in contiguous memory.
2335+
// SAFETY: no panic happened, so all items are in contiguous memory.
23302336
unsafe {
2331-
self.v.set_len(self.original_len - self.deleted_cnt);
2337+
self.v.set_len(self.write_index.min(self.read_index));
23322338
}
23332339
}
23342340
}
23352341

2336-
let mut g = BackshiftOnDrop { v: self, processed_len: 0, deleted_cnt: 0, original_len };
2342+
let mut g =
2343+
BackshiftOnDrop { v: self, read_index: 0, write_index: usize::MAX, original_len };
23372344

23382345
fn process_loop<F, T, A: Allocator, const DELETED: bool>(
23392346
original_len: usize,
@@ -2342,31 +2349,37 @@ impl<T, A: Allocator> Vec<T, A> {
23422349
) where
23432350
F: FnMut(&mut T) -> bool,
23442351
{
2345-
while g.processed_len != original_len {
2352+
while g.read_index < original_len {
23462353
// SAFETY: Unchecked element must be valid.
2347-
let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.processed_len) };
2354+
let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.read_index) };
23482355
if !f(cur) {
2349-
// Advance early to avoid double drop if `drop_in_place` panicked.
2350-
g.processed_len += 1;
2351-
g.deleted_cnt += 1;
2356+
if !DELETED {
2357+
// We set g.write_index only once when we found the first deleted element.
2358+
g.write_index = g.read_index;
2359+
}
2360+
// Advance read_index early to avoid double drop if `drop_in_place` panicked.
2361+
g.read_index += 1;
23522362
// SAFETY: We never touch this element again after it is dropped.
23532363
unsafe { ptr::drop_in_place(cur) };
23542364
// We already advanced the counter.
2355-
if DELETED {
2356-
continue;
2357-
} else {
2365+
if !DELETED {
2366+
// We found the first deleted element.
2367+
// Switch to the second stage.
2368+
// read_index is now always greater than write_index.
23582369
break;
23592370
}
2360-
}
2361-
if DELETED {
2362-
// SAFETY: `deleted_cnt` > 0, so the hole slot must not overlap with current element.
2363-
// We use copy for move, and never touch this element again.
2364-
unsafe {
2365-
let hole_slot = g.v.as_mut_ptr().add(g.processed_len - g.deleted_cnt);
2366-
ptr::copy_nonoverlapping(cur, hole_slot, 1);
2371+
} else {
2372+
if DELETED {
2373+
// SAFETY: `read_index` > `write_index`, so the slots don't overlap.
2374+
// We use copy for move, and never touch the source element again.
2375+
unsafe {
2376+
let hole_slot = g.v.as_mut_ptr().add(g.write_index);
2377+
ptr::copy_nonoverlapping(cur, hole_slot, 1);
2378+
}
2379+
g.write_index += 1;
23672380
}
2381+
g.read_index += 1;
23682382
}
2369-
g.processed_len += 1;
23702383
}
23712384
}
23722385

@@ -2376,7 +2389,7 @@ impl<T, A: Allocator> Vec<T, A> {
23762389
// Stage 2: Some elements were deleted.
23772390
process_loop::<F, T, A, true>(original_len, &mut f, &mut g);
23782391

2379-
// All item are processed. This can be optimized to `set_len` by LLVM.
2392+
// All items are processed. This can be optimized to `set_len` by LLVM.
23802393
drop(g);
23812394
}
23822395

0 commit comments

Comments
 (0)