Auto merge of #282 - Zoxc:size-opt, r=Amanieu
Make rehashing and resizing less generic

This makes the code in `rehash_in_place`, `resize` and `reserve_rehash` less generic on `T`. It also improves the performance of rustc. That performance increase is partially attributed to the use of `#[inline(always)]`.
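
Below is a minimal, hedged sketch of the outlining pattern the commit applies; it is not the hashbrown code itself and the names are illustrative. A thin generic shim erases `T` into an element size, a `&dyn Fn` hasher and an optional drop function, so the large rehashing body is compiled only once, while `#[inline(always)]` lets LLVM re-specialize it at each optimized call site.
```rust
use core::{mem, ptr};

/// Non-generic worker: compiled once and shared by every element type.
/// `#[inline(always)]` lets the optimizer re-specialize it at each
/// monomorphic call site, so release builds lose little to the indirection.
#[inline(always)]
unsafe fn rehash_erased(
    data: *mut u8,
    len: usize,
    size_of: usize,
    hash_at: &dyn Fn(*const u8) -> u64,
    drop_at: Option<fn(*mut u8)>,
) {
    for i in 0..len {
        let p = data.add(i * size_of);
        let _hash = hash_at(p); // the probing/moving logic would live here
    }
    let _ = drop_at; // only needed for panic cleanup in the real code
}

/// Thin generic entry point: the only code instantiated per `T`.
fn rehash<T>(items: &mut [T], hasher: impl Fn(&T) -> u64) {
    unsafe {
        rehash_erased(
            items.as_mut_ptr().cast::<u8>(),
            items.len(),
            mem::size_of::<T>(),
            // Erase the typed hasher behind a `&dyn Fn` over raw bytes.
            &|p| hasher(&*p.cast::<T>()),
            // Erase dropping of `T` into a plain `fn(*mut u8)`.
            if mem::needs_drop::<T>() {
                Some((|p: *mut u8| ptr::drop_in_place(p.cast::<T>())) as fn(*mut u8))
            } else {
                None
            },
        );
    }
}

fn main() {
    let mut v = vec![1u64, 2, 3];
    rehash(&mut v, |x| x.wrapping_mul(0x9E37_79B9_7F4A_7C15));
}
```
In the actual change, the erased workers are `RawTableInner::reserve_rehash_inner`, `resize_inner` and `rehash_in_place`, and the drop function is obtained via `mem::transmute(ptr::drop_in_place::<T> as unsafe fn(*mut T))`, as the diff below shows.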

This is the effect on rustc runtime:
```
clap:check                        1.9523s   1.9327s  -1.00%
hashmap-instances:check           0.0628s   0.0624s  -0.57%
helloworld:check                  0.0438s   0.0436s  -0.50%
hyper:check                       0.2987s   0.2970s  -0.59%
regex:check                       1.1497s   1.1402s  -0.82%
syn:check                         1.7004s   1.6851s  -0.90%
syntex_syntax:check               6.9232s   6.8546s  -0.99%
winapi:check                      8.3220s   8.2857s  -0.44%

Total                            20.4528s  20.3014s  -0.74%
Summary                           4.0000s   3.9709s  -0.73%
```
`rustc_driver`'s code size is increased by 0.02%.

This is the effect on compile time for my [HashMap compile time benchmark](#277 (comment)):
```
hashmap-instances:check           0.0636s   0.0632s  -0.61%
hashmap-instances:release        33.0166s  32.2487s  -2.33%
hashmap-instances:debug           7.8677s   7.2012s  -8.47%

Total                            40.9479s  39.5131s  -3.50%
Summary                           1.5000s   1.4430s  -3.80%
```
The `hashmap-instances:debug` compile time could be further improved if there were a way to apply `#[inline(always)]` only on release builds.
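
One conceivable approximation (a hypothetical sketch, not something this commit or hashbrown does) is to gate the attribute with `cfg_attr`, for example on `debug_assertions`; note this keys off the profile this crate itself is built with, which is only a proxy for the downstream build type.
```rust
// Hypothetical: apply aggressive inlining only when this crate is compiled
// with optimizations (approximated here via `debug_assertions`).
#[cfg_attr(not(debug_assertions), inline(always))]
#[cfg_attr(debug_assertions, inline)]
fn hot_inner_body(data: &[u64]) -> u64 {
    // Stand-in for a large, frequently instantiated body such as
    // `reserve_rehash_inner`.
    data.iter().fold(0, |acc, x| acc ^ x.rotate_left(7))
}

fn main() {
    let v: Vec<u64> = (0..16).collect();
    println!("{}", hot_inner_body(&v));
}
```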
bors committed Jul 21, 2021
2 parents a036b25 + bf8635d commit 4aca8e4
333 changes: 213 additions & 120 deletions src/raw/mod.rs
@@ -3,14 +3,14 @@ use crate::scopeguard::guard;
use crate::TryReserveError;
#[cfg(feature = "nightly")]
use crate::UnavailableMutError;
use core::hint;
use core::iter::FusedIterator;
use core::marker::PhantomData;
use core::mem;
use core::mem::ManuallyDrop;
#[cfg(feature = "nightly")]
use core::mem::MaybeUninit;
use core::ptr::NonNull;
use core::{hint, ptr};

cfg_if! {
// Use the SSE2 implementation if possible: it allows us to scan 16 buckets
@@ -359,6 +359,7 @@ impl<T> Bucket<T> {
pub unsafe fn as_mut<'a>(&self) -> &'a mut T {
&mut *self.as_ptr()
}
#[cfg(feature = "raw")]
#[cfg_attr(feature = "inline-more", inline)]
pub unsafe fn copy_from_nonoverlapping(&self, other: &Self) {
self.as_ptr().copy_from_nonoverlapping(other.as_ptr(), 1);
@@ -682,102 +683,18 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
hasher: impl Fn(&T) -> u64,
fallibility: Fallibility,
) -> Result<(), TryReserveError> {
// Avoid `Option::ok_or_else` because it bloats LLVM IR.
let new_items = match self.table.items.checked_add(additional) {
Some(new_items) => new_items,
None => return Err(fallibility.capacity_overflow()),
};
let full_capacity = bucket_mask_to_capacity(self.table.bucket_mask);
if new_items <= full_capacity / 2 {
// Rehash in-place without re-allocating if we have plenty of spare
// capacity that is locked up due to DELETED entries.
self.rehash_in_place(hasher);
Ok(())
} else {
// Otherwise, conservatively resize to at least the next size up
// to avoid churning deletes into frequent rehashes.
self.resize(
usize::max(new_items, full_capacity + 1),
hasher,
fallibility,
)
}
}

/// Rehashes the contents of the table in place (i.e. without changing the
/// allocation).
///
/// If `hasher` panics then some of the table's contents may be lost.
fn rehash_in_place(&mut self, hasher: impl Fn(&T) -> u64) {
unsafe {
// If the hash function panics then properly clean up any elements
// that we haven't rehashed yet. We unfortunately can't preserve the
// elements since we have lost their hashes and have no way of recovering
// them without risking another panic.
self.table.prepare_rehash_in_place();

let mut guard = guard(&mut self.table, move |self_| {
self.table.reserve_rehash_inner(
additional,
&|table, index| hasher(table.bucket::<T>(index).as_ref()),
fallibility,
TableLayout::new::<T>(),
if mem::needs_drop::<T>() {
for i in 0..self_.buckets() {
if *self_.ctrl(i) == DELETED {
self_.set_ctrl(i, EMPTY);
self_.bucket::<T>(i).drop();
self_.items -= 1;
}
}
}
self_.growth_left = bucket_mask_to_capacity(self_.bucket_mask) - self_.items;
});

// At this point, DELETED elements are elements that we haven't
// rehashed yet. Find them and re-insert them at their ideal
// position.
'outer: for i in 0..guard.buckets() {
if *guard.ctrl(i) != DELETED {
continue;
}

'inner: loop {
// Hash the current item
let item = guard.bucket(i);
let hash = hasher(item.as_ref());

// Search for a suitable place to put it
let new_i = guard.find_insert_slot(hash);

// Probing works by scanning through all of the control
// bytes in groups, which may not be aligned to the group
// size. If both the new and old position fall within the
// same unaligned group, then there is no benefit in moving
// it and we can just continue to the next item.
if likely(guard.is_in_same_group(i, new_i, hash)) {
guard.set_ctrl_h2(i, hash);
continue 'outer;
}

// We are moving the current item to a new position. Write
// our H2 to the control byte of the new position.
let prev_ctrl = guard.replace_ctrl_h2(new_i, hash);
if prev_ctrl == EMPTY {
guard.set_ctrl(i, EMPTY);
// If the target slot is empty, simply move the current
// element into the new slot and clear the old control
// byte.
guard.bucket(new_i).copy_from_nonoverlapping(&item);
continue 'outer;
} else {
// If the target slot is occupied, swap the two elements
// and then continue processing the element that we just
// swapped into the old slot.
debug_assert_eq!(prev_ctrl, DELETED);
mem::swap(guard.bucket(new_i).as_mut(), item.as_mut());
continue 'inner;
}
}
}

guard.growth_left = bucket_mask_to_capacity(guard.bucket_mask) - guard.items;
mem::forget(guard);
Some(mem::transmute(ptr::drop_in_place::<T> as unsafe fn(*mut T)))
} else {
None
},
)
}
}

@@ -790,30 +707,12 @@ impl<T, A: Allocator + Clone> RawTable<T, A> {
fallibility: Fallibility,
) -> Result<(), TryReserveError> {
unsafe {
let mut new_table =
self.table
.prepare_resize(TableLayout::new::<T>(), capacity, fallibility)?;

// Copy all elements to the new table.
for item in self.iter() {
// This may panic.
let hash = hasher(item.as_ref());

// We can use a simpler version of insert() here since:
// - there are no DELETED entries.
// - we know there is enough space in the table.
// - all elements are unique.
let (index, _) = new_table.prepare_insert_slot(hash);
new_table.bucket(index).copy_from_nonoverlapping(&item);
}

// We successfully copied all elements without panicking. Now replace
// self with the new table. The old table will have its memory freed but
// the items will not be dropped (since they have been moved into the
// new table).
mem::swap(&mut self.table, &mut new_table);

Ok(())
self.table.resize_inner(
capacity,
&|table, index| hasher(table.bucket::<T>(index).as_ref()),
fallibility,
TableLayout::new::<T>(),
)
}
}

@@ -1312,6 +1211,14 @@ impl<A: Allocator + Clone> RawTableInner<A> {
Bucket::from_base_index(self.data_end(), index)
}

#[cfg_attr(feature = "inline-more", inline)]
unsafe fn bucket_ptr(&self, index: usize, size_of: usize) -> *mut u8 {
debug_assert_ne!(self.bucket_mask, 0);
debug_assert!(index < self.buckets());
let base: *mut u8 = self.data_end().as_ptr();
base.sub((index + 1) * size_of)
}

#[cfg_attr(feature = "inline-more", inline)]
unsafe fn data_end<T>(&self) -> NonNull<T> {
NonNull::new_unchecked(self.ctrl.as_ptr().cast())
@@ -1457,6 +1364,178 @@ impl<A: Allocator + Clone> RawTableInner<A> {
}))
}

/// Reserves or rehashes to make room for `additional` more elements.
///
/// This uses dynamic dispatch to reduce the amount of
/// code generated, but it is eliminated by LLVM optimizations when inlined.
#[allow(clippy::inline_always)]
#[inline(always)]
unsafe fn reserve_rehash_inner(
&mut self,
additional: usize,
hasher: &dyn Fn(&mut Self, usize) -> u64,
fallibility: Fallibility,
layout: TableLayout,
drop: Option<fn(*mut u8)>,
) -> Result<(), TryReserveError> {
// Avoid `Option::ok_or_else` because it bloats LLVM IR.
let new_items = match self.items.checked_add(additional) {
Some(new_items) => new_items,
None => return Err(fallibility.capacity_overflow()),
};
let full_capacity = bucket_mask_to_capacity(self.bucket_mask);
if new_items <= full_capacity / 2 {
// Rehash in-place without re-allocating if we have plenty of spare
// capacity that is locked up due to DELETED entries.
self.rehash_in_place(hasher, layout.size, drop);
Ok(())
} else {
// Otherwise, conservatively resize to at least the next size up
// to avoid churning deletes into frequent rehashes.
self.resize_inner(
usize::max(new_items, full_capacity + 1),
hasher,
fallibility,
layout,
)
}
}

/// Allocates a new table of a different size and moves the contents of the
/// current table into it.
///
/// This uses dynamic dispatch to reduce the amount of
/// code generated, but it is eliminated by LLVM optimizations when inlined.
#[allow(clippy::inline_always)]
#[inline(always)]
unsafe fn resize_inner(
&mut self,
capacity: usize,
hasher: &dyn Fn(&mut Self, usize) -> u64,
fallibility: Fallibility,
layout: TableLayout,
) -> Result<(), TryReserveError> {
let mut new_table = self.prepare_resize(layout, capacity, fallibility)?;

// Copy all elements to the new table.
for i in 0..self.buckets() {
if !is_full(*self.ctrl(i)) {
continue;
}

// This may panic.
let hash = hasher(self, i);

// We can use a simpler version of insert() here since:
// - there are no DELETED entries.
// - we know there is enough space in the table.
// - all elements are unique.
let (index, _) = new_table.prepare_insert_slot(hash);

ptr::copy_nonoverlapping(
self.bucket_ptr(i, layout.size),
new_table.bucket_ptr(index, layout.size),
layout.size,
);
}

// We successfully copied all elements without panicking. Now replace
// self with the new table. The old table will have its memory freed but
// the items will not be dropped (since they have been moved into the
// new table).
mem::swap(self, &mut new_table);

Ok(())
}

/// Rehashes the contents of the table in place (i.e. without changing the
/// allocation).
///
/// If `hasher` panics then some of the table's contents may be lost.
///
/// This uses dynamic dispatch to reduce the amount of
/// code generated, but it is eliminated by LLVM optimizations when inlined.
#[allow(clippy::inline_always)]
#[inline(always)]
unsafe fn rehash_in_place(
&mut self,
hasher: &dyn Fn(&mut Self, usize) -> u64,
size_of: usize,
drop: Option<fn(*mut u8)>,
) {
// If the hash function panics then properly clean up any elements
// that we haven't rehashed yet. We unfortunately can't preserve the
// elements since we have lost their hashes and have no way of recovering
// them without risking another panic.
self.prepare_rehash_in_place();

let mut guard = guard(self, move |self_| {
if let Some(drop) = drop {
for i in 0..self_.buckets() {
if *self_.ctrl(i) == DELETED {
self_.set_ctrl(i, EMPTY);
drop(self_.bucket_ptr(i, size_of));
self_.items -= 1;
}
}
}
self_.growth_left = bucket_mask_to_capacity(self_.bucket_mask) - self_.items;
});

// At this point, DELETED elements are elements that we haven't
// rehashed yet. Find them and re-insert them at their ideal
// position.
'outer: for i in 0..guard.buckets() {
if *guard.ctrl(i) != DELETED {
continue;
}

let i_p = guard.bucket_ptr(i, size_of);

'inner: loop {
// Hash the current item
let hash = hasher(*guard, i);

// Search for a suitable place to put it
let new_i = guard.find_insert_slot(hash);
let new_i_p = guard.bucket_ptr(new_i, size_of);

// Probing works by scanning through all of the control
// bytes in groups, which may not be aligned to the group
// size. If both the new and old position fall within the
// same unaligned group, then there is no benefit in moving
// it and we can just continue to the next item.
if likely(guard.is_in_same_group(i, new_i, hash)) {
guard.set_ctrl_h2(i, hash);
continue 'outer;
}

// We are moving the current item to a new position. Write
// our H2 to the control byte of the new position.
let prev_ctrl = guard.replace_ctrl_h2(new_i, hash);
if prev_ctrl == EMPTY {
guard.set_ctrl(i, EMPTY);
// If the target slot is empty, simply move the current
// element into the new slot and clear the old control
// byte.
ptr::copy_nonoverlapping(i_p, new_i_p, size_of);
continue 'outer;
} else {
// If the target slot is occupied, swap the two elements
// and then continue processing the element that we just
// swapped into the old slot.
debug_assert_eq!(prev_ctrl, DELETED);
ptr::swap_nonoverlapping(i_p, new_i_p, size_of);
continue 'inner;
}
}
}

guard.growth_left = bucket_mask_to_capacity(guard.bucket_mask) - guard.items;

mem::forget(guard);
}

#[inline]
unsafe fn free_buckets(&mut self, table_layout: TableLayout) {
// Avoid `Option::unwrap_or_else` because it bloats LLVM IR.
@@ -2281,6 +2360,20 @@ impl<'a, A: Allocator + Clone> Iterator for RawIterHashInner<'a, A> {
mod test_map {
use super::*;

fn rehash_in_place<T>(table: &mut RawTable<T>, hasher: impl Fn(&T) -> u64) {
unsafe {
table.table.rehash_in_place(
&|table, index| hasher(table.bucket::<T>(index).as_ref()),
mem::size_of::<T>(),
if mem::needs_drop::<T>() {
Some(mem::transmute(ptr::drop_in_place::<T> as unsafe fn(*mut T)))
} else {
None
},
);
}
}

#[test]
fn rehash() {
let mut table = RawTable::new();
@@ -2296,7 +2389,7 @@ mod test_map {
assert!(table.find(i + 100, |x| *x == i + 100).is_none());
}

table.rehash_in_place(hasher);
rehash_in_place(&mut table, hasher);

for i in 0..100 {
unsafe {