Skip to content

Commit

Permalink
WIP Switch to a full bitwidth h2
Browse files Browse the repository at this point in the history
* Changes:

- Use all 256 values of h2, not just 130 of them.
- Convert SSE2 implementation for benchmarking.

* Motivation:

Using 256 values instead of 130 could theoretically lower the number of
false-positive residual matches by close to 50%.

On the other hand, it does make h2 slightly more complicated to compute,
and possibly to operate on.
  • Loading branch information
matthieu-m committed Mar 24, 2024
1 parent 3741813 commit 2098bd4
Show file tree
Hide file tree
Showing 2 changed files with 241 additions and 28 deletions.
217 changes: 207 additions & 10 deletions src/raw/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,28 +105,29 @@ trait SizedTypeProperties: Sized {
impl<T> SizedTypeProperties for T {}

/// Control byte value for an empty bucket.
///
/// This is 127, the largest value of the byte when it is reinterpreted as an
/// `i8`.
const EMPTY: u8 = 0b0111_1111;

/// Control byte value for a deleted bucket.
///
/// This is 126, the second-largest value of the byte when it is reinterpreted
/// as an `i8`.
const DELETED: u8 = 0b0111_1110;

/// Checks whether a control byte represents a full bucket (top bit is clear).
#[inline]
fn is_full(ctrl: u8) -> bool {
ctrl & 0x80 == 0
(ctrl as i8) < (DELETED as i8)
}

/// Checks whether a control byte represents a special value (top bit is set).
#[inline]
fn is_special(ctrl: u8) -> bool {
ctrl & 0x80 != 0
(ctrl as i8) >= (DELETED as i8)
}

/// Checks whether a special control value is `EMPTY`.
///
/// `ctrl` must be a special value; this is checked in debug builds only.
#[inline]
fn special_is_empty(ctrl: u8) -> bool {
    debug_assert!(is_special(ctrl));

    ctrl == EMPTY
}

/// Primary hash function, used to select the initial bucket to probe from.
Expand All @@ -137,23 +138,46 @@ fn h1(hash: u64) -> usize {
hash as usize
}

// Constant used by the h2 function when grabbing the top 8 bits of the hash:
// the smaller of the byte widths of `usize` and `u64`.
const MIN_HASH_LEN: usize = {
    let usize_len = mem::size_of::<usize>();
    let u64_len = mem::size_of::<u64>();

    if usize_len < u64_len {
        usize_len
    } else {
        u64_len
    }
};

/// Secondary hash function, saved in the control byte.
///
/// Maps the top 8 bits of `hash` to a control byte. The two byte values that
/// would collide with `EMPTY` and `DELETED` are remapped (by adding 8) so that
/// the result is never a special control value.
#[inline]
#[allow(clippy::cast_possible_truncation)]
fn h2(hash: u64) -> u8 {
    /// Builds the lookup table from a raw top byte to its control byte.
    const fn compute_control() -> [u8; 256] {
        let mut result = [0; 256];

        let mut i = 0;

        while i < 256 {
            result[i] = i as u8;

            i += 1;
        }

        // Avoid overlap with special values.
        result[EMPTY as usize] += 8;
        result[DELETED as usize] += 8;

        result
    }

    const CONTROL: [u8; 256] = compute_control();

    // Grab the top 8 bits of the hash. While the hash is normally a full 64-bit
    // value, some hash functions (such as FxHash) produce a usize result
    // instead, which means that the top 32 bits are 0 on 32-bit platforms.
    // So we use MIN_HASH_LEN constant to handle this.
    let top8 = hash >> (MIN_HASH_LEN * 8 - 8);

    // Lookup matching control byte, avoid overlap with special control.
    //
    // The mask keeps the index within the table even when a full 64-bit hash
    // is supplied on a platform where MIN_HASH_LEN is smaller than 8.
    CONTROL[(top8 & 0xff) as usize]
}

/// Probe sequence based on triangular numbers, which is guaranteed (since our
Expand Down Expand Up @@ -4562,6 +4586,179 @@ impl<T, A: Allocator> RawExtractIf<'_, T, A> {
}
}

#[cfg(test)]
mod test_group {
use super::*;

type RawGroup = [u8; Group::WIDTH];

/// Builds a `Group` from the bytes of `raw`.
fn load(raw: RawGroup) -> Group {
    let bytes = raw.as_ptr();

    // SAFETY:
    // - `bytes` points at `raw`, whose length is exactly `Group::WIDTH`.
    unsafe { Group::load(bytes) }
}

fn store(group: Group) -> RawGroup {
#[repr(align(16))]
struct Aligned(RawGroup);

let mut result = Aligned(RawGroup::default());

// Safety:
// - `raw.len() == Group::WIDTH`.
// - `raw` is suitably aligned.
unsafe { group.store_aligned(result.0.as_mut_ptr()) }

result.0
}

/// `match_byte` must report exactly the slots holding the searched byte.
#[test]
fn test_match_byte() {
    use ::alloc::vec::Vec;

    // Even slots are EMPTY, odd slots hold the byte being searched for.
    let mut raw = RawGroup::default();
    for (index, byte) in raw.iter_mut().enumerate() {
        *byte = if index % 2 == 0 { EMPTY } else { 0x44 };
    }

    let matched: Vec<_> = load(raw).match_byte(0x44).into_iter().collect();

    assert_eq!(Group::WIDTH / 2, matched.len(), "{matched:?}");
    assert!(matched.iter().all(|&index| index % 2 == 1), "{matched:?}");
}

/// `match_empty` must report EMPTY slots and ignore DELETED ones.
#[test]
fn test_match_empty() {
    use ::alloc::vec::Vec;

    // Even slots are EMPTY, odd slots are DELETED.
    let mut raw = RawGroup::default();
    for (index, byte) in raw.iter_mut().enumerate() {
        *byte = if index % 2 == 0 { EMPTY } else { DELETED };
    }

    let empty: Vec<_> = load(raw).match_empty().into_iter().collect();

    assert_eq!(Group::WIDTH / 2, empty.len(), "{empty:?}");
    assert!(empty.iter().all(|&index| index % 2 == 0), "{empty:?}");
}

/// `match_empty_or_deleted` must report both special values and nothing else.
#[test]
fn test_match_empty_or_deleted() {
    use ::alloc::vec::Vec;

    // Repeating pattern: special bytes on even slots, full bytes (one with the
    // top bit clear, one with it set) on odd slots.
    let mut raw = RawGroup::default();
    for (index, byte) in raw.iter_mut().enumerate() {
        *byte = match index % 4 {
            0 => EMPTY,
            1 => 2,
            2 => DELETED,
            3 => 255,
            _ => unreachable!("i % 4 < 4"),
        };
    }

    let empty_or_deleted: Vec<_> = load(raw).match_empty_or_deleted().into_iter().collect();

    assert_eq!(
        Group::WIDTH / 2,
        empty_or_deleted.len(),
        "{empty_or_deleted:?}"
    );
    assert!(
        empty_or_deleted.iter().all(|&index| index % 2 == 0),
        "{empty_or_deleted:?}"
    );
}

/// `match_full` must report every slot that is neither EMPTY nor DELETED.
#[test]
fn test_match_full() {
    use ::alloc::vec::Vec;

    // Repeating pattern: special bytes on even slots, full bytes (one with the
    // top bit clear, one with it set) on odd slots.
    let mut raw = RawGroup::default();
    for (index, byte) in raw.iter_mut().enumerate() {
        *byte = match index % 4 {
            0 => EMPTY,
            1 => 2,
            2 => DELETED,
            3 => 255,
            _ => unreachable!("i % 4 < 4"),
        };
    }

    let full: Vec<_> = load(raw).match_full().into_iter().collect();

    assert_eq!(Group::WIDTH / 2, full.len(), "{full:?}");
    assert!(full.iter().all(|&index| index % 2 == 1), "{full:?}");
}

/// After conversion every special slot must read EMPTY and every full slot
/// must read DELETED.
#[test]
fn test_convert_special_to_empty_and_full_to_deleted() {
    use ::alloc::vec::Vec;

    // Repeating pattern: special bytes on even slots, full bytes (one with the
    // top bit clear, one with it set) on odd slots.
    let mut raw = RawGroup::default();

    for (i, slot) in raw.iter_mut().enumerate() {
        *slot = match i % 4 {
            0 => EMPTY,
            1 => 2,
            2 => DELETED,
            3 => 255,
            _ => unreachable!("i % 4 < 4"),
        };
    }

    let converted = load(raw).convert_special_to_empty_and_full_to_deleted();

    // Check the raw bytes directly, so a failure pinpoints the offending slot
    // instead of relying on debug output.
    for (i, byte) in store(converted).iter().enumerate() {
        let expected = if i % 2 == 0 { EMPTY } else { DELETED };

        assert_eq!(expected, *byte, "slot {i}");
    }

    let empty: Vec<_> = converted.match_empty().into_iter().collect();

    assert_eq!(Group::WIDTH / 2, empty.len(), "{empty:?}");
    assert!(empty.iter().all(|i| *i % 2 == 0), "{empty:?}");

    let deleted: Vec<_> = converted.match_byte(DELETED).into_iter().collect();

    assert_eq!(Group::WIDTH / 2, deleted.len(), "{deleted:?}");
    assert!(deleted.iter().all(|i| *i % 2 != 0), "{deleted:?}");
}
}

#[cfg(test)]
mod test_map {
use super::*;
Expand Down
52 changes: 34 additions & 18 deletions src/raw/sse2.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use super::bitmask::BitMask;
use super::EMPTY;
use super::{DELETED, EMPTY};
use core::mem;
use core::num::NonZeroU16;

Expand Down Expand Up @@ -102,6 +102,9 @@ impl Group {
/// `EMPTY` or `DELETED`.
#[inline]
pub(crate) fn match_empty_or_deleted(self) -> BitMask {
debug_assert_eq!(127, EMPTY);
debug_assert_eq!(126, DELETED);

#[allow(
// byte: i32 as u16
// note: _mm_movemask_epi8 returns a 16-bit mask in a i32, the
Expand All @@ -110,15 +113,30 @@ impl Group {
clippy::cast_possible_truncation
)]
unsafe {
// A byte is EMPTY or DELETED iff the high bit is set
BitMask(x86::_mm_movemask_epi8(self.0) as u16)
// A byte is EMPTY or DELETED iff it is greater than or equal to DELETED.
let is_special = x86::_mm_cmpgt_epi8(self.0, x86::_mm_set1_epi8(DELETED as i8 - 1));
BitMask(x86::_mm_movemask_epi8(is_special) as u16)
}
}

/// Returns a `BitMask` indicating all bytes in the group which are full.
///
/// Bytes are compared as signed values: a byte is full when it is strictly
/// less than `DELETED`, which includes every byte with the top bit set.
#[inline]
pub(crate) fn match_full(&self) -> BitMask {
    debug_assert_eq!(127, EMPTY);
    debug_assert_eq!(126, DELETED);

    #[allow(
        // byte: i32 as u16
        //   note: _mm_movemask_epi8 returns a 16-bit mask in a i32, the
        //   upper 16-bits of the i32 are zeroed:
        clippy::cast_sign_loss,
        clippy::cast_possible_truncation
    )]
    // SAFETY: only register-to-register SSE2 intrinsics are used, no memory is
    // accessed. NOTE(review): presumably this module is only compiled when
    // SSE2 is available -- confirm against the module's `cfg`.
    unsafe {
        // A byte is full iff it is strictly less than DELETED.
        let is_full = x86::_mm_cmplt_epi8(self.0, x86::_mm_set1_epi8(DELETED as i8));
        BitMask(x86::_mm_movemask_epi8(is_full) as u16)
    }
}

/// Performs the following transformation on all bytes in the group:
Expand All @@ -127,22 +145,20 @@ impl Group {
/// - `FULL => DELETED`
#[inline]
pub(crate) fn convert_special_to_empty_and_full_to_deleted(self) -> Self {
// Map high_bit = 1 (EMPTY or DELETED) to 1111_1111
// and high_bit = 0 (FULL) to 1000_0000
//
// Here's this logic expanded to concrete values:
// let special = 0 > byte = 1111_1111 (true) or 0000_0000 (false)
// 1111_1111 | 1000_0000 = 1111_1111
// 0000_0000 | 1000_0000 = 1000_0000
#[allow(
clippy::cast_possible_wrap, // byte: 0x80_u8 as i8
)]
debug_assert_eq!(127, EMPTY);
debug_assert_eq!(126, DELETED);

#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
unsafe {
let zero = x86::_mm_setzero_si128();
let special = x86::_mm_cmpgt_epi8(zero, self.0);
let empty = x86::_mm_set1_epi8(EMPTY as i8);
let deleted = x86::_mm_set1_epi8(DELETED as i8);

let is_full = x86::_mm_cmplt_epi8(self.0, deleted);
let is_special = x86::_mm_cmpeq_epi8(is_full, x86::_mm_set1_epi8(0));

Group(x86::_mm_or_si128(
special,
x86::_mm_set1_epi8(0x80_u8 as i8),
x86::_mm_and_si128(is_full, deleted),
x86::_mm_and_si128(is_special, empty),
))
}
}
Expand Down

0 comments on commit 2098bd4

Please sign in to comment.