Skip to content

Commit

Permalink
style: Remove use of fnv in bloom.rs.
Browse files Browse the repository at this point in the history
To support that, this patch also does the following:

- Removes the insert(), remove() and might_contain() methods, because they are
  specialized versions of insert_hash(), remove_hash(), and
  might_contain_hash(), and they are only used by tests within this file.

- Moves hash() from the top level into create_and_insert_some_stuff().

- Changes create_and_insert_some_stuff() so that, instead of hashing consecutive
  integers, it hashes stringified consecutive integers, which matches real
  usage a little better.

- Raises the false_positives limit a little to account for the above changes.

Bug: 1484096
Reviewed-by: heycam
  • Loading branch information
nnethercote authored and emilio committed Aug 18, 2018
1 parent d63ce55 commit 07ffc09
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 43 deletions.
1 change: 0 additions & 1 deletion components/selectors/Cargo.toml
Expand Up @@ -24,7 +24,6 @@ bitflags = "1.0"
matches = "0.1" matches = "0.1"
cssparser = "0.24.0" cssparser = "0.24.0"
log = "0.4" log = "0.4"
fnv = "1.0"
fxhash = "0.2" fxhash = "0.2"
phf = "0.7.18" phf = "0.7.18"
precomputed-hash = "0.1" precomputed-hash = "0.1"
Expand Down
65 changes: 24 additions & 41 deletions components/selectors/bloom.rs
Expand Up @@ -5,9 +5,7 @@
//! Counting and non-counting Bloom filters tuned for use as ancestor filters //! Counting and non-counting Bloom filters tuned for use as ancestor filters
//! for selector matching. //! for selector matching.


use fnv::FnvHasher;
use std::fmt::{self, Debug}; use std::fmt::{self, Debug};
use std::hash::{Hash, Hasher};


// The top 8 bits of the 32-bit hash value are not used by the bloom filter. // The top 8 bits of the 32-bit hash value are not used by the bloom filter.
// Consumers may rely on this to pack hashes more efficiently. // Consumers may rely on this to pack hashes more efficiently.
Expand Down Expand Up @@ -108,43 +106,27 @@ where
unreachable!() unreachable!()
} }


/// Inserts an item with a particular hash into the bloom filter.
#[inline] #[inline]
pub fn insert_hash(&mut self, hash: u32) { pub fn insert_hash(&mut self, hash: u32) {
self.storage.adjust_first_slot(hash, true); self.storage.adjust_first_slot(hash, true);
self.storage.adjust_second_slot(hash, true); self.storage.adjust_second_slot(hash, true);
} }


/// Inserts an item into the bloom filter. /// Removes an item with a particular hash from the bloom filter.
#[inline]
pub fn insert<T: Hash>(&mut self, elem: &T) {
self.insert_hash(hash(elem))
}

#[inline] #[inline]
pub fn remove_hash(&mut self, hash: u32) { pub fn remove_hash(&mut self, hash: u32) {
self.storage.adjust_first_slot(hash, false); self.storage.adjust_first_slot(hash, false);
self.storage.adjust_second_slot(hash, false); self.storage.adjust_second_slot(hash, false);
} }


/// Removes an item from the bloom filter. /// Check whether the filter might contain an item with the given hash.
#[inline] /// This can sometimes return true even if the item is not in the filter,
pub fn remove<T: Hash>(&mut self, elem: &T) { /// but will never return false for items that are actually in the filter.
self.remove_hash(hash(elem))
}

#[inline] #[inline]
pub fn might_contain_hash(&self, hash: u32) -> bool { pub fn might_contain_hash(&self, hash: u32) -> bool {
!self.storage.first_slot_is_empty(hash) && !self.storage.second_slot_is_empty(hash) !self.storage.first_slot_is_empty(hash) && !self.storage.second_slot_is_empty(hash)
} }

/// Check whether the filter might contain an item. This can
/// sometimes return true even if the item is not in the filter,
/// but will never return false for items that are actually in the
/// filter.
#[inline]
pub fn might_contain<T: Hash>(&self, elem: &T) -> bool {
self.might_contain_hash(hash(elem))
}
} }


impl<S> Debug for CountingBloomFilter<S> impl<S> Debug for CountingBloomFilter<S>
Expand Down Expand Up @@ -296,16 +278,6 @@ impl Clone for BloomStorageBool {
} }
} }


fn hash<T: Hash>(elem: &T) -> u32 {
// We generally use FxHasher in Stylo because it's faster than FnvHasher,
// but the increased collision rate has outsized effect on the bloom
// filter, so we use FnvHasher instead here.
let mut hasher = FnvHasher::default();
elem.hash(&mut hasher);
let hash: u64 = hasher.finish();
(hash >> 32) as u32 ^ (hash as u32)
}

#[inline] #[inline]
fn hash1(hash: u32) -> u32 { fn hash1(hash: u32) -> u32 {
hash & KEY_MASK hash & KEY_MASK
Expand All @@ -318,8 +290,18 @@ fn hash2(hash: u32) -> u32 {


#[test] #[test]
fn create_and_insert_some_stuff() { fn create_and_insert_some_stuff() {
use fxhash::FxHasher;
use std::hash::{Hash, Hasher};
use std::mem::transmute; use std::mem::transmute;


fn hash_as_str(i: usize) -> u32 {
let mut hasher = FxHasher::default();
let s = i.to_string();
s.hash(&mut hasher);
let hash: u64 = hasher.finish();
(hash >> 32) as u32 ^ (hash as u32)
}

let mut bf = BloomFilter::new(); let mut bf = BloomFilter::new();


// Statically assert that ARRAY_SIZE is a multiple of 8, which // Statically assert that ARRAY_SIZE is a multiple of 8, which
Expand All @@ -329,33 +311,34 @@ fn create_and_insert_some_stuff() {
} }


for i in 0_usize..1000 { for i in 0_usize..1000 {
bf.insert(&i); bf.insert_hash(hash_as_str(i));
} }


for i in 0_usize..1000 { for i in 0_usize..1000 {
assert!(bf.might_contain(&i)); assert!(bf.might_contain_hash(hash_as_str(i)));
} }


let false_positives = (1001_usize..2000).filter(|i| bf.might_contain(i)).count(); let false_positives =
(1001_usize..2000).filter(|i| bf.might_contain_hash(hash_as_str(*i))).count();


assert!(false_positives < 160, "{} is not < 160", false_positives); // 16%. assert!(false_positives < 190, "{} is not < 190", false_positives); // 19%.


for i in 0_usize..100 { for i in 0_usize..100 {
bf.remove(&i); bf.remove_hash(hash_as_str(i));
} }


for i in 100_usize..1000 { for i in 100_usize..1000 {
assert!(bf.might_contain(&i)); assert!(bf.might_contain_hash(hash_as_str(i)));
} }


let false_positives = (0_usize..100).filter(|i| bf.might_contain(i)).count(); let false_positives = (0_usize..100).filter(|i| bf.might_contain_hash(hash_as_str(*i))).count();


assert!(false_positives < 20, "{} is not < 20", false_positives); // 20%. assert!(false_positives < 20, "{} is not < 20", false_positives); // 20%.


bf.clear(); bf.clear();


for i in 0_usize..2000 { for i in 0_usize..2000 {
assert!(!bf.might_contain(&i)); assert!(!bf.might_contain_hash(hash_as_str(i)));
} }
} }


Expand Down
1 change: 0 additions & 1 deletion components/selectors/lib.rs
Expand Up @@ -9,7 +9,6 @@
extern crate bitflags; extern crate bitflags;
#[macro_use] #[macro_use]
extern crate cssparser; extern crate cssparser;
extern crate fnv;
extern crate fxhash; extern crate fxhash;
#[macro_use] #[macro_use]
extern crate log; extern crate log;
Expand Down

0 comments on commit 07ffc09

Please sign in to comment.