Skip to content
Permalink
Browse files

style: Remove use of `fnv` in bloom.rs.

To support that, this patch also does the following.

- Removes the insert(), remove() and might_contain() methods, because they are
  specialized versions of insert_hash(), remove_hash(), and
  might_contain_hash(), and they are only used by tests within this file.

- Moves hash() from the top level into create_and_insert_some_stuff().

- Changes create_and_insert_some_stuff() so that instead of hashing consecutive
  integers, it instead hashes stringified consecutive integers, which matches
  real usage a little better.

- Raises the false_positives limit a little to account for the above changes.

Bug: 1484096
Reviewed-by: heycam
  • Loading branch information
nnethercote authored and emilio committed Aug 18, 2018
1 parent d63ce55 commit 07ffc0955f608c2f9a5098494c6ee02f59bd3b94
Showing with 24 additions and 43 deletions.
  1. +0 −1 components/selectors/Cargo.toml
  2. +24 −41 components/selectors/bloom.rs
  3. +0 −1 components/selectors/lib.rs
@@ -24,7 +24,6 @@ bitflags = "1.0"
matches = "0.1"
cssparser = "0.24.0"
log = "0.4"
fnv = "1.0"
fxhash = "0.2"
phf = "0.7.18"
precomputed-hash = "0.1"
@@ -5,9 +5,7 @@
//! Counting and non-counting Bloom filters tuned for use as ancestor filters
//! for selector matching.

use fnv::FnvHasher;
use std::fmt::{self, Debug};
use std::hash::{Hash, Hasher};

// The top 8 bits of the 32-bit hash value are not used by the bloom filter.
// Consumers may rely on this to pack hashes more efficiently.
@@ -108,43 +106,27 @@ where
unreachable!()
}

/// Inserts an item with a particular hash into the bloom filter.
#[inline]
pub fn insert_hash(&mut self, hash: u32) {
self.storage.adjust_first_slot(hash, true);
self.storage.adjust_second_slot(hash, true);
}

/// Inserts an item into the bloom filter.
#[inline]
pub fn insert<T: Hash>(&mut self, elem: &T) {
self.insert_hash(hash(elem))
}

/// Removes an item with a particular hash from the bloom filter.
#[inline]
pub fn remove_hash(&mut self, hash: u32) {
self.storage.adjust_first_slot(hash, false);
self.storage.adjust_second_slot(hash, false);
}

/// Removes an item from the bloom filter.
#[inline]
pub fn remove<T: Hash>(&mut self, elem: &T) {
self.remove_hash(hash(elem))
}

/// Check whether the filter might contain an item with the given hash.
/// This can sometimes return true even if the item is not in the filter,
/// but will never return false for items that are actually in the filter.
#[inline]
pub fn might_contain_hash(&self, hash: u32) -> bool {
!self.storage.first_slot_is_empty(hash) && !self.storage.second_slot_is_empty(hash)
}

/// Check whether the filter might contain an item. This can
/// sometimes return true even if the item is not in the filter,
/// but will never return false for items that are actually in the
/// filter.
#[inline]
pub fn might_contain<T: Hash>(&self, elem: &T) -> bool {
self.might_contain_hash(hash(elem))
}
}

impl<S> Debug for CountingBloomFilter<S>
@@ -296,16 +278,6 @@ impl Clone for BloomStorageBool {
}
}

fn hash<T: Hash>(elem: &T) -> u32 {
// We generally use FxHasher in Stylo because it's faster than FnvHasher,
// but the increased collision rate has outsized effect on the bloom
// filter, so we use FnvHasher instead here.
let mut hasher = FnvHasher::default();
elem.hash(&mut hasher);
let hash: u64 = hasher.finish();
(hash >> 32) as u32 ^ (hash as u32)
}

#[inline]
fn hash1(hash: u32) -> u32 {
hash & KEY_MASK
@@ -318,8 +290,18 @@ fn hash2(hash: u32) -> u32 {

#[test]
fn create_and_insert_some_stuff() {
use fxhash::FxHasher;
use std::hash::{Hash, Hasher};
use std::mem::transmute;

fn hash_as_str(i: usize) -> u32 {
let mut hasher = FxHasher::default();
let s = i.to_string();
s.hash(&mut hasher);
let hash: u64 = hasher.finish();
(hash >> 32) as u32 ^ (hash as u32)
}

let mut bf = BloomFilter::new();

// Statically assert that ARRAY_SIZE is a multiple of 8, which
@@ -329,33 +311,34 @@ fn create_and_insert_some_stuff() {
}

for i in 0_usize..1000 {
bf.insert(&i);
bf.insert_hash(hash_as_str(i));
}

for i in 0_usize..1000 {
assert!(bf.might_contain(&i));
assert!(bf.might_contain_hash(hash_as_str(i)));
}

let false_positives = (1001_usize..2000).filter(|i| bf.might_contain(i)).count();
let false_positives =
(1001_usize..2000).filter(|i| bf.might_contain_hash(hash_as_str(*i))).count();

assert!(false_positives < 160, "{} is not < 160", false_positives); // 16%.
assert!(false_positives < 190, "{} is not < 190", false_positives); // 19%.

for i in 0_usize..100 {
bf.remove(&i);
bf.remove_hash(hash_as_str(i));
}

for i in 100_usize..1000 {
assert!(bf.might_contain(&i));
assert!(bf.might_contain_hash(hash_as_str(i)));
}

let false_positives = (0_usize..100).filter(|i| bf.might_contain(i)).count();
let false_positives = (0_usize..100).filter(|i| bf.might_contain_hash(hash_as_str(*i))).count();

assert!(false_positives < 20, "{} is not < 20", false_positives); // 20%.

bf.clear();

for i in 0_usize..2000 {
assert!(!bf.might_contain(&i));
assert!(!bf.might_contain_hash(hash_as_str(i)));
}
}

@@ -9,7 +9,6 @@
extern crate bitflags;
#[macro_use]
extern crate cssparser;
extern crate fnv;
extern crate fxhash;
#[macro_use]
extern crate log;

0 comments on commit 07ffc09

Please sign in to comment.
You can’t perform that action at this time.