Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pack bloom filter hashes better and save a word on Rule #17281

Merged
merged 6 commits into from Jun 12, 2017
@@ -97,6 +97,7 @@ use std::convert::TryFrom;
use std::default::Default;
use std::fmt;
use std::rc::Rc;
use style::applicable_declarations::ApplicableDeclarationBlock;
use style::attr::{AttrValue, LengthOrPercentageOrAuto};
use style::context::{QuirksMode, ReflowGoal};
use style::element_state::*;
@@ -109,7 +110,6 @@ use style::selector_parser::extended_filtering;
use style::shared_lock::{SharedRwLock, Locked};
use style::sink::Push;
use style::stylearc::Arc;
use style::stylist::ApplicableDeclarationBlock;
use style::thread_state;
use style::values::{CSSFloat, Either};
use style::values::specified;
@@ -61,6 +61,7 @@ use std::marker::PhantomData;
use std::mem::transmute;
use std::sync::atomic::Ordering;
use style;
use style::applicable_declarations::ApplicableDeclarationBlock;
use style::attr::AttrValue;
use style::computed_values::display;
use style::context::{QuirksMode, SharedStyleContext};
@@ -76,7 +77,6 @@ use style::shared_lock::{SharedRwLock as StyleSharedRwLock, Locked as StyleLocke
use style::sink::Push;
use style::str::is_whitespace;
use style::stylearc::Arc;
use style::stylist::ApplicableDeclarationBlock;

#[derive(Copy, Clone)]
pub struct ServoLayoutNode<'a> {
@@ -7,10 +7,13 @@
use fnv::FnvHasher;
use std::hash::{Hash, Hasher};

// The top 12 bits of the 32-bit hash value are not used by the bloom filter.

This comment has been minimized.

@MattWis

MattWis Jun 23, 2017

Contributor

This comment seems incorrect to me. Based on a cursory reading of this PR, it seems that the hashes are 24 bits, leaving 8 bits unused in a 32 bit word.

This comment has been minimized.

@emilio

emilio Jun 23, 2017

Member

Yes, that's right.

// Consumers may rely on this to pack hashes more efficiently.
pub const BLOOM_HASH_MASK: u32 = 0x00ffffff;
const KEY_SIZE: usize = 12;

const ARRAY_SIZE: usize = 1 << KEY_SIZE;
const KEY_MASK: u32 = (1 << KEY_SIZE) - 1;
const KEY_SHIFT: usize = 16;

/// A counting Bloom filter with 8-bit counters. For now we assume
/// that having two hash functions is enough, but we may revisit that
@@ -183,7 +186,7 @@ fn hash1(hash: u32) -> u32 {

#[inline]
fn hash2(hash: u32) -> u32 {
(hash >> KEY_SHIFT) & KEY_MASK
(hash >> KEY_SIZE) & KEY_MASK
}

#[test]
@@ -3,7 +3,7 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

use attr::{ParsedAttrSelectorOperation, AttrSelectorOperation, NamespaceConstraint};
use bloom::BloomFilter;
use bloom::{BLOOM_HASH_MASK, BloomFilter};
use parser::{AncestorHashes, Combinator, Component, LocalName};
use parser::{Selector, SelectorImpl, SelectorIter, SelectorList};
use std::borrow::Borrow;
@@ -279,19 +279,30 @@ fn may_match<E>(hashes: &AncestorHashes,
-> bool
where E: Element,
{
// Check against the list of precomputed hashes.
for hash in hashes.0.iter() {
// If we hit the 0 sentinel hash, that means the rest are zero as well.
if *hash == 0 {
break;
}

if !bf.might_contain_hash(*hash) {
// Check the first three hashes. Note that we can check for zero before
// masking off the high bits, since if any of the first three hashes is
// zero the fourth will be as well. We also take care to avoid the
// special-case complexity of the fourth hash until we actually reach it,
// because we usually don't.
//
// To be clear: this is all extremely hot.
for i in 0..3 {
let packed = hashes.packed_hashes[i];
if packed == 0 {
// No more hashes left - unable to fast-reject.
return true;
}

if !bf.might_contain_hash(packed & BLOOM_HASH_MASK) {
// Hooray! We fast-rejected on this hash.
return false;
}
}

true
// Now do the slighty-more-complex work of synthesizing the fourth hash,
// and check it against the filter if it exists.
let fourth = hashes.fourth_hash();
fourth == 0 || bf.might_contain_hash(fourth)
}

/// Tracks whether we are currently looking for relevant links for a given
@@ -4,6 +4,7 @@

use attr::{AttrSelectorWithNamespace, ParsedAttrSelectorOperation, AttrSelectorOperator};
use attr::{ParsedCaseSensitivity, SELECTOR_WHITESPACE, NamespaceConstraint};
use bloom::BLOOM_HASH_MASK;
use cssparser::{ParseError, BasicParseError};
use cssparser::{Token, Parser as CssParser, parse_nth, ToCss, serialize_identifier, CssStringWriter};
use precomputed_hash::PrecomputedHash;
@@ -203,37 +204,64 @@ impl<Impl: SelectorImpl> SelectorList<Impl> {
}
}

/// Copied from Gecko, who copied it from WebKit. Note that increasing the
/// number of hashes here will adversely affect the cache hit when fast-
/// rejecting long lists of Rules with inline hashes.
const NUM_ANCESTOR_HASHES: usize = 4;

/// Ancestor hashes for the bloom filter. We precompute these and store them
/// inline with selectors to optimize cache performance during matching.
/// This matters a lot.
///
/// We use 4 hashes, which is copied from Gecko, who copied it from WebKit.
/// Note that increasing the number of hashes here will adversely affect the
/// cache hit when fast-rejecting long lists of Rules with inline hashes.
///
/// Because the bloom filter only uses the bottom 24 bits of the hash, we pack
/// the fourth hash into the upper bits of the first three hashes in order to
/// shrink Rule (whose size matters a lot). This scheme minimizes the runtime
/// overhead of the packing for the first three hashes (we just need to mask
/// off the upper bits) at the expense of making the fourth somewhat more
/// complicated to assemble, because we often bail out before checking all the
/// hashes.
#[derive(Eq, PartialEq, Clone, Debug)]
pub struct AncestorHashes(pub [u32; NUM_ANCESTOR_HASHES]);
pub struct AncestorHashes {
pub packed_hashes: [u32; 3],
}

impl AncestorHashes {
pub fn new<Impl: SelectorImpl>(s: &Selector<Impl>) -> Self {
Self::from_iter(s.iter())
}

pub fn from_iter<Impl: SelectorImpl>(iter: SelectorIter<Impl>) -> Self {
let mut hashes = [0; NUM_ANCESTOR_HASHES];
// Compute ancestor hashes for the bloom filter.
let mut hashes = [0u32; 4];
let mut hash_iter = AncestorIter::new(iter)
.map(|x| x.ancestor_hash())
.filter(|x| x.is_some())
.map(|x| x.unwrap());
for i in 0..NUM_ANCESTOR_HASHES {
for i in 0..4 {
hashes[i] = match hash_iter.next() {
Some(x) => x,
Some(x) => x & BLOOM_HASH_MASK,
None => break,
}
}

AncestorHashes(hashes)
// Now, pack the fourth hash (if it exists) into the upper byte of each of
// the other three hashes.
let fourth = hashes[3];
if fourth != 0 {
hashes[0] |= (fourth & 0x000000ff) << 24;
hashes[1] |= (fourth & 0x0000ff00) << 16;
hashes[2] |= (fourth & 0x00ff0000) << 8;
}

AncestorHashes {
packed_hashes: [hashes[0], hashes[1], hashes[2]],
}
}

/// Returns the fourth hash, reassembled from parts.
pub fn fourth_hash(&self) -> u32 {
((self.packed_hashes[0] & 0xff000000) >> 24) |
((self.packed_hashes[1] & 0xff000000) >> 16) |
((self.packed_hashes[2] & 0xff000000) >> 8)
}
}

@@ -0,0 +1,137 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

//! Applicable declarations management.

use properties::PropertyDeclarationBlock;
use rule_tree::{CascadeLevel, StyleSource};
use shared_lock::Locked;
use smallvec::SmallVec;
use std::fmt::{Debug, self};
use std::mem;
use stylearc::Arc;

/// List of applicable declarations. This is a transient structure that shuttles
/// declarations between selector matching and inserting into the rule tree, and
/// therefore we want to avoid heap-allocation where possible.
///
/// In measurements on wikipedia, we pretty much never have more than 8 applicable
/// declarations, so we could consider making this 8 entries instead of 16.
/// However, it may depend a lot on workload, and stack space is cheap.
pub type ApplicableDeclarationList = SmallVec<[ApplicableDeclarationBlock; 16]>;

/// Blink uses 18 bits to store source order, and does not check overflow [1].
/// That's a limit that could be reached in realistic webpages, so we use
/// 24 bits and enforce defined behavior in the overflow case.
///
/// Note that the value of 24 is also hard-coded into the level() accessor,
/// which does a byte-aligned load of the 4th byte. If you change this value
/// you'll need to change that as well.
///
/// [1] https://cs.chromium.org/chromium/src/third_party/WebKit/Source/core/css/
/// RuleSet.h?l=128&rcl=90140ab80b84d0f889abc253410f44ed54ae04f3
const SOURCE_ORDER_BITS: usize = 24;
const SOURCE_ORDER_MASK: u32 = (1 << SOURCE_ORDER_BITS) - 1;
const SOURCE_ORDER_MAX: u32 = SOURCE_ORDER_MASK;

/// Stores the source order of a block and the cascade level it belongs to.
#[cfg_attr(feature = "servo", derive(HeapSizeOf))]
#[derive(Copy, Clone, Eq, PartialEq)]
struct SourceOrderAndCascadeLevel(u32);

impl SourceOrderAndCascadeLevel {
fn new(source_order: u32, cascade_level: CascadeLevel) -> SourceOrderAndCascadeLevel {
let mut bits = ::std::cmp::min(source_order, SOURCE_ORDER_MAX);
bits |= (cascade_level as u8 as u32) << SOURCE_ORDER_BITS;
SourceOrderAndCascadeLevel(bits)
}

fn order(&self) -> u32 {
self.0 & SOURCE_ORDER_MASK
}

fn level(&self) -> CascadeLevel {
unsafe {
// Transmute rather than shifting so that we're sure the compiler
// emits a simple byte-aligned load.
let as_bytes: [u8; 4] = mem::transmute(self.0);
CascadeLevel::from_byte(as_bytes[3])
}
}
}

impl Debug for SourceOrderAndCascadeLevel {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.debug_struct("SourceOrderAndCascadeLevel")
.field("order", &self.order())
.field("level", &self.level())
.finish()
}
}

/// A property declaration together with its precedence among rules of equal
/// specificity so that we can sort them.
///
/// This represents the declarations in a given declaration block for a given
/// importance.
#[cfg_attr(feature = "servo", derive(HeapSizeOf))]
#[derive(Debug, Clone, PartialEq)]
pub struct ApplicableDeclarationBlock {
/// The style source, either a style rule, or a property declaration block.
#[cfg_attr(feature = "servo", ignore_heap_size_of = "Arc")]
pub source: StyleSource,
/// The source order of the block, and the cascade level it belongs to.
order_and_level: SourceOrderAndCascadeLevel,
/// The specificity of the selector this block is represented by.
pub specificity: u32,
}

impl ApplicableDeclarationBlock {
/// Constructs an applicable declaration block from a given property
/// declaration block and importance.
#[inline]
pub fn from_declarations(declarations: Arc<Locked<PropertyDeclarationBlock>>,
level: CascadeLevel)
-> Self {
ApplicableDeclarationBlock {
source: StyleSource::Declarations(declarations),
order_and_level: SourceOrderAndCascadeLevel::new(0, level),
specificity: 0,
}
}

/// Constructs an applicable declaration block from the given components
#[inline]
pub fn new(source: StyleSource,
order: u32,
level: CascadeLevel,
specificity: u32) -> Self {
ApplicableDeclarationBlock {
source: source,
order_and_level: SourceOrderAndCascadeLevel::new(order, level),
specificity: specificity,
}

}

/// Returns the source order of the block.
#[inline]
pub fn source_order(&self) -> u32 {
self.order_and_level.order()
}

/// Returns the cascade level of the block.
#[inline]
pub fn level(&self) -> CascadeLevel {
self.order_and_level.level()
}

/// Convenience method to consume self and return the source alongside the
/// level.
#[inline]
pub fn order_and_level(self) -> (StyleSource, CascadeLevel) {
let level = self.level();
(self.source, level)
}
}
@@ -8,6 +8,7 @@
#![deny(missing_docs)]

use {Atom, Namespace, LocalName};
use applicable_declarations::ApplicableDeclarationBlock;
use atomic_refcell::{AtomicRef, AtomicRefCell, AtomicRefMut};
#[cfg(feature = "gecko")] use context::UpdateAnimationsTasks;
use data::ElementData;
@@ -29,7 +30,6 @@ use std::fmt::Debug;
use std::hash::Hash;
use std::ops::Deref;
use stylearc::Arc;
use stylist::ApplicableDeclarationBlock;
use thread_state;

pub use style_traits::UnsafeNode;
@@ -15,6 +15,7 @@
//! the separation between the style system implementation and everything else.

use app_units::Au;
use applicable_declarations::ApplicableDeclarationBlock;
use atomic_refcell::AtomicRefCell;
use context::{QuirksMode, SharedStyleContext, UpdateAnimationsTasks};
use data::ElementData;
@@ -87,7 +88,6 @@ use std::ptr;
use string_cache::{Atom, Namespace, WeakAtom, WeakNamespace};
use stylearc::Arc;
use stylesheets::UrlExtraData;
use stylist::ApplicableDeclarationBlock;

/// A simple wrapper over a non-null Gecko node (`nsINode`) pointer.
///
@@ -91,6 +91,7 @@ extern crate unicode_segmentation;
mod macros;

pub mod animation;
pub mod applicable_declarations;
#[allow(missing_docs)] // TODO.
#[cfg(feature = "servo")] pub mod attr;
pub mod bezier;
@@ -7,6 +7,7 @@
#![allow(unsafe_code)]
#![deny(missing_docs)]

use applicable_declarations::ApplicableDeclarationList;
use cascade_info::CascadeInfo;
use context::{SelectorFlagsMap, SharedStyleContext, StyleContext};
use data::{ComputedStyle, ElementData, RestyleData};
@@ -25,7 +26,7 @@ use selectors::matching::{ElementSelectorFlags, MatchingContext, MatchingMode, S
use selectors::matching::{VisitedHandlingMode, AFFECTED_BY_PSEUDO_ELEMENTS};
use sharing::StyleSharingBehavior;
use stylearc::Arc;
use stylist::{ApplicableDeclarationList, RuleInclusion};
use stylist::RuleInclusion;

/// The way a style should be inherited.
enum InheritMode {
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.