Skip to content

Commit

Permalink
std::trie: use unsafe code to give a 3x speed up to the iterator.
Browse files Browse the repository at this point in the history
This stores the stack of iterators inline (we have a maximum depth with
`uint` keys), and then uses direct pointer offsetting to manipulate it,
in a blazing fast way:

Before:

    bench_iter_large          ... bench:     43187 ns/iter (+/- 3082)
    bench_iter_small          ... bench:       618 ns/iter (+/- 288)

After:

    bench_iter_large          ... bench:     13497 ns/iter (+/- 1575)
    bench_iter_small          ... bench:       220 ns/iter (+/- 91)
  • Loading branch information
huonw committed Jan 17, 2014
1 parent f0c554d commit 0148055
Showing 1 changed file with 118 additions and 39 deletions.
157 changes: 118 additions & 39 deletions src/libstd/trie.rs
Expand Up @@ -11,14 +11,17 @@
//! Ordered containers with integer keys, implemented as radix tries (`TrieSet` and `TrieMap` types)

use prelude::*;
use mem;
use uint;
use util::replace;
use unstable::intrinsics::init;
use vec;

// FIXME: #5244: need to manually update the TrieNode constructor
static SHIFT: uint = 4;
static SIZE: uint = 1 << SHIFT;
static MASK: uint = SIZE - 1;
static NUM_CHUNKS: uint = uint::bits / SHIFT;

enum Child<T> {
Internal(~TrieNode<T>),
Expand Down Expand Up @@ -113,21 +116,25 @@ impl<T> TrieMap<T> {

/// Get an iterator over the key-value pairs in the map
pub fn iter<'a>(&'a self) -> TrieMapIterator<'a, T> {
TrieMapIterator {
stack: ~[self.root.children.iter()],
remaining_min: self.length,
remaining_max: self.length
}
let mut iter = unsafe {TrieMapIterator::new()};
iter.stack[0] = self.root.children.iter();
iter.length = 1;
iter.remaining_min = self.length;
iter.remaining_max = self.length;

iter
}

/// Get an iterator over the key-value pairs in the map, with the
/// ability to mutate the values.
pub fn mut_iter<'a>(&'a mut self) -> TrieMapMutIterator<'a, T> {
TrieMapMutIterator {
stack: ~[self.root.children.mut_iter()],
remaining_min: self.length,
remaining_max: self.length
}
let mut iter = unsafe {TrieMapMutIterator::new()};
iter.stack[0] = self.root.children.mut_iter();
iter.length = 1;
iter.remaining_min = self.length;
iter.remaining_max = self.length;

iter
}
}

Expand Down Expand Up @@ -176,16 +183,16 @@ macro_rules! bound {

let key = $key;

let mut idx = 0;
let mut it = $iterator_name {
stack: ~[],
remaining_min: 0,
remaining_max: this.length
};
let mut it = unsafe {$iterator_name::new()};
// everything else is zero'd, as we want.
it.remaining_max = this.length;

// this addr is necessary for the `Internal` pattern.
addr!(loop {
let children = unsafe {addr!(& $($mut_)* (*node).children)};
let child_id = chunk(key, idx);
// it.length is the current depth in the iterator and the
// current depth through the `uint` key we've traversed.
let child_id = chunk(key, it.length);
let (slice_idx, ret) = match children[child_id] {
Internal(ref $($mut_)* n) => {
node = addr!(& $($mut_)* **n as * $($mut_)* TrieNode<T>);
Expand All @@ -202,9 +209,10 @@ macro_rules! bound {
(child_id + 1, true)
}
};
it.stack.push(children.$slice_from(slice_idx).$iter());
// push to the stack.
it.stack[it.length] = children.$slice_from(slice_idx).$iter();
it.length += 1;
if ret { return it }
idx += 1;
})
}
}
Expand Down Expand Up @@ -467,15 +475,17 @@ fn remove<T>(count: &mut uint, child: &mut Child<T>, key: uint,

/// Forward iterator over a map
pub struct TrieMapIterator<'a, T> {
priv stack: ~[vec::VecIterator<'a, Child<T>>],
priv stack: [vec::VecIterator<'a, Child<T>>, .. NUM_CHUNKS],
priv length: uint,
priv remaining_min: uint,
priv remaining_max: uint
}

/// Forward iterator over the key-value pairs of a map, with the
/// values being mutable.
pub struct TrieMapMutIterator<'a, T> {
priv stack: ~[vec::VecMutIterator<'a, Child<T>>],
priv stack: [vec::VecMutIterator<'a, Child<T>>, .. NUM_CHUNKS],
priv length: uint,
priv remaining_min: uint,
priv remaining_max: uint
}
Expand All @@ -487,27 +497,96 @@ macro_rules! iterator_impl {
($name:ident,
iter = $iter:ident,
mutability = $($mut_:tt)*) => {
impl<'a, T> $name<'a, T> {
// Create new zero'd iterator. We have a thin gilding of safety by
// using init rather than uninit, so that the worst that can happen
// from failing to initialise correctly after calling these is a
// segfault.
#[cfg(target_word_size="32")]
unsafe fn new() -> $name<'a, T> {
$name {
remaining_min: 0,
remaining_max: 0,
length: 0,
// ick :( ... at least the compiler will tell us if we screwed up.
stack: [init(), init(), init(), init(), init(), init(), init(), init()]
}
}

#[cfg(target_word_size="64")]
unsafe fn new() -> $name<'a, T> {
$name {
remaining_min: 0,
remaining_max: 0,
length: 0,
stack: [init(), init(), init(), init(), init(), init(), init(), init(),
init(), init(), init(), init(), init(), init(), init(), init()]
}
}
}

item!(impl<'a, T> Iterator<(uint, &'a $($mut_)* T)> for $name<'a, T> {
// you might wonder why we're not even trying to act within the
// rules, and are just manipulating raw pointers like there's no
// such thing as invalid pointers and memory unsafety. The
// reason is performance, without doing this we can get the
// bench_iter_large microbenchmark down to about 30000 ns/iter
// (using .unsafe_ref to index self.stack directly, 38000
// ns/iter with [] checked indexing), but this smashes that down
// to 13500 ns/iter.
//
// Fortunately, it's still safe...
//
// We have an invariant that every Internal node
// corresponds to one push to self.stack, and one pop,
// nested appropriately. self.stack has enough storage
// to store the maximum depth of Internal nodes in the
// trie (8 on 32-bit platforms, 16 on 64-bit).
fn next(&mut self) -> Option<(uint, &'a $($mut_)* T)> {
while !self.stack.is_empty() {
match self.stack[self.stack.len() - 1].next() {
None => {
self.stack.pop();
}
Some(child) => {
addr!(match *child {
Internal(ref $($mut_)* node) => {
self.stack.push(node.children.$iter());
}
External(key, ref $($mut_)* value) => {
self.remaining_max -= 1;
if self.remaining_min > 0 {
self.remaining_min -= 1;
let start_ptr = self.stack.as_mut_ptr();

unsafe {
// write_ptr is the next place to write to the stack.
// invariant: start_ptr <= write_ptr < end of the
// vector.
let mut write_ptr = start_ptr.offset(self.length as int);
while write_ptr != start_ptr {
// indexing back one is safe, since write_ptr >
// start_ptr now.
match (*write_ptr.offset(-1)).next() {
// exhausted this iterator (i.e. finished this
// Internal node), so pop from the stack.
//
// don't bother clearing the memory, because the
// next time we use it we'll've written to it
// first.
None => write_ptr = write_ptr.offset(-1),
Some(child) => {
addr!(match *child {
Internal(ref $($mut_)* node) => {
// going down a level, so push
// to the stack (this is the
// write referenced above)
*write_ptr = node.children.$iter();
write_ptr = write_ptr.offset(1);
}
External(key, ref $($mut_)* value) => {
self.remaining_max -= 1;
if self.remaining_min > 0 {
self.remaining_min -= 1;
}
// store the new length of the
// stack, based on our current
// position.
self.length = (write_ptr as uint
- start_ptr as uint) /
mem::size_of_val(&*write_ptr);

return Some((key, value));
}
return Some((key, value));
}
Nothing => {}
})
Nothing => {}
})
}
}
}
}
Expand Down

5 comments on commit 0148055

@bors
Copy link
Contributor

@bors bors commented on 0148055 Jan 18, 2014

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

saw approval from alexcrichton
at huonw@0148055

@bors
Copy link
Contributor

@bors bors commented on 0148055 Jan 18, 2014

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

merging huonw/rust/trie-internal-iter = 0148055 into auto

@bors
Copy link
Contributor

@bors bors commented on 0148055 Jan 18, 2014

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

huonw/rust/trie-internal-iter = 0148055 merged ok, testing candidate = f4498c7

@bors
Copy link
Contributor

@bors bors commented on 0148055 Jan 18, 2014

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bors
Copy link
Contributor

@bors bors commented on 0148055 Jan 18, 2014

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fast-forwarding master to auto = f4498c7

Please sign in to comment.