sstable: optimize seeks to use next
This optimization is used when the seek would position within the
current block of the iterator. It relies on having bounds set on the
iterator, since that is how we can claim certain invariants about the
current iterator position. Bounded ascending or descending scans are
how CockroachDB executes batched scans, and the SeqSeekGEWithBounds
microbenchmarks, which show a significant improvement, imitate this
pattern.
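
The core idea, in a minimal standalone sketch below (hypothetical names and types, not the Pebble implementation): when the seek target is known to lie at or after the current position and inside the current block, the iterator first tries a bounded number of Next steps and only falls back to the usual binary search if those do not reach the target.

package main

import (
	"fmt"
	"sort"
)

// blockIterSketch stands in for an iterator over one sorted, already loaded block.
type blockIterSketch struct {
	keys []string // sorted keys of the current "block"
	idx  int      // current position
}

// seekGE returns the first key >= target. When the target is at or after the
// current key (the case produced by ascending, bounded batched scans), a few
// Next steps usually suffice; otherwise it falls back to a binary search.
func (it *blockIterSketch) seekGE(target string) (string, bool) {
	const nextsBeforeSeek = 5
	if it.idx < len(it.keys) && it.keys[it.idx] <= target {
		for n := 0; n < nextsBeforeSeek && it.idx < len(it.keys); n++ {
			if it.keys[it.idx] >= target {
				return it.keys[it.idx], true
			}
			it.idx++
		}
	}
	it.idx = sort.SearchStrings(it.keys, target) // regular seek path
	if it.idx < len(it.keys) {
		return it.keys[it.idx], true
	}
	return "", false
}

func main() {
	it := &blockIterSketch{keys: []string{"a", "b", "c", "d", "e", "f"}}
	for _, s := range []string{"a", "b", "d"} { // ascending seeks, as in a batched scan
		k, ok := it.seekGE(s)
		fmt.Println(s, "->", k, ok)
	}
}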

name                                            old time/op    new time/op    delta
MergingIterSeekGE/restart=16/count=1-16            841ns ± 5%     806ns ± 3%   -4.12%  (p=0.014 n=8+10)
MergingIterSeekGE/restart=16/count=2-16           1.78µs ± 9%    1.67µs ± 4%   -6.21%  (p=0.001 n=9+10)
MergingIterSeekGE/restart=16/count=3-16           2.60µs ±19%    2.46µs ± 2%     ~     (p=0.529 n=10+10)
MergingIterSeekGE/restart=16/count=4-16           3.44µs ±10%    3.34µs ± 4%     ~     (p=0.123 n=10+10)
MergingIterSeekGE/restart=16/count=5-16           4.35µs ± 5%    4.35µs ± 4%     ~     (p=0.825 n=10+9)
MergingIterNext/restart=16/count=1-16             37.7ns ± 4%    37.2ns ± 1%   -1.29%  (p=0.008 n=9+10)
MergingIterNext/restart=16/count=2-16             60.7ns ± 6%    59.6ns ± 1%     ~     (p=0.209 n=10+10)
MergingIterNext/restart=16/count=3-16             77.4ns ± 3%    75.8ns ± 1%   -2.10%  (p=0.036 n=10+9)
MergingIterNext/restart=16/count=4-16             86.9ns ± 2%    87.7ns ± 1%   +0.91%  (p=0.007 n=9+10)
MergingIterNext/restart=16/count=5-16              104ns ± 2%     103ns ± 2%     ~     (p=0.907 n=10+9)
MergingIterPrev/restart=16/count=1-16             52.2ns ± 3%    51.6ns ± 2%     ~     (p=0.118 n=10+10)
MergingIterPrev/restart=16/count=2-16             76.5ns ± 2%    75.8ns ± 2%     ~     (p=0.130 n=9+9)
MergingIterPrev/restart=16/count=3-16             94.3ns ± 2%    92.2ns ± 2%   -2.22%  (p=0.001 n=10+10)
MergingIterPrev/restart=16/count=4-16              103ns ± 1%     102ns ± 2%   -0.87%  (p=0.038 n=10+10)
MergingIterPrev/restart=16/count=5-16              118ns ± 2%     117ns ± 1%     ~     (p=0.085 n=10+8)
MergingIterSeqSeekGEWithBounds/levelCount=5-16    3.64µs ± 7%    0.73µs ± 2%  -79.86%  (p=0.000 n=10+10)

name                                                old time/op    new time/op    delta
LevelIterSeekGE/restart=16/count=5-16                 1.23µs ± 4%    1.23µs ± 3%     ~     (p=0.765 n=10+9)
LevelIterSeqSeekGEWithBounds/restart=16/count=5-16     862ns ± 8%     147ns ± 3%  -82.93%  (p=0.000 n=10+10)
LevelIterNext/restart=16/count=5-16                   18.7ns ± 4%    18.5ns ± 4%     ~     (p=0.386 n=10+9)
LevelIterPrev/restart=16/count=5-16                   31.4ns ± 8%    32.5ns ± 7%     ~     (p=0.055 n=10+10)
sumeerbhola committed Aug 17, 2020
1 parent 76afcca commit a24bc6c
Showing 5 changed files with 353 additions and 61 deletions.
13 changes: 8 additions & 5 deletions internal/base/iterator.go
@@ -52,11 +52,14 @@ import "fmt"
// Last if there is an upper bound). This imposition is done in order to
// elevate that enforcement to the caller (generally pebble.Iterator or
// pebble.mergingIter) rather than having it duplicated in every
// InternalIterator implementation. InternalIterator implementations are
// required to respect the iterator bounds, never returning records outside of
// the bounds with one exception: an iterator may generate synthetic RANGEDEL
// marker records. See levelIter.syntheticBoundary for the sole existing
// example of this behavior. [TODO(peter): can we eliminate this exception?]
// InternalIterator implementation. Additionally, the caller needs to ensure
// that SeekGE/SeekPrefixGE are not called with a key > the upper bound, and
// SeekLT is not called with a key < the lower bound.
// InternalIterator implementations are required to respect the iterator
// bounds, never returning records outside of the bounds with one exception:
// an iterator may generate synthetic RANGEDEL marker records. See
// levelIter.syntheticBoundary for the sole existing example of this behavior.
// [TODO(peter): can we eliminate this exception?]
//
// An iterator must be closed after use, but it is not necessary to read an
// iterator until exhaustion.
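What the strengthened contract asks of callers, as an illustrative sketch (the helper below is not part of the commit; pebble.Iterator performs the equivalent clamping in the iterator.go hunks that follow):

// Illustrative only: keep a SeekGE key within [lower, upper] so the
// InternalIterator contract above is respected. cmp is the iterator's comparator.
func clampSeekGEKey(cmp func(a, b []byte) int, key, lower, upper []byte) []byte {
	if lower != nil && cmp(key, lower) < 0 {
		return lower
	}
	if upper != nil && cmp(key, upper) > 0 {
		return upper
	}
	return key
}
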
12 changes: 11 additions & 1 deletion iterator.go
@@ -315,6 +315,8 @@ func (i *Iterator) SeekGE(key []byte) bool {
i.prefix = nil
if lowerBound := i.opts.GetLowerBound(); lowerBound != nil && i.cmp(key, lowerBound) < 0 {
key = lowerBound
} else if upperBound := i.opts.GetUpperBound(); upperBound != nil && i.cmp(key, upperBound) > 0 {
key = upperBound
}

i.iterKey, i.iterValue = i.iter.SeekGE(key)
@@ -370,6 +372,12 @@ func (i *Iterator) SeekPrefixGE(key []byte) bool {
return false
}
key = lowerBound
} else if upperBound := i.opts.GetUpperBound(); upperBound != nil && i.cmp(key, upperBound) > 0 {
if n := i.split(upperBound); !bytes.Equal(i.prefix, upperBound[:n]) {
i.err = errors.New("pebble: SeekPrefixGE supplied with key outside of upper bound")
return false
}
key = upperBound
}

i.iterKey, i.iterValue = i.iter.SeekPrefixGE(i.prefix, key)
@@ -382,8 +390,10 @@ func (i *Iterator) SeekLT(key []byte) bool {
func (i *Iterator) SeekLT(key []byte) bool {
i.err = nil // clear cached iteration error
i.prefix = nil
if upperBound := i.opts.GetUpperBound(); upperBound != nil && i.cmp(key, upperBound) >= 0 {
if upperBound := i.opts.GetUpperBound(); upperBound != nil && i.cmp(key, upperBound) > 0 {
key = upperBound
} else if lowerBound := i.opts.GetLowerBound(); lowerBound != nil && i.cmp(key, lowerBound) < 0 {
key = lowerBound
}

i.iterKey, i.iterValue = i.iter.SeekLT(key)
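The access pattern that benefits is a bounded iterator driven by ascending seeks, which is the shape of CockroachDB's batched scans and of the SeqSeekGEWithBounds benchmarks. A rough usage sketch follows; it assumes the public pebble API of this era (Open, Set, NewIter, IterOptions) and is not part of the commit.

package main

import (
	"fmt"
	"log"

	"github.com/cockroachdb/pebble"
	"github.com/cockroachdb/pebble/vfs"
)

func main() {
	// In-memory store with a handful of keys, just to drive the iterator.
	db, err := pebble.Open("demo", &pebble.Options{FS: vfs.NewMem()})
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()
	for i := 0; i < 100; i++ {
		if err := db.Set([]byte(fmt.Sprintf("k%03d", i)), []byte("v"), pebble.NoSync); err != nil {
			log.Fatal(err)
		}
	}

	// Bounds are set once; every SeekGE key below stays within them, which is
	// what lets the sstable iterators answer many of the seeks with a few Nexts.
	iter := db.NewIter(&pebble.IterOptions{
		LowerBound: []byte("k000"),
		UpperBound: []byte("k100"),
	})
	defer iter.Close()

	// Ascending seeks, each reading a small batch.
	for _, start := range []string{"k010", "k012", "k040"} {
		n := 0
		for valid := iter.SeekGE([]byte(start)); valid && n < 3; valid = iter.Next() {
			fmt.Printf("%s ", iter.Key())
			n++
		}
		fmt.Println()
	}
}
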
4 changes: 2 additions & 2 deletions level_iter.go
@@ -72,8 +72,8 @@ type levelIter struct {
iter internalIterator
iterFile *fileMetadata
newIters tableNewIters
// When rangeDelIter != nil, the caller requires that a range del iterator
// corresponding to the current file be placed in *rangeDelIter. When this
// When rangeDelIter != nil, the caller requires that *rangeDelIter must point
// to a range del iterator corresponding to the current file. When this
// iterator returns nil, *rangeDelIter should also be set to nil. Whenever
// a non-nil internalIterator is placed in rangeDelIter, a copy is placed
// in rangeDelIterCopy. This is done for the following special case:
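The rangeDelIter comment above describes an out-parameter pattern: the level iterator keeps *rangeDelIter pointing at the range-del iterator of its current file and clears it when there is no current file. A tiny sketch with hypothetical types:

// Hypothetical types; only the pointer-to-pointer discipline matters here.
type rangeDelIterSketch interface{ Close() error }

type levelIterSketch struct {
	// Set by the caller when it wants the range-del iterator of the current file.
	rangeDelIter *rangeDelIterSketch
}

// loadFile is called whenever the level iterator moves to a new file (or runs
// out of files); it keeps *rangeDelIter in sync with that file.
func (l *levelIterSketch) loadFile(rdi rangeDelIterSketch, ok bool) {
	if l.rangeDelIter == nil {
		return // caller did not ask for range-del iterators
	}
	if !ok {
		*l.rangeDelIter = nil // no current file, so no range-del iterator
		return
	}
	*l.rangeDelIter = rdi
}
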
79 changes: 79 additions & 0 deletions sstable/block.go
@@ -238,6 +238,9 @@ type blockIter struct {
cached []blockEntry
cachedBuf []byte
cacheHandle cache.Handle
// The first key in the block. This is used by the caller to set bounds
// for block iteration for already loaded blocks.
firstKey InternalKey
}

// blockIter implements the base.InternalIterator interface.
@@ -266,6 +269,7 @@ func (i *blockIter) init(cmp Compare, block block, globalSeqNum uint64) error {
i.fullKey = i.fullKey[:0]
i.val = nil
i.clearCache()
i.readFirstKey()
return nil
}

@@ -284,11 +288,16 @@ func (i *blockIter) invalidate() {
i.data = nil
}

func (i *blockIter) isInvalid() bool {
return i.data == nil
}

func (i *blockIter) resetForReuse() blockIter {
return blockIter{
fullKey: i.fullKey[:0],
cached: i.cached[:0],
cachedBuf: i.cachedBuf[:0],
data: nil,
}
}

@@ -376,6 +385,76 @@ func (i *blockIter) readEntry() {
i.nextOffset = int32(uintptr(ptr)-uintptr(i.ptr)) + int32(value)
}

func (i *blockIter) readFirstKey() {
ptr := i.ptr

// This is an ugly performance hack. Reading entries from blocks is one of
// the inner-most routines and decoding the 3 varints per-entry takes a
// significant time. Neither go1.11 or go1.12 will inline decodeVarint for
// us, so we do it manually. This provides a 10-15% performance improvement
// on blockIter benchmarks on both go1.11 and go1.12.
//
// TODO(peter): remove this hack if go:inline is ever supported.

var shared uint32
if a := *((*uint8)(ptr)); a < 128 {
shared = uint32(a)
ptr = unsafe.Pointer(uintptr(ptr) + 1)
} else {
panic("first key in block should not share")
}
if shared != 0 {
panic("first key in block should not share")
}

var unshared uint32
if a := *((*uint8)(ptr)); a < 128 {
unshared = uint32(a)
ptr = unsafe.Pointer(uintptr(ptr) + 1)
} else if a, b := a&0x7f, *((*uint8)(unsafe.Pointer(uintptr(ptr) + 1))); b < 128 {
unshared = uint32(b)<<7 | uint32(a)
ptr = unsafe.Pointer(uintptr(ptr) + 2)
} else if b, c := b&0x7f, *((*uint8)(unsafe.Pointer(uintptr(ptr) + 2))); c < 128 {
unshared = uint32(c)<<14 | uint32(b)<<7 | uint32(a)
ptr = unsafe.Pointer(uintptr(ptr) + 3)
} else if c, d := c&0x7f, *((*uint8)(unsafe.Pointer(uintptr(ptr) + 3))); d < 128 {
unshared = uint32(d)<<21 | uint32(c)<<14 | uint32(b)<<7 | uint32(a)
ptr = unsafe.Pointer(uintptr(ptr) + 4)
} else {
d, e := d&0x7f, *((*uint8)(unsafe.Pointer(uintptr(ptr) + 4)))
unshared = uint32(e)<<28 | uint32(d)<<21 | uint32(c)<<14 | uint32(b)<<7 | uint32(a)
ptr = unsafe.Pointer(uintptr(ptr) + 5)
}

// Skip the value length.
if a := *((*uint8)(ptr)); a < 128 {
ptr = unsafe.Pointer(uintptr(ptr) + 1)
} else if a := *((*uint8)(unsafe.Pointer(uintptr(ptr) + 1))); a < 128 {
ptr = unsafe.Pointer(uintptr(ptr) + 2)
} else if a := *((*uint8)(unsafe.Pointer(uintptr(ptr) + 2))); a < 128 {
ptr = unsafe.Pointer(uintptr(ptr) + 3)
} else if a := *((*uint8)(unsafe.Pointer(uintptr(ptr) + 3))); a < 128 {
ptr = unsafe.Pointer(uintptr(ptr) + 4)
} else {
ptr = unsafe.Pointer(uintptr(ptr) + 5)
}

firstKey := getBytes(ptr, int(unshared))
// Manually inlining base.DecodeInternalKey provides a 5-10% speedup on
// BlockIter benchmarks.
if n := len(firstKey) - 8; n >= 0 {
i.firstKey.Trailer = binary.LittleEndian.Uint64(firstKey[n:])
i.firstKey.UserKey = firstKey[:n:n]
if i.globalSeqNum != 0 {
i.firstKey.SetSeqNum(i.globalSeqNum)
}
} else {
// TODO: propagate this error?
i.firstKey.Trailer = uint64(InternalKeyKindInvalid)
i.firstKey.UserKey = nil
}
}

func (i *blockIter) decodeInternalKey(key []byte) {
// Manually inlining base.DecodeInternalKey provides a 5-10% speedup on
// BlockIter benchmarks.
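For readability, the manual varint reads in readFirstKey correspond to the unoptimized sketch below, written against the same package context (InternalKey, InternalKeyKindInvalid) and assuming "encoding/binary" and "errors" are imported; the hand-inlined version exists because, per the comment above, the Go compilers of the time would not inline the varint decoding in this hot path.

// Unoptimized equivalent of readFirstKey: decode the shared, unshared and
// value-length varints, then split off the first key's 8-byte trailer.
func readFirstKeySimple(data []byte, globalSeqNum uint64) (InternalKey, error) {
	shared, n := binary.Uvarint(data)
	if n <= 0 || shared != 0 {
		return InternalKey{}, errors.New("first key in block must not share a prefix")
	}
	data = data[n:]

	unshared, n := binary.Uvarint(data) // length of the fully stored first key
	if n <= 0 {
		return InternalKey{}, errors.New("corrupt block: bad key length")
	}
	data = data[n:]

	if _, n = binary.Uvarint(data); n <= 0 { // value length; the value is not needed here
		return InternalKey{}, errors.New("corrupt block: bad value length")
	}
	data = data[n:]

	if unshared > uint64(len(data)) {
		return InternalKey{}, errors.New("corrupt block: key overflows block")
	}
	firstKey := data[:unshared]
	k := len(firstKey) - 8 // the last 8 bytes hold the seqnum+kind trailer
	if k < 0 {
		return InternalKey{Trailer: uint64(InternalKeyKindInvalid)}, errors.New("corrupt block: short key")
	}
	ikey := InternalKey{
		UserKey: firstKey[:k:k],
		Trailer: binary.LittleEndian.Uint64(firstKey[k:]),
	}
	if globalSeqNum != 0 {
		ikey.SetSeqNum(globalSeqNum)
	}
	return ikey, nil
}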