Skip to content

Commit

Permalink
new-feature: slimtrie: add option: ReduceSameValue to remove adjacent…
Browse files Browse the repository at this point in the history
… records with the same value. By default true
  • Loading branch information
drmingdrmer committed Nov 27, 2020
1 parent 39fa19b commit 2dc0375
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 18 deletions.
20 changes: 10 additions & 10 deletions trie/nodes.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions trie/nodes.proto
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ message Bitmap {
repeated uint64 Words = 20;


// RankIndex speeds up rank() by pre-calcuate it
// RankIndex speeds up rank() by pre-calculating it
//
// Since 0.5.10
repeated int32 RankIndex = 30;


// SelectIndex speeds up select() by pre-calcuate it
// SelectIndex speeds up select() by pre-calculating it
//
// Since 0.5.10
repeated int32 SelectIndex = 40;
Expand Down Expand Up @@ -71,7 +71,7 @@ message VLenArray {
// Since 0.5.10
message Nodes {

// BigInnerCnt is number of big (267 bit) inner node.
// BigInnerCnt is the number of big (257 bit) inner nodes.
//
// Since 0.5.10
int32 BigInnerCnt = 11;
Expand Down Expand Up @@ -157,7 +157,7 @@ message Nodes {
// An array element is a control byte followed by several data bytes.
//
// The 0-th bit in the control byte indicates whether a prefix is
// trucated(not aligned to 8-bit).
// truncated(not aligned to 8-bit).
//
// An inner node may have a prefix, if the starting bit of the node > the end
// of previous node.
Expand All @@ -166,7 +166,7 @@ message Nodes {
// Thus we need a bitmap to indicate this.
// If prefix length is not 8-bit aligned, the trailing bits are filled with a
// "1" followed by "0"s.
// To retrieve the acturate prefix, remove the bits from the last "1".
// To retrieve the accurate prefix, remove the bits from the last "1".
// E.g.:
//
// prefix: 11001100 11000011
Expand Down
19 changes: 19 additions & 0 deletions trie/slimtrie.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,20 @@ type SlimTrie struct {
//
// Since 0.5.10
type Opt struct {

// ReduceSameValue removes branches that lead to the same value as the previous leaf.
// By default it is true.
// Reducing leaves with the same value is a practical way to optimize an index. E.g., an application stores in memory the indexes of 3 keys:
// a, b, c, pointing to disk offsets 100, 105, 108.
// In this case the offsets of a, b, c are very close to each other, and a disk IO smaller than 4KB costs the same regardless of its size.
// Thus the index does not need to store the exact offset, but instead only the 4KB-aligned index.
// Thus a, b, c have the same offset 100 / 4KB.
// With this assumption, the in-memory index is significantly reduced
// by recording only the index of a. Because we know that a<b<c, we just load the chunk at this offset.
//
// Since 0.5.10
ReduceSameValue *bool

// CompleteInner tells SlimTrie to store text on a trie branch to inner
// node(not to leaf node), instead of storing only branch length.
// With this option SlimTrie costs more space but reduces false positive
Expand Down Expand Up @@ -167,6 +181,11 @@ func NewSlimTrie(e encode.Encoder, keys []string, values interface{}, opts ...Op
opt.CompleteLeaf = true
}

if opt.ReduceSameValue == nil {
opt.ReduceSameValue = new(bool)
*opt.ReduceSameValue = true
}

return newSlimTrie(e, keys, values, opt)
}

Expand Down
10 changes: 9 additions & 1 deletion trie/slimtrie_create.go
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,15 @@ func newSlimTrie(e encode.Encoder, keys []string, values interface{}, opt Opt) (
}
}

tokeep := newValueToKeep(keys, values)
var tokeep []bool
if *opt.ReduceSameValue {
tokeep = newValueToKeep(keys, values)
} else {
tokeep = make([]bool, n)
for i := 0; i < n; i++ {
tokeep[i] = true
}
}

rvals := reflect.ValueOf(values)

Expand Down
4 changes: 2 additions & 2 deletions trie/slimtrie_query.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ type querySession struct {
key string

// Whether an inner node has common prefix.
// It may stores only length of prefix in prefixBitLen, or extact prefix
// It may store only length of prefix in prefixBitLen, or exact prefix
// string in prefix.
hasPrefixContent bool

Expand Down Expand Up @@ -77,7 +77,7 @@ func (st *SlimTrie) RangeGet(key string) (interface{}, bool) {

lID, eqID, _ := st.searchID(key)

// an "equal" macth means key is a prefix of either start or end of a range.
// an "equal" match means key is a prefix of either start or end of a range.
if eqID != -1 {
// TODO eqID must be a leaf if it is not -1
return st.getLeaf(eqID), true
Expand Down

0 comments on commit 2dc0375

Please sign in to comment.