Skip to content

Commit

Permalink
Merge 0fee966 into 7e480ed
Browse files Browse the repository at this point in the history
  • Loading branch information
drmingdrmer committed May 21, 2019
2 parents 7e480ed + 0fee966 commit 7231ff8
Show file tree
Hide file tree
Showing 6 changed files with 173 additions and 75 deletions.
2 changes: 2 additions & 0 deletions index/example_range_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ func Example_indexRanges() {
// Alison |

{Key: "Aaron", Offset: 0},
{Key: "Agatha", Offset: 0},
{Key: "Al", Offset: 0},
{Key: "Albert", Offset: 0},

{Key: "Alexander", Offset: 31},
Expand Down
65 changes: 26 additions & 39 deletions trie/example_slimtrie_range_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,30 +10,17 @@ func ExampleSlimTrie_RangeGet() {

// To index a map of key range to value with SlimTrie is very simple:
//
// Just give two adjacent keys the same value, then SlimTrie knows these
// keys belong to a "range".
// These two keys are left and right boundaries of a range, and are both
// inclusive.
//
// In this example we:
//
// map [abc, abcd] to 1
// map [bc, bc] to 2 // this range has only one key in it.
// map [bcd, bce] to 3
//
// With RangeGet() to get any key that "abc" <= key <= "abcd", such as
// "abc1", "abc2"... should return "1".
//
// False Positive
//
// Just like Bloomfilter, SlimTrie does not contain full information of keys,
// thus there could be a false positive return:
// It returns some value and "true" but the key is not in there.
// Give a set of keys the same value, and use RangeGet() instead of Get().
// SlimTrie does not store branches for adjacent leaves with the same value.

keys := []string{
"abc", "abcd",
"abc",
"abcd",

"bc",
"bcd", "bce",

"bcd",
"bce",
}
values := []int{
1, 1,
Expand All @@ -49,23 +36,23 @@ func ExampleSlimTrie_RangeGet() {
key string
msg string
}{
{"ab", "smaller than any"},
{"ab", "FALSE POSITIVE: all known key starts with a are mapped to 1"},

{"abc", "in range [abc, abcd]"},
{"abc1", "in range [abc, abcd]"},
{"abc2", "in range [abc, abcd]"},
{"abcd", "in range [abc, abcd]"},
{"abc", "in range"},
{"abc1", "FALSE POSITIVE"},
{"abc2", "FALSE POSITIVE"},
{"abcd", "in range"},

{"abcde", "FALSE POSITIVE: a suffix of abcd"},

{"acc", "FALSE POSITIVE: not in range [abc, abcd]"},
{"acc", "FALSE POSITIVE"},

{"bc", "in single key range [bc]"},
{"bc1", "not in single key range [bc]"},
{"bc1", "FALSE POSITIVE"},

{"bcd1", "in range [bcd, bce]"},
{"bcd1", "FALSE POSITIVE"},

{"def", "greater than any"},
{"def", "FALSE POSITIVE"},
}

for _, c := range cases {
Expand All @@ -74,15 +61,15 @@ func ExampleSlimTrie_RangeGet() {
}

// Output:
// ab <nil> false: smaller than any
// abc 1 true : in range [abc, abcd]
// abc1 1 true : in range [abc, abcd]
// abc2 1 true : in range [abc, abcd]
// abcd 1 true : in range [abc, abcd]
// ab 1 true : FALSE POSITIVE: all known key starts with a are mapped to 1
// abc 1 true : in range
// abc1 1 true : FALSE POSITIVE
// abc2 1 true : FALSE POSITIVE
// abcd 1 true : in range
// abcde 1 true : FALSE POSITIVE: a suffix of abcd
// acc 1 true : FALSE POSITIVE: not in range [abc, abcd]
// acc 1 true : FALSE POSITIVE
// bc 2 true : in single key range [bc]
// bc1 <nil> false: not in single key range [bc]
// bcd1 3 true : in range [bcd, bce]
// def <nil> false: greater than any
// bc1 2 true : FALSE POSITIVE
// bcd1 3 true : FALSE POSITIVE
// def 3 true : FALSE POSITIVE
}
45 changes: 13 additions & 32 deletions trie/slimtrie.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,8 @@
// Actually besides as a key value map,
// to index a map of key range to value with SlimTrie is also very simple:
//
// Just give two adjacent keys the same value, then SlimTrie
// knows these keys belong to a "range".
// These two keys are left and right boundaries of a range, and are both
// inclusive.
//
// // a to g --> 1
// // h --> 2
// st, err := NewSlimTrie(encode.Int{}, []string{"a", "g", "h"}, []int{1, 1, 2})
//
// st.Get("a") // 1, true A normal key-value Get()
// st.Get("c") // nil, false A key-value Get() got nothing.
// st.RangeGet("c") // 1, true A range get got 1
// st.RangeGet("g") // 1, true
// st.RangeGet("h") // 2, true
// Give a set of keys the same value, and use RangeGet() instead of Get().
// SlimTrie does not store branches for adjacent leaves with the same value.
//
// See SlimTrie.RangeGet .
//
Expand Down Expand Up @@ -123,6 +111,8 @@ func (st *SlimTrie) loadBytes(keys [][]byte, values interface{}) (err error) {
return err
}

trie.removeSameLeaf()

err = st.LoadTrie(trie)
return err
}
Expand Down Expand Up @@ -230,36 +220,27 @@ func (st *SlimTrie) LoadTrie(root *Node) (err error) {
// Since 0.4.3
func (st *SlimTrie) RangeGet(key string) (interface{}, bool) {

lID, eqID, rID := st.searchID(key)
lID, eqID, _ := st.searchID(key)

// an "equal" match means key is a prefix of either start or end of a range.
if eqID != -1 {
return st.Leaves.Get(eqID)
v, found := st.Leaves.Get(eqID)
if found {
return v, found
}

// else: maybe matched at an inner node.
}

// key is smaller than any range-start or range-end.
if lID == -1 {
return nil, false
}

// key is greater than any range-start or range-end.
if rID == -1 {
return nil, false
}
// Preceding value is the start of this range.
// It might be a false-positive

lVal, _ := st.Leaves.Get(lID)
rVal, _ := st.Leaves.Get(rID)

// If left-value != right-value, the key is between a range-end and next
// range-start.
if lVal != rVal {
return nil, false
}

// If range[i].end == range[i+1].start, it is a false positive.
// SlimTrie can not distinguish this from a positive match.
//
// Otherwise, lVal and rVal must be the start and end of a single range.
return lVal, true
}

Expand Down
38 changes: 35 additions & 3 deletions trie/slimtrie_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -508,9 +508,9 @@ func TestRangeGet(t *testing.T) {
{"bce", 3, true},
{"c", 4, true}, // false positive
{"cde", 4, true},
{"cfe", 4, true}, // false positive
{"cff", 4, true}, // false positive
{"def", nil, false}, // false positive
{"cfe", 4, true}, // false positive
{"cff", 4, true}, // false positive
{"def", 4, true}, // false positive
}

st, err := NewSlimTrie(encode.Int{}, keys, values)
Expand All @@ -528,6 +528,38 @@ func TestRangeGet(t *testing.T) {
}
}

// TestSlimTrie_RangeGet_rangeindex_bug_2019_05_21 is a regression test for a
// RangeGet bug reported by Liu Baohai.
//
// Six keys containing non-ASCII characters are split into three groups of
// adjacent keys sharing one value each (0, 1 and 2). Every key that was
// inserted must be found by RangeGet and must map to the value of its own
// group.
func TestSlimTrie_RangeGet_rangeindex_bug_2019_05_21(t *testing.T) {

	ta := require.New(t)

	keys := []string{
		// group 0
		"test/存界needleid00011end",

		// group 1
		"test/山我needleid00009end",
		"test/界世needleid00005end",
		"test/白我needleid00006end",

		// group 2
		"test/白测needleid00008end",
		"test/试世needleid00014end",
	}
	values := []int32{0, 1, 1, 1, 2, 2}

	st, err := NewSlimTrie(encode.I32{}, keys, values)
	ta.Nil(err)

	for i := range keys {
		key, want := keys[i], values[i]

		got, found := st.RangeGet(key)

		// Report the 1-based position of the failing key.
		ta.Equal(want, got, "%d-th: search: %+v", i+1, key)
		ta.Equal(true, found, "%d-th: search: %+v", i+1, key)
	}
}

func TestNewSlimTrie(t *testing.T) {

st, err := NewSlimTrie(encode.Int{}, []string{"ab", "cd"}, []int{1, 2})
Expand Down
57 changes: 56 additions & 1 deletion trie/trie.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ type Node struct {

squash bool

// TODO inner node count. fix it
NodeCnt int
}

Expand All @@ -26,7 +27,7 @@ const leafBranch = -1
// key.
func NewTrie(keys [][]byte, values interface{}, squash bool) (root *Node, err error) {

root = &Node{Children: make(map[int]*Node), Step: 1, squash: squash}
root = &Node{Children: make(map[int]*Node), Step: 1, squash: squash, NodeCnt: 1}

if keys == nil {
return
Expand Down Expand Up @@ -81,6 +82,60 @@ func (r *Node) Squash() int {
return cnt
}

// removeSameLeaf removes every leaf that has the same value as the leaf
// visited just before it, and every inner node that has no branch left.
//
//	a ------->e =1
//	 `>b------>f =2
//	    `>c->d->g =2 // "g" and "d" are removed; "c" has another child and is kept.
//	       `--->h =3
//
// Nodes are visited through DepthFirst(); the callback receives the parent,
// the branch leading to the node and the node itself. The root (no parent or
// no branch) is never removed.
//
// r.NodeCnt is decremented once for every removed inner node; removed leaves
// do not affect it.
//
// NOTE(review): removing an emptied inner node relies on DepthFirst() calling
// the callback for a node after its children have been handled -- confirm
// against DepthFirst.
//
// NOTE(review): leaf values are compared with ==, so the first leaf is removed
// if its value is nil, and a value of a non-comparable dynamic type would
// panic -- confirm callers never pass such values.
//
// Since 0.5.5
func (r *Node) removeSameLeaf() {

	// Value of the most recent leaf that was kept.
	var prevValue interface{}

	// wrapped as a generalized tree
	s := &trieStringly{tnode: r}

	DepthFirst(s,
		func(t Tree, parent, branch, node interface{}) {

			n := node.(*Node)

			v, isLeaf := t.LeafVal(node)

			needRemove := false
			if isLeaf {
				if v == prevValue {
					// same value no need to store
					needRemove = true
				} else {
					prevValue = v
				}
			} else if len(n.Branches) == 0 {
				// An inner node without any branch leads nowhere.
				needRemove = true
			}

			if !needRemove || parent == nil || branch == nil {
				return
			}

			p := parent.(*Node)
			b := branch.(int)

			delete(p.Children, b)

			for i, bb := range p.Branches {
				if bb == b {
					p.Branches = append(p.Branches[:i], p.Branches[i+1:]...)
					// Stop here: the append above shifted the backing array
					// in place, so continuing the range would read moved
					// elements and a stale tail element. A branch presumably
					// appears at most once in p.Branches.
					break
				}
			}

			if !isLeaf {
				r.NodeCnt--
			}
		})
}

// Search for `key` in a Trie.
//
// It returns 3 values of:
Expand Down
41 changes: 41 additions & 0 deletions trie/trie_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (

"github.com/openacid/errors"
"github.com/openacid/slim/benchhelper"
"github.com/stretchr/testify/require"
)

func TestTrie(t *testing.T) {
Expand Down Expand Up @@ -527,3 +528,43 @@ func TestToStrings(t *testing.T) {
t.Fatalf("expect: \n%v\n; but: \n%v\n", expect, trie.String())
}
}

// TestTrie_removeSameLeaf builds a non-squashed trie from eight keys and
// checks both its printed shape and its NodeCnt after removeSameLeaf().
//
// The values form runs of equal adjacent values (0,0,0 / 3 / 4 / 5,5,5); the
// expected output keeps exactly one leaf for each of 0, 3, 4 and 5.
func TestTrie_removeSameLeaf(t *testing.T) {

ta := require.New(t)

var keys = [][]byte{
{'a', 'b', 'c'},
{'a', 'b', 'c', 'd'},
{'a', 'b', 'd'},
{'a', 'b', 'd', 'e'},
{'b', 'c'},
{'b', 'c', 'd'},
{'b', 'c', 'd', 'e'},
{'c', 'd', 'e'},
}
// values[i] is the value stored for keys[i].
var values = []int{0, 0, 0, 3, 4, 5, 5, 5}

// Expected trie.String() output after removal; the [1:] at the end of the
// literal drops the newline that follows the opening back-quote.
want := `
*2
-097->
-098->*2
-099->
-00$->=0
-100->
-101->
-00$->=3
-098->
-099->*2
-00$->=4
-100->
-00$->=5`[1:]

// squash is false: the trie is built without squashing.
trie, err := NewTrie(keys, values, false)
ta.Nil(err)

trie.removeSameLeaf()

ta.Equal(want, trie.String())
// NodeCnt is expected to count only the remaining non-leaf nodes.
ta.Equal(9, trie.NodeCnt, "non-leaf node count")
}

0 comments on commit 7231ff8

Please sign in to comment.