-
Notifications
You must be signed in to change notification settings - Fork 1
/
bk_tree_test.go
152 lines (129 loc) · 3.61 KB
/
bk_tree_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
/*
Package go-bk-tree works like a charm
*/
package go_bk_tree
import (
"fmt"
l "github.com/texttheater/golang-levenshtein/levenshtein"
"math/rand"
"testing"
)
type Word string
func (w Word) DistanceFrom(w2 MetricTensor) Distance {
return Distance(l.DistanceForStrings([]rune(string(w)), []rune(string(w2.(Word))), l.DefaultOptions))
}
func createNewTreeFromWords(words []string) *BKTree {
tree := new(BKTree)
for w := range words {
tree.Add(Word(words[w]))
}
return tree
}
func TestBKTree_Add(t *testing.T) {
wordsList := []string{"a", "ab", "abc", "d"}
tree := createNewTreeFromWords(wordsList)
if rootVal := string(tree.root.MetricTensor.(Word)); rootVal != "a" {
t.Errorf("expected: %s, got: %s", "a", rootVal)
}
level1Children := tree.root.Children
if len(level1Children) != 2 {
t.Errorf("expected: %d, got: %d", 2, len(level1Children))
}
level2Children := tree.root.Children[2].Children // 'd' should be child of 'abc'
if len(level2Children) != 1 {
t.Errorf("expected: %d, got: %d", 1, len(level2Children))
}
}
// Word is a custom struct the implements the MetricTensor interface,
// and it uses the Levenshtein distance as distance function
func ExampleBKTree_Search() {
wordsList := []string{"some", "soft", "sorted", "same", "mole", "soda", "salmon"}
tree := createNewTreeFromWords(wordsList)
// fuzzy match
query := Word("sort")
results := tree.Search(query, 2)
fmt.Println(results)
// exact match
query2 := Word("mole")
results2 := tree.Search(query2, 0)
fmt.Println(results2)
// Output:
// [soft sorted]
// [mole]
}
// ################### Benchmark tests ########################
type Number uint64
// Reference: https://github.com/agatan/bktree/blob/master/bktree_test.go
func hamming(a, b uint64) int {
count := 0
var k uint64 = 1
for i := 0; i < 64; i++ {
if a&k != b&k {
count++
}
k <<= 1
}
return count
}
func (n Number) DistanceFrom(other MetricTensor) Distance {
return Distance(hamming(uint64(n), uint64(other.(Number))))
}
func createNewTreeFromNumbers(nums []Number) *BKTree {
tree := new(BKTree)
for i := range nums {
tree.Add(nums[i])
}
return tree
}
func makeRandomTree(size int) ([]Number, *BKTree) {
fakeStuff := make([]Number, size, size)
for i := 0; i < size; i++ {
fakeStuff = append(fakeStuff, Number(rand.Int()))
}
return fakeStuff, createNewTreeFromNumbers(fakeStuff)
}
func BenchmarkBKTree_Search_ExactMatch(b *testing.B) {
fakeSize := 1000000
fakeStuff, benchmarkTree := makeRandomTree(fakeSize)
b.ResetTimer()
for i := 0; i < b.N; i++ {
randNum := fakeStuff[rand.Intn(fakeSize)]
benchmarkTree.Search(randNum, 0)
}
}
func BenchmarkBKTree_Search_Radius1Match(b *testing.B) {
fakeSize := 1000000
fakeStuff, benchmarkTree := makeRandomTree(fakeSize)
b.ResetTimer()
for i := 0; i < b.N; i++ {
randNum := fakeStuff[rand.Intn(fakeSize)]
benchmarkTree.Search(randNum, 0)
}
}
func BenchmarkBKTree_Search_Radius2Match(b *testing.B) {
fakeSize := 1000000
fakeStuff, benchmarkTree := makeRandomTree(fakeSize)
b.ResetTimer()
for i := 0; i < b.N; i++ {
randNum := fakeStuff[rand.Intn(fakeSize)]
benchmarkTree.Search(randNum, 0)
}
}
func BenchmarkBKTree_Search_Radius4Match(b *testing.B) {
fakeSize := 1000000
fakeStuff, benchmarkTree := makeRandomTree(fakeSize)
b.ResetTimer()
for i := 0; i < b.N; i++ {
randNum := fakeStuff[rand.Intn(fakeSize)]
benchmarkTree.Search(randNum, 0)
}
}
func BenchmarkBKTree_Search_Radius32Match(b *testing.B) {
fakeSize := 1000000
fakeStuff, benchmarkTree := makeRandomTree(fakeSize)
b.ResetTimer()
for i := 0; i < b.N; i++ {
randNum := fakeStuff[rand.Intn(fakeSize)]
benchmarkTree.Search(randNum, 0)
}
}