-
Notifications
You must be signed in to change notification settings - Fork 0
/
bloom_filter.go
101 lines (87 loc) · 2.48 KB
/
bloom_filter.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
package collection
import (
"fmt"
"math"
jsoniter "github.com/json-iterator/go"
"github.com/pedrogao/plib/pkg/hash"
)
// For an explanation of the math, visit
// https://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives.
// A condensed explanation can be found here:
// https://stackoverflow.com/questions/658439/how-many-hash-functions-does-my-bloom-filter-need
// BloomFilter 布隆过滤器
type BloomFilter struct {
falsePositivePob float64
bitArraySize, hashCount int
bit *BitArray
}
// NewBloomFilter 新建布隆过滤器
func NewBloomFilter(numItems int, falsePositivePob float64) *BloomFilter {
bf := &BloomFilter{
falsePositivePob: falsePositivePob,
bit: NewBitArray(numItems),
}
bitArraySize := bf.calculateBitArraySize(numItems, falsePositivePob)
hashCount := bf.calculateHashCount(bitArraySize, numItems)
bf.hashCount = hashCount
bf.bitArraySize = bitArraySize
return bf
}
func (f *BloomFilter) Add(item string) {
data := []byte(item)
for i := 0; i < f.hashCount; i++ {
digest := int(hash.Murmur332(data, uint32(i))) % f.bitArraySize
f.bit.Add(digest)
}
}
func (f *BloomFilter) Check(item string) bool {
data := []byte(item)
for i := 0; i < f.hashCount; i++ {
digest := int(hash.Murmur332(data, uint32(i))) % f.bitArraySize
if !f.bit.Has(digest) {
return false
}
}
return true
}
func (f *BloomFilter) calculateBitArraySize(numItems int,
probability float64) int {
// m = -(n * lg(p)) / (lg(2)^2)
m := -(float64(numItems) * math.Log(probability) / (math.Pow(math.Log(2), 2)))
return int(m)
}
func (f *BloomFilter) calculateHashCount(bitArraySize, numItems int) int {
// k = (m/n) * lg(2)
k := (float64(bitArraySize) / float64(numItems)) * math.Log(2)
return int(k)
}
func (f *BloomFilter) Pack() []byte {
bytes, err := jsoniter.Marshal(map[string]any{
"falsePositivePob": f.falsePositivePob,
"bitArraySize": f.bitArraySize,
"hashCount": f.hashCount,
"bit": f.bit.Pack(),
})
if err != nil {
panic(err)
}
return bytes
}
func (f *BloomFilter) UnPack(data []byte) error {
m := map[string]any{}
var err error
err = jsoniter.Unmarshal(data, &m)
if err != nil {
return fmt.Errorf("unmarshal bloom filter err: %s", err)
}
bit := &BitArray{}
f.falsePositivePob = m["falsePositivePob"].(float64)
f.bitArraySize = m["bitArraySize"].(int)
f.hashCount = m["hashCount"].(int)
err = bit.UnPack(m["bit"].([]byte))
if err != nil {
return err
}
f.bit = bit
return nil
}