forked from ipfs/kubo
-
Notifications
You must be signed in to change notification settings - Fork 0
/
filter.go
124 lines (98 loc) · 2.3 KB
/
filter.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
// Package bloom implements a simple Bloom filter.
package bloom
import (
"encoding/binary"
"errors"
// Non-cryptographic hash, chosen for speed.
"github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/mtchavez/jenkins"
"github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/steakknife/hamming"
"hash"
)
// Filter is the interface exposed by this package's Bloom filter.
// Values are obtained from NewFilter or BasicFilter.
type Filter interface {
	// Add records item in the filter.
	Add(item []byte)

	// Find reports whether every bit associated with item is set.
	// A false result means item was never added; a true result may be
	// a false positive.
	Find(item []byte) bool

	// Merge returns a new Filter combining the receiver with other, or an
	// error when the two filters cannot be combined.
	Merge(other Filter) (Filter, error)

	// HammingDistance returns a bit-level distance between the receiver
	// and other, or an error when the two filters cannot be compared.
	HammingDistance(other Filter) (int, error)
}
// NewFilter returns an empty Filter backed by size bytes (size*8 bits).
// Each item is mapped to 3 bit positions using a Jenkins hash.
func NewFilter(size int) Filter {
	bf := new(filter)
	bf.hash = jenkins.New()
	bf.filter = make([]byte, size)
	bf.k = 3 // number of bit positions derived per item
	return bf
}
// filter is the concrete Filter implementation returned by NewFilter.
type filter struct {
// filter is the bit array: bit i is stored in byte i/8 at bit position i%8.
filter []byte
// hash derives the bit positions for an item; it is reset after every
// lookup so no state carries over between items.
hash hash.Hash32
// k is the number of bit positions derived (and set or tested) per item.
k int
}
// BasicFilter returns a Filter with the package's default size of
// 2048 bytes, built via NewFilter.
func BasicFilter() Filter {
	const defaultSizeBytes = 2048
	return NewFilter(defaultSizeBytes)
}
// Add records bytes in the filter by setting every bit position that
// getBitIndicies derives for it.
func (f *filter) Add(bytes []byte) {
	positions := f.getBitIndicies(bytes)
	for n := 0; n < len(positions); n++ {
		f.setBit(positions[n])
	}
}
// getBitIndicies returns the f.k bit positions that bytes maps to.
//
// The positions are derived by chaining the filter's hash: bytes is written
// to the hash, and on each round the current 32-bit sum is reduced modulo
// the number of bits in the filter, then fed back into the hash
// (little-endian) to produce the next round's sum. The hash is reset before
// returning so the next call starts from a clean state.
//
// Because the sum is 32 bits wide, only the first 2^32 bits of a very large
// filter can ever be addressed.
//
// For a filter with no bits the result is empty, so Add becomes a no-op and
// Find reports true (a zero-bit filter can never rule anything out).
func (f *filter) getBitIndicies(bytes []byte) []uint32 {
	// Compute the bit count in 64 bits: uint32(len)*8 wraps around for
	// filters of 512 MiB and up, yielding a wrong (possibly zero) modulus.
	nbits := uint64(len(f.filter)) * 8
	if nbits == 0 {
		// Avoid an integer divide-by-zero panic on an empty filter.
		return nil
	}

	indicies := make([]uint32, f.k)
	f.hash.Write(bytes)

	var scratch [4]byte
	for i := range indicies {
		res := f.hash.Sum32()
		// The remainder is <= res, so it always fits back into a uint32.
		indicies[i] = uint32(uint64(res) % nbits)
		binary.LittleEndian.PutUint32(scratch[:], res)
		f.hash.Write(scratch[:])
	}

	f.hash.Reset()
	return indicies
}
// Find reports whether all bit positions derived for bytes are set.
// It returns false as soon as one unset bit is found; a true result may be
// a false positive caused by other items having set the same bits.
func (f *filter) Find(bytes []byte) bool {
	positions := f.getBitIndicies(bytes)
	for n := range positions {
		if f.getBit(positions[n]) {
			continue
		}
		return false
	}
	return true
}
// setBit turns on bit i of the bit array: byte i/8, bit position i%8.
// It panics if i lies beyond the end of the array.
func (f *filter) setBit(i uint32) {
	byteIdx := i >> 3
	mask := byte(1) << (i & 7)
	f.filter[byteIdx] |= mask
}
// getBit reports whether bit i of the bit array (byte i/8, bit position
// i%8) is set. It panics if i lies beyond the end of the array.
func (f *filter) getBit(i uint32) bool {
	mask := byte(1) << (i & 7)
	return f.filter[i>>3]&mask != 0
}
// Merge returns a new filter whose bit array is the bitwise OR of f and o,
// leaving both inputs unmodified.
//
// An error is returned when o is not this package's filter type, or when
// the two filters differ in byte length or in k. The returned filter reuses
// f's hash instance rather than creating a fresh one.
func (f *filter) Merge(o Filter) (Filter, error) {
	other, isFilter := o.(*filter)
	switch {
	case !isFilter:
		return nil, errors.New("Unsupported filter type")
	case len(other.filter) != len(f.filter):
		return nil, errors.New("filter lengths must match!")
	case other.k != f.k:
		return nil, errors.New("filter k-values must match!")
	}

	merged := &filter{
		filter: make([]byte, len(f.filter)),
		hash:   f.hash,
		k:      f.k,
	}
	for i := range merged.filter {
		merged.filter[i] = f.filter[i] | other.filter[i]
	}
	return merged, nil
}
// HammingDistance returns the sum of hamming.Byte over each pair of
// corresponding bytes in f and o (presumably the number of bit positions
// in which the two bit arrays differ).
//
// An error is returned when o is not this package's filter type or when
// the two filters differ in byte length. The k values are not compared.
func (f *filter) HammingDistance(o Filter) (int, error) {
	other, isFilter := o.(*filter)
	if !isFilter {
		return 0, errors.New("Unsupported filter type")
	}
	if len(f.filter) != len(other.filter) {
		return 0, errors.New("filter lengths must match!")
	}

	// Accumulate the per-byte distances across the whole array.
	var total int
	for i, mine := range f.filter {
		total += hamming.Byte(mine, other.filter[i])
	}
	return total, nil
}