/
splitter_buzhash32.go
46 lines (39 loc) · 1.03 KB
/
splitter_buzhash32.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
//nolint:dupl
package object
import (
"github.com/chmduquesne/rollinghash/buzhash32"
)
// buzhash32Splitter decides where to cut a byte stream using a buzhash32
// rolling hash, with the distance between cuts bounded by minSize and maxSize.
type buzhash32Splitter struct {
// rh is held as the concrete *buzhash32.Buzhash32 type on purpose: going through the
// rollinghash.Hash32 interface in a tight loop was measured as 40% slower
// because the compiler can't inline the call.
rh *buzhash32.Buzhash32
// mask is ANDed with the current hash; a zero result marks a candidate split point.
mask uint32
// count is the number of bytes fed to ShouldSplit since the last split.
count int
// minSize is the byte count that must be reached before a hash match triggers a split.
minSize int
// maxSize is the byte count at which a split is forced regardless of the hash.
maxSize int
}
// ShouldSplit rolls b into the hash, counts it, and reports whether the
// stream should be cut after this byte.
//
// A cut happens when either:
//   - at least maxSize bytes have accumulated since the previous cut, or
//   - at least minSize bytes have accumulated and the masked hash is zero.
//
// Whenever it returns true the byte counter is reset to zero.
func (rs *buzhash32Splitter) ShouldSplit(b byte) bool {
	rs.rh.Roll(b)
	rs.count++

	switch {
	case rs.count >= rs.maxSize:
		// Size cap reached: cut regardless of the hash value.
	case rs.count >= rs.minSize && rs.rh.Sum32()&rs.mask == 0:
		// Hash-defined boundary, honored only once the minimum size is met.
	default:
		return false
	}

	rs.count = 0

	return true
}
// newBuzHash32SplitterFactory returns a SplitterFactory producing independent
// buzhash32-based splitters tuned for the given average chunk size.
//
// avgSize must be a power of two. Each splitter gets a minimum size of
// avgSize/2 and a maximum size of avgSize*2.
func newBuzHash32SplitterFactory(avgSize int) SplitterFactory {
	var (
		// A power of two has exactly one bit set (0b000001000...0000), so
		// subtracting one yields a mask covering every bit below it.
		hashMask   = uint32(avgSize - 1)
		lowerBound = avgSize / 2
		upperBound = avgSize * 2
	)

	return func() Splitter {
		// Every splitter owns a fresh hash primed with a window of zero bytes.
		// NOTE(review): presumably needed so Roll has an initialized window to
		// slide over — confirm against the rollinghash documentation.
		rh := buzhash32.New()
		rh.Write(make([]byte, splitterSlidingWindowSize)) //nolint:errcheck

		return &buzhash32Splitter{
			rh:      rh,
			mask:    hashMask,
			count:   0,
			minSize: lowerBound,
			maxSize: upperBound,
		}
	}
}