-
-
Notifications
You must be signed in to change notification settings - Fork 4.3k
/
weakhash.go
125 lines (106 loc) · 2.59 KB
/
weakhash.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
// Copyright (C) 2016 The Syncthing Authors.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.
package weakhash
import (
"bufio"
"io"
"os"
"github.com/chmduquesne/rollinghash/adler32"
)
const (
Size = 4
// don't track more hits than this for any given weakhash
maxWeakhashFinderHits = 10
)
var (
Enabled = true
)
// Find finds all the blocks of the given size within io.Reader that matches
// the hashes provided, and returns a hash -> slice of offsets within reader
// map, that produces the same weak hash.
func Find(ir io.Reader, hashesToFind []uint32, size int) (map[uint32][]int64, error) {
if ir == nil || len(hashesToFind) == 0 {
return nil, nil
}
r := bufio.NewReader(ir)
hf := adler32.New()
n, err := io.CopyN(hf, r, int64(size))
if err == io.EOF {
return nil, nil
}
if err != nil {
return nil, err
}
if n != int64(size) {
return nil, io.ErrShortBuffer
}
offsets := make(map[uint32][]int64)
for _, hashToFind := range hashesToFind {
offsets[hashToFind] = make([]int64, 0, maxWeakhashFinderHits)
}
var i int64
var hash uint32
for {
hash = hf.Sum32()
if existing, ok := offsets[hash]; ok && len(existing) < maxWeakhashFinderHits {
offsets[hash] = append(existing, i)
}
i++
bt, err := r.ReadByte()
if err == io.EOF {
break
} else if err != nil {
return offsets, err
}
hf.Roll(bt)
}
return offsets, nil
}
func NewFinder(path string, size int, hashesToFind []uint32) (*Finder, error) {
file, err := os.Open(path)
if err != nil {
return nil, err
}
offsets, err := Find(file, hashesToFind, size)
if err != nil {
file.Close()
return nil, err
}
return &Finder{
file: file,
size: size,
offsets: offsets,
}, nil
}
type Finder struct {
file *os.File
size int
offsets map[uint32][]int64
}
// Iterate iterates all available blocks that matches the provided hash, reads
// them into buf, and calls the iterator function. The iterator function should
// return whether it wishes to continue iterating.
func (h *Finder) Iterate(hash uint32, buf []byte, iterFunc func(int64) bool) (bool, error) {
if h == nil || hash == 0 || len(buf) != h.size {
return false, nil
}
for _, offset := range h.offsets[hash] {
_, err := h.file.ReadAt(buf, offset)
if err != nil {
return false, err
}
if !iterFunc(offset) {
return true, nil
}
}
return false, nil
}
// Close releases any resource associated with the finder
func (h *Finder) Close() {
if h != nil {
h.file.Close()
}
}