/
hidrivehash.go
410 lines (376 loc) · 12.4 KB
/
hidrivehash.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
// Package hidrivehash implements the HiDrive hashing algorithm which combines SHA-1 hashes hierarchically to a single top-level hash.
//
// Note: This implementation does not grant access to any partial hashes generated.
//
// See: https://developer.hidrive.com/wp-content/uploads/2021/07/HiDrive_Synchronization-v3.3-rev28.pdf
// (link to newest version: https://static.hidrive.com/dev/0001)
package hidrivehash
import (
"bytes"
"crypto/sha1"
"encoding"
"encoding/binary"
"errors"
"fmt"
"hash"
"io"
"github.com/rclone/rclone/backend/hidrive/hidrivehash/internal"
)
const (
// BlockSize of the checksum in bytes.
BlockSize = 4096
// Size of the checksum in bytes.
Size = sha1.Size
// sumsPerLevel is the number of checksums
sumsPerLevel = 256
)
var (
// zeroSum is a special hash consisting of 20 null-bytes.
// This will be the hash of any empty file (or ones containing only null-bytes).
zeroSum = [Size]byte{}
// ErrorInvalidEncoding is returned when a hash should be decoded from a binary form that is invalid.
ErrorInvalidEncoding = errors.New("encoded binary form is invalid for this hash")
// ErrorHashFull is returned when a hash reached its capacity and cannot accept any more input.
ErrorHashFull = errors.New("hash reached its capacity")
)
// writeByBlock writes len(p) bytes from p to the io.Writer in blocks of size blockSize.
// It returns the number of bytes written from p (0 <= n <= len(p))
// and any error encountered that caused the write to stop early.
//
// A pointer bytesInBlock to a counter needs to be supplied,
// that is used to keep track how many bytes have been written to the writer already.
// A pointer onlyNullBytesInBlock to a boolean needs to be supplied,
// that is used to keep track whether the block so far only consists of null-bytes.
// The callback onBlockWritten is called whenever a full block has been written to the writer
// and is given as input the number of bytes that still need to be written.
func writeByBlock(p []byte, writer io.Writer, blockSize uint32, bytesInBlock *uint32, onlyNullBytesInBlock *bool, onBlockWritten func(remaining int) error) (n int, err error) {
total := len(p)
nullBytes := make([]byte, blockSize)
for len(p) > 0 {
toWrite := int(blockSize - *bytesInBlock)
if toWrite > len(p) {
toWrite = len(p)
}
c, err := writer.Write(p[:toWrite])
*bytesInBlock += uint32(c)
*onlyNullBytesInBlock = *onlyNullBytesInBlock && bytes.Equal(nullBytes[:toWrite], p[:toWrite])
// Discard data written through a reslice
p = p[c:]
if err != nil {
return total - len(p), err
}
if *bytesInBlock == blockSize {
err = onBlockWritten(len(p))
if err != nil {
return total - len(p), err
}
*bytesInBlock = 0
*onlyNullBytesInBlock = true
}
}
return total, nil
}
// level is a hash.Hash that is used to aggregate the checksums produced by the level hierarchically beneath it.
// It is used to represent any level-n hash, except for level-0.
type level struct {
checksum [Size]byte // aggregated checksum of this level
sumCount uint32 // number of sums contained in this level so far
bytesInHasher uint32 // number of bytes written into hasher so far
onlyNullBytesInHasher bool // whether the hasher only contains null-bytes so far
hasher hash.Hash
}
// NewLevel returns a new hash.Hash computing any level-n hash, except level-0.
func NewLevel() hash.Hash {
l := &level{}
l.Reset()
return l
}
// Add takes a position-embedded SHA-1 checksum and adds it to the level.
func (l *level) Add(sha1sum []byte) {
var tmp uint
var carry bool
for i := Size - 1; i >= 0; i-- {
tmp = uint(sha1sum[i]) + uint(l.checksum[i])
if carry {
tmp++
}
carry = tmp > 255
l.checksum[i] = byte(tmp)
}
}
// IsFull returns whether the number of checksums added to this level reached its capacity.
func (l *level) IsFull() bool {
return l.sumCount >= sumsPerLevel
}
// Write (via the embedded io.Writer interface) adds more data to the running hash.
// Contrary to the specification from hash.Hash, this DOES return an error,
// specifically ErrorHashFull if and only if IsFull() returns true.
func (l *level) Write(p []byte) (n int, err error) {
if l.IsFull() {
return 0, ErrorHashFull
}
onBlockWritten := func(remaining int) error {
if !l.onlyNullBytesInHasher {
c, err := l.hasher.Write([]byte{byte(l.sumCount)})
l.bytesInHasher += uint32(c)
if err != nil {
return err
}
l.Add(l.hasher.Sum(nil))
}
l.sumCount++
l.hasher.Reset()
if remaining > 0 && l.IsFull() {
return ErrorHashFull
}
return nil
}
return writeByBlock(p, l.hasher, uint32(l.BlockSize()), &l.bytesInHasher, &l.onlyNullBytesInHasher, onBlockWritten)
}
// Sum appends the current hash to b and returns the resulting slice.
// It does not change the underlying hash state.
func (l *level) Sum(b []byte) []byte {
return append(b, l.checksum[:]...)
}
// Reset resets the Hash to its initial state.
func (l *level) Reset() {
l.checksum = zeroSum // clear the current checksum
l.sumCount = 0
l.bytesInHasher = 0
l.onlyNullBytesInHasher = true
l.hasher = sha1.New()
}
// Size returns the number of bytes Sum will return.
func (l *level) Size() int {
return Size
}
// BlockSize returns the hash's underlying block size.
// The Write method must be able to accept any amount
// of data, but it may operate more efficiently if all writes
// are a multiple of the block size.
func (l *level) BlockSize() int {
return Size
}
// MarshalBinary encodes the hash into a binary form and returns the result.
func (l *level) MarshalBinary() ([]byte, error) {
b := make([]byte, Size+4+4+1)
copy(b, l.checksum[:])
binary.BigEndian.PutUint32(b[Size:], l.sumCount)
binary.BigEndian.PutUint32(b[Size+4:], l.bytesInHasher)
if l.onlyNullBytesInHasher {
b[Size+4+4] = 1
}
encodedHasher, err := l.hasher.(encoding.BinaryMarshaler).MarshalBinary()
if err != nil {
return nil, err
}
b = append(b, encodedHasher...)
return b, nil
}
// UnmarshalBinary decodes the binary form generated by MarshalBinary.
// The hash will replace its internal state accordingly.
func (l *level) UnmarshalBinary(b []byte) error {
if len(b) < Size+4+4+1 {
return ErrorInvalidEncoding
}
copy(l.checksum[:], b)
l.sumCount = binary.BigEndian.Uint32(b[Size:])
l.bytesInHasher = binary.BigEndian.Uint32(b[Size+4:])
switch b[Size+4+4] {
case 0:
l.onlyNullBytesInHasher = false
case 1:
l.onlyNullBytesInHasher = true
default:
return ErrorInvalidEncoding
}
err := l.hasher.(encoding.BinaryUnmarshaler).UnmarshalBinary(b[Size+4+4+1:])
return err
}
// hidriveHash is the hash computing the actual checksum used by HiDrive by combining multiple level-hashes.
type hidriveHash struct {
levels []*level // collection of level-hashes, one for each level starting at level-1
lastSumWritten [Size]byte // the last checksum written to any of the levels
bytesInBlock uint32 // bytes written into blockHash so far
onlyNullBytesInBlock bool // whether the hasher only contains null-bytes so far
blockHash hash.Hash
}
// New returns a new hash.Hash computing the HiDrive checksum.
func New() hash.Hash {
h := &hidriveHash{}
h.Reset()
return h
}
// aggregateToLevel writes the checksum to the level at the given index
// and if necessary propagates any changes to levels above.
func (h *hidriveHash) aggregateToLevel(index int, sum []byte) {
for i := index; ; i++ {
if i >= len(h.levels) {
h.levels = append(h.levels, NewLevel().(*level))
}
_, err := h.levels[i].Write(sum)
copy(h.lastSumWritten[:], sum)
if err != nil {
panic(fmt.Errorf("level-hash should not have produced an error: %w", err))
}
if !h.levels[i].IsFull() {
break
}
sum = h.levels[i].Sum(nil)
h.levels[i].Reset()
}
}
// Write (via the embedded io.Writer interface) adds more data to the running hash.
// It never returns an error.
func (h *hidriveHash) Write(p []byte) (n int, err error) {
onBlockWritten := func(remaining int) error {
var sum []byte
if h.onlyNullBytesInBlock {
sum = zeroSum[:]
} else {
sum = h.blockHash.Sum(nil)
}
h.blockHash.Reset()
h.aggregateToLevel(0, sum)
return nil
}
return writeByBlock(p, h.blockHash, uint32(BlockSize), &h.bytesInBlock, &h.onlyNullBytesInBlock, onBlockWritten)
}
// Sum appends the current hash to b and returns the resulting slice.
// It does not change the underlying hash state.
func (h *hidriveHash) Sum(b []byte) []byte {
// Save internal state.
state, err := h.MarshalBinary()
if err != nil {
panic(fmt.Errorf("saving the internal state should not have produced an error: %w", err))
}
if h.bytesInBlock > 0 {
// Fill remainder of block with null-bytes.
filler := make([]byte, h.BlockSize()-int(h.bytesInBlock))
_, err = h.Write(filler)
if err != nil {
panic(fmt.Errorf("filling with null-bytes should not have an error: %w", err))
}
}
checksum := zeroSum
for i := 0; i < len(h.levels); i++ {
level := h.levels[i]
if i < len(h.levels)-1 {
// Aggregate non-empty non-final levels.
if level.sumCount >= 1 {
h.aggregateToLevel(i+1, level.Sum(nil))
level.Reset()
}
} else {
// Determine sum of final level.
if level.sumCount > 1 {
copy(checksum[:], level.Sum(nil))
} else {
// This is needed, otherwise there is no way to return
// the non-position-embedded checksum.
checksum = h.lastSumWritten
}
}
}
// Restore internal state.
err = h.UnmarshalBinary(state)
if err != nil {
panic(fmt.Errorf("restoring the internal state should not have produced an error: %w", err))
}
return append(b, checksum[:]...)
}
// Reset resets the Hash to its initial state.
func (h *hidriveHash) Reset() {
h.levels = nil
h.lastSumWritten = zeroSum // clear the last written checksum
h.bytesInBlock = 0
h.onlyNullBytesInBlock = true
h.blockHash = sha1.New()
}
// Size returns the number of bytes Sum will return.
func (h *hidriveHash) Size() int {
return Size
}
// BlockSize returns the hash's underlying block size.
// The Write method must be able to accept any amount
// of data, but it may operate more efficiently if all writes
// are a multiple of the block size.
func (h *hidriveHash) BlockSize() int {
return BlockSize
}
// MarshalBinary encodes the hash into a binary form and returns the result.
func (h *hidriveHash) MarshalBinary() ([]byte, error) {
b := make([]byte, Size+4+1+8)
copy(b, h.lastSumWritten[:])
binary.BigEndian.PutUint32(b[Size:], h.bytesInBlock)
if h.onlyNullBytesInBlock {
b[Size+4] = 1
}
binary.BigEndian.PutUint64(b[Size+4+1:], uint64(len(h.levels)))
for _, level := range h.levels {
encodedLevel, err := level.MarshalBinary()
if err != nil {
return nil, err
}
encodedLength := make([]byte, 8)
binary.BigEndian.PutUint64(encodedLength, uint64(len(encodedLevel)))
b = append(b, encodedLength...)
b = append(b, encodedLevel...)
}
encodedBlockHash, err := h.blockHash.(encoding.BinaryMarshaler).MarshalBinary()
if err != nil {
return nil, err
}
b = append(b, encodedBlockHash...)
return b, nil
}
// UnmarshalBinary decodes the binary form generated by MarshalBinary.
// The hash will replace its internal state accordingly.
func (h *hidriveHash) UnmarshalBinary(b []byte) error {
if len(b) < Size+4+1+8 {
return ErrorInvalidEncoding
}
copy(h.lastSumWritten[:], b)
h.bytesInBlock = binary.BigEndian.Uint32(b[Size:])
switch b[Size+4] {
case 0:
h.onlyNullBytesInBlock = false
case 1:
h.onlyNullBytesInBlock = true
default:
return ErrorInvalidEncoding
}
amount := binary.BigEndian.Uint64(b[Size+4+1:])
h.levels = make([]*level, int(amount))
offset := Size + 4 + 1 + 8
for i := range h.levels {
length := int(binary.BigEndian.Uint64(b[offset:]))
offset += 8
h.levels[i] = NewLevel().(*level)
err := h.levels[i].UnmarshalBinary(b[offset : offset+length])
if err != nil {
return err
}
offset += length
}
err := h.blockHash.(encoding.BinaryUnmarshaler).UnmarshalBinary(b[offset:])
return err
}
// Sum returns the HiDrive checksum of the data.
func Sum(data []byte) [Size]byte {
h := New().(*hidriveHash)
_, _ = h.Write(data)
var result [Size]byte
copy(result[:], h.Sum(nil))
return result
}
// Check the interfaces are satisfied.
var (
_ hash.Hash = (*level)(nil)
_ encoding.BinaryMarshaler = (*level)(nil)
_ encoding.BinaryUnmarshaler = (*level)(nil)
_ internal.LevelHash = (*level)(nil)
_ hash.Hash = (*hidriveHash)(nil)
_ encoding.BinaryMarshaler = (*hidriveHash)(nil)
_ encoding.BinaryUnmarshaler = (*hidriveHash)(nil)
)