-
Notifications
You must be signed in to change notification settings - Fork 309
/
encode_better.go
229 lines (199 loc) · 6.16 KB
/
encode_better.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
// Copyright (c) 2019 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package s2
import (
"math/bits"
)
// hash4 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <32.
func hash4(u uint64, h uint8) uint32 {
const prime4bytes = 2654435761
return (uint32(u) * prime4bytes) >> ((32 - h) & 31)
}
// hash5 returns the hash of the lowest 5 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash5(u uint64, h uint8) uint32 {
const prime5bytes = 889523592379
return uint32(((u << (64 - 40)) * prime5bytes) >> ((64 - h) & 63))
}
// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash7(u uint64, h uint8) uint32 {
const prime7bytes = 58295818150454627
return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63))
}
// hash8 returns the hash of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <64.
func hash8(u uint64, h uint8) uint32 {
const prime8bytes = 0xcf1bbcdcb7a56463
return uint32((u * prime8bytes) >> ((64 - h) & 63))
}
// encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
// len(dst) >= MaxEncodedLen(len(src)) &&
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBetter(dst, src []byte) (d int) {
// Initialize the hash tables.
const (
// Long hash matches.
lTableBits = 16
maxLTableSize = 1 << lTableBits
// Short hash matches.
sTableBits = 14
maxSTableSize = 1 << sTableBits
)
// In Go, all array elements are zero-initialized, so there is no advantage
// to a smaller tableSize per se. However, it matches the C++ algorithm,
// and in the asm versions of this code, we can get away with zeroing only
// the first tableSize elements.
var lTable [maxLTableSize]uint32
var sTable [maxSTableSize]uint32
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := len(src) - inputMargin
// Bail if we can't compress to at least this.
dstLimit := len(src) - len(src)>>5 - 5
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := 0
// The encoded form must start with a literal, as there are no previous
// bytes to copy, so we start looking for hash matches at s == 1.
s := 1
cv := load64(src, s)
// We search for a repeat at -1, but don't output repeats when nextEmit == 0
repeat := 1
for {
candidateL := 0
for {
// Next src position to check
nextS := s + (s-nextEmit)>>7 + 1
if nextS > sLimit {
goto emitRemainder
}
hashL := hash7(cv, lTableBits)
hashS := hash4(cv, sTableBits)
candidateL = int(lTable[hashL])
candidateS := int(sTable[hashS])
lTable[hashL] = uint32(s)
sTable[hashS] = uint32(s)
// Check repeat at offset checkRep.
const checkRep = 1
if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
base := s + checkRep
// Extend back
for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
i--
base--
}
d += emitLiteral(dst[d:], src[nextEmit:base])
// Extend forward
candidate := s - repeat + 4 + checkRep
s += 4 + checkRep
for s <= sLimit {
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidate += 8
}
if nextEmit > 0 {
// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
d += emitRepeat(dst[d:], repeat, s-base)
} else {
// First match, cannot be repeat.
d += emitCopy(dst[d:], repeat, s-base)
}
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
cv = load64(src, s)
continue
}
if uint32(cv) == load32(src, candidateL) {
break
}
// Check our short candidate
if uint32(cv) == load32(src, candidateS) {
// Try a long candidate at s+1
hashL = hash7(cv>>8, lTableBits)
candidateL = int(lTable[hashL])
lTable[hashL] = uint32(s + 1)
if uint32(cv>>8) == load32(src, candidateL) {
s++
break
}
// Use our short candidate.
candidateL = candidateS
break
}
cv = load64(src, nextS)
s = nextS
}
// Extend backwards
for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
candidateL--
s--
}
// Bail if we exceed the maximum size.
if d+(s-nextEmit) > dstLimit {
return 0
}
base := s
offset := base - candidateL
// Extend the 4-byte match as long as possible.
s += 4
candidateL += 4
for s <= len(src)-8 {
if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidateL += 8
}
if offset > 65535 && s-base <= 5 {
// Bail if the match is equal or worse to the encoding.
s = base + 3
cv = load64(src, s)
continue
}
repeat = offset
d += emitLiteral(dst[d:], src[nextEmit:base])
d += emitCopy(dst[d:], offset, s-base)
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
if d > dstLimit {
// Do we have space for more, if not bail.
return 0
}
// Index match start+1 (long) and start+2 (short)
index0 := base + 1
// Index match end-2 (long) and end-1 (short)
index1 := s - 2
cv0 := load64(src, index0)
cv1 := load64(src, index1)
cv = load64(src, s)
lTable[hash7(cv0, lTableBits)] = uint32(index0)
lTable[hash7(cv1, lTableBits)] = uint32(index1)
sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
}
emitRemainder:
if nextEmit < len(src) {
// Bail if we exceed the maximum size.
if d+len(src)-nextEmit > dstLimit {
return 0
}
d += emitLiteral(dst[d:], src[nextEmit:])
}
return d
}