Skip to content

Commit

Permalink
deflate: Use compound hcode (#622)
Browse files Browse the repository at this point in the history
~5% faster for huffman-only; same speed for the rest.

```
github-ranks-backup.bin	gzkp	-2	1862623243	1298789681	5547	320.22
github-ranks-backup.bin	gzkp	-2	1862623243	1298789681	5305	334.83
```

Replaces #619 - more speedup, and no regression.
  • Loading branch information
klauspost committed Jun 9, 2022
1 parent 03dccc3 commit 3221590
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 36 deletions.
47 changes: 24 additions & 23 deletions flate/huffman_bit_writer.go
Expand Up @@ -169,7 +169,7 @@ func (w *huffmanBitWriter) canReuse(t *tokens) (ok bool) {
b := w.offsetEncoding.codes
b = b[:len(a)]
for i, v := range a {
if v != 0 && b[i].len == 0 {
if v != 0 && b[i].zero() {
return false
}
}
Expand All @@ -178,15 +178,15 @@ func (w *huffmanBitWriter) canReuse(t *tokens) (ok bool) {
b = w.literalEncoding.codes[256:literalCount]
b = b[:len(a)]
for i, v := range a {
if v != 0 && b[i].len == 0 {
if v != 0 && b[i].zero() {
return false
}
}

a = t.litHist[:256]
b = w.literalEncoding.codes[:len(a)]
for i, v := range a {
if v != 0 && b[i].len == 0 {
if v != 0 && b[i].zero() {
return false
}
}
Expand Down Expand Up @@ -280,12 +280,12 @@ func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litE
// Copy the concatenated code sizes to codegen. Put a marker at the end.
cgnl := codegen[:numLiterals]
for i := range cgnl {
cgnl[i] = uint8(litEnc.codes[i].len)
cgnl[i] = litEnc.codes[i].len()
}

cgnl = codegen[numLiterals : numLiterals+numOffsets]
for i := range cgnl {
cgnl[i] = uint8(offEnc.codes[i].len)
cgnl[i] = offEnc.codes[i].len()
}
codegen[numLiterals+numOffsets] = badCode

Expand Down Expand Up @@ -428,8 +428,8 @@ func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) {

func (w *huffmanBitWriter) writeCode(c hcode) {
// The function does not get inlined if we "& 63" the shift.
w.bits |= uint64(c.code) << (w.nbits & 63)
w.nbits += c.len
w.bits |= c.code64() << (w.nbits & 63)
w.nbits += c.len()
if w.nbits >= 48 {
w.writeOutBits()
}
Expand Down Expand Up @@ -477,7 +477,7 @@ func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, n
w.writeBits(int32(numCodegens-4), 4)

for i := 0; i < numCodegens; i++ {
value := uint(w.codegenEncoding.codes[codegenOrder[i]].len)
value := uint(w.codegenEncoding.codes[codegenOrder[i]].len())
w.writeBits(int32(value), 3)
}

Expand Down Expand Up @@ -670,7 +670,7 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b
// Estimate size for using a new table.
// Use the previous header size as the best estimate.
newSize := w.lastHeader + tokens.EstimatedBits()
newSize += int(w.literalEncoding.codes[endBlockMarker].len) + newSize>>w.logNewTablePenalty
newSize += int(w.literalEncoding.codes[endBlockMarker].len()) + newSize>>w.logNewTablePenalty

// The estimated size is calculated as an optimal table.
// We add a penalty to make it more realistic and re-use a bit more.
Expand Down Expand Up @@ -854,8 +854,8 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
if t < 256 {
//w.writeCode(lits[t.literal()])
c := lits[t]
bits |= uint64(c.code) << (nbits & 63)
nbits += c.len
bits |= c.code64() << (nbits & 63)
nbits += c.len()
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
Expand All @@ -882,8 +882,8 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
} else {
// inlined
c := lengths[lengthCode]
bits |= uint64(c.code) << (nbits & 63)
nbits += c.len
bits |= c.code64() << (nbits & 63)
nbits += c.len()
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
Expand Down Expand Up @@ -931,8 +931,8 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
} else {
// inlined
c := offs[offsetCode]
bits |= uint64(c.code) << (nbits & 63)
nbits += c.len
bits |= c.code64() << (nbits & 63)
nbits += c.len()
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
Expand Down Expand Up @@ -1127,12 +1127,12 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
nbytes = 0
}
a, b := encoding[input[0]], encoding[input[1]]
bits |= uint64(a.code) << (nbits & 63)
bits |= uint64(b.code) << ((nbits + a.len) & 63)
bits |= a.code64() << (nbits & 63)
bits |= b.code64() << ((nbits + a.len()) & 63)
c := encoding[input[2]]
nbits += b.len + a.len
bits |= uint64(c.code) << (nbits & 63)
nbits += c.len
nbits += b.len() + a.len()
bits |= c.code64() << (nbits & 63)
nbits += c.len()
input = input[3:]
}

Expand All @@ -1158,10 +1158,11 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
}
// Bitwriting inlined, ~30% speedup
c := encoding[t]
bits |= uint64(c.code) << (nbits & 63)
nbits += c.len
bits |= c.code64() << (nbits & 63)

nbits += c.len()
if debugDeflate {
count += int(c.len)
count += int(c.len())
}
}
// Restore...
Expand Down
38 changes: 25 additions & 13 deletions flate/huffman_code.go
Expand Up @@ -16,9 +16,18 @@ const (
)

// hcode is a huffman code with a bit code and bit length,
// packed into a single uint32: the low 8 bits hold the bit
// length and the upper 24 bits hold the code bits.
type hcode uint32

// len returns the bit length of the code.
func (h hcode) len() uint8 {
	return uint8(h)
}

// code64 returns the code bits as a uint64, ready to be
// shifted and OR-ed into a 64-bit bit buffer.
func (h hcode) code64() uint64 {
	return uint64(h >> 8)
}

// zero reports whether h holds no code at all (the all-zero value,
// i.e. zero length and zero code).
func (h hcode) zero() bool {
	return h == 0
}

type huffmanEncoder struct {
Expand Down Expand Up @@ -58,8 +67,11 @@ type levelInfo struct {

// set stores code (the code bits) and length (the bit length) in h,
// packing them into the compound uint32 representation
// (length in the low 8 bits, code in the upper bits).
func (h *hcode) set(code uint16, length uint8) {
	*h = hcode(length) | (hcode(code) << 8)
}

// newhcode packs the given code bits and bit length into an hcode.
func newhcode(code uint16, length uint8) hcode {
	return hcode(code)<<8 | hcode(length)
}

func reverseBits(number uint16, bitLength byte) uint16 {
Expand Down Expand Up @@ -100,7 +112,7 @@ func generateFixedLiteralEncoding() *huffmanEncoder {
bits = ch + 192 - 280
size = 8
}
codes[ch] = hcode{code: reverseBits(bits, size), len: size}
codes[ch] = newhcode(reverseBits(bits, size), size)
}
return h
}
Expand All @@ -109,7 +121,7 @@ func generateFixedOffsetEncoding() *huffmanEncoder {
h := newHuffmanEncoder(30)
codes := h.codes
for ch := range codes {
codes[ch] = hcode{code: reverseBits(uint16(ch), 5), len: 5}
codes[ch] = newhcode(reverseBits(uint16(ch), 5), 5)
}
return h
}
Expand All @@ -121,7 +133,7 @@ func (h *huffmanEncoder) bitLength(freq []uint16) int {
var total int
for i, f := range freq {
if f != 0 {
total += int(f) * int(h.codes[i].len)
total += int(f) * int(h.codes[i].len())
}
}
return total
Expand All @@ -130,7 +142,7 @@ func (h *huffmanEncoder) bitLength(freq []uint16) int {
// bitLengthRaw returns the total number of bits needed to encode
// every byte in b using h's current code table.
func (h *huffmanEncoder) bitLengthRaw(b []byte) int {
	total := 0
	for _, sym := range b {
		total += int(h.codes[sym].len())
	}
	return total
}
Expand All @@ -141,10 +153,10 @@ func (h *huffmanEncoder) canReuseBits(freq []uint16) int {
for i, f := range freq {
if f != 0 {
code := h.codes[i]
if code.len == 0 {
if code.zero() {
return math.MaxInt32
}
total += int(f) * int(code.len)
total += int(f) * int(code.len())
}
}
return total
Expand Down Expand Up @@ -308,7 +320,7 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN

sortByLiteral(chunk)
for _, node := range chunk {
h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint8(n)}
h.codes[node.literal] = newhcode(reverseBits(code, uint8(n)), uint8(n))
code++
}
list = list[0 : len(list)-int(bits)]
Expand All @@ -330,7 +342,7 @@ func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
list[count] = literalNode{uint16(i), f}
count++
} else {
codes[i].len = 0
codes[i] = 0
}
}
list[count] = literalNode{}
Expand Down

0 comments on commit 3221590

Please sign in to comment.