Skip to content

Commit

Permalink
deflate: Speed up huffman encoding (#455)
Browse files Browse the repository at this point in the history
Huffman only:

320.36 -> 414.67MB/s  (github-june-2days-2019.json)
237.41-> 293.24 MB/s (github-ranks-backup.bin)

Level 1:

278.94 -> 308.47 MB/s  (github-june-2days-2019.json)
232.93 -> 261.16MB/s  (github-ranks-backup.bin)
  • Loading branch information
klauspost committed Nov 30, 2021
1 parent 4bd7627 commit a4773d2
Showing 1 changed file with 39 additions and 8 deletions.
47 changes: 39 additions & 8 deletions flate/huffman_bit_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ func (w *huffmanBitWriter) write(b []byte) {
}

func (w *huffmanBitWriter) writeBits(b int32, nb uint16) {
w.bits |= uint64(b) << w.nbits
w.bits |= uint64(b) << (w.nbits & 63)
w.nbits += nb
if w.nbits >= 48 {
w.writeOutBits()
Expand Down Expand Up @@ -423,7 +423,7 @@ func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) {

func (w *huffmanBitWriter) writeCode(c hcode) {
// The function does not get inlined if we "& 63" the shift.
w.bits |= uint64(c.code) << w.nbits
w.bits |= uint64(c.code) << (w.nbits & 63)
w.nbits += c.len
if w.nbits >= 48 {
w.writeOutBits()
Expand Down Expand Up @@ -768,7 +768,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
if t < matchType {
//w.writeCode(lits[t.literal()])
c := lits[t.literal()]
bits |= uint64(c.code) << nbits
bits |= uint64(c.code) << (nbits & 63)
nbits += c.len
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
Expand Down Expand Up @@ -796,7 +796,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
} else {
// inlined
c := lengths[lengthCode&31]
bits |= uint64(c.code) << nbits
bits |= uint64(c.code) << (nbits & 63)
nbits += c.len
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
Expand All @@ -819,7 +819,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
if extraLengthBits > 0 {
//w.writeBits(extraLength, extraLengthBits)
extraLength := int32(length - lengthBase[lengthCode&31])
bits |= uint64(extraLength) << nbits
bits |= uint64(extraLength) << (nbits & 63)
nbits += extraLengthBits
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
Expand All @@ -846,7 +846,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
} else {
// inlined
c := offs[offsetCode]
bits |= uint64(c.code) << nbits
bits |= uint64(c.code) << (nbits & 63)
nbits += c.len
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
Expand All @@ -867,7 +867,7 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
offsetComb := offsetCombined[offsetCode]
if offsetComb > 1<<16 {
//w.writeBits(extraOffset, extraOffsetBits)
bits |= uint64(offset&matchOffsetOnlyMask-(offsetComb&0xffff)) << nbits
bits |= uint64(offset&matchOffsetOnlyMask-(offsetComb&0xffff)) << (nbits & 63)
nbits += uint16(offsetComb >> 16)
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
Expand Down Expand Up @@ -996,10 +996,41 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
encoding := w.literalEncoding.codes[:256]
// Go 1.16 LOVES having these on stack. At least 1.5x the speed.
bits, nbits, nbytes := w.bits, w.nbits, w.nbytes

// Unroll, write 3 codes/loop.
// Fastest number of unrolls.
for len(input) > 3 {
// We must have at least 48 bits free.
if nbits >= 8 {
n := nbits >> 3
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
bits >>= (n * 8) & 63
nbits -= n * 8
nbytes += uint8(n)
}
if nbytes >= bufferFlushSize {
if w.err != nil {
nbytes = 0
return
}
_, w.err = w.writer.Write(w.bytes[:nbytes])
nbytes = 0
}
a, b := encoding[input[0]], encoding[input[1]]
bits |= uint64(a.code) << (nbits & 63)
bits |= uint64(b.code) << ((nbits + a.len) & 63)
c := encoding[input[2]]
nbits += b.len + a.len
bits |= uint64(c.code) << (nbits & 63)
nbits += c.len
input = input[3:]
}

// Remaining...
for _, t := range input {
// Bitwriting inlined, ~30% speedup
c := encoding[t]
bits |= uint64(c.code) << nbits
bits |= uint64(c.code) << (nbits & 63)
nbits += c.len
if debugDeflate {
count += int(c.len)
Expand Down

0 comments on commit a4773d2

Please sign in to comment.