Skip to content

Commit

Permalink
Use separate extra bits to make comparison fair.
Browse files Browse the repository at this point in the history
  • Loading branch information
klauspost committed Jan 16, 2020
1 parent 8647fbe commit 182d024
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 24 deletions.
6 changes: 3 additions & 3 deletions flate/deflate.go
Original file line number Diff line number Diff line change
Expand Up @@ -644,21 +644,21 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
d.fill = (*compressor).fillBlock
d.step = (*compressor).store
case level == ConstantCompression:
d.w.logReusePenalty = uint(7)
d.w.logNewTablePenalty = 4
d.window = make([]byte, maxStoreBlockSize)
d.fill = (*compressor).fillBlock
d.step = (*compressor).storeHuff
case level == DefaultCompression:
level = 5
fallthrough
case level >= 1 && level <= 6:
d.w.logReusePenalty = 8
d.w.logNewTablePenalty = 7
d.fast = newFastEnc(level)
d.window = make([]byte, maxStoreBlockSize)
d.fill = (*compressor).fillBlock
d.step = (*compressor).storeFast
case 7 <= level && level <= 9:
d.w.logReusePenalty = 10
d.w.logNewTablePenalty = 10
d.state = &advancedState{}
d.compressionLevel = levels[level]
d.initDeflate()
Expand Down
29 changes: 17 additions & 12 deletions flate/huffman_bit_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,12 +93,12 @@ type huffmanBitWriter struct {
err error
lastHeader int
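// Penalty shift applied to a size estimate (size += size >> penalty).
// A value of 0 doubles the estimate (reused block can be up to 2x the size);
// larger values give a progressively smaller penalty.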
logReusePenalty uint
lastHuffMan bool
bytes [256]byte
literalFreq [lengthCodesStart + 32]uint16
offsetFreq [32]uint16
codegenFreq [codegenCodeCount]uint16
logNewTablePenalty uint
lastHuffMan bool
bytes [256]byte
literalFreq [lengthCodesStart + 32]uint16
offsetFreq [32]uint16
codegenFreq [codegenCodeCount]uint16

// codegen must have an extra space for the final symbol.
codegen [literalCount + offsetCodeCount + 1]uint8
Expand All @@ -119,7 +119,7 @@ type huffmanBitWriter struct {
// If lastHuffMan is set, a table for outputting literals has been generated and offsets are invalid.
//
// For an incoming block, the output size with a fresh table is estimated by calculating the
// optimal size and adding a penalty in 'logReusePenalty'.
// optimal size and adding a penalty in 'logNewTablePenalty'.
// A Huffman table is not optimal, which is why we add a penalty, and generating a new table
// is slower both for compression and decompression.

Expand Down Expand Up @@ -612,7 +612,7 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b
// Estimate size for using a new table.
// Use the previous header size as the best estimate.
newSize := w.lastHeader + tokens.EstimatedBits()
newSize += newSize >> w.logReusePenalty
newSize += newSize >> w.logNewTablePenalty

// The estimated size is calculated as an optimal table.
// We add a penalty to make it more realistic and re-use a bit more.
Expand Down Expand Up @@ -812,7 +812,12 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
// Assume header is around 70 bytes:
// https://stackoverflow.com/a/25454430
const guessHeaderSizeBits = 70 * 8
estBits := histogramSize(input, w.literalFreq[:], !eof && !sync) + 15 + guessHeaderSizeBits
estBits, estExtra := histogramSize(input, w.literalFreq[:], !eof && !sync)
estBits += w.lastHeader + 15
if w.lastHeader == 0 {
estBits += guessHeaderSizeBits
}
estBits += estBits >> w.logNewTablePenalty

// Store bytes, if we don't get a reasonable improvement.
ssize, storable := w.storedSize(input)
Expand All @@ -823,10 +828,10 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
}

if w.lastHeader > 0 {
size := w.dynamicReuseSize(w.literalEncoding, huffOffset) + w.lastHeader
estBits += estBits >> w.logReusePenalty
reuseSize := w.literalEncoding.bitLength(w.literalFreq[:256])
estBits += estExtra

if estBits < size {
if estBits < reuseSize {
// We owe an EOB
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.lastHeader = 0
Expand Down
2 changes: 1 addition & 1 deletion flate/huffman_bit_writer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func testBlockHuff(t *testing.T, in, out string) {
}
var buf bytes.Buffer
bw := newHuffmanBitWriter(&buf)
bw.logReusePenalty = 8
bw.logNewTablePenalty = 8
bw.writeBlockHuff(false, all, false)
bw.flush()
got := buf.Bytes()
Expand Down
29 changes: 21 additions & 8 deletions flate/huffman_code.go
Original file line number Diff line number Diff line change
Expand Up @@ -331,20 +331,33 @@ func atLeastOne(v float32) float32 {
// An estimated size in bits is returned.
// If fill is set, unassigned values are assigned '1' in the histogram.
// len(h) must be >= 256, and h's elements must be all zeroes.
func histogramSize(b []byte, h []uint16, fill bool) int {
func histogramSize(b []byte, h []uint16, fill bool) (int, int) {
h = h[:256]
for _, t := range b {
h[t]++
}
invTotal := 1.0 / float32(len(b))
shannon := float32(0.0)
for i, v := range h[:] {
if v > 0 {
n := float32(v)
shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
} else if fill {
h[i] = 1
var extra float32
if fill {
oneBits := atLeastOne(-mFastLog2(invTotal))
for i, v := range h[:] {
if v > 0 {
n := float32(v)
shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
} else {
h[i] = 1
extra += oneBits
}
}
} else {
for _, v := range h[:] {
if v > 0 {
n := float32(v)
shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
}
}
}
return int(shannon + 0.99)

return int(shannon + 0.99), int(extra + 0.99)
}

0 comments on commit 182d024

Please sign in to comment.