Skip to content

Commit

Permalink
Fix and optimize bit estimates
Browse files Browse the repository at this point in the history
New blocks were given an advantage since estimates for new blocks did not include extra bits.

Optimize log2 calculation with faster estimate:

```
Benchmark_tokens_EstimatedBits-32    	  299931	      4062 ns/op	   0.25 MB/s
Benchmark_tokens_EstimatedBits-32    	 2201341	       543 ns/op	   1.84 MB/s
```
  • Loading branch information
klauspost committed Jan 14, 2020
1 parent da94045 commit 7269def
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 19 deletions.
10 changes: 5 additions & 5 deletions flate/huffman_code.go
Original file line number Diff line number Diff line change
Expand Up @@ -365,13 +365,13 @@ func histogramSize(b []byte, h []uint16, fill bool) int {
for _, t := range b {
h[t]++
}
invTotal := 1.0 / float64(len(b))
shannon := 0.0
single := math.Ceil(-math.Log2(invTotal))
invTotal := float32(1.0 / float64(len(b)))
shannon := float32(0.0)
single := float32(math.Ceil(-math.Log2(float64(invTotal))))
for i, v := range h[:] {
if v > 0 {
n := float64(v)
shannon += math.Ceil(-math.Log2(n*invTotal) * n)
n := float32(v)
shannon += -mFastLog2(n*invTotal) * n
} else if fill {
shannon += single
h[i] = 1
Expand Down
37 changes: 24 additions & 13 deletions flate/token.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,39 +211,50 @@ func (t *tokens) AddLiteral(lit byte) {
t.nLits++
}

// mFastLog2 returns a fast approximation of log2(val), trading accuracy for
// speed compared to math.Log2.
//
// The float32 bit pattern is split into its exponent field and its mantissa.
// The exponent contributes the integer part of the result, and a quadratic
// polynomial evaluated on the mantissa contributes the rest.
//
// Adapted from https://stackoverflow.com/a/28730362
func mFastLog2(val float32) float32 {
	bits := int32(math.Float32bits(val))

	// Biased exponent field minus 128. This is one less than the unbiased
	// exponent; the polynomial below evaluates to roughly 1 at a mantissa
	// of 1, which makes up the difference.
	exponent := float32(((bits >> 23) & 255) - 128)

	// Clear the exponent field and replace it with the bias (127), so the
	// remaining bits decode as the mantissa scaled to an exponent of zero.
	bits = (bits &^ (255 << 23)) + 127<<23
	mantissa := math.Float32frombits(uint32(bits))

	return exponent + (((-0.34484843)*mantissa+2.02466578)*mantissa - 0.67487759)
}

// EstimatedBits will return a minimum size estimated by an *optimal*
// compression of the block.
// The returned value is the estimated size of the block in bits.
func (t *tokens) EstimatedBits() int {
shannon := float64(0)
shannon := float32(0)
bits := int(0)
nMatches := 0
if t.nLits > 0 {
invTotal := 1.0 / float64(t.nLits)
invTotal := 1.0 / float32(t.nLits)
for _, v := range t.litHist[:] {
if v > 0 {
n := float64(v)
shannon += math.Ceil(-math.Log2(n*invTotal) * n)
n := float32(v)
shannon += -mFastLog2(n*invTotal) * n
}
}
// Just add 15 for EOB
shannon += 15
for _, v := range t.extraHist[1 : literalCount-256] {
for i, v := range t.extraHist[1 : literalCount-256] {
if v > 0 {
n := float64(v)
shannon += math.Ceil(-math.Log2(n*invTotal) * n)
bits += int(lengthExtraBits[v&31]) * int(v)
n := float32(v)
shannon += -mFastLog2(n*invTotal) * n
bits += int(lengthExtraBits[i&31]) * int(v)
nMatches += int(v)
}
}
}
if nMatches > 0 {
invTotal := 1.0 / float64(nMatches)
for _, v := range t.offHist[:offsetCodeCount] {
invTotal := 1.0 / float32(nMatches)
for i, v := range t.offHist[:offsetCodeCount] {
if v > 0 {
n := float64(v)
shannon += math.Ceil(-math.Log2(n*invTotal) * n)
bits += int(offsetExtraBits[v&31]) * int(n)
n := float32(v)
shannon += -mFastLog2(n*invTotal) * n
bits += int(offsetExtraBits[i&31]) * int(v)
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion flate/token_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ func loadTestTokens(t testFatal) *tokens {
func Test_tokens_EstimatedBits(t *testing.T) {
tok := loadTestTokens(t)
// The estimated size, update if method changes.
const expect = 199380
const expect = 196635
if n := tok.EstimatedBits(); n != expect {
t.Error("want:", expect, "bits, got:", n)
}
Expand Down

0 comments on commit 7269def

Please sign in to comment.