Skip to content

Commit

Permalink
Merge Go 1.11 inflate optimizations (#92)
Browse files Browse the repository at this point in the history
* Merge Go 1.11 inflate optimizations
* Remove versions older than 1.9
* Update README.md

https://go-review.googlesource.com/c/go/+/102235
  • Loading branch information
klauspost committed Jun 29, 2018
1 parent 5fb1f31 commit a17685a
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 28 deletions.
2 changes: 0 additions & 2 deletions .travis.yml
Expand Up @@ -7,8 +7,6 @@ os:
- osx

go:
- 1.7.x
- 1.8.x
- 1.9.x
- 1.10.x
- master
Expand Down
1 change: 1 addition & 0 deletions README.md
Expand Up @@ -14,6 +14,7 @@ It offers slightly better compression at lower compression settings, and up to 3

# changelog

* Jul 23, 2018: Merged [Go 1.11 inflate optimizations](https://go-review.googlesource.com/c/go/+/102235). Go 1.9 is now required. Backwards compatible version tagged with [v1.3.0](https://github.com/klauspost/compress/releases/tag/v1.3.0).
* Apr 2, 2018: Added [huff0](https://godoc.org/github.com/klauspost/compress/huff0) en/decoder. Experimental for now, API may change.
* Mar 4, 2018: Added [FSE Entropy](https://godoc.org/github.com/klauspost/compress/fse) en/decoder. Experimental for now, API may change.
* Nov 3, 2017: Add compression [Estimate](https://godoc.org/github.com/klauspost/compress#Estimate) function.
Expand Down
64 changes: 38 additions & 26 deletions flate/inflate.go
Expand Up @@ -10,6 +10,7 @@ package flate
import (
"bufio"
"io"
"math/bits"
"strconv"
"sync"
)
Expand Down Expand Up @@ -113,7 +114,7 @@ type huffmanDecoder struct {
// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
// degenerate case where the tree has only a single symbol with length 1. Empty
// trees are permitted.
func (h *huffmanDecoder) init(bits []int) bool {
func (h *huffmanDecoder) init(lengths []int) bool {
// Sanity enables additional runtime tests during Huffman
// table construction. It's intended to be used during
// development to supplement the currently ad-hoc unit tests.
Expand All @@ -130,7 +131,7 @@ func (h *huffmanDecoder) init(bits []int) bool {
// compute min and max length.
var count [maxCodeLen]int
var min, max int
for _, n := range bits {
for _, n := range lengths {
if n == 0 {
continue
}
Expand Down Expand Up @@ -189,7 +190,7 @@ func (h *huffmanDecoder) init(bits []int) bool {
h.links = h.links[:huffmanNumChunks-link]
}
for j := uint(link); j < huffmanNumChunks; j++ {
reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8
reverse := int(bits.Reverse16(uint16(j)))
reverse >>= uint(16 - huffmanChunkBits)
off := j - uint(link)
if sanity && h.chunks[reverse] != 0 {
Expand All @@ -207,14 +208,14 @@ func (h *huffmanDecoder) init(bits []int) bool {
h.links = h.links[:0]
}

for i, n := range bits {
for i, n := range lengths {
if n == 0 {
continue
}
code := nextcode[n]
nextcode[n]++
chunk := uint32(i<<huffmanValueShift | n)
reverse := int(reverseByte[code>>8]) | int(reverseByte[code&0xff])<<8
reverse := int(bits.Reverse16(uint16(code)))
reverse >>= uint(16 - n)
if n <= huffmanChunkBits {
for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
Expand Down Expand Up @@ -609,7 +610,7 @@ readLiteral:
return
}
}
dist = int(reverseByte[(f.b&0x1F)<<3])
dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
f.b >>= 5
f.nb -= 5
} else {
Expand Down Expand Up @@ -681,10 +682,7 @@ func (f *decompressor) dataBlock() {
nr, err := io.ReadFull(f.r, f.buf[0:4])
f.roffset += int64(nr)
if err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
f.err = err
f.err = noEOF(err)
return
}
n := int(f.buf[0]) | int(f.buf[1])<<8
Expand Down Expand Up @@ -717,10 +715,7 @@ func (f *decompressor) copyData() {
f.copyLen -= cnt
f.dict.writeMark(cnt)
if err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
f.err = err
f.err = noEOF(err)
return
}

Expand All @@ -742,13 +737,18 @@ func (f *decompressor) finishBlock() {
f.step = (*decompressor).nextBlock
}

// noEOF normalizes end-of-input errors: io.EOF is translated to
// io.ErrUnexpectedEOF, and any other value (including nil) is handed
// back untouched. The match is by direct equality, so an error that
// merely wraps io.EOF is returned as-is.
func noEOF(e error) error {
	switch e {
	case io.EOF:
		return io.ErrUnexpectedEOF
	default:
		return e
	}
}

func (f *decompressor) moreBits() error {
c, err := f.r.ReadByte()
if err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
return err
return noEOF(err)
}
f.roffset++
f.b |= uint32(c) << f.nb
Expand All @@ -763,25 +763,37 @@ func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
// cases, the chunks slice will be 0 for the invalid sequence, leading it
// to satisfy the n == 0 check below.
n := uint(h.min)
// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
// but is smart enough to keep local variables in registers, so use nb and b,
// inline call to moreBits and reassign b,nb back to f on return.
nb, b := f.nb, f.b
for {
for f.nb < n {
if err := f.moreBits(); err != nil {
return 0, err
for nb < n {
c, err := f.r.ReadByte()
if err != nil {
f.b = b
f.nb = nb
return 0, noEOF(err)
}
f.roffset++
b |= uint32(c) << (nb & 31)
nb += 8
}
chunk := h.chunks[f.b&(huffmanNumChunks-1)]
chunk := h.chunks[b&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
chunk = h.links[chunk>>huffmanValueShift][(f.b>>huffmanChunkBits)&h.linkMask]
chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
n = uint(chunk & huffmanCountMask)
}
if n <= f.nb {
if n <= nb {
if n == 0 {
f.b = b
f.nb = nb
f.err = CorruptInputError(f.roffset)
return 0, f.err
}
f.b >>= n
f.nb -= n
f.b = b >> (n & 31)
f.nb = nb - n
return int(chunk >> huffmanValueShift), nil
}
}
Expand Down

0 comments on commit a17685a

Please sign in to comment.