Skip to content

Commit

Permalink
s2: Avoid Decode size checks (#328)
Browse files Browse the repository at this point in the history
Add a second decode loop that skips the per-tag input size checks for as long as enough bytes remain in src.

For now, this is only implemented in the Go version.

Streams:
```
Before: Decompressing. 426243297 -> 1000000000 [234.61%]; 1.941s, 491.3MB/s
After:  Decompressing. 426243297 -> 1000000000 [234.61%]; 1.895s, 503.3MB/s
```

Some blocks like it a lot:
```
BenchmarkDecodeS2Block/4-pdf/block-32                      379           336           -11.35%
BenchmarkDecodeS2Block/4-pdf/block-better-32               691           630           -8.83%
BenchmarkDecodeS2Block/4-pdf/block-best-32                 816           702           -13.97%
BenchmarkDecodeS2Block/5-html4/block-32                    13645         8727          -36.04%
BenchmarkDecodeS2Block/5-html4/block-better-32             14059         9204          -34.53%
BenchmarkDecodeS2Block/5-html4/block-best-32               13729         8975          -34.63%
```
  • Loading branch information
klauspost committed Mar 2, 2021
1 parent a576225 commit 0d78ef0
Showing 1 changed file with 105 additions and 0 deletions.
105 changes: 105 additions & 0 deletions s2/decode_other.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,110 @@ func s2Decode(dst, src []byte) int {
}
var d, s, length int
offset := 0

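// Fast loop: runs while more than 5 bytes of src remain, so a tag and its
// extra bytes (at most 5 bytes in total) can be read without size checks.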
for s < len(src)-5 {
switch src[s] & 0x03 {
case tagLiteral:
x := uint32(src[s] >> 2)
switch {
case x < 60:
s++
case x == 60:
s += 2
x = uint32(src[s-1])
case x == 61:
s += 3
x = uint32(src[s-2]) | uint32(src[s-1])<<8
case x == 62:
s += 4
x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
case x == 63:
s += 5
x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
}
length = int(x) + 1
if length > len(dst)-d || length > len(src)-s {
return decodeErrCodeCorrupt
}
if debug {
fmt.Println("literals, length:", length, "d-after:", d+length)
}

copy(dst[d:], src[s:s+length])
d += length
s += length
continue

case tagCopy1:
s += 2
length = int(src[s-2]) >> 2 & 0x7
toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
if toffset == 0 {
if debug {
fmt.Print("(repeat) ")
}
// keep last offset
switch length {
case 5:
s += 1
length = int(uint32(src[s-1])) + 4
case 6:
s += 2
length = int(uint32(src[s-2])|(uint32(src[s-1])<<8)) + (1 << 8)
case 7:
s += 3
length = int(uint32(src[s-3])|(uint32(src[s-2])<<8)|(uint32(src[s-1])<<16)) + (1 << 16)
default: // 0-> 4
}
} else {
offset = toffset
}
length += 4
case tagCopy2:
s += 3
length = 1 + int(src[s-3])>>2
offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)

case tagCopy4:
s += 5
length = 1 + int(src[s-5])>>2
offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
}

if offset <= 0 || d < offset || length > len(dst)-d {
return decodeErrCodeCorrupt
}

if debug {
fmt.Println("copy, length:", length, "offset:", offset, "d-after:", d+length)
}

// Copy from an earlier sub-slice of dst to a later sub-slice.
// If no overlap, use the built-in copy:
if offset > length {
copy(dst[d:d+length], dst[d-offset:])
d += length
continue
}

// Unlike the built-in copy function, this byte-by-byte copy always runs
// forwards, even if the slices overlap. Conceptually, this is:
//
// d += forwardCopy(dst[d:d+length], dst[d-offset:])
//
// We align the slices into a and b and show the compiler they are the same size.
// This allows the loop to run without bounds checks.
a := dst[d : d+length]
b := dst[d-offset:]
b = b[:len(a)]
for i := range a {
a[i] = b[i]
}
d += length
}

// Remaining with extra checks...
for s < len(src) {
switch src[s] & 0x03 {
case tagLiteral:
Expand Down Expand Up @@ -151,6 +255,7 @@ func s2Decode(dst, src []byte) int {
}
d += length
}

if d != len(dst) {
return decodeErrCodeCorrupt
}
Expand Down

0 comments on commit 0d78ef0

Please sign in to comment.