Rework fast matching
This makes several changes that in combination give close to the same compression, but with a big speedup in most cases.

We change the hash table to contain hashes of 6-byte sequences. The speed is about the same, but this usually gives better compression since the hashes are of better quality. This typically also makes the output faster to decode, since longer matches are preferred.

Hash table size is now defined separately from the window size. I found that 16 bits was a good value, especially since the better hash quality opens up other optimizations.
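
As a concrete illustration, here is a minimal, self-contained sketch of the new hashing scheme. The prime, shifts and `hashLog`/`htSize` values mirror `blockHash` in block.go and the constants in lz4.go in the diff below; the `hash6` name and the demo input are mine.
```go
package main

import (
	"encoding/binary"
	"fmt"
)

const (
	hashLog = 16           // table size is picked independently of the 64 KB window
	htSize  = 1 << hashLog // number of entries in the hash table
)

// hash6 mirrors blockHash in block.go: it keeps the lower 6 bytes of a
// 64-bit load, multiplies by a 6-byte prime and returns the top hashLog
// bits, so the result is always a valid index into a [htSize]int table.
func hash6(x uint64) uint32 {
	const prime6bytes = 227718039650203
	return uint32(((x << (64 - 48)) * prime6bytes) >> (64 - hashLog))
}

func main() {
	src := []byte("hash the first six bytes of this input")
	var table [htSize]int

	h := hash6(binary.LittleEndian.Uint64(src)) // only src[0:6] influence the result
	table[h] = 0                                // remember that this sequence starts at offset 0
	fmt.Println(h, h < htSize)
}
```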

We check for a match at 3 consecutive positions, then skip one byte (plus more if the data is hard to compress). This gives most of the speedup, but also loses us some compression.
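
To make the skipping behaviour concrete, here is a small sketch of the adaptive step. `adaptSkipLog` and the increment expression are taken from block.go in the diff below; the positions in the loop are made up.
```go
package main

import "fmt"

func main() {
	const adaptSkipLog = 7 // from block.go
	anchor := 0            // start of the literals that have not been matched yet

	// block.go skips `1 + (si-anchor)>>adaptSkipLog` bytes when the first
	// candidate is invalid, and `2 + (si-anchor)>>adaptSkipLog` after all
	// three candidate positions fail, so the step grows the longer the
	// scan goes without finding a match.
	for _, si := range []int{2, 130, 258, 1026} {
		fmt.Printf("si=%d -> advance %d\n", si, 2+(si-anchor)>>adaptSkipLog)
	}
}
```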

We index the position 2 bytes before the end of each match. This doesn't impact speed much and gives a nice compression boost.
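
The end-of-match indexing is a single extra table store per match; a minimal sketch of the same lines that appear near the bottom of the match loop in block.go (`hash6` as in the earlier sketch, the position and input are made up):
```go
package main

import (
	"encoding/binary"
	"fmt"
)

const hashLog = 16

func hash6(x uint64) uint32 {
	const prime6bytes = 227718039650203
	return uint32(((x << (64 - 48)) * prime6bytes) >> (64 - hashLog))
}

func main() {
	src := []byte("........a match just ended here, keep indexing........")
	hashTable := make([]int, 1<<hashLog)

	si := 30 // hypothetical position right after a match ended

	// Mirrors the new lines in block.go: also record the sequence that
	// starts two bytes before the match end, so later scans can find
	// matches that overlap the tail of this one.
	h := hash6(binary.LittleEndian.Uint64(src[si-2:]))
	hashTable[h] = si - 2
	fmt.Println("indexed position", si-2, "under hash", h)
}
```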

This combines well with #49 (not included in this benchmark).

Now for the numbers. They are all before/after, best of 2 runs.
```
file	out	level	insize	outsize	millis	mb/s
consensus.db.10gb	lz4	0	10737418240	5057961420	35446	288.88
consensus.db.10gb	lz4	0	10737418240	5077608378	23226	440.87

file	out	level	insize	outsize	millis	mb/s
rawstudio-mint14.tar	lz4	0	8558382592	4568741520	25369	321.73
rawstudio-mint14.tar	lz4	0	8558382592	4592776475	17168	475.41

file	out	level	insize	outsize	millis	mb/s
github-ranks-backup.bin	lz4	0	1862623243	579273817	4074	436.02
github-ranks-backup.bin	lz4	0	1862623243	627056167	3522	504.35

file	out	level	insize	outsize	millis	mb/s
github-june-2days-2019.json	lz4	0	6273951764	1355117284	10763	555.86
github-june-2days-2019.json	lz4	0	6273951764	1293582359	9136	654.91

file	out	level	insize	outsize	millis	mb/s
gob-stream	lz4	0	1911399616	384235547	3481	523.66
gob-stream	lz4	0	1911399616	384292384	2827	644.80

file	out	level	insize	outsize	millis	mb/s
10gb.tar	lz4	0	10065157632	6481808453	23629	406.23
10gb.tar	lz4	0	10065157632	5902162074	22592	424.88

file	out	level	insize	outsize	millis	mb/s
enwik9	lz4	0	1000000000	489160425	3733	255.47
enwik9	lz4	0	1000000000	482276927	3520	270.93

file	out	level	insize	outsize	millis	mb/s
silesia.tar	lz4	0	211947520	99218419	691	292.51
silesia.tar	lz4	0	211947520	96766005	590	342.01

file	out	level	insize	outsize	millis	mb/s
sharnd.out	lz4	0	500000000	500000495	169	2821.52
sharnd.out	lz4	0	500000000	500000495	166	2872.51
```

Only [github-ranks-backup.bin](https://files.klauspost.com/compress/github-ranks-backup.bin.zst) has a significant size increase (roughly 8% larger). The others are very close to or better than before.

All show a minor to significant speedup, from under 2% on the incompressible sharnd.out to over 50% on consensus.db.10gb.
klauspost committed Aug 1, 2019
1 parent 377214e commit a207029
Showing 5 changed files with 81 additions and 33 deletions.
4 changes: 2 additions & 2 deletions bench_test.go
@@ -10,7 +10,7 @@ import (
)

func BenchmarkCompress(b *testing.B) {
var hashTable [1 << 16]int
var hashTable [htSize]int
buf := make([]byte, len(pg1661))

b.ReportAllocs()
@@ -22,7 +22,7 @@ func BenchmarkCompress(b *testing.B) {
}

func BenchmarkCompressRandom(b *testing.B) {
var hashTable [1 << 16]int
var hashTable [htSize]int
buf := make([]byte, len(randomLZ4))

b.ReportAllocs()
83 changes: 63 additions & 20 deletions block.go
@@ -2,13 +2,14 @@ package lz4

import (
"encoding/binary"
"fmt"
"math/bits"
)

// blockHash hashes 4 bytes into a value < winSize.
func blockHash(x uint32) uint32 {
const hasher uint32 = 2654435761 // Knuth multiplicative hash.
return x * hasher >> hashShift
// blockHash hashes the lower 6 bytes into a value < htSize.
func blockHash(x uint64) uint32 {
const prime6bytes = 227718039650203
return uint32(((x << (64 - 48)) * prime6bytes) >> (64 - hashLog))
}

// CompressBlockBound returns the maximum size of a given buffer of size n, when not compressible.
@@ -46,33 +47,62 @@ func CompressBlock(src, dst []byte, hashTable []int) (di int, err error) {
// This significantly speeds up incompressible data and usually has very small impact on compression.
// bytes to skip = 1 + (bytes since last match >> adaptSkipLog)
const adaptSkipLog = 7

sn, dn := len(src)-mfLimit, len(dst)
if sn <= 0 || dn == 0 {
return 0, nil
}
var si int

// Fast scan strategy: the hash table only stores the last 4 bytes sequences.
if len(hashTable) < htSize {
return 0, fmt.Errorf("hash table too small, should be at least %d in size", htSize)
}
// Prove to the compiler the table has at least htSize elements.
// The compiler can see that "uint32() >> hashShift" cannot be out of bounds.
hashTable = hashTable[:htSize]

anchor := si // Position of the current literals.
// si: Current position of the search.
// anchor: Position of the current literals.
var si, anchor int

// Fast scan strategy: the hash table only stores the last 4 bytes sequences.
for si < sn {
// Hash the next 4 bytes (sequence)...
match := binary.LittleEndian.Uint32(src[si:])
// Hash the next 6 bytes (sequence)...
match := binary.LittleEndian.Uint64(src[si:])
h := blockHash(match)
h2 := blockHash(match >> 8)

// We check a match at si, si+1 and si+2 and pick the first one we get.
// Checking 3 only requires us to load the source once.
ref := hashTable[h]
ref2 := hashTable[h2]
hashTable[h] = si
if ref >= sn { // Invalid reference (dirty hashtable).
si += 1 + (si-anchor)>>adaptSkipLog
continue
}
hashTable[h2] = si + 1
offset := si - ref

// If offset <= 0 we got an old entry in the hash table.
if offset <= 0 || offset >= winSize || // Out of window.
match != binary.LittleEndian.Uint32(src[ref:]) { // Hash collision on different matches.
si += 1 + (si-anchor)>>adaptSkipLog
continue
uint32(match) != binary.LittleEndian.Uint32(src[ref:]) { // Hash collision on different matches.
// No match. Start calculating another hash.
// The processor can usually do this out-of-order.
h = blockHash(match >> 16)
ref = hashTable[h]

// Check the second match at si+1
si += 1
offset = si - ref2

if offset <= 0 || offset >= winSize ||
uint32(match>>8) != binary.LittleEndian.Uint32(src[ref2:]) {
// No match. Check the third match at si+2
si += 1
offset = si - ref
hashTable[h] = si

if offset <= 0 || offset >= winSize ||
uint32(match>>16) != binary.LittleEndian.Uint32(src[ref:]) {
// Skip one extra byte (at si+3) before we check 3 matches again.
si += 2 + (si-anchor)>>adaptSkipLog
continue
}
}
}

// Match found.
@@ -134,6 +164,13 @@ func CompressBlock(src, dst []byte, hashTable []int) (di int, err error) {
dst[di] = byte(mLen)
di++
}
// Check if we can load next values.
if si >= sn {
break
}
// Hash match end-2
h = blockHash(binary.LittleEndian.Uint64(src[si-2:]))
hashTable[h] = si - 2
}

if anchor == 0 {
@@ -165,6 +202,12 @@ func CompressBlock(src, dst []byte, hashTable []int) (di int, err error) {
return di, nil
}

// blockHashHC hashes 4 bytes into a value < winSize.
func blockHashHC(x uint32) uint32 {
const hasher uint32 = 2654435761 // Knuth multiplicative hash.
return x * hasher >> (32 - winSizeLog)
}

// CompressBlockHC compresses the source buffer src into the destination dst
// with max search depth (use 0 or negative value for no max).
//
@@ -199,7 +242,7 @@ func CompressBlockHC(src, dst []byte, depth int) (di int, err error) {
for si < sn {
// Hash the next 4 bytes (sequence).
match := binary.LittleEndian.Uint32(src[si:])
h := blockHash(match)
h := blockHashHC(match)

// Follow the chain until out of window and give the longest match.
mLen := 0
@@ -251,7 +294,7 @@ func CompressBlockHC(src, dst []byte, depth int) (di int, err error) {
for si, ml := winStart, si+mLen; si < ml; {
match >>= 8
match |= uint32(src[si+3]) << 24
h := blockHash(match)
h := blockHashHC(match)
chainTable[si&winMask] = hashTable[h]
hashTable[h] = si
si++
21 changes: 13 additions & 8 deletions block_test.go
@@ -11,8 +11,11 @@ import (
"github.com/pierrec/lz4"
)

// Hash table size.
const htSize = 1 << 16 // 64kb
const (
// Should match values in lz4.go
hashLog = 16
htSize = 1 << hashLog
)

type testcase struct {
file string
@@ -22,11 +25,11 @@ type testcase struct {

var rawFiles = []testcase{
// {"testdata/207326ba-36f8-11e7-954a-aca46ba8ca73.png", true, nil},
{"testdata/e.txt", true, nil},
{"testdata/e.txt", false, nil},
{"testdata/gettysburg.txt", true, nil},
{"testdata/Mark.Twain-Tom.Sawyer.txt", true, nil},
{"testdata/pg1661.txt", true, nil},
{"testdata/pi.txt", true, nil},
{"testdata/pi.txt", false, nil},
{"testdata/random.data", false, nil},
{"testdata/repeat.txt", true, nil},
{"testdata/pg1661.txt", true, nil},
@@ -125,10 +128,12 @@ func TestCompressCornerCase_CopyDstUpperBound(t *testing.T) {
t.Helper()

// Compress the data.
zbuf := make([]byte, int(float64(len(src))*0.85))
// We provide a destination that is too small to trigger an out-of-bounds,
// which makes it return the error we want.
zbuf := make([]byte, int(float64(len(src))*0.40))
_, err := compress(src, zbuf)
if err != lz4.ErrInvalidSourceShortBuffer {
t.Fatal("err should be ErrInvalidSourceShortBuffer")
t.Fatal("err should be ErrInvalidSourceShortBuffer, was", err)
}
}

@@ -154,9 +159,9 @@ func TestCompressCornerCase_CopyDstUpperBound(t *testing.T) {
}

func TestIssue23(t *testing.T) {
compressBuf := make([]byte, lz4.CompressBlockBound(htSize))
compressBuf := make([]byte, lz4.CompressBlockBound(1<<16))
for j := 1; j < 16; j++ {
var buf [htSize]byte
var buf [1 << 16]byte
var ht [htSize]int

for i := 0; i < len(buf); i += j {
6 changes: 3 additions & 3 deletions lz4.go
@@ -30,9 +30,9 @@ const (
// hashLog determines the size of the hash table used to quickly find a previous match position.
// Its value influences the compression speed and memory usage, the lower the faster,
// but at the expense of the compression ratio.
// 16 seems to be the best compromise.
hashLog = 16
hashShift = uint((minMatch * 8) - hashLog)
// 16 seems to be the best compromise for fast compression.
hashLog = 16
htSize = 1 << hashLog

mfLimit = 8 + minMatch // The last match cannot start within the last 12 bytes.
)
Binary file modified testdata/upperbound.data
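
For completeness, a hedged sketch of how a caller sizes the table against the public API after this change, in the spirit of the updated bench_test.go. The local `htSize` constant is an assumption that mirrors `hashLog = 16` in lz4.go, since the package-level constant itself is unexported.
```go
package main

import (
	"fmt"

	"github.com/pierrec/lz4"
)

// htSize mirrors 1 << hashLog from lz4.go; the package does not export it,
// so callers declare their own constant, as block_test.go does.
const htSize = 1 << 16

func main() {
	data := []byte("some reasonably compressible input, repeated: some reasonably compressible input")

	var ht [htSize]int // reusable hash table; use one per goroutine
	dst := make([]byte, lz4.CompressBlockBound(len(data)))

	n, err := lz4.CompressBlock(data, dst, ht[:])
	if err != nil {
		panic(err)
	}
	if n == 0 {
		fmt.Println("data was not compressible")
		return
	}
	fmt.Printf("compressed %d bytes into %d\n", len(data), n)
}
```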
