Skip to content

Commit

Permalink
zstd: Simplify hashing functions (#402)
Browse files Browse the repository at this point in the history
Inlining now works properly with constant folding.
  • Loading branch information
klauspost committed Jun 22, 2021
1 parent 7cf5f14 commit 09f13c9
Show file tree
Hide file tree
Showing 5 changed files with 117 additions and 144 deletions.
39 changes: 21 additions & 18 deletions zstd/enc_best.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,16 @@ import (
const (
// Parameters for the long match table. The encoder indexes e.longTable with
// hashLen(cv, bestLongTableBits, bestLongLen).
bestLongTableBits = 20 // Bits used in the long match table (hash output width)
bestLongTableSize = 1 << bestLongTableBits // Size of the table, in entries
bestLongLen = 8 // Bytes of input used for the long table hash

// Parameters for the short match table. The encoder indexes e.table with
// hashLen(cv, bestShortTableBits, bestShortLen).
//
// Note: Increasing the short table bits or making the hash shorter
// can actually lead to compression degradation since it will 'steal' more from the
// long match table and match offsets are quite big.
// This greatly depends on the type of input.
bestShortTableBits = 16 // Bits used in the short match table (hash output width)
bestShortTableSize = 1 << bestShortTableBits // Size of the table, in entries
bestShortLen = 4 // Bytes of input used for the short table hash

)

// bestFastEncoder uses 2 tables, one for short matches (hashed over 4 bytes) and one for long matches (hashed over 8 bytes).
Expand Down Expand Up @@ -174,8 +177,8 @@ encodeLoop:
}
const goodEnough = 100

nextHashL := hash8(cv, bestLongTableBits)
nextHashS := hash4x64(cv, bestShortTableBits)
nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)
nextHashS := hashLen(cv, bestShortTableBits, bestShortLen)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]

Expand Down Expand Up @@ -209,11 +212,11 @@ encodeLoop:
}

s++
candidateS = e.table[hash4x64(cv>>8, bestShortTableBits)]
candidateS = e.table[hashLen(cv>>8, bestShortTableBits, bestShortLen)]
cv = load6432(src, s)
cv2 := load6432(src, s+1)
candidateL = e.longTable[hash8(cv, bestLongTableBits)]
candidateL2 := e.longTable[hash8(cv2, bestLongTableBits)]
candidateL = e.longTable[hashLen(cv, bestLongTableBits, bestLongLen)]
candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)]

best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1))
Expand All @@ -224,7 +227,7 @@ encodeLoop:
// See if we can find a better match by checking where the current best ends.
// Use that offset to see if we can find a better full match.
if sAt := best.s + best.length; sAt < sLimit {
nextHashL := hash8(load6432(src, sAt), bestLongTableBits)
nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
candidateEnd := e.longTable[nextHashL]
if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 {
bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1))
Expand Down Expand Up @@ -284,8 +287,8 @@ encodeLoop:
off := index0 + e.cur
for index0 < s-1 {
cv0 := load6432(src, index0)
h0 := hash8(cv0, bestLongTableBits)
h1 := hash4x64(cv0, bestShortTableBits)
h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
off++
Expand Down Expand Up @@ -352,8 +355,8 @@ encodeLoop:
// every entry
for index0 < s-1 {
cv0 := load6432(src, index0)
h0 := hash8(cv0, bestLongTableBits)
h1 := hash4x64(cv0, bestShortTableBits)
h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
Expand All @@ -374,8 +377,8 @@ encodeLoop:
}

// Store this, since we have it.
nextHashS := hash4x64(cv, bestShortTableBits)
nextHashL := hash8(cv, bestLongTableBits)
nextHashS := hashLen(cv, bestShortTableBits, bestShortLen)
nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)

// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
Expand Down Expand Up @@ -441,10 +444,10 @@ func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
const hashLog = bestShortTableBits

cv := load6432(d.content, i-e.maxMatchOff)
nextHash := hash4x64(cv, hashLog) // 0 -> 4
nextHash1 := hash4x64(cv>>8, hashLog) // 1 -> 5
nextHash2 := hash4x64(cv>>16, hashLog) // 2 -> 6
nextHash3 := hash4x64(cv>>24, hashLog) // 3 -> 7
nextHash := hashLen(cv, hashLog, bestShortLen) // 0 -> 4
nextHash1 := hashLen(cv>>8, hashLog, bestShortLen) // 1 -> 5
nextHash2 := hashLen(cv>>16, hashLog, bestShortLen) // 2 -> 6
nextHash3 := hashLen(cv>>24, hashLog, bestShortLen) // 3 -> 7
e.dictTable[nextHash] = prevEntry{
prev: e.dictTable[nextHash].offset,
offset: i,
Expand Down Expand Up @@ -472,7 +475,7 @@ func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
}
if len(d.content) >= 8 {
cv := load6432(d.content, 0)
h := hash8(cv, bestLongTableBits)
h := hashLen(cv, bestLongTableBits, bestLongLen)
e.dictLongTable[h] = prevEntry{
offset: e.maxMatchOff,
prev: e.dictLongTable[h].offset,
Expand All @@ -482,7 +485,7 @@ func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
off := 8 // First to read
for i := e.maxMatchOff + 1; i < end; i++ {
cv = cv>>8 | (uint64(d.content[off]) << 56)
h := hash8(cv, bestLongTableBits)
h := hashLen(cv, bestLongTableBits, bestLongLen)
e.dictLongTable[h] = prevEntry{
offset: i,
prev: e.dictLongTable[h].offset,
Expand Down
64 changes: 33 additions & 31 deletions zstd/enc_better.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@ import "fmt"
const (
betterLongTableBits = 19 // Bits used in the long match table
betterLongTableSize = 1 << betterLongTableBits // Size of the table
betterLongLen = 8 // Bytes used for table hash

// Note: Increasing the short table bits or making the hash shorter
// can actually lead to compression degradation since it will 'steal' more from the
// long match table and match offsets are quite big.
// This greatly depends on the type of input.
betterShortTableBits = 13 // Bits used in the short match table
betterShortTableSize = 1 << betterShortTableBits // Size of the table
betterShortLen = 5 // Bytes used for table hash

betterLongTableShardCnt = 1 << (betterLongTableBits - dictShardBits) // Number of shards in the table
betterLongTableShardSize = betterLongTableSize / betterLongTableShardCnt // Size of an individual shard
Expand Down Expand Up @@ -154,8 +156,8 @@ encodeLoop:
panic("offset0 was 0")
}

nextHashS := hash5(cv, betterShortTableBits)
nextHashL := hash8(cv, betterLongTableBits)
nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]

Expand Down Expand Up @@ -214,10 +216,10 @@ encodeLoop:
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hash8(cv0, betterLongTableBits)
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
index0 += 2
}
cv = load6432(src, s)
Expand Down Expand Up @@ -275,10 +277,10 @@ encodeLoop:
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hash8(cv0, betterLongTableBits)
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
index0 += 2
}
cv = load6432(src, s)
Expand Down Expand Up @@ -353,7 +355,7 @@ encodeLoop:
// See if we can find a long match at s+1
const checkAt = 1
cv := load6432(src, s+checkAt)
nextHashL = hash8(cv, betterLongTableBits)
nextHashL = hashLen(cv, betterLongTableBits, betterLongLen)
candidateL = e.longTable[nextHashL]
coffsetL = candidateL.offset - e.cur

Expand Down Expand Up @@ -413,8 +415,8 @@ encodeLoop:
}

// Try to find a better match by searching for a long match at the end of the current best match
if true && s+matched < sLimit {
nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
if s+matched < sLimit {
nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
cv := load3232(src, s)
candidateL := e.longTable[nextHashL]
coffsetL := candidateL.offset - e.cur - matched
Expand Down Expand Up @@ -495,10 +497,10 @@ encodeLoop:
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hash8(cv0, betterLongTableBits)
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
index0 += 2
}

Expand All @@ -516,8 +518,8 @@ encodeLoop:
}

// Store this, since we have it.
nextHashS := hash5(cv, betterShortTableBits)
nextHashL := hash8(cv, betterLongTableBits)
nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)

// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
Expand Down Expand Up @@ -672,8 +674,8 @@ encodeLoop:
panic("offset0 was 0")
}

nextHashS := hash5(cv, betterShortTableBits)
nextHashL := hash8(cv, betterLongTableBits)
nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]

Expand Down Expand Up @@ -734,11 +736,11 @@ encodeLoop:
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hash8(cv0, betterLongTableBits)
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.markLongShardDirty(h0)
h1 := hash5(cv1, betterShortTableBits)
h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
e.markShortShardDirty(h1)
index0 += 2
Expand Down Expand Up @@ -798,11 +800,11 @@ encodeLoop:
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hash8(cv0, betterLongTableBits)
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.markLongShardDirty(h0)
h1 := hash5(cv1, betterShortTableBits)
h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
e.markShortShardDirty(h1)
index0 += 2
Expand Down Expand Up @@ -879,7 +881,7 @@ encodeLoop:
// See if we can find a long match at s+1
const checkAt = 1
cv := load6432(src, s+checkAt)
nextHashL = hash8(cv, betterLongTableBits)
nextHashL = hashLen(cv, betterLongTableBits, betterLongLen)
candidateL = e.longTable[nextHashL]
coffsetL = candidateL.offset - e.cur

Expand Down Expand Up @@ -940,7 +942,7 @@ encodeLoop:
}
// Try to find a better match by searching for a long match at the end of the current best match
if s+matched < sLimit {
nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
cv := load3232(src, s)
candidateL := e.longTable[nextHashL]
coffsetL := candidateL.offset - e.cur - matched
Expand Down Expand Up @@ -1021,11 +1023,11 @@ encodeLoop:
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hash8(cv0, betterLongTableBits)
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.markLongShardDirty(h0)
h1 := hash5(cv1, betterShortTableBits)
h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
e.markShortShardDirty(h1)
index0 += 2
Expand All @@ -1045,8 +1047,8 @@ encodeLoop:
}

// Store this, since we have it.
nextHashS := hash5(cv, betterShortTableBits)
nextHashL := hash8(cv, betterLongTableBits)
nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)

// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
Expand Down Expand Up @@ -1113,10 +1115,10 @@ func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
const hashLog = betterShortTableBits

cv := load6432(d.content, i-e.maxMatchOff)
nextHash := hash5(cv, hashLog) // 0 -> 4
nextHash1 := hash5(cv>>8, hashLog) // 1 -> 5
nextHash2 := hash5(cv>>16, hashLog) // 2 -> 6
nextHash3 := hash5(cv>>24, hashLog) // 3 -> 7
nextHash := hashLen(cv, hashLog, betterShortLen) // 0 -> 4
nextHash1 := hashLen(cv>>8, hashLog, betterShortLen) // 1 -> 5
nextHash2 := hashLen(cv>>16, hashLog, betterShortLen) // 2 -> 6
nextHash3 := hashLen(cv>>24, hashLog, betterShortLen) // 3 -> 7
e.dictTable[nextHash] = tableEntry{
val: uint32(cv),
offset: i,
Expand Down Expand Up @@ -1145,7 +1147,7 @@ func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
}
if len(d.content) >= 8 {
cv := load6432(d.content, 0)
h := hash8(cv, betterLongTableBits)
h := hashLen(cv, betterLongTableBits, betterLongLen)
e.dictLongTable[h] = prevEntry{
offset: e.maxMatchOff,
prev: e.dictLongTable[h].offset,
Expand All @@ -1155,7 +1157,7 @@ func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
off := 8 // First to read
for i := e.maxMatchOff + 1; i < end; i++ {
cv = cv>>8 | (uint64(d.content[off]) << 56)
h := hash8(cv, betterLongTableBits)
h := hashLen(cv, betterLongTableBits, betterLongLen)
e.dictLongTable[h] = prevEntry{
offset: i,
prev: e.dictLongTable[h].offset,
Expand Down

0 comments on commit 09f13c9

Please sign in to comment.