Skip to content

Commit

Permalink
zstd: Improve throughput of SpeedBestCompression encoder (#699)
Browse files Browse the repository at this point in the history
Lifted the ofCode and mlCode computations out of match.estBits, so that
the method is inlined into its only caller. Also made some changes to
eliminate a branch: the last if block becomes two CMOVs/CSELs on
amd64 and arm64, respectively.

```
name                              old speed      new speed       delta
Encoder_EncodeAllSimple/best-8    11.1MB/s ± 1%   16.9MB/s ± 1%  +52.23%  (p=0.000 n=10+10)
Encoder_EncodeAllSimple4K/best-8  8.41MB/s ± 1%  10.95MB/s ± 0%  +30.20%  (p=0.000 n=10+10)

name                              old alloc/op   new alloc/op    delta
Encoder_EncodeAllSimple/best-8       20.0B ± 0%      18.0B ± 0%  -10.00%  (p=0.002 n=8+10)
Encoder_EncodeAllSimple4K/best-8     2.00B ± 0%      2.00B ± 0%     ~     (all equal)
```
  • Loading branch information
greatroar committed Nov 26, 2022
1 parent fae4e3f commit 2878205
Showing 1 changed file with 17 additions and 15 deletions.
32 changes: 17 additions & 15 deletions zstd/enc_best.go
Expand Up @@ -38,27 +38,29 @@ type match struct {
const highScore = 25000

// estBits will estimate output bits from predefined tables.
//
// It looks up the predefined offset and match-length table entries for the
// offset code (ofc) and match-length code (mlc), sums their outBits and
// deltaNbBits>>16 fields, and subtracts (m.length * bitsPerByte) >> 10.
// The result is stored in m.est; if it is greater than zero, m.est is set to
// highScore and m.length to 0 instead.
//
// NOTE(review): this span is a rendered diff, so statements from the removed
// one-argument version (which derives ofc and mlc itself) and from the added
// three-argument version (which receives them from the caller) are interleaved.
// NOTE(review): the >> 10 suggests bitsPerByte is a fixed-point value scaled
// by 1024 — confirm against the caller.
func (m *match) estBits(bitsPerByte int32) {
mlc := mlCode(uint32(m.length - zstdMinMatch))
var ofc uint8
if m.rep < 0 {
ofc = ofCode(uint32(m.s-m.offset) + 3)
} else {
ofc = ofCode(uint32(m.rep))
}
func (m *match) estBits(bitsPerByte int32, ofc, mlc uint8) {
// Fetch the predefined table entries for the offset and match-length codes.
// NOTE(review): the original comment here read "Cost, excluding" and was left
// unfinished; what is excluded from the cost is not stated in this code.
ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc]

// Add cost of match encoding...
m.est = int32(ofTT.outBits + mlTT.outBits)
m.est += int32(ofTT.deltaNbBits>>16 + mlTT.deltaNbBits>>16)
est := int32(ofTT.outBits + mlTT.outBits)
est += int32(ofTT.deltaNbBits>>16 + mlTT.deltaNbBits>>16)
// Subtract savings compared to literal encoding...
m.est -= (m.length * bitsPerByte) >> 10
if m.est > 0 {
length := m.length
est -= (length * bitsPerByte) >> 10
if est > 0 {
// Unlikely gain: the estimate is still positive after subtracting the
// literal savings, so zero the length and assign highScore.
m.length = 0
m.est = highScore
est, length = highScore, 0
}
// Write both results back in one assignment; est and length were updated
// together above, so the two fields always stay consistent.
m.est, m.length = est, length
}

// ofCode returns the offset code for m, for use in cost estimation.
//
// When m.rep is negative the code is derived from the match distance
// (m.s - m.offset) plus 3; otherwise it is derived from m.rep itself.
// NOTE(review): presumably a negative rep marks a match that does not use a
// repeat offset — confirm against where match.rep is assigned.
func (m *match) ofCode() uint8 {
	offset := uint32(m.rep)
	// Test the sign on the signed field, not on the converted value: a uint32
	// can never be negative, so checking offset < 0 would always be false and
	// the distance-based code below would never be selected.
	if m.rep < 0 {
		offset = uint32(m.s-m.offset) + 3
	}
	return ofCode(offset)
}

// bestFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches.
Expand Down Expand Up @@ -216,7 +218,7 @@ encodeLoop:
}
}
m := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
m.estBits(bitsPerByte)
m.estBits(bitsPerByte, m.ofCode(), mlCode(uint32(m.length-zstdMinMatch)))
return m
}

Expand Down

0 comments on commit 2878205

Please sign in to comment.