Skip to content

Commit

Permalink
huff0: Use bmi1 on GOAMD64=v3 (#519)
Browse files Browse the repository at this point in the history
Go v1.18 feature: set `GOAMD64=v3` to enable. The gain is not worth having separate codepaths for.

Allows breaking the dependency chain a bit.

```
benchmark                                              old ns/op     new ns/op     delta
BenchmarkDecompress4XNoTable/gettysburg/10000-32       11464         11195         -2.35%
BenchmarkDecompress4XNoTable/gettysburg/262143-32      322679        319985        -0.83%
BenchmarkDecompress4XNoTable/twain/10000-32            11505         11238         -2.32%
BenchmarkDecompress4XNoTable/twain/262143-32           373751        370410        -0.89%
BenchmarkDecompress4XNoTable/pngdata.001/10000-32      11957         11461         -4.15%
BenchmarkDecompress4XNoTable/pngdata.001/262143-32     306403        300566        -1.91%
```
  • Loading branch information
klauspost committed Mar 9, 2022
1 parent 275e1fc commit 0ff8ec1
Show file tree
Hide file tree
Showing 3 changed files with 244 additions and 57 deletions.
16 changes: 8 additions & 8 deletions huff0/decompress_8b_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,8 @@ skip_fill0:
ADDQ CX, br_bits_read // bits_read += n

// these two writes get coalesced
// buf[stream][off] = uint8(v0.entry >> 8)
// buf[stream][off+1] = uint8(v1.entry >> 8)
// buf[stream][off+2] = uint8(v2.entry >> 8)
// buf[stream][off+3] = uint8(v3.entry >> 8)
MOVW BX, 0+2(buffer)(off*1)

// update the bitreader structure
Expand Down Expand Up @@ -241,8 +241,8 @@ skip_fill1:
ADDQ CX, br_bits_read // bits_read += n

// these two writes get coalesced
// buf[stream][off] = uint8(v0.entry >> 8)
// buf[stream][off+1] = uint8(v1.entry >> 8)
// buf[stream][off+2] = uint8(v2.entry >> 8)
// buf[stream][off+3] = uint8(v3.entry >> 8)
MOVW BX, 256+2(buffer)(off*1)

// update the bitreader structure
Expand Down Expand Up @@ -345,8 +345,8 @@ skip_fill2:
ADDQ CX, br_bits_read // bits_read += n

// these two writes get coalesced
// buf[stream][off] = uint8(v0.entry >> 8)
// buf[stream][off+1] = uint8(v1.entry >> 8)
// buf[stream][off+2] = uint8(v2.entry >> 8)
// buf[stream][off+3] = uint8(v3.entry >> 8)
MOVW BX, 512+2(buffer)(off*1)

// update the bitreader structure
Expand Down Expand Up @@ -449,8 +449,8 @@ skip_fill3:
ADDQ CX, br_bits_read // bits_read += n

// these two writes get coalesced
// buf[stream][off] = uint8(v0.entry >> 8)
// buf[stream][off+1] = uint8(v1.entry >> 8)
// buf[stream][off+2] = uint8(v2.entry >> 8)
// buf[stream][off+3] = uint8(v3.entry >> 8)
MOVW BX, 768+2(buffer)(off*1)

// update the bitreader structure
Expand Down
Loading

0 comments on commit 0ff8ec1

Please sign in to comment.