Skip to content

Commit

Permalink
Add SIMD support for ARM64, Apple Silicon and AVX512 (#34)
Browse files Browse the repository at this point in the history
* add support for arm64 and apple silicon

* -O3 on arm

* fix build generic

* wrong build tag

* add no asm to benchmark

* use improved gocc

* add avx512 support

* fix build

* tests

* test

* fix m1 detection
  • Loading branch information
kelindar committed May 3, 2023
1 parent 8b235c8 commit fa863a0
Show file tree
Hide file tree
Showing 33 changed files with 4,401 additions and 1,664 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,16 @@ jobs:
- name: Set up Go
uses: actions/setup-go@v1
with:
go-version: "1.18"
go-version: "1.20"
- name: Check out code
uses: actions/checkout@v2
- name: Install dependencies
run: |
go mod download
- name: Run Unit Tests
run: |
go test -race -covermode atomic -coverprofile=profile.cov ./...
go test -tags noasm -race -covermode atomic -coverprofile=profile.cov ./...
go test -race ./...
- name: Upload Coverage
uses: shogo82148/actions-goveralls@v1
with:
Expand Down
46 changes: 44 additions & 2 deletions bitmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,34 @@ package bitmap

import (
"math/bits"
"unsafe"

"github.com/klauspost/cpuid/v2"
)

var (
avx2 = cpuid.CPU.Supports(cpuid.AVX2)
const (
isUnsupported = iota
isAccelerated
isAVX512
)

// hardware holds the acceleration level resolved for the current CPU when the package is initialized
var hardware = levelOf(cpuid.CPU)

// levelOf maps the feature flags reported by cpu to one of the acceleration
// levels: isAVX512, isAccelerated or isUnsupported.
func levelOf(cpu cpuid.CPUInfo) int {
	// Checked first: the AVX-512 level needs the F, DQ and BW subsets together.
	if cpu.Supports(cpuid.AVX512F, cpuid.AVX512DQ, cpuid.AVX512BW) {
		return isAVX512
	}

	// AVX2 combined with FMA3, or ASIMD on its own, both map to the same level.
	if cpu.Supports(cpuid.AVX2, cpuid.FMA3) || cpu.Supports(cpuid.ASIMD) {
		return isAccelerated
	}

	return isUnsupported
}

// Bitmap represents a scalar-backed bitmap index
type Bitmap []uint64

Expand Down Expand Up @@ -205,3 +225,25 @@ func resize(capacity, v int) int {
}
return capacity
}

// dimensionsOf packs two sizes into one uint64: n is OR-ed into the low bits
// and m is shifted into the upper 32 bits. Neither value is masked, so n is
// expected to fit in 32 bits.
func dimensionsOf(n, m int) uint64 {
	low := uint64(n)
	high := uint64(m) << 32
	return high | low
}

// pointersOf builds a table holding one pointer per bitmap: slot 0 addresses
// the first word of other and slot i+1 addresses the first word of extra[i].
// It returns the address of that table together with the length of the longest
// bitmap in extra; len(other) does not take part in that maximum.
//
// Every bitmap passed in must hold at least one word, because taking the
// address of element 0 of an empty slice panics.
func pointersOf(other Bitmap, extra []Bitmap) (unsafe.Pointer, int) {
	table := make([]unsafe.Pointer, 0, len(extra)+1)
	table = append(table, unsafe.Pointer(&other[0]))

	longest := 0
	for _, b := range extra {
		table = append(table, unsafe.Pointer(&b[0]))
		if n := len(b); n > longest {
			longest = n
		}
	}

	return unsafe.Pointer(&table[0]), longest
}
261 changes: 134 additions & 127 deletions bitmap_amd64.go
Original file line number Diff line number Diff line change
@@ -1,127 +1,134 @@
// Copyright (c) Roman Atachiants and contributors. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for details.

//go:build amd64
// +build amd64

package bitmap

// And intersects the current bitmap with other and with every bitmap in extra,
// storing the result in the current bitmap. The destination is first shrunk to
// the length reported by minlen and every operand is sliced to that length.
func (dst *Bitmap) And(other Bitmap, extra ...Bitmap) {
	max := minlen(*dst, other, extra)
	dst.shrink(max)

	// Without AVX2 the portable implementation does all the work.
	if !avx2 {
		and(*dst, max, other, extra)
		return
	}

	head := other[:max]
	switch len(extra) {
	case 0:
		x64and1(*dst, head)
	case 1:
		x64and2(*dst, head, extra[0][:max])
	case 2:
		x64and3(*dst, head, extra[0][:max], extra[1][:max])
	default:
		// Three or more extras: the widest kernel takes the first three, the
		// remaining ones (if any) are folded in one at a time.
		x64and4(*dst, head, extra[0][:max], extra[1][:max], extra[2][:max])
		for _, rest := range extra[3:] {
			x64and1(*dst, rest[:max])
		}
	}
}

// AndNot computes the difference between the current bitmap and other (plus
// any bitmaps in extra) and stores the result in the current bitmap.
// The operation works as a set subtraction: dst - other.
func (dst *Bitmap) AndNot(other Bitmap, extra ...Bitmap) {
	max := minlen(*dst, other, extra)

	// Without AVX2 the portable implementation does all the work.
	if !avx2 {
		andn(*dst, max, other, extra)
		return
	}

	head := other[:max]
	switch len(extra) {
	case 0:
		x64andn1(*dst, head)
	case 1:
		x64andn2(*dst, head, extra[0][:max])
	case 2:
		x64andn3(*dst, head, extra[0][:max], extra[1][:max])
	default:
		// Three or more extras: the widest kernel takes the first three, the
		// remaining ones (if any) are folded in one at a time.
		x64andn4(*dst, head, extra[0][:max], extra[1][:max], extra[2][:max])
		for _, rest := range extra[3:] {
			x64andn1(*dst, rest[:max])
		}
	}
}

// Or merges other and every bitmap in extra into the current bitmap (union).
// The destination is grown beforehand using the length reported by maxlen.
func (dst *Bitmap) Or(other Bitmap, extra ...Bitmap) {
	max := maxlen(*dst, other, extra)
	dst.grow(max - 1)

	// Without AVX2 the portable implementation does all the work.
	if !avx2 {
		or(*dst, other, extra)
		return
	}

	switch len(extra) {
	case 0:
		x64or1(*dst, other)
	case 1:
		x64or2(*dst, other, extra[0])
	case 2:
		x64or3(*dst, other, extra[0], extra[1])
	default:
		// Three or more extras: the widest kernel takes the first three, the
		// remaining ones (if any) are folded in one at a time.
		x64or4(*dst, other, extra[0], extra[1], extra[2])
		for _, rest := range extra[3:] {
			x64or1(*dst, rest)
		}
	}
}

// Xor computes the symmetric difference between the current bitmap, other and
// every bitmap in extra, storing the result in the current bitmap. The
// destination is grown beforehand using the length reported by maxlen.
func (dst *Bitmap) Xor(other Bitmap, extra ...Bitmap) {
	max := maxlen(*dst, other, extra)
	dst.grow(max - 1)

	// Without AVX2 the portable implementation does all the work.
	if !avx2 {
		xor(*dst, other, extra)
		return
	}

	switch len(extra) {
	case 0:
		x64xor1(*dst, other)
	case 1:
		x64xor2(*dst, other, extra[0])
	case 2:
		x64xor3(*dst, other, extra[0], extra[1])
	default:
		// Three or more extras: the widest kernel takes the first three, the
		// remaining ones (if any) are folded in one at a time.
		x64xor4(*dst, other, extra[0], extra[1], extra[2])
		for _, rest := range extra[3:] {
			x64xor1(*dst, rest)
		}
	}
}

// Count returns the number of elements in this bitmap, using the AVX2 kernel
// when the CPU supports it and the portable implementation otherwise.
func (dst Bitmap) Count() int {
	if avx2 {
		return x64count_avx2(dst)
	}
	return count(dst)
}
// Copyright (c) Roman Atachiants and contributors. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for details.

//go:build !noasm && amd64

package bitmap

import "unsafe"

// And computes the intersection between the current bitmap, other and every
// bitmap in extra, and stores the result in the current bitmap.
//
// The destination is first shrunk to the length reported by minlen; only that
// many words of each operand are handed to the SIMD kernels.
func (dst *Bitmap) And(other Bitmap, extra ...Bitmap) {
	max := minlen(*dst, other, extra)
	dst.shrink(max)

	// The SIMD kernels receive the address of word 0 of each operand, and
	// indexing an empty slice panics. With no word to process, let the
	// portable implementation handle the call exactly as it does on
	// unsupported hardware.
	// NOTE(review): assumes minlen yields the shortest operand length, so that
	// max > 0 implies every operand is non-empty — confirm.
	if max == 0 {
		and(*dst, max, other, extra)
		return
	}

	switch hardware {
	case isAccelerated:
		switch len(extra) {
		case 0:
			_and(unsafe.Pointer(&(*dst)[0]), unsafe.Pointer(&other[0]), uint64(max))
		default:
			// The length returned by pointersOf is ignored: the intersection
			// only spans the first max words of every operand.
			vx, _ := pointersOf(other, extra)
			_and_many(unsafe.Pointer(&(*dst)[0]), vx, dimensionsOf(max, len(extra)+1))
		}
	case isAVX512:
		switch len(extra) {
		case 0:
			_and_avx512(unsafe.Pointer(&(*dst)[0]), unsafe.Pointer(&other[0]), uint64(max))
		default:
			vx, _ := pointersOf(other, extra)
			_and_many_avx512(unsafe.Pointer(&(*dst)[0]), vx, dimensionsOf(max, len(extra)+1))
		}
	default:
		and(*dst, max, other, extra)
	}
}

// AndNot computes the difference between the current bitmap and other (plus
// every bitmap in extra) and stores the result in the current bitmap.
// The operation works as a set subtraction: dst - other.
//
// Only the number of words reported by minlen is handed to the SIMD kernels;
// the destination is not resized.
func (dst *Bitmap) AndNot(other Bitmap, extra ...Bitmap) {
	max := minlen(*dst, other, extra)

	// The SIMD kernels receive the address of word 0 of each operand, and
	// indexing an empty slice panics. With no word to process, let the
	// portable implementation handle the call exactly as it does on
	// unsupported hardware.
	// NOTE(review): assumes minlen yields the shortest operand length, so that
	// max > 0 implies every operand is non-empty — confirm.
	if max == 0 {
		andn(*dst, max, other, extra)
		return
	}

	switch hardware {
	case isAccelerated:
		switch len(extra) {
		case 0:
			_andn(unsafe.Pointer(&(*dst)[0]), unsafe.Pointer(&other[0]), uint64(max))
		default:
			// The length returned by pointersOf is ignored: only the first max
			// words of every operand take part in the operation.
			vx, _ := pointersOf(other, extra)
			_andn_many(unsafe.Pointer(&(*dst)[0]), vx, dimensionsOf(max, len(extra)+1))
		}
	case isAVX512:
		switch len(extra) {
		case 0:
			_andn_avx512(unsafe.Pointer(&(*dst)[0]), unsafe.Pointer(&other[0]), uint64(max))
		default:
			vx, _ := pointersOf(other, extra)
			_andn_many_avx512(unsafe.Pointer(&(*dst)[0]), vx, dimensionsOf(max, len(extra)+1))
		}
	default:
		andn(*dst, max, other, extra)
	}
}

// Or computes the union between the current bitmap, other and every bitmap in
// extra, and stores the result in the current bitmap.
//
// The destination is grown beforehand using the length reported by maxlen.
// The SIMD kernels are used only when every source bitmap is non-empty and all
// sources have the same length; any other combination is handled by the
// portable implementation.
func (dst *Bitmap) Or(other Bitmap, extra ...Bitmap) {
	max := maxlen(*dst, other, extra)
	dst.grow(max - 1)

	// The kernels are handed the address of word 0 of every source plus one
	// word count shared by all of them. Taking &x[0] of an empty source
	// panics, and a single count cannot describe sources of different
	// lengths: it would either cut off the tail of a longer source or claim
	// more words than a shorter one holds.
	n := len(other)
	uniform := n > 0
	for i := 0; uniform && i < len(extra); i++ {
		uniform = len(extra[i]) == n
	}
	if !uniform {
		or(*dst, other, extra)
		return
	}

	// From here on n > 0, so max > 0 and the destination is expected to have
	// been grown to hold at least n words.
	switch hardware {
	case isAccelerated:
		switch len(extra) {
		case 0:
			_or(unsafe.Pointer(&(*dst)[0]), unsafe.Pointer(&other[0]), uint64(n))
		default:
			vx, _ := pointersOf(other, extra)
			_or_many(unsafe.Pointer(&(*dst)[0]), vx, dimensionsOf(n, len(extra)+1))
		}
	case isAVX512:
		switch len(extra) {
		case 0:
			_or_avx512(unsafe.Pointer(&(*dst)[0]), unsafe.Pointer(&other[0]), uint64(n))
		default:
			vx, _ := pointersOf(other, extra)
			_or_many_avx512(unsafe.Pointer(&(*dst)[0]), vx, dimensionsOf(n, len(extra)+1))
		}
	default:
		or(*dst, other, extra)
	}
}

// Xor computes the symmetric difference between the current bitmap, other and
// every bitmap in extra, and stores the result in the current bitmap.
//
// The destination is grown beforehand using the length reported by maxlen.
// The SIMD kernels are used only when every source bitmap is non-empty and all
// sources have the same length; any other combination is handled by the
// portable implementation.
func (dst *Bitmap) Xor(other Bitmap, extra ...Bitmap) {
	max := maxlen(*dst, other, extra)
	dst.grow(max - 1)

	// The kernels are handed the address of word 0 of every source plus one
	// word count shared by all of them. Taking &x[0] of an empty source
	// panics, and a single count cannot describe sources of different
	// lengths: it would either cut off the tail of a longer source or claim
	// more words than a shorter one holds.
	n := len(other)
	uniform := n > 0
	for i := 0; uniform && i < len(extra); i++ {
		uniform = len(extra[i]) == n
	}
	if !uniform {
		xor(*dst, other, extra)
		return
	}

	// From here on n > 0, so max > 0 and the destination is expected to have
	// been grown to hold at least n words.
	switch hardware {
	case isAccelerated:
		switch len(extra) {
		case 0:
			_xor(unsafe.Pointer(&(*dst)[0]), unsafe.Pointer(&other[0]), uint64(n))
		default:
			vx, _ := pointersOf(other, extra)
			_xor_many(unsafe.Pointer(&(*dst)[0]), vx, dimensionsOf(n, len(extra)+1))
		}
	case isAVX512:
		switch len(extra) {
		case 0:
			_xor_avx512(unsafe.Pointer(&(*dst)[0]), unsafe.Pointer(&other[0]), uint64(n))
		default:
			vx, _ := pointersOf(other, extra)
			_xor_many_avx512(unsafe.Pointer(&(*dst)[0]), vx, dimensionsOf(n, len(extra)+1))
		}
	default:
		xor(*dst, other, extra)
	}
}

// Count returns the number of elements in this bitmap. An empty bitmap yields
// zero without touching any kernel, since the kernel is given the address of
// the first word.
//
// NOTE(review): only the isAccelerated level reaches the SIMD kernel; the
// isAVX512 level takes the portable path here — confirm this is intended.
func (dst Bitmap) Count() int {
	switch {
	case len(dst) == 0:
		return 0
	case hardware != isAccelerated:
		return count(dst)
	}

	// The kernel writes its result through the pointer passed as last argument.
	var total uint64
	_count(unsafe.Pointer(&dst[0]), uint64(len(dst)), unsafe.Pointer(&total))
	return int(total)
}
Loading

0 comments on commit fa863a0

Please sign in to comment.