-
-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add SIMD support for ARM64, Apple Silicon and AVX512 (#34)
* add support for arm64 and apple silicon * -O3 on arm * fix build generic * wrong build tag * add no asm to benchmark * use improved gocc * add avx512 support * fix build * tests * test * fix m1 detection
- Loading branch information
Showing
33 changed files
with
4,401 additions
and
1,664 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,127 +1,134 @@ | ||
// Copyright (c) Roman Atachiants and contributors. All rights reserved. | ||
// Licensed under the MIT license. See LICENSE file in the project root for details. | ||
|
||
//go:build amd64 | ||
// +build amd64 | ||
|
||
package bitmap | ||
|
||
// And computes the intersection between two bitmaps and stores the result in the current bitmap | ||
func (dst *Bitmap) And(other Bitmap, extra ...Bitmap) { | ||
max := minlen(*dst, other, extra) | ||
dst.shrink(max) | ||
|
||
switch avx2 { | ||
case true: | ||
switch len(extra) { | ||
case 0: | ||
x64and1(*dst, other[:max]) | ||
case 1: | ||
x64and2(*dst, other[:max], extra[0][:max]) | ||
case 2: | ||
x64and3(*dst, other[:max], extra[0][:max], extra[1][:max]) | ||
case 3: | ||
x64and4(*dst, other[:max], extra[0][:max], extra[1][:max], extra[2][:max]) | ||
default: | ||
x64and4(*dst, other[:max], extra[0][:max], extra[1][:max], extra[2][:max]) | ||
for i := 3; i < len(extra); i++ { | ||
x64and1(*dst, extra[i][:max]) | ||
} | ||
} | ||
default: | ||
and(*dst, max, other, extra) | ||
return | ||
} | ||
} | ||
|
||
// AndNot computes the difference between two bitmaps and stores the result in the current bitmap. | ||
// Operation works as set subtract: dst - b | ||
func (dst *Bitmap) AndNot(other Bitmap, extra ...Bitmap) { | ||
max := minlen(*dst, other, extra) | ||
|
||
switch avx2 { | ||
case true: | ||
switch len(extra) { | ||
case 0: | ||
x64andn1(*dst, other[:max]) | ||
case 1: | ||
x64andn2(*dst, other[:max], extra[0][:max]) | ||
case 2: | ||
x64andn3(*dst, other[:max], extra[0][:max], extra[1][:max]) | ||
case 3: | ||
x64andn4(*dst, other[:max], extra[0][:max], extra[1][:max], extra[2][:max]) | ||
default: | ||
x64andn4(*dst, other[:max], extra[0][:max], extra[1][:max], extra[2][:max]) | ||
for i := 3; i < len(extra); i++ { | ||
x64andn1(*dst, extra[i][:max]) | ||
} | ||
} | ||
default: | ||
andn(*dst, max, other, extra) | ||
return | ||
} | ||
} | ||
|
||
// Or computes the union between two bitmaps and stores the result in the current bitmap | ||
func (dst *Bitmap) Or(other Bitmap, extra ...Bitmap) { | ||
max := maxlen(*dst, other, extra) | ||
dst.grow(max - 1) | ||
|
||
switch avx2 { | ||
case true: | ||
switch len(extra) { | ||
case 0: | ||
x64or1(*dst, other) | ||
case 1: | ||
x64or2(*dst, other, extra[0]) | ||
case 2: | ||
x64or3(*dst, other, extra[0], extra[1]) | ||
case 3: | ||
x64or4(*dst, other, extra[0], extra[1], extra[2]) | ||
default: | ||
x64or4(*dst, other, extra[0], extra[1], extra[2]) | ||
for i := 3; i < len(extra); i++ { | ||
x64or1(*dst, extra[i]) | ||
} | ||
} | ||
default: | ||
or(*dst, other, extra) | ||
} | ||
} | ||
|
||
// Xor computes the symmetric difference between two bitmaps and stores the result in the current bitmap | ||
func (dst *Bitmap) Xor(other Bitmap, extra ...Bitmap) { | ||
max := maxlen(*dst, other, extra) | ||
dst.grow(max - 1) | ||
|
||
switch avx2 { | ||
case true: | ||
switch len(extra) { | ||
case 0: | ||
x64xor1(*dst, other) | ||
case 1: | ||
x64xor2(*dst, other, extra[0]) | ||
case 2: | ||
x64xor3(*dst, other, extra[0], extra[1]) | ||
case 3: | ||
x64xor4(*dst, other, extra[0], extra[1], extra[2]) | ||
default: | ||
x64xor4(*dst, other, extra[0], extra[1], extra[2]) | ||
for i := 3; i < len(extra); i++ { | ||
x64xor1(*dst, extra[i]) | ||
} | ||
} | ||
default: | ||
xor(*dst, other, extra) | ||
} | ||
} | ||
|
||
// Count returns the number of elements in this bitmap | ||
func (dst Bitmap) Count() int { | ||
switch avx2 { | ||
case true: | ||
return x64count_avx2(dst) | ||
default: | ||
return count(dst) | ||
} | ||
} | ||
// Copyright (c) Roman Atachiants and contributors. All rights reserved. | ||
// Licensed under the MIT license. See LICENSE file in the project root for details. | ||
|
||
//go:build !noasm && amd64 | ||
|
||
package bitmap | ||
|
||
import "unsafe" | ||
|
||
// And computes the intersection between two bitmaps and stores the result in the current bitmap | ||
func (dst *Bitmap) And(other Bitmap, extra ...Bitmap) { | ||
max := minlen(*dst, other, extra) | ||
dst.shrink(max) | ||
|
||
switch hardware { | ||
case isAccelerated: | ||
switch len(extra) { | ||
case 0: | ||
_and(unsafe.Pointer(&(*dst)[0]), unsafe.Pointer(&other[0]), uint64(max)) | ||
default: | ||
vx, _ := pointersOf(other, extra) | ||
_and_many(unsafe.Pointer(&(*dst)[0]), vx, dimensionsOf(max, len(extra)+1)) | ||
} | ||
case isAVX512: | ||
switch len(extra) { | ||
case 0: | ||
_and_avx512(unsafe.Pointer(&(*dst)[0]), unsafe.Pointer(&other[0]), uint64(max)) | ||
default: | ||
vx, _ := pointersOf(other, extra) | ||
_and_many_avx512(unsafe.Pointer(&(*dst)[0]), vx, dimensionsOf(max, len(extra)+1)) | ||
} | ||
default: | ||
and(*dst, max, other, extra) | ||
return | ||
} | ||
} | ||
|
||
// AndNot computes the difference between two bitmaps and stores the result in the current bitmap. | ||
// Operation works as set subtract: dst - b | ||
func (dst *Bitmap) AndNot(other Bitmap, extra ...Bitmap) { | ||
max := minlen(*dst, other, extra) | ||
|
||
switch hardware { | ||
case isAccelerated: | ||
switch len(extra) { | ||
case 0: | ||
_andn(unsafe.Pointer(&(*dst)[0]), unsafe.Pointer(&other[0]), uint64(max)) | ||
default: | ||
vx, _ := pointersOf(other, extra) | ||
_andn_many(unsafe.Pointer(&(*dst)[0]), vx, dimensionsOf(max, len(extra)+1)) | ||
} | ||
case isAVX512: | ||
switch len(extra) { | ||
case 0: | ||
_andn_avx512(unsafe.Pointer(&(*dst)[0]), unsafe.Pointer(&other[0]), uint64(max)) | ||
default: | ||
vx, _ := pointersOf(other, extra) | ||
_andn_many_avx512(unsafe.Pointer(&(*dst)[0]), vx, dimensionsOf(max, len(extra)+1)) | ||
} | ||
default: | ||
andn(*dst, max, other, extra) | ||
return | ||
} | ||
} | ||
|
||
// Or computes the union between two bitmaps and stores the result in the current bitmap | ||
func (dst *Bitmap) Or(other Bitmap, extra ...Bitmap) { | ||
max := maxlen(*dst, other, extra) | ||
dst.grow(max - 1) | ||
|
||
switch hardware { | ||
case isAccelerated: | ||
switch len(extra) { | ||
case 0: | ||
_or(unsafe.Pointer(&(*dst)[0]), unsafe.Pointer(&other[0]), uint64(len(other))) | ||
default: | ||
vx, max := pointersOf(other, extra) | ||
_or_many(unsafe.Pointer(&(*dst)[0]), vx, dimensionsOf(max, len(extra)+1)) | ||
} | ||
case isAVX512: | ||
switch len(extra) { | ||
case 0: | ||
_or_avx512(unsafe.Pointer(&(*dst)[0]), unsafe.Pointer(&other[0]), uint64(len(other))) | ||
default: | ||
vx, max := pointersOf(other, extra) | ||
_or_many_avx512(unsafe.Pointer(&(*dst)[0]), vx, dimensionsOf(max, len(extra)+1)) | ||
} | ||
default: | ||
or(*dst, other, extra) | ||
} | ||
} | ||
|
||
// Xor computes the symmetric difference between two bitmaps and stores the result in the current bitmap | ||
func (dst *Bitmap) Xor(other Bitmap, extra ...Bitmap) { | ||
max := maxlen(*dst, other, extra) | ||
dst.grow(max - 1) | ||
|
||
switch hardware { | ||
case isAccelerated: | ||
switch len(extra) { | ||
case 0: | ||
_xor(unsafe.Pointer(&(*dst)[0]), unsafe.Pointer(&other[0]), uint64(len(other))) | ||
default: | ||
vx, max := pointersOf(other, extra) | ||
_xor_many(unsafe.Pointer(&(*dst)[0]), vx, dimensionsOf(max, len(extra)+1)) | ||
} | ||
case isAVX512: | ||
switch len(extra) { | ||
case 0: | ||
_xor_avx512(unsafe.Pointer(&(*dst)[0]), unsafe.Pointer(&other[0]), uint64(len(other))) | ||
default: | ||
vx, max := pointersOf(other, extra) | ||
_xor_many_avx512(unsafe.Pointer(&(*dst)[0]), vx, dimensionsOf(max, len(extra)+1)) | ||
} | ||
default: | ||
xor(*dst, other, extra) | ||
} | ||
} | ||
|
||
// Count returns the number of elements in this bitmap | ||
func (dst Bitmap) Count() int { | ||
if len(dst) == 0 { | ||
return 0 | ||
} | ||
|
||
switch hardware { | ||
case isAccelerated: | ||
var res uint64 | ||
_count(unsafe.Pointer(&dst[0]), uint64(len(dst)), unsafe.Pointer(&res)) | ||
return int(res) | ||
default: | ||
return count(dst) | ||
} | ||
} |
Oops, something went wrong.