New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP 256 Bit wide instruction optimization for avx2 capable processors #1641

Closed
wants to merge 16 commits into
base: master
from
View
@@ -0,0 +1,155 @@
// +build amd64,!appengine,!gccgo
// Copyright 2017 Pilosa Corp.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package roaring implements roaring bitmaps with support for incremental changes.
// Frame layout shared by every function in this file (byte offsets from FP):
//
//   offset | contents
//   -------+----------------------------
//        0 | a_data_ptr
//        8 | a_len
//       16 | a_cap
//       24 | b_data_ptr
//       32 | b_len
//       40 | b_cap
//       48 | c_data_ptr
//       56 | c_len
//       64 | c_cap
//       72 | function return value (int)
//
// func asmAnd(a, b, c []uint64) int
//
// asmAnd writes a[i] & b[i] to c[i] for every i in [0, len(a)) and returns the
// total number of set bits in the words it wrote. Only len(a) is consulted, so
// the caller must make b and c at least as long as a.
//
// Arguments occupy 80 bytes: a at 0(FP), b at 24(FP) and c at 48(FP), each as
// base/len/cap, followed by the int result at 72(FP).
// Flag 7 is NOPROF|DUPOK|NOSPLIT (see textflag.h).
TEXT ·asmAnd(SB), 7, $0-80
	MOVQ a_base+0(FP), BX  // BX = &a[0]
	MOVQ a_len+8(FP), CX   // CX = len(a); the length is a full 64-bit word
	MOVQ b_base+24(FP), DX // DX = &b[0]
	MOVQ c_base+48(FP), DI // DI = &c[0]
	XORQ SI, SI            // SI = running population count
	CMPQ CX, $4
	JLT  and_tail          // fewer than four words: skip the 256-bit loop

and_loop4:
	// Unaligned loads: Go does not guarantee 32-byte alignment of slice data,
	// and the aligned form faults on a misaligned address.
	VMOVDQU (BX), Y0
	VMOVDQU (DX), Y1
	VPAND   Y0, Y1, Y0
	VMOVDQU Y0, (DI)

	// Count the four words just stored. AX is the scratch register; BP is
	// left untouched because it carries the frame pointer.
	POPCNTQ (DI), AX
	ADDQ    AX, SI
	POPCNTQ 8(DI), AX
	ADDQ    AX, SI
	POPCNTQ 16(DI), AX
	ADDQ    AX, SI
	POPCNTQ 24(DI), AX
	ADDQ    AX, SI

	ADDQ $32, BX
	ADDQ $32, DX
	ADDQ $32, DI
	SUBQ $4, CX
	CMPQ CX, $4
	JGE  and_loop4

and_tail:
	// Zero to three words remain; handle them one at a time.
	TESTQ CX, CX
	JEQ   and_done

and_tail_loop:
	MOVQ    (BX), AX
	ANDQ    (DX), AX
	MOVQ    AX, (DI)
	POPCNTQ AX, AX
	ADDQ    AX, SI
	ADDQ    $8, BX
	ADDQ    $8, DX
	ADDQ    $8, DI
	DECQ    CX
	JNE     and_tail_loop

and_done:
	VZEROUPPER             // avoid AVX/SSE transition penalties in the caller
	MOVQ SI, ret+72(FP)
	RET
// func asmOr(a, b, c []uint64) int
//
// asmOr writes a[i] | b[i] to c[i] for every i in [0, len(a)) and returns the
// total number of set bits in the words it wrote. Only len(a) is consulted, so
// the caller must make b and c at least as long as a.
//
// Arguments occupy 80 bytes: a at 0(FP), b at 24(FP) and c at 48(FP), each as
// base/len/cap, followed by the int result at 72(FP).
// Flag 7 is NOPROF|DUPOK|NOSPLIT (see textflag.h).
TEXT ·asmOr(SB), 7, $0-80
	MOVQ a_base+0(FP), BX  // BX = &a[0]
	MOVQ a_len+8(FP), CX   // CX = len(a); the length is a full 64-bit word
	MOVQ b_base+24(FP), DX // DX = &b[0]
	MOVQ c_base+48(FP), DI // DI = &c[0]
	XORQ SI, SI            // SI = running population count
	CMPQ CX, $4
	JLT  or_tail           // fewer than four words: skip the 256-bit loop

or_loop4:
	// Unaligned loads: Go does not guarantee 32-byte alignment of slice data,
	// and the aligned form faults on a misaligned address.
	VMOVDQU (BX), Y0
	VMOVDQU (DX), Y1
	VPOR    Y0, Y1, Y0
	VMOVDQU Y0, (DI)

	// Count the four words just stored. AX is the scratch register; BP is
	// left untouched because it carries the frame pointer.
	POPCNTQ (DI), AX
	ADDQ    AX, SI
	POPCNTQ 8(DI), AX
	ADDQ    AX, SI
	POPCNTQ 16(DI), AX
	ADDQ    AX, SI
	POPCNTQ 24(DI), AX
	ADDQ    AX, SI

	ADDQ $32, BX
	ADDQ $32, DX
	ADDQ $32, DI
	SUBQ $4, CX
	CMPQ CX, $4
	JGE  or_loop4

or_tail:
	// Zero to three words remain; handle them one at a time.
	TESTQ CX, CX
	JEQ   or_done

or_tail_loop:
	MOVQ    (BX), AX
	ORQ     (DX), AX
	MOVQ    AX, (DI)
	POPCNTQ AX, AX
	ADDQ    AX, SI
	ADDQ    $8, BX
	ADDQ    $8, DX
	ADDQ    $8, DI
	DECQ    CX
	JNE     or_tail_loop

or_done:
	VZEROUPPER             // avoid AVX/SSE transition penalties in the caller
	MOVQ SI, ret+72(FP)
	RET
// func asmXor(a, b, c []uint64) int
//
// asmXor writes a[i] ^ b[i] to c[i] for every i in [0, len(a)) and returns the
// total number of set bits in the words it wrote. Only len(a) is consulted, so
// the caller must make b and c at least as long as a.
//
// Arguments occupy 80 bytes: a at 0(FP), b at 24(FP) and c at 48(FP), each as
// base/len/cap, followed by the int result at 72(FP).
// Flag 7 is NOPROF|DUPOK|NOSPLIT (see textflag.h).
TEXT ·asmXor(SB), 7, $0-80
	MOVQ a_base+0(FP), BX  // BX = &a[0]
	MOVQ a_len+8(FP), CX   // CX = len(a); the length is a full 64-bit word
	MOVQ b_base+24(FP), DX // DX = &b[0]
	MOVQ c_base+48(FP), DI // DI = &c[0]
	XORQ SI, SI            // SI = running population count
	CMPQ CX, $4
	JLT  xor_tail          // fewer than four words: skip the 256-bit loop

xor_loop4:
	// Unaligned loads: Go does not guarantee 32-byte alignment of slice data,
	// and the aligned form faults on a misaligned address.
	VMOVDQU (BX), Y0
	VMOVDQU (DX), Y1
	VPXOR   Y0, Y1, Y0
	VMOVDQU Y0, (DI)

	// Count the four words just stored. AX is the scratch register; BP is
	// left untouched because it carries the frame pointer.
	POPCNTQ (DI), AX
	ADDQ    AX, SI
	POPCNTQ 8(DI), AX
	ADDQ    AX, SI
	POPCNTQ 16(DI), AX
	ADDQ    AX, SI
	POPCNTQ 24(DI), AX
	ADDQ    AX, SI

	ADDQ $32, BX
	ADDQ $32, DX
	ADDQ $32, DI
	SUBQ $4, CX
	CMPQ CX, $4
	JGE  xor_loop4

xor_tail:
	// Zero to three words remain; handle them one at a time.
	TESTQ CX, CX
	JEQ   xor_done

xor_tail_loop:
	MOVQ    (BX), AX
	XORQ    (DX), AX
	MOVQ    AX, (DI)
	POPCNTQ AX, AX
	ADDQ    AX, SI
	ADDQ    $8, BX
	ADDQ    $8, DX
	ADDQ    $8, DI
	DECQ    CX
	JNE     xor_tail_loop

xor_done:
	VZEROUPPER             // avoid AVX/SSE transition penalties in the caller
	MOVQ SI, ret+72(FP)
	RET
// func asmAndN(a, b, c []uint64) int
//
// asmAndN writes a[i] &^ b[i] (the bits of a that are not set in b) to c[i]
// for every i in [0, len(a)) and returns the total number of set bits in the
// words it wrote. Only len(a) is consulted, so the caller must make b and c at
// least as long as a.
//
// Arguments occupy 80 bytes: a at 0(FP), b at 24(FP) and c at 48(FP), each as
// base/len/cap, followed by the int result at 72(FP).
// Flag 7 is NOPROF|DUPOK|NOSPLIT (see textflag.h).
TEXT ·asmAndN(SB), 7, $0-80
	MOVQ a_base+0(FP), BX  // BX = &a[0]
	MOVQ a_len+8(FP), CX   // CX = len(a); the length is a full 64-bit word
	MOVQ b_base+24(FP), DX // DX = &b[0]
	MOVQ c_base+48(FP), DI // DI = &c[0]
	XORQ SI, SI            // SI = running population count
	CMPQ CX, $4
	JLT  andn_tail         // fewer than four words: skip the 256-bit loop

andn_loop4:
	// Unaligned loads: Go does not guarantee 32-byte alignment of slice data,
	// and the aligned form faults on a misaligned address.
	VMOVDQU (BX), Y0
	VMOVDQU (DX), Y1
	// VPANDN inverts its middle operand: Y0 = (NOT Y1) AND Y0 = a &^ b.
	VPANDN  Y0, Y1, Y0
	VMOVDQU Y0, (DI)

	// Count the four words just stored. AX is the scratch register; BP is
	// left untouched because it carries the frame pointer.
	POPCNTQ (DI), AX
	ADDQ    AX, SI
	POPCNTQ 8(DI), AX
	ADDQ    AX, SI
	POPCNTQ 16(DI), AX
	ADDQ    AX, SI
	POPCNTQ 24(DI), AX
	ADDQ    AX, SI

	ADDQ $32, BX
	ADDQ $32, DX
	ADDQ $32, DI
	SUBQ $4, CX
	CMPQ CX, $4
	JGE  andn_loop4

andn_tail:
	// Zero to three words remain; handle them one at a time.
	TESTQ CX, CX
	JEQ   andn_done

andn_tail_loop:
	MOVQ    (DX), AX       // AX = b[i]
	NOTQ    AX             // AX = ^b[i]
	ANDQ    (BX), AX       // AX = a[i] &^ b[i]
	MOVQ    AX, (DI)
	POPCNTQ AX, AX
	ADDQ    AX, SI
	ADDQ    $8, BX
	ADDQ    $8, DX
	ADDQ    $8, DI
	DECQ    CX
	JNE     andn_tail_loop

andn_done:
	VZEROUPPER             // avoid AVX/SSE transition penalties in the caller
	MOVQ SI, ret+72(FP)
	RET
View
@@ -0,0 +1,22 @@
// Copyright 2017 Pilosa Corp.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package roaring implements roaring bitmaps with support for incremental changes.
package roaring
This conversation was marked as resolved by tgruben

This comment has been minimized.

@travisturner

travisturner Sep 13, 2018

Member

this file needs a license header

@travisturner

travisturner Sep 13, 2018

Member

this file needs a license header

// The functions below are implemented in assembly. Each one combines a and b
// word by word, stores the result in c, and returns the number of set bits in
// the words stored. The assembly takes its element count from len(a) alone, so
// b and c must be at least that long.
//
// NOTE(review): verify how the assembly treats lengths that are zero or not a
// multiple of four before passing such slices.
//
// A //go:noescape directive covers only the single declaration that follows
// it, so it is repeated for every stub.

// asmAnd sets c[i] = a[i] & b[i].
//
//go:noescape
func asmAnd(a, b, c []uint64) int

// asmOr sets c[i] = a[i] | b[i].
//
//go:noescape
func asmOr(a, b, c []uint64) int

// asmXor sets c[i] = a[i] ^ b[i].
//
//go:noescape
func asmXor(a, b, c []uint64) int

// asmAndN sets c[i] = a[i] &^ b[i].
//
//go:noescape
func asmAndN(a, b, c []uint64) int
View
@@ -0,0 +1,126 @@
// Copyright 2017 Pilosa Corp.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package roaring
import (
"testing"
)
// BenchmarkBitmap_ASMIntersect times asmAnd on the inputs returned by
// bitmapFull and bitmapEvenBitsSet, writing into a reused bitmapN-word buffer.
// It uses the same operand pair as the pure-Go intersect benchmarks so the
// results are directly comparable.
func BenchmarkBitmap_ASMIntersect(b *testing.B) {
	a := bitmapFull()
	c := bitmapEvenBitsSet()
	results := make([]uint64, bitmapN)

	// Exclude input construction from the measurement.
	b.ResetTimer()
	for x := 0; x < b.N; x++ {
		_ = asmAnd(a, c, results)
	}
}
// BenchmarkBitmap_GOIntersect times goAnd on the inputs returned by
// bitmapFull and bitmapEvenBitsSet, writing into a reused bitmapN-word buffer.
func BenchmarkBitmap_GOIntersect(b *testing.B) {
	var (
		lhs = bitmapFull()
		rhs = bitmapEvenBitsSet()
		dst = make([]uint64, bitmapN)
	)

	// Exclude input construction from the measurement.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		goAnd(lhs, rhs, dst)
	}
}
// goAndNoBCE stores a[i] & b[i] in c[i] for the first bitmapN words and
// returns the summed popcount of the stored words. The loop is bounded by
// bitmapN rather than by a slice length, so every index stays an explicit
// access on each of the three slices.
func goAndNoBCE(a, b, c []uint64) (n int) {
	for idx := 0; idx < bitmapN; idx++ {
		word := a[idx] & b[idx]
		c[idx] = word
		n += int(popcount(word))
	}
	return n
}
// BenchmarkBitmap_GoAndNoBCE times goAndNoBCE on the inputs returned by
// bitmapFull and bitmapEvenBitsSet, writing into a reused bitmapN-word buffer.
func BenchmarkBitmap_GoAndNoBCE(b *testing.B) {
	var (
		lhs = bitmapFull()
		rhs = bitmapEvenBitsSet()
		dst = make([]uint64, bitmapN)
	)

	// Exclude input construction from the measurement.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		goAndNoBCE(lhs, rhs, dst)
	}
}
// BenchmarkBitmap_GOIntersectUnroll4 times goAndUnroll4 on the inputs returned
// by bitmapFull and bitmapEvenBitsSet, writing into a reused bitmapN-word buffer.
func BenchmarkBitmap_GOIntersectUnroll4(b *testing.B) {
	var (
		lhs = bitmapFull()
		rhs = bitmapEvenBitsSet()
		dst = make([]uint64, bitmapN)
	)

	// Exclude input construction from the measurement.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		goAndUnroll4(lhs, rhs, dst)
	}
}
// BenchmarkBitmap_GOIntersectUnroll8 times goAndUnroll8 on the inputs returned
// by bitmapFull and bitmapEvenBitsSet, writing into a reused bitmapN-word buffer.
func BenchmarkBitmap_GOIntersectUnroll8(b *testing.B) {
	var (
		lhs = bitmapFull()
		rhs = bitmapEvenBitsSet()
		dst = make([]uint64, bitmapN)
	)

	// Exclude input construction from the measurement.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		goAndUnroll8(lhs, rhs, dst)
	}
}
// BenchmarkBitmap_GOIntersectUnroll16 times goAndUnroll16 on the inputs returned
// by bitmapFull and bitmapEvenBitsSet, writing into a reused bitmapN-word buffer.
func BenchmarkBitmap_GOIntersectUnroll16(b *testing.B) {
	var (
		lhs = bitmapFull()
		rhs = bitmapEvenBitsSet()
		dst = make([]uint64, bitmapN)
	)

	// Exclude input construction from the measurement.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		goAndUnroll16(lhs, rhs, dst)
	}
}
// BenchmarkBitmap_GOIntersectUnroll32 times goAndUnroll32 on the inputs returned
// by bitmapFull and bitmapEvenBitsSet, writing into a reused bitmapN-word buffer.
func BenchmarkBitmap_GOIntersectUnroll32(b *testing.B) {
	var (
		lhs = bitmapFull()
		rhs = bitmapEvenBitsSet()
		dst = make([]uint64, bitmapN)
	)

	// Exclude input construction from the measurement.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		goAndUnroll32(lhs, rhs, dst)
	}
}
// BenchmarkBitmap_GOIntersectUnroll64 times goAndUnroll64 on the inputs returned
// by bitmapFull and bitmapEvenBitsSet, writing into a reused bitmapN-word buffer.
func BenchmarkBitmap_GOIntersectUnroll64(b *testing.B) {
	var (
		lhs = bitmapFull()
		rhs = bitmapEvenBitsSet()
		dst = make([]uint64, bitmapN)
	)

	// Exclude input construction from the measurement.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		goAndUnroll64(lhs, rhs, dst)
	}
}
// BenchmarkBitmap_GOIntersectUnroll128 times goAndUnroll128 on the inputs returned
// by bitmapFull and bitmapEvenBitsSet, writing into a reused bitmapN-word buffer.
func BenchmarkBitmap_GOIntersectUnroll128(b *testing.B) {
	var (
		lhs = bitmapFull()
		rhs = bitmapEvenBitsSet()
		dst = make([]uint64, bitmapN)
	)

	// Exclude input construction from the measurement.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		goAndUnroll128(lhs, rhs, dst)
	}
}
// BenchmarkBitmap_GOIntersectUnroll1024 times goAndUnroll1024 on the inputs returned
// by bitmapFull and bitmapEvenBitsSet, writing into a reused bitmapN-word buffer.
func BenchmarkBitmap_GOIntersectUnroll1024(b *testing.B) {
	var (
		lhs = bitmapFull()
		rhs = bitmapEvenBitsSet()
		dst = make([]uint64, bitmapN)
	)

	// Exclude input construction from the measurement.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		goAndUnroll1024(lhs, rhs, dst)
	}
}
Oops, something went wrong.
ProTip! Use n and p to navigate between commits in a pull request.