From 65a34134000d6e3935fe59b55e36b83df677043c Mon Sep 17 00:00:00 2001 From: kimwalisch Date: Sun, 7 Apr 2024 10:27:25 +0200 Subject: [PATCH] Simplify AVX512 example --- doc/CPP_API.md | 4 ++-- doc/C_API.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/CPP_API.md b/doc/CPP_API.md index 6103fa0a6..8b6d600ae 100644 --- a/doc/CPP_API.md +++ b/doc/CPP_API.md @@ -466,7 +466,7 @@ int main() { // Sum 64-bit primes using AVX512 for (std::size_t i = 0; i < it.size_; i += 8) { - __mmask8 mask = (i + 8 < it.size_) ? 0xff : 0xff >> (i + 8 - it.size_); + __mmask8 mask = (__mmask8) _bzhi_u64(0xff, it.size_ - i); __m512i primes = _mm512_maskz_loadu_epi64(mask, (__m512i*) &it.primes_[i]); sums = _mm512_add_epi64(sums, primes); } @@ -493,7 +493,7 @@ int main() ```bash # Unix-like OSes -c++ -O3 -mavx512f -funroll-loops primesum.cpp -o primesum -lprimesieve +c++ -O3 -mavx512f -mbmi2 -funroll-loops primesum.cpp -o primesum -lprimesieve time ./primesum ``` diff --git a/doc/C_API.md b/doc/C_API.md index 0fde55f18..eaaa2db69 100644 --- a/doc/C_API.md +++ b/doc/C_API.md @@ -532,7 +532,7 @@ int main(void) { // Sum 64-bit primes using AVX512 for (size_t i = 0; i < it.size; i += 8) { - __mmask8 mask = (i + 8 < it.size) ? 0xff : 0xff >> (i + 8 - it.size); + __mmask8 mask = (__mmask8) _bzhi_u64(0xff, it.size - i); __m512i primes = _mm512_maskz_loadu_epi64(mask, (__m512i*) &it.primes[i]); sums = _mm512_add_epi64(sums, primes); } @@ -560,7 +560,7 @@ int main(void) ```bash # Unix-like OSes -cc -O3 -mavx512f -funroll-loops primesum.c -o primesum -lprimesieve +cc -O3 -mavx512f -mbmi2 -funroll-loops primesum.c -o primesum -lprimesieve time ./primesum ```