Skip to content

Commit

Permalink
Merge tag 'pull-misc-20240503' of https://gitlab.com/rth7680/qemu into staging
Browse files Browse the repository at this point in the history

util/bufferiszero:
  - Remove sse4.1 and avx512 variants
  - Reorganize for early test for acceleration
  - Remove useless prefetches
  - Optimize sse2, avx2 and integer variants
  - Add simd acceleration for aarch64
  - Add bufferiszero-bench

# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmY0/qMdHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV+ULQf/T2JSdvG6/EjDCf4N
# cnSGiUV2MIeByw8tkrc/fWCNdlulHhk9gbg9l+f2muwK8H/k2BdynbrQnt1Ymmtk
# xzM6+PNOcByaovSAkvNweZVbrQX36Yih9S7f3n+xcxfVuvvYhKSLHXLkeqO96LMd
# rN+WRpxhReaU3n8/FO7o3S26SRpk7X9kRfShaT7U7ytHGjGsXUvMKIRs30hbsJTB
# yjed0a0u54FoSlN6AEqjWdgzaWP8nT65+8Yxe3dzB9hx09UiolZo60eHqYy7Mkno
# N6aMOB6gUUbCiKZ3Qk+1zEX97vl26NH3zt5tIIJTWDoIkC3f9qbg1x5hwWLQ3rra
# rM8h8w==
# =DnZO
# -----END PGP SIGNATURE-----
# gpg: Signature made Fri 03 May 2024 08:11:31 AM PDT
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [ultimate]

* tag 'pull-misc-20240503' of https://gitlab.com/rth7680/qemu:
  tests/bench: Add bufferiszero-bench
  util/bufferiszero: Add simd acceleration for aarch64
  util/bufferiszero: Simplify test_buffer_is_zero_next_accel
  util/bufferiszero: Introduce biz_accel_fn typedef
  util/bufferiszero: Improve scalar variant
  util/bufferiszero: Optimize SSE2 and AVX2 variants
  util/bufferiszero: Remove useless prefetches
  util/bufferiszero: Reorganize for early test for acceleration
  util/bufferiszero: Remove AVX512 variant
  util/bufferiszero: Remove SSE4.1 variant

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
  • Loading branch information
rth7680 committed May 3, 2024
2 parents 4977ce1 + a06d9ed commit 909aff7
Show file tree
Hide file tree
Showing 4 changed files with 313 additions and 210 deletions.
32 changes: 31 additions & 1 deletion include/qemu/cutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,39 @@ char *freq_to_str(uint64_t freq_hz);
/*
 * Used to print char* safely: yields @str itself when it is non-NULL,
 * otherwise the string literal "null".  Note that @str may be evaluated
 * twice, so avoid arguments with side effects.
 */
#define STR_OR_NULL(str) ((str) ? (str) : "null")

bool buffer_is_zero(const void *buf, size_t len);
/*
* Check if a buffer is all zeroes.
*/

/*
 * Out-of-line entry point.  The inline buffer_is_zero() wrapper in this
 * header falls back to it whenever @len is not a compile-time constant
 * of at least 256, and buffer_is_zero is a plain alias for it in
 * non-optimized builds.
 */
bool buffer_is_zero_ool(const void *vbuf, size_t len);
/*
 * Entry point used by the inline buffer_is_zero() wrapper once @len is
 * known to be >= 256 and the three-byte sample found no non-zero byte.
 * NOTE(review): presumably requires len >= 256 -- confirm against the
 * implementation.
 */
bool buffer_is_zero_ge256(const void *vbuf, size_t len);
/*
 * Test/benchmark hook.  Callers repeat their measurements for as long as
 * this returns true; presumably it switches to the next acceleration
 * backend and returns false when none remain -- confirm against the
 * implementation.
 */
bool test_buffer_is_zero_next_accel(void);

/*
 * Probe three bytes of @buf -- the first, the last and the middle one --
 * and report whether all of them are zero.
 *
 * @buf: start of the buffer to sample
 * @len: buffer length in bytes; must be at least 1, since the last byte
 *       is read at index @len - 1
 *
 * Returns true only if all three sampled bytes are zero.  A true result
 * says nothing about the remaining bytes of the buffer.
 */
static inline bool buffer_is_zero_sample3(const char *buf, size_t len)
{
    /*
     * For any reasonably sized buffer the three probes land in three
     * different cachelines.  In qemu-img usage each probe eliminates
     * more than half of all buffer testing, so it is critical to
     * performance that we stop at the first non-zero byte and never
     * pull in a cache line we do not need.  Keep the probes sequential;
     * do not "optimize" this into a single !(a | b | c).
     */
    if (buf[0]) {
        return false;
    }
    if (buf[len - 1]) {
        return false;
    }
    return buf[len / 2] == 0;
}

#ifdef __OPTIMIZE__
/*
 * Check whether the @len bytes at @buf are all zero.
 *
 * When the compiler can prove @len is a constant of at least 256, the
 * cheap three-byte sample is done inline and the >= 256 byte entry point
 * is only called if the sample passes.  Every other case goes to the
 * out-of-line entry point.
 */
static inline bool buffer_is_zero(const void *buf, size_t len)
{
    if (__builtin_constant_p(len) && len >= 256) {
        /* Reject on the sample before paying for a function call. */
        if (!buffer_is_zero_sample3(buf, len)) {
            return false;
        }
        return buffer_is_zero_ge256(buf, len);
    }
    return buffer_is_zero_ool(buf, len);
}
#else
/* Non-optimized builds: always use the out-of-line entry point. */
#define buffer_is_zero buffer_is_zero_ool
#endif

/*
* Implementation of ULEB128 (http://en.wikipedia.org/wiki/LEB128)
* Input is limited to 14-bit numbers
Expand Down
47 changes: 47 additions & 0 deletions tests/bench/bufferiszero-bench.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* QEMU buffer_is_zero speed benchmark
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* (at your option) any later version. See the COPYING file in the
* top-level directory.
*/
#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/units.h"

/*
 * Benchmark body: time buffer_is_zero_ge256() on an all-zero buffer for
 * lengths 1 KiB, 4 KiB, 16 KiB and 64 KiB, reporting throughput in
 * MB/sec for each.  The whole series is repeated for as long as
 * test_buffer_is_zero_next_accel() returns true.
 *
 * @opaque: test data pointer from the GLib test harness; unused here.
 */
static void test(const void *opaque)
{
    size_t largest = 64 * KiB;
    void *zeroes = g_malloc0(largest);
    int pass = 0;

    for (;;) {
        size_t len;

        /* Blank separator line between passes, but not before the first. */
        if (pass) {
            g_test_message("%s", ""); /* gnu_printf Werror for simple "" */
        }

        len = 1 * KiB;
        while (len <= largest) {
            double bytes = 0.0;

            /* Call repeatedly for about half a second, counting bytes. */
            g_test_timer_start();
            do {
                buffer_is_zero_ge256(zeroes, len);
                bytes += len;
            } while (g_test_timer_elapsed() < 0.5);

            bytes /= MiB;
            g_test_message("buffer_is_zero #%d: %2zuKB %8.0f MB/sec",
                           pass, len / (size_t)KiB,
                           bytes / g_test_timer_last());

            len *= 4;
        }

        pass++;
        if (!test_buffer_is_zero_next_accel()) {
            break;
        }
    }

    g_free(zeroes);
}

/*
 * Program entry point: initialise the GLib test framework, register the
 * single benchmark case and run it.  Returns the value of g_test_run().
 */
int main(int argc, char **argv)
{
    int status;

    g_test_init(&argc, &argv, NULL);
    g_test_add_data_func("/cutils/bufferiszero/speed", NULL, test);

    status = g_test_run();
    return status;
}
1 change: 1 addition & 0 deletions tests/bench/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ benchs = {}

if have_block
benchs += {
'bufferiszero-bench': [],
'benchmark-crypto-hash': [crypto],
'benchmark-crypto-hmac': [crypto],
'benchmark-crypto-cipher': [crypto],
Expand Down

0 comments on commit 909aff7

Please sign in to comment.