469 changes: 0 additions & 469 deletions tests/bench/xbzrle-bench.c

This file was deleted.

49 changes: 10 additions & 39 deletions tests/unit/test-xbzrle.c
@@ -16,35 +16,6 @@

#define XBZRLE_PAGE_SIZE 4096

-int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int,
-                                 uint8_t *, int) = xbzrle_encode_buffer;
-#if defined(CONFIG_AVX512BW_OPT)
-#include "qemu/cpuid.h"
-static void __attribute__((constructor)) init_cpu_flag(void)
-{
-    unsigned max = __get_cpuid_max(0, NULL);
-    int a, b, c, d;
-    if (max >= 1) {
-        __cpuid(1, a, b, c, d);
-        /* We must check that AVX is not just available, but usable. */
-        if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
-            int bv;
-            __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
-            __cpuid_count(7, 0, a, b, c, d);
-            /* 0xe6:
-             * XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
-             * and ZMM16-ZMM31 state are enabled by OS)
-             * XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
-             */
-            if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
-                xbzrle_encode_buffer_func = xbzrle_encode_buffer_avx512;
-            }
-        }
-    }
-    return;
-}
-#endif

static void test_uleb(void)
{
    uint32_t i, val;
@@ -83,8 +54,8 @@ static void test_encode_decode_zero(void)
    buffer[1000 + diff_len + 5] = 105;

    /* encode zero page */
-    dlen = xbzrle_encode_buffer_func(buffer, buffer, XBZRLE_PAGE_SIZE, compressed,
-                                     XBZRLE_PAGE_SIZE);
+    dlen = xbzrle_encode_buffer(buffer, buffer, XBZRLE_PAGE_SIZE,
+                                compressed, XBZRLE_PAGE_SIZE);
    g_assert(dlen == 0);

    g_free(buffer);
@@ -107,8 +78,8 @@ static void test_encode_decode_unchanged(void)
    test[1000 + diff_len + 5] = 109;

    /* test unchanged buffer */
-    dlen = xbzrle_encode_buffer_func(test, test, XBZRLE_PAGE_SIZE, compressed,
-                                     XBZRLE_PAGE_SIZE);
+    dlen = xbzrle_encode_buffer(test, test, XBZRLE_PAGE_SIZE,
+                                compressed, XBZRLE_PAGE_SIZE);
    g_assert(dlen == 0);

    g_free(test);
@@ -125,8 +96,8 @@ static void test_encode_decode_1_byte(void)

    test[XBZRLE_PAGE_SIZE - 1] = 1;

-    dlen = xbzrle_encode_buffer_func(buffer, test, XBZRLE_PAGE_SIZE, compressed,
-                                     XBZRLE_PAGE_SIZE);
+    dlen = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE,
+                                compressed, XBZRLE_PAGE_SIZE);
    g_assert(dlen == (uleb128_encode_small(&buf[0], 4095) + 2));

    rc = xbzrle_decode_buffer(compressed, dlen, buffer, XBZRLE_PAGE_SIZE);
@@ -150,8 +121,8 @@ static void test_encode_decode_overflow(void)
    }

    /* encode overflow */
-    rc = xbzrle_encode_buffer_func(buffer, test, XBZRLE_PAGE_SIZE, compressed,
-                                   XBZRLE_PAGE_SIZE);
+    rc = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE,
+                              compressed, XBZRLE_PAGE_SIZE);
    g_assert(rc == -1);

    g_free(buffer);
@@ -181,8 +152,8 @@ static void encode_decode_range(void)
    test[1000 + diff_len + 5] = 109;

    /* test encode/decode */
-    dlen = xbzrle_encode_buffer_func(test, buffer, XBZRLE_PAGE_SIZE, compressed,
-                                     XBZRLE_PAGE_SIZE);
+    dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE,
+                                compressed, XBZRLE_PAGE_SIZE);

    rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
    g_assert(rc < XBZRLE_PAGE_SIZE);
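Note: the deleted constructor above duplicated CPU-feature probing inside the test; the test now calls xbzrle_encode_buffer() directly because run-time selection moves behind that entry point. A minimal sketch of such a cpuinfo-based dispatch, assuming the CPUINFO_AVX512BW bit and cpuinfo_init() from the new host/cpuinfo.h (the typedef and pick_encoder() helper are illustrative, not part of this patch):

    typedef int (*xbzrle_encode_fn)(uint8_t *, uint8_t *, int, uint8_t *, int);

    static xbzrle_encode_fn pick_encoder(void)
    {
        /* cpuinfo_init() probes once and caches, so calling it here is cheap. */
        if (cpuinfo_init() & CPUINFO_AVX512BW) {
            return xbzrle_encode_buffer_avx512;
        }
        return xbzrle_encode_buffer;
    }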
125 changes: 45 additions & 80 deletions util/bufferiszero.c
@@ -24,6 +24,7 @@
#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/bswap.h"
#include "host/cpuinfo.h"

static bool
buffer_zero_int(const void *buf, size_t len)
@@ -184,111 +185,75 @@ buffer_zero_avx512(const void *buf, size_t len)
}
#endif /* CONFIG_AVX512F_OPT */


-/* Note that for test_buffer_is_zero_next_accel, the most preferred
- * ISA must have the least significant bit.
- */
-#define CACHE_AVX512F 1
-#define CACHE_AVX2 2
-#define CACHE_SSE4 4
-#define CACHE_SSE2 8

-/* Make sure that these variables are appropriately initialized when
+/*
+ * Make sure that these variables are appropriately initialized when
 * SSE2 is enabled on the compiler command-line, but the compiler is
 * too old to support CONFIG_AVX2_OPT.
 */
#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
-# define INIT_CACHE 0
-# define INIT_ACCEL buffer_zero_int
+# define INIT_USED 0
+# define INIT_LENGTH 0
+# define INIT_ACCEL buffer_zero_int
#else
# ifndef __SSE2__
#  error "ISA selection confusion"
# endif
-# define INIT_CACHE CACHE_SSE2
-# define INIT_ACCEL buffer_zero_sse2
+# define INIT_USED CPUINFO_SSE2
+# define INIT_LENGTH 64
+# define INIT_ACCEL buffer_zero_sse2
#endif

-static unsigned cpuid_cache = INIT_CACHE;
+static unsigned used_accel = INIT_USED;
+static unsigned length_to_accel = INIT_LENGTH;
static bool (*buffer_accel)(const void *, size_t) = INIT_ACCEL;
-static int length_to_accel = 64;

-static void init_accel(unsigned cache)
+static unsigned __attribute__((noinline))
+select_accel_cpuinfo(unsigned info)
{
-    bool (*fn)(const void *, size_t) = buffer_zero_int;
-    if (cache & CACHE_SSE2) {
-        fn = buffer_zero_sse2;
-        length_to_accel = 64;
-    }
+    /* Array is sorted in order of algorithm preference. */
+    static const struct {
+        unsigned bit;
+        unsigned len;
+        bool (*fn)(const void *, size_t);
+    } all[] = {
+#ifdef CONFIG_AVX512F_OPT
+        { CPUINFO_AVX512F, 256, buffer_zero_avx512 },
+#endif
#ifdef CONFIG_AVX2_OPT
-    if (cache & CACHE_SSE4) {
-        fn = buffer_zero_sse4;
-        length_to_accel = 64;
-    }
-    if (cache & CACHE_AVX2) {
-        fn = buffer_zero_avx2;
-        length_to_accel = 128;
-    }
+        { CPUINFO_AVX2, 128, buffer_zero_avx2 },
+        { CPUINFO_SSE4, 64, buffer_zero_sse4 },
#endif
-#ifdef CONFIG_AVX512F_OPT
-    if (cache & CACHE_AVX512F) {
-        fn = buffer_zero_avx512;
-        length_to_accel = 256;
+        { CPUINFO_SSE2, 64, buffer_zero_sse2 },
+        { CPUINFO_ALWAYS, 0, buffer_zero_int },
+    };

+    for (unsigned i = 0; i < ARRAY_SIZE(all); ++i) {
+        if (info & all[i].bit) {
+            length_to_accel = all[i].len;
+            buffer_accel = all[i].fn;
+            return all[i].bit;
+        }
    }
-#endif
-    buffer_accel = fn;
+    return 0;
}

#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
-#include "qemu/cpuid.h"

-static void __attribute__((constructor)) init_cpuid_cache(void)
+static void __attribute__((constructor)) init_accel(void)
{
-    unsigned max = __get_cpuid_max(0, NULL);
-    int a, b, c, d;
-    unsigned cache = 0;
-
-    if (max >= 1) {
-        __cpuid(1, a, b, c, d);
-        if (d & bit_SSE2) {
-            cache |= CACHE_SSE2;
-        }
-        if (c & bit_SSE4_1) {
-            cache |= CACHE_SSE4;
-        }
-
-        /* We must check that AVX is not just available, but usable. */
-        if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
-            unsigned bv = xgetbv_low(0);
-            __cpuid_count(7, 0, a, b, c, d);
-            if ((bv & 0x6) == 0x6 && (b & bit_AVX2)) {
-                cache |= CACHE_AVX2;
-            }
-            /* 0xe6:
-             * XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
-             * and ZMM16-ZMM31 state are enabled by OS)
-             * XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
-             */
-            if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512F)) {
-                cache |= CACHE_AVX512F;
-            }
-        }
-    }
-    cpuid_cache = cache;
-    init_accel(cache);
+    used_accel = select_accel_cpuinfo(cpuinfo_init());
}
#endif /* CONFIG_AVX512F_OPT || CONFIG_AVX2_OPT */

bool test_buffer_is_zero_next_accel(void)
{
-    /* If no bits set, we just tested buffer_zero_int, and there
-       are no more acceleration options to test. */
-    if (cpuid_cache == 0) {
-        return false;
-    }
-    /* Disable the accelerator we used before and select a new one. */
-    cpuid_cache &= cpuid_cache - 1;
-    init_accel(cpuid_cache);
-    return true;
+    /*
+     * Accumulate the accelerators that we've already tested, and
+     * remove them from the set to test this round.  We'll get back
+     * a zero from select_accel_cpuinfo when there are no more.
+     */
+    unsigned used = select_accel_cpuinfo(cpuinfo & ~used_accel);
+    used_accel |= used;
+    return used;
}

static bool select_accel_fn(const void *buf, size_t len)
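The rewritten test_buffer_is_zero_next_accel() keeps its contract: each call retires the accelerator just exercised and installs the next-preferred one, returning zero (false) once buffer_zero_int was the last candidate. A unit test can therefore cover every implementation the host supports with one loop; a minimal sketch, with buf, len and expect as illustrative locals:

    /* Run the same assertion under each available accelerator in turn. */
    do {
        g_assert(buffer_is_zero(buf, len) == expect);
    } while (test_buffer_is_zero_next_accel());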
67 changes: 67 additions & 0 deletions util/cpuinfo-aarch64.c
@@ -0,0 +1,67 @@
/*
 * SPDX-License-Identifier: GPL-2.0-or-later
 * Host specific cpu identification for AArch64.
 */

#include "qemu/osdep.h"
#include "host/cpuinfo.h"

#ifdef CONFIG_LINUX
# ifdef CONFIG_GETAUXVAL
#  include <sys/auxv.h>
# else
#  include <asm/hwcap.h>
#  include "elf.h"
# endif
#endif
#ifdef CONFIG_DARWIN
# include <sys/sysctl.h>
#endif

unsigned cpuinfo;

#ifdef CONFIG_DARWIN
static bool sysctl_for_bool(const char *name)
{
    int val = 0;
    size_t len = sizeof(val);

    if (sysctlbyname(name, &val, &len, NULL, 0) == 0) {
        return val != 0;
    }

    /*
     * We might in the future ask for properties not present in older kernels,
     * but we're only asking about static properties, all of which should be
     * 'int'. So we shouldn't see ENOMEM (val too small), or any of the other
     * more exotic errors.
     */
    assert(errno == ENOENT);
    return false;
}
#endif

/* Called both as constructor and (possibly) via other constructors. */
unsigned __attribute__((constructor)) cpuinfo_init(void)
{
    unsigned info = cpuinfo;

    if (info) {
        return info;
    }

    info = CPUINFO_ALWAYS;

#ifdef CONFIG_LINUX
    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
    info |= (hwcap & HWCAP_ATOMICS ? CPUINFO_LSE : 0);
    info |= (hwcap & HWCAP_USCAT ? CPUINFO_LSE2 : 0);
#endif
#ifdef CONFIG_DARWIN
    info |= sysctl_for_bool("hw.optional.arm.FEAT_LSE") * CPUINFO_LSE;
    info |= sysctl_for_bool("hw.optional.arm.FEAT_LSE2") * CPUINFO_LSE2;
#endif

    cpuinfo = info;
    return info;
}
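Because cpuinfo_init() returns the cached value as soon as cpuinfo is nonzero (and CPUINFO_ALWAYS guarantees it is nonzero after the first probe), it is safe to call from any constructor regardless of link order. A hedged sketch of a consumer; the probe_lse2() helper and have_lse2 flag are illustrative, not part of this patch:

    #include <stdbool.h>
    #include "host/cpuinfo.h"

    static bool have_lse2;

    /* Constructor order vs. cpuinfo-aarch64.c does not matter:
     * cpuinfo_init() probes on first call and caches thereafter. */
    static void __attribute__((constructor)) probe_lse2(void)
    {
        have_lse2 = (cpuinfo_init() & CPUINFO_LSE2) != 0;
    }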
99 changes: 99 additions & 0 deletions util/cpuinfo-i386.c
@@ -0,0 +1,99 @@
/*
 * SPDX-License-Identifier: GPL-2.0-or-later
 * Host specific cpu identification for x86.
 */

#include "qemu/osdep.h"
#include "host/cpuinfo.h"
#ifdef CONFIG_CPUID_H
# include "qemu/cpuid.h"
#endif

unsigned cpuinfo;

/* Called both as constructor and (possibly) via other constructors. */
unsigned __attribute__((constructor)) cpuinfo_init(void)
{
    unsigned info = cpuinfo;

    if (info) {
        return info;
    }

#ifdef CONFIG_CPUID_H
    unsigned max, a, b, c, d, b7 = 0, c7 = 0;

    max = __get_cpuid_max(0, 0);

    if (max >= 7) {
        __cpuid_count(7, 0, a, b7, c7, d);
        info |= (b7 & bit_BMI ? CPUINFO_BMI1 : 0);
        info |= (b7 & bit_BMI2 ? CPUINFO_BMI2 : 0);
    }

    if (max >= 1) {
        __cpuid(1, a, b, c, d);

        info |= (d & bit_CMOV ? CPUINFO_CMOV : 0);
        info |= (d & bit_SSE2 ? CPUINFO_SSE2 : 0);
        info |= (c & bit_SSE4_1 ? CPUINFO_SSE4 : 0);
        info |= (c & bit_MOVBE ? CPUINFO_MOVBE : 0);
        info |= (c & bit_POPCNT ? CPUINFO_POPCNT : 0);

        /* For AVX features, we must check available and usable. */
        if ((c & bit_AVX) && (c & bit_OSXSAVE)) {
            unsigned bv = xgetbv_low(0);

            if ((bv & 6) == 6) {
                info |= CPUINFO_AVX1;
                info |= (b7 & bit_AVX2 ? CPUINFO_AVX2 : 0);

                if ((bv & 0xe0) == 0xe0) {
                    info |= (b7 & bit_AVX512F ? CPUINFO_AVX512F : 0);
                    info |= (b7 & bit_AVX512VL ? CPUINFO_AVX512VL : 0);
                    info |= (b7 & bit_AVX512BW ? CPUINFO_AVX512BW : 0);
                    info |= (b7 & bit_AVX512DQ ? CPUINFO_AVX512DQ : 0);
                    info |= (c7 & bit_AVX512VBMI2 ? CPUINFO_AVX512VBMI2 : 0);
                }

                /*
                 * The Intel SDM has added:
                 *   Processors that enumerate support for Intel® AVX
                 *   (by setting the feature flag CPUID.01H:ECX.AVX[bit 28])
                 *   guarantee that the 16-byte memory operations performed
                 *   by the following instructions will always be carried
                 *   out atomically:
                 *   - MOVAPD, MOVAPS, and MOVDQA.
                 *   - VMOVAPD, VMOVAPS, and VMOVDQA when encoded with VEX.128.
                 *   - VMOVAPD, VMOVAPS, VMOVDQA32, and VMOVDQA64 when encoded
                 *     with EVEX.128 and k0 (masking disabled).
                 *   Note that these instructions require the linear addresses
                 *   of their memory operands to be 16-byte aligned.
                 *
                 * AMD has provided an even stronger guarantee that processors
                 * with AVX provide 16-byte atomicity for all cacheable,
                 * naturally aligned single loads and stores, e.g. MOVDQU.
                 *
                 * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688
                 */
                __cpuid(0, a, b, c, d);
                if (c == signature_INTEL_ecx) {
                    info |= CPUINFO_ATOMIC_VMOVDQA;
                } else if (c == signature_AMD_ecx) {
                    info |= CPUINFO_ATOMIC_VMOVDQA | CPUINFO_ATOMIC_VMOVDQU;
                }
            }
        }
    }

    max = __get_cpuid_max(0x80000000, 0);
    if (max >= 0x80000001) {
        __cpuid(0x80000001, a, b, c, d);
        info |= (c & bit_LZCNT ? CPUINFO_LZCNT : 0);
    }
#endif

    info |= CPUINFO_ALWAYS;
    cpuinfo = info;
    return info;
}
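The two xgetbv masks above are the OS-support half of the AVX checks: XCR0 bits 1 and 2 cover XMM and YMM register state, and bits 7:5 cover the opmask and upper-ZMM state that AVX-512 additionally needs, hence (bv & 6) == 6 and (bv & 0xe0) == 0xe0. A standalone sketch of the same usability test, written against GCC's <cpuid.h> (os_enables_avx() is an illustrative name, not part of this patch):

    #include <cpuid.h>
    #include <stdbool.h>

    static bool os_enables_avx(void)
    {
        unsigned a, b, c, d;

        if (!__get_cpuid(1, &a, &b, &c, &d)) {
            return false;
        }
        /* OSXSAVE means xgetbv works and XCR0 reflects OS state saving. */
        if (!(c & bit_OSXSAVE) || !(c & bit_AVX)) {
            return false;
        }
        /* Read XCR0; bit 1 = XMM state, bit 2 = YMM state. */
        __asm("xgetbv" : "=a"(a), "=d"(d) : "c"(0));
        return (a & 6) == 6;
    }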
6 changes: 6 additions & 0 deletions util/meson.build
@@ -108,3 +108,9 @@ if have_block
  endif
  util_ss.add(when: 'CONFIG_LINUX', if_true: files('vfio-helpers.c'))
endif

+if cpu == 'aarch64'
+  util_ss.add(files('cpuinfo-aarch64.c'))
+elif cpu in ['x86', 'x86_64']
+  util_ss.add(files('cpuinfo-i386.c'))
+endif
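Both new translation units build against a shared host/cpuinfo.h that this diff does not include. For orientation, a plausible minimal shape of that interface; the bit values below are assumptions, only the names and the "nonzero means already probed" convention are taken from the code above:

    /* host/cpuinfo.h (sketch) */
    extern unsigned cpuinfo;        /* bitmask of CPUINFO_* bits; 0 = not yet probed */
    unsigned cpuinfo_init(void);    /* probe once, cache into cpuinfo, return it */

    #define CPUINFO_ALWAYS  (1u << 0)   /* set unconditionally; marks "probed" */
    #define CPUINFO_SSE2    (1u << 1)   /* illustrative bit positions */
    #define CPUINFO_AVX2    (1u << 2)
    /* ... one bit per feature tested above (AVX512F, LSE, LSE2, ...) */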