Skip to content

Commit

Permalink
Merge tag 'pull-tcg-20230502' of https://gitlab.com/rth7680/qemu into…
Browse files Browse the repository at this point in the history
… staging

Misc tcg-related patch queue.

# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmRQ8YwdHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV+9rAf/RetCt2y/7VSlWBnP
# o/A5y8p6jQI+LjY0BIlG8V5DC1H/bLhCLD60/DyUwzWP6Zb9wYRQ5+WpsgBmUcql
# SkRA+mZLIGZWDSi6wIDN6IHixVkd/yVjn+05LQS8/GU7y/AEbjqsUevI5OX7aOkv
# 2LkDrzbZrBWcE/C1coZKUNDHac1+Wh4UBUqfxVWvBIQW+qXMXuwSraoOBHA9BvcH
# wGJjJ4eoLKoKBbu49rx+b2wvXiTRtIq2jfKAOEVoZy5uWrcXQTqZHJXejhB1JOFY
# 5i2qqGA9vX6HycDmq2xM3qwO2RtlyS2tGgXL0QX2D/4z1ysEzQUv7bPL9euGk13K
# neCv5w==
# =G9sH
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 02 May 2023 12:18:36 PM BST
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [ultimate]

* tag 'pull-tcg-20230502' of https://gitlab.com/rth7680/qemu:
  tcg: Introduce tcg_out_movext2
  tcg/mips: Conditionalize tcg_out_exts_i32_i64
  tcg/loongarch64: Conditionalize tcg_out_exts_i32_i64
  accel/tcg: Add cpu_ld*_code_mmu
  migration/xbzrle: Use __attribute__((target)) for avx512
  qemu/int128: Re-shuffle Int128Alias members
  tcg: Add tcg_gen_gvec_rotrs
  tcg: Add tcg_gen_gvec_andcs
  qemu/host-utils.h: Add clz and ctz functions for lower-bit integers
  qemu/bitops.h: Limit rotate amounts
  accel/tcg: Uncache the host address for instruction fetch when tlb size < 1
  softmmu: Tidy dirtylimit_dirty_ring_full_time

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
  • Loading branch information
rth7680 committed May 2, 2023
2 parents b5f47ba + bdc7fba commit 1b0e90a
Show file tree
Hide file tree
Showing 18 changed files with 347 additions and 68 deletions.
53 changes: 53 additions & 0 deletions accel/tcg/cputlb.c
Original file line number Diff line number Diff line change
Expand Up @@ -1696,6 +1696,11 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
if (p == NULL) {
return -1;
}

if (full->lg_page_size < TARGET_PAGE_BITS) {
return -1;
}

if (hostp) {
*hostp = p;
}
Expand Down Expand Up @@ -2768,3 +2773,51 @@ uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
MemOpIdx oi = make_memop_idx(MO_TEUQ, cpu_mmu_index(env, true));
return full_ldq_code(env, addr, oi, 0);
}

uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t retaddr)
{
return full_ldub_code(env, addr, oi, retaddr);
}

uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t retaddr)
{
MemOp mop = get_memop(oi);
int idx = get_mmuidx(oi);
uint16_t ret;

ret = full_lduw_code(env, addr, make_memop_idx(MO_TEUW, idx), retaddr);
if ((mop & MO_BSWAP) != MO_TE) {
ret = bswap16(ret);
}
return ret;
}

uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t retaddr)
{
MemOp mop = get_memop(oi);
int idx = get_mmuidx(oi);
uint32_t ret;

ret = full_ldl_code(env, addr, make_memop_idx(MO_TEUL, idx), retaddr);
if ((mop & MO_BSWAP) != MO_TE) {
ret = bswap32(ret);
}
return ret;
}

uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t retaddr)
{
MemOp mop = get_memop(oi);
int idx = get_mmuidx(oi);
uint64_t ret;

ret = full_ldq_code(env, addr, make_memop_idx(MO_TEUQ, idx), retaddr);
if ((mop & MO_BSWAP) != MO_TE) {
ret = bswap64(ret);
}
return ret;
}
11 changes: 11 additions & 0 deletions accel/tcg/tcg-runtime-gvec.c
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,17 @@ void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
clear_high(d, oprsz, desc);
}

void HELPER(gvec_andcs)(void *d, void *a, uint64_t b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;

for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
*(uint64_t *)(d + i) = *(uint64_t *)(a + i) & ~b;
}
clear_high(d, oprsz, desc);
}

void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
Expand Down
1 change: 1 addition & 0 deletions accel/tcg/tcg-runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ DEF_HELPER_FLAGS_4(gvec_nor, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_eqv, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

DEF_HELPER_FLAGS_4(gvec_ands, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_andcs, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_xors, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_ors, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)

Expand Down
58 changes: 58 additions & 0 deletions accel/tcg/user-exec.c
Original file line number Diff line number Diff line change
Expand Up @@ -1219,6 +1219,64 @@ uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr ptr)
return ret;
}

uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
uint8_t ret;

haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
ret = ldub_p(haddr);
clear_helper_retaddr();
return ret;
}

uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
uint16_t ret;

haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
ret = lduw_p(haddr);
clear_helper_retaddr();
if (get_memop(oi) & MO_BSWAP) {
ret = bswap16(ret);
}
return ret;
}

uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
uint32_t ret;

haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
ret = ldl_p(haddr);
clear_helper_retaddr();
if (get_memop(oi) & MO_BSWAP) {
ret = bswap32(ret);
}
return ret;
}

uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
uint64_t ret;

validate_memop(oi, MO_BEUQ);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
ret = ldq_p(haddr);
clear_helper_retaddr();
if (get_memop(oi) & MO_BSWAP) {
ret = bswap64(ret);
}
return ret;
}

#include "ldst_common.c.inc"

/*
Expand Down
9 changes: 9 additions & 0 deletions include/exec/cpu_ldst.h
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,15 @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
# define cpu_stq_mmu cpu_stq_le_mmu
#endif

uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra);
uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra);
uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra);
uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra);

uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr);
uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr);
uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr);
Expand Down
24 changes: 16 additions & 8 deletions include/qemu/bitops.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,8 @@ static inline unsigned long find_first_zero_bit(const unsigned long *addr,
*/
static inline uint8_t rol8(uint8_t word, unsigned int shift)
{
return (word << shift) | (word >> ((8 - shift) & 7));
shift &= 7;
return (word << shift) | (word >> (8 - shift));
}

/**
Expand All @@ -228,7 +229,8 @@ static inline uint8_t rol8(uint8_t word, unsigned int shift)
*/
static inline uint8_t ror8(uint8_t word, unsigned int shift)
{
return (word >> shift) | (word << ((8 - shift) & 7));
shift &= 7;
return (word >> shift) | (word << (8 - shift));
}

/**
Expand All @@ -238,7 +240,8 @@ static inline uint8_t ror8(uint8_t word, unsigned int shift)
*/
static inline uint16_t rol16(uint16_t word, unsigned int shift)
{
return (word << shift) | (word >> ((16 - shift) & 15));
shift &= 15;
return (word << shift) | (word >> (16 - shift));
}

/**
Expand All @@ -248,7 +251,8 @@ static inline uint16_t rol16(uint16_t word, unsigned int shift)
*/
static inline uint16_t ror16(uint16_t word, unsigned int shift)
{
return (word >> shift) | (word << ((16 - shift) & 15));
shift &= 15;
return (word >> shift) | (word << (16 - shift));
}

/**
Expand All @@ -258,7 +262,8 @@ static inline uint16_t ror16(uint16_t word, unsigned int shift)
*/
static inline uint32_t rol32(uint32_t word, unsigned int shift)
{
return (word << shift) | (word >> ((32 - shift) & 31));
shift &= 31;
return (word << shift) | (word >> (32 - shift));
}

/**
Expand All @@ -268,7 +273,8 @@ static inline uint32_t rol32(uint32_t word, unsigned int shift)
*/
static inline uint32_t ror32(uint32_t word, unsigned int shift)
{
return (word >> shift) | (word << ((32 - shift) & 31));
shift &= 31;
return (word >> shift) | (word << (32 - shift));
}

/**
Expand All @@ -278,7 +284,8 @@ static inline uint32_t ror32(uint32_t word, unsigned int shift)
*/
static inline uint64_t rol64(uint64_t word, unsigned int shift)
{
return (word << shift) | (word >> ((64 - shift) & 63));
shift &= 63;
return (word << shift) | (word >> (64 - shift));
}

/**
Expand All @@ -288,7 +295,8 @@ static inline uint64_t rol64(uint64_t word, unsigned int shift)
*/
static inline uint64_t ror64(uint64_t word, unsigned int shift)
{
return (word >> shift) | (word << ((64 - shift) & 63));
shift &= 63;
return (word >> shift) | (word << (64 - shift));
}

/**
Expand Down
54 changes: 54 additions & 0 deletions include/qemu/host-utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,36 @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
}
#endif

/**
* clz8 - count leading zeros in a 8-bit value.
* @val: The value to search
*
* Returns 8 if the value is zero. Note that the GCC builtin is
* undefined if the value is zero.
*
* Note that the GCC builtin will upcast its argument to an `unsigned int`
* so this function subtracts off the number of prepended zeroes.
*/
static inline int clz8(uint8_t val)
{
return val ? __builtin_clz(val) - 24 : 8;
}

/**
* clz16 - count leading zeros in a 16-bit value.
* @val: The value to search
*
* Returns 16 if the value is zero. Note that the GCC builtin is
* undefined if the value is zero.
*
* Note that the GCC builtin will upcast its argument to an `unsigned int`
* so this function subtracts off the number of prepended zeroes.
*/
static inline int clz16(uint16_t val)
{
return val ? __builtin_clz(val) - 16 : 16;
}

/**
* clz32 - count leading zeros in a 32-bit value.
* @val: The value to search
Expand Down Expand Up @@ -153,6 +183,30 @@ static inline int clo64(uint64_t val)
return clz64(~val);
}

/**
* ctz8 - count trailing zeros in a 8-bit value.
* @val: The value to search
*
* Returns 8 if the value is zero. Note that the GCC builtin is
* undefined if the value is zero.
*/
static inline int ctz8(uint8_t val)
{
return val ? __builtin_ctz(val) : 8;
}

/**
* ctz16 - count trailing zeros in a 16-bit value.
* @val: The value to search
*
* Returns 16 if the value is zero. Note that the GCC builtin is
* undefined if the value is zero.
*/
static inline int ctz16(uint16_t val)
{
return val ? __builtin_ctz(val) : 16;
}

/**
* ctz32 - count trailing zeros in a 32-bit value.
* @val: The value to search
Expand Down
4 changes: 2 additions & 2 deletions include/qemu/int128.h
Original file line number Diff line number Diff line change
Expand Up @@ -483,9 +483,9 @@ static inline void bswap128s(Int128 *s)
*/
#ifdef CONFIG_INT128
typedef union {
Int128 s;
__int128_t i;
__uint128_t u;
__int128_t i;
Int128 s;
} Int128Alias __attribute__((transparent_union));
#else
typedef Int128 Int128Alias;
Expand Down
4 changes: 4 additions & 0 deletions include/tcg/tcg-op-gvec.h
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,8 @@ void tcg_gen_gvec_ori(unsigned vece, uint32_t dofs, uint32_t aofs,

void tcg_gen_gvec_ands(unsigned vece, uint32_t dofs, uint32_t aofs,
TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_andcs(unsigned vece, uint32_t dofs, uint32_t aofs,
TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_xors(unsigned vece, uint32_t dofs, uint32_t aofs,
TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_ors(unsigned vece, uint32_t dofs, uint32_t aofs,
Expand Down Expand Up @@ -369,6 +371,8 @@ void tcg_gen_gvec_sars(unsigned vece, uint32_t dofs, uint32_t aofs,
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_rotls(unsigned vece, uint32_t dofs, uint32_t aofs,
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);
void tcg_gen_gvec_rotrs(unsigned vece, uint32_t dofs, uint32_t aofs,
TCGv_i32 shift, uint32_t oprsz, uint32_t maxsz);

/*
* Perform vector shift by vector element, modulo the element size.
Expand Down
5 changes: 1 addition & 4 deletions meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -2370,12 +2370,9 @@ config_host_data.set('CONFIG_AVX512F_OPT', get_option('avx512f') \
config_host_data.set('CONFIG_AVX512BW_OPT', get_option('avx512bw') \
.require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX512BW') \
.require(cc.links('''
#pragma GCC push_options
#pragma GCC target("avx512bw")
#include <cpuid.h>
#include <immintrin.h>
static int bar(void *a) {
static int __attribute__((target("avx512bw"))) bar(void *a) {
__m512i *x = a;
__m512i res= _mm512_abs_epi8(*x);
return res[1];
Expand Down
9 changes: 4 additions & 5 deletions migration/xbzrle.c
Original file line number Diff line number Diff line change
Expand Up @@ -177,11 +177,11 @@ int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
}

#if defined(CONFIG_AVX512BW_OPT)
#pragma GCC push_options
#pragma GCC target("avx512bw")
#include <immintrin.h>
int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
uint8_t *dst, int dlen)

int __attribute__((target("avx512bw")))
xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
uint8_t *dst, int dlen)
{
uint32_t zrun_len = 0, nzrun_len = 0;
int d = 0, i = 0, num = 0;
Expand Down Expand Up @@ -296,5 +296,4 @@ int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
}
return d;
}
#pragma GCC pop_options
#endif

0 comments on commit 1b0e90a

Please sign in to comment.