Merge tag 'pull-tcg-20231018' of https://gitlab.com/rth7680/qemu into staging

tcg: Drop unused tcg_temp_free define
tcg: Introduce tcg_use_softmmu
tcg: Optimize past conditional branches
tcg: Use constant zero when expanding with divu2
tcg/ppc: Enable direct branching tcg_out_goto_tb with TCG_REG_TB
tcg/ppc: Use ADDPCIS for power9
tcg/ppc: Use prefixed instructions for power10
tcg/ppc: Disable TCG_REG_TB for Power9/Power10

# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmUwWtodHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV97dQf7B0gIlqiIO3WrGipa
# Mwo994CAvvXJHuWtl/Pn20NeaCr4K6+Ab8YyfSWpjMP1T+qBRAMjDK5Lt8+QyLmy
# R2eJlRTI2e3JjOahiNvNSItbJ9WHu4IFsQw63hecrNIRkUDd70Mxff8euNGvhM2U
# DjCQCnyDltyK83GOTHgtqE991yEwsx1sR/FQGBLrmnpNMu5kZt05HKaZ+/WqjG5q
# 63NUokzTINa6A11+4JAAX6v16pRMSjehlb6YfjVZnsoY/GzIMiL2aSG/SIO+4YAr
# aBvYxpkK9xU5rwuPehddPI3ATeIdVPWG5N1I8b70e8hkuDkbiqukwr6OCdCT2wTA
# uwjVKA==
# =3iH2
# -----END PGP SIGNATURE-----
# gpg: Signature made Wed 18 Oct 2023 15:23:22 PDT
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A  05C0 64DF 38E8 AF7E 215F

* tag 'pull-tcg-20231018' of https://gitlab.com/rth7680/qemu: (29 commits)
  target/i386: Use i128 for 128 and 256-bit loads and stores
  tcg: Add tcg_gen_{ld,st}_i128
  tcg: Optimize past conditional branches
  tcg: Use constant zero when expanding with divu2
  tcg: drop unused tcg_temp_free define
  tcg/s390x: Use tcg_use_softmmu
  tcg/riscv: Use tcg_use_softmmu
  tcg/riscv: Do not reserve TCG_GUEST_BASE_REG for guest_base zero
  tcg/ppc: Use tcg_use_softmmu
  tcg/mips: Use tcg_use_softmmu
  tcg/loongarch64: Use tcg_use_softmmu
  tcg/i386: Use tcg_use_softmmu
  tcg/aarch64: Use tcg_use_softmmu
  tcg/arm: Use tcg_use_softmmu
  tcg: Provide guest_base fallback for system mode
  tcg: Introduce tcg_use_softmmu
  tcg/ppc: Disable TCG_REG_TB for Power9/Power10
  tcg/ppc: Use PLD in tcg_out_goto_tb
  tcg/ppc: Use prefixed instructions in tcg_out_dupi_vec
  tcg/ppc: Use PLD in tcg_out_movi for constant pool
  ...

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
stefanhaRH committed Oct 19, 2023
2 parents 79cc3d6 + b540757 commit fa26b06
Showing 16 changed files with 1,068 additions and 863 deletions.
3 changes: 3 additions & 0 deletions include/tcg/tcg-op-common.h
@@ -747,6 +747,9 @@ void tcg_gen_mov_i128(TCGv_i128 dst, TCGv_i128 src);
 void tcg_gen_extr_i128_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i128 arg);
 void tcg_gen_concat_i64_i128(TCGv_i128 ret, TCGv_i64 lo, TCGv_i64 hi);
 
+void tcg_gen_ld_i128(TCGv_i128 ret, TCGv_ptr base, tcg_target_long offset);
+void tcg_gen_st_i128(TCGv_i128 val, TCGv_ptr base, tcg_target_long offset);
+
 static inline void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi)
 {
     tcg_gen_deposit_i64(ret, lo, hi, 32, 32);
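These two declarations round out the i128 interface: like the existing tcg_gen_ld_i64/tcg_gen_st_i64, they move a whole 128-bit value between a TCG temporary and a host-side structure reachable through a TCGv_ptr such as tcg_env. A minimal usage sketch against QEMU's TCG headers (the helper and its offsets are illustrative, not part of this patch):

/* Copy 16 bytes from one env offset to another via an i128 temp. */
static void gen_copy_16_bytes(TCGv_ptr env, int src_ofs, int dst_ofs)
{
    TCGv_i128 t = tcg_temp_new_i128();

    tcg_gen_ld_i128(t, env, src_ofs);   /* fetch 16 bytes at env + src_ofs */
    tcg_gen_st_i128(t, env, dst_ofs);   /* store them at env + dst_ofs */
}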
2 changes: 0 additions & 2 deletions include/tcg/tcg-op.h
@@ -52,15 +52,13 @@ static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1,
 typedef TCGv_i32 TCGv;
 #define tcg_temp_new() tcg_temp_new_i32()
 #define tcg_global_mem_new tcg_global_mem_new_i32
-#define tcg_temp_free tcg_temp_free_i32
 #define tcgv_tl_temp tcgv_i32_temp
 #define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i32
 #define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i32
 #elif TARGET_LONG_BITS == 64
 typedef TCGv_i64 TCGv;
 #define tcg_temp_new() tcg_temp_new_i64()
 #define tcg_global_mem_new tcg_global_mem_new_i64
-#define tcg_temp_free tcg_temp_free_i64
 #define tcgv_tl_temp tcgv_i64_temp
 #define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i64
 #define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i64
8 changes: 6 additions & 2 deletions include/tcg/tcg.h
@@ -488,11 +488,9 @@ struct TCGContext {
     int nb_ops;
     TCGType addr_type;            /* TCG_TYPE_I32 or TCG_TYPE_I64 */
 
-#ifdef CONFIG_SOFTMMU
     int page_mask;
     uint8_t page_bits;
     uint8_t tlb_dyn_max_bits;
-#endif
     uint8_t insn_start_words;
     TCGBar guest_mo;
 
@@ -573,6 +571,12 @@ static inline bool temp_readonly(TCGTemp *ts)
     return ts->kind >= TEMP_FIXED;
 }
 
+#ifdef CONFIG_USER_ONLY
+extern bool tcg_use_softmmu;
+#else
+#define tcg_use_softmmu true
+#endif
+
 extern __thread TCGContext *tcg_ctx;
 extern const void *tcg_code_gen_epilogue;
 extern uintptr_t tcg_splitwx_diff;
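tcg_use_softmmu is a constant true for system-mode builds and a real variable only under CONFIG_USER_ONLY, so the backend patches below can replace #ifdef CONFIG_SOFTMMU blocks with ordinary if statements that the compiler still folds to straight-line code whenever the flag is a constant. The same pattern in miniature, as a self-contained sketch (USER_ONLY and use_softmmu are placeholder names, not QEMU's):

#include <stdbool.h>
#include <stdio.h>

#ifdef USER_ONLY
bool use_softmmu;            /* chosen at startup in user-mode builds */
#else
#define use_softmmu true     /* compile-time constant in system mode */
#endif

void emit_load(void)
{
    if (use_softmmu) {       /* dead branch eliminated when constant */
        puts("emit TLB lookup + slow-path call");
    } else {
        puts("emit direct guest_base-relative access");
    }
}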
63 changes: 29 additions & 34 deletions target/i386/tcg/translate.c
@@ -2918,59 +2918,54 @@ static inline void gen_stq_env_A0(DisasContext *s, int offset)

 static inline void gen_ldo_env_A0(DisasContext *s, int offset, bool align)
 {
+    MemOp atom = (s->cpuid_ext_features & CPUID_EXT_AVX
+                  ? MO_ATOM_IFALIGN : MO_ATOM_IFALIGN_PAIR);
+    MemOp mop = MO_128 | MO_LE | atom | (align ? MO_ALIGN_16 : 0);
     int mem_index = s->mem_index;
-    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index,
-                        MO_LEUQ | (align ? MO_ALIGN_16 : 0));
-    tcg_gen_st_i64(s->tmp1_i64, tcg_env, offset + offsetof(XMMReg, XMM_Q(0)));
-    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
-    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
-    tcg_gen_st_i64(s->tmp1_i64, tcg_env, offset + offsetof(XMMReg, XMM_Q(1)));
+    TCGv_i128 t = tcg_temp_new_i128();
+
+    tcg_gen_qemu_ld_i128(t, s->A0, mem_index, mop);
+    tcg_gen_st_i128(t, tcg_env, offset);
 }
 
 static inline void gen_sto_env_A0(DisasContext *s, int offset, bool align)
 {
+    MemOp atom = (s->cpuid_ext_features & CPUID_EXT_AVX
+                  ? MO_ATOM_IFALIGN : MO_ATOM_IFALIGN_PAIR);
+    MemOp mop = MO_128 | MO_LE | atom | (align ? MO_ALIGN_16 : 0);
     int mem_index = s->mem_index;
-    tcg_gen_ld_i64(s->tmp1_i64, tcg_env, offset + offsetof(XMMReg, XMM_Q(0)));
-    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index,
-                        MO_LEUQ | (align ? MO_ALIGN_16 : 0));
-    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
-    tcg_gen_ld_i64(s->tmp1_i64, tcg_env, offset + offsetof(XMMReg, XMM_Q(1)));
-    tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
+    TCGv_i128 t = tcg_temp_new_i128();
+
+    tcg_gen_ld_i128(t, tcg_env, offset);
+    tcg_gen_qemu_st_i128(t, s->A0, mem_index, mop);
 }
 
 static void gen_ldy_env_A0(DisasContext *s, int offset, bool align)
 {
+    MemOp mop = MO_128 | MO_LE | MO_ATOM_IFALIGN_PAIR;
     int mem_index = s->mem_index;
-    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index,
-                        MO_LEUQ | (align ? MO_ALIGN_32 : 0));
-    tcg_gen_st_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(0)));
-    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
-    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
-    tcg_gen_st_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(1)));
+    TCGv_i128 t0 = tcg_temp_new_i128();
+    TCGv_i128 t1 = tcg_temp_new_i128();
 
+    tcg_gen_qemu_ld_i128(t0, s->A0, mem_index, mop | (align ? MO_ALIGN_32 : 0));
     tcg_gen_addi_tl(s->tmp0, s->A0, 16);
-    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
-    tcg_gen_st_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(2)));
-    tcg_gen_addi_tl(s->tmp0, s->A0, 24);
-    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
-    tcg_gen_st_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(3)));
+    tcg_gen_qemu_ld_i128(t1, s->tmp0, mem_index, mop);
+
+    tcg_gen_st_i128(t0, tcg_env, offset + offsetof(YMMReg, YMM_X(0)));
+    tcg_gen_st_i128(t1, tcg_env, offset + offsetof(YMMReg, YMM_X(1)));
 }
 
 static void gen_sty_env_A0(DisasContext *s, int offset, bool align)
 {
+    MemOp mop = MO_128 | MO_LE | MO_ATOM_IFALIGN_PAIR;
     int mem_index = s->mem_index;
-    tcg_gen_ld_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(0)));
-    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index,
-                        MO_LEUQ | (align ? MO_ALIGN_32 : 0));
-    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
-    tcg_gen_ld_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(1)));
-    tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
+    TCGv_i128 t = tcg_temp_new_i128();
+
+    tcg_gen_ld_i128(t, tcg_env, offset + offsetof(YMMReg, YMM_X(0)));
+    tcg_gen_qemu_st_i128(t, s->A0, mem_index, mop | (align ? MO_ALIGN_32 : 0));
     tcg_gen_addi_tl(s->tmp0, s->A0, 16);
-    tcg_gen_ld_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(2)));
-    tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
-    tcg_gen_addi_tl(s->tmp0, s->A0, 24);
-    tcg_gen_ld_i64(s->tmp1_i64, tcg_env, offset + offsetof(YMMReg, YMM_Q(3)));
-    tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
+    tcg_gen_ld_i128(t, tcg_env, offset + offsetof(YMMReg, YMM_X(1)));
+    tcg_gen_qemu_st_i128(t, s->tmp0, mem_index, mop);
 }

#include "decode-new.h"
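The conversion replaces the two (XMM) or four (YMM) 64-bit accesses funneled through s->tmp1_i64 with i128 operations whose MemOp also encodes the atomicity requirement. One plausible reading of the atom choice: with AVX advertised to the guest, an aligned 16-byte access should be single-copy atomic as a whole (MO_ATOM_IFALIGN); without AVX, only the two 8-byte halves need to be (MO_ATOM_IFALIGN_PAIR). That reading is an interpretation, not stated in the patch. The flag composition from the new code, pulled out as a sketch assuming QEMU's memop.h definitions (the helper name is invented here):

/* Build one MemOp describing a little-endian 16-byte guest access. */
static MemOp mop_for_xmm(bool guest_has_avx, bool require_align)
{
    MemOp atom = guest_has_avx ? MO_ATOM_IFALIGN        /* whole 16 bytes */
                               : MO_ATOM_IFALIGN_PAIR;  /* two 8-byte halves */
    return MO_128 | MO_LE | atom | (require_align ? MO_ALIGN_16 : 0);
}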
175 changes: 87 additions & 88 deletions tcg/aarch64/tcg-target.c.inc
@@ -77,9 +77,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
 #define TCG_REG_TMP2 TCG_REG_X30
 #define TCG_VEC_TMP0 TCG_REG_V31
 
-#ifndef CONFIG_SOFTMMU
 #define TCG_REG_GUEST_BASE TCG_REG_X28
-#endif
 
 static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
 {
@@ -1664,97 +1662,98 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
                                 s_bits == MO_128);
     a_mask = (1 << h->aa.align) - 1;
 
-#ifdef CONFIG_SOFTMMU
-    unsigned s_mask = (1u << s_bits) - 1;
-    unsigned mem_index = get_mmuidx(oi);
-    TCGReg addr_adj;
-    TCGType mask_type;
-    uint64_t compare_mask;
 
-    ldst = new_ldst_label(s);
-    ldst->is_ld = is_ld;
-    ldst->oi = oi;
-    ldst->addrlo_reg = addr_reg;
 
-    mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
-                 ? TCG_TYPE_I64 : TCG_TYPE_I32);
 
-    /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
-    QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
-    QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
-    tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
-                 tlb_mask_table_ofs(s, mem_index), 1, 0);
 
-    /* Extract the TLB index from the address into X0. */
-    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
-                 TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
-                 s->page_bits - CPU_TLB_ENTRY_BITS);
 
-    /* Add the tlb_table pointer, forming the CPUTLBEntry address in TMP1. */
-    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);
 
-    /* Load the tlb comparator into TMP0, and the fast path addend into TMP1. */
-    QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
-    tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
-               is_ld ? offsetof(CPUTLBEntry, addr_read)
-                     : offsetof(CPUTLBEntry, addr_write));
-    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
-               offsetof(CPUTLBEntry, addend));
+    if (tcg_use_softmmu) {
+        unsigned s_mask = (1u << s_bits) - 1;
+        unsigned mem_index = get_mmuidx(oi);
+        TCGReg addr_adj;
+        TCGType mask_type;
+        uint64_t compare_mask;
 
-    /*
-     * For aligned accesses, we check the first byte and include the alignment
-     * bits within the address. For unaligned access, we check that we don't
-     * cross pages using the address of the last byte of the access.
-     */
-    if (a_mask >= s_mask) {
-        addr_adj = addr_reg;
-    } else {
-        addr_adj = TCG_REG_TMP2;
-        tcg_out_insn(s, 3401, ADDI, addr_type,
-                     addr_adj, addr_reg, s_mask - a_mask);
-    }
-    compare_mask = (uint64_t)s->page_mask | a_mask;
+        ldst = new_ldst_label(s);
+        ldst->is_ld = is_ld;
+        ldst->oi = oi;
+        ldst->addrlo_reg = addr_reg;
 
-    /* Store the page mask part of the address into TMP2. */
-    tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
-                     addr_adj, compare_mask);
+        mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
+                     ? TCG_TYPE_I64 : TCG_TYPE_I32);
 
-    /* Perform the address comparison. */
-    tcg_out_cmp(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, 0);
+        /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
+        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
+        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
+        tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
+                     tlb_mask_table_ofs(s, mem_index), 1, 0);
 
-    /* If not equal, we jump to the slow path. */
-    ldst->label_ptr[0] = s->code_ptr;
-    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
+        /* Extract the TLB index from the address into X0. */
+        tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
+                     TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
+                     s->page_bits - CPU_TLB_ENTRY_BITS);
 
-    h->base = TCG_REG_TMP1;
-    h->index = addr_reg;
-    h->index_ext = addr_type;
-#else
-    if (a_mask) {
-        ldst = new_ldst_label(s);
+        /* Add the tlb_table pointer, forming the CPUTLBEntry address. */
+        tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);
 
-        ldst->is_ld = is_ld;
-        ldst->oi = oi;
-        ldst->addrlo_reg = addr_reg;
+        /* Load the tlb comparator into TMP0, and the fast path addend. */
+        QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
+        tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
+                   is_ld ? offsetof(CPUTLBEntry, addr_read)
+                         : offsetof(CPUTLBEntry, addr_write));
+        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
+                   offsetof(CPUTLBEntry, addend));
 
+        /*
+         * For aligned accesses, we check the first byte and include
+         * the alignment bits within the address. For unaligned access,
+         * we check that we don't cross pages using the address of the
+         * last byte of the access.
+         */
+        if (a_mask >= s_mask) {
+            addr_adj = addr_reg;
+        } else {
+            addr_adj = TCG_REG_TMP2;
+            tcg_out_insn(s, 3401, ADDI, addr_type,
+                         addr_adj, addr_reg, s_mask - a_mask);
+        }
+        compare_mask = (uint64_t)s->page_mask | a_mask;
 
+        /* Store the page mask part of the address into TMP2. */
+        tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
+                         addr_adj, compare_mask);
 
-        /* tst addr, #mask */
-        tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
+        /* Perform the address comparison. */
+        tcg_out_cmp(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, 0);
 
-        /* b.ne slow_path */
+        /* If not equal, we jump to the slow path. */
         ldst->label_ptr[0] = s->code_ptr;
         tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
-    }
 
-    if (guest_base || addr_type == TCG_TYPE_I32) {
-        h->base = TCG_REG_GUEST_BASE;
+        h->base = TCG_REG_TMP1;
         h->index = addr_reg;
         h->index_ext = addr_type;
     } else {
-        h->base = addr_reg;
-        h->index = TCG_REG_XZR;
-        h->index_ext = TCG_TYPE_I64;
+        if (a_mask) {
+            ldst = new_ldst_label(s);
+
+            ldst->is_ld = is_ld;
+            ldst->oi = oi;
+            ldst->addrlo_reg = addr_reg;
+
+            /* tst addr, #mask */
+            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
+
+            /* b.ne slow_path */
+            ldst->label_ptr[0] = s->code_ptr;
+            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
+        }
+
+        if (guest_base || addr_type == TCG_TYPE_I32) {
+            h->base = TCG_REG_GUEST_BASE;
+            h->index = addr_reg;
+            h->index_ext = addr_type;
+        } else {
+            h->base = addr_reg;
+            h->index = TCG_REG_XZR;
+            h->index_ext = TCG_TYPE_I64;
+        }
     }
-#endif
 
     return ldst;
 }
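To follow the emitted fast path at run time: the LDP fetches the {mask, table} pair, the AND_LSR turns the guest address into a scaled entry offset, and the ADD forms the CPUTLBEntry pointer whose comparator is then checked against the page-masked address, branching to the slow path on mismatch. The address math as a plain C sketch (parameter names assumed, not QEMU source; mask is pre-scaled by the entry size, as in CPUTLBDescFast):

#include <stdint.h>

/* Compute the address of the candidate CPUTLBEntry for a guest address. */
static uintptr_t tlb_entry_addr(uint64_t addr, uint64_t mask, uintptr_t table,
                                unsigned page_bits, unsigned entry_bits)
{
    /* AND_LSR step: shift the page number down into a scaled entry offset. */
    uint64_t entry_ofs = (addr >> (page_bits - entry_bits)) & mask;
    /* ADD step: index into the TLB table. */
    return table + entry_ofs;
}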
@@ -3117,16 +3116,16 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
                   CPU_TEMP_BUF_NLONGS * sizeof(long));
 
-#if !defined(CONFIG_SOFTMMU)
-    /*
-     * Note that XZR cannot be encoded in the address base register slot,
-     * as that actually encodes SP. Depending on the guest, we may need
-     * to zero-extend the guest address via the address index register slot,
-     * therefore we need to load even a zero guest base into a register.
-     */
-    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
-#endif
+    if (!tcg_use_softmmu) {
+        /*
+         * Note that XZR cannot be encoded in the address base register slot,
+         * as that actually encodes SP. Depending on the guest, we may need
+         * to zero-extend the guest address via the address index register slot,
+         * therefore we need to load even a zero guest base into a register.
+         */
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
+        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
+    }
 
     tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
     tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
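The comment preserved above records an AArch64 encoding quirk: register number 31 in the base slot of a load/store encodes SP rather than XZR, so even a guest_base of zero must be materialized in a real register. What the reserved register buys each guest access is one addition, roughly (a simplified model, not QEMU source):

#include <stdint.h>

/* Simplified model of the [base, index] addressing the backend emits:
 * host address = guest_base + (possibly zero-extended) guest address. */
static uintptr_t guest_to_host(uintptr_t guest_base, uint32_t guest_addr)
{
    return guest_base + (uint64_t)guest_addr;
}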
