Skip to content

Commit

Permalink
Merge tag 'pull-tcg-20230724' of https://gitlab.com/rth7680/qemu into…
Browse files Browse the repository at this point in the history
… staging

accel/tcg: Zero-pad vaddr in tlb debug output
accel/tcg: Fix type of 'last' for pageflags_{find,next}
accel/tcg: Fix sense of read-only probes in ldst_atomicity
accel/tcg: Take mmap_lock in load_atomic*_or_exit
tcg: Add earlyclobber to op_add2 for x86 and s390x
tcg/ppc: Fix race in goto_tb implementation

# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmS+O7cdHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV8qrAf/VeAFnMbtantUTfM5
# zOcfBlutsDlJrNwA/ajFDrPwUDewP7s5cqxImAYqhXfhqlc2RIB3UiMCgSaQ+q6O
# MBOH0bEj/zbeIlwRX07ZBWhUYVdqJVd7Nxb1W19YwgG9yieWUxa+Xo1i2fhyXMv+
# 20VOFB1dPnxYyUMrzh/bSiHE90JFZktO1WzV10FRD+IpnImY9R+YGdpGTpVzUhor
# ReRHTkMKyYilY6EEUG2gFhotrY/bbSSSFyl9BcQjkZh11603nAN0mNKxtSjPJnNB
# rXhCVEgmbbBvCufsO6szQ03W/7RZ/KCg/DyKqxyCP1Ril4BIOx3tiucROcapXH/K
# 0y/ycA==
# =hdk/
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 24 Jul 2023 09:52:07 BST
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A  05C0 64DF 38E8 AF7E 215F

* tag 'pull-tcg-20230724' of https://gitlab.com/rth7680/qemu:
  accel/tcg: Fix type of 'last' for pageflags_{find,next}
  accel/tcg: Zero-pad vaddr in tlb_debug output
  tcg/{i386, s390x}: Add earlyclobber to the op_add2's first output
  accel/tcg: Take mmap_lock in load_atomic*_or_exit
  accel/tcg: Fix sense of read-only probes in ldst_atomicity
  include/exec: Add WITH_MMAP_LOCK_GUARD
  tcg/ppc: Fix race in goto_tb implementation

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
  • Loading branch information
pm215 committed Jul 24, 2023
2 parents d1181d2 + 32b1203 commit 4b14424
Show file tree
Hide file tree
Showing 12 changed files with 66 additions and 38 deletions.
20 changes: 10 additions & 10 deletions accel/tcg/cputlb.c
Original file line number Diff line number Diff line change
Expand Up @@ -497,8 +497,8 @@ static void tlb_flush_page_locked(CPUArchState *env, int midx, vaddr page)

/* Check if we need to flush due to large pages. */
if ((page & lp_mask) == lp_addr) {
tlb_debug("forcing full flush midx %d (%"
VADDR_PRIx "/%" VADDR_PRIx ")\n",
tlb_debug("forcing full flush midx %d (%016"
VADDR_PRIx "/%016" VADDR_PRIx ")\n",
midx, lp_addr, lp_mask);
tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
} else {
Expand Down Expand Up @@ -527,7 +527,7 @@ static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,

assert_cpu_is_self(cpu);

tlb_debug("page addr: %" VADDR_PRIx " mmu_map:0x%x\n", addr, idxmap);
tlb_debug("page addr: %016" VADDR_PRIx " mmu_map:0x%x\n", addr, idxmap);

qemu_spin_lock(&env_tlb(env)->c.lock);
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
Expand Down Expand Up @@ -591,7 +591,7 @@ static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,

void tlb_flush_page_by_mmuidx(CPUState *cpu, vaddr addr, uint16_t idxmap)
{
tlb_debug("addr: %" VADDR_PRIx " mmu_idx:%" PRIx16 "\n", addr, idxmap);
tlb_debug("addr: %016" VADDR_PRIx " mmu_idx:%" PRIx16 "\n", addr, idxmap);

/* This should already be page aligned */
addr &= TARGET_PAGE_MASK;
Expand Down Expand Up @@ -625,7 +625,7 @@ void tlb_flush_page(CPUState *cpu, vaddr addr)
void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, vaddr addr,
uint16_t idxmap)
{
tlb_debug("addr: %" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap);
tlb_debug("addr: %016" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap);

/* This should already be page aligned */
addr &= TARGET_PAGE_MASK;
Expand Down Expand Up @@ -666,7 +666,7 @@ void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
vaddr addr,
uint16_t idxmap)
{
tlb_debug("addr: %" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap);
tlb_debug("addr: %016" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap);

/* This should already be page aligned */
addr &= TARGET_PAGE_MASK;
Expand Down Expand Up @@ -728,7 +728,7 @@ static void tlb_flush_range_locked(CPUArchState *env, int midx,
*/
if (mask < f->mask || len > f->mask) {
tlb_debug("forcing full flush midx %d ("
"%" VADDR_PRIx "/%" VADDR_PRIx "+%" VADDR_PRIx ")\n",
"%016" VADDR_PRIx "/%016" VADDR_PRIx "+%016" VADDR_PRIx ")\n",
midx, addr, mask, len);
tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
return;
Expand All @@ -741,7 +741,7 @@ static void tlb_flush_range_locked(CPUArchState *env, int midx,
*/
if (((addr + len - 1) & d->large_page_mask) == d->large_page_addr) {
tlb_debug("forcing full flush midx %d ("
"%" VADDR_PRIx "/%" VADDR_PRIx ")\n",
"%016" VADDR_PRIx "/%016" VADDR_PRIx ")\n",
midx, d->large_page_addr, d->large_page_mask);
tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
return;
Expand Down Expand Up @@ -773,7 +773,7 @@ static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu,

assert_cpu_is_self(cpu);

tlb_debug("range: %" VADDR_PRIx "/%u+%" VADDR_PRIx " mmu_map:0x%x\n",
tlb_debug("range: %016" VADDR_PRIx "/%u+%016" VADDR_PRIx " mmu_map:0x%x\n",
d.addr, d.bits, d.len, d.idxmap);

qemu_spin_lock(&env_tlb(env)->c.lock);
Expand Down Expand Up @@ -1165,7 +1165,7 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
&xlat, &sz, full->attrs, &prot);
assert(sz >= TARGET_PAGE_SIZE);

tlb_debug("vaddr=%" VADDR_PRIx " paddr=0x" HWADDR_FMT_plx
tlb_debug("vaddr=%016" VADDR_PRIx " paddr=0x" HWADDR_FMT_plx
" prot=%x idx=%d\n",
addr, full->phys_addr, prot, mmu_idx);

Expand Down
32 changes: 18 additions & 14 deletions accel/tcg/ldst_atomicity.c.inc
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,11 @@ static uint64_t load_atomic8_or_exit(CPUArchState *env, uintptr_t ra, void *pv)
* another process, because the fallback start_exclusive solution
* provides no protection across processes.
*/
if (page_check_range(h2g(pv), 8, PAGE_WRITE_ORG)) {
uint64_t *p = __builtin_assume_aligned(pv, 8);
return *p;
WITH_MMAP_LOCK_GUARD() {
if (!page_check_range(h2g(pv), 8, PAGE_WRITE_ORG)) {
uint64_t *p = __builtin_assume_aligned(pv, 8);
return *p;
}
}
#endif

Expand All @@ -186,25 +188,27 @@ static Int128 load_atomic16_or_exit(CPUArchState *env, uintptr_t ra, void *pv)
return atomic16_read_ro(p);
}

#ifdef CONFIG_USER_ONLY
/*
* We can only use cmpxchg to emulate a load if the page is writable.
* If the page is not writable, then assume the value is immutable
* and requires no locking. This ignores the case of MAP_SHARED with
* another process, because the fallback start_exclusive solution
* provides no protection across processes.
*
* In system mode all guest pages are writable. For user mode,
* we must take mmap_lock so that the query remains valid until
* the write is complete -- tests/tcg/multiarch/munmap-pthread.c
* is an example that can race.
*/
if (page_check_range(h2g(p), 16, PAGE_WRITE_ORG)) {
return *p;
}
WITH_MMAP_LOCK_GUARD() {
#ifdef CONFIG_USER_ONLY
if (!page_check_range(h2g(p), 16, PAGE_WRITE_ORG)) {
return *p;
}
#endif

/*
* In system mode all guest pages are writable, and for user-only
* we have just checked writability. Try cmpxchg.
*/
if (HAVE_ATOMIC128_RW) {
return atomic16_read_rw(p);
if (HAVE_ATOMIC128_RW) {
return atomic16_read_rw(p);
}
}

/* Ultimate fallback: re-execute in serial context. */
Expand Down
4 changes: 2 additions & 2 deletions accel/tcg/user-exec.c
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ typedef struct PageFlagsNode {

static IntervalTreeRoot pageflags_root;

static PageFlagsNode *pageflags_find(target_ulong start, target_long last)
static PageFlagsNode *pageflags_find(target_ulong start, target_ulong last)
{
IntervalTreeNode *n;

Expand All @@ -153,7 +153,7 @@ static PageFlagsNode *pageflags_find(target_ulong start, target_long last)
}

static PageFlagsNode *pageflags_next(PageFlagsNode *p, target_ulong start,
target_long last)
target_ulong last)
{
IntervalTreeNode *n;

Expand Down
1 change: 1 addition & 0 deletions bsd-user/mmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ void mmap_lock(void)

void mmap_unlock(void)
{
assert(mmap_lock_count > 0);
if (--mmap_lock_count == 0) {
pthread_mutex_unlock(&mmap_mutex);
}
Expand Down
10 changes: 10 additions & 0 deletions include/exec/exec-all.h
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,15 @@ void TSA_NO_TSA mmap_lock(void);
void TSA_NO_TSA mmap_unlock(void);
bool have_mmap_lock(void);

static inline void mmap_unlock_guard(void *unused)
{
mmap_unlock();
}

#define WITH_MMAP_LOCK_GUARD() \
for (int _mmap_lock_iter __attribute__((cleanup(mmap_unlock_guard))) \
= (mmap_lock(), 0); _mmap_lock_iter == 0; _mmap_lock_iter = 1)

/**
* adjust_signal_pc:
* @pc: raw pc from the host signal ucontext_t.
Expand Down Expand Up @@ -683,6 +692,7 @@ G_NORETURN void cpu_loop_exit_sigbus(CPUState *cpu, target_ulong addr,
#else
static inline void mmap_lock(void) {}
static inline void mmap_unlock(void) {}
#define WITH_MMAP_LOCK_GUARD()

void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length);
void tlb_set_dirty(CPUState *cpu, vaddr addr);
Expand Down
1 change: 1 addition & 0 deletions linux-user/mmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ void mmap_lock(void)

void mmap_unlock(void)
{
assert(mmap_lock_count > 0);
if (--mmap_lock_count == 0) {
pthread_mutex_unlock(&mmap_mutex);
}
Expand Down
5 changes: 4 additions & 1 deletion tcg/i386/tcg-target-con-set.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
*
* C_N1_Im(...) defines a constraint set with 1 output and <m> inputs,
* except that the output must use a new register.
*
* C_Nn_Om_Ik(...) defines a constraint set with <n + m> outputs and <k>
* inputs, except that the first <n> outputs must use new registers.
*/
C_O0_I1(r)
C_O0_I2(L, L)
Expand Down Expand Up @@ -53,4 +56,4 @@ C_O2_I1(r, r, L)
C_O2_I2(a, d, a, r)
C_O2_I2(r, r, L, L)
C_O2_I3(a, d, 0, 1, r)
C_O2_I4(r, r, 0, 1, re, re)
C_N1_O1_I4(r, r, 0, 1, re, re)
2 changes: 1 addition & 1 deletion tcg/i386/tcg-target.c.inc
Original file line number Diff line number Diff line change
Expand Up @@ -3335,7 +3335,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_add2_i64:
case INDEX_op_sub2_i32:
case INDEX_op_sub2_i64:
return C_O2_I4(r, r, 0, 1, re, re);
return C_N1_O1_I4(r, r, 0, 1, re, re);

case INDEX_op_ctz_i32:
case INDEX_op_ctz_i64:
Expand Down
9 changes: 5 additions & 4 deletions tcg/ppc/tcg-target.c.inc
Original file line number Diff line number Diff line change
Expand Up @@ -2496,11 +2496,10 @@ static void tcg_out_goto_tb(TCGContext *s, int which)
ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr);
tcg_out_mem_long(s, LD, LDX, TCG_REG_TB, TCG_REG_TB, offset);

/* Direct branch will be patched by tb_target_set_jmp_target. */
/* TODO: Use direct branches when possible. */
set_jmp_insn_offset(s, which);
tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);

/* When branch is out of range, fall through to indirect. */
tcg_out32(s, BCCTR | BO_ALWAYS);

/* For the unlinked case, need to reset TCG_REG_TB. */
Expand Down Expand Up @@ -2528,10 +2527,12 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
intptr_t diff = addr - jmp_rx;
tcg_insn_unit insn;

if (USE_REG_TB) {
return;
}

if (in_range_b(diff)) {
insn = B | (diff & 0x3fffffc);
} else if (USE_REG_TB) {
insn = MTSPR | RS(TCG_REG_TB) | CTR;
} else {
insn = NOP;
}
Expand Down
8 changes: 5 additions & 3 deletions tcg/s390x/tcg-target-con-set.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
* C_On_Im(...) defines a constraint set with <n> outputs and <m> inputs.
* Each operand should be a sequence of constraint letters as defined by
* tcg-target-con-str.h; the constraint combination is inclusive or.
*
* C_Nn_Om_Ik(...) defines a constraint set with <n + m> outputs and <k>
* inputs, except that the first <n> outputs must use new registers.
*/
C_O0_I1(r)
C_O0_I2(r, r)
Expand Down Expand Up @@ -41,6 +44,5 @@ C_O2_I1(o, m, r)
C_O2_I2(o, m, 0, r)
C_O2_I2(o, m, r, r)
C_O2_I3(o, m, 0, 1, r)
C_O2_I4(r, r, 0, 1, rA, r)
C_O2_I4(r, r, 0, 1, ri, r)
C_O2_I4(r, r, 0, 1, r, r)
C_N1_O1_I4(r, r, 0, 1, ri, r)
C_N1_O1_I4(r, r, 0, 1, rA, r)
4 changes: 2 additions & 2 deletions tcg/s390x/tcg-target.c.inc
Original file line number Diff line number Diff line change
Expand Up @@ -3229,11 +3229,11 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)

case INDEX_op_add2_i32:
case INDEX_op_sub2_i32:
return C_O2_I4(r, r, 0, 1, ri, r);
return C_N1_O1_I4(r, r, 0, 1, ri, r);

case INDEX_op_add2_i64:
case INDEX_op_sub2_i64:
return C_O2_I4(r, r, 0, 1, rA, r);
return C_N1_O1_I4(r, r, 0, 1, rA, r);

case INDEX_op_st_vec:
return C_O0_I2(v, r);
Expand Down
8 changes: 7 additions & 1 deletion tcg/tcg.c
Original file line number Diff line number Diff line change
Expand Up @@ -648,6 +648,7 @@ static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
#define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
Expand All @@ -668,6 +669,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

Expand All @@ -687,6 +689,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
#define C_O2_I2(O1, O2, I1, I2) { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
Expand All @@ -706,6 +709,7 @@ static const TCGTargetOpDef constraint_sets[] = {
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

Expand All @@ -725,6 +729,7 @@ static const TCGTargetOpDef constraint_sets[] = {
#define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

#include "tcg-target.c.inc"

Expand Down Expand Up @@ -4703,7 +4708,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
* dead after the instruction, we must allocate a new
* register and move it.
*/
if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
if (temp_readonly(ts) || !IS_DEAD_ARG(i)
|| def->args_ct[arg_ct->alias_index].newreg) {
allocate_new_reg = true;
} else if (ts->val_type == TEMP_VAL_REG) {
/*
Expand Down

0 comments on commit 4b14424

Please sign in to comment.