Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20181216' into staging

- Remove retranslation remnants
- Return success from patch_reloc
- Preserve 32-bit values as zero-extended on x86_64
- Make bswap during memory ops optional
- Clean up xxhash
- Revert constant pooling for tcg/sparc/

# gpg: Signature made Mon 17 Dec 2018 03:25:21 GMT
# gpg:                using RSA key 64DF38E8AF7E215F
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>"
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A  05C0 64DF 38E8 AF7E 215F

* remotes/rth/tags/pull-tcg-20181216: (33 commits)
  xxhash: match output against the original xxhash32
  include: move exec/tb-hash-xx.h to qemu/xxhash.h
  exec: introduce qemu_xxhash{2,4,5,6,7}
  qht-bench: document -p flag
  tcg: Drop nargs from tcg_op_insert_{before,after}
  tcg/mips: Improve the add2/sub2 command to use TCG_TARGET_REG_BITS
  tcg: Add TCG_TARGET_HAS_MEMORY_BSWAP
  tcg/optimize: Optimize bswap
  tcg: Clean up generic bswap64
  tcg: Clean up generic bswap32
  tcg/i386: Add setup_guest_base_seg for FreeBSD
  tcg/i386: Precompute all guest_base parameters
  tcg/i386: Assume 32-bit values are zero-extended
  tcg/i386: Implement INDEX_op_extr{lh}_i64_i32 for 32-bit guests
  tcg/i386: Propagate is64 to tcg_out_qemu_ld_slow_path
  tcg/i386: Propagate is64 to tcg_out_qemu_ld_direct
  tcg/s390: Return false on failure from patch_reloc
  tcg/ppc: Return false on failure from patch_reloc
  tcg/arm: Return false on failure from patch_reloc
  tcg/aarch64: Return false on failure from patch_reloc
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
pm215 committed Dec 17, 2018
2 parents 139108f + b7c2cd0 commit f163448
Showing 24 changed files with 462 additions and 402 deletions.
4 changes: 2 additions & 2 deletions include/exec/tb-hash.h
@@ -20,7 +20,7 @@
#ifndef EXEC_TB_HASH_H
#define EXEC_TB_HASH_H

#include "exec/tb-hash-xx.h"
#include "qemu/xxhash.h"

#ifdef CONFIG_SOFTMMU

@@ -61,7 +61,7 @@ static inline
uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags,
uint32_t cf_mask, uint32_t trace_vcpu_dstate)
{
return tb_hash_func7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate);
return qemu_xxhash7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate);
}

#endif
47 changes: 34 additions & 13 deletions include/exec/tb-hash-xx.h → include/qemu/xxhash.h
@@ -31,8 +31,8 @@
* - xxHash source repository : https://github.com/Cyan4973/xxHash
*/

#ifndef EXEC_TB_HASH_XX_H
#define EXEC_TB_HASH_XX_H
#ifndef QEMU_XXHASH_H
#define QEMU_XXHASH_H

#include "qemu/bitops.h"

@@ -42,23 +42,23 @@
#define PRIME32_4 668265263U
#define PRIME32_5 374761393U

#define TB_HASH_XX_SEED 1
#define QEMU_XXHASH_SEED 1

/*
* xxhash32, customized for input variables that are not guaranteed to be
* contiguous in memory.
*/
static inline uint32_t
tb_hash_func7(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f, uint32_t g)
qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g)
{
uint32_t v1 = TB_HASH_XX_SEED + PRIME32_1 + PRIME32_2;
uint32_t v2 = TB_HASH_XX_SEED + PRIME32_2;
uint32_t v3 = TB_HASH_XX_SEED + 0;
uint32_t v4 = TB_HASH_XX_SEED - PRIME32_1;
uint32_t a = a0 >> 32;
uint32_t b = a0;
uint32_t c = b0 >> 32;
uint32_t d = b0;
uint32_t v1 = QEMU_XXHASH_SEED + PRIME32_1 + PRIME32_2;
uint32_t v2 = QEMU_XXHASH_SEED + PRIME32_2;
uint32_t v3 = QEMU_XXHASH_SEED + 0;
uint32_t v4 = QEMU_XXHASH_SEED - PRIME32_1;
uint32_t a = ab;
uint32_t b = ab >> 32;
uint32_t c = cd;
uint32_t d = cd >> 32;
uint32_t h32;

v1 += a * PRIME32_2;
@@ -98,4 +98,25 @@ tb_hash_func7(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f, uint32_t g)
return h32;
}

#endif /* EXEC_TB_HASH_XX_H */
static inline uint32_t qemu_xxhash2(uint64_t ab)
{
return qemu_xxhash7(ab, 0, 0, 0, 0);
}

static inline uint32_t qemu_xxhash4(uint64_t ab, uint64_t cd)
{
return qemu_xxhash7(ab, cd, 0, 0, 0);
}

static inline uint32_t qemu_xxhash5(uint64_t ab, uint64_t cd, uint32_t e)
{
return qemu_xxhash7(ab, cd, e, 0, 0);
}

static inline uint32_t qemu_xxhash6(uint64_t ab, uint64_t cd, uint32_t e,
uint32_t f)
{
return qemu_xxhash7(ab, cd, e, f, 0);
}

#endif /* QEMU_XXHASH_H */
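
For orientation, the two uint64_t arguments of qemu_xxhash7 each carry a pair of 32-bit lanes, now with the low word hashed first to match the original xxhash32 lane ordering. A minimal caller sketch (the function below is hypothetical and assumes only that qemu/xxhash.h is on the include path):

#include <stdint.h>
#include "qemu/xxhash.h"

/* Hypothetical: reduce a (pc, flags) pair to an index into a
 * power-of-two-sized hash table.  pc fills the ab lanes, flags lane e. */
static inline uint32_t example_bucket(uint64_t pc, uint32_t flags,
                                      uint32_t n_buckets)
{
    uint32_t h = qemu_xxhash5(pc, 0, flags);
    return h & (n_buckets - 1);    /* n_buckets must be a power of two */
}
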
1 change: 1 addition & 0 deletions tcg/aarch64/tcg-target.h
@@ -137,6 +137,7 @@ typedef enum {
#define TCG_TARGET_HAS_mul_vec 1

#define TCG_TARGET_DEFAULT_MO (0)
#define TCG_TARGET_HAS_MEMORY_BSWAP 1

static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
{
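
The new TCG_TARGET_HAS_MEMORY_BSWAP define advertises that the backend can fold a byte swap into the memory operation itself, so generic code need not emit a separate swap of the loaded or stored value. For reference, a standalone sketch of the operation being folded (illustrative; not QEMU's implementation):

#include <stdint.h>

/* Plain 32-bit byte swap: what a backend without
 * TCG_TARGET_HAS_MEMORY_BSWAP leaves to a separate TCG op. */
static inline uint32_t bswap32_ref(uint32_t x)
{
    return ((x & 0x000000ffu) << 24) |
           ((x & 0x0000ff00u) <<  8) |
           ((x & 0x00ff0000u) >>  8) |
           ((x & 0xff000000u) >> 24);
}
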
71 changes: 24 additions & 47 deletions tcg/aarch64/tcg-target.inc.c
@@ -78,48 +78,40 @@ static const int tcg_target_call_oarg_regs[1] = {
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif

static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
static inline bool reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
ptrdiff_t offset = target - code_ptr;
tcg_debug_assert(offset == sextract64(offset, 0, 26));
/* read instruction, mask away previous PC_REL26 parameter contents,
set the proper offset, then write back the instruction. */
*code_ptr = deposit32(*code_ptr, 0, 26, offset);
}

static inline void reloc_pc26_atomic(tcg_insn_unit *code_ptr,
tcg_insn_unit *target)
{
ptrdiff_t offset = target - code_ptr;
tcg_insn_unit insn;
tcg_debug_assert(offset == sextract64(offset, 0, 26));
/* read instruction, mask away previous PC_REL26 parameter contents,
set the proper offset, then write back the instruction. */
insn = atomic_read(code_ptr);
atomic_set(code_ptr, deposit32(insn, 0, 26, offset));
if (offset == sextract64(offset, 0, 26)) {
/* read instruction, mask away previous PC_REL26 parameter contents,
set the proper offset, then write back the instruction. */
*code_ptr = deposit32(*code_ptr, 0, 26, offset);
return true;
}
return false;
}

static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
static inline bool reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
ptrdiff_t offset = target - code_ptr;
tcg_debug_assert(offset == sextract64(offset, 0, 19));
*code_ptr = deposit32(*code_ptr, 5, 19, offset);
if (offset == sextract64(offset, 0, 19)) {
*code_ptr = deposit32(*code_ptr, 5, 19, offset);
return true;
}
return false;
}

static inline void patch_reloc(tcg_insn_unit *code_ptr, int type,
static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type,
intptr_t value, intptr_t addend)
{
tcg_debug_assert(addend == 0);
switch (type) {
case R_AARCH64_JUMP26:
case R_AARCH64_CALL26:
reloc_pc26(code_ptr, (tcg_insn_unit *)value);
break;
return reloc_pc26(code_ptr, (tcg_insn_unit *)value);
case R_AARCH64_CONDBR19:
reloc_pc19(code_ptr, (tcg_insn_unit *)value);
break;
return reloc_pc19(code_ptr, (tcg_insn_unit *)value);
default:
tcg_abort();
g_assert_not_reached();
}
}
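
This shape recurs in every backend touched by the series: the relocation helper now reports, rather than asserts, that a displacement fits its field, and patch_reloc forwards the result to its caller. A standalone sketch of the fits-in-a-signed-N-bit-field test, mirroring QEMU's sextract64 (the names below are illustrative):

#include <stdbool.h>
#include <stdint.h>

/* Sign-extend the low `len` bits of `value`, 0 < len <= 64,
 * as sextract64(value, 0, len) does. */
static inline int64_t sext_low(uint64_t value, unsigned len)
{
    return (int64_t)(value << (64 - len)) >> (64 - len);
}

/* An offset fits a signed len-bit field exactly when sign-extending
 * its low len bits reproduces it unchanged. */
static inline bool fits_signed(int64_t offset, unsigned len)
{
    return offset == sext_low((uint64_t)offset, len);
}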

@@ -1141,23 +1133,6 @@ static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
}
}

static inline void tcg_out_goto_noaddr(TCGContext *s)
{
/* We pay attention here to not modify the branch target by reading from
the buffer. This ensure that caches and memory are kept coherent during
retranslation. Mask away possible garbage in the high bits for the
first translation, while keeping the offset bits for retranslation. */
uint32_t old = tcg_in32(s);
tcg_out_insn(s, 3206, B, old);
}

static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
{
/* See comments in tcg_out_goto_noaddr. */
uint32_t old = tcg_in32(s) >> 5;
tcg_out_insn(s, 3202, B_C, c, old);
}

static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
{
tcg_out_insn(s, 3207, BLR, reg);
@@ -1204,7 +1179,7 @@ static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
{
if (!l->has_value) {
tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
tcg_out_goto_noaddr(s);
tcg_out_insn(s, 3206, B, 0);
} else {
tcg_out_goto(s, l->u.value_ptr);
}
@@ -1415,7 +1390,8 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
TCGMemOp opc = get_memop(oi);
TCGMemOp size = opc & MO_SIZE;

reloc_pc19(lb->label_ptr[0], s->code_ptr);
bool ok = reloc_pc19(lb->label_ptr[0], s->code_ptr);
tcg_debug_assert(ok);

tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
@@ -1437,7 +1413,8 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
TCGMemOp opc = get_memop(oi);
TCGMemOp size = opc & MO_SIZE;

reloc_pc19(lb->label_ptr[0], s->code_ptr);
bool ok = reloc_pc19(lb->label_ptr[0], s->code_ptr);
tcg_debug_assert(ok);

tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
@@ -1535,7 +1512,7 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,

/* If not equal, we jump to the slow path. */
*label_ptr = s->code_ptr;
tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
}

#endif /* CONFIG_SOFTMMU */
1 change: 1 addition & 0 deletions tcg/arm/tcg-target.h
@@ -131,6 +131,7 @@ enum {
};

#define TCG_TARGET_DEFAULT_MO (0)
#define TCG_TARGET_HAS_MEMORY_BSWAP 1

static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
{
55 changes: 21 additions & 34 deletions tcg/arm/tcg-target.inc.c
@@ -187,27 +187,23 @@ static const uint8_t tcg_cond_to_arm_cond[] = {
[TCG_COND_GTU] = COND_HI,
};

static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
static inline bool reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
*code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff);
}

static inline void reloc_pc24_atomic(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
tcg_insn_unit insn = atomic_read(code_ptr);
tcg_debug_assert(offset == sextract32(offset, 0, 24));
atomic_set(code_ptr, deposit32(insn, 0, 24, offset));
if (offset == sextract32(offset, 0, 24)) {
*code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff);
return true;
}
return false;
}

static void patch_reloc(tcg_insn_unit *code_ptr, int type,
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
intptr_t value, intptr_t addend)
{
tcg_debug_assert(addend == 0);

if (type == R_ARM_PC24) {
reloc_pc24(code_ptr, (tcg_insn_unit *)value);
return reloc_pc24(code_ptr, (tcg_insn_unit *)value);
} else if (type == R_ARM_PC13) {
intptr_t diff = value - (uintptr_t)(code_ptr + 2);
tcg_insn_unit insn = *code_ptr;
@@ -221,7 +217,11 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type,
} else {
int rd = extract32(insn, 12, 4);
int rt = rd == TCG_REG_PC ? TCG_REG_TMP : rd;
assert(diff >= 0x1000 && diff < 0x100000);

if (diff < 0x1000 || diff >= 0x100000) {
return false;
}

/* add rt, pc, #high */
*code_ptr++ = ((insn & 0xf0000000) | (1 << 25) | ARITH_ADD
| (TCG_REG_PC << 16) | (rt << 12)
Expand All @@ -237,6 +237,7 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type,
} else {
g_assert_not_reached();
}
return true;
}

#define TCG_CT_CONST_ARM 0x100
@@ -374,22 +375,6 @@ static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset)
(((offset - 8) >> 2) & 0x00ffffff));
}

static inline void tcg_out_b_noaddr(TCGContext *s, int cond)
{
/* We pay attention here to not modify the branch target by masking
the corresponding bytes. This ensure that caches and memory are
kept coherent during retranslation. */
tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0a));
}

static inline void tcg_out_bl_noaddr(TCGContext *s, int cond)
{
/* We pay attention here to not modify the branch target by masking
the corresponding bytes. This ensure that caches and memory are
kept coherent during retranslation. */
tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0b));
}

static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset)
{
tcg_out32(s, (cond << 28) | 0x0b000000 |
@@ -1090,7 +1075,7 @@ static inline void tcg_out_goto_label(TCGContext *s, int cond, TCGLabel *l)
tcg_out_goto(s, cond, l->u.value_ptr);
} else {
tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, l, 0);
tcg_out_b_noaddr(s, cond);
tcg_out_b(s, cond, 0);
}
}

@@ -1395,7 +1380,8 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
TCGMemOp opc = get_memop(oi);
void *func;

reloc_pc24(lb->label_ptr[0], s->code_ptr);
bool ok = reloc_pc24(lb->label_ptr[0], s->code_ptr);
tcg_debug_assert(ok);

argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
if (TARGET_LONG_BITS == 64) {
@@ -1455,7 +1441,8 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
TCGMemOpIdx oi = lb->oi;
TCGMemOp opc = get_memop(oi);

reloc_pc24(lb->label_ptr[0], s->code_ptr);
bool ok = reloc_pc24(lb->label_ptr[0], s->code_ptr);
tcg_debug_assert(ok);

argreg = TCG_REG_R0;
argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
@@ -1636,7 +1623,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
/* This a conditional BL only to load a pointer within this opcode into LR
for the slow path. We will not be using the value for a tail call. */
label_ptr = s->code_ptr;
tcg_out_bl_noaddr(s, COND_NE);
tcg_out_bl(s, COND_NE, 0);

tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);

@@ -1768,7 +1755,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)

/* The conditional call must come last, as we're going to return here. */
label_ptr = s->code_ptr;
tcg_out_bl_noaddr(s, COND_NE);
tcg_out_bl(s, COND_NE, 0);

add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
s->code_ptr, label_ptr);
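
Taken together, the qemu_ld/st hunks follow one emit-then-patch shape: emit the conditional branch with a zero placeholder offset, remember the site in label_ptr, lay down the slow path, then patch the branch via reloc_pc24 and assert success, since the two sites are nearby by construction. A self-contained miniature of that shape (all names below are invented for illustration):

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Patch a 24-bit ARM branch offset, reporting range overflow the way
 * reloc_pc24 above now does. */
static bool patch_b24(uint32_t *insn, const uint32_t *target)
{
    ptrdiff_t offset = ((const char *)target - (const char *)insn - 8) >> 2;
    if (offset != (int32_t)((uint32_t)offset << 8) >> 8) {  /* 24 bits? */
        return false;
    }
    *insn = (*insn & ~0xffffffu) | ((uint32_t)offset & 0xffffffu);
    return true;
}

int main(void)
{
    uint32_t buf[8] = { 0 };
    uint32_t *label_ptr = &buf[0];      /* branch site, offset TBD */

    buf[0] = 0x1b000000u;               /* blne with zero placeholder */
    /* ... fast path emitted here; slow path begins at buf[4] ... */

    bool ok = patch_b24(label_ptr, &buf[4]);
    assert(ok);                          /* nearby by construction */
    return 0;
}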
