Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20170907' into staging

TCG constant pools

# gpg: Signature made Thu 07 Sep 2017 23:35:45 BST
# gpg:                using RSA key 0x64DF38E8AF7E215F
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A  05C0 64DF 38E8 AF7E 215F

* remotes/rth/tags/pull-tcg-20170907: (23 commits)
  tcg/ppc: Use constant pool for movi
  tcg/ppc: Look for shifted constants
  tcg/ppc: Change TCG_REG_RA to TCG_REG_TB
  tcg/arm: Use constant pool for call
  tcg/arm: Use constant pool for movi
  tcg/arm: Extract INSN_NOP
  tcg/arm: Code rearrangement
  tcg/arm: Tighten tlb indexing offset test
  tcg/arm: Improve tlb load for armv7
  tcg/sparc: Use constant pool for movi
  tcg/sparc: Introduce TCG_REG_TB
  tcg/aarch64: Use constant pool for movi
  tcg/s390: Use constant pool for cmpi
  tcg/s390: Use constant pool for xori
  tcg/s390: Use constant pool for ori
  tcg/s390: Use constant pool for andi
  tcg/s390: Use constant pool for movi
  tcg/s390: Fix sign of patch_reloc addend
  tcg/s390: Introduce TCG_REG_TB
  tcg/i386: Store out-of-range call targets in constant pool
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
pm215 committed Sep 8, 2017
2 parents e6d767b + 53c89ef commit b3a1179
Showing 25 changed files with 1,444 additions and 1,004 deletions.
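The common thread in this pull is the TCG constant-pool machinery: instead of synthesizing a large immediate inline with a long move/or sequence, a backend emits a single pc-relative load and defers the constant to a pool placed after the generated code (new_pool_label in the aarch64 diff below is the real entry point). The following is an editor's sketch of that pattern in plain C; the types, helper names, and pool layout are assumptions for illustration, not QEMU's tcg-pool.inc.c.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* One deferred constant: the value, and which load insn must be patched. */
typedef struct PoolEntry {
    struct PoolEntry *next;
    uint64_t value;
    size_t insn_offset;          /* offset of the pc-relative load */
} PoolEntry;

typedef struct {
    uint8_t *buf;                /* generated-code buffer */
    size_t code_size;            /* bytes of code emitted so far */
    PoolEntry *pool;             /* pending constants */
} CodeGen;

/* Emit-time half: record the constant instead of synthesizing it inline. */
static void pool_add(CodeGen *s, PoolEntry *e, uint64_t value, size_t insn_offset)
{
    e->value = value;
    e->insn_offset = insn_offset;
    e->next = s->pool;
    s->pool = e;
}

/* Finish-time half: append the constants 8-byte aligned after the code;
 * each load's pc-relative displacement would then be patched in. */
static size_t pool_flush(CodeGen *s)
{
    size_t off = (s->code_size + 7) & ~(size_t)7;
    PoolEntry *e;
    for (e = s->pool; e; e = e->next) {
        memcpy(s->buf + off, &e->value, sizeof(e->value));
        /* a real backend encodes (off - e->insn_offset) into the insn */
        off += sizeof(e->value);
    }
    return off;                  /* total size: code + data */
}

int main(void)
{
    uint8_t buf[64] = { 0 };
    CodeGen s = { buf, 16, NULL };            /* pretend 16 bytes of code */
    PoolEntry e;
    pool_add(&s, &e, 0x123456789abcdef0ull, 8);
    return pool_flush(&s) == 24 ? 0 : 1;      /* 16 code + 8 pool bytes */
}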
35 changes: 35 additions & 0 deletions accel/tcg/cpu-exec.c
@@ -329,6 +329,41 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
     return qht_lookup(&tcg_ctx.tb_ctx.htable, tb_cmp, &desc, h);
 }

+void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr)
+{
+    if (TCG_TARGET_HAS_direct_jump) {
+        uintptr_t offset = tb->jmp_target_arg[n];
+        uintptr_t tc_ptr = (uintptr_t)tb->tc_ptr;
+        tb_target_set_jmp_target(tc_ptr, tc_ptr + offset, addr);
+    } else {
+        tb->jmp_target_arg[n] = addr;
+    }
+}
+
+/* Called with tb_lock held. */
+static inline void tb_add_jump(TranslationBlock *tb, int n,
+                               TranslationBlock *tb_next)
+{
+    assert(n < ARRAY_SIZE(tb->jmp_list_next));
+    if (tb->jmp_list_next[n]) {
+        /* Another thread has already done this while we were
+         * outside of the lock; nothing to do in this case */
+        return;
+    }
+    qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
+                           "Linking TBs %p [" TARGET_FMT_lx
+                           "] index %d -> %p [" TARGET_FMT_lx "]\n",
+                           tb->tc_ptr, tb->pc, n,
+                           tb_next->tc_ptr, tb_next->pc);
+
+    /* patch the native jump address */
+    tb_set_jmp_target(tb, n, (uintptr_t)tb_next->tc_ptr);
+
+    /* add in TB jmp circular list */
+    tb->jmp_list_next[n] = tb_next->jmp_list_first;
+    tb_next->jmp_list_first = (uintptr_t)tb | n;
+}
+
 static inline TranslationBlock *tb_find(CPUState *cpu,
                                         TranslationBlock *last_tb,
                                         int tb_exit)
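A side note on the tb_add_jump code moved above: its last line stores (uintptr_t)tb | n, packing the jump-slot index into the low bits of the TB pointer, which are free because a TranslationBlock is more strictly aligned than one byte. A standalone illustration of that tagging trick (editor's sketch, not QEMU code):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    struct { void *p; } tb_storage;     /* stand-in for a TranslationBlock;
                                           pointer alignment leaves the low
                                           two bits free on common ABIs */
    uintptr_t tb = (uintptr_t)&tb_storage;
    int n = 1;                          /* jump slot index, 0 or 1 */

    assert((tb & 3) == 0);              /* alignment keeps the low bits free */
    uintptr_t tagged = tb | (uintptr_t)n;   /* jmp_list_first-style packing */

    printf("tb=%p slot=%d\n",
           (void *)(tagged & ~(uintptr_t)3),   /* recover the pointer */
           (int)(tagged & 3));                 /* recover the index */
    return 0;
}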
36 changes: 28 additions & 8 deletions accel/tcg/translate-all.c
@@ -1289,13 +1289,13 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
     tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID;
     tcg_ctx.tb_jmp_reset_offset = tb->jmp_reset_offset;
-#ifdef USE_DIRECT_JUMP
-    tcg_ctx.tb_jmp_insn_offset = tb->jmp_insn_offset;
-    tcg_ctx.tb_jmp_target_addr = NULL;
-#else
-    tcg_ctx.tb_jmp_insn_offset = NULL;
-    tcg_ctx.tb_jmp_target_addr = tb->jmp_target_addr;
-#endif
+    if (TCG_TARGET_HAS_direct_jump) {
+        tcg_ctx.tb_jmp_insn_offset = tb->jmp_target_arg;
+        tcg_ctx.tb_jmp_target_addr = NULL;
+    } else {
+        tcg_ctx.tb_jmp_insn_offset = NULL;
+        tcg_ctx.tb_jmp_target_addr = tb->jmp_target_arg;
+    }

 #ifdef CONFIG_PROFILER
     tcg_ctx.tb_count++;
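Note the idiom in the hunk above: the #ifdef USE_DIRECT_JUMP block becomes a plain C if on the compile-time constant TCG_TARGET_HAS_direct_jump, so both arms must now compile on every target and the compiler discards the untaken one. A minimal sketch of the pattern, with hypothetical names:

#include <stdio.h>

#define HAS_FAST_PATH 1   /* per-target constant, like TCG_TARGET_HAS_direct_jump */

static void do_work(int x)
{
    if (HAS_FAST_PATH) {      /* folded away at compile time... */
        printf("fast: %d\n", x);
    } else {                  /* ...but this branch must still type-check */
        printf("slow: %d\n", x);
    }
}

int main(void)
{
    do_work(42);
    return 0;
}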
@@ -1329,7 +1329,27 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
         qemu_log_in_addr_range(tb->pc)) {
         qemu_log_lock();
         qemu_log("OUT: [size=%d]\n", gen_code_size);
-        log_disas(tb->tc_ptr, gen_code_size);
+        if (tcg_ctx.data_gen_ptr) {
+            size_t code_size = tcg_ctx.data_gen_ptr - tb->tc_ptr;
+            size_t data_size = gen_code_size - code_size;
+            size_t i;
+
+            log_disas(tb->tc_ptr, code_size);
+
+            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
+                if (sizeof(tcg_target_ulong) == 8) {
+                    qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n",
+                             (uintptr_t)tcg_ctx.data_gen_ptr + i,
+                             *(uint64_t *)(tcg_ctx.data_gen_ptr + i));
+                } else {
+                    qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n",
+                             (uintptr_t)tcg_ctx.data_gen_ptr + i,
+                             *(uint32_t *)(tcg_ctx.data_gen_ptr + i));
+                }
+            }
+        } else {
+            log_disas(tb->tc_ptr, gen_code_size);
+        }
         qemu_log("\n");
         qemu_log_flush();
         qemu_log_unlock();
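With constant pools the generated buffer is laid out as [code | pool data], so the new logging path above disassembles only up to data_gen_ptr and prints the tail as raw .quad/.long words. A self-contained approximation of that dump loop (editor's sketch over a fake buffer):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    /* Stand-in for the pool data that follows the generated code. */
    uint64_t data[2] = { 0x12345678deadbeefull, 0xcafef00dull };
    uint8_t *data_gen_ptr = (uint8_t *)data;
    size_t data_size = sizeof(data);
    size_t i;

    for (i = 0; i < data_size; i += sizeof(uint64_t)) {
        uint64_t v;
        memcpy(&v, data_gen_ptr + i, sizeof(v));    /* unaligned-safe read */
        printf("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n",
               (uintptr_t)(data_gen_ptr + i), v);
    }
    return 0;
}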
3 changes: 2 additions & 1 deletion include/elf.h
@@ -942,8 +942,9 @@ typedef struct {
 #define R_390_TLS_DTPOFF    55  /* Offset in TLS block.  */
 #define R_390_TLS_TPOFF     56  /* Negate offset in static TLS
                                    block.  */
+#define R_390_20            57
 /* Keep this the last entry. */
-#define R_390_NUM           57
+#define R_390_NUM           58

 /* x86-64 relocation types */
 #define R_X86_64_NONE       0   /* No reloc */
95 changes: 3 additions & 92 deletions include/exec/exec-all.h
@@ -301,15 +301,6 @@ static inline void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr)
 #define CODE_GEN_AVG_BLOCK_SIZE 150
 #endif

-#if defined(_ARCH_PPC) \
-    || defined(__x86_64__) || defined(__i386__) \
-    || defined(__sparc__) || defined(__aarch64__) \
-    || defined(__s390x__) || defined(__mips__) \
-    || defined(CONFIG_TCG_INTERPRETER)
-/* NOTE: Direct jump patching must be atomic to be thread-safe. */
-#define USE_DIRECT_JUMP
-#endif
-
 struct TranslationBlock {
     target_ulong pc;   /* simulated PC corresponding to this block (EIP + CS base) */
     target_ulong cs_base; /* CS base for this block */
@@ -347,11 +338,8 @@ struct TranslationBlock {
      */
     uint16_t jmp_reset_offset[2]; /* offset of original jump target */
 #define TB_JMP_RESET_OFFSET_INVALID 0xffff /* indicates no jump generated */
-#ifdef USE_DIRECT_JUMP
-    uint16_t jmp_insn_offset[2]; /* offset of native jump instruction */
-#else
-    uintptr_t jmp_target_addr[2]; /* target address for indirect jump */
-#endif
+    uintptr_t jmp_target_arg[2]; /* target address or offset */

     /* Each TB has an associated circular list of TBs jumping to this one.
      * jmp_list_first points to the first TB jumping to this one.
      * jmp_list_next is used to point to the next TB in a list.
@@ -373,84 +361,7 @@ void tb_flush(CPUState *cpu);
 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
 TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
                                    target_ulong cs_base, uint32_t flags);

-#if defined(USE_DIRECT_JUMP)
-
-#if defined(CONFIG_TCG_INTERPRETER)
-static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
-{
-    /* patch the branch destination */
-    atomic_set((int32_t *)jmp_addr, addr - (jmp_addr + 4));
-    /* no need to flush icache explicitly */
-}
-#elif defined(_ARCH_PPC)
-void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr);
-#define tb_set_jmp_target1 ppc_tb_set_jmp_target
-#elif defined(__i386__) || defined(__x86_64__)
-static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
-{
-    /* patch the branch destination */
-    atomic_set((int32_t *)jmp_addr, addr - (jmp_addr + 4));
-    /* no need to flush icache explicitly */
-}
-#elif defined(__s390x__)
-static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
-{
-    /* patch the branch destination */
-    intptr_t disp = addr - (jmp_addr - 2);
-    atomic_set((int32_t *)jmp_addr, disp / 2);
-    /* no need to flush icache explicitly */
-}
-#elif defined(__aarch64__)
-void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr);
-#define tb_set_jmp_target1 aarch64_tb_set_jmp_target
-#elif defined(__sparc__) || defined(__mips__)
-void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr);
-#else
-#error tb_set_jmp_target1 is missing
-#endif
-
-static inline void tb_set_jmp_target(TranslationBlock *tb,
-                                     int n, uintptr_t addr)
-{
-    uint16_t offset = tb->jmp_insn_offset[n];
-    tb_set_jmp_target1((uintptr_t)(tb->tc_ptr + offset), addr);
-}
-
-#else
-
-/* set the jump target */
-static inline void tb_set_jmp_target(TranslationBlock *tb,
-                                     int n, uintptr_t addr)
-{
-    tb->jmp_target_addr[n] = addr;
-}
-
-#endif
-
-/* Called with tb_lock held. */
-static inline void tb_add_jump(TranslationBlock *tb, int n,
-                               TranslationBlock *tb_next)
-{
-    assert(n < ARRAY_SIZE(tb->jmp_list_next));
-    if (tb->jmp_list_next[n]) {
-        /* Another thread has already done this while we were
-         * outside of the lock; nothing to do in this case */
-        return;
-    }
-    qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
-                           "Linking TBs %p [" TARGET_FMT_lx
-                           "] index %d -> %p [" TARGET_FMT_lx "]\n",
-                           tb->tc_ptr, tb->pc, n,
-                           tb_next->tc_ptr, tb_next->pc);
-
-    /* patch the native jump address */
-    tb_set_jmp_target(tb, n, (uintptr_t)tb_next->tc_ptr);
-
-    /* add in TB jmp circular list */
-    tb->jmp_list_next[n] = tb_next->jmp_list_first;
-    tb_next->jmp_list_first = (uintptr_t)tb | n;
-}
+void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr);

 /* GETPC is the true target of the return instruction that we'll execute. */
 #if defined(CONFIG_TCG_INTERPRETER)
10 changes: 9 additions & 1 deletion tcg/aarch64/tcg-target.h
@@ -111,12 +111,20 @@ typedef enum {
 #define TCG_TARGET_HAS_muls2_i64        0
 #define TCG_TARGET_HAS_muluh_i64        1
 #define TCG_TARGET_HAS_mulsh_i64        1
+#define TCG_TARGET_HAS_direct_jump      1
+
+#define TCG_TARGET_DEFAULT_MO (0)

 static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
 {
     __builtin___clear_cache((char *)start, (char *)stop);
 }

-#define TCG_TARGET_DEFAULT_MO (0)
+void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
+
+#ifdef CONFIG_SOFTMMU
+#define TCG_TARGET_NEED_LDST_LABELS
+#endif
+#define TCG_TARGET_NEED_POOL_LABELS

 #endif /* AARCH64_TCG_TARGET_H */
78 changes: 39 additions & 39 deletions tcg/aarch64/tcg-target.inc.c
@@ -10,7 +10,7 @@
  * See the COPYING file in the top-level directory for details.
  */

-#include "tcg-be-ldst.h"
+#include "tcg-pool.inc.c"
 #include "qemu/bitops.h"

 /* We're going to re-use TCGType in setting of the SF bit, which controls
@@ -588,9 +588,11 @@ static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                          tcg_target_long value)
 {
-    int i, wantinv, shift;
     tcg_target_long svalue = value;
     tcg_target_long ivalue = ~value;
+    tcg_target_long t0, t1, t2;
+    int s0, s1;
+    AArch64Insn opc;

     /* For 32-bit values, discard potential garbage in value. For 64-bit
        values within [2**31, 2**32-1], we can create smaller sequences by
@@ -639,38 +641,29 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
         }
     }

-    /* Would it take fewer insns to begin with MOVN?  For the value and its
-       inverse, count the number of 16-bit lanes that are 0. */
-    for (i = wantinv = 0; i < 64; i += 16) {
-        tcg_target_long mask = 0xffffull << i;
-        wantinv -= ((value & mask) == 0);
-        wantinv += ((ivalue & mask) == 0);
-    }
-
-    if (wantinv <= 0) {
-        /* Find the lowest lane that is not 0x0000. */
-        shift = ctz64(value) & (63 & -16);
-        tcg_out_insn(s, 3405, MOVZ, type, rd, value >> shift, shift);
-        /* Clear out the lane that we just set. */
-        value &= ~(0xffffUL << shift);
-        /* Iterate until all non-zero lanes have been processed. */
-        while (value) {
-            shift = ctz64(value) & (63 & -16);
-            tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift);
-            value &= ~(0xffffUL << shift);
-        }
+    /* Would it take fewer insns to begin with MOVN? */
+    if (ctpop64(value) >= 32) {
+        t0 = ivalue;
+        opc = I3405_MOVN;
     } else {
-        /* Like above, but with the inverted value and MOVN to start. */
-        shift = ctz64(ivalue) & (63 & -16);
-        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue >> shift, shift);
-        ivalue &= ~(0xffffUL << shift);
-        while (ivalue) {
-            shift = ctz64(ivalue) & (63 & -16);
-            /* Provide MOVK with the non-inverted value. */
-            tcg_out_insn(s, 3405, MOVK, type, rd, ~(ivalue >> shift), shift);
-            ivalue &= ~(0xffffUL << shift);
-        }
+        t0 = value;
+        opc = I3405_MOVZ;
     }
+    s0 = ctz64(t0) & (63 & -16);
+    t1 = t0 & ~(0xffffUL << s0);
+    s1 = ctz64(t1) & (63 & -16);
+    t2 = t1 & ~(0xffffUL << s1);
+    if (t2 == 0) {
+        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
+        if (t1 != 0) {
+            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
+        }
+        return;
+    }
+
+    /* For more than 2 insns, dump it into the constant pool. */
+    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
+    tcg_out_insn(s, 3305, LDR, 0, rd);
 }

 /* Define something more legible for general use. */
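A worked example may help with the rewritten tcg_out_movi above: the lowest non-zero 16-bit lane is loaded with MOVZ (or MOVN for mostly-ones values), a second lane with MOVK, and anything still left after two lanes goes to the constant pool instead. The editor's sketch below traces the s0/t1/s1/t2 computation for one sample constant, using GCC/Clang's __builtin_ctzll in place of QEMU's ctz64:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Sample constant: two non-zero 16-bit lanes, so two insns suffice. */
    uint64_t value = 0x00ff00000000abcdull;    /* ctpop = 18 < 32: MOVZ path */

    int s0 = __builtin_ctzll(value) & (63 & -16);   /* 0: lane of 0xabcd */
    uint64_t t1 = value & ~(0xffffull << s0);       /* 0x00ff000000000000 */
    int s1 = __builtin_ctzll(t1) & (63 & -16);      /* 48: lane of 0x00ff */
    uint64_t t2 = t1 & ~(0xffffull << s1);          /* 0: no third lane */

    printf("MOVZ x0, #0x%04" PRIx64 ", lsl #%d\n", (value >> s0) & 0xffff, s0);
    if (t1 != 0) {
        printf("MOVK x0, #0x%04" PRIx64 ", lsl #%d\n", (value >> s1) & 0xffff, s1);
    }
    if (t2 != 0) {
        /* tcg_out_movi would emit new_pool_label() plus one LDR instead. */
        puts("LDR x0, <pool>");
    }
    return 0;
}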
@@ -871,9 +864,8 @@ static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
     }
 }

-#ifdef USE_DIRECT_JUMP
-
-void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
+void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
+                              uintptr_t addr)
 {
     tcg_insn_unit i1, i2;
     TCGType rt = TCG_TYPE_I64;
@@ -898,8 +890,6 @@ void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
     flush_icache_range(jmp_addr, jmp_addr + 8);
 }

-#endif
-
 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
 {
     if (!l->has_value) {
@@ -1073,6 +1063,8 @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
 }

 #ifdef CONFIG_SOFTMMU
+#include "tcg-ldst.inc.c"
+
 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
  *                                     TCGMemOpIdx oi, uintptr_t ra)
  */
@@ -1412,19 +1404,19 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,

     case INDEX_op_goto_tb:
         if (s->tb_jmp_insn_offset != NULL) {
-            /* USE_DIRECT_JUMP */
+            /* TCG_TARGET_HAS_direct_jump */
             /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
                write can be used to patch the target address. */
             if ((uintptr_t)s->code_ptr & 7) {
                 tcg_out32(s, NOP);
             }
             s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
             /* actual branch destination will be patched by
-               aarch64_tb_set_jmp_target later. */
+               tb_target_set_jmp_target later. */
             tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
             tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
         } else {
-            /* !USE_DIRECT_JUMP */
+            /* !TCG_TARGET_HAS_direct_jump */
             tcg_debug_assert(s->tb_jmp_target_addr != NULL);
             intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
             tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
@@ -2032,6 +2024,14 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out_insn(s, 3207, RET, TCG_REG_LR);
 }

+static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
+{
+    int i;
+    for (i = 0; i < count; ++i) {
+        p[i] = NOP;
+    }
+}
+
 typedef struct {
     DebugFrameHeader h;
     uint8_t fde_def_cfa[4];
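tcg_out_nop_fill above is the backend hook the shared pool code uses to pad the gap between the end of code and the aligned start of the pool data. A trivial standalone rendering (editor's sketch; 0xd503201f is the AArch64 NOP encoding):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define NOP 0xd503201fu          /* AArch64 "nop" encoding */

typedef uint32_t tcg_insn_unit;  /* one AArch64 instruction slot */

static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
    int i;
    for (i = 0; i < count; ++i) {
        p[i] = NOP;
    }
}

int main(void)
{
    tcg_insn_unit gap[4];
    tcg_out_nop_fill(gap, 4);    /* pad four insn slots before the pool */
    printf("0x%08" PRIx32 "\n", gap[0]);
    return 0;
}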
