
Commit

Merge remote-tracking branch 'remotes/rth-gitlab/tags/pull-tcg-20210306' into staging

TCI build fix and cleanup
Streamline tb_lookup
Fixes for tcg/aarch64

# gpg: Signature made Sat 06 Mar 2021 21:34:46 GMT
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A  05C0 64DF 38E8 AF7E 215F

* remotes/rth-gitlab/tags/pull-tcg-20210306: (27 commits)
  accel/tcg: Precompute curr_cflags into cpu->tcg_cflags
  include/exec: lightly re-arrange TranslationBlock
  accel/tcg: drop the use of CF_HASH_MASK and rename params
  accel/tcg: move CF_CLUSTER calculation to curr_cflags
  accel/tcg: rename tb_lookup__cpu_state and hoist state extraction
  tcg/tci: Merge mov, not and neg operations
  tcg/tci: Merge bswap operations
  tcg/tci: Merge extension operations
  tcg/tci: Merge basic arithmetic operations
  tcg/tci: Reduce use of tci_read_r64
  tcg/tci: Remove tci_read_r32s
  tcg/tci: Remove tci_read_r32
  tcg/tci: Remove tci_read_r16s
  tcg/tci: Remove tci_read_r16
  tcg/tci: Remove tci_read_r8s
  tcg/tci: Remove tci_read_r8
  tcg/tci: Merge identical cases in generation (load/store opcodes)
  tcg/tci: Merge identical cases in generation (conditional opcodes)
  tcg/tci: Merge identical cases in generation (deposit opcode)
  tcg/tci: Merge identical cases in generation (exchange opcodes)
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
pm215 committed Mar 8, 2021
2 parents 229a834 + 6cc9d67 commit 74fd46e
Showing 18 changed files with 529 additions and 610 deletions.
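
Most of the hunks below serve one idea: the global parallel_cpus flag is gone, replaced by a per-vCPU cpu->tcg_cflags value computed once when the vcpu thread starts, so curr_cflags() becomes a plain field read. The following standalone sketch models that flow; the struct and the icount_enabled() stub are simplified stand-ins, and the CF_USE_ICOUNT value is an assumption taken from exec-all.h rather than shown in this diff.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define CF_USE_ICOUNT    0x00020000   /* assumed value, per exec-all.h */
    #define CF_PARALLEL      0x00080000
    #define CF_CLUSTER_SHIFT 24

    typedef struct CPUState {
        int cluster_index;
        uint32_t tcg_cflags;              /* the field this series adds */
    } CPUState;

    static bool icount_enabled(void) { return false; }  /* stand-in stub */

    /* Computed once at vcpu-thread start, as in tcg-accel-ops.c below. */
    static void tcg_cpu_init_cflags(CPUState *cpu, bool parallel)
    {
        uint32_t cflags = cpu->cluster_index << CF_CLUSTER_SHIFT;
        cflags |= parallel ? CF_PARALLEL : 0;
        cflags |= icount_enabled() ? CF_USE_ICOUNT : 0;
        cpu->tcg_cflags = cflags;
    }

    /* curr_cflags() becomes a plain per-CPU read with no global state. */
    static uint32_t curr_cflags(CPUState *cpu)
    {
        return cpu->tcg_cflags;
    }

    int main(void)
    {
        CPUState cpu = { .cluster_index = 1 };
        tcg_cpu_init_cflags(&cpu, true);            /* MTTCG, >1 vcpus */
        printf("cflags = 0x%08x\n", (unsigned)curr_cflags(&cpu)); /* 0x01080000 */
        return 0;
    }

Precomputing the cluster bits here is also what lets tb_gen_code() stop patching CF_CLUSTER_MASK per translation, as removed in the translate-all.c hunk further down.
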
34 changes: 18 additions & 16 deletions accel/tcg/cpu-exec.c
@@ -245,11 +245,11 @@ static void cpu_exec_exit(CPUState *cpu)
 
 void cpu_exec_step_atomic(CPUState *cpu)
 {
+    CPUArchState *env = (CPUArchState *)cpu->env_ptr;
     TranslationBlock *tb;
     target_ulong cs_base, pc;
     uint32_t flags;
-    uint32_t cflags = 1;
-    uint32_t cf_mask = cflags & CF_HASH_MASK;
+    uint32_t cflags = (curr_cflags(cpu) & ~CF_PARALLEL) | 1;
     int tb_exit;
 
     if (sigsetjmp(cpu->jmp_env, 0) == 0) {
@@ -258,15 +258,15 @@ void cpu_exec_step_atomic(CPUState *cpu)
         g_assert(!cpu->running);
         cpu->running = true;
 
-        tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask);
+        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+        tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
+
         if (tb == NULL) {
             mmap_lock();
             tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
             mmap_unlock();
         }
 
-        /* Since we got here, we know that parallel_cpus must be true. */
-        parallel_cpus = false;
         cpu_exec_enter(cpu);
         /* execute the generated code */
         trace_exec_tb(tb, pc);
@@ -294,7 +294,6 @@ void cpu_exec_step_atomic(CPUState *cpu)
      * the execution.
      */
     g_assert(cpu_in_exclusive_context(cpu));
-    parallel_cpus = true;
     cpu->running = false;
     end_exclusive();
 }
@@ -305,7 +304,7 @@ struct tb_desc {
     CPUArchState *env;
     tb_page_addr_t phys_page1;
     uint32_t flags;
-    uint32_t cf_mask;
+    uint32_t cflags;
     uint32_t trace_vcpu_dstate;
 };

@@ -319,7 +318,7 @@ static bool tb_lookup_cmp(const void *p, const void *d)
         tb->cs_base == desc->cs_base &&
         tb->flags == desc->flags &&
         tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
-        (tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == desc->cf_mask) {
+        tb_cflags(tb) == desc->cflags) {
         /* check next page if needed */
         if (tb->page_addr[1] == -1) {
             return true;
@@ -339,7 +338,7 @@ static bool tb_lookup_cmp(const void *p, const void *d)
 
 TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
                                    target_ulong cs_base, uint32_t flags,
-                                   uint32_t cf_mask)
+                                   uint32_t cflags)
 {
     tb_page_addr_t phys_pc;
     struct tb_desc desc;
@@ -348,15 +347,15 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
     desc.env = (CPUArchState *)cpu->env_ptr;
     desc.cs_base = cs_base;
     desc.flags = flags;
-    desc.cf_mask = cf_mask;
+    desc.cflags = cflags;
     desc.trace_vcpu_dstate = *cpu->trace_dstate;
     desc.pc = pc;
     phys_pc = get_page_addr_code(desc.env, pc);
     if (phys_pc == -1) {
         return NULL;
     }
     desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
-    h = tb_hash_func(phys_pc, pc, flags, cf_mask, *cpu->trace_dstate);
+    h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
     return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
 }

@@ -416,16 +415,19 @@ static inline void tb_add_jump(TranslationBlock *tb, int n,
 
 static inline TranslationBlock *tb_find(CPUState *cpu,
                                         TranslationBlock *last_tb,
-                                        int tb_exit, uint32_t cf_mask)
+                                        int tb_exit, uint32_t cflags)
 {
+    CPUArchState *env = (CPUArchState *)cpu->env_ptr;
     TranslationBlock *tb;
     target_ulong cs_base, pc;
     uint32_t flags;
 
-    tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask);
+    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+
+    tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
     if (tb == NULL) {
         mmap_lock();
-        tb = tb_gen_code(cpu, pc, cs_base, flags, cf_mask);
+        tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
         mmap_unlock();
         /* We add the TB in the virtual pc hash table for the fast lookup */
         qatomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb);
@@ -491,7 +493,7 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
     if (replay_has_exception()
         && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0) {
         /* Execute just one insn to trigger exception pending in the log */
-        cpu->cflags_next_tb = (curr_cflags() & ~CF_USE_ICOUNT) | 1;
+        cpu->cflags_next_tb = (curr_cflags(cpu) & ~CF_USE_ICOUNT) | 1;
     }
 #endif
     return false;
@@ -788,7 +790,7 @@ int cpu_exec(CPUState *cpu)
           have CF_INVALID set, -1 is a convenient invalid value that
           does not require tcg headers for cpu_common_reset. */
        if (cflags == -1) {
-            cflags = curr_cflags();
+            cflags = curr_cflags(cpu);
        } else {
            cpu->cflags_next_tb = -1;
        }
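
One detail of the cpu_exec_step_atomic() hunks above: rather than toggling the global parallel_cpus around the exclusive region, the single-step TB's cflags are now derived locally by clearing CF_PARALLEL and requesting a one-instruction count. A small sketch of that bit arithmetic, assuming the CF_* values from exec-all.h and an illustrative input value:

    #include <assert.h>
    #include <stdint.h>

    #define CF_COUNT_MASK 0x00007fff
    #define CF_PARALLEL   0x00080000

    /* Mirrors the expression in the hunk above; tcg_cflags carries no
     * CF_COUNT_MASK bits, so "| 1" requests exactly one instruction. */
    static uint32_t atomic_step_cflags(uint32_t tcg_cflags)
    {
        return (tcg_cflags & ~CF_PARALLEL) | 1;
    }

    int main(void)
    {
        /* Illustrative input: cluster 1, CF_PARALLEL set (MTTCG vcpu). */
        uint32_t cflags = atomic_step_cflags(0x01080000);

        assert((cflags & CF_PARALLEL) == 0);   /* no parallel-context codegen */
        assert((cflags & CF_COUNT_MASK) == 1); /* translate a single insn */
        assert(cflags == 0x01000001);          /* cluster bits preserved */
        return 0;
    }
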
3 changes: 1 addition & 2 deletions accel/tcg/tcg-accel-ops-mttcg.c
@@ -114,8 +114,7 @@ void mttcg_start_vcpu_thread(CPUState *cpu)
     char thread_name[VCPU_THREAD_NAME_SIZE];
 
     g_assert(tcg_enabled());
-
-    parallel_cpus = (current_machine->smp.max_cpus > 1);
+    tcg_cpu_init_cflags(cpu, current_machine->smp.max_cpus > 1);
 
     cpu->thread = g_malloc0(sizeof(QemuThread));
     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2 changes: 1 addition & 1 deletion accel/tcg/tcg-accel-ops-rr.c
@@ -269,7 +269,7 @@ void rr_start_vcpu_thread(CPUState *cpu)
     static QemuThread *single_tcg_cpu_thread;
 
     g_assert(tcg_enabled());
-    parallel_cpus = false;
+    tcg_cpu_init_cflags(cpu, false);
 
     if (!single_tcg_cpu_thread) {
         cpu->thread = g_malloc0(sizeof(QemuThread));
8 changes: 8 additions & 0 deletions accel/tcg/tcg-accel-ops.c
@@ -41,6 +41,14 @@
 
 /* common functionality among all TCG variants */
 
+void tcg_cpu_init_cflags(CPUState *cpu, bool parallel)
+{
+    uint32_t cflags = cpu->cluster_index << CF_CLUSTER_SHIFT;
+    cflags |= parallel ? CF_PARALLEL : 0;
+    cflags |= icount_enabled() ? CF_USE_ICOUNT : 0;
+    cpu->tcg_cflags = cflags;
+}
+
 void tcg_cpus_destroy(CPUState *cpu)
 {
     cpu_thread_signal_destroyed(cpu);
1 change: 1 addition & 0 deletions accel/tcg/tcg-accel-ops.h
@@ -17,5 +17,6 @@
 void tcg_cpus_destroy(CPUState *cpu);
 int tcg_cpus_exec(CPUState *cpu);
 void tcg_handle_interrupt(CPUState *cpu, int mask);
+void tcg_cpu_init_cflags(CPUState *cpu, bool parallel);
 
 #endif /* TCG_CPUS_H */
6 changes: 4 additions & 2 deletions accel/tcg/tcg-runtime.c
@@ -27,10 +27,10 @@
 #include "exec/helper-proto.h"
 #include "exec/cpu_ldst.h"
 #include "exec/exec-all.h"
-#include "exec/tb-lookup.h"
 #include "disas/disas.h"
 #include "exec/log.h"
 #include "tcg/tcg.h"
+#include "exec/tb-lookup.h"
 
 /* 32-bit helpers */
 
@@ -152,7 +152,9 @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
     target_ulong cs_base, pc;
     uint32_t flags;
 
-    tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, curr_cflags());
+    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+
+    tb = tb_lookup(cpu, pc, cs_base, flags, curr_cflags(cpu));
     if (tb == NULL) {
         return tcg_code_gen_epilogue;
     }
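
The HELPER(lookup_tb_ptr) hunk above hoists cpu_get_tb_cpu_state() out of the lookup, but the helper's contract is unchanged: translated code jumps through the returned pointer, so a lookup miss must yield tcg_code_gen_epilogue, never NULL. A standalone sketch of that contract, with every name a simplified stand-in:

    #include <stdio.h>

    typedef struct TranslationBlock {
        const void *tc_ptr;   /* entry point of the translated code */
    } TranslationBlock;

    static char epilogue_stub;                       /* stand-in code buffer */
    static const void *tcg_code_gen_epilogue = &epilogue_stub;

    static TranslationBlock *lookup(void) { return NULL; }  /* forced miss */

    static const void *helper_lookup_tb_ptr(void)
    {
        TranslationBlock *tb = lookup();
        /* A miss returns the epilogue so the goto_ptr jump exits cleanly. */
        return tb ? tb->tc_ptr : tcg_code_gen_epilogue;
    }

    int main(void)
    {
        printf("miss -> epilogue at %p\n", (void *)helper_lookup_tb_ptr());
        return 0;
    }
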
18 changes: 8 additions & 10 deletions accel/tcg/translate-all.c
@@ -224,7 +224,6 @@ static void *l1_map[V_L1_MAX_SIZE];
 TCGContext tcg_init_ctx;
 __thread TCGContext *tcg_ctx;
 TBContext tb_ctx;
-bool parallel_cpus;
 
 static void page_table_config_init(void)
 {
@@ -1311,7 +1310,7 @@ static bool tb_cmp(const void *ap, const void *bp)
     return a->pc == b->pc &&
         a->cs_base == b->cs_base &&
         a->flags == b->flags &&
-        (tb_cflags(a) & CF_HASH_MASK) == (tb_cflags(b) & CF_HASH_MASK) &&
+        (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
         a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
         a->page_addr[0] == b->page_addr[0] &&
         a->page_addr[1] == b->page_addr[1];
@@ -1616,6 +1615,7 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
     PageDesc *p;
     uint32_t h;
     tb_page_addr_t phys_pc;
+    uint32_t orig_cflags = tb_cflags(tb);
 
     assert_memory_lock();
 
@@ -1626,7 +1626,7 @@
 
     /* remove the TB from the hash list */
     phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
-    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb_cflags(tb) & CF_HASH_MASK,
+    h = tb_hash_func(phys_pc, tb->pc, tb->flags, orig_cflags,
                      tb->trace_vcpu_dstate);
     if (!qht_remove(&tb_ctx.htable, tb, h)) {
         return;
@@ -1793,6 +1793,7 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
     uint32_t h;
 
     assert_memory_lock();
+    tcg_debug_assert(!(tb->cflags & CF_INVALID));
 
     /*
      * Add the TB to the page list, acquiring first the pages's locks.
@@ -1811,7 +1812,7 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
     }
 
     /* add in the hash table */
-    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK,
+    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags,
                      tb->trace_vcpu_dstate);
     qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
 
@@ -1865,9 +1866,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
         cflags = (cflags & ~CF_COUNT_MASK) | 1;
     }
 
-    cflags &= ~CF_CLUSTER_MASK;
-    cflags |= cpu->cluster_index << CF_CLUSTER_SHIFT;
-
     max_insns = cflags & CF_COUNT_MASK;
     if (max_insns == 0) {
         max_insns = CF_COUNT_MASK;
@@ -2194,7 +2192,7 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages,
     if (current_tb_modified) {
         page_collection_unlock(pages);
         /* Force execution of one insn next time. */
-        cpu->cflags_next_tb = 1 | curr_cflags();
+        cpu->cflags_next_tb = 1 | curr_cflags(cpu);
         mmap_unlock();
         cpu_loop_exit_noexc(cpu);
     }
@@ -2362,7 +2360,7 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc)
 #ifdef TARGET_HAS_PRECISE_SMC
     if (current_tb_modified) {
         /* Force execution of one insn next time. */
-        cpu->cflags_next_tb = 1 | curr_cflags();
+        cpu->cflags_next_tb = 1 | curr_cflags(cpu);
         return true;
     }
 #endif
@@ -2438,7 +2436,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
      * operations only (which execute after completion) so we don't
      * double instrument the instruction.
      */
-    cpu->cflags_next_tb = curr_cflags() | CF_MEMI_ONLY | CF_LAST_IO | n;
+    cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
 
     qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
                            "cpu_io_recompile: rewound execution of TB to "
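
The net effect of the translate-all.c hunks is that TBs are hashed and compared on their full cflags, with CF_HASH_MASK gone. Two pieces make that safe: tb_link_page() now asserts that a CF_INVALID TB is never inserted, and do_tb_phys_invalidate() snapshots orig_cflags before invalidation sets CF_INVALID, so removal hashes exactly the value that was inserted. A toy model of that ordering (not QEMU's real qht or hash function):

    #include <assert.h>
    #include <stdint.h>

    #define CF_INVALID 0x00040000

    typedef struct TranslationBlock {
        uint32_t cflags;
    } TranslationBlock;

    /* Toy stand-in for tb_hash_func(): only cflags contributes here. */
    static uint32_t tb_hash(uint32_t cflags)
    {
        return cflags * 2654435761u; /* odd multiplier: distinct keys stay distinct */
    }

    int main(void)
    {
        TranslationBlock tb = { .cflags = 0x01080000 };

        /* tb_link_page(): insert keyed on the full, valid cflags. */
        uint32_t insert_hash = tb_hash(tb.cflags);

        /* do_tb_phys_invalidate(): snapshot first, then mark invalid. */
        uint32_t orig_cflags = tb.cflags;
        tb.cflags |= CF_INVALID;

        /* Removal must hash the snapshot; the live value no longer matches. */
        assert(tb_hash(orig_cflags) == insert_hash);
        assert(tb_hash(tb.cflags) != insert_hash);
        return 0;
    }
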
22 changes: 11 additions & 11 deletions include/exec/exec-all.h
@@ -448,9 +448,6 @@ struct TranslationBlock {
     target_ulong pc;   /* simulated PC corresponding to this block (EIP + CS base) */
     target_ulong cs_base; /* CS base for this block */
     uint32_t flags; /* flags defining in which context the code was generated */
-    uint16_t size;      /* size of target code for this block (1 <=
-                           size <= TARGET_PAGE_SIZE) */
-    uint16_t icount;
     uint32_t cflags;    /* compile flags */
 #define CF_COUNT_MASK  0x00007fff
 #define CF_LAST_IO     0x00008000 /* Last insn may be an IO access. */
@@ -460,12 +457,18 @@
 #define CF_PARALLEL    0x00080000 /* Generate code for a parallel context */
 #define CF_CLUSTER_MASK 0xff000000 /* Top 8 bits are cluster ID */
 #define CF_CLUSTER_SHIFT 24
-/* cflags' mask for hashing/comparison, basically ignore CF_INVALID */
-#define CF_HASH_MASK   (~CF_INVALID)
 
     /* Per-vCPU dynamic tracing state used to generate this TB */
     uint32_t trace_vcpu_dstate;
 
+    /*
+     * Above fields used for comparing
+     */
+
+    /* size of target code for this block (1 <= size <= TARGET_PAGE_SIZE) */
+    uint16_t size;
+    uint16_t icount;
+
     struct tb_tc tc;
 
     /* first and second physical page containing code. The lower bit
@@ -510,19 +513,16 @@ struct TranslationBlock {
     uintptr_t jmp_dest[2];
 };
 
-extern bool parallel_cpus;
-
 /* Hide the qatomic_read to make code a little easier on the eyes */
 static inline uint32_t tb_cflags(const TranslationBlock *tb)
 {
     return qatomic_read(&tb->cflags);
 }
 
 /* current cflags for hashing/comparison */
-static inline uint32_t curr_cflags(void)
+static inline uint32_t curr_cflags(CPUState *cpu)
 {
-    return (parallel_cpus ? CF_PARALLEL : 0)
-         | (icount_enabled() ? CF_USE_ICOUNT : 0);
+    return cpu->tcg_cflags;
 }
 
 /* TranslationBlock invalidate API */
@@ -536,7 +536,7 @@ void tb_flush(CPUState *cpu);
 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
 TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
                                    target_ulong cs_base, uint32_t flags,
-                                   uint32_t cf_mask);
+                                   uint32_t cflags);
 void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr);
 
 /* GETPC is the true target of the return instruction that we'll execute. */
26 changes: 12 additions & 14 deletions include/exec/tb-lookup.h
@@ -17,30 +17,28 @@
 #include "exec/tb-hash.h"
 
 /* Might cause an exception, so have a longjmp destination ready */
-static inline TranslationBlock *
-tb_lookup__cpu_state(CPUState *cpu, target_ulong *pc, target_ulong *cs_base,
-                     uint32_t *flags, uint32_t cf_mask)
+static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
+                                          target_ulong cs_base,
+                                          uint32_t flags, uint32_t cflags)
 {
-    CPUArchState *env = (CPUArchState *)cpu->env_ptr;
     TranslationBlock *tb;
     uint32_t hash;
 
-    cpu_get_tb_cpu_state(env, pc, cs_base, flags);
-    hash = tb_jmp_cache_hash_func(*pc);
-    tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]);
+    /* we should never be trying to look up an INVALID tb */
+    tcg_debug_assert(!(cflags & CF_INVALID));
 
-    cf_mask &= ~CF_CLUSTER_MASK;
-    cf_mask |= cpu->cluster_index << CF_CLUSTER_SHIFT;
+    hash = tb_jmp_cache_hash_func(pc);
+    tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]);
 
     if (likely(tb &&
-               tb->pc == *pc &&
-               tb->cs_base == *cs_base &&
-               tb->flags == *flags &&
+               tb->pc == pc &&
+               tb->cs_base == cs_base &&
+               tb->flags == flags &&
                tb->trace_vcpu_dstate == *cpu->trace_dstate &&
-               (tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == cf_mask)) {
+               tb_cflags(tb) == cflags)) {
         return tb;
     }
-    tb = tb_htable_lookup(cpu, *pc, *cs_base, *flags, cf_mask);
+    tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags);
     if (tb == NULL) {
         return NULL;
     }
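
The rewritten tb_lookup() keeps the familiar two-level scheme: probe the per-vCPU tb_jmp_cache by virtual PC, fall back to tb_htable_lookup() on a miss, and refill the cache on success; the only semantic change is that cflags now match in full. A self-contained model of the pattern follows; the types, one-entry backing table, and masking hash are stand-ins, not the real tb_jmp_cache_hash_func():

    #include <stdint.h>

    #define JMP_CACHE_SIZE 4096u   /* stand-in; QEMU sizes this differently */

    typedef struct TranslationBlock {
        uint64_t pc;
        uint32_t flags;
        uint32_t cflags;
    } TranslationBlock;

    typedef struct CPUState {
        TranslationBlock *tb_jmp_cache[JMP_CACHE_SIZE];
    } CPUState;

    /* Stand-in for tb_htable_lookup(): a one-entry "global" table. */
    static TranslationBlock global_tb = { .pc = 0x1000, .flags = 0, .cflags = 0 };

    static TranslationBlock *htable_lookup(uint64_t pc, uint32_t flags,
                                           uint32_t cflags)
    {
        if (global_tb.pc == pc && global_tb.flags == flags &&
            global_tb.cflags == cflags) {
            return &global_tb;
        }
        return NULL;
    }

    static TranslationBlock *tb_lookup(CPUState *cpu, uint64_t pc,
                                       uint32_t flags, uint32_t cflags)
    {
        uint32_t hash = pc & (JMP_CACHE_SIZE - 1);  /* toy hash, not QEMU's */
        TranslationBlock *tb = cpu->tb_jmp_cache[hash];

        /* Fast path: every key field must match; cflags compared in full. */
        if (tb && tb->pc == pc && tb->flags == flags && tb->cflags == cflags) {
            return tb;
        }
        tb = htable_lookup(pc, flags, cflags);      /* slow path */
        if (tb) {
            cpu->tb_jmp_cache[hash] = tb;           /* refill for next time */
        }
        return tb;
    }

    int main(void)
    {
        static CPUState cpu;                        /* cache starts empty */
        TranslationBlock *slow = tb_lookup(&cpu, 0x1000, 0, 0); /* via table */
        TranslationBlock *fast = tb_lookup(&cpu, 0x1000, 0, 0); /* via cache */
        return (slow == &global_tb && fast == &global_tb) ? 0 : 1;
    }
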
2 changes: 2 additions & 0 deletions include/hw/core/cpu.h
@@ -282,6 +282,7 @@ struct qemu_work_item;
  *   to a cluster this will be UNASSIGNED_CLUSTER_INDEX; otherwise it will
  *   be the same as the cluster-id property of the CPU object's TYPE_CPU_CLUSTER
  *   QOM parent.
+ * @tcg_cflags: Pre-computed cflags for this cpu.
  * @nr_cores: Number of cores within this CPU package.
  * @nr_threads: Number of threads within this CPU.
  * @running: #true if CPU is currently running (lockless).
@@ -412,6 +413,7 @@
     /* TODO Move common fields from CPUArchState here. */
     int cpu_index;
     int cluster_index;
+    uint32_t tcg_cflags;
     uint32_t halted;
     uint32_t can_do_io;
     int32_t exception_index;
