Skip to content

Commit

Permalink
Merge remote-tracking branch 'remotes/rth-gitlab/tags/pull-tcg-202101…
Browse files Browse the repository at this point in the history
…24' into staging

Fix tcg constant temp overflow.
Fix running during atomic single-step.
Partial support for apple silicon.
Cleanups for accel/tcg.

# gpg: Signature made Sun 24 Jan 2021 18:08:57 GMT
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A  05C0 64DF 38E8 AF7E 215F

* remotes/rth-gitlab/tags/pull-tcg-20210124:
  tcg: Restart code generation when we run out of temps
  tcg: Toggle page execution for Apple Silicon
  accel/tcg: Restrict cpu_io_recompile() from other accelerators
  accel/tcg: Declare missing cpu_loop_exit*() stubs
  accel/tcg: Restrict tb_gen_code() from other accelerators
  accel/tcg: Move tb_flush_jmp_cache() to cputlb.c
  accel/tcg: Make cpu_gen_init() static
  tcg: Optimize inline dup_const for MO_64
  qemu/compiler: Split out qemu_build_not_reached_always
  tcg: update the cpu running flag in cpu_exec_step_atomic

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
  • Loading branch information
pm215 committed Jan 24, 2021
2 parents e81eb5e + ae30e86 commit e672f1d
Show file tree
Hide file tree
Showing 10 changed files with 120 additions and 36 deletions.
10 changes: 10 additions & 0 deletions accel/stubs/tcg-stub.c
Expand Up @@ -28,3 +28,13 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
/* Handled by hardware accelerator. */
g_assert_not_reached();
}

/*
 * Stub definition of cpu_loop_exit().
 * It is not expected to be called from this file's configuration, so any
 * call trips g_assert_not_reached() instead of returning.
 */
void QEMU_NORETURN cpu_loop_exit(CPUState *cpu)
{
g_assert_not_reached();
}

/*
 * Stub definition of cpu_loop_exit_restore().
 * Both @cpu and @pc are ignored; any call trips g_assert_not_reached()
 * instead of returning.
 */
void QEMU_NORETURN cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
{
g_assert_not_reached();
}
7 changes: 7 additions & 0 deletions accel/tcg/cpu-exec.c
Expand Up @@ -41,6 +41,7 @@
#include "exec/cpu-all.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/replay.h"
#include "internal.h"

/* -icount align implementation. */

Expand Down Expand Up @@ -185,6 +186,7 @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
}
#endif /* DEBUG_DISAS */

qemu_thread_jit_execute();
ret = tcg_qemu_tb_exec(env, tb_ptr);
cpu->can_do_io = 1;
/*
Expand Down Expand Up @@ -285,6 +287,9 @@ void cpu_exec_step_atomic(CPUState *cpu)

if (sigsetjmp(cpu->jmp_env, 0) == 0) {
start_exclusive();
g_assert(cpu == current_cpu);
g_assert(!cpu->running);
cpu->running = true;

tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask);
if (tb == NULL) {
Expand Down Expand Up @@ -323,6 +328,7 @@ void cpu_exec_step_atomic(CPUState *cpu)
*/
g_assert(cpu_in_exclusive_context(cpu));
parallel_cpus = true;
cpu->running = false;
end_exclusive();
}

Expand Down Expand Up @@ -405,6 +411,7 @@ static inline void tb_add_jump(TranslationBlock *tb, int n,
{
uintptr_t old;

qemu_thread_jit_write();
assert(n < ARRAY_SIZE(tb->jmp_list_next));
qemu_spin_lock(&tb_next->jmp_lock);

Expand Down
19 changes: 19 additions & 0 deletions accel/tcg/cputlb.c
Expand Up @@ -25,6 +25,7 @@
#include "exec/address-spaces.h"
#include "exec/cpu_ldst.h"
#include "exec/cputlb.h"
#include "exec/tb-hash.h"
#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
#include "tcg/tcg.h"
Expand All @@ -36,6 +37,7 @@
#include "exec/translate-all.h"
#include "trace/trace-root.h"
#include "trace/mem.h"
#include "internal.h"
#ifdef CONFIG_PLUGIN
#include "qemu/plugin-memory.h"
#endif
Expand Down Expand Up @@ -97,6 +99,23 @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
desc->window_max_entries = max_entries;
}

/*
 * Store NULL into the TB_JMP_PAGE_SIZE consecutive entries of
 * @cpu->tb_jmp_cache that start at the index tb_jmp_cache_hash_page()
 * returns for @page_addr.  Each store goes through qatomic_set().
 */
static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
{
    unsigned int first = tb_jmp_cache_hash_page(page_addr);
    unsigned int slot = 0;

    while (slot < TB_JMP_PAGE_SIZE) {
        qatomic_set(&cpu->tb_jmp_cache[first + slot], NULL);
        slot++;
    }
}

/*
 * Clear the jump cache entries for the page at @addr and for the page
 * immediately below it, because a tb that begins in the preceding page
 * might still overlap the flushed page.
 */
static void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr)
{
    target_ulong prev_page = addr - TARGET_PAGE_SIZE;

    tb_jmp_cache_clear_page(cpu, prev_page);
    tb_jmp_cache_clear_page(cpu, addr);
}

/**
* tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
* @desc: The CPUTLBDesc portion of the TLB
Expand Down
20 changes: 20 additions & 0 deletions accel/tcg/internal.h
@@ -0,0 +1,20 @@
/*
* Internal execution defines for qemu
*
* Copyright (c) 2003 Fabrice Bellard
*
* SPDX-License-Identifier: LGPL-2.1-or-later
*/

#ifndef ACCEL_TCG_INTERNAL_H
#define ACCEL_TCG_INTERNAL_H

#include "exec/exec-all.h"

/*
 * Generate a TranslationBlock for the guest code identified by @pc,
 * @cs_base and @flags, using compile flags @cflags.
 * Defined in translate-all.c.
 * NOTE(review): the definition calls assert_memory_lock() on entry, so
 * callers presumably must already hold that lock -- confirm.
 */
TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc,
target_ulong cs_base, uint32_t flags,
int cflags);

/*
 * Does not return to the caller (QEMU_NORETURN); the definition in
 * translate-all.c finishes by calling cpu_loop_exit_noexc().
 * NOTE(review): @retaddr is presumably the host return address of the
 * faulting access -- confirm against the definition.
 */
void QEMU_NORETURN cpu_io_recompile(CPUState *cpu, uintptr_t retaddr);

#endif /* ACCEL_TCG_INTERNAL_H */
38 changes: 19 additions & 19 deletions accel/tcg/translate-all.c
Expand Up @@ -60,6 +60,7 @@
#include "sysemu/cpu-timers.h"
#include "sysemu/tcg.h"
#include "qapi/error.h"
#include "internal.h"

/* #define DEBUG_TB_INVALIDATE */
/* #define DEBUG_TB_FLUSH */
Expand Down Expand Up @@ -243,7 +244,7 @@ static void page_table_config_init(void)
assert(v_l2_levels >= 0);
}

void cpu_gen_init(void)
static void cpu_gen_init(void)
{
tcg_context_init(&tcg_init_ctx);
}
Expand Down Expand Up @@ -1669,7 +1670,9 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)

/*
 * Invalidate @tb through do_tb_phys_invalidate() with rm_from_page_list
 * set to true.
 *
 * The call is bracketed by qemu_thread_jit_write() and
 * qemu_thread_jit_execute(), which toggle write/execute on MAP_JIT pages
 * for the current thread, so the statement order below matters.
 */
static void tb_phys_invalidate__locked(TranslationBlock *tb)
{
/* Switch the current thread's MAP_JIT pages to writable first. */
qemu_thread_jit_write();
do_tb_phys_invalidate(tb, true);
/* Switch back to execute mode once the invalidation is done. */
qemu_thread_jit_execute();
}

/* invalidate one TB
Expand Down Expand Up @@ -1871,6 +1874,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
#endif

assert_memory_lock();
qemu_thread_jit_write();

phys_pc = get_page_addr_code(env, pc);

Expand Down Expand Up @@ -1922,11 +1926,17 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
ti = profile_getclock();
#endif

gen_code_size = sigsetjmp(tcg_ctx->jmp_trans, 0);
if (unlikely(gen_code_size != 0)) {
goto error_return;
}

tcg_func_start(tcg_ctx);

tcg_ctx->cpu = env_cpu(env);
gen_intermediate_code(cpu, tb, max_insns);
tcg_ctx->cpu = NULL;
max_insns = tb->icount;

trace_translate_block(tb, tb->pc, tb->tc.ptr);

Expand All @@ -1951,6 +1961,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,

gen_code_size = tcg_gen_code(tcg_ctx, tb);
if (unlikely(gen_code_size < 0)) {
error_return:
switch (gen_code_size) {
case -1:
/*
Expand All @@ -1962,6 +1973,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
* flush the TBs, allocate a new TB, re-initialize it per
* above, and re-do the actual code generation.
*/
qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
"Restarting code generation for "
"code_gen_buffer overflow\n");
goto buffer_overflow;

case -2:
Expand All @@ -1974,9 +1988,12 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
* Try again with half as many insns as we attempted this time.
* If a single insn overflows, there's a bug somewhere...
*/
max_insns = tb->icount;
assert(max_insns > 1);
max_insns /= 2;
qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
"Restarting code generation with "
"smaller translation block (max %d insns)\n",
max_insns);
goto tb_overflow;

default:
Expand Down Expand Up @@ -2461,23 +2478,6 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
cpu_loop_exit_noexc(cpu);
}

/*
 * Store NULL into the TB_JMP_PAGE_SIZE consecutive entries of
 * @cpu->tb_jmp_cache that start at the index tb_jmp_cache_hash_page()
 * returns for @page_addr.
 */
static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
{
/* i0 is the first cache index for the page; i walks the entries after it. */
unsigned int i, i0 = tb_jmp_cache_hash_page(page_addr);

for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
qatomic_set(&cpu->tb_jmp_cache[i0 + i], NULL);
}
}

/*
 * Clear the jump cache entries of @cpu for the page at @addr and for the
 * page TARGET_PAGE_SIZE below it.
 */
void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr)
{
/* Discard jump cache entries for any tb which might potentially
overlap the flushed page. */
tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
tb_jmp_cache_clear_page(cpu, addr);
}

static void print_qht_statistics(struct qht_stats hst)
{
uint32_t hgram_opts;
Expand Down
11 changes: 0 additions & 11 deletions include/exec/exec-all.h
Expand Up @@ -47,8 +47,6 @@ void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns);
void restore_state_to_opc(CPUArchState *env, TranslationBlock *tb,
target_ulong *data);

void cpu_gen_init(void);

/**
* cpu_restore_state:
* @cpu: the vCPU state is to be restored to
Expand All @@ -65,12 +63,6 @@ void cpu_gen_init(void);
bool cpu_restore_state(CPUState *cpu, uintptr_t searched_pc, bool will_exit);

void QEMU_NORETURN cpu_loop_exit_noexc(CPUState *cpu);
void QEMU_NORETURN cpu_io_recompile(CPUState *cpu, uintptr_t retaddr);
TranslationBlock *tb_gen_code(CPUState *cpu,
target_ulong pc, target_ulong cs_base,
uint32_t flags,
int cflags);

void QEMU_NORETURN cpu_loop_exit(CPUState *cpu);
void QEMU_NORETURN cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc);
void QEMU_NORETURN cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc);
Expand Down Expand Up @@ -665,9 +657,6 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length);
void tlb_set_dirty(CPUState *cpu, target_ulong vaddr);

/* exec.c */
void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr);

MemoryRegionSection *
address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
hwaddr *xlat, hwaddr *plen,
Expand Down
5 changes: 3 additions & 2 deletions include/qemu/compiler.h
Expand Up @@ -215,9 +215,10 @@
* supports QEMU_ERROR, this will be reported at compile time; otherwise
* this will be reported at link time due to the missing symbol.
*/
#if defined(__OPTIMIZE__) && !defined(__NO_INLINE__)
extern void QEMU_NORETURN QEMU_ERROR("code path is reachable")
qemu_build_not_reached(void);
qemu_build_not_reached_always(void);
#if defined(__OPTIMIZE__) && !defined(__NO_INLINE__)
#define qemu_build_not_reached() qemu_build_not_reached_always()
#else
#define qemu_build_not_reached() g_assert_not_reached()
#endif
Expand Down
28 changes: 28 additions & 0 deletions include/qemu/osdep.h
Expand Up @@ -119,6 +119,10 @@ extern int daemon(int, int);
#include "sysemu/os-posix.h"
#endif

#ifdef __APPLE__
#include <AvailabilityMacros.h>
#endif

#include "glib-compat.h"
#include "qemu/typedefs.h"

Expand Down Expand Up @@ -682,4 +686,28 @@ char *qemu_get_host_name(Error **errp);
*/
size_t qemu_get_host_physmem(void);

/*
* Toggle write/execute on the pages marked MAP_JIT
* for the current thread.
*/
#if defined(MAC_OS_VERSION_11_0) && \
MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
/*
 * Switch the current thread's MAP_JIT pages to execute mode by turning
 * JIT write protection on.  Does nothing when the availability check for
 * macOS 11.0 fails at run time.
 */
static inline void qemu_thread_jit_execute(void)
{
/* Keep __builtin_available() as the direct condition guarding the call. */
if (__builtin_available(macOS 11.0, *)) {
pthread_jit_write_protect_np(true);
}
}

/*
 * Switch the current thread's MAP_JIT pages to write mode by turning
 * JIT write protection off.  Does nothing when the availability check for
 * macOS 11.0 fails at run time.
 */
static inline void qemu_thread_jit_write(void)
{
/* Keep __builtin_available() as the direct condition guarding the call. */
if (__builtin_available(macOS 11.0, *)) {
pthread_jit_write_protect_np(false);
}
}
#else
/*
 * No-op fallbacks for builds where the macOS 11.0 SDK check fails, so
 * callers can invoke both helpers unconditionally.
 */
static inline void qemu_thread_jit_write(void) {}
static inline void qemu_thread_jit_execute(void) {}
#endif

#endif
6 changes: 5 additions & 1 deletion include/tcg/tcg.h
Expand Up @@ -680,6 +680,9 @@ struct TCGContext {

uint16_t gen_insn_end_off[TCG_MAX_INSNS];
target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS];

/* Exit to translator on overflow. */
sigjmp_buf jmp_trans;
};

static inline bool temp_readonly(TCGTemp *ts)
Expand Down Expand Up @@ -1325,7 +1328,8 @@ uint64_t dup_const(unsigned vece, uint64_t c);
? ( (VECE) == MO_8 ? 0x0101010101010101ull * (uint8_t)(C) \
: (VECE) == MO_16 ? 0x0001000100010001ull * (uint16_t)(C) \
: (VECE) == MO_32 ? 0x0000000100000001ull * (uint32_t)(C) \
: dup_const(VECE, C)) \
: (VECE) == MO_64 ? (uint64_t)(C) \
: (qemu_build_not_reached_always(), 0)) \
: dup_const(VECE, C))


Expand Down
12 changes: 9 additions & 3 deletions tcg/tcg.c
Expand Up @@ -1112,6 +1112,7 @@ void tcg_prologue_init(TCGContext *s)
s->pool_labels = NULL;
#endif

qemu_thread_jit_write();
/* Generate the prologue. */
tcg_target_qemu_prologue(s);

Expand Down Expand Up @@ -1204,18 +1205,23 @@ void tcg_func_start(TCGContext *s)
QSIMPLEQ_INIT(&s->labels);
}

/*
 * Reserve the next slot in s->temps and return it zero-filled.
 *
 * s->nb_temps is incremented unconditionally.  If the slot index is at or
 * beyond TCG_MAX_TEMPS the function does not return: it siglongjmp()s to
 * s->jmp_trans with value -2, the same code the translator's overflow
 * handling uses to retry with fewer guest instructions.
 *
 * There is deliberately no debug assertion on the index before the check:
 * an active assertion would abort before the recovery path could run.
 */
static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        /* Signal overflow, starting over with fewer guest insns. */
        siglongjmp(s->jmp_trans, -2);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static inline TCGTemp *tcg_global_alloc(TCGContext *s)
static TCGTemp *tcg_global_alloc(TCGContext *s)
{
TCGTemp *ts;

tcg_debug_assert(s->nb_globals == s->nb_temps);
tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
s->nb_globals++;
ts = tcg_temp_alloc(s);
ts->kind = TEMP_GLOBAL;
Expand Down

0 comments on commit e672f1d

Please sign in to comment.