20 changes: 10 additions & 10 deletions accel/tcg/cputlb.c
Expand Up @@ -497,8 +497,8 @@ static void tlb_flush_page_locked(CPUArchState *env, int midx, vaddr page)

/* Check if we need to flush due to large pages. */
if ((page & lp_mask) == lp_addr) {
tlb_debug("forcing full flush midx %d (%"
VADDR_PRIx "/%" VADDR_PRIx ")\n",
tlb_debug("forcing full flush midx %d (%016"
VADDR_PRIx "/%016" VADDR_PRIx ")\n",
midx, lp_addr, lp_mask);
tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
} else {
Expand Down Expand Up @@ -527,7 +527,7 @@ static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,

assert_cpu_is_self(cpu);

tlb_debug("page addr: %" VADDR_PRIx " mmu_map:0x%x\n", addr, idxmap);
tlb_debug("page addr: %016" VADDR_PRIx " mmu_map:0x%x\n", addr, idxmap);

qemu_spin_lock(&env_tlb(env)->c.lock);
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
Expand Down Expand Up @@ -591,7 +591,7 @@ static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,

void tlb_flush_page_by_mmuidx(CPUState *cpu, vaddr addr, uint16_t idxmap)
{
tlb_debug("addr: %" VADDR_PRIx " mmu_idx:%" PRIx16 "\n", addr, idxmap);
tlb_debug("addr: %016" VADDR_PRIx " mmu_idx:%" PRIx16 "\n", addr, idxmap);

/* This should already be page aligned */
addr &= TARGET_PAGE_MASK;
Expand Down Expand Up @@ -625,7 +625,7 @@ void tlb_flush_page(CPUState *cpu, vaddr addr)
void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, vaddr addr,
uint16_t idxmap)
{
tlb_debug("addr: %" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap);
tlb_debug("addr: %016" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap);

/* This should already be page aligned */
addr &= TARGET_PAGE_MASK;
Expand Down Expand Up @@ -666,7 +666,7 @@ void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
vaddr addr,
uint16_t idxmap)
{
tlb_debug("addr: %" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap);
tlb_debug("addr: %016" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap);

/* This should already be page aligned */
addr &= TARGET_PAGE_MASK;
Expand Down Expand Up @@ -728,7 +728,7 @@ static void tlb_flush_range_locked(CPUArchState *env, int midx,
*/
if (mask < f->mask || len > f->mask) {
tlb_debug("forcing full flush midx %d ("
"%" VADDR_PRIx "/%" VADDR_PRIx "+%" VADDR_PRIx ")\n",
"%016" VADDR_PRIx "/%016" VADDR_PRIx "+%016" VADDR_PRIx ")\n",
midx, addr, mask, len);
tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
return;
Expand All @@ -741,7 +741,7 @@ static void tlb_flush_range_locked(CPUArchState *env, int midx,
*/
if (((addr + len - 1) & d->large_page_mask) == d->large_page_addr) {
tlb_debug("forcing full flush midx %d ("
"%" VADDR_PRIx "/%" VADDR_PRIx ")\n",
"%016" VADDR_PRIx "/%016" VADDR_PRIx ")\n",
midx, d->large_page_addr, d->large_page_mask);
tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
return;
Expand Down Expand Up @@ -773,7 +773,7 @@ static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu,

assert_cpu_is_self(cpu);

tlb_debug("range: %" VADDR_PRIx "/%u+%" VADDR_PRIx " mmu_map:0x%x\n",
tlb_debug("range: %016" VADDR_PRIx "/%u+%016" VADDR_PRIx " mmu_map:0x%x\n",
d.addr, d.bits, d.len, d.idxmap);

qemu_spin_lock(&env_tlb(env)->c.lock);
Expand Down Expand Up @@ -1165,7 +1165,7 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
&xlat, &sz, full->attrs, &prot);
assert(sz >= TARGET_PAGE_SIZE);

tlb_debug("vaddr=%" VADDR_PRIx " paddr=0x" HWADDR_FMT_plx
tlb_debug("vaddr=%016" VADDR_PRIx " paddr=0x" HWADDR_FMT_plx
" prot=%x idx=%d\n",
addr, full->phys_addr, prot, mmu_idx);

Expand Down
32 changes: 18 additions & 14 deletions accel/tcg/ldst_atomicity.c.inc
Expand Up @@ -159,9 +159,11 @@ static uint64_t load_atomic8_or_exit(CPUArchState *env, uintptr_t ra, void *pv)
* another process, because the fallback start_exclusive solution
* provides no protection across processes.
*/
if (page_check_range(h2g(pv), 8, PAGE_WRITE_ORG)) {
uint64_t *p = __builtin_assume_aligned(pv, 8);
return *p;
WITH_MMAP_LOCK_GUARD() {
if (!page_check_range(h2g(pv), 8, PAGE_WRITE_ORG)) {
uint64_t *p = __builtin_assume_aligned(pv, 8);
return *p;
}
}
#endif

Expand All @@ -186,25 +188,27 @@ static Int128 load_atomic16_or_exit(CPUArchState *env, uintptr_t ra, void *pv)
return atomic16_read_ro(p);
}

#ifdef CONFIG_USER_ONLY
/*
* We can only use cmpxchg to emulate a load if the page is writable.
* If the page is not writable, then assume the value is immutable
* and requires no locking. This ignores the case of MAP_SHARED with
* another process, because the fallback start_exclusive solution
* provides no protection across processes.
*
* In system mode all guest pages are writable. For user mode,
* we must take mmap_lock so that the query remains valid until
* the write is complete -- tests/tcg/multiarch/munmap-pthread.c
* is an example that can race.
*/
if (page_check_range(h2g(p), 16, PAGE_WRITE_ORG)) {
return *p;
}
WITH_MMAP_LOCK_GUARD() {
#ifdef CONFIG_USER_ONLY
if (!page_check_range(h2g(p), 16, PAGE_WRITE_ORG)) {
return *p;
}
#endif

/*
* In system mode all guest pages are writable, and for user-only
* we have just checked writability. Try cmpxchg.
*/
if (HAVE_ATOMIC128_RW) {
return atomic16_read_rw(p);
if (HAVE_ATOMIC128_RW) {
return atomic16_read_rw(p);
}
}

/* Ultimate fallback: re-execute in serial context. */
Expand Down
4 changes: 2 additions & 2 deletions accel/tcg/user-exec.c
Expand Up @@ -144,7 +144,7 @@ typedef struct PageFlagsNode {

static IntervalTreeRoot pageflags_root;

static PageFlagsNode *pageflags_find(target_ulong start, target_long last)
static PageFlagsNode *pageflags_find(target_ulong start, target_ulong last)
{
IntervalTreeNode *n;

Expand All @@ -153,7 +153,7 @@ static PageFlagsNode *pageflags_find(target_ulong start, target_long last)
}

static PageFlagsNode *pageflags_next(PageFlagsNode *p, target_ulong start,
target_long last)
target_ulong last)
{
IntervalTreeNode *n;

Expand Down
1 change: 1 addition & 0 deletions bsd-user/mmap.c
Expand Up @@ -32,6 +32,7 @@ void mmap_lock(void)

void mmap_unlock(void)
{
assert(mmap_lock_count > 0);
if (--mmap_lock_count == 0) {
pthread_mutex_unlock(&mmap_mutex);
}
Expand Down
80 changes: 80 additions & 0 deletions hw/arm/xen_arm.c
Expand Up @@ -26,6 +26,7 @@
#include "qapi/qapi-commands-migration.h"
#include "qapi/visitor.h"
#include "hw/boards.h"
#include "hw/irq.h"
#include "hw/sysbus.h"
#include "sysemu/block-backend.h"
#include "sysemu/tpm_backend.h"
Expand Down Expand Up @@ -59,6 +60,73 @@ struct XenArmState {
} cfg;
};

static MemoryRegion ram_lo, ram_hi;

/*
 * VIRTIO_MMIO_DEV_SIZE is imported from tools/libs/light/libxl_arm.c in the
 * Xen repository.
 *
 * Origin: git://xenbits.xen.org/xen.git 2128143c114c
 */
#define VIRTIO_MMIO_DEV_SIZE 0x200

/*
 * Number of virtio-mmio devices instantiated by this machine, derived from
 * the SPI range GUEST_VIRTIO_MMIO_SPI_FIRST..GUEST_VIRTIO_MMIO_SPI_LAST.
 *
 * NOTE(review): this is LAST - FIRST, not LAST - FIRST + 1. If
 * GUEST_VIRTIO_MMIO_SPI_LAST is an inclusive bound, the final SPI is never
 * used -- confirm against the Xen headers that define these constants.
 */
#define NR_VIRTIO_MMIO_DEVICES \
(GUEST_VIRTIO_MMIO_SPI_LAST - GUEST_VIRTIO_MMIO_SPI_FIRST)

/*
 * IRQ handler callback: forward a level change on line @irq to
 * xendevicemodel_set_irq_level() for the domain identified by xen_domid,
 * using the xen_dmod handle.
 *
 * @opaque: unused.
 * @irq:    interrupt number passed through unchanged.
 * @level:  new line level passed through unchanged.
 *
 * NOTE(review): if xendevicemodel_set_irq_level() reports failure through a
 * return value, that result is dropped here -- confirm whether it should be
 * checked and reported.
 */
static void xen_set_irq(void *opaque, int irq, int level)
{
xendevicemodel_set_irq_level(xen_dmod, xen_domid, irq, level);
}

/*
 * Create NR_VIRTIO_MMIO_DEVICES "virtio-mmio" sysbus devices.
 *
 * Device i is mapped at GUEST_VIRTIO_MMIO_BASE + i * VIRTIO_MMIO_DEV_SIZE and
 * is given an IRQ allocated with xen_set_irq() as its handler and
 * GUEST_VIRTIO_MMIO_SPI_FIRST + i as its interrupt number.
 *
 * @xam: machine state; not used by the current implementation.
 */
static void xen_create_virtio_mmio_devices(XenArmState *xam)
{
    int i;

    for (i = 0; i < NR_VIRTIO_MMIO_DEVICES; i++) {
        hwaddr base = GUEST_VIRTIO_MMIO_BASE + i * VIRTIO_MMIO_DEV_SIZE;
        qemu_irq irq = qemu_allocate_irq(xen_set_irq, NULL,
                                         GUEST_VIRTIO_MMIO_SPI_FIRST + i);

        sysbus_create_simple("virtio-mmio", base, irq);

        /*
         * hwaddr is not 'unsigned long' on every host (32-bit, LLP64), so
         * "%lx" would mismatch the argument. Widen explicitly and use "%llx".
         */
        DPRINTF("Created virtio-mmio device %d: irq %d base 0x%llx\n",
                i, GUEST_VIRTIO_MMIO_SPI_FIRST + i,
                (unsigned long long)base);
    }
}

/*
 * Set up guest RAM for the machine.
 *
 * A single RAM region "xen.ram" is created and exposed in the system address
 * space through up to two aliases:
 *   - "xen.ram.lo" at GUEST_RAM0_BASE, holding at most GUEST_RAM0_SIZE bytes;
 *   - "xen.ram.hi" at GUEST_RAM1_BASE, holding whatever exceeds bank 0
 *     (only created when that remainder is non-zero).
 *
 * "xen.ram" is sized to end where the highest populated bank ends, and each
 * alias uses its guest-physical base as its offset into "xen.ram".
 *
 * @machine: supplies ram_size, the total amount of guest RAM to map.
 */
static void xen_init_ram(MachineState *machine)
{
    MemoryRegion *sysmem = get_system_memory();
    ram_addr_t block_len, ram_size[GUEST_RAM_BANKS];

    if (machine->ram_size <= GUEST_RAM0_SIZE) {
        /* Everything fits in the low bank. */
        ram_size[0] = machine->ram_size;
        ram_size[1] = 0;
        block_len = GUEST_RAM0_BASE + ram_size[0];
    } else {
        /* Fill the low bank and spill the remainder into the high bank. */
        ram_size[0] = GUEST_RAM0_SIZE;
        ram_size[1] = machine->ram_size - GUEST_RAM0_SIZE;
        block_len = GUEST_RAM1_BASE + ram_size[1];
    }

    memory_region_init_ram(&ram_memory, NULL, "xen.ram", block_len,
                           &error_fatal);

    memory_region_init_alias(&ram_lo, NULL, "xen.ram.lo", &ram_memory,
                             GUEST_RAM0_BASE, ram_size[0]);
    memory_region_add_subregion(sysmem, GUEST_RAM0_BASE, &ram_lo);
    /*
     * ram_addr_t is not 'unsigned long' on every host, so "%lx" would
     * mismatch the argument. Widen explicitly and print with "%llx".
     */
    DPRINTF("Initialized region xen.ram.lo: base 0x%llx size 0x%llx\n",
            (unsigned long long)GUEST_RAM0_BASE,
            (unsigned long long)ram_size[0]);

    if (ram_size[1] > 0) {
        memory_region_init_alias(&ram_hi, NULL, "xen.ram.hi", &ram_memory,
                                 GUEST_RAM1_BASE, ram_size[1]);
        memory_region_add_subregion(sysmem, GUEST_RAM1_BASE, &ram_hi);
        DPRINTF("Initialized region xen.ram.hi: base 0x%llx size 0x%llx\n",
                (unsigned long long)GUEST_RAM1_BASE,
                (unsigned long long)ram_size[1]);
    }
}

void arch_handle_ioreq(XenIOState *state, ioreq_t *req)
{
hw_error("Invalid ioreq type 0x%x\n", req->type);
Expand Down Expand Up @@ -108,8 +176,18 @@ static void xen_arm_init(MachineState *machine)

xam->state = g_new0(XenIOState, 1);

if (machine->ram_size == 0) {
DPRINTF("ram_size not specified. QEMU machine started without IOREQ"
"(no emulated devices including Virtio)\n");
return;
}

xen_init_ram(machine);

xen_register_ioreq(xam->state, machine->smp.cpus, xen_memory_listener);

xen_create_virtio_mmio_devices(xam);

#ifdef CONFIG_TPM
if (xam->cfg.tpm_base_addr) {
xen_enable_tpm(xam);
Expand Down Expand Up @@ -153,6 +231,8 @@ static void xen_arm_machine_class_init(ObjectClass *oc, void *data)
mc->init = xen_arm_init;
mc->max_cpus = 1;
mc->default_machine_opts = "accel=xen";
/* Set explicitly here to make sure that real ram_size is passed */
mc->default_ram_size = 0;

#ifdef CONFIG_TPM
object_class_property_add(oc, "tpm-base-addr", "uint64_t",
Expand Down
10 changes: 10 additions & 0 deletions include/exec/exec-all.h
Expand Up @@ -629,6 +629,15 @@ void TSA_NO_TSA mmap_lock(void);
void TSA_NO_TSA mmap_unlock(void);
bool have_mmap_lock(void);

/*
 * Cleanup callback used by WITH_MMAP_LOCK_GUARD(): releases the mmap lock.
 * The pointer argument supplied by the cleanup attribute is ignored.
 */
static inline void mmap_unlock_guard(void *unused)
{
mmap_unlock();
}

/*
 * WITH_MMAP_LOCK_GUARD() { ... }
 *
 * Run the following statement or block exactly once with the mmap lock held.
 *
 * The macro expands to a for-loop header: the loop variable's initializer
 * calls mmap_lock() (via the comma expression) and yields 0, the condition
 * admits a single iteration, and the increment sets the variable to 1 so the
 * loop ends. The variable carries the compiler's cleanup attribute, so
 * mmap_unlock_guard() -- and therefore mmap_unlock() -- runs when the
 * variable leaves scope, including when the body exits early via 'return'
 * or 'break'.
 *
 * NOTE(review): cleanup handlers are not run when a scope is abandoned via
 * longjmp -- confirm that no guarded body can exit that way.
 */
#define WITH_MMAP_LOCK_GUARD() \
for (int _mmap_lock_iter __attribute__((cleanup(mmap_unlock_guard))) \
= (mmap_lock(), 0); _mmap_lock_iter == 0; _mmap_lock_iter = 1)

/**
* adjust_signal_pc:
* @pc: raw pc from the host signal ucontext_t.
Expand Down Expand Up @@ -683,6 +692,7 @@ G_NORETURN void cpu_loop_exit_sigbus(CPUState *cpu, target_ulong addr,
#else
static inline void mmap_lock(void) {}
static inline void mmap_unlock(void) {}
#define WITH_MMAP_LOCK_GUARD()

void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length);
void tlb_set_dirty(CPUState *cpu, vaddr addr);
Expand Down
7 changes: 0 additions & 7 deletions linux-user/elfload.c
Expand Up @@ -3618,13 +3618,6 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info)

if (elf_interpreter) {
load_elf_interp(elf_interpreter, &interp_info, bprm->buf);
/*
* adjust brk address if the interpreter was loaded above the main
* executable, e.g. happens with static binaries on armhf
*/
if (interp_info.brk > info->brk) {
info->brk = interp_info.brk;
}

/* If the program interpreter is one of these two, then assume
an iBCS2 image. Otherwise assume a native linux image. */
Expand Down
1 change: 1 addition & 0 deletions linux-user/mmap.c
Expand Up @@ -36,6 +36,7 @@ void mmap_lock(void)

void mmap_unlock(void)
{
assert(mmap_lock_count > 0);
if (--mmap_lock_count == 0) {
pthread_mutex_unlock(&mmap_mutex);
}
Expand Down
Binary file modified pc-bios/opensbi-riscv32-generic-fw_dynamic.bin
Binary file not shown.
Binary file modified pc-bios/opensbi-riscv64-generic-fw_dynamic.bin
Binary file not shown.
2 changes: 1 addition & 1 deletion roms/opensbi
Submodule opensbi updated from 255279 to 057eb1
5 changes: 4 additions & 1 deletion tcg/i386/tcg-target-con-set.h
Expand Up @@ -11,6 +11,9 @@
*
* C_N1_Im(...) defines a constraint set with 1 output and <m> inputs,
* except that the output must use a new register.
*
* C_Nn_Om_Ik(...) defines a constraint set with <n + m> outputs and <k>
* inputs, except that the first <n> outputs must use new registers.
*/
C_O0_I1(r)
C_O0_I2(L, L)
Expand Down Expand Up @@ -53,4 +56,4 @@ C_O2_I1(r, r, L)
C_O2_I2(a, d, a, r)
C_O2_I2(r, r, L, L)
C_O2_I3(a, d, 0, 1, r)
C_O2_I4(r, r, 0, 1, re, re)
C_N1_O1_I4(r, r, 0, 1, re, re)
2 changes: 1 addition & 1 deletion tcg/i386/tcg-target.c.inc
Expand Up @@ -3335,7 +3335,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_add2_i64:
case INDEX_op_sub2_i32:
case INDEX_op_sub2_i64:
return C_O2_I4(r, r, 0, 1, re, re);
return C_N1_O1_I4(r, r, 0, 1, re, re);

case INDEX_op_ctz_i32:
case INDEX_op_ctz_i64:
Expand Down
9 changes: 5 additions & 4 deletions tcg/ppc/tcg-target.c.inc
Expand Up @@ -2496,11 +2496,10 @@ static void tcg_out_goto_tb(TCGContext *s, int which)
ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr);
tcg_out_mem_long(s, LD, LDX, TCG_REG_TB, TCG_REG_TB, offset);

/* Direct branch will be patched by tb_target_set_jmp_target. */
/* TODO: Use direct branches when possible. */
set_jmp_insn_offset(s, which);
tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);

/* When branch is out of range, fall through to indirect. */
tcg_out32(s, BCCTR | BO_ALWAYS);

/* For the unlinked case, need to reset TCG_REG_TB. */
Expand Down Expand Up @@ -2528,10 +2527,12 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
intptr_t diff = addr - jmp_rx;
tcg_insn_unit insn;

if (USE_REG_TB) {
return;
}

if (in_range_b(diff)) {
insn = B | (diff & 0x3fffffc);
} else if (USE_REG_TB) {
insn = MTSPR | RS(TCG_REG_TB) | CTR;
} else {
insn = NOP;
}
Expand Down
8 changes: 5 additions & 3 deletions tcg/s390x/tcg-target-con-set.h
Expand Up @@ -8,6 +8,9 @@
* C_On_Im(...) defines a constraint set with <n> outputs and <m> inputs.
* Each operand should be a sequence of constraint letters as defined by
* tcg-target-con-str.h; the constraint combination is inclusive or.
*
* C_Nn_Om_Ik(...) defines a constraint set with <n + m> outputs and <k>
* inputs, except that the first <n> outputs must use new registers.
*/
C_O0_I1(r)
C_O0_I2(r, r)
Expand Down Expand Up @@ -41,6 +44,5 @@ C_O2_I1(o, m, r)
C_O2_I2(o, m, 0, r)
C_O2_I2(o, m, r, r)
C_O2_I3(o, m, 0, 1, r)
C_O2_I4(r, r, 0, 1, rA, r)
C_O2_I4(r, r, 0, 1, ri, r)
C_O2_I4(r, r, 0, 1, r, r)
C_N1_O1_I4(r, r, 0, 1, ri, r)
C_N1_O1_I4(r, r, 0, 1, rA, r)