6 changes: 4 additions & 2 deletions hw/usb/host-libusb.c
@@ -1141,7 +1141,8 @@ static void usb_host_nodev_bh(void *opaque)
static void usb_host_nodev(USBHostDevice *s)
{
if (!s->bh_nodev) {
s->bh_nodev = qemu_bh_new(usb_host_nodev_bh, s);
s->bh_nodev = qemu_bh_new_guarded(usb_host_nodev_bh, s,
&DEVICE(s)->mem_reentrancy_guard);
}
qemu_bh_schedule(s->bh_nodev);
}
@@ -1739,7 +1740,8 @@ static int usb_host_post_load(void *opaque, int version_id)
USBHostDevice *dev = opaque;

if (!dev->bh_postld) {
dev->bh_postld = qemu_bh_new(usb_host_post_load_bh, dev);
dev->bh_postld = qemu_bh_new_guarded(usb_host_post_load_bh, dev,
&DEVICE(dev)->mem_reentrancy_guard);
}
qemu_bh_schedule(dev->bh_postld);
dev->bh_postld_pending = true;
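The conversion in hw/usb/host-libusb.c is the pattern repeated in every device file below: the bottom half is created with qemu_bh_new_guarded() and tied to the owning device's mem_reentrancy_guard, so the callback is tracked by the same flag the memory core sets during MMIO/PIO dispatch. A minimal sketch for a hypothetical qdev device (MyDevice, my_dev_bh and the realize function are illustrative, not from the tree):

    #include "qemu/osdep.h"
    #include "qemu/main-loop.h"
    #include "hw/qdev-core.h"

    typedef struct MyDevice {
        DeviceState parent_obj;
        QEMUBH *bh;
    } MyDevice;

    static void my_dev_bh(void *opaque)
    {
        /* deferred work, now guarded against device re-entrancy */
    }

    static void my_dev_realize(DeviceState *dev, Error **errp)
    {
        MyDevice *s = (MyDevice *)dev;

        /* before: s->bh = qemu_bh_new(my_dev_bh, s); */
        s->bh = qemu_bh_new_guarded(my_dev_bh, s,
                                    &dev->mem_reentrancy_guard);
    }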
6 changes: 4 additions & 2 deletions hw/usb/redirect.c
@@ -1441,8 +1441,10 @@ static void usbredir_realize(USBDevice *udev, Error **errp)
}
}

dev->chardev_close_bh = qemu_bh_new(usbredir_chardev_close_bh, dev);
dev->device_reject_bh = qemu_bh_new(usbredir_device_reject_bh, dev);
dev->chardev_close_bh = qemu_bh_new_guarded(usbredir_chardev_close_bh, dev,
&DEVICE(dev)->mem_reentrancy_guard);
dev->device_reject_bh = qemu_bh_new_guarded(usbredir_device_reject_bh, dev,
&DEVICE(dev)->mem_reentrancy_guard);
dev->attach_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, usbredir_do_attach, dev);

packet_id_queue_init(&dev->cancelled, dev, "cancelled");
3 changes: 2 additions & 1 deletion hw/usb/xen-usb.c
@@ -1032,7 +1032,8 @@ static void usbback_alloc(struct XenLegacyDevice *xendev)

QTAILQ_INIT(&usbif->req_free_q);
QSIMPLEQ_INIT(&usbif->hotplug_q);
usbif->bh = qemu_bh_new(usbback_bh, usbif);
usbif->bh = qemu_bh_new_guarded(usbback_bh, usbif,
&DEVICE(xendev)->mem_reentrancy_guard);
}

static int usbback_free(struct XenLegacyDevice *xendev)
5 changes: 3 additions & 2 deletions hw/virtio/virtio-balloon.c
@@ -886,8 +886,9 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
precopy_add_notifier(&s->free_page_hint_notify);

object_ref(OBJECT(s->iothread));
s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread),
virtio_ballloon_get_free_page_hints, s);
s->free_page_bh = aio_bh_new_guarded(iothread_get_aio_context(s->iothread),
virtio_ballloon_get_free_page_hints, s,
&dev->mem_reentrancy_guard);
}

if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_REPORTING)) {
3 changes: 2 additions & 1 deletion hw/virtio/virtio-crypto.c
@@ -1074,7 +1074,8 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp)
vcrypto->vqs[i].dataq =
virtio_add_queue(vdev, 1024, virtio_crypto_handle_dataq_bh);
vcrypto->vqs[i].dataq_bh =
qemu_bh_new(virtio_crypto_dataq_bh, &vcrypto->vqs[i]);
qemu_bh_new_guarded(virtio_crypto_dataq_bh, &vcrypto->vqs[i],
&dev->mem_reentrancy_guard);
vcrypto->vqs[i].vcrypto = vcrypto;
}

18 changes: 16 additions & 2 deletions include/block/aio.h
@@ -23,6 +23,8 @@
#include "qemu/thread.h"
#include "qemu/timer.h"
#include "block/graph-lock.h"
#include "hw/qdev-core.h"


typedef struct BlockAIOCB BlockAIOCB;
typedef void BlockCompletionFunc(void *opaque, int ret);
@@ -323,9 +325,11 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
* is opaque and must be allocated prior to its use.
*
* @name: A human-readable identifier for debugging purposes.
* @reentrancy_guard: A guard set when entering a cb to prevent
* device-reentrancy issues
*/
QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
const char *name);
const char *name, MemReentrancyGuard *reentrancy_guard);

/**
* aio_bh_new: Allocate a new bottom half structure
@@ -334,7 +338,17 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
* string.
*/
#define aio_bh_new(ctx, cb, opaque) \
aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)))
aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), NULL)

/**
* aio_bh_new_guarded: Allocate a new bottom half structure with a
* reentrancy_guard
*
* A convenience wrapper for aio_bh_new_full() that uses the cb as the name
* string.
*/
#define aio_bh_new_guarded(ctx, cb, opaque, guard) \
aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), guard)

/**
* aio_notify: Force processing of pending events.
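aio_bh_new_guarded() is the counterpart for bottom halves attached to an explicit AioContext, such as an IOThread; virtio-balloon above is the in-tree user. A hedged sketch of the same call from a hypothetical device helper (the IOThread pointer and names are illustrative):

    #include "qemu/osdep.h"
    #include "block/aio.h"
    #include "sysemu/iothread.h"

    static void my_dev_bh(void *opaque);

    static QEMUBH *my_dev_new_iothread_bh(DeviceState *dev, IOThread *iothread,
                                          void *opaque)
    {
        return aio_bh_new_guarded(iothread_get_aio_context(iothread),
                                  my_dev_bh, opaque,
                                  &dev->mem_reentrancy_guard);
    }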
5 changes: 5 additions & 0 deletions include/exec/memory.h
@@ -767,6 +767,8 @@ struct MemoryRegion {
bool is_iommu;
RAMBlock *ram_block;
Object *owner;
/* owner as TYPE_DEVICE. Used for re-entrancy checks in MR access hotpath */
DeviceState *dev;

const MemoryRegionOps *ops;
void *opaque;
@@ -791,6 +793,9 @@
unsigned ioeventfd_nb;
MemoryRegionIoeventfd *ioeventfds;
RamDiscardManager *rdm; /* Only for RAM */

/* For devices designed to perform re-entrant IO into their own IO MRs */
bool disable_reentrancy_guard;
};

struct IOMMUMemoryRegion {
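disable_reentrancy_guard is the opt-out for the few devices that really do access their own I/O regions from within a handler. A sketch of how a device model might set it right after creating its region (memory_region_init_io() is the existing API; the device, ops and region name are hypothetical):

    memory_region_init_io(&s->iomem, OBJECT(s), &my_dev_ops, s,
                          "my-dev-mmio", 0x1000);
    /* this device deliberately re-enters its own MMIO region,
     * e.g. via self-targeted DMA, so skip the guard for it */
    s->iomem.disable_reentrancy_guard = true;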
7 changes: 7 additions & 0 deletions include/hw/qdev-core.h
@@ -162,6 +162,10 @@ struct NamedClockList {
QLIST_ENTRY(NamedClockList) node;
};

typedef struct {
bool engaged_in_io;
} MemReentrancyGuard;

/**
* DeviceState:
* @realized: Indicates whether the device has been fully constructed.
@@ -194,6 +198,9 @@ struct DeviceState {
int alias_required_for_version;
ResettableState reset;
GSList *unplug_blockers;

/* Is the device currently in mmio/pio/dma? Used to prevent re-entrancy */
MemReentrancyGuard mem_reentrancy_guard;
};

struct DeviceListener {
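MemReentrancyGuard is deliberately tiny: one flag per device, set by the memory core around MMIO/PIO dispatch and by aio_bh_call() around guarded bottom halves. A dispatch path outside those two hooks could in principle bracket its callback with the same save/set/restore discipline; this is only a sketch of that idea, not something this series adds:

    static void my_dev_run_guarded(DeviceState *dev,
                                   void (*fn)(void *), void *opaque)
    {
        bool was_engaged = dev->mem_reentrancy_guard.engaged_in_io;

        dev->mem_reentrancy_guard.engaged_in_io = true;
        fn(opaque);
        dev->mem_reentrancy_guard.engaged_in_io = was_engaged;
    }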
7 changes: 5 additions & 2 deletions include/qemu/main-loop.h
@@ -387,9 +387,12 @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms);

/* internal interfaces */

#define qemu_bh_new_guarded(cb, opaque, guard) \
qemu_bh_new_full((cb), (opaque), (stringify(cb)), guard)
#define qemu_bh_new(cb, opaque) \
qemu_bh_new_full((cb), (opaque), (stringify(cb)))
QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name);
qemu_bh_new_full((cb), (opaque), (stringify(cb)), NULL)
QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
MemReentrancyGuard *reentrancy_guard);
void qemu_bh_schedule_idle(QEMUBH *bh);

enum {
8 changes: 8 additions & 0 deletions scripts/checkpatch.pl
@@ -2865,6 +2865,14 @@ sub process {
if ($line =~ /\bsignal\s*\(/ && !($line =~ /SIG_(?:IGN|DFL)/)) {
ERROR("use sigaction to establish signal handlers; signal is not portable\n" . $herecurr);
}
# recommend qemu_bh_new_guarded instead of qemu_bh_new
if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\bqemu_bh_new\s*\(/) {
ERROR("use qemu_bh_new_guarded() instead of qemu_bh_new() to avoid reentrancy problems\n" . $herecurr);
}
# recommend aio_bh_new_guarded instead of aio_bh_new
if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\baio_bh_new\s*\(/) {
ERROR("use aio_bh_new_guarded() instead of aio_bh_new() to avoid reentrancy problems\n" . $herecurr);
}
# check for module_init(), use category-specific init macros explicitly please
if ($line =~ /^module_init\s*\(/) {
ERROR("please use block_init(), type_init() etc. instead of module_init()\n" . $herecurr);
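Both checks only trigger for sources under the hw/ tree, and the fix they demand is the conversion applied throughout this series. Illustrative before/after for a hypothetical device:

    /* flagged by checkpatch: */
    s->bh = qemu_bh_new(my_dev_bh, s);

    /* accepted: */
    s->bh = qemu_bh_new_guarded(my_dev_bh, s,
                                &DEVICE(s)->mem_reentrancy_guard);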
16 changes: 16 additions & 0 deletions softmmu/memory.c
@@ -542,6 +542,18 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
access_size_max = 4;
}

/* Do not allow more than one simultaneous access to a device's IO Regions */
if (mr->dev && !mr->disable_reentrancy_guard &&
!mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) {
if (mr->dev->mem_reentrancy_guard.engaged_in_io) {
warn_report_once("Blocked re-entrant IO on MemoryRegion: "
"%s at addr: 0x%" HWADDR_PRIX,
memory_region_name(mr), addr);
return MEMTX_ACCESS_ERROR;
}
mr->dev->mem_reentrancy_guard.engaged_in_io = true;
}

/* FIXME: support unaligned access? */
access_size = MAX(MIN(size, access_size_max), access_size_min);
access_mask = MAKE_64BIT_MASK(0, access_size * 8);
@@ -556,6 +568,9 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
access_mask, attrs);
}
}
if (mr->dev) {
mr->dev->mem_reentrancy_guard.engaged_in_io = false;
}
return r;
}

@@ -1170,6 +1185,7 @@ static void memory_region_do_init(MemoryRegion *mr,
}
mr->name = g_strdup(name);
mr->owner = owner;
mr->dev = (DeviceState *) object_dynamic_cast(mr->owner, TYPE_DEVICE);
mr->ram_block = NULL;

if (name) {
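The scenario this blocks is the classic DMA-to-self bug: a guest programs a device so that a DMA started from its MMIO write handler lands back in the device's own MMIO window, re-entering the handler with half-updated state. A hedged sketch of such a handler (MyDevice and its fields are hypothetical; dma_memory_write() is the real helper):

    static void my_dev_mmio_write(void *opaque, hwaddr addr,
                                  uint64_t val, unsigned size)
    {
        MyDevice *s = opaque;

        s->len = val;
        /* If s->dma_addr points back into this device's MMIO window,
         * this used to re-enter my_dev_mmio_write() recursively.  With
         * the guard set by access_with_adjusted_size(), the nested
         * access is now rejected with MEMTX_ACCESS_ERROR and reported
         * once via warn_report_once(). */
        dma_memory_write(&address_space_memory, s->dma_addr,
                         s->buf, s->len, MEMTXATTRS_UNSPECIFIED);
    }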
65 changes: 45 additions & 20 deletions target/s390x/gdbstub.c
@@ -206,12 +206,8 @@ static int cpu_write_c_reg(CPUS390XState *env, uint8_t *mem_buf, int n)
#define S390_VIRT_CPUTM_REGNUM 1
#define S390_VIRT_BEA_REGNUM 2
#define S390_VIRT_PREFIX_REGNUM 3
#define S390_VIRT_PP_REGNUM 4
#define S390_VIRT_PFT_REGNUM 5
#define S390_VIRT_PFS_REGNUM 6
#define S390_VIRT_PFC_REGNUM 7
/* total number of registers in s390-virt.xml */
#define S390_NUM_VIRT_REGS 8
#define S390_NUM_VIRT_REGS 4

static int cpu_read_virt_reg(CPUS390XState *env, GByteArray *mem_buf, int n)
{
@@ -224,14 +220,6 @@ static int cpu_read_virt_reg(CPUS390XState *env, GByteArray *mem_buf, int n)
return gdb_get_regl(mem_buf, env->gbea);
case S390_VIRT_PREFIX_REGNUM:
return gdb_get_regl(mem_buf, env->psa);
case S390_VIRT_PP_REGNUM:
return gdb_get_regl(mem_buf, env->pp);
case S390_VIRT_PFT_REGNUM:
return gdb_get_regl(mem_buf, env->pfault_token);
case S390_VIRT_PFS_REGNUM:
return gdb_get_regl(mem_buf, env->pfault_select);
case S390_VIRT_PFC_REGNUM:
return gdb_get_regl(mem_buf, env->pfault_compare);
default:
return 0;
}
@@ -256,19 +244,51 @@ static int cpu_write_virt_reg(CPUS390XState *env, uint8_t *mem_buf, int n)
env->psa = ldtul_p(mem_buf);
cpu_synchronize_post_init(env_cpu(env));
return 8;
case S390_VIRT_PP_REGNUM:
default:
return 0;
}
}

/* the values represent the positions in s390-virt-kvm.xml */
#define S390_VIRT_KVM_PP_REGNUM 0
#define S390_VIRT_KVM_PFT_REGNUM 1
#define S390_VIRT_KVM_PFS_REGNUM 2
#define S390_VIRT_KVM_PFC_REGNUM 3
/* total number of registers in s390-virt-kvm.xml */
#define S390_NUM_VIRT_KVM_REGS 4

static int cpu_read_virt_kvm_reg(CPUS390XState *env, GByteArray *mem_buf, int n)
{
switch (n) {
case S390_VIRT_KVM_PP_REGNUM:
return gdb_get_regl(mem_buf, env->pp);
case S390_VIRT_KVM_PFT_REGNUM:
return gdb_get_regl(mem_buf, env->pfault_token);
case S390_VIRT_KVM_PFS_REGNUM:
return gdb_get_regl(mem_buf, env->pfault_select);
case S390_VIRT_KVM_PFC_REGNUM:
return gdb_get_regl(mem_buf, env->pfault_compare);
default:
return 0;
}
}

static int cpu_write_virt_kvm_reg(CPUS390XState *env, uint8_t *mem_buf, int n)
{
switch (n) {
case S390_VIRT_KVM_PP_REGNUM:
env->pp = ldtul_p(mem_buf);
cpu_synchronize_post_init(env_cpu(env));
return 8;
case S390_VIRT_PFT_REGNUM:
case S390_VIRT_KVM_PFT_REGNUM:
env->pfault_token = ldtul_p(mem_buf);
cpu_synchronize_post_init(env_cpu(env));
return 8;
case S390_VIRT_PFS_REGNUM:
case S390_VIRT_KVM_PFS_REGNUM:
env->pfault_select = ldtul_p(mem_buf);
cpu_synchronize_post_init(env_cpu(env));
return 8;
case S390_VIRT_PFC_REGNUM:
case S390_VIRT_KVM_PFC_REGNUM:
env->pfault_compare = ldtul_p(mem_buf);
cpu_synchronize_post_init(env_cpu(env));
return 8;
@@ -321,10 +341,15 @@ void s390_cpu_gdb_init(CPUState *cs)
cpu_write_c_reg,
S390_NUM_C_REGS, "s390-cr.xml", 0);

gdb_register_coprocessor(cs, cpu_read_virt_reg,
cpu_write_virt_reg,
S390_NUM_VIRT_REGS, "s390-virt.xml", 0);

if (kvm_enabled()) {
gdb_register_coprocessor(cs, cpu_read_virt_reg,
cpu_write_virt_reg,
S390_NUM_VIRT_REGS, "s390-virt.xml", 0);
gdb_register_coprocessor(cs, cpu_read_virt_kvm_reg,
cpu_write_virt_kvm_reg,
S390_NUM_VIRT_KVM_REGS, "s390-virt-kvm.xml",
0);
}
#endif
}
3 changes: 2 additions & 1 deletion tests/qtest/vhost-user-test.c
@@ -351,7 +351,7 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
if (size != msg.size) {
qos_printf("%s: Wrong message size received %d != %d\n",
__func__, size, msg.size);
return;
goto out;
}
}

@@ -509,6 +509,7 @@ static void chr_read(void *opaque, const uint8_t *buf, int size)
break;
}

out:
g_mutex_unlock(&s->data_mutex);
}

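This hunk is a lock-leak fix rather than a guard conversion: the early return on a short read skipped the g_mutex_unlock() at the end of chr_read(), so the next invocation deadlocked on s->data_mutex. Reduced to its skeleton (struct and field names follow the test, body simplified):

    static void handle_payload(TestServer *s, int size, int expected)
    {
        g_mutex_lock(&s->data_mutex);
        if (size != expected) {
            goto out;            /* was: return;  -- left the mutex held */
        }
        /* ... process the message ... */
    out:
        g_mutex_unlock(&s->data_mutex);
    }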
3 changes: 2 additions & 1 deletion tests/unit/ptimer-test-stubs.c
@@ -107,7 +107,8 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask)
return deadline;
}

QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name)
QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
MemReentrancyGuard *reentrancy_guard)
{
QEMUBH *bh = g_new(QEMUBH, 1);

18 changes: 17 additions & 1 deletion util/async.c
@@ -65,6 +65,7 @@ struct QEMUBH {
void *opaque;
QSLIST_ENTRY(QEMUBH) next;
unsigned flags;
MemReentrancyGuard *reentrancy_guard;
};

/* Called concurrently from any thread */
@@ -137,7 +138,7 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb,
}

QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
const char *name)
const char *name, MemReentrancyGuard *reentrancy_guard)
{
QEMUBH *bh;
bh = g_new(QEMUBH, 1);
@@ -146,13 +147,28 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
.cb = cb,
.opaque = opaque,
.name = name,
.reentrancy_guard = reentrancy_guard,
};
return bh;
}

void aio_bh_call(QEMUBH *bh)
{
bool last_engaged_in_io = false;

if (bh->reentrancy_guard) {
last_engaged_in_io = bh->reentrancy_guard->engaged_in_io;
if (bh->reentrancy_guard->engaged_in_io) {
trace_reentrant_aio(bh->ctx, bh->name);
}
bh->reentrancy_guard->engaged_in_io = true;
}

bh->cb(bh->opaque);

if (bh->reentrancy_guard) {
bh->reentrancy_guard->engaged_in_io = last_engaged_in_io;
}
}

/* Multiple occurrences of aio_bh_poll cannot be called concurrently. */
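aio_bh_call() saves the previous value of engaged_in_io instead of unconditionally clearing it, so a guarded BH that happens to run while its device is already marked as engaged (for instance from a nested event-loop poll inside an MMIO handler) restores the outer state on return. A sketch of that nesting (illustrative only; aio_bh_call() and the guard field are the ones in this patch):

    /* Sketch only: why aio_bh_call() restores rather than clears the flag. */
    static void nested_dispatch_example(DeviceState *dev, QEMUBH *guarded_bh)
    {
        /* memory core enters the device's MMIO handler */
        dev->mem_reentrancy_guard.engaged_in_io = true;

        /*
         * A nested event-loop iteration dispatches the guarded BH.
         * aio_bh_call() records last_engaged_in_io == true, emits the
         * reentrant_aio trace point, runs the callback, then restores
         * true -- it does not force the flag back to false.
         */
        aio_bh_call(guarded_bh);

        /* ...so the outer MMIO dispatch still clears it exactly once. */
        dev->mem_reentrancy_guard.engaged_in_io = false;
    }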
6 changes: 4 additions & 2 deletions util/main-loop.c
@@ -605,9 +605,11 @@ void main_loop_wait(int nonblocking)

/* Functions to operate on the main QEMU AioContext. */

QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name)
QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
MemReentrancyGuard *reentrancy_guard)
{
return aio_bh_new_full(qemu_aio_context, cb, opaque, name);
return aio_bh_new_full(qemu_aio_context, cb, opaque, name,
reentrancy_guard);
}

/*
Expand Down
1 change: 1 addition & 0 deletions util/trace-events
@@ -11,6 +11,7 @@ poll_remove(void *ctx, void *node, int fd) "ctx %p node %p fd %d"
# async.c
aio_co_schedule(void *ctx, void *co) "ctx %p co %p"
aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p"
reentrant_aio(void *ctx, const char *name) "ctx %p name %s"

# thread-pool.c
thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"