scsi: only access SCSIDevice->requests from one thread
Stop depending on the AioContext lock and instead access
SCSIDevice->requests from only one thread at a time:
- When the VM is running only the BlockBackend's AioContext may access
  the requests list.
- When the VM is stopped only the main loop may access the requests
  list.

These constraints protect the requests list without the need for locking
in the I/O code path.

Note that multiple IOThreads are not supported yet because the code
assumes all SCSIRequests are executed from a single AioContext. Leave
that as future work.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-ID: <20231204164259.1515217-2-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Stefan Hajnoczi authored and Kevin Wolf committed Dec 21, 2023
1 parent 68fa478 commit 2fb957b
Showing 2 changed files with 131 additions and 57 deletions.
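
For orientation before the diff: the pattern the patch converges on is that main-loop code never walks SCSIDevice->requests directly anymore; it hands a callback to the new scsi_device_for_each_req_async() helper, which runs it in the BlockBackend's AioContext. The following is condensed from the scsi_dma_restart_cb() change below, with explanatory comments added here (they are not part of the patch):

/* Runstate change handler, called on the main loop thread */
static void scsi_dma_restart_cb(void *opaque, bool running, RunState state)
{
    SCSIDevice *s = opaque;

    assert(qemu_in_main_thread());

    if (!running) {
        return;
    }

    /*
     * No AioContext lock and no direct list walk: the per-request work
     * (scsi_dma_restart_req) is scheduled into the AioContext that owns
     * the requests list.
     */
    scsi_device_for_each_req_async(s, scsi_dma_restart_req, NULL);
}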
181 changes: 125 additions & 56 deletions hw/scsi/scsi-bus.c
@@ -85,6 +85,89 @@ SCSIDevice *scsi_device_get(SCSIBus *bus, int channel, int id, int lun)
    return d;
}

/*
 * Invoke @fn() for each enqueued request in device @s. Must be called from the
 * main loop thread while the guest is stopped. This is only suitable for
 * vmstate ->put(), use scsi_device_for_each_req_async() for other cases.
 */
static void scsi_device_for_each_req_sync(SCSIDevice *s,
                                          void (*fn)(SCSIRequest *, void *),
                                          void *opaque)
{
    SCSIRequest *req;
    SCSIRequest *next_req;

    assert(!runstate_is_running());
    assert(qemu_in_main_thread());

    QTAILQ_FOREACH_SAFE(req, &s->requests, next, next_req) {
        fn(req, opaque);
    }
}

typedef struct {
    SCSIDevice *s;
    void (*fn)(SCSIRequest *, void *);
    void *fn_opaque;
} SCSIDeviceForEachReqAsyncData;

static void scsi_device_for_each_req_async_bh(void *opaque)
{
    g_autofree SCSIDeviceForEachReqAsyncData *data = opaque;
    SCSIDevice *s = data->s;
    AioContext *ctx;
    SCSIRequest *req;
    SCSIRequest *next;

    /*
     * If the AioContext changed before this BH was called then reschedule into
     * the new AioContext before accessing ->requests. This can happen when
     * scsi_device_for_each_req_async() is called and then the AioContext is
     * changed before BHs are run.
     */
    ctx = blk_get_aio_context(s->conf.blk);
    if (ctx != qemu_get_current_aio_context()) {
        aio_bh_schedule_oneshot(ctx, scsi_device_for_each_req_async_bh,
                                g_steal_pointer(&data));
        return;
    }

    QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
        data->fn(req, data->fn_opaque);
    }

    /* Drop the reference taken by scsi_device_for_each_req_async() */
    object_unref(OBJECT(s));
}

/*
 * Schedule @fn() to be invoked for each enqueued request in device @s. @fn()
 * runs in the AioContext that is executing the request.
 */
static void scsi_device_for_each_req_async(SCSIDevice *s,
                                           void (*fn)(SCSIRequest *, void *),
                                           void *opaque)
{
    assert(qemu_in_main_thread());

    SCSIDeviceForEachReqAsyncData *data =
        g_new(SCSIDeviceForEachReqAsyncData, 1);

    data->s = s;
    data->fn = fn;
    data->fn_opaque = opaque;

    /*
     * Hold a reference to the SCSIDevice until
     * scsi_device_for_each_req_async_bh() finishes.
     */
    object_ref(OBJECT(s));

    aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.blk),
                            scsi_device_for_each_req_async_bh,
                            data);
}

static void scsi_device_realize(SCSIDevice *s, Error **errp)
{
    SCSIDeviceClass *sc = SCSI_DEVICE_GET_CLASS(s);
@@ -144,20 +227,18 @@ void scsi_bus_init_named(SCSIBus *bus, size_t bus_size, DeviceState *host,
    qbus_set_bus_hotplug_handler(BUS(bus));
}

static void scsi_dma_restart_bh(void *opaque)
void scsi_req_retry(SCSIRequest *req)
{
    SCSIDevice *s = opaque;
    SCSIRequest *req, *next;

    qemu_bh_delete(s->bh);
    s->bh = NULL;
    req->retry = true;
}

    aio_context_acquire(blk_get_aio_context(s->conf.blk));
    QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
        scsi_req_ref(req);
        if (req->retry) {
            req->retry = false;
            switch (req->cmd.mode) {
/* Called in the AioContext that is executing the request */
static void scsi_dma_restart_req(SCSIRequest *req, void *opaque)
{
    scsi_req_ref(req);
    if (req->retry) {
        req->retry = false;
        switch (req->cmd.mode) {
        case SCSI_XFER_FROM_DEV:
        case SCSI_XFER_TO_DEV:
            scsi_req_continue(req);
@@ -166,37 +247,22 @@ static void scsi_dma_restart_bh(void *opaque)
            scsi_req_dequeue(req);
            scsi_req_enqueue(req);
            break;
        }
    }
    scsi_req_unref(req);
}
    aio_context_release(blk_get_aio_context(s->conf.blk));
    /* Drop the reference that was acquired in scsi_dma_restart_cb */
    object_unref(OBJECT(s));
}

void scsi_req_retry(SCSIRequest *req)
{
    /* No need to save a reference, because scsi_dma_restart_bh just
     * looks at the request list. */
    req->retry = true;
    scsi_req_unref(req);
}

static void scsi_dma_restart_cb(void *opaque, bool running, RunState state)
{
    SCSIDevice *s = opaque;

    assert(qemu_in_main_thread());

    if (!running) {
        return;
    }
    if (!s->bh) {
        AioContext *ctx = blk_get_aio_context(s->conf.blk);
        /* The reference is dropped in scsi_dma_restart_bh.*/
        object_ref(OBJECT(s));
        s->bh = aio_bh_new_guarded(ctx, scsi_dma_restart_bh, s,
                                   &DEVICE(s)->mem_reentrancy_guard);
        qemu_bh_schedule(s->bh);
    }

    scsi_device_for_each_req_async(s, scsi_dma_restart_req, NULL);
}

static bool scsi_bus_is_address_free(SCSIBus *bus,
@@ -1657,15 +1723,16 @@ void scsi_device_set_ua(SCSIDevice *sdev, SCSISense sense)
    }
}

static void scsi_device_purge_one_req(SCSIRequest *req, void *opaque)
{
    scsi_req_cancel_async(req, NULL);
}

void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense)
{
    SCSIRequest *req;
    scsi_device_for_each_req_async(sdev, scsi_device_purge_one_req, NULL);

    aio_context_acquire(blk_get_aio_context(sdev->conf.blk));
    while (!QTAILQ_EMPTY(&sdev->requests)) {
        req = QTAILQ_FIRST(&sdev->requests);
        scsi_req_cancel_async(req, NULL);
    }
    blk_drain(sdev->conf.blk);
    aio_context_release(blk_get_aio_context(sdev->conf.blk));
    scsi_device_set_ua(sdev, sense);
@@ -1737,31 +1804,33 @@ static char *scsibus_get_fw_dev_path(DeviceState *dev)

/* SCSI request list. For simplicity, pv points to the whole device */

static void put_scsi_req(SCSIRequest *req, void *opaque)
{
    QEMUFile *f = opaque;

    assert(!req->io_canceled);
    assert(req->status == -1 && req->host_status == -1);
    assert(req->enqueued);

    qemu_put_sbyte(f, req->retry ? 1 : 2);
    qemu_put_buffer(f, req->cmd.buf, sizeof(req->cmd.buf));
    qemu_put_be32s(f, &req->tag);
    qemu_put_be32s(f, &req->lun);
    if (req->bus->info->save_request) {
        req->bus->info->save_request(f, req);
    }
    if (req->ops->save_request) {
        req->ops->save_request(f, req);
    }
}

static int put_scsi_requests(QEMUFile *f, void *pv, size_t size,
                             const VMStateField *field, JSONWriter *vmdesc)
{
    SCSIDevice *s = pv;
    SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, s->qdev.parent_bus);
    SCSIRequest *req;

    QTAILQ_FOREACH(req, &s->requests, next) {
        assert(!req->io_canceled);
        assert(req->status == -1 && req->host_status == -1);
        assert(req->enqueued);

        qemu_put_sbyte(f, req->retry ? 1 : 2);
        qemu_put_buffer(f, req->cmd.buf, sizeof(req->cmd.buf));
        qemu_put_be32s(f, &req->tag);
        qemu_put_be32s(f, &req->lun);
        if (bus->info->save_request) {
            bus->info->save_request(f, req);
        }
        if (req->ops->save_request) {
            req->ops->save_request(f, req);
        }
    }
    scsi_device_for_each_req_sync(s, put_scsi_req, f);
    qemu_put_sbyte(f, 0);

    return 0;
}

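One subtlety in the scsi-bus.c changes above: scsi_device_for_each_req_async() only schedules the iteration in the BlockBackend's AioContext and returns immediately, so a caller that needs the work to have finished must wait for it separately. scsi_device_purge_requests() does that by draining the BlockBackend, as in this condensed excerpt from the diff (the comments are added here, not in the patch):

void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense)
{
    /* Cancellation of each request runs in the AioContext that owns them... */
    scsi_device_for_each_req_async(sdev, scsi_device_purge_one_req, NULL);

    /*
     * ...and blk_drain() then drains the BlockBackend so that the scheduled
     * cancellations and any in-flight I/O have settled before the unit
     * attention is recorded, mirroring the ordering in the patch.
     */
    blk_drain(sdev->conf.blk);
    scsi_device_set_ua(sdev, sense);
}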
7 changes: 6 additions & 1 deletion include/hw/scsi/scsi.h
@@ -69,14 +69,19 @@ struct SCSIDevice
{
    DeviceState qdev;
    VMChangeStateEntry *vmsentry;
    QEMUBH *bh;
    uint32_t id;
    BlockConf conf;
    SCSISense unit_attention;
    bool sense_is_ua;
    uint8_t sense[SCSI_SENSE_BUF_SIZE];
    uint32_t sense_len;

    /*
     * The requests list is only accessed from the AioContext that executes
     * requests or from the main loop when IOThread processing is stopped.
     */
    QTAILQ_HEAD(, SCSIRequest) requests;

    uint32_t channel;
    uint32_t lun;
    int blocksize;
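The new comment in struct SCSIDevice is the invariant that both helpers in scsi-bus.c assert. Restated as a hypothetical debugging helper (not part of this patch; it only combines checks that already appear in the diff):

/* Hypothetical helper: assert the documented access rule for s->requests. */
static void scsi_requests_assert_access(SCSIDevice *s)
{
    if (runstate_is_running()) {
        /* Guest running: only the BlockBackend's AioContext may touch the list. */
        assert(blk_get_aio_context(s->conf.blk) == qemu_get_current_aio_context());
    } else {
        /* Guest stopped: only the main loop thread may touch the list. */
        assert(qemu_in_main_thread());
    }
}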
