From 2174f12bdeb3974141784e14bbb7ad8c53178cd9 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 6 Aug 2014 17:18:07 +0200 Subject: [PATCH] raw-posix: Switch to bdrv_co_* interfaces In order to use the modern byte-based .bdrv_co_preadv/pwritev() interface, this patch switches raw-posix to coroutine-based interfaces as a first step. In terms of semantics and performance, it doesn't make a difference with the existing code whether we go from a coroutine to a callback-based interface already in block/io.c or only in linux-aio.c. As there have been concerns in the past that this change may be a step in the wrong direction with respect to a possible AIO fast path, the old callback-based interface for linux-aio is left around and can be reactivated when a fast path (e.g. directly from virtio-blk dataplane, bypassing the whole block layer) is implemented. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi --- block/linux-aio.c | 87 +++++++++++++++++++++++++++++++++++------------ block/raw-aio.h | 4 +++ block/raw-posix.c | 59 +++++++++++++++----------------- 3 files changed, 96 insertions(+), 54 deletions(-) diff --git a/block/linux-aio.c b/block/linux-aio.c index 90ec98ee235c..657577a22de1 100644 --- a/block/linux-aio.c +++ b/block/linux-aio.c @@ -11,8 +11,10 @@ #include "qemu-common.h" #include "block/aio.h" #include "qemu/queue.h" +#include "block/block.h" #include "block/raw-aio.h" #include "qemu/event_notifier.h" +#include "qemu/coroutine.h" #include <libaio.h> @@ -30,6 +32,7 @@ struct qemu_laiocb { BlockAIOCB common; + Coroutine *co; LinuxAioState *ctx; struct iocb iocb; ssize_t ret; @@ -88,9 +91,14 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb) } } } - laiocb->common.cb(laiocb->common.opaque, ret); - qemu_aio_unref(laiocb); + laiocb->ret = ret; + if (laiocb->co) { + qemu_coroutine_enter(laiocb->co, NULL); + } else { + laiocb->common.cb(laiocb->common.opaque, ret); + qemu_aio_unref(laiocb); + } } /* The completion BH fetches
completed I/O requests and invokes their @@ -230,22 +238,12 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s) } } -BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque, int type) +static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset, + int type) { - struct qemu_laiocb *laiocb; - struct iocb *iocbs; - off_t offset = sector_num * 512; - - laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque); - laiocb->nbytes = nb_sectors * 512; - laiocb->ctx = s; - laiocb->ret = -EINPROGRESS; - laiocb->is_read = (type == QEMU_AIO_READ); - laiocb->qiov = qiov; - - iocbs = &laiocb->iocb; + LinuxAioState *s = laiocb->ctx; + struct iocb *iocbs = &laiocb->iocb; + QEMUIOVector *qiov = laiocb->qiov; switch (type) { case QEMU_AIO_WRITE: @@ -258,7 +256,7 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd, default: fprintf(stderr, "%s: invalid AIO request type 0x%x.\n", __func__, type); - goto out_free_aiocb; + return -EIO; } io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e)); @@ -268,11 +266,56 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd, (!s->io_q.plugged || s->io_q.n >= MAX_QUEUED_IO)) { ioq_submit(s); } - return &laiocb->common; -out_free_aiocb: - qemu_aio_unref(laiocb); - return NULL; + return 0; +} + +int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd, + int64_t sector_num, QEMUIOVector *qiov, + int nb_sectors, int type) +{ + off_t offset = sector_num * BDRV_SECTOR_SIZE; + int ret; + + struct qemu_laiocb laiocb = { + .co = qemu_coroutine_self(), + .nbytes = nb_sectors * BDRV_SECTOR_SIZE, + .ctx = s, + .is_read = (type == QEMU_AIO_READ), + .qiov = qiov, + }; + + ret = laio_do_submit(fd, &laiocb, offset, type); + if (ret < 0) { + return ret; + } + + qemu_coroutine_yield(); + return laiocb.ret; +} + +BlockAIOCB *laio_submit(BlockDriverState *bs, 
LinuxAioState *s, int fd, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockCompletionFunc *cb, void *opaque, int type) +{ + struct qemu_laiocb *laiocb; + off_t offset = sector_num * BDRV_SECTOR_SIZE; + int ret; + + laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque); + laiocb->nbytes = nb_sectors * BDRV_SECTOR_SIZE; + laiocb->ctx = s; + laiocb->ret = -EINPROGRESS; + laiocb->is_read = (type == QEMU_AIO_READ); + laiocb->qiov = qiov; + + ret = laio_do_submit(fd, laiocb, offset, type); + if (ret < 0) { + qemu_aio_unref(laiocb); + return NULL; + } + + return &laiocb->common; } void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context) diff --git a/block/raw-aio.h b/block/raw-aio.h index 714714e016c4..03bbfba565ec 100644 --- a/block/raw-aio.h +++ b/block/raw-aio.h @@ -15,6 +15,7 @@ #ifndef QEMU_RAW_AIO_H #define QEMU_RAW_AIO_H +#include "qemu/coroutine.h" #include "qemu/iov.h" /* AIO request types */ @@ -38,6 +39,9 @@ typedef struct LinuxAioState LinuxAioState; LinuxAioState *laio_init(void); void laio_cleanup(LinuxAioState *s); +int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd, + int64_t sector_num, QEMUIOVector *qiov, + int nb_sectors, int type); BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockCompletionFunc *cb, void *opaque, int type); diff --git a/block/raw-posix.c b/block/raw-posix.c index ce2e20f2033f..cb987692c696 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -1325,14 +1325,13 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd, return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque); } -static BlockAIOCB *raw_aio_submit(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque, int type) +static int coroutine_fn raw_co_rw(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov, int type) { BDRVRawState 
*s = bs->opaque; if (fd_open(bs) < 0) - return NULL; + return -EIO; /* * Check if the underlying device requires requests to be aligned, @@ -1345,14 +1344,26 @@ static BlockAIOCB *raw_aio_submit(BlockDriverState *bs, type |= QEMU_AIO_MISALIGNED; #ifdef CONFIG_LINUX_AIO } else if (s->use_aio) { - return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov, - nb_sectors, cb, opaque, type); + return laio_co_submit(bs, s->aio_ctx, s->fd, sector_num, qiov, + nb_sectors, type); #endif } } - return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors, - cb, opaque, type); + return paio_submit_co(bs, s->fd, sector_num * BDRV_SECTOR_SIZE, qiov, + nb_sectors * BDRV_SECTOR_SIZE, type); +} + +static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov) +{ + return raw_co_rw(bs, sector_num, nb_sectors, qiov, QEMU_AIO_READ); +} + +static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov) +{ + return raw_co_rw(bs, sector_num, nb_sectors, qiov, QEMU_AIO_WRITE); } static void raw_aio_plug(BlockDriverState *bs) @@ -1375,22 +1386,6 @@ static void raw_aio_unplug(BlockDriverState *bs) #endif } -static BlockAIOCB *raw_aio_readv(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque) -{ - return raw_aio_submit(bs, sector_num, qiov, nb_sectors, - cb, opaque, QEMU_AIO_READ); -} - -static BlockAIOCB *raw_aio_writev(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque) -{ - return raw_aio_submit(bs, sector_num, qiov, nb_sectors, - cb, opaque, QEMU_AIO_WRITE); -} - static BlockAIOCB *raw_aio_flush(BlockDriverState *bs, BlockCompletionFunc *cb, void *opaque) { @@ -1957,8 +1952,8 @@ BlockDriver bdrv_file = { .bdrv_co_get_block_status = raw_co_get_block_status, .bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes, - .bdrv_aio_readv = raw_aio_readv, - 
.bdrv_aio_writev = raw_aio_writev, + .bdrv_co_readv = raw_co_readv, + .bdrv_co_writev = raw_co_writev, .bdrv_aio_flush = raw_aio_flush, .bdrv_aio_discard = raw_aio_discard, .bdrv_refresh_limits = raw_refresh_limits, @@ -2405,8 +2400,8 @@ static BlockDriver bdrv_host_device = { .create_opts = &raw_create_opts, .bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes, - .bdrv_aio_readv = raw_aio_readv, - .bdrv_aio_writev = raw_aio_writev, + .bdrv_co_readv = raw_co_readv, + .bdrv_co_writev = raw_co_writev, .bdrv_aio_flush = raw_aio_flush, .bdrv_aio_discard = hdev_aio_discard, .bdrv_refresh_limits = raw_refresh_limits, @@ -2535,8 +2530,8 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_create = hdev_create, .create_opts = &raw_create_opts, - .bdrv_aio_readv = raw_aio_readv, - .bdrv_aio_writev = raw_aio_writev, + .bdrv_co_readv = raw_co_readv, + .bdrv_co_writev = raw_co_writev, .bdrv_aio_flush = raw_aio_flush, .bdrv_refresh_limits = raw_refresh_limits, .bdrv_io_plug = raw_aio_plug, @@ -2670,8 +2665,8 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_create = hdev_create, .create_opts = &raw_create_opts, - .bdrv_aio_readv = raw_aio_readv, - .bdrv_aio_writev = raw_aio_writev, + .bdrv_co_readv = raw_co_readv, + .bdrv_co_writev = raw_co_writev, .bdrv_aio_flush = raw_aio_flush, .bdrv_refresh_limits = raw_refresh_limits, .bdrv_io_plug = raw_aio_plug,