Skip to content

Commit

Permalink
Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-reques…
Browse files Browse the repository at this point in the history
…t' into staging

Pull request

# gpg: Signature made Thu 30 Jan 2020 21:31:02 GMT
# gpg:                using RSA key 8695A8BFD3F97CDAAC35775A9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>" [full]
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>" [full]
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request:
  tests/qemu-iotests: use AIOMODE with various tests
  tests/qemu-iotests: enable testing with aio options
  qemu-nbd: adds option for aio engines
  qemu-img: adds option to use aio engine for benchmarking
  qemu-io: adds option to use aio engine
  block/io_uring: adds userspace completion polling
  block: add trace events for io_uring
  block/file-posix.c: extend to use io_uring
  blockdev: adds bdrv_parse_aio to use io_uring
  util/async: add aio interfaces for io_uring
  stubs: add stubs for io_uring interface
  block/io_uring: implements interfaces for io_uring
  block/block: add BDRV flag for io_uring
  qapi/block-core: add option for io_uring
  configure: permit use of io_uring
  block/io: take bs->reqs_lock in bdrv_mark_request_serialising
  block/io: wait for serialising requests when a request becomes serialising
  block: eliminate BDRV_REQ_NO_SERIALISING

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
  • Loading branch information
pm215 committed Jan 31, 2020
2 parents 9281736 + 8dff69b commit adcd6e9
Show file tree
Hide file tree
Showing 37 changed files with 878 additions and 172 deletions.
9 changes: 9 additions & 0 deletions MAINTAINERS
Expand Up @@ -2634,6 +2634,15 @@ F: block/file-posix.c
F: block/file-win32.c
F: block/win32-aio.c

Linux io_uring
M: Aarushi Mehta <mehta.aaru20@gmail.com>
M: Julia Suvorova <jusual@redhat.com>
M: Stefan Hajnoczi <stefanha@redhat.com>
L: qemu-block@nongnu.org
S: Maintained
F: block/io_uring.c
F: stubs/io_uring.c

qcow2
M: Kevin Wolf <kwolf@redhat.com>
M: Max Reitz <mreitz@redhat.com>
Expand Down
22 changes: 22 additions & 0 deletions block.c
Expand Up @@ -845,6 +845,28 @@ static BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes(QemuOpts *opts,
return detect_zeroes;
}

/**
* Set open flags for aio engine
*
* Return 0 on success, -1 if the engine specified is invalid
*/
int bdrv_parse_aio(const char *mode, int *flags)
{
if (!strcmp(mode, "threads")) {
/* do nothing, default */
} else if (!strcmp(mode, "native")) {
*flags |= BDRV_O_NATIVE_AIO;
#ifdef CONFIG_LINUX_IO_URING
} else if (!strcmp(mode, "io_uring")) {
*flags |= BDRV_O_IO_URING;
#endif
} else {
return -1;
}

return 0;
}

/**
* Set open flags for a given discard mode
*
Expand Down
3 changes: 3 additions & 0 deletions block/Makefile.objs
Expand Up @@ -18,6 +18,7 @@ block-obj-y += block-backend.o snapshot.o qapi.o
block-obj-$(CONFIG_WIN32) += file-win32.o win32-aio.o
block-obj-$(CONFIG_POSIX) += file-posix.o
block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
block-obj-$(CONFIG_LINUX_IO_URING) += io_uring.o
block-obj-y += null.o mirror.o commit.o io.o create.o
block-obj-y += throttle-groups.o
block-obj-$(CONFIG_LINUX) += nvme.o
Expand Down Expand Up @@ -66,5 +67,7 @@ block-obj-$(if $(CONFIG_LZFSE),m,n) += dmg-lzfse.o
dmg-lzfse.o-libs := $(LZFSE_LIBS)
qcow.o-libs := -lz
linux-aio.o-libs := -laio
io_uring.o-cflags := $(LINUX_IO_URING_CFLAGS)
io_uring.o-libs := $(LINUX_IO_URING_LIBS)
parallels.o-cflags := $(LIBXML2_CFLAGS)
parallels.o-libs := $(LIBXML2_LIBS)
99 changes: 79 additions & 20 deletions block/file-posix.c
Expand Up @@ -156,6 +156,7 @@ typedef struct BDRVRawState {
bool has_write_zeroes:1;
bool discard_zeroes:1;
bool use_linux_aio:1;
bool use_linux_io_uring:1;
bool page_cache_inconsistent:1;
bool has_fallocate;
bool needs_alignment;
Expand Down Expand Up @@ -444,7 +445,7 @@ static QemuOptsList raw_runtime_opts = {
{
.name = "aio",
.type = QEMU_OPT_STRING,
.help = "host AIO implementation (threads, native)",
.help = "host AIO implementation (threads, native, io_uring)",
},
{
.name = "locking",
Expand Down Expand Up @@ -503,9 +504,16 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
goto fail;
}

aio_default = (bdrv_flags & BDRV_O_NATIVE_AIO)
? BLOCKDEV_AIO_OPTIONS_NATIVE
: BLOCKDEV_AIO_OPTIONS_THREADS;
if (bdrv_flags & BDRV_O_NATIVE_AIO) {
aio_default = BLOCKDEV_AIO_OPTIONS_NATIVE;
#ifdef CONFIG_LINUX_IO_URING
} else if (bdrv_flags & BDRV_O_IO_URING) {
aio_default = BLOCKDEV_AIO_OPTIONS_IO_URING;
#endif
} else {
aio_default = BLOCKDEV_AIO_OPTIONS_THREADS;
}

aio = qapi_enum_parse(&BlockdevAioOptions_lookup,
qemu_opt_get(opts, "aio"),
aio_default, &local_err);
Expand All @@ -514,7 +522,11 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
ret = -EINVAL;
goto fail;
}

s->use_linux_aio = (aio == BLOCKDEV_AIO_OPTIONS_NATIVE);
#ifdef CONFIG_LINUX_IO_URING
s->use_linux_io_uring = (aio == BLOCKDEV_AIO_OPTIONS_IO_URING);
#endif

locking = qapi_enum_parse(&OnOffAuto_lookup,
qemu_opt_get(opts, "locking"),
Expand Down Expand Up @@ -600,6 +612,22 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
}
#endif /* !defined(CONFIG_LINUX_AIO) */

#ifdef CONFIG_LINUX_IO_URING
if (s->use_linux_io_uring) {
if (!aio_setup_linux_io_uring(bdrv_get_aio_context(bs), errp)) {
error_prepend(errp, "Unable to use io_uring: ");
goto fail;
}
}
#else
if (s->use_linux_io_uring) {
error_setg(errp, "aio=io_uring was specified, but is not supported "
"in this build.");
ret = -EINVAL;
goto fail;
}
#endif /* !defined(CONFIG_LINUX_IO_URING) */

s->has_discard = true;
s->has_write_zeroes = true;
if ((bs->open_flags & BDRV_O_NOCACHE) != 0) {
Expand Down Expand Up @@ -1877,21 +1905,25 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
return -EIO;

/*
* Check if the underlying device requires requests to be aligned,
* and if the request we are trying to submit is aligned or not.
* If this is the case tell the low-level driver that it needs
* to copy the buffer.
* When using O_DIRECT, the request must be aligned to be able to use
* either libaio or io_uring interface. If not fail back to regular thread
* pool read/write code which emulates this for us if we
* set QEMU_AIO_MISALIGNED.
*/
if (s->needs_alignment) {
if (!bdrv_qiov_is_aligned(bs, qiov)) {
type |= QEMU_AIO_MISALIGNED;
if (s->needs_alignment && !bdrv_qiov_is_aligned(bs, qiov)) {
type |= QEMU_AIO_MISALIGNED;
#ifdef CONFIG_LINUX_IO_URING
} else if (s->use_linux_io_uring) {
LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs));
assert(qiov->size == bytes);
return luring_co_submit(bs, aio, s->fd, offset, qiov, type);
#endif
#ifdef CONFIG_LINUX_AIO
} else if (s->use_linux_aio) {
LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
assert(qiov->size == bytes);
return laio_co_submit(bs, aio, s->fd, offset, qiov, type);
} else if (s->use_linux_aio) {
LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
assert(qiov->size == bytes);
return laio_co_submit(bs, aio, s->fd, offset, qiov, type);
#endif
}
}

acb = (RawPosixAIOData) {
Expand Down Expand Up @@ -1927,24 +1959,36 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,

static void raw_aio_plug(BlockDriverState *bs)
{
BDRVRawState __attribute__((unused)) *s = bs->opaque;
#ifdef CONFIG_LINUX_AIO
BDRVRawState *s = bs->opaque;
if (s->use_linux_aio) {
LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
laio_io_plug(bs, aio);
}
#endif
#ifdef CONFIG_LINUX_IO_URING
if (s->use_linux_io_uring) {
LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs));
luring_io_plug(bs, aio);
}
#endif
}

static void raw_aio_unplug(BlockDriverState *bs)
{
BDRVRawState __attribute__((unused)) *s = bs->opaque;
#ifdef CONFIG_LINUX_AIO
BDRVRawState *s = bs->opaque;
if (s->use_linux_aio) {
LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
laio_io_unplug(bs, aio);
}
#endif
#ifdef CONFIG_LINUX_IO_URING
if (s->use_linux_io_uring) {
LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs));
luring_io_unplug(bs, aio);
}
#endif
}

static int raw_co_flush_to_disk(BlockDriverState *bs)
Expand All @@ -1964,14 +2008,20 @@ static int raw_co_flush_to_disk(BlockDriverState *bs)
.aio_type = QEMU_AIO_FLUSH,
};

#ifdef CONFIG_LINUX_IO_URING
if (s->use_linux_io_uring) {
LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs));
return luring_co_submit(bs, aio, s->fd, 0, NULL, QEMU_AIO_FLUSH);
}
#endif
return raw_thread_pool_submit(bs, handle_aiocb_flush, &acb);
}

static void raw_aio_attach_aio_context(BlockDriverState *bs,
AioContext *new_context)
{
BDRVRawState __attribute__((unused)) *s = bs->opaque;
#ifdef CONFIG_LINUX_AIO
BDRVRawState *s = bs->opaque;
if (s->use_linux_aio) {
Error *local_err = NULL;
if (!aio_setup_linux_aio(new_context, &local_err)) {
Expand All @@ -1981,6 +2031,16 @@ static void raw_aio_attach_aio_context(BlockDriverState *bs,
}
}
#endif
#ifdef CONFIG_LINUX_IO_URING
if (s->use_linux_io_uring) {
Error *local_err;
if (!aio_setup_linux_io_uring(new_context, &local_err)) {
error_reportf_err(local_err, "Unable to use linux io_uring, "
"falling back to thread pool: ");
s->use_linux_io_uring = false;
}
}
#endif
}

static void raw_close(BlockDriverState *bs)
Expand Down Expand Up @@ -2753,7 +2813,6 @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes,
req->overlap_bytes = req->bytes;

bdrv_mark_request_serialising(req, bs->bl.request_alignment);
bdrv_wait_serialising_requests(req);
}
#endif

Expand Down

0 comments on commit adcd6e9

Please sign in to comment.