Skip to content

Commit

Permalink
Merge remote-tracking branch 'remotes/kwolf/tags/for-upstream' into staging
Browse files Browse the repository at this point in the history

Block layer patches

- Fail gracefully when blockdev-snapshot creates loops
- ide: Fix IDENTIFY DEVICE for disks > 128 GiB
- file-posix: Fix return value translation for AIO discards
- file-posix: add 'aio-max-batch' option
- rbd: implement bdrv_co_block_status
- Code cleanups and build fixes

# gpg: Signature made Tue 02 Nov 2021 12:04:02 PM EDT
# gpg:                using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6
# gpg:                issuer "kwolf@redhat.com"
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]

* remotes/kwolf/tags/for-upstream:
  block/nvme: Extract nvme_free_queue() from nvme_free_queue_pair()
  block/nvme: Display CQ/SQ pointer in nvme_free_queue_pair()
  block/nvme: Automatically free qemu_memalign() with QEMU_AUTO_VFREE
  block-backend: Silence clang -m32 compiler warning
  linux-aio: add `dev_max_batch` parameter to laio_io_unplug()
  linux-aio: add `dev_max_batch` parameter to laio_co_submit()
  file-posix: add `aio-max-batch` option
  block/export/fuse.c: fix musl build
  ide: Cap LBA28 capacity announcement to 2^28-1
  block/rbd: implement bdrv_co_block_status
  block: Fail gracefully when blockdev-snapshot creates loops
  block/file-posix: Fix return value translation for AIO discards

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
  • Loading branch information
rth7680 committed Nov 3, 2021
2 parents 22d5760 + a895143 commit 741bdeb
Show file tree
Hide file tree
Showing 13 changed files with 258 additions and 35 deletions.
10 changes: 10 additions & 0 deletions block.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
BdrvChildRole child_role,
Error **errp);

static bool bdrv_recurse_has_child(BlockDriverState *bs,
BlockDriverState *child);

static void bdrv_replace_child_noperm(BdrvChild *child,
BlockDriverState *new_bs);
static void bdrv_remove_file_or_backing_child(BlockDriverState *bs,
Expand Down Expand Up @@ -2673,6 +2676,7 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
int drain_saldo;

assert(!child->frozen);
assert(old_bs != new_bs);

if (old_bs && new_bs) {
assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
Expand Down Expand Up @@ -2892,6 +2896,12 @@ static int bdrv_attach_child_noperm(BlockDriverState *parent_bs,

assert(parent_bs->drv);

if (bdrv_recurse_has_child(child_bs, parent_bs)) {
error_setg(errp, "Making '%s' a %s child of '%s' would create a cycle",
child_bs->node_name, child_name, parent_bs->node_name);
return -EINVAL;
}

bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm);
bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL,
perm, shared_perm, &perm, &shared_perm);
Expand Down
2 changes: 1 addition & 1 deletion block/block-backend.c
Original file line number Diff line number Diff line change
Expand Up @@ -1540,7 +1540,7 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
QEMUIOVector *qiov, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque)
{
assert(qiov->size <= INT64_MAX);
assert((uint64_t)qiov->size <= INT64_MAX);
return blk_aio_prwv(blk, offset, qiov->size, qiov,
blk_aio_write_entry, flags, cb, opaque);
}
Expand Down
4 changes: 4 additions & 0 deletions block/export/fuse.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@
#include <fuse.h>
#include <fuse_lowlevel.h>

#if defined(CONFIG_FALLOCATE_ZERO_RANGE)
#include <linux/falloc.h>
#endif

#ifdef __linux__
#include <linux/fs.h>
#endif
Expand Down
18 changes: 14 additions & 4 deletions block/file-posix.c
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ typedef struct BDRVRawState {
uint64_t locked_perm;
uint64_t locked_shared_perm;

uint64_t aio_max_batch;

int perm_change_fd;
int perm_change_flags;
BDRVReopenState *reopen_state;
Expand Down Expand Up @@ -530,6 +532,11 @@ static QemuOptsList raw_runtime_opts = {
.type = QEMU_OPT_STRING,
.help = "host AIO implementation (threads, native, io_uring)",
},
{
.name = "aio-max-batch",
.type = QEMU_OPT_NUMBER,
.help = "AIO max batch size (0 = auto handled by AIO backend, default: 0)",
},
{
.name = "locking",
.type = QEMU_OPT_STRING,
Expand Down Expand Up @@ -609,6 +616,8 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
s->use_linux_io_uring = (aio == BLOCKDEV_AIO_OPTIONS_IO_URING);
#endif

s->aio_max_batch = qemu_opt_get_number(opts, "aio-max-batch", 0);

locking = qapi_enum_parse(&OnOffAuto_lookup,
qemu_opt_get(opts, "locking"),
ON_OFF_AUTO_AUTO, &local_err);
Expand Down Expand Up @@ -1807,7 +1816,7 @@ static int handle_aiocb_copy_range(void *opaque)
static int handle_aiocb_discard(void *opaque)
{
RawPosixAIOData *aiocb = opaque;
int ret = -EOPNOTSUPP;
int ret = -ENOTSUP;
BDRVRawState *s = aiocb->bs->opaque;

if (!s->has_discard) {
Expand All @@ -1829,7 +1838,7 @@ static int handle_aiocb_discard(void *opaque)
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
aiocb->aio_offset, aiocb->aio_nbytes);
ret = translate_err(-errno);
ret = translate_err(ret);
#elif defined(__APPLE__) && (__MACH__)
fpunchhole_t fpunchhole;
fpunchhole.fp_flags = 0;
Expand Down Expand Up @@ -2057,7 +2066,8 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
} else if (s->use_linux_aio) {
LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
assert(qiov->size == bytes);
return laio_co_submit(bs, aio, s->fd, offset, qiov, type);
return laio_co_submit(bs, aio, s->fd, offset, qiov, type,
s->aio_max_batch);
#endif
}

Expand Down Expand Up @@ -2115,7 +2125,7 @@ static void raw_aio_unplug(BlockDriverState *bs)
#ifdef CONFIG_LINUX_AIO
if (s->use_linux_aio) {
LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
laio_io_unplug(bs, aio);
laio_io_unplug(bs, aio, s->aio_max_batch);
}
#endif
#ifdef CONFIG_LINUX_IO_URING
Expand Down
38 changes: 27 additions & 11 deletions block/linux-aio.c
Original file line number Diff line number Diff line change
Expand Up @@ -334,30 +334,45 @@ static void ioq_submit(LinuxAioState *s)
}
}

/*
 * Compute the batch-size limit that the submission paths compare against
 * s->io_q.in_queue before calling ioq_submit().
 *
 * @s: Linux AIO state; its AioContext and in-flight counter are read
 * @dev_max_batch: per-device batch limit supplied by the caller
 *
 * Returns the resulting limit.
 */
static uint64_t laio_max_batch(LinuxAioState *s, uint64_t dev_max_batch)
{
/* GNU "?:": use the context's aio_max_batch unless it is 0 */
uint64_t max_batch = s->aio_context->aio_max_batch ?: DEFAULT_MAX_BATCH;

/*
 * AIO context can be shared between multiple block devices, so
 * `dev_max_batch` allows reducing the batch size for latency-sensitive
 * devices.
 *
 * NOTE(review): presumably MIN_NON_ZERO ignores a zero operand, so
 * dev_max_batch == 0 leaves max_batch unchanged -- macro is defined
 * outside this view, confirm.
 */
max_batch = MIN_NON_ZERO(dev_max_batch, max_batch);

/* limit the batch with the number of available events */
/*
 * NOTE(review): if in_flight ever equals MAX_EVENTS the first operand is 0;
 * check that MIN_NON_ZERO then keeping max_batch is the intended result.
 */
max_batch = MIN_NON_ZERO(MAX_EVENTS - s->io_q.in_flight, max_batch);

return max_batch;
}

/*
 * Increment the plug counter of @s's I/O queue.
 *
 * @bs: not used by this function
 * @s: Linux AIO state whose io_q.plugged counter is incremented
 *
 * While the counter is non-zero, laio_do_submit() only calls ioq_submit()
 * once in_queue reaches the batch limit.
 */
void laio_io_plug(BlockDriverState *bs, LinuxAioState *s)
{
s->io_q.plugged++;
}

void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s)
void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s,
uint64_t dev_max_batch)
{
assert(s->io_q.plugged);
if (--s->io_q.plugged == 0 &&
!s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) ||
(--s->io_q.plugged == 0 &&
!s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) {
ioq_submit(s);
}
}

static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
int type)
int type, uint64_t dev_max_batch)
{
LinuxAioState *s = laiocb->ctx;
struct iocb *iocbs = &laiocb->iocb;
QEMUIOVector *qiov = laiocb->qiov;
int64_t max_batch = s->aio_context->aio_max_batch ?: DEFAULT_MAX_BATCH;

/* limit the batch with the number of available events */
max_batch = MIN_NON_ZERO(MAX_EVENTS - s->io_q.in_flight, max_batch);

switch (type) {
case QEMU_AIO_WRITE:
Expand All @@ -378,15 +393,16 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
s->io_q.in_queue++;
if (!s->io_q.blocked &&
(!s->io_q.plugged ||
s->io_q.in_queue >= max_batch)) {
s->io_q.in_queue >= laio_max_batch(s, dev_max_batch))) {
ioq_submit(s);
}

return 0;
}

int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
uint64_t offset, QEMUIOVector *qiov, int type)
uint64_t offset, QEMUIOVector *qiov, int type,
uint64_t dev_max_batch)
{
int ret;
struct qemu_laiocb laiocb = {
Expand All @@ -398,7 +414,7 @@ int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
.qiov = qiov,
};

ret = laio_do_submit(fd, &laiocb, offset, type);
ret = laio_do_submit(fd, &laiocb, offset, type, dev_max_batch);
if (ret < 0) {
return ret;
}
Expand Down
22 changes: 12 additions & 10 deletions block/nvme.c
Original file line number Diff line number Diff line change
Expand Up @@ -183,15 +183,20 @@ static bool nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q,
return r == 0;
}

/*
 * Release the buffer referenced by @q->queue with qemu_vfree().
 *
 * Only the queue buffer is released: @q itself is not freed and q->queue is
 * not reset afterwards.
 */
static void nvme_free_queue(NVMeQueue *q)
{
qemu_vfree(q->queue);
}

static void nvme_free_queue_pair(NVMeQueuePair *q)
{
trace_nvme_free_queue_pair(q->index, q);
trace_nvme_free_queue_pair(q->index, q, &q->cq, &q->sq);
if (q->completion_bh) {
qemu_bh_delete(q->completion_bh);
}
nvme_free_queue(&q->sq);
nvme_free_queue(&q->cq);
qemu_vfree(q->prp_list_pages);
qemu_vfree(q->sq.queue);
qemu_vfree(q->cq.queue);
qemu_mutex_destroy(&q->lock);
g_free(q);
}
Expand Down Expand Up @@ -514,10 +519,10 @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
{
BDRVNVMeState *s = bs->opaque;
bool ret = false;
union {
QEMU_AUTO_VFREE union {
NvmeIdCtrl ctrl;
NvmeIdNs ns;
} *id;
} *id = NULL;
NvmeLBAF *lbaf;
uint16_t oncs;
int r;
Expand Down Expand Up @@ -595,7 +600,6 @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
s->blkshift = lbaf->ds;
out:
qemu_vfio_dma_unmap(s->vfio, id);
qemu_vfree(id);

return ret;
}
Expand Down Expand Up @@ -1219,7 +1223,7 @@ static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
{
BDRVNVMeState *s = bs->opaque;
int r;
uint8_t *buf = NULL;
QEMU_AUTO_VFREE uint8_t *buf = NULL;
QEMUIOVector local_qiov;
size_t len = QEMU_ALIGN_UP(bytes, qemu_real_host_page_size);
assert(QEMU_IS_ALIGNED(offset, s->page_size));
Expand All @@ -1246,7 +1250,6 @@ static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
if (!r && !is_write) {
qemu_iovec_from_buf(qiov, 0, buf, bytes);
}
qemu_vfree(buf);
return r;
}

Expand Down Expand Up @@ -1365,7 +1368,7 @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs,
BDRVNVMeState *s = bs->opaque;
NVMeQueuePair *ioq = s->queues[INDEX_IO(0)];
NVMeRequest *req;
NvmeDsmRange *buf;
QEMU_AUTO_VFREE NvmeDsmRange *buf = NULL;
QEMUIOVector local_qiov;
int ret;

Expand Down Expand Up @@ -1440,7 +1443,6 @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs,
trace_nvme_dsm_done(s, offset, bytes, ret);
out:
qemu_iovec_destroy(&local_qiov);
qemu_vfree(buf);
return ret;

}
Expand Down
Loading

0 comments on commit 741bdeb

Please sign in to comment.