Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge tag 'for-upstream' of https://repo.or.cz/qemu/kevin into staging
Block layer patches

- Optimise reqs_lock to make multiqueue actually scale
- virtio: Drop out of coroutine context in virtio_load()
- iotests: Fix reference output for some tests after recent changes
- vpc: Avoid dynamic stack allocation
- Code cleanup, improved documentation

# -----BEGIN PGP SIGNATURE-----
#
# iQJFBAABCAAvFiEE3D3rFZqa+V09dFb+fwmycsiPL9YFAmT7VYgRHGt3b2xmQHJl
# ZGhhdC5jb20ACgkQfwmycsiPL9YfOg/7BoYF6lkB7DF/jH3XLY6f8zoI+OVM7dg1
# QFEjyVO+uZiJVh0CeBNI9WgnBe7f5vXMbiStyGbWKo3BLUsjnwoQcW/Sxpw61bR2
# jZYK6UHe0RhFqTQpbt8G1iCmlpRS+sX+Cy+lxcVcbqxcnLRXCOjT6ivyA4bGbYIC
# q9BHg/9hBmjuM05NTV6Axy8qjqBGVaIWE9ALTnw8H//waBr4/ydJPTl7EWHe3+tO
# Stm73evgPG7aLHM6W4qdFW4gwAQ8f+f42Q+0NH1YavB/pN3LTN1B6sLQY/51du+0
# d/JCsXex0IZQXmNPhqv1h01vhOyU9WBmlwpPG2iZv3a06SXk1ys3rQt/L7uIcsZg
# Z58CpcUJ517FERnkl0BWXzYhsdcW2K+RdlaiL5PX6H1A2B9LT05ouZfD47hh7kKv
# oX+Ulk05PFr3JRCKQF6QDEejRKXt169bGzInTlns/wXinD/V4sCkUnr9aWQuhoWk
# KhQm7WMscTTIyHP2FznO4x9kq0ALsoX/NKqBW2wgJUtqRzsd4XxPp5CXEsAir8Vt
# dpne/DaV5iDI1mGFJrvkctJN545tEoezBtUzC8/9rZGE0cxHAkhvQVZUDo7xVmrq
# PlGQ1ko9cNui/Gf9B6qDqaJJwSyw0S6vHurGVQJRwbyly57Fi5aisWkr4w7Rc4eA
# 7u9B1RvwF/Q=
# =2wGD
# -----END PGP SIGNATURE-----
# gpg: Signature made Fri 08 Sep 2023 13:10:32 EDT
# gpg:                using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6
# gpg:                issuer "kwolf@redhat.com"
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* tag 'for-upstream' of https://repo.or.cz/qemu/kevin:
  virtio: Drop out of coroutine context in virtio_load()
  vmstate: Mark VMStateInfo.get/put() coroutine_mixed_fn
  block: Make more BlockDriver definitions static
  block/meson.build: Restore alphabetical order of files
  block: Remove unnecessary variable in bdrv_block_device_info
  block: Remove bdrv_query_block_node_info
  vmdk: Clean up bdrv_open_child() return value check
  qemu-img: Update documentation for compressed images
  block: Be more verbose in create fallback
  block/iscsi: Document why we use raw malloc()
  qemu-img: omit errno value in error message
  block: change reqs_lock to QemuMutex
  block: minimize bs->reqs_lock section in tracked_request_end()
  iotests: adapt test output for new qemu_cleanup() behavior
  block/vpc: Avoid dynamic stack allocation

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
  • Loading branch information
Stefan Hajnoczi committed Sep 11, 2023
2 parents a7e8e30 + 92e2e6a commit 78f8b6d
Show file tree
Hide file tree
Showing 22 changed files with 142 additions and 80 deletions.
10 changes: 7 additions & 3 deletions block.c
Expand Up @@ -415,7 +415,7 @@ BlockDriverState *bdrv_new(void)
for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
QLIST_INIT(&bs->op_blockers[i]);
}
qemu_co_mutex_init(&bs->reqs_lock);
qemu_mutex_init(&bs->reqs_lock);
qemu_mutex_init(&bs->dirty_bitmap_mutex);
bs->refcnt = 1;
bs->aio_context = qemu_get_aio_context();
Expand Down Expand Up @@ -661,8 +661,10 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
blk = blk_co_new_open(filename, NULL, options,
BDRV_O_RDWR | BDRV_O_RESIZE, errp);
if (!blk) {
error_prepend(errp, "Protocol driver '%s' does not support image "
"creation, and opening the image failed: ",
error_prepend(errp, "Protocol driver '%s' does not support creating "
"new images, so an existing image must be selected as "
"the target; however, opening the given target as an "
"existing image failed: ",
drv->format_name);
return -EINVAL;
}
Expand Down Expand Up @@ -5476,6 +5478,8 @@ static void bdrv_delete(BlockDriverState *bs)

bdrv_close(bs);

qemu_mutex_destroy(&bs->reqs_lock);

g_free(bs);
}

Expand Down
2 changes: 1 addition & 1 deletion block/copy-before-write.c
Expand Up @@ -503,7 +503,7 @@ static void cbw_close(BlockDriverState *bs)
s->bcs = NULL;
}

BlockDriver bdrv_cbw_filter = {
static BlockDriver bdrv_cbw_filter = {
.format_name = "copy-before-write",
.instance_size = sizeof(BDRVCopyBeforeWriteState),

Expand Down
30 changes: 18 additions & 12 deletions block/io.c
Expand Up @@ -591,10 +591,16 @@ static void coroutine_fn tracked_request_end(BdrvTrackedRequest *req)
qatomic_dec(&req->bs->serialising_in_flight);
}

qemu_co_mutex_lock(&req->bs->reqs_lock);
qemu_mutex_lock(&req->bs->reqs_lock);
QLIST_REMOVE(req, list);
qemu_mutex_unlock(&req->bs->reqs_lock);

/*
* At this point qemu_co_queue_wait(&req->wait_queue, ...) won't be called
* anymore because the request has been removed from the list, so it's safe
* to restart the queue outside reqs_lock to minimize the critical section.
*/
qemu_co_queue_restart_all(&req->wait_queue);
qemu_co_mutex_unlock(&req->bs->reqs_lock);
}

/**
Expand All @@ -621,9 +627,9 @@ static void coroutine_fn tracked_request_begin(BdrvTrackedRequest *req,

qemu_co_queue_init(&req->wait_queue);

qemu_co_mutex_lock(&bs->reqs_lock);
qemu_mutex_lock(&bs->reqs_lock);
QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
qemu_co_mutex_unlock(&bs->reqs_lock);
qemu_mutex_unlock(&bs->reqs_lock);
}

static bool tracked_request_overlaps(BdrvTrackedRequest *req,
Expand Down Expand Up @@ -787,22 +793,22 @@ bdrv_wait_serialising_requests(BdrvTrackedRequest *self)
return;
}

qemu_co_mutex_lock(&bs->reqs_lock);
qemu_mutex_lock(&bs->reqs_lock);
bdrv_wait_serialising_requests_locked(self);
qemu_co_mutex_unlock(&bs->reqs_lock);
qemu_mutex_unlock(&bs->reqs_lock);
}

void coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
uint64_t align)
{
IO_CODE();

qemu_co_mutex_lock(&req->bs->reqs_lock);
qemu_mutex_lock(&req->bs->reqs_lock);

tracked_request_set_serialising(req, align);
bdrv_wait_serialising_requests_locked(req);

qemu_co_mutex_unlock(&req->bs->reqs_lock);
qemu_mutex_unlock(&req->bs->reqs_lock);
}

int bdrv_check_qiov_request(int64_t offset, int64_t bytes,
Expand Down Expand Up @@ -2996,7 +3002,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
goto early_exit;
}

qemu_co_mutex_lock(&bs->reqs_lock);
qemu_mutex_lock(&bs->reqs_lock);
current_gen = qatomic_read(&bs->write_gen);

/* Wait until any previous flushes are completed */
Expand All @@ -3006,7 +3012,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)

/* Flushes reach this point in nondecreasing current_gen order. */
bs->active_flush_req = true;
qemu_co_mutex_unlock(&bs->reqs_lock);
qemu_mutex_unlock(&bs->reqs_lock);

/* Write back all layers by calling one driver function */
if (bs->drv->bdrv_co_flush) {
Expand Down Expand Up @@ -3094,11 +3100,11 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
bs->flushed_gen = current_gen;
}

qemu_co_mutex_lock(&bs->reqs_lock);
qemu_mutex_lock(&bs->reqs_lock);
bs->active_flush_req = false;
/* Return value is ignored - it's ok if wait queue is empty */
qemu_co_queue_next(&bs->flush_queue);
qemu_co_mutex_unlock(&bs->reqs_lock);
qemu_mutex_unlock(&bs->reqs_lock);

early_exit:
bdrv_dec_in_flight(bs);
Expand Down
1 change: 1 addition & 0 deletions block/iscsi.c
Expand Up @@ -1058,6 +1058,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
return NULL;
}

/* Must use malloc(): this is freed via scsi_free_scsi_task() */
acb->task = malloc(sizeof(struct scsi_task));
if (acb->task == NULL) {
error_report("iSCSI: Failed to allocate task for scsi command. %s",
Expand Down
12 changes: 6 additions & 6 deletions block/meson.build
Expand Up @@ -4,41 +4,41 @@ block_ss.add(files(
'aio_task.c',
'amend.c',
'backup.c',
'copy-before-write.c',
'blkdebug.c',
'blklogwrites.c',
'blkverify.c',
'block-backend.c',
'block-copy.c',
'graph-lock.c',
'commit.c',
'copy-before-write.c',
'copy-on-read.c',
'preallocate.c',
'progress_meter.c',
'create.c',
'crypto.c',
'dirty-bitmap.c',
'filter-compress.c',
'graph-lock.c',
'io.c',
'mirror.c',
'nbd.c',
'null.c',
'plug.c',
'preallocate.c',
'progress_meter.c',
'qapi.c',
'qcow2.c',
'qcow2-bitmap.c',
'qcow2-cache.c',
'qcow2-cluster.c',
'qcow2-refcount.c',
'qcow2-snapshot.c',
'qcow2-threads.c',
'qcow2.c',
'quorum.c',
'raw-format.c',
'reqlist.c',
'snapshot.c',
'snapshot-access.c',
'throttle-groups.c',
'throttle.c',
'throttle-groups.c',
'write-threshold.c',
), zstd, zlib, gnutls)

Expand Down
2 changes: 1 addition & 1 deletion block/preallocate.c
Expand Up @@ -535,7 +535,7 @@ static void preallocate_child_perm(BlockDriverState *bs, BdrvChild *c,
}
}

BlockDriver bdrv_preallocate_filter = {
static BlockDriver bdrv_preallocate_filter = {
.format_name = "preallocate",
.instance_size = sizeof(BDRVPreallocateState),

Expand Down
32 changes: 2 additions & 30 deletions block/qapi.c
Expand Up @@ -48,7 +48,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
{
ImageInfo **p_image_info;
ImageInfo *backing_info;
BlockDriverState *bs0, *backing;
BlockDriverState *backing;
BlockDeviceInfo *info;
ERRP_GUARD();

Expand Down Expand Up @@ -145,15 +145,14 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,

info->write_threshold = bdrv_write_threshold_get(bs);

bs0 = bs;
p_image_info = &info->image;
info->backing_file_depth = 0;

/*
* Skip automatically inserted nodes that the user isn't aware of for
* query-block (blk != NULL), but not for query-named-block-nodes
*/
bdrv_query_image_info(bs0, p_image_info, flat, blk != NULL, errp);
bdrv_query_image_info(bs, p_image_info, flat, blk != NULL, errp);
if (*errp) {
qapi_free_BlockDeviceInfo(info);
return NULL;
Expand Down Expand Up @@ -309,33 +308,6 @@ static void bdrv_do_query_node_info(BlockDriverState *bs,
aio_context_release(bdrv_get_aio_context(bs));
}

/**
* bdrv_query_block_node_info:
* @bs: block node to examine
* @p_info: location to store node information
* @errp: location to store error information
*
* Store image information about @bs in @p_info.
*
* @p_info will be set only on success. On error, store error in @errp.
*/
void bdrv_query_block_node_info(BlockDriverState *bs,
BlockNodeInfo **p_info,
Error **errp)
{
BlockNodeInfo *info;
ERRP_GUARD();

info = g_new0(BlockNodeInfo, 1);
bdrv_do_query_node_info(bs, info, errp);
if (*errp) {
qapi_free_BlockNodeInfo(info);
return;
}

*p_info = info;
}

/**
* bdrv_query_image_info:
* @bs: block node to examine
Expand Down
2 changes: 1 addition & 1 deletion block/snapshot-access.c
Expand Up @@ -108,7 +108,7 @@ static void snapshot_access_child_perm(BlockDriverState *bs, BdrvChild *c,
*nshared = BLK_PERM_ALL;
}

BlockDriver bdrv_snapshot_access_drv = {
static BlockDriver bdrv_snapshot_access_drv = {
.format_name = "snapshot-access",

.bdrv_open = snapshot_access_open,
Expand Down
2 changes: 1 addition & 1 deletion block/vmdk.c
Expand Up @@ -1207,7 +1207,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
bs, &child_of_bds, extent_role, false,
&local_err);
g_free(extent_path);
if (local_err) {
if (!extent_file) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto out;
Expand Down
4 changes: 2 additions & 2 deletions block/vpc.c
Expand Up @@ -510,7 +510,7 @@ get_image_offset(BlockDriverState *bs, uint64_t offset, bool write, int *err)
miss sparse read optimization, but it's not a problem in terms of
correctness. */
if (write && (s->last_bitmap_offset != bitmap_offset)) {
uint8_t bitmap[s->bitmap_size];
g_autofree uint8_t *bitmap = g_malloc(s->bitmap_size);
int r;

s->last_bitmap_offset = bitmap_offset;
Expand Down Expand Up @@ -558,7 +558,7 @@ alloc_block(BlockDriverState *bs, int64_t offset)
int64_t bat_offset;
uint32_t index, bat_value;
int ret;
uint8_t bitmap[s->bitmap_size];
g_autofree uint8_t *bitmap = g_malloc(s->bitmap_size);

/* Check if sector_num is valid */
if ((offset < 0) || (offset > bs->total_sectors * BDRV_SECTOR_SIZE)) {
Expand Down
19 changes: 17 additions & 2 deletions docs/tools/qemu-img.rst
Expand Up @@ -106,7 +106,11 @@ by the used format or see the format descriptions below for details.
.. option:: -c
Indicates that target image must be compressed (qcow format only).
Indicates that target image must be compressed (qcow/qcow2 and vmdk with
streamOptimized subformat only).
For qcow2, the compression algorithm can be specified with the ``-o
compression_type=...`` option (see below).
.. option:: -h
Expand Down Expand Up @@ -776,7 +780,7 @@ Supported image file formats:
QEMU image format, the most versatile format. Use it to have smaller
images (useful if your filesystem does not supports holes, for example
on Windows), optional AES encryption, zlib based compression and
on Windows), optional AES encryption, zlib or zstd based compression and
support of multiple VM snapshots.
Supported options:
Expand All @@ -794,6 +798,17 @@ Supported image file formats:
``backing_fmt``
Image format of the base image
``compression_type``
This option configures which compression algorithm will be used for
compressed clusters on the image. Note that setting this option doesn't yet
cause the image to actually receive compressed writes. It is most commonly
used with the ``-c`` option of ``qemu-img convert``, but can also be used
with the ``compress`` filter driver or backup block jobs with compression
enabled.
Valid values are ``zlib`` and ``zstd``. For images that use
``compat=0.10``, only ``zlib`` compression is available.
``encryption``
If this option is set to ``on``, the image is encrypted with
128-bit AES-CBC.
Expand Down

0 comments on commit 78f8b6d

Please sign in to comment.