Merge tag 'for-upstream' of https://repo.or.cz/qemu/kevin into staging
Block layer patches

- Process I/O in the current AioContext (instead of the BB AioContext)
- Optimise reqs_lock to make multiqueue actually scale
- iotests: Fix reference output for some tests after recent changes
- vpc: Avoid dynamic stack allocation
- Code cleanup, improved documentation

# -----BEGIN PGP SIGNATURE-----
#
# iQJFBAABCAAvFiEE3D3rFZqa+V09dFb+fwmycsiPL9YFAmT16nMRHGt3b2xmQHJl
# ZGhhdC5jb20ACgkQfwmycsiPL9bN5BAAyOFsVxAd9GmHHXVaROprd7uziW46++QK
# wgs3YWZnzm5P1ZhQ1h0PIW1YsY7jJF2V8cMKKOPCnZQTdOB7uFv2z1FjGrVa3nMG
# M12H5WQkdZQkMC7NNw02Ca+d1RSAt8BnsViSTm0xEKnZJ/Wal4VT8TWFGnan1MR0
# uIlCf2Adu8KTI/khiQQmF/VT3acfGace+tdcYBZFc9RzgPHqTGuRkM3fM9bK46k7
# 9T4ilI5HZt9iSyolpE4FwQtGnaTMj3hrIyFTdVTBN689w9T458KD0Yvj0U5EqlCU
# Nl6J1rUacDqvw0YufdrvyDyRAhdSr0BPseR4uAe3nS3t19fzU96Z6L3Y4pkOwETj
# yCdFF2kdqi7du5r1YLgJX83BTNBEv63OSQ02rjQP/crg+uU0s2eM2ReUF3NkWWU3
# 5gd9HrCKe7NaARD99cmcq7RNGII6R7il9f9+6SWnACW9j3Ijb92AQbTrAiq2OhMH
# Na8rbm152CHWEp//EOhbi44CTXLLck6mUr8DH8kzjSwIKZz50dFFpgAVEX2GwlTN
# VA/s0cLQnTjZzKil/p80alZ81khziv3QbpvyBl524uU0LqC5pZrnaPndEs1vEK5Y
# 5oKgXm/fVmW4VKLxa63vDX2syBYN2b+pvHW8LEfW0sgHJiltzqie0kfW0836ztE0
# vvx0h79oSZU=
# =eCVG
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 04 Sep 2023 10:32:19 EDT
# gpg:                using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6
# gpg:                issuer "kwolf@redhat.com"
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* tag 'for-upstream' of https://repo.or.cz/qemu/kevin:
  block: Remove unnecessary variable in bdrv_block_device_info
  block: Remove bdrv_query_block_node_info
  block-backend: process zoned requests in the current AioContext
  block-backend: process I/O in the current AioContext
  block: remove AIOCBInfo->get_aio_context()
  vmdk: Clean up bdrv_open_child() return value check
  qemu-img: Update documentation for compressed images
  block: Be more verbose in create fallback
  block/iscsi: Document why we use raw malloc()
  qemu-img: omit errno value in error message
  block: change reqs_lock to QemuMutex
  block: minimize bs->reqs_lock section in tracked_request_end()
  iotests: adapt test output for new qemu_cleanup() behavior
  block/vpc: Avoid dynamic stack allocation

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Stefan Hajnoczi committed Sep 6, 2023
2 parents 90d9107 + bb86eb4, commit 388fd93
Showing 23 changed files with 107 additions and 129 deletions.
10 changes: 7 additions & 3 deletions block.c

@@ -415,7 +415,7 @@ BlockDriverState *bdrv_new(void)
     for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
         QLIST_INIT(&bs->op_blockers[i]);
     }
-    qemu_co_mutex_init(&bs->reqs_lock);
+    qemu_mutex_init(&bs->reqs_lock);
     qemu_mutex_init(&bs->dirty_bitmap_mutex);
     bs->refcnt = 1;
     bs->aio_context = qemu_get_aio_context();
@@ -661,8 +661,10 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
     blk = blk_co_new_open(filename, NULL, options,
                           BDRV_O_RDWR | BDRV_O_RESIZE, errp);
     if (!blk) {
-        error_prepend(errp, "Protocol driver '%s' does not support image "
-                      "creation, and opening the image failed: ",
+        error_prepend(errp, "Protocol driver '%s' does not support creating "
+                      "new images, so an existing image must be selected as "
+                      "the target; however, opening the given target as an "
+                      "existing image failed: ",
                       drv->format_name);
         return -EINVAL;
     }
@@ -5476,6 +5478,8 @@ static void bdrv_delete(BlockDriverState *bs)
 
     bdrv_close(bs);
 
+    qemu_mutex_destroy(&bs->reqs_lock);
+
     g_free(bs);
 }
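
The two reqs_lock hunks pair up: bdrv_new() initialises the new QemuMutex and bdrv_delete() now destroys it. A minimal sketch of that lifecycle pattern, using QEMU's real qemu_mutex_*() API (QEMU-internal headers, so it builds only inside the tree; the Demo type is a hypothetical stand-in for BlockDriverState):

#include "qemu/osdep.h"
#include "qemu/thread.h"

typedef struct Demo {
    QemuMutex lock;
} Demo;

static void demo_new(Demo *d)
{
    qemu_mutex_init(&d->lock);      /* as in bdrv_new() */
}

static void demo_delete(Demo *d)
{
    qemu_mutex_destroy(&d->lock);   /* the counterpart added to bdrv_delete() */
}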
35 changes: 9 additions & 26 deletions block/block-backend.c

@@ -33,8 +33,6 @@
 
 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
 
-static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
-
 typedef struct BlockBackendAioNotifier {
     void (*attached_aio_context)(AioContext *new_context, void *opaque);
     void (*detach_aio_context)(void *opaque);
@@ -103,7 +101,6 @@ typedef struct BlockBackendAIOCB {
 } BlockBackendAIOCB;
 
 static const AIOCBInfo block_backend_aiocb_info = {
-    .get_aio_context = blk_aiocb_get_aio_context,
     .aiocb_size = sizeof(BlockBackendAIOCB),
 };
 
@@ -1533,7 +1530,7 @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
     acb->blk = blk;
     acb->ret = ret;
 
-    replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
+    replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
                                      error_callback_bh, acb);
     return &acb->common;
 }
@@ -1545,16 +1542,8 @@ typedef struct BlkAioEmAIOCB {
     bool has_returned;
 } BlkAioEmAIOCB;
 
-static AioContext *blk_aio_em_aiocb_get_aio_context(BlockAIOCB *acb_)
-{
-    BlkAioEmAIOCB *acb = container_of(acb_, BlkAioEmAIOCB, common);
-
-    return blk_get_aio_context(acb->rwco.blk);
-}
-
 static const AIOCBInfo blk_aio_em_aiocb_info = {
     .aiocb_size = sizeof(BlkAioEmAIOCB),
-    .get_aio_context = blk_aio_em_aiocb_get_aio_context,
 };
 
 static void blk_aio_complete(BlkAioEmAIOCB *acb)
@@ -1595,11 +1584,11 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset,
     acb->has_returned = false;
 
     co = qemu_coroutine_create(co_entry, acb);
-    aio_co_enter(blk_get_aio_context(blk), co);
+    aio_co_enter(qemu_get_current_aio_context(), co);
 
     acb->has_returned = true;
     if (acb->rwco.ret != NOT_DONE) {
-        replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
+        replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
                                          blk_aio_complete_bh, acb);
     }
 
@@ -1901,11 +1890,11 @@ BlockAIOCB *blk_aio_zone_report(BlockBackend *blk, int64_t offset,
     acb->has_returned = false;
 
     co = qemu_coroutine_create(blk_aio_zone_report_entry, acb);
-    aio_co_enter(blk_get_aio_context(blk), co);
+    aio_co_enter(qemu_get_current_aio_context(), co);
 
     acb->has_returned = true;
     if (acb->rwco.ret != NOT_DONE) {
-        replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
+        replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
                                          blk_aio_complete_bh, acb);
     }
 
@@ -1942,11 +1931,11 @@ BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
     acb->has_returned = false;
 
     co = qemu_coroutine_create(blk_aio_zone_mgmt_entry, acb);
-    aio_co_enter(blk_get_aio_context(blk), co);
+    aio_co_enter(qemu_get_current_aio_context(), co);
 
     acb->has_returned = true;
     if (acb->rwco.ret != NOT_DONE) {
-        replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
+        replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
                                          blk_aio_complete_bh, acb);
     }
 
@@ -1982,10 +1971,10 @@ BlockAIOCB *blk_aio_zone_append(BlockBackend *blk, int64_t *offset,
     acb->has_returned = false;
 
     co = qemu_coroutine_create(blk_aio_zone_append_entry, acb);
-    aio_co_enter(blk_get_aio_context(blk), co);
+    aio_co_enter(qemu_get_current_aio_context(), co);
     acb->has_returned = true;
     if (acb->rwco.ret != NOT_DONE) {
-        replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
+        replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
                                          blk_aio_complete_bh, acb);
    }
 
@@ -2434,12 +2423,6 @@ AioContext *blk_get_aio_context(BlockBackend *blk)
     return blk->ctx;
 }
 
-static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
-{
-    BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb);
-    return blk_get_aio_context(blk_acb->blk);
-}
-
 int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
                         Error **errp)
 {
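
These hunks implement the cover letter's "process I/O in the current AioContext" item: the request coroutine is entered, and its completion scheduled, in the submitter's context rather than the BlockBackend's. A hedged sketch of what that means for a caller — blk_aio_pwritev() is the real QEMU-internal API, while the surrounding function names are hypothetical:

/* With this series, the callback runs in the AioContext that was current in
 * the thread that submitted the request, i.e. qemu_get_current_aio_context()
 * at submission time, not in blk's bound context. */
static void my_write_done(void *opaque, int ret)
{
    if (ret < 0) {
        /* error handling happens in the submitting thread */
    }
}

static void submit_from_iothread(BlockBackend *blk, QEMUIOVector *qiov)
{
    blk_aio_pwritev(blk, 0 /* offset */, qiov, 0 /* flags */,
                    my_write_done, NULL);
}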
53 changes: 26 additions & 27 deletions block/io.c

@@ -591,10 +591,16 @@ static void coroutine_fn tracked_request_end(BdrvTrackedRequest *req)
         qatomic_dec(&req->bs->serialising_in_flight);
     }
 
-    qemu_co_mutex_lock(&req->bs->reqs_lock);
+    qemu_mutex_lock(&req->bs->reqs_lock);
     QLIST_REMOVE(req, list);
+    qemu_mutex_unlock(&req->bs->reqs_lock);
+
+    /*
+     * At this point qemu_co_queue_wait(&req->wait_queue, ...) won't be called
+     * anymore because the request has been removed from the list, so it's safe
+     * to restart the queue outside reqs_lock to minimize the critical section.
+     */
     qemu_co_queue_restart_all(&req->wait_queue);
-    qemu_co_mutex_unlock(&req->bs->reqs_lock);
 }
 
 /**
@@ -621,9 +627,9 @@ static void coroutine_fn tracked_request_begin(BdrvTrackedRequest *req,
 
     qemu_co_queue_init(&req->wait_queue);
 
-    qemu_co_mutex_lock(&bs->reqs_lock);
+    qemu_mutex_lock(&bs->reqs_lock);
     QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
-    qemu_co_mutex_unlock(&bs->reqs_lock);
+    qemu_mutex_unlock(&bs->reqs_lock);
 }
 
 static bool tracked_request_overlaps(BdrvTrackedRequest *req,
@@ -787,22 +793,22 @@ bdrv_wait_serialising_requests(BdrvTrackedRequest *self)
         return;
     }
 
-    qemu_co_mutex_lock(&bs->reqs_lock);
+    qemu_mutex_lock(&bs->reqs_lock);
     bdrv_wait_serialising_requests_locked(self);
-    qemu_co_mutex_unlock(&bs->reqs_lock);
+    qemu_mutex_unlock(&bs->reqs_lock);
 }
 
 void coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
                                                 uint64_t align)
 {
     IO_CODE();
 
-    qemu_co_mutex_lock(&req->bs->reqs_lock);
+    qemu_mutex_lock(&req->bs->reqs_lock);
 
     tracked_request_set_serialising(req, align);
     bdrv_wait_serialising_requests_locked(req);
 
-    qemu_co_mutex_unlock(&req->bs->reqs_lock);
+    qemu_mutex_unlock(&req->bs->reqs_lock);
 }
 
 int bdrv_check_qiov_request(int64_t offset, int64_t bytes,
@@ -2944,25 +2950,18 @@ int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
 /**************************************************************/
 /* async I/Os */
 
+/**
+ * Synchronously cancels an acb. Must be called with the BQL held and the acb
+ * must be processed with the BQL held too (IOThreads are not allowed).
+ *
+ * Use bdrv_aio_cancel_async() instead when possible.
+ */
 void bdrv_aio_cancel(BlockAIOCB *acb)
 {
-    IO_CODE();
+    GLOBAL_STATE_CODE();
     qemu_aio_ref(acb);
     bdrv_aio_cancel_async(acb);
-    while (acb->refcnt > 1) {
-        if (acb->aiocb_info->get_aio_context) {
-            aio_poll(acb->aiocb_info->get_aio_context(acb), true);
-        } else if (acb->bs) {
-            /* qemu_aio_ref and qemu_aio_unref are not thread-safe, so
-             * assert that we're not using an I/O thread. Thread-safe
-             * code should use bdrv_aio_cancel_async exclusively.
-             */
-            assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context());
-            aio_poll(bdrv_get_aio_context(acb->bs), true);
-        } else {
-            abort();
-        }
-    }
+    AIO_WAIT_WHILE_UNLOCKED(NULL, acb->refcnt > 1);
     qemu_aio_unref(acb);
 }
 
@@ -2996,7 +2995,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
         goto early_exit;
     }
 
-    qemu_co_mutex_lock(&bs->reqs_lock);
+    qemu_mutex_lock(&bs->reqs_lock);
     current_gen = qatomic_read(&bs->write_gen);
 
     /* Wait until any previous flushes are completed */
@@ -3006,7 +3005,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
 
     /* Flushes reach this point in nondecreasing current_gen order. */
     bs->active_flush_req = true;
-    qemu_co_mutex_unlock(&bs->reqs_lock);
+    qemu_mutex_unlock(&bs->reqs_lock);
 
     /* Write back all layers by calling one driver function */
     if (bs->drv->bdrv_co_flush) {
@@ -3094,11 +3093,11 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
         bs->flushed_gen = current_gen;
     }
 
-    qemu_co_mutex_lock(&bs->reqs_lock);
+    qemu_mutex_lock(&bs->reqs_lock);
     bs->active_flush_req = false;
     /* Return value is ignored - it's ok if wait queue is empty */
     qemu_co_queue_next(&bs->flush_queue);
-    qemu_co_mutex_unlock(&bs->reqs_lock);
+    qemu_mutex_unlock(&bs->reqs_lock);
 
 early_exit:
     bdrv_dec_in_flight(bs);
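
The tracked_request_end() hunk above carries the scaling argument: once the request is off the list, no new waiter can queue up on it, so the wait queue can be restarted after dropping reqs_lock and the critical section shrinks to the list update. A standalone pthread sketch of the same shape (illustrative only; QEMU uses QemuMutex and coroutine queues, and condition-variable waiters must additionally re-check their predicate under the lock):

#include <pthread.h>
#include <sys/queue.h>

struct request {
    LIST_ENTRY(request) list;
};

static LIST_HEAD(, request) tracked = LIST_HEAD_INITIALIZER(tracked);
static pthread_mutex_t reqs_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wait_queue = PTHREAD_COND_INITIALIZER;

static void request_end(struct request *req)
{
    /* The critical section covers only the list update... */
    pthread_mutex_lock(&reqs_lock);
    LIST_REMOVE(req, list);
    pthread_mutex_unlock(&reqs_lock);

    /* ...and waiters are woken outside the lock, keeping contention low. */
    pthread_cond_broadcast(&wait_queue);
}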
1 change: 1 addition & 0 deletions block/iscsi.c

@@ -1058,6 +1058,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
         return NULL;
     }
 
+    /* Must use malloc(): this is freed via scsi_free_scsi_task() */
     acb->task = malloc(sizeof(struct scsi_task));
     if (acb->task == NULL) {
         error_report("iSCSI: Failed to allocate task for scsi command. %s",
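
The new comment records an allocator-pairing rule: the task is later handed to libiscsi's scsi_free_scsi_task(), which releases the memory itself, so the allocation must come from plain malloc() rather than g_malloc()/g_free(). A minimal sketch of the pairing (the function names make_task/drop_task are hypothetical, and the rationale that libiscsi frees with free() internally is inferred from the comment, not spelled out in the diff):

#include <stdlib.h>
#include <string.h>
#include <iscsi/scsi-lowlevel.h>

static struct scsi_task *make_task(void)
{
    /* malloc(), not g_malloc(): ownership passes to libiscsi */
    struct scsi_task *task = malloc(sizeof(struct scsi_task));
    if (task != NULL) {
        memset(task, 0, sizeof(*task));
    }
    return task;
}

static void drop_task(struct scsi_task *task)
{
    scsi_free_scsi_task(task);   /* libiscsi releases the allocation */
}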
32 changes: 2 additions & 30 deletions block/qapi.c

@@ -48,7 +48,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
 {
     ImageInfo **p_image_info;
     ImageInfo *backing_info;
-    BlockDriverState *bs0, *backing;
+    BlockDriverState *backing;
     BlockDeviceInfo *info;
     ERRP_GUARD();
 
@@ -145,15 +145,14 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
 
     info->write_threshold = bdrv_write_threshold_get(bs);
 
-    bs0 = bs;
     p_image_info = &info->image;
     info->backing_file_depth = 0;
 
     /*
      * Skip automatically inserted nodes that the user isn't aware of for
      * query-block (blk != NULL), but not for query-named-block-nodes
      */
-    bdrv_query_image_info(bs0, p_image_info, flat, blk != NULL, errp);
+    bdrv_query_image_info(bs, p_image_info, flat, blk != NULL, errp);
     if (*errp) {
         qapi_free_BlockDeviceInfo(info);
         return NULL;
@@ -309,33 +308,6 @@ static void bdrv_do_query_node_info(BlockDriverState *bs,
     aio_context_release(bdrv_get_aio_context(bs));
 }
 
-/**
- * bdrv_query_block_node_info:
- * @bs: block node to examine
- * @p_info: location to store node information
- * @errp: location to store error information
- *
- * Store image information about @bs in @p_info.
- *
- * @p_info will be set only on success. On error, store error in @errp.
- */
-void bdrv_query_block_node_info(BlockDriverState *bs,
-                                BlockNodeInfo **p_info,
-                                Error **errp)
-{
-    BlockNodeInfo *info;
-    ERRP_GUARD();
-
-    info = g_new0(BlockNodeInfo, 1);
-    bdrv_do_query_node_info(bs, info, errp);
-    if (*errp) {
-        qapi_free_BlockNodeInfo(info);
-        return;
-    }
-
-    *p_info = info;
-}
-
 /**
  * bdrv_query_image_info:
  * @bs: block node to examine
2 changes: 1 addition & 1 deletion block/vmdk.c

@@ -1207,7 +1207,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
                                       bs, &child_of_bds, extent_role, false,
                                       &local_err);
         g_free(extent_path);
-        if (local_err) {
+        if (!extent_file) {
             error_propagate(errp, local_err);
             ret = -EINVAL;
             goto out;
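
The vmdk cleanup tests the constructor's return value instead of the side-channel Error object, presumably because a NULL return is the authoritative failure signal for bdrv_open_child(). A self-contained sketch of the same contract, with hypothetical names standing in for the QEMU types:

#include <stdio.h>

typedef struct Error { const char *msg; } Error;

/* Stand-in mirroring bdrv_open_child()'s contract: NULL signals failure and
 * *local_err carries the details. */
static FILE *open_child(const char *path, Error **local_err)
{
    FILE *f = fopen(path, "rb");
    if (f == NULL) {
        static Error err = { "opening extent failed" };
        *local_err = &err;
    }
    return f;
}

static int parse_extent(const char *path, Error **errp)
{
    Error *local_err = NULL;
    FILE *extent_file = open_child(path, &local_err);

    if (!extent_file) {          /* check the return value, not local_err */
        *errp = local_err;
        return -1;
    }
    fclose(extent_file);
    return 0;
}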
4 changes: 2 additions & 2 deletions block/vpc.c

@@ -510,7 +510,7 @@ get_image_offset(BlockDriverState *bs, uint64_t offset, bool write, int *err)
        miss sparse read optimization, but it's not a problem in terms of
        correctness. */
     if (write && (s->last_bitmap_offset != bitmap_offset)) {
-        uint8_t bitmap[s->bitmap_size];
+        g_autofree uint8_t *bitmap = g_malloc(s->bitmap_size);
         int r;
 
         s->last_bitmap_offset = bitmap_offset;
@@ -558,7 +558,7 @@ alloc_block(BlockDriverState *bs, int64_t offset)
     int64_t bat_offset;
     uint32_t index, bat_value;
     int ret;
-    uint8_t bitmap[s->bitmap_size];
+    g_autofree uint8_t *bitmap = g_malloc(s->bitmap_size);
 
     /* Check if sector_num is valid */
     if ((offset < 0) || (offset > bs->total_sectors * BDRV_SECTOR_SIZE)) {
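
The vpc hunks replace variable-length stack arrays, whose size here comes from image metadata, with heap buffers that GLib frees automatically. A minimal standalone sketch of the g_autofree pattern (illustrative, not QEMU code; fill_bitmap is a hypothetical name):

#include <glib.h>
#include <string.h>

/* g_autofree attaches a cleanup attribute: the buffer is released with
 * g_free() when the variable goes out of scope, on every return path. */
static void fill_bitmap(gsize bitmap_size)
{
    g_autofree guint8 *bitmap = g_malloc(bitmap_size);

    memset(bitmap, 0xff, bitmap_size);
    /* ... use the bitmap ... */
}   /* bitmap freed here */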
