Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
block: change reqs_lock to QemuMutex
CoMutex has poor performance when lock contention is high. The tracked
requests list is accessed frequently and performance suffers in QEMU
multi-queue block layer scenarios.

It is not necessary to use CoMutex for the requests lock. The lock is
always released across coroutine yield operations. It is held for
relatively short periods of time and it is not beneficial to yield when
the lock is held by another coroutine.

Change the lock type from CoMutex to QemuMutex to improve multi-queue
block layer performance. fio randread bs=4k iodepth=64 with 4 IOThreads
handling a virtio-blk device with 8 virtqueues improves from 254k to
517k IOPS (+104%, i.e. roughly 2x). Full benchmark results and configuration details are
available here:
https://gitlab.com/stefanha/virt-playbooks/-/commit/980c40845d540e3669add1528739503c2e817b57

In the future we may wish to introduce thread-local tracked requests
lists to avoid lock contention completely. That would be much more
involved though.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-ID: <20230808155852.2745350-3-stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
  • Loading branch information
Stefan Hajnoczi authored and Kevin Wolf committed Sep 8, 2023
1 parent 3480ce6 commit fa9185f
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 14 deletions.
4 changes: 3 additions & 1 deletion block.c
Expand Up @@ -415,7 +415,7 @@ BlockDriverState *bdrv_new(void)
for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
QLIST_INIT(&bs->op_blockers[i]);
}
qemu_co_mutex_init(&bs->reqs_lock);
qemu_mutex_init(&bs->reqs_lock);
qemu_mutex_init(&bs->dirty_bitmap_mutex);
bs->refcnt = 1;
bs->aio_context = qemu_get_aio_context();
Expand Down Expand Up @@ -5476,6 +5476,8 @@ static void bdrv_delete(BlockDriverState *bs)

bdrv_close(bs);

qemu_mutex_destroy(&bs->reqs_lock);

g_free(bs);
}

Expand Down
24 changes: 12 additions & 12 deletions block/io.c
Expand Up @@ -591,9 +591,9 @@ static void coroutine_fn tracked_request_end(BdrvTrackedRequest *req)
qatomic_dec(&req->bs->serialising_in_flight);
}

qemu_co_mutex_lock(&req->bs->reqs_lock);
qemu_mutex_lock(&req->bs->reqs_lock);
QLIST_REMOVE(req, list);
qemu_co_mutex_unlock(&req->bs->reqs_lock);
qemu_mutex_unlock(&req->bs->reqs_lock);

/*
* At this point qemu_co_queue_wait(&req->wait_queue, ...) won't be called
Expand Down Expand Up @@ -627,9 +627,9 @@ static void coroutine_fn tracked_request_begin(BdrvTrackedRequest *req,

qemu_co_queue_init(&req->wait_queue);

qemu_co_mutex_lock(&bs->reqs_lock);
qemu_mutex_lock(&bs->reqs_lock);
QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
qemu_co_mutex_unlock(&bs->reqs_lock);
qemu_mutex_unlock(&bs->reqs_lock);
}

static bool tracked_request_overlaps(BdrvTrackedRequest *req,
Expand Down Expand Up @@ -793,22 +793,22 @@ bdrv_wait_serialising_requests(BdrvTrackedRequest *self)
return;
}

qemu_co_mutex_lock(&bs->reqs_lock);
qemu_mutex_lock(&bs->reqs_lock);
bdrv_wait_serialising_requests_locked(self);
qemu_co_mutex_unlock(&bs->reqs_lock);
qemu_mutex_unlock(&bs->reqs_lock);
}

void coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
uint64_t align)
{
IO_CODE();

qemu_co_mutex_lock(&req->bs->reqs_lock);
qemu_mutex_lock(&req->bs->reqs_lock);

tracked_request_set_serialising(req, align);
bdrv_wait_serialising_requests_locked(req);

qemu_co_mutex_unlock(&req->bs->reqs_lock);
qemu_mutex_unlock(&req->bs->reqs_lock);
}

int bdrv_check_qiov_request(int64_t offset, int64_t bytes,
Expand Down Expand Up @@ -3002,7 +3002,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
goto early_exit;
}

qemu_co_mutex_lock(&bs->reqs_lock);
qemu_mutex_lock(&bs->reqs_lock);
current_gen = qatomic_read(&bs->write_gen);

/* Wait until any previous flushes are completed */
Expand All @@ -3012,7 +3012,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)

/* Flushes reach this point in nondecreasing current_gen order. */
bs->active_flush_req = true;
qemu_co_mutex_unlock(&bs->reqs_lock);
qemu_mutex_unlock(&bs->reqs_lock);

/* Write back all layers by calling one driver function */
if (bs->drv->bdrv_co_flush) {
Expand Down Expand Up @@ -3100,11 +3100,11 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
bs->flushed_gen = current_gen;
}

qemu_co_mutex_lock(&bs->reqs_lock);
qemu_mutex_lock(&bs->reqs_lock);
bs->active_flush_req = false;
/* Return value is ignored - it's ok if wait queue is empty */
qemu_co_queue_next(&bs->flush_queue);
qemu_co_mutex_unlock(&bs->reqs_lock);
qemu_mutex_unlock(&bs->reqs_lock);

early_exit:
bdrv_dec_in_flight(bs);
Expand Down
2 changes: 1 addition & 1 deletion include/block/block_int-common.h
Expand Up @@ -1231,7 +1231,7 @@ struct BlockDriverState {
unsigned int write_gen; /* Current data generation */

/* Protected by reqs_lock. */
CoMutex reqs_lock;
QemuMutex reqs_lock;
QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
CoQueue flush_queue; /* Serializing flush queue */
bool active_flush_req; /* Flush request in flight? */
Expand Down

0 comments on commit fa9185f

Please sign in to comment.