Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge tag 'block-pull-request' of https://gitlab.com/stefanha/qemu in…
…to staging

Pull request

This pull request contain's Sam Li's zoned storage support in the QEMU block
layer and virtio-blk emulation.

v2:
- Sam fixed the CI failures. CI passes for me now. [Richard]

# -----BEGIN PGP SIGNATURE-----
#
# iQEzBAABCAAdFiEEhpWov9P5fNqsNXdanKSrs4Grc8gFAmRiWCgACgkQnKSrs4Gr
# c8h/7gf+MMm2cGEaf376t8HMwTc6wbXVfbmAlZrge2EXPZfFvEaxj7HClcEraOgV
# yJsGWeU6mOw4r68ICJ/4KhrY1cdv+VZym/LsMLMcFUTXFHnyX4pyU3am31FPOI4K
# +wrDYJOJhc4DkAESWGgEWiMKpuO/uUEgBmHdW+qPFCl77Yl/eP6H5uNP6nGFn55p
# QpS/l8iha7PDkc81EsrjA+e/YI0ubfNSP7+zZElhQ98354CQ0MCfmZ6h9bT+o2bu
# R7SBUj80e+2X0a1b9s/2Jz/x8l4TEsl8kr48/Q1usq3GVVkbjEgqsk6wTN13Q/4g
# CeIR7E61ZeYzmpb4tLFRIqK2Jw+NEQ==
# =Q8xW
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 15 May 2023 09:04:56 AM PDT
# gpg:                using RSA key 8695A8BFD3F97CDAAC35775A9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>" [full]
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>" [full]

* tag 'block-pull-request' of https://gitlab.com/stefanha/qemu:
  docs/zoned-storage:add zoned emulation use case
  virtio-blk: add some trace events for zoned emulation
  block: add accounting for zone append operation
  virtio-blk: add zoned storage emulation for zoned devices
  block: add some trace events for zone append
  qemu-iotests: test zone append operation
  block: introduce zone append write for zoned devices
  file-posix: add tracking of the zone write pointers
  docs/zoned-storage: add zoned device documentation
  block: add some trace events for new block layer APIs
  iotests: test new zone operations
  block: add zoned BlockDriver check to block layer
  block/raw-format: add zone operations to pass through requests
  block/block-backend: add block layer APIs resembling Linux ZonedBlockDevice ioctls
  block/file-posix: introduce helper functions for sysfs attributes
  block/block-common: add zoned device structs

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
  • Loading branch information
rth7680 committed May 15, 2023
2 parents 1f9e883 + 01562fe commit 2c4ead5
Show file tree
Hide file tree
Showing 30 changed files with 2,100 additions and 52 deletions.
19 changes: 19 additions & 0 deletions block.c
Expand Up @@ -7982,6 +7982,25 @@ void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs,
return;
}

/*
* Non-zoned block drivers do not follow zoned storage constraints
* (i.e. sequential writes to zones). Refuse mixing zoned and non-zoned
* drivers in a graph.
*/
if (!parent_bs->drv->supports_zoned_children &&
child_bs->bl.zoned == BLK_Z_HM) {
/*
* The host-aware model allows zoned storage constraints and random
* write. Allow mixing host-aware and non-zoned drivers. Using
* host-aware device as a regular device.
*/
error_setg(errp, "Cannot add a %s child to a %s parent",
child_bs->bl.zoned == BLK_Z_HM ? "zoned" : "non-zoned",
parent_bs->drv->supports_zoned_children ?
"support zoned children" : "not support zoned children");
return;
}

if (!QLIST_EMPTY(&child_bs->parents)) {
error_setg(errp, "The node %s already has a parent",
child_bs->node_name);
Expand Down
198 changes: 198 additions & 0 deletions block/block-backend.c
Expand Up @@ -1845,6 +1845,204 @@ int coroutine_fn blk_co_flush(BlockBackend *blk)
return ret;
}

static void coroutine_fn blk_aio_zone_report_entry(void *opaque)
{
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;

rwco->ret = blk_co_zone_report(rwco->blk, rwco->offset,
(unsigned int*)(uintptr_t)acb->bytes,
rwco->iobuf);
blk_aio_complete(acb);
}

BlockAIOCB *blk_aio_zone_report(BlockBackend *blk, int64_t offset,
unsigned int *nr_zones,
BlockZoneDescriptor *zones,
BlockCompletionFunc *cb, void *opaque)
{
BlkAioEmAIOCB *acb;
Coroutine *co;
IO_CODE();

blk_inc_in_flight(blk);
acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
acb->rwco = (BlkRwCo) {
.blk = blk,
.offset = offset,
.iobuf = zones,
.ret = NOT_DONE,
};
acb->bytes = (int64_t)(uintptr_t)nr_zones,
acb->has_returned = false;

co = qemu_coroutine_create(blk_aio_zone_report_entry, acb);
aio_co_enter(blk_get_aio_context(blk), co);

acb->has_returned = true;
if (acb->rwco.ret != NOT_DONE) {
replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
blk_aio_complete_bh, acb);
}

return &acb->common;
}

static void coroutine_fn blk_aio_zone_mgmt_entry(void *opaque)
{
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;

rwco->ret = blk_co_zone_mgmt(rwco->blk,
(BlockZoneOp)(uintptr_t)rwco->iobuf,
rwco->offset, acb->bytes);
blk_aio_complete(acb);
}

BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
int64_t offset, int64_t len,
BlockCompletionFunc *cb, void *opaque) {
BlkAioEmAIOCB *acb;
Coroutine *co;
IO_CODE();

blk_inc_in_flight(blk);
acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
acb->rwco = (BlkRwCo) {
.blk = blk,
.offset = offset,
.iobuf = (void *)(uintptr_t)op,
.ret = NOT_DONE,
};
acb->bytes = len;
acb->has_returned = false;

co = qemu_coroutine_create(blk_aio_zone_mgmt_entry, acb);
aio_co_enter(blk_get_aio_context(blk), co);

acb->has_returned = true;
if (acb->rwco.ret != NOT_DONE) {
replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
blk_aio_complete_bh, acb);
}

return &acb->common;
}

static void coroutine_fn blk_aio_zone_append_entry(void *opaque)
{
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;

rwco->ret = blk_co_zone_append(rwco->blk, (int64_t *)(uintptr_t)acb->bytes,
rwco->iobuf, rwco->flags);
blk_aio_complete(acb);
}

BlockAIOCB *blk_aio_zone_append(BlockBackend *blk, int64_t *offset,
QEMUIOVector *qiov, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque) {
BlkAioEmAIOCB *acb;
Coroutine *co;
IO_CODE();

blk_inc_in_flight(blk);
acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
acb->rwco = (BlkRwCo) {
.blk = blk,
.ret = NOT_DONE,
.flags = flags,
.iobuf = qiov,
};
acb->bytes = (int64_t)(uintptr_t)offset;
acb->has_returned = false;

co = qemu_coroutine_create(blk_aio_zone_append_entry, acb);
aio_co_enter(blk_get_aio_context(blk), co);
acb->has_returned = true;
if (acb->rwco.ret != NOT_DONE) {
replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
blk_aio_complete_bh, acb);
}

return &acb->common;
}

/*
* Send a zone_report command.
* offset is a byte offset from the start of the device. No alignment
* required for offset.
* nr_zones represents IN maximum and OUT actual.
*/
int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
unsigned int *nr_zones,
BlockZoneDescriptor *zones)
{
int ret;
IO_CODE();

blk_inc_in_flight(blk); /* increase before waiting */
blk_wait_while_drained(blk);
GRAPH_RDLOCK_GUARD();
if (!blk_is_available(blk)) {
blk_dec_in_flight(blk);
return -ENOMEDIUM;
}
ret = bdrv_co_zone_report(blk_bs(blk), offset, nr_zones, zones);
blk_dec_in_flight(blk);
return ret;
}

/*
* Send a zone_management command.
* op is the zone operation;
* offset is the byte offset from the start of the zoned device;
* len is the maximum number of bytes the command should operate on. It
* should be aligned with the device zone size.
*/
int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
int64_t offset, int64_t len)
{
int ret;
IO_CODE();

blk_inc_in_flight(blk);
blk_wait_while_drained(blk);
GRAPH_RDLOCK_GUARD();

ret = blk_check_byte_request(blk, offset, len);
if (ret < 0) {
blk_dec_in_flight(blk);
return ret;
}

ret = bdrv_co_zone_mgmt(blk_bs(blk), op, offset, len);
blk_dec_in_flight(blk);
return ret;
}

/*
* Send a zone_append command.
*/
int coroutine_fn blk_co_zone_append(BlockBackend *blk, int64_t *offset,
QEMUIOVector *qiov, BdrvRequestFlags flags)
{
int ret;
IO_CODE();

blk_inc_in_flight(blk);
blk_wait_while_drained(blk);
GRAPH_RDLOCK_GUARD();
if (!blk_is_available(blk)) {
blk_dec_in_flight(blk);
return -ENOMEDIUM;
}

ret = bdrv_co_zone_append(blk_bs(blk), offset, qiov, flags);
blk_dec_in_flight(blk);
return ret;
}

void blk_drain(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
Expand Down

0 comments on commit 2c4ead5

Please sign in to comment.