Skip to content

Commit

Permalink
Merge tag 'block-pull-request' of https://gitlab.com/stefanha/qemu in…
Browse files Browse the repository at this point in the history
…to staging

Pull request

This pull request contains Sam Li's virtio-blk zoned storage work. These
patches were dropped from my previous block pull request due to CI failures.

# -----BEGIN PGP SIGNATURE-----
#
# iQEzBAABCAAdFiEEhpWov9P5fNqsNXdanKSrs4Grc8gFAmRLvo0ACgkQnKSrs4Gr
# c8iPDgf+IugpWRHiL+mM0VJFGo8DHAsWqixU8yM46Rig4xGPqcjdhmAqYUsziDcA
# 6jKtc+vhMGbj5jZrHOgLdIiLtFWs58zyXaQso2TAMWoI3s3DI3two+MT9DwOWnJM
# /0G2ZoEZnr1SO+Mzz0R8WKewB/ezbGpqwog/JILWu+vEXNCrtJYrd4XCSrL9Xs3n
# ZrwZCFc+bHKKmC251sqpe5ls9OkvFcIVPXSawNUTN8zesjd58dCssHnAgeIuopA5
# rM042u1xwINCc7uT06b9yBFEX058eWpvshwL+tZtPdyqg9XPtTBr6/mZPtNKFeU9
# bf0nuEGgRBSpbcj8uikjvefu8Rw4IQ==
# =pfhE
# -----END PGP SIGNATURE-----
# gpg: Signature made Fri 28 Apr 2023 01:39:41 PM BST
# gpg:                using RSA key 8695A8BFD3F97CDAAC35775A9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>" [full]
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>" [full]

* tag 'block-pull-request' of https://gitlab.com/stefanha/qemu:
  docs/zoned-storage:add zoned emulation use case
  virtio-blk: add some trace events for zoned emulation
  block: add accounting for zone append operation
  virtio-blk: add zoned storage emulation for zoned devices
  include: update virtio_blk headers to v6.3-rc1
  block: add some trace events for zone append
  qemu-iotests: test zone append operation
  block: introduce zone append write for zoned devices
  file-posix: add tracking of the zone write pointers
  docs/zoned-storage: add zoned device documentation
  block: add some trace events for new block layer APIs
  iotests: test new zone operations
  block: add zoned BlockDriver check to block layer
  block/raw-format: add zone operations to pass through requests
  block/block-backend: add block layer APIs resembling Linux ZonedBlockDevice ioctls
  block/file-posix: introduce helper functions for sysfs attributes
  block/block-common: add zoned device structs

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
  • Loading branch information
rth7680 committed Apr 29, 2023
2 parents 9b112b1 + d3c760b commit 9778b26
Show file tree
Hide file tree
Showing 40 changed files with 2,355 additions and 62 deletions.
19 changes: 19 additions & 0 deletions block.c
Expand Up @@ -7967,6 +7967,25 @@ void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs,
return;
}

/*
* Non-zoned block drivers do not follow zoned storage constraints
* (i.e. sequential writes to zones). Refuse mixing zoned and non-zoned
* drivers in a graph.
*/
if (!parent_bs->drv->supports_zoned_children &&
child_bs->bl.zoned == BLK_Z_HM) {
/*
* The host-aware model allows zoned storage constraints and random
* write. Allow mixing host-aware and non-zoned drivers. Using
* host-aware device as a regular device.
*/
error_setg(errp, "Cannot add a %s child to a %s parent",
child_bs->bl.zoned == BLK_Z_HM ? "zoned" : "non-zoned",
parent_bs->drv->supports_zoned_children ?
"support zoned children" : "not support zoned children");
return;
}

if (!QLIST_EMPTY(&child_bs->parents)) {
error_setg(errp, "The node %s already has a parent",
child_bs->node_name);
Expand Down
198 changes: 198 additions & 0 deletions block/block-backend.c
Expand Up @@ -1845,6 +1845,204 @@ int coroutine_fn blk_co_flush(BlockBackend *blk)
return ret;
}

static void coroutine_fn blk_aio_zone_report_entry(void *opaque)
{
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;

rwco->ret = blk_co_zone_report(rwco->blk, rwco->offset,
(unsigned int*)(uintptr_t)acb->bytes,
rwco->iobuf);
blk_aio_complete(acb);
}

BlockAIOCB *blk_aio_zone_report(BlockBackend *blk, int64_t offset,
unsigned int *nr_zones,
BlockZoneDescriptor *zones,
BlockCompletionFunc *cb, void *opaque)
{
BlkAioEmAIOCB *acb;
Coroutine *co;
IO_CODE();

blk_inc_in_flight(blk);
acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
acb->rwco = (BlkRwCo) {
.blk = blk,
.offset = offset,
.iobuf = zones,
.ret = NOT_DONE,
};
acb->bytes = (int64_t)(uintptr_t)nr_zones,
acb->has_returned = false;

co = qemu_coroutine_create(blk_aio_zone_report_entry, acb);
aio_co_enter(blk_get_aio_context(blk), co);

acb->has_returned = true;
if (acb->rwco.ret != NOT_DONE) {
replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
blk_aio_complete_bh, acb);
}

return &acb->common;
}

static void coroutine_fn blk_aio_zone_mgmt_entry(void *opaque)
{
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;

rwco->ret = blk_co_zone_mgmt(rwco->blk,
(BlockZoneOp)(uintptr_t)rwco->iobuf,
rwco->offset, acb->bytes);
blk_aio_complete(acb);
}

BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
int64_t offset, int64_t len,
BlockCompletionFunc *cb, void *opaque) {
BlkAioEmAIOCB *acb;
Coroutine *co;
IO_CODE();

blk_inc_in_flight(blk);
acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
acb->rwco = (BlkRwCo) {
.blk = blk,
.offset = offset,
.iobuf = (void *)(uintptr_t)op,
.ret = NOT_DONE,
};
acb->bytes = len;
acb->has_returned = false;

co = qemu_coroutine_create(blk_aio_zone_mgmt_entry, acb);
aio_co_enter(blk_get_aio_context(blk), co);

acb->has_returned = true;
if (acb->rwco.ret != NOT_DONE) {
replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
blk_aio_complete_bh, acb);
}

return &acb->common;
}

static void coroutine_fn blk_aio_zone_append_entry(void *opaque)
{
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;

rwco->ret = blk_co_zone_append(rwco->blk, (int64_t *)(uintptr_t)acb->bytes,
rwco->iobuf, rwco->flags);
blk_aio_complete(acb);
}

BlockAIOCB *blk_aio_zone_append(BlockBackend *blk, int64_t *offset,
QEMUIOVector *qiov, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque) {
BlkAioEmAIOCB *acb;
Coroutine *co;
IO_CODE();

blk_inc_in_flight(blk);
acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
acb->rwco = (BlkRwCo) {
.blk = blk,
.ret = NOT_DONE,
.flags = flags,
.iobuf = qiov,
};
acb->bytes = (int64_t)(uintptr_t)offset;
acb->has_returned = false;

co = qemu_coroutine_create(blk_aio_zone_append_entry, acb);
aio_co_enter(blk_get_aio_context(blk), co);
acb->has_returned = true;
if (acb->rwco.ret != NOT_DONE) {
replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
blk_aio_complete_bh, acb);
}

return &acb->common;
}

/*
* Send a zone_report command.
* offset is a byte offset from the start of the device. No alignment
* required for offset.
* nr_zones represents IN maximum and OUT actual.
*/
int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
unsigned int *nr_zones,
BlockZoneDescriptor *zones)
{
int ret;
IO_CODE();

blk_inc_in_flight(blk); /* increase before waiting */
blk_wait_while_drained(blk);
GRAPH_RDLOCK_GUARD();
if (!blk_is_available(blk)) {
blk_dec_in_flight(blk);
return -ENOMEDIUM;
}
ret = bdrv_co_zone_report(blk_bs(blk), offset, nr_zones, zones);
blk_dec_in_flight(blk);
return ret;
}

/*
* Send a zone_management command.
* op is the zone operation;
* offset is the byte offset from the start of the zoned device;
* len is the maximum number of bytes the command should operate on. It
* should be aligned with the device zone size.
*/
int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
int64_t offset, int64_t len)
{
int ret;
IO_CODE();

blk_inc_in_flight(blk);
blk_wait_while_drained(blk);
GRAPH_RDLOCK_GUARD();

ret = blk_check_byte_request(blk, offset, len);
if (ret < 0) {
blk_dec_in_flight(blk);
return ret;
}

ret = bdrv_co_zone_mgmt(blk_bs(blk), op, offset, len);
blk_dec_in_flight(blk);
return ret;
}

/*
* Send a zone_append command.
*/
int coroutine_fn blk_co_zone_append(BlockBackend *blk, int64_t *offset,
QEMUIOVector *qiov, BdrvRequestFlags flags)
{
int ret;
IO_CODE();

blk_inc_in_flight(blk);
blk_wait_while_drained(blk);
GRAPH_RDLOCK_GUARD();
if (!blk_is_available(blk)) {
blk_dec_in_flight(blk);
return -ENOMEDIUM;
}

ret = bdrv_co_zone_append(blk_bs(blk), offset, qiov, flags);
blk_dec_in_flight(blk);
return ret;
}

void blk_drain(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
Expand Down

0 comments on commit 9778b26

Please sign in to comment.