Showing with 3,449 additions and 784 deletions.
  1. +19 −0 block.c
  2. +198 −0 block/block-backend.c
  3. +644 −40 block/file-posix.c
  4. +68 −0 block/io.c
  5. +4 −0 block/io_uring.c
  6. +3 −0 block/linux-aio.c
  7. +11 −0 block/qapi-sysemu.c
  8. +18 −0 block/qapi.c
  9. +26 −0 block/raw-format.c
  10. +4 −0 block/trace-events
  11. +1 −0 docs/devel/index-api.rst
  12. +59 −3 docs/devel/qapi-code-gen.rst
  13. +62 −0 docs/devel/zoned-storage.rst
  14. +3 −3 docs/interop/firmware.json
  15. +6 −0 docs/system/qemu-block-drivers.rst.inc
  16. +7 −0 hw/block/trace-events
  17. +2 −0 hw/block/virtio-blk-common.c
  18. +405 −0 hw/block/virtio-blk.c
  19. +2 −0 hw/virtio/virtio-qmp.c
  20. +1 −0 include/block/accounting.h
  21. +57 −0 include/block/block-common.h
  22. +13 −0 include/block/block-io.h
  23. +37 −0 include/block/block_int-common.h
  24. +7 −1 include/block/raw-aio.h
  25. +12 −0 include/standard-headers/drm/drm_fourcc.h
  26. +47 −1 include/standard-headers/linux/ethtool.h
  27. +44 −1 include/standard-headers/linux/fuse.h
  28. +1 −0 include/standard-headers/linux/pci_regs.h
  29. +2 −0 include/standard-headers/linux/vhost_types.h
  30. +105 −0 include/standard-headers/linux/virtio_blk.h
  31. +27 −0 include/sysemu/block-backend-io.h
  32. +1 −0 linux-headers/asm-arm64/kvm.h
  33. +32 −2 linux-headers/asm-x86/kvm.h
  34. +9 −0 linux-headers/linux/kvm.h
  35. +9 −6 linux-headers/linux/vfio.h
  36. +8 −0 linux-headers/linux/vhost.h
  37. +4 −0 meson.build
  38. +6 −1 migration/meson.build
  39. +17 −0 migration/migration-stats.c
  40. +41 −0 migration/migration-stats.h
  41. +26 −16 migration/migration.c
  42. +5 −7 migration/multifd.c
  43. +1 −1 migration/postcopy-ram.c
  44. +11 −0 migration/qemu-file.c
  45. +1 −0 migration/qemu-file.h
  46. +485 −0 migration/ram-compress.c
  47. +70 −0 migration/ram-compress.h
  48. +60 −490 migration/ram.c
  49. +0 −24 migration/ram.h
  50. +7 −2 migration/rdma.c
  51. +2 −1 migration/savevm.c
  52. +5 −10 migration/tls.c
  53. +1 −2 migration/tls.h
  54. +97 −53 qapi/block-core.json
  55. +4 −3 qapi/block-export.json
  56. +5 −1 qapi/block.json
  57. +2 −2 qapi/char.json
  58. +1 −1 qapi/control.json
  59. +4 −0 qapi/cryptodev.json
  60. +2 −2 qapi/job.json
  61. +1 −1 qapi/machine-target.json
  62. +18 −12 qapi/machine.json
  63. +24 −13 qapi/migration.json
  64. +6 −7 qapi/misc.json
  65. +10 −17 qapi/net.json
  66. +1 −23 qapi/qapi-schema.json
  67. +1 −1 qapi/qdev.json
  68. +2 −2 qapi/qom.json
  69. +1 −1 qapi/rdma.json
  70. +3 −0 qapi/replay.json
  71. +7 −4 qapi/run-state.json
  72. +2 −1 qapi/stats.json
  73. +1 −2 qapi/tpm.json
  74. +1 −0 qapi/trace.json
  75. +6 −6 qapi/ui.json
  76. +11 −10 qapi/yank.json
  77. +224 −0 qemu-io-cmds.c
  78. +5 −5 qga/qapi-schema.json
  79. +14 −6 storage-daemon/qapi/qapi-schema.json
  80. +105 −0 tests/qemu-iotests/tests/zoned
  81. +69 −0 tests/qemu-iotests/tests/zoned.out
  82. +126 −0 tests/qtest/migration-test.c
19 changes: 19 additions & 0 deletions block.c
Expand Up @@ -7967,6 +7967,25 @@ void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs,
return;
}

/*
* Non-zoned block drivers do not follow zoned storage constraints
* (i.e. sequential writes to zones). Refuse mixing zoned and non-zoned
* drivers in a graph.
*/
if (!parent_bs->drv->supports_zoned_children &&
child_bs->bl.zoned == BLK_Z_HM) {
/*
 * The host-aware model permits random writes in addition to zoned
 * storage constraints, so mixing host-aware and non-zoned drivers is
 * allowed: a host-aware device can be used as a regular device.
 */
error_setg(errp, "Cannot add a %s child to a %s parent",
child_bs->bl.zoned == BLK_Z_HM ? "zoned" : "non-zoned",
parent_bs->drv->supports_zoned_children ?
"support zoned children" : "not support zoned children");
return;
}

if (!QLIST_EMPTY(&child_bs->parents)) {
error_setg(errp, "The node %s already has a parent",
child_bs->node_name);
Expand Down
198 changes: 198 additions & 0 deletions block/block-backend.c
Expand Up @@ -1845,6 +1845,204 @@ int coroutine_fn blk_co_flush(BlockBackend *blk)
return ret;
}

/*
 * Coroutine entry point for blk_aio_zone_report(): recover the
 * nr_zones pointer stashed in acb->bytes, run the coroutine version,
 * and complete the AIOCB.
 */
static void coroutine_fn blk_aio_zone_report_entry(void *opaque)
{
    BlkAioEmAIOCB *acb = opaque;
    unsigned int *nr_zones = (unsigned int *)(uintptr_t)acb->bytes;

    acb->rwco.ret = blk_co_zone_report(acb->rwco.blk, acb->rwco.offset,
                                       nr_zones, acb->rwco.iobuf);
    blk_aio_complete(acb);
}

/*
 * Asynchronously query zone information.
 *
 * @offset: byte offset into the device where the report starts
 * @nr_zones: IN: capacity of the @zones array; OUT: zones actually reported
 * @zones: caller-provided array that receives the zone descriptors
 * @cb/@opaque: completion callback and its argument
 *
 * The @nr_zones pointer is smuggled through acb->bytes so the coroutine
 * entry point can recover it.
 */
BlockAIOCB *blk_aio_zone_report(BlockBackend *blk, int64_t offset,
                                unsigned int *nr_zones,
                                BlockZoneDescriptor *zones,
                                BlockCompletionFunc *cb, void *opaque)
{
    BlkAioEmAIOCB *acb;
    Coroutine *co;
    IO_CODE();

    /* Balanced by blk_aio_complete() when the request finishes */
    blk_inc_in_flight(blk);
    acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
    acb->rwco = (BlkRwCo) {
        .blk    = blk,
        .offset = offset,
        .iobuf  = zones,
        .ret    = NOT_DONE,
    };
    /* Fixed: this statement ended in a stray ',' (comma operator) */
    acb->bytes = (int64_t)(uintptr_t)nr_zones;
    acb->has_returned = false;

    co = qemu_coroutine_create(blk_aio_zone_report_entry, acb);
    aio_co_enter(blk_get_aio_context(blk), co);

    acb->has_returned = true;
    if (acb->rwco.ret != NOT_DONE) {
        /* Completed synchronously: defer the callback to a bottom half */
        replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
                                         blk_aio_complete_bh, acb);
    }

    return &acb->common;
}

/*
 * Coroutine entry point for blk_aio_zone_mgmt(): recover the zone op
 * stashed in rwco.iobuf, run the coroutine version, and complete the
 * AIOCB.
 */
static void coroutine_fn blk_aio_zone_mgmt_entry(void *opaque)
{
    BlkAioEmAIOCB *acb = opaque;
    BlockZoneOp op = (BlockZoneOp)(uintptr_t)acb->rwco.iobuf;

    acb->rwco.ret = blk_co_zone_mgmt(acb->rwco.blk, op,
                                     acb->rwco.offset, acb->bytes);
    blk_aio_complete(acb);
}

/*
 * Asynchronously perform a zone management operation.
 *
 * @op: the zone operation to apply
 * @offset: byte offset of the first zone to operate on
 * @len: number of bytes the command operates on; should be aligned to
 *       the device zone size
 * @cb/@opaque: completion callback and its argument
 *
 * The zone op is smuggled through rwco.iobuf so the coroutine entry
 * point can recover it.  Opening brace moved to its own line for
 * consistency with the other function definitions in this file.
 */
BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
                              int64_t offset, int64_t len,
                              BlockCompletionFunc *cb, void *opaque)
{
    BlkAioEmAIOCB *acb;
    Coroutine *co;
    IO_CODE();

    /* Balanced by blk_aio_complete() when the request finishes */
    blk_inc_in_flight(blk);
    acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
    acb->rwco = (BlkRwCo) {
        .blk    = blk,
        .offset = offset,
        .iobuf  = (void *)(uintptr_t)op,
        .ret    = NOT_DONE,
    };
    acb->bytes = len;
    acb->has_returned = false;

    co = qemu_coroutine_create(blk_aio_zone_mgmt_entry, acb);
    aio_co_enter(blk_get_aio_context(blk), co);

    acb->has_returned = true;
    if (acb->rwco.ret != NOT_DONE) {
        /* Completed synchronously: defer the callback to a bottom half */
        replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
                                         blk_aio_complete_bh, acb);
    }

    return &acb->common;
}

/*
 * Coroutine entry point for blk_aio_zone_append(): recover the offset
 * pointer stashed in acb->bytes, run the coroutine version, and
 * complete the AIOCB.
 */
static void coroutine_fn blk_aio_zone_append_entry(void *opaque)
{
    BlkAioEmAIOCB *acb = opaque;
    int64_t *offset = (int64_t *)(uintptr_t)acb->bytes;

    acb->rwco.ret = blk_co_zone_append(acb->rwco.blk, offset,
                                       acb->rwco.iobuf, acb->rwco.flags);
    blk_aio_complete(acb);
}

/*
 * Asynchronously append data to a zone.
 *
 * @offset: IN: pointer to the zone start offset; OUT: updated with the
 *          position the data was actually written at
 * @qiov: the data to write
 * @flags: request flags
 * @cb/@opaque: completion callback and its argument
 *
 * The @offset pointer is smuggled through acb->bytes so the coroutine
 * entry point can recover it.  Brace placement and blank lines made
 * consistent with the sibling blk_aio_zone_* functions.
 */
BlockAIOCB *blk_aio_zone_append(BlockBackend *blk, int64_t *offset,
                                QEMUIOVector *qiov, BdrvRequestFlags flags,
                                BlockCompletionFunc *cb, void *opaque)
{
    BlkAioEmAIOCB *acb;
    Coroutine *co;
    IO_CODE();

    /* Balanced by blk_aio_complete() when the request finishes */
    blk_inc_in_flight(blk);
    acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
    acb->rwco = (BlkRwCo) {
        .blk   = blk,
        .ret   = NOT_DONE,
        .flags = flags,
        .iobuf = qiov,
    };
    acb->bytes = (int64_t)(uintptr_t)offset;
    acb->has_returned = false;

    co = qemu_coroutine_create(blk_aio_zone_append_entry, acb);
    aio_co_enter(blk_get_aio_context(blk), co);

    acb->has_returned = true;
    if (acb->rwco.ret != NOT_DONE) {
        /* Completed synchronously: defer the callback to a bottom half */
        replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
                                         blk_aio_complete_bh, acb);
    }

    return &acb->common;
}

/*
 * Send a zone_report command.
 * offset is a byte offset from the start of the device. No alignment
 * required for offset.
 * nr_zones represents IN maximum and OUT actual: on entry the capacity
 * of the zones array, on return the number of zones actually reported.
 *
 * Returns 0 on success, -ENOMEDIUM if no medium is inserted, or a
 * negative errno from the underlying driver.
 */
int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
                                    unsigned int *nr_zones,
                                    BlockZoneDescriptor *zones)
{
    int ret;
    IO_CODE();

    blk_inc_in_flight(blk); /* increase before waiting */
    blk_wait_while_drained(blk);
    /* Holds the graph read lock for the rest of this function */
    GRAPH_RDLOCK_GUARD();
    if (!blk_is_available(blk)) {
        blk_dec_in_flight(blk);
        return -ENOMEDIUM;
    }
    ret = bdrv_co_zone_report(blk_bs(blk), offset, nr_zones, zones);
    blk_dec_in_flight(blk);
    return ret;
}

/*
 * Send a zone_management command.
 * op is the zone operation;
 * offset is the byte offset from the start of the zoned device;
 * len is the maximum number of bytes the command should operate on. It
 * should be aligned with the device zone size.
 *
 * Returns 0 on success or a negative errno on failure.
 *
 * NOTE(review): unlike blk_co_zone_report()/blk_co_zone_append(), this
 * relies on blk_check_byte_request() rather than blk_is_available() to
 * reject requests on an unavailable backend — confirm that check covers
 * the no-medium case.
 */
int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
                                  int64_t offset, int64_t len)
{
    int ret;
    IO_CODE();

    /* Increase in-flight count before any wait point */
    blk_inc_in_flight(blk);
    blk_wait_while_drained(blk);
    /* Holds the graph read lock for the rest of this function */
    GRAPH_RDLOCK_GUARD();

    ret = blk_check_byte_request(blk, offset, len);
    if (ret < 0) {
        blk_dec_in_flight(blk);
        return ret;
    }

    ret = bdrv_co_zone_mgmt(blk_bs(blk), op, offset, len);
    blk_dec_in_flight(blk);
    return ret;
}

/*
 * Send a zone_append command.
 * offset is an IN/OUT pointer: on input the target zone, on output the
 * position the data was written at.
 *
 * Returns 0 on success, -ENOMEDIUM if no medium is inserted, or a
 * negative errno from the underlying driver.
 */
int coroutine_fn blk_co_zone_append(BlockBackend *blk, int64_t *offset,
                                    QEMUIOVector *qiov, BdrvRequestFlags flags)
{
    int ret;
    IO_CODE();

    /* Increase in-flight count before any wait point */
    blk_inc_in_flight(blk);
    blk_wait_while_drained(blk);
    /* Holds the graph read lock for the rest of this function */
    GRAPH_RDLOCK_GUARD();
    if (!blk_is_available(blk)) {
        blk_dec_in_flight(blk);
        return -ENOMEDIUM;
    }

    ret = bdrv_co_zone_append(blk_bs(blk), offset, qiov, flags);
    blk_dec_in_flight(blk);
    return ret;
}

void blk_drain(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
Expand Down