Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
vduse-blk: Implement vduse-blk export
This implements a VDUSE block backend based on the libvduse library. We can use it to export the BDSs for both VM and container (host) usage. The new command-line syntax is: $ qemu-storage-daemon \ --blockdev file,node-name=drive0,filename=test.img \ --export vduse-blk,node-name=drive0,id=vduse-export0,writable=on After the qemu-storage-daemon has started, we need to use the "vdpa" command to attach the device to the vDPA bus: $ vdpa dev add name vduse-export0 mgmtdev vduse Also the device must be removed via the "vdpa" command before we stop the qemu-storage-daemon. Signed-off-by: Xie Yongji <xieyongji@bytedance.com> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> Message-Id: <20220523084611.91-7-xieyongji@bytedance.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
- Loading branch information
Showing
9 changed files
with
407 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,329 @@ | ||
/* | ||
* Export QEMU block device via VDUSE | ||
* | ||
* Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved. | ||
* | ||
* Author: | ||
* Xie Yongji <xieyongji@bytedance.com> | ||
* | ||
* This work is licensed under the terms of the GNU GPL, version 2 or | ||
* later. See the COPYING file in the top-level directory. | ||
*/ | ||
|
||
#include <sys/eventfd.h> | ||
|
||
#include "qemu/osdep.h" | ||
#include "qapi/error.h" | ||
#include "block/export.h" | ||
#include "qemu/error-report.h" | ||
#include "util/block-helpers.h" | ||
#include "subprojects/libvduse/libvduse.h" | ||
#include "virtio-blk-handler.h" | ||
|
||
#include "standard-headers/linux/virtio_blk.h" | ||
|
||
/*
 * Fallback values used by vduse_blk_exp_create() when the export options
 * do not specify num-queues / queue-size.
 */
#define VDUSE_DEFAULT_NUM_QUEUE 1
#define VDUSE_DEFAULT_QUEUE_SIZE 256
|
||
typedef struct VduseBlkExport { | ||
BlockExport export; | ||
VirtioBlkHandler handler; | ||
VduseDev *dev; | ||
uint16_t num_queues; | ||
unsigned int inflight; | ||
} VduseBlkExport; | ||
|
||
typedef struct VduseBlkReq { | ||
VduseVirtqElement elem; | ||
VduseVirtq *vq; | ||
} VduseBlkReq; | ||
|
||
/* Account for one more request that has been started but not completed. */
static void vduse_blk_inflight_inc(VduseBlkExport *vblk_exp)
{
    vblk_exp->inflight += 1;
}
|
||
/*
 * Account for one finished request.  When the counter drops to zero,
 * aio_wait_kick() is called so that a waiter polling on the counter
 * (see the AIO_WAIT_WHILE() user in this file) re-evaluates its condition.
 */
static void vduse_blk_inflight_dec(VduseBlkExport *vblk_exp)
{
    vblk_exp->inflight -= 1;
    if (vblk_exp->inflight != 0) {
        return;
    }

    aio_wait_kick();
}
|
||
/*
 * Hand a processed request back to the driver and release it.
 *
 * @req:    request to complete; freed by this function
 * @in_len: length passed to vduse_queue_push() for the element
 */
static void vduse_blk_req_complete(VduseBlkReq *req, size_t in_len)
{
    VduseVirtq *queue = req->vq;

    vduse_queue_push(queue, &req->elem, in_len);
    vduse_queue_notify(queue);

    free(req);
}
|
||
/*
 * Coroutine entry point: process one virtio-blk request.
 *
 * @opaque: the VduseBlkReq popped by vduse_blk_vq_handler(); ownership is
 *          taken and the request is freed on every path.
 *
 * The caller has already counted this request in vblk_exp->inflight, so the
 * counter is dropped on both the success and the failure path.  Skipping the
 * decrement on failure would leave the counter above zero forever and make
 * the AIO_WAIT_WHILE() in vduse_blk_detach_ctx() wait indefinitely.
 */
static void coroutine_fn vduse_blk_virtio_process_req(void *opaque)
{
    VduseBlkReq *req = opaque;
    VduseVirtq *vq = req->vq;
    VduseDev *dev = vduse_queue_get_dev(vq);
    VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
    VirtioBlkHandler *handler = &vblk_exp->handler;
    VduseVirtqElement *elem = &req->elem;
    struct iovec *in_iov = elem->in_sg;
    struct iovec *out_iov = elem->out_sg;
    unsigned in_num = elem->in_num;
    unsigned out_num = elem->out_num;
    int in_len;

    in_len = virtio_blk_process_req(handler, in_iov,
                                    out_iov, in_num, out_num);
    if (in_len < 0) {
        /* Request could not be processed: nothing is pushed back. */
        free(req);
    } else {
        vduse_blk_req_complete(req, in_len);
    }

    vduse_blk_inflight_dec(vblk_exp);
}
|
||
/*
 * Drain a virtqueue: pop every available request and start a coroutine
 * running vduse_blk_virtio_process_req() for each of them.
 *
 * @dev: device owning the queue (its private pointer is the export)
 * @vq:  queue to drain
 */
static void vduse_blk_vq_handler(VduseDev *dev, VduseVirtq *vq)
{
    VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);

    for (;;) {
        VduseBlkReq *req = vduse_queue_pop(vq, sizeof(*req));
        Coroutine *co;

        if (req == NULL) {
            return;
        }
        req->vq = vq;

        co = qemu_coroutine_create(vduse_blk_virtio_process_req, req);

        /* Count the request before the coroutine gets a chance to finish. */
        vduse_blk_inflight_inc(vblk_exp);
        qemu_coroutine_enter(co);
    }
}
|
||
static void on_vduse_vq_kick(void *opaque) | ||
{ | ||
VduseVirtq *vq = opaque; | ||
VduseDev *dev = vduse_queue_get_dev(vq); | ||
int fd = vduse_queue_get_fd(vq); | ||
eventfd_t kick_data; | ||
|
||
if (eventfd_read(fd, &kick_data) == -1) { | ||
error_report("failed to read data from eventfd"); | ||
return; | ||
} | ||
|
||
vduse_blk_vq_handler(dev, vq); | ||
} | ||
|
||
/*
 * VduseOps.enable_queue callback: start watching the queue's fd in the
 * export's AioContext, with on_vduse_vq_kick() as the read handler.
 */
static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq)
{
    VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
    AioContext *aio_ctx = vblk_exp->export.ctx;
    int kick_fd = vduse_queue_get_fd(vq);

    aio_set_fd_handler(aio_ctx, kick_fd, true,
                       on_vduse_vq_kick, NULL, NULL, NULL, vq);
}
|
||
/*
 * VduseOps.disable_queue callback: stop watching the queue's fd by
 * clearing all of its handlers in the export's AioContext.
 */
static void vduse_blk_disable_queue(VduseDev *dev, VduseVirtq *vq)
{
    VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
    AioContext *aio_ctx = vblk_exp->export.ctx;
    int kick_fd = vduse_queue_get_fd(vq);

    aio_set_fd_handler(aio_ctx, kick_fd, true,
                       NULL, NULL, NULL, NULL, NULL);
}
|
||
static const VduseOps vduse_blk_ops = { | ||
.enable_queue = vduse_blk_enable_queue, | ||
.disable_queue = vduse_blk_disable_queue, | ||
}; | ||
|
||
static void on_vduse_dev_kick(void *opaque) | ||
{ | ||
VduseDev *dev = opaque; | ||
|
||
vduse_dev_handler(dev); | ||
} | ||
|
||
/*
 * Register the device fd and every valid queue fd with the export's
 * AioContext.
 *
 * @vblk_exp: export whose fds are registered
 * @ctx:      not read here; the handlers are installed on
 *            vblk_exp->export.ctx, which the caller in this file sets to
 *            @ctx before calling.
 *
 * Queues whose fd is negative are skipped.
 */
static void vduse_blk_attach_ctx(VduseBlkExport *vblk_exp, AioContext *ctx)
{
    VduseDev *vduse_dev = vblk_exp->dev;
    int idx;

    aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vduse_dev),
                       true, on_vduse_dev_kick, NULL, NULL, NULL,
                       vduse_dev);

    for (idx = 0; idx < vblk_exp->num_queues; idx++) {
        VduseVirtq *queue = vduse_dev_get_queue(vblk_exp->dev, idx);
        int kick_fd = vduse_queue_get_fd(queue);

        if (kick_fd >= 0) {
            aio_set_fd_handler(vblk_exp->export.ctx, kick_fd, true,
                               on_vduse_vq_kick, NULL, NULL, NULL, queue);
        }
    }
}
|
||
/*
 * Unregister every valid queue fd and then the device fd from the export's
 * AioContext, and wait while requests are still counted as in flight.
 *
 * Queues whose fd is negative are skipped.
 */
static void vduse_blk_detach_ctx(VduseBlkExport *vblk_exp)
{
    int idx;

    for (idx = 0; idx < vblk_exp->num_queues; idx++) {
        VduseVirtq *queue = vduse_dev_get_queue(vblk_exp->dev, idx);
        int kick_fd = vduse_queue_get_fd(queue);

        if (kick_fd >= 0) {
            aio_set_fd_handler(vblk_exp->export.ctx, kick_fd,
                               true, NULL, NULL, NULL, NULL, NULL);
        }
    }

    aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
                       true, NULL, NULL, NULL, NULL, NULL);

    /* No new requests can arrive now; let the outstanding ones finish. */
    AIO_WAIT_WHILE(vblk_exp->export.ctx, vblk_exp->inflight > 0);
}
|
||
|
||
/*
 * AioContext notifier (attach side): record the new context in the export
 * and register the export's fds with it.
 *
 * @ctx:    context being attached
 * @opaque: the VduseBlkExport registered with the notifier
 */
static void blk_aio_attached(AioContext *ctx, void *opaque)
{
    VduseBlkExport *export_state = opaque;

    /* Must be stored first: vduse_blk_attach_ctx() reads export.ctx. */
    export_state->export.ctx = ctx;
    vduse_blk_attach_ctx(export_state, ctx);
}
|
||
static void blk_aio_detach(void *opaque) | ||
{ | ||
VduseBlkExport *vblk_exp = opaque; | ||
|
||
vduse_blk_detach_ctx(vblk_exp); | ||
vblk_exp->export.ctx = NULL; | ||
} | ||
|
||
static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts, | ||
Error **errp) | ||
{ | ||
VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export); | ||
BlockExportOptionsVduseBlk *vblk_opts = &opts->u.vduse_blk; | ||
uint64_t logical_block_size = VIRTIO_BLK_SECTOR_SIZE; | ||
uint16_t num_queues = VDUSE_DEFAULT_NUM_QUEUE; | ||
uint16_t queue_size = VDUSE_DEFAULT_QUEUE_SIZE; | ||
Error *local_err = NULL; | ||
struct virtio_blk_config config = { 0 }; | ||
uint64_t features; | ||
int i; | ||
|
||
if (vblk_opts->has_num_queues) { | ||
num_queues = vblk_opts->num_queues; | ||
if (num_queues == 0) { | ||
error_setg(errp, "num-queues must be greater than 0"); | ||
return -EINVAL; | ||
} | ||
} | ||
|
||
if (vblk_opts->has_queue_size) { | ||
queue_size = vblk_opts->queue_size; | ||
if (queue_size <= 2 || !is_power_of_2(queue_size) || | ||
queue_size > VIRTQUEUE_MAX_SIZE) { | ||
error_setg(errp, "queue-size is invalid"); | ||
return -EINVAL; | ||
} | ||
} | ||
|
||
if (vblk_opts->has_logical_block_size) { | ||
logical_block_size = vblk_opts->logical_block_size; | ||
check_block_size(exp->id, "logical-block-size", logical_block_size, | ||
&local_err); | ||
if (local_err) { | ||
error_propagate(errp, local_err); | ||
return -EINVAL; | ||
} | ||
} | ||
vblk_exp->num_queues = num_queues; | ||
vblk_exp->handler.blk = exp->blk; | ||
vblk_exp->handler.serial = exp->id; | ||
vblk_exp->handler.logical_block_size = logical_block_size; | ||
vblk_exp->handler.writable = opts->writable; | ||
|
||
config.capacity = | ||
cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS); | ||
config.seg_max = cpu_to_le32(queue_size - 2); | ||
config.min_io_size = cpu_to_le16(1); | ||
config.opt_io_size = cpu_to_le32(1); | ||
config.num_queues = cpu_to_le16(num_queues); | ||
config.blk_size = cpu_to_le32(logical_block_size); | ||
config.max_discard_sectors = cpu_to_le32(VIRTIO_BLK_MAX_DISCARD_SECTORS); | ||
config.max_discard_seg = cpu_to_le32(1); | ||
config.discard_sector_alignment = | ||
cpu_to_le32(logical_block_size >> VIRTIO_BLK_SECTOR_BITS); | ||
config.max_write_zeroes_sectors = | ||
cpu_to_le32(VIRTIO_BLK_MAX_WRITE_ZEROES_SECTORS); | ||
config.max_write_zeroes_seg = cpu_to_le32(1); | ||
|
||
features = vduse_get_virtio_features() | | ||
(1ULL << VIRTIO_BLK_F_SEG_MAX) | | ||
(1ULL << VIRTIO_BLK_F_TOPOLOGY) | | ||
(1ULL << VIRTIO_BLK_F_BLK_SIZE) | | ||
(1ULL << VIRTIO_BLK_F_FLUSH) | | ||
(1ULL << VIRTIO_BLK_F_DISCARD) | | ||
(1ULL << VIRTIO_BLK_F_WRITE_ZEROES); | ||
|
||
if (num_queues > 1) { | ||
features |= 1ULL << VIRTIO_BLK_F_MQ; | ||
} | ||
if (!opts->writable) { | ||
features |= 1ULL << VIRTIO_BLK_F_RO; | ||
} | ||
|
||
vblk_exp->dev = vduse_dev_create(exp->id, VIRTIO_ID_BLOCK, 0, | ||
features, num_queues, | ||
sizeof(struct virtio_blk_config), | ||
(char *)&config, &vduse_blk_ops, | ||
vblk_exp); | ||
if (!vblk_exp->dev) { | ||
error_setg(errp, "failed to create vduse device"); | ||
return -ENOMEM; | ||
} | ||
|
||
for (i = 0; i < num_queues; i++) { | ||
vduse_dev_setup_queue(vblk_exp->dev, i, queue_size); | ||
} | ||
|
||
aio_set_fd_handler(exp->ctx, vduse_dev_get_fd(vblk_exp->dev), true, | ||
on_vduse_dev_kick, NULL, NULL, NULL, vblk_exp->dev); | ||
|
||
blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach, | ||
vblk_exp); | ||
|
||
return 0; | ||
} | ||
|
||
/*
 * BlockExportDriver.delete callback: remove the AioContext notifiers that
 * vduse_blk_exp_create() installed and destroy the VDUSE device.
 */
static void vduse_blk_exp_delete(BlockExport *exp)
{
    VduseBlkExport *export_state = container_of(exp, VduseBlkExport, export);
    VduseDev *vduse_dev = export_state->dev;

    blk_remove_aio_context_notifier(exp->blk, blk_aio_attached,
                                    blk_aio_detach, export_state);
    vduse_dev_destroy(vduse_dev);
}
|
||
/*
 * BlockExportDriver.request_shutdown callback: stop serving the export.
 *
 * The export's AioContext is held while the fd handlers are removed and
 * outstanding requests are waited for, and is released again afterwards.
 * Acquiring it a second time instead of releasing it would leave the
 * context locked by this thread.
 */
static void vduse_blk_exp_request_shutdown(BlockExport *exp)
{
    VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
    AioContext *ctx = vblk_exp->export.ctx;

    aio_context_acquire(ctx);
    vduse_blk_detach_ctx(vblk_exp);
    aio_context_release(ctx);
}
|
||
const BlockExportDriver blk_exp_vduse_blk = { | ||
.type = BLOCK_EXPORT_TYPE_VDUSE_BLK, | ||
.instance_size = sizeof(VduseBlkExport), | ||
.create = vduse_blk_exp_create, | ||
.delete = vduse_blk_exp_delete, | ||
.request_shutdown = vduse_blk_exp_request_shutdown, | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
/* | ||
* Export QEMU block device via VDUSE | ||
* | ||
* Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved. | ||
* | ||
* Author: | ||
* Xie Yongji <xieyongji@bytedance.com> | ||
* | ||
* This work is licensed under the terms of the GNU GPL, version 2 or | ||
* later. See the COPYING file in the top-level directory. | ||
*/ | ||
|
||
#ifndef VDUSE_BLK_H | ||
#define VDUSE_BLK_H | ||
|
||
#include "block/export.h" | ||
|
||
/* Block export driver implementing the vduse-blk export type. */
extern const BlockExportDriver blk_exp_vduse_blk;
|
||
#endif /* VDUSE_BLK_H */ |
Oops, something went wrong.