virtio: move allocation to virtqueue_pop/vring_pop
The return code of virtqueue_pop/vring_pop is unused except to check for
errors or 0.  We can thus easily move allocation inside the functions
and just return a pointer to the VirtQueueElement.
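For illustration, a minimal sketch of the new calling convention; MyRequest and my_handle_output are made-up names, and only the allocation/ownership pattern is meant to reflect the patch:

#include "qemu/osdep.h"
#include "hw/virtio/virtio.h"

/* Sketch: a per-request structure embedding the element, in the style of
 * virtio-blk's VirtIOBlockReq.  virtqueue_pop() allocates the requested
 * number of bytes and fills in the leading VirtQueueElement, so the element
 * is assumed to sit at the start of the structure. */
typedef struct MyRequest {
    VirtQueueElement elem;
    /* ... device-specific per-request state ... */
} MyRequest;

static void my_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
    for (;;) {
        /* New style: allocation happens inside virtqueue_pop(); a NULL
         * return means the ring has no more buffers.  The old style was:
         *     VirtQueueElement elem;
         *     if (!virtqueue_pop(vq, &elem)) { break; }
         */
        MyRequest *req = virtqueue_pop(vq, sizeof(MyRequest));
        if (!req) {
            break;
        }

        /* ... consume req->elem.out_sg, fill req->elem.in_sg ... */

        virtqueue_push(vq, &req->elem, 0);
        g_free(req);    /* the caller now owns the element and must free it */
    }
    virtio_notify(vdev, vq);
}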

The advantage is that we will be able to allocate only the space that
is needed for the actual size of the s/g list instead of the full
VIRTQUEUE_MAX_SIZE items.  Currently VirtQueueElement takes about 48K
of memory, and this kind of allocation puts a lot of stress on malloc.
By cutting the size by two or three orders of magnitude, malloc can
use much more efficient algorithms.
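A back-of-the-envelope check of the 48K figure, assuming the pre-patch VirtQueueElement layout (four arrays of VIRTQUEUE_MAX_SIZE entries) on a 64-bit host:

/* Pre-patch VirtQueueElement sizing, 64-bit host, VIRTQUEUE_MAX_SIZE == 1024:
 *
 *   in_addr[1024]   1024 * sizeof(hwaddr)        = 1024 *  8 =  8 KiB
 *   out_addr[1024]  1024 * sizeof(hwaddr)        = 1024 *  8 =  8 KiB
 *   in_sg[1024]     1024 * sizeof(struct iovec)  = 1024 * 16 = 16 KiB
 *   out_sg[1024]    1024 * sizeof(struct iovec)  = 1024 * 16 = 16 KiB
 *                                                 --------------------
 *                                                 total      ~ 48 KiB
 *
 * Sizing the allocation to the actual number of descriptors in the s/g
 * list (typically a handful) brings this down to a few hundred bytes
 * per request.
 */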

The patch is pretty large, but changes to each device are testable
more or less independently.  Splitting it would mostly add churn.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
bonzini authored and mstsirkin committed Feb 6, 2016
1 parent 6aa46d8 commit 51b19eb
Showing 22 changed files with 209 additions and 142 deletions.
2 changes: 1 addition & 1 deletion hw/9pfs/9p.c
@@ -1587,7 +1587,7 @@ static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
int read_count;
int64_t xattr_len;
V9fsVirtioState *v = container_of(s, V9fsVirtioState, state);
VirtQueueElement *elem = &v->elems[pdu->idx];
VirtQueueElement *elem = v->elems[pdu->idx];

xattr_len = fidp->fs.xattr.len;
read_count = xattr_len - off;
17 changes: 10 additions & 7 deletions hw/9pfs/virtio-9p-device.c
@@ -26,10 +26,12 @@ void virtio_9p_push_and_notify(V9fsPDU *pdu)
{
V9fsState *s = pdu->s;
V9fsVirtioState *v = container_of(s, V9fsVirtioState, state);
VirtQueueElement *elem = &v->elems[pdu->idx];
VirtQueueElement *elem = v->elems[pdu->idx];

/* push onto queue and notify */
virtqueue_push(v->vq, elem, pdu->size);
g_free(elem);
v->elems[pdu->idx] = NULL;

/* FIXME: we should batch these completions */
virtio_notify(VIRTIO_DEVICE(v), v->vq);
@@ -48,17 +50,18 @@ static void handle_9p_output(VirtIODevice *vdev, VirtQueue *vq)
uint8_t id;
uint16_t tag_le;
} QEMU_PACKED out;
VirtQueueElement *elem = &v->elems[pdu->idx];
VirtQueueElement *elem;

len = virtqueue_pop(vq, elem);
if (!len) {
elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
if (!elem) {
pdu_free(pdu);
break;
}

BUG_ON(elem->out_num == 0 || elem->in_num == 0);
QEMU_BUILD_BUG_ON(sizeof out != 7);

v->elems[pdu->idx] = elem;
len = iov_to_buf(elem->out_sg, elem->out_num, 0,
&out, sizeof out);
BUG_ON(len != sizeof out);
@@ -141,7 +144,7 @@ ssize_t virtio_pdu_vmarshal(V9fsPDU *pdu, size_t offset,
{
V9fsState *s = pdu->s;
V9fsVirtioState *v = container_of(s, V9fsVirtioState, state);
VirtQueueElement *elem = &v->elems[pdu->idx];
VirtQueueElement *elem = v->elems[pdu->idx];

return v9fs_iov_vmarshal(elem->in_sg, elem->in_num, offset, 1, fmt, ap);
}
@@ -151,7 +154,7 @@ ssize_t virtio_pdu_vunmarshal(V9fsPDU *pdu, size_t offset,
{
V9fsState *s = pdu->s;
V9fsVirtioState *v = container_of(s, V9fsVirtioState, state);
VirtQueueElement *elem = &v->elems[pdu->idx];
VirtQueueElement *elem = v->elems[pdu->idx];

return v9fs_iov_vunmarshal(elem->out_sg, elem->out_num, offset, 1, fmt, ap);
}
@@ -161,7 +164,7 @@ void virtio_init_iov_from_pdu(V9fsPDU *pdu, struct iovec **piov,
{
V9fsState *s = pdu->s;
V9fsVirtioState *v = container_of(s, V9fsVirtioState, state);
VirtQueueElement *elem = &v->elems[pdu->idx];
VirtQueueElement *elem = v->elems[pdu->idx];

if (is_write) {
*piov = elem->out_sg;
2 changes: 1 addition & 1 deletion hw/9pfs/virtio-9p.h
@@ -11,7 +11,7 @@ typedef struct V9fsVirtioState
VirtQueue *vq;
size_t config_size;
V9fsPDU pdus[MAX_REQ];
VirtQueueElement elems[MAX_REQ];
VirtQueueElement *elems[MAX_REQ];
V9fsState state;
} V9fsVirtioState;

11 changes: 5 additions & 6 deletions hw/block/dataplane/virtio-blk.c
@@ -100,20 +100,19 @@ static void handle_notify(EventNotifier *e)
blk_io_plug(s->conf->conf.blk);
for (;;) {
MultiReqBuffer mrb = {};
int ret;

/* Disable guest->host notifies to avoid unnecessary vmexits */
vring_disable_notification(s->vdev, &s->vring);

for (;;) {
VirtIOBlockReq *req = virtio_blk_alloc_request(vblk);
VirtIOBlockReq *req = vring_pop(s->vdev, &s->vring,
sizeof(VirtIOBlockReq));

ret = vring_pop(s->vdev, &s->vring, &req->elem);
if (ret < 0) {
virtio_blk_free_request(req);
if (req == NULL) {
break; /* no more requests */
}

virtio_blk_init_request(vblk, req);
trace_virtio_blk_data_plane_process_request(s, req->elem.out_num,
req->elem.in_num,
req->elem.index);
@@ -125,7 +124,7 @@ static void handle_notify(EventNotifier *e)
virtio_blk_submit_multireq(s->conf->conf.blk, &mrb);
}

if (likely(ret == -EAGAIN)) { /* vring emptied */
if (likely(!vring_more_avail(s->vdev, &s->vring))) { /* vring emptied */
/* Re-enable guest->host notifies and stop processing the vring.
* But if the guest has snuck in more descriptors, keep processing.
*/
15 changes: 6 additions & 9 deletions hw/block/virtio-blk.c
@@ -29,15 +29,13 @@
#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-access.h"

VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
void virtio_blk_init_request(VirtIOBlock *s, VirtIOBlockReq *req)
{
VirtIOBlockReq *req = g_new(VirtIOBlockReq, 1);
req->dev = s;
req->qiov.size = 0;
req->in_len = 0;
req->next = NULL;
req->mr_next = NULL;
return req;
}

void virtio_blk_free_request(VirtIOBlockReq *req)
@@ -193,13 +191,11 @@ static void virtio_blk_ioctl_complete(void *opaque, int status)

static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s)
{
VirtIOBlockReq *req = virtio_blk_alloc_request(s);
VirtIOBlockReq *req = virtqueue_pop(s->vq, sizeof(VirtIOBlockReq));

if (!virtqueue_pop(s->vq, &req->elem)) {
virtio_blk_free_request(req);
return NULL;
if (req) {
virtio_blk_init_request(s, req);
}

return req;
}

@@ -836,7 +832,8 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f,
VirtIOBlock *s = VIRTIO_BLK(vdev);

while (qemu_get_sbyte(f)) {
VirtIOBlockReq *req = virtio_blk_alloc_request(s);
VirtIOBlockReq *req = g_new(VirtIOBlockReq, 1);
virtio_blk_init_request(s, req);
qemu_get_buffer(f, (unsigned char *)&req->elem,
sizeof(VirtQueueElement));
req->next = s->rq;
80 changes: 50 additions & 30 deletions hw/char/virtio-serial-bus.c
@@ -83,7 +83,7 @@ static bool use_multiport(VirtIOSerial *vser)
static size_t write_to_port(VirtIOSerialPort *port,
const uint8_t *buf, size_t size)
{
VirtQueueElement elem;
VirtQueueElement *elem;
VirtQueue *vq;
size_t offset;

@@ -96,15 +96,17 @@ static size_t write_to_port(VirtIOSerialPort *port,
while (offset < size) {
size_t len;

if (!virtqueue_pop(vq, &elem)) {
elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
if (!elem) {
break;
}

len = iov_from_buf(elem.in_sg, elem.in_num, 0,
len = iov_from_buf(elem->in_sg, elem->in_num, 0,
buf + offset, size - offset);
offset += len;

virtqueue_push(vq, &elem, len);
virtqueue_push(vq, elem, len);
g_free(elem);
}

virtio_notify(VIRTIO_DEVICE(port->vser), vq);
@@ -113,13 +115,18 @@ static size_t write_to_port(VirtIOSerialPort *port,

static void discard_vq_data(VirtQueue *vq, VirtIODevice *vdev)
{
VirtQueueElement elem;
VirtQueueElement *elem;

if (!virtio_queue_ready(vq)) {
return;
}
while (virtqueue_pop(vq, &elem)) {
virtqueue_push(vq, &elem, 0);
for (;;) {
elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
if (!elem) {
break;
}
virtqueue_push(vq, elem, 0);
g_free(elem);
}
virtio_notify(vdev, vq);
}
@@ -138,21 +145,22 @@ static void do_flush_queued_data(VirtIOSerialPort *port, VirtQueue *vq,
unsigned int i;

/* Pop an elem only if we haven't left off a previous one mid-way */
if (!port->elem.out_num) {
if (!virtqueue_pop(vq, &port->elem)) {
if (!port->elem) {
port->elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
if (!port->elem) {
break;
}
port->iov_idx = 0;
port->iov_offset = 0;
}

for (i = port->iov_idx; i < port->elem.out_num; i++) {
for (i = port->iov_idx; i < port->elem->out_num; i++) {
size_t buf_size;
ssize_t ret;

buf_size = port->elem.out_sg[i].iov_len - port->iov_offset;
buf_size = port->elem->out_sg[i].iov_len - port->iov_offset;
ret = vsc->have_data(port,
port->elem.out_sg[i].iov_base
port->elem->out_sg[i].iov_base
+ port->iov_offset,
buf_size);
if (port->throttled) {
@@ -167,8 +175,9 @@ static void do_flush_queued_data(VirtIOSerialPort *port, VirtQueue *vq,
if (port->throttled) {
break;
}
virtqueue_push(vq, &port->elem, 0);
port->elem.out_num = 0;
virtqueue_push(vq, port->elem, 0);
g_free(port->elem);
port->elem = NULL;
}
virtio_notify(vdev, vq);
}
@@ -185,22 +194,26 @@ static void flush_queued_data(VirtIOSerialPort *port)

static size_t send_control_msg(VirtIOSerial *vser, void *buf, size_t len)
{
VirtQueueElement elem;
VirtQueueElement *elem;
VirtQueue *vq;

vq = vser->c_ivq;
if (!virtio_queue_ready(vq)) {
return 0;
}
if (!virtqueue_pop(vq, &elem)) {

elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
if (!elem) {
return 0;
}

/* TODO: detect a buffer that's too short, set NEEDS_RESET */
iov_from_buf(elem.in_sg, elem.in_num, 0, buf, len);
iov_from_buf(elem->in_sg, elem->in_num, 0, buf, len);

virtqueue_push(vq, &elem, len);
virtqueue_push(vq, elem, len);
virtio_notify(VIRTIO_DEVICE(vser), vq);
g_free(elem);

return len;
}

@@ -414,7 +427,7 @@ static void control_in(VirtIODevice *vdev, VirtQueue *vq)

static void control_out(VirtIODevice *vdev, VirtQueue *vq)
{
VirtQueueElement elem;
VirtQueueElement *elem;
VirtIOSerial *vser;
uint8_t *buf;
size_t len;
@@ -423,10 +436,15 @@ static void control_out(VirtIODevice *vdev, VirtQueue *vq)

len = 0;
buf = NULL;
while (virtqueue_pop(vq, &elem)) {
for (;;) {
size_t cur_len;

cur_len = iov_size(elem.out_sg, elem.out_num);
elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
if (!elem) {
break;
}

cur_len = iov_size(elem->out_sg, elem->out_num);
/*
* Allocate a new buf only if we didn't have one previously or
* if the size of the buf differs
@@ -437,10 +455,11 @@ static void control_out(VirtIODevice *vdev, VirtQueue *vq)
buf = g_malloc(cur_len);
len = cur_len;
}
iov_to_buf(elem.out_sg, elem.out_num, 0, buf, cur_len);
iov_to_buf(elem->out_sg, elem->out_num, 0, buf, cur_len);

handle_control_message(vser, buf, cur_len);
virtqueue_push(vq, &elem, 0);
virtqueue_push(vq, elem, 0);
g_free(elem);
}
g_free(buf);
virtio_notify(vdev, vq);
@@ -620,16 +639,16 @@ static void virtio_serial_save_device(VirtIODevice *vdev, QEMUFile *f)
qemu_put_byte(f, port->host_connected);

elem_popped = 0;
if (port->elem.out_num) {
if (port->elem) {
elem_popped = 1;
}
qemu_put_be32s(f, &elem_popped);
if (elem_popped) {
qemu_put_be32s(f, &port->iov_idx);
qemu_put_be64s(f, &port->iov_offset);

qemu_put_buffer(f, (unsigned char *)&port->elem,
sizeof(port->elem));
qemu_put_buffer(f, (unsigned char *)port->elem,
sizeof(VirtQueueElement));
}
}
}
@@ -704,9 +723,10 @@ static int fetch_active_ports_list(QEMUFile *f, int version_id,
qemu_get_be32s(f, &port->iov_idx);
qemu_get_be64s(f, &port->iov_offset);

qemu_get_buffer(f, (unsigned char *)&port->elem,
sizeof(port->elem));
virtqueue_map(&port->elem);
port->elem = g_new(VirtQueueElement, 1);
qemu_get_buffer(f, (unsigned char *)port->elem,
sizeof(VirtQueueElement));
virtqueue_map(port->elem);

/*
* Port was throttled on source machine. Let's
@@ -928,7 +948,7 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp)
return;
}

port->elem.out_num = 0;
port->elem = NULL;
}

static void virtser_port_device_plug(HotplugHandler *hotplug_dev,