/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 */

#include <inttypes.h>

#include "trace.h"
#include "qemu-error.h"
#include "virtio.h"
#include "sysemu.h"
#include "qemu-barrier.h"

/* The alignment to use between consumer and producer parts of vring.
 * x86 pagesize again. */
#define VIRTIO_PCI_VRING_ALIGN         4096

typedef struct VRingDesc
{
    uint64_t addr;
    uint32_t len;
    uint16_t flags;
    uint16_t next;
} VRingDesc;

typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[0];
} VRingAvail;

typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;

typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[0];
} VRingUsed;

typedef struct VRing
{
    unsigned int num;
    target_phys_addr_t desc;
    target_phys_addr_t avail;
    target_phys_addr_t used;
} VRing;

struct VirtQueue
{
    VRing vring;
    target_phys_addr_t pa;
    uint16_t last_avail_idx;
    int inuse;
    uint16_t vector;
    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
};

/* virt queue functions */
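/*
 * Compute the guest-physical addresses of the three vring regions from the
 * base address the guest programmed: the descriptor table sits at pa, the
 * avail ring immediately after it, and the used ring after that, rounded
 * up to VIRTIO_PCI_VRING_ALIGN.
 */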
static void virtqueue_init(VirtQueue *vq)
{
    target_phys_addr_t pa = vq->pa;

    vq->vring.desc = pa;
    vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
    vq->vring.used = vring_align(vq->vring.avail +
                                 offsetof(VRingAvail, ring[vq->vring.num]),
                                 VIRTIO_PCI_VRING_ALIGN);
}

static inline uint64_t vring_desc_addr(target_phys_addr_t desc_pa, int i)
{
    target_phys_addr_t pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
    return ldq_phys(pa);
}

static inline uint32_t vring_desc_len(target_phys_addr_t desc_pa, int i)
{
    target_phys_addr_t pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
    return ldl_phys(pa);
}

static inline uint16_t vring_desc_flags(target_phys_addr_t desc_pa, int i)
{
    target_phys_addr_t pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
    return lduw_phys(pa);
}

static inline uint16_t vring_desc_next(target_phys_addr_t desc_pa, int i)
{
    target_phys_addr_t pa;
    pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    target_phys_addr_t pa;
    pa = vq->vring.avail + offsetof(VRingAvail, flags);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    target_phys_addr_t pa;
    pa = vq->vring.avail + offsetof(VRingAvail, idx);
    return lduw_phys(pa);
}

static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
    target_phys_addr_t pa;
    pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
    return lduw_phys(pa);
}

static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
    stl_phys(pa, val);
}

static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
    stl_phys(pa, val);
}

static uint16_t vring_used_idx(VirtQueue *vq)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    return lduw_phys(pa);
}

static inline void vring_used_idx_increment(VirtQueue *vq, uint16_t val)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    stw_phys(pa, vring_used_idx(vq) + val);
}

static inline uint16_t vring_used_flags(VirtQueue *vq)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    return lduw_phys(pa);
}

static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    stw_phys(pa, lduw_phys(pa) | mask);
}

static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
    target_phys_addr_t pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    stw_phys(pa, lduw_phys(pa) & ~mask);
}

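/*
 * Tell the guest whether the host wants to be notified ("kicked") when new
 * buffers are posted, by toggling VRING_USED_F_NO_NOTIFY in the used ring.
 */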
void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    if (enable)
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    else
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
}

int virtio_queue_ready(VirtQueue *vq)
{
    return vq->vring.avail != 0;
}

int virtio_queue_empty(VirtQueue *vq)
{
    return vring_avail_idx(vq) == vq->last_avail_idx;
}

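/*
 * Record a completed element in the used ring, "idx" slots past the current
 * used index, unmapping its guest-memory buffers first.  "len" is the number
 * of bytes the device wrote into the element's in_sg buffers.  The new used
 * index is not visible to the guest until virtqueue_flush() publishes it.
 */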
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    unsigned int offset;
    int i;

    trace_virtqueue_fill(vq, elem, len, idx);

    offset = 0;
    for (i = 0; i < elem->in_num; i++) {
        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);

        cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
                                  elem->in_sg[i].iov_len,
                                  1, size);

        offset += elem->in_sg[i].iov_len;
    }

    for (i = 0; i < elem->out_num; i++)
        cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
                                  elem->out_sg[i].iov_len,
                                  0, elem->out_sg[i].iov_len);

    idx = (idx + vring_used_idx(vq)) % vq->vring.num;

    /* Get a pointer to the next entry in the used ring. */
    vring_used_ring_id(vq, idx, elem->index);
    vring_used_ring_len(vq, idx, len);
}

void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    /* Make sure buffer is written before we update index. */
    smp_wmb();
    trace_virtqueue_flush(vq, count);
    vring_used_idx_increment(vq, count);
    vq->inuse -= count;
}

void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    virtqueue_fill(vq, elem, len, 0);
    virtqueue_flush(vq, 1);
}

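/*
 * Return how many buffer heads the guest has published beyond "idx".  A
 * difference larger than the ring size means the guest is corrupting the
 * ring, which is treated as fatal.
 */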
static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    uint16_t num_heads = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        error_report("Guest moved used index from %u to %u",
                     idx, vring_avail_idx(vq));
        exit(1);
    }
    /* On success, callers read a descriptor at vq->last_avail_idx.
     * Make sure descriptor read does not bypass avail index read. */
    if (num_heads) {
        smp_rmb();
    }

    return num_heads;
}

static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
{
    unsigned int head;

    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    head = vring_avail_ring(vq, idx % vq->vring.num);

    /* If their number is silly, that's a fatal mistake. */
    if (head >= vq->vring.num) {
        error_report("Guest says index %u is available", head);
        exit(1);
    }

    return head;
}

static unsigned virtqueue_next_desc(target_phys_addr_t desc_pa,
                                    unsigned int i, unsigned int max)
{
    unsigned int next;

    /* If this descriptor says it doesn't chain, we're done. */
    if (!(vring_desc_flags(desc_pa, i) & VRING_DESC_F_NEXT))
        return max;

    /* Check they're not leading us off end of descriptors. */
    next = vring_desc_next(desc_pa, i);
    /* Make sure compiler knows to grab that: we don't want it changing! */
    smp_wmb();

    if (next >= max) {
        error_report("Desc next is %u", next);
        exit(1);
    }

    return next;
}

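/*
 * Walk the available descriptor chains (including indirect tables) and
 * return 1 as soon as the queue is seen to hold at least in_bytes of
 * device-writable space or out_bytes of device-readable data; return 0
 * otherwise.  Nothing is consumed from the queue.
 */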
int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes)
{
    unsigned int idx;
    int total_bufs, in_total, out_total;

    idx = vq->last_avail_idx;

    total_bufs = in_total = out_total = 0;
    while (virtqueue_num_heads(vq, idx)) {
        unsigned int max, num_bufs, indirect = 0;
        target_phys_addr_t desc_pa;
        int i;

        max = vq->vring.num;
        num_bufs = total_bufs;
        i = virtqueue_get_head(vq, idx++);
        desc_pa = vq->vring.desc;

        if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
            if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
                error_report("Invalid size for indirect buffer table");
                exit(1);
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                error_report("Looped descriptor");
                exit(1);
            }

            /* loop over the indirect descriptor table */
            indirect = 1;
            max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
            desc_pa = vring_desc_addr(desc_pa, i);
            num_bufs = i = 0;
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                error_report("Looped descriptor");
                exit(1);
            }

            if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
                if (in_bytes > 0 &&
                    (in_total += vring_desc_len(desc_pa, i)) >= in_bytes)
                    return 1;
            } else {
                if (out_bytes > 0 &&
                    (out_total += vring_desc_len(desc_pa, i)) >= out_bytes)
                    return 1;
            }
        } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);

        if (!indirect)
            total_bufs = num_bufs;
        else
            total_bufs++;
    }

    return 0;
}

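/*
 * Map a scatter-gather list of guest-physical addresses into host virtual
 * memory.  Virtio buffers must live in guest RAM: a mapping that fails, or
 * that comes back shorter than requested, means the guest pointed us at
 * MMIO, which is fatal.
 */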
void virtqueue_map_sg(struct iovec *sg, target_phys_addr_t *addr,
                      size_t num_sg, int is_write)
{
    unsigned int i;
    target_phys_addr_t len;

    for (i = 0; i < num_sg; i++) {
        len = sg[i].iov_len;
        sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
        if (sg[i].iov_base == NULL || len != sg[i].iov_len) {
            error_report("virtio: trying to map MMIO memory");
            exit(1);
        }
    }
}

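/*
 * Pop the next available element: walk its descriptor chain (switching to
 * the indirect table if VRING_DESC_F_INDIRECT is set on the head), sort
 * buffers into device-readable out_sg and device-writable in_sg lists, and
 * map them into host memory.  Returns the total number of iovecs, or 0 if
 * the queue is empty.
 */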
int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
{
    unsigned int i, head, max;
    target_phys_addr_t desc_pa = vq->vring.desc;

    if (!virtqueue_num_heads(vq, vq->last_avail_idx))
        return 0;

    /* When we start there are none of either input nor output. */
    elem->out_num = elem->in_num = 0;

    max = vq->vring.num;

    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);

    if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
        if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
            error_report("Invalid size for indirect buffer table");
            exit(1);
        }

        /* loop over the indirect descriptor table */
        max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
        desc_pa = vring_desc_addr(desc_pa, i);
        i = 0;
    }

    /* Collect all the descriptors */
    do {
        struct iovec *sg;

        if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
            elem->in_addr[elem->in_num] = vring_desc_addr(desc_pa, i);
            sg = &elem->in_sg[elem->in_num++];
        } else {
            elem->out_addr[elem->out_num] = vring_desc_addr(desc_pa, i);
            sg = &elem->out_sg[elem->out_num++];
        }

        sg->iov_len = vring_desc_len(desc_pa, i);

        /* If we've got too many, that implies a descriptor loop. */
        if ((elem->in_num + elem->out_num) > max) {
            error_report("Looped descriptor");
            exit(1);
        }
    } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);

    /* Now map what we have collected */
    virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
    virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);

    elem->index = head;

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
    return elem->in_num + elem->out_num;
}

/* virtio device */
static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
{
    if (vdev->binding->notify) {
        vdev->binding->notify(vdev->binding_opaque, vector);
    }
}

void virtio_update_irq(VirtIODevice *vdev)
{
    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
}

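/*
 * Return the device to its post-power-on state: let the backend reset
 * itself, clear the negotiated features, status, ISR, and vector routing,
 * and forget every queue's vring addresses and indices.
 */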
void virtio_reset(void *opaque)
{
    VirtIODevice *vdev = opaque;
    int i;

    virtio_set_status(vdev, 0);

    if (vdev->reset)
        vdev->reset(vdev);

    vdev->guest_features = 0;
    vdev->queue_sel = 0;
    vdev->status = 0;
    vdev->isr = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    virtio_notify_vector(vdev, vdev->config_vector);

    for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        vdev->vq[i].vring.desc = 0;
        vdev->vq[i].vring.avail = 0;
        vdev->vq[i].vring.used = 0;
        vdev->vq[i].last_avail_idx = 0;
        vdev->vq[i].pa = 0;
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
    }
}

uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
{
    uint8_t val;

    vdev->get_config(vdev, vdev->config);

    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
{
    uint16_t val;

    vdev->get_config(vdev, vdev->config);

    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
{
    uint32_t val;

    vdev->get_config(vdev, vdev->config);

    if (addr > (vdev->config_len - sizeof(val)))
        return (uint32_t)-1;

    memcpy(&val, vdev->config + addr, sizeof(val));
    return val;
}

void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    uint8_t val = data;

    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));

    if (vdev->set_config)
        vdev->set_config(vdev, vdev->config);
}

void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    uint16_t val = data;

    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));

    if (vdev->set_config)
        vdev->set_config(vdev, vdev->config);
}

void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    uint32_t val = data;

    if (addr > (vdev->config_len - sizeof(val)))
        return;

    memcpy(vdev->config + addr, &val, sizeof(val));

    if (vdev->set_config)
        vdev->set_config(vdev, vdev->config);
}

void virtio_queue_set_addr(VirtIODevice *vdev, int n, target_phys_addr_t addr)
{
    vdev->vq[n].pa = addr;
    virtqueue_init(&vdev->vq[n]);
}

target_phys_addr_t virtio_queue_get_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].pa;
}

int virtio_queue_get_num(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.num;
}

void virtio_queue_notify_vq(VirtQueue *vq)
{
    if (vq->vring.desc) {
        VirtIODevice *vdev = vq->vdev;
        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
        vq->handle_output(vdev, vq);
    }
}

void virtio_queue_notify(VirtIODevice *vdev, int n)
{
    if (n < VIRTIO_PCI_QUEUE_MAX) {
        virtio_queue_notify_vq(&vdev->vq[n]);
    }
}

uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
{
    return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
        VIRTIO_NO_VECTOR;
}

void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
{
    if (n < VIRTIO_PCI_QUEUE_MAX)
        vdev->vq[n].vector = vector;
}

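/*
 * Hand out the first unused queue slot.  Aborts if all
 * VIRTIO_PCI_QUEUE_MAX slots are taken or the requested size exceeds
 * VIRTQUEUE_MAX_SIZE.
 */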
VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            void (*handle_output)(VirtIODevice *, VirtQueue *))
{
    int i;

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
        abort();

    vdev->vq[i].vring.num = queue_size;
    vdev->vq[i].handle_output = handle_output;

    return &vdev->vq[i];
}

void virtio_irq(VirtQueue *vq)
{
    trace_virtio_irq(vq);
    vq->vdev->isr |= 0x01;
    virtio_notify_vector(vq->vdev, vq->vector);
}

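/*
 * Raise a queue interrupt unless the guest has suppressed it.  With
 * VRING_AVAIL_F_NO_INTERRUPT set, the interrupt is still delivered when
 * the queue is empty and VIRTIO_F_NOTIFY_ON_EMPTY was negotiated.
 * Returns 1 if an interrupt was injected, 0 if it was suppressed.
 */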
int virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    /* We need to expose used array entries before checking used event. */
    smp_mb();
    /* Always notify when the queue is empty (if the guest acknowledged the
     * VIRTIO_F_NOTIFY_ON_EMPTY feature). */
    if ((vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT) &&
        (!(vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) ||
         (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx)))
        return 0;

    trace_virtio_notify(vdev, vq);
    vdev->isr |= 0x01;
    virtio_notify_vector(vdev, vq->vector);
    return 1;
}

void virtio_notify_config(VirtIODevice *vdev)
{
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
        return;

    vdev->isr |= 0x03;
    virtio_notify_vector(vdev, vdev->config_vector);
}

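/*
 * Serialize the common device state: binding config, status registers,
 * negotiated features, config space, and, for each active queue, its size,
 * guest-physical base, and last seen avail index.  virtio_load() below
 * consumes the same layout.
 */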
void virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
    int i;

    if (vdev->binding->save_config)
        vdev->binding->save_config(vdev->binding_opaque, f);

    qemu_put_8s(f, &vdev->status);
    qemu_put_8s(f, &vdev->isr);
    qemu_put_be16s(f, &vdev->queue_sel);
    qemu_put_be32s(f, &vdev->guest_features);
    qemu_put_be32(f, vdev->config_len);
    qemu_put_buffer(f, vdev->config, vdev->config_len);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    qemu_put_be32(f, i);

    for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;

        qemu_put_be32(f, vdev->vq[i].vring.num);
        qemu_put_be64(f, vdev->vq[i].pa);
        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
        if (vdev->binding->save_queue)
            vdev->binding->save_queue(vdev->binding_opaque, i, f);
    }
}

int virtio_load(VirtIODevice *vdev, QEMUFile *f)
{
    int num, i, ret;
    uint32_t features;
    uint32_t supported_features =
        vdev->binding->get_features(vdev->binding_opaque);

    if (vdev->binding->load_config) {
        ret = vdev->binding->load_config(vdev->binding_opaque, f);
        if (ret)
            return ret;
    }

    qemu_get_8s(f, &vdev->status);
    qemu_get_8s(f, &vdev->isr);
    qemu_get_be16s(f, &vdev->queue_sel);
    qemu_get_be32s(f, &features);
    if (features & ~supported_features) {
        error_report("Features 0x%x unsupported. Allowed features: 0x%x",
                     features, supported_features);
        return -1;
    }
    if (vdev->set_features)
        vdev->set_features(vdev, features);
    vdev->guest_features = features;
    vdev->config_len = qemu_get_be32(f);
    qemu_get_buffer(f, vdev->config, vdev->config_len);

    num = qemu_get_be32(f);

    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);
        vdev->vq[i].pa = qemu_get_be64(f);
        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);

        if (vdev->vq[i].pa) {
            uint16_t nheads;
            virtqueue_init(&vdev->vq[i]);
            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
            /* Check it isn't doing very strange things with descriptor numbers. */
            if (nheads > vdev->vq[i].vring.num) {
                error_report("VQ %d size 0x%x Guest index 0x%x "
                             "inconsistent with Host index 0x%x: delta 0x%x\n",
                             i, vdev->vq[i].vring.num,
                             vring_avail_idx(&vdev->vq[i]),
                             vdev->vq[i].last_avail_idx, nheads);
                return -1;
            }
        } else if (vdev->vq[i].last_avail_idx) {
            error_report("VQ %d address 0x0 "
                         "inconsistent with Host index 0x%x\n",
                         i, vdev->vq[i].last_avail_idx);
            return -1;
        }
        if (vdev->binding->load_queue) {
            ret = vdev->binding->load_queue(vdev->binding_opaque, i, f);
            if (ret)
                return ret;
        }
    }

    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
    return 0;
}

void virtio_cleanup(VirtIODevice *vdev)
{
    qemu_del_vm_change_state_handler(vdev->vmstate);
    if (vdev->config)
        qemu_free(vdev->config);
    qemu_free(vdev->vq);
}

static void virtio_vmstate_change(void *opaque, int running, int reason)
{
    VirtIODevice *vdev = opaque;
    bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
    vdev->vm_running = running;

    if (backend_run) {
        virtio_set_status(vdev, vdev->status);
    }

    if (vdev->binding->vmstate_change) {
        vdev->binding->vmstate_change(vdev->binding_opaque, backend_run);
    }

    if (!backend_run) {
        virtio_set_status(vdev, vdev->status);
    }
}

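/*
 * Allocate and initialize the state common to all virtio devices.
 * struct_size lets a device embed VirtIODevice at the head of a larger
 * device-specific structure.
 */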
VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
                                 size_t config_size, size_t struct_size)
{
    VirtIODevice *vdev;
    int i;

    vdev = qemu_mallocz(struct_size);

    vdev->device_id = device_id;
    vdev->status = 0;
    vdev->isr = 0;
    vdev->queue_sel = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
    for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
        vdev->vq[i].vdev = vdev;
    }

    vdev->name = name;
    vdev->config_len = config_size;
    if (vdev->config_len)
        vdev->config = qemu_mallocz(config_size);
    else
        vdev->config = NULL;

    vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change, vdev);

    return vdev;
}

void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
                        void *opaque)
{
    vdev->binding = binding;
    vdev->binding_opaque = opaque;
}

target_phys_addr_t virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

target_phys_addr_t virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.avail;
}

target_phys_addr_t virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used;
}

target_phys_addr_t virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

target_phys_addr_t virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
{
    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
}

target_phys_addr_t virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
{
    return offsetof(VRingAvail, ring) +
        sizeof(uint16_t) * vdev->vq[n].vring.num;
}

target_phys_addr_t virtio_queue_get_used_size(VirtIODevice *vdev, int n)
{
    return offsetof(VRingUsed, ring) +
        sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
}

target_phys_addr_t virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
        virtio_queue_get_used_size(vdev, n);
}

uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].last_avail_idx;
}

void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
{
    vdev->vq[n].last_avail_idx = idx;
}

VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
{
    return vdev->vq + n;
}

EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
{
    return &vq->guest_notifier;
}

EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
{
    return &vq->host_notifier;
}
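
/*
 * Return nonzero if every buffer we have taken off the queue has been
 * returned to the used ring, or if the guest has posted new buffers since
 * we last looked; used alongside virtqueue_stalled() below as part of the
 * HVM-750 workaround for guest drivers that race with interrupt delivery.
 */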
int virtqueue_handled(VirtQueue *vq)
{
    smp_mb();
    return (vq->last_avail_idx == vring_used_idx(vq) ||
            vq->last_avail_idx != vring_avail_idx(vq));
}

/*
 * We need to go through and check if we have hit the 'stalled' condition.
 * Due to the way that the virtio driver is implemented in the Linux kernel, it
 * will potentially kick the host to process data, disable the queue, but not
 * enable interrupts before the host is done processing packets. When this
 * happens all network traffic from the guest ends up getting corked up because
 * the guest disabled the queue and is waiting for an interrupt from the host to
 * go and enable it again. In fact, when in this state a little bit of libproc
 * magic gets us going again rather reliably.
 *
 * Eventually the guest will go through and unmask interrupts saying that it
 * wants an injection. If we reach a point in time where our last seen
 * available index is equal to both the avail ring's index and the used ring's
 * index, then we'll go ahead and inject the interrupt.
 */
int virtqueue_stalled(VirtQueue *vq)
{
    smp_mb();

    if (vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT)
        return (0);

    if (vring_used_flags(vq) & VRING_USED_F_NO_NOTIFY)
        return (0);

    if (vq->inuse)
        return (0);

    /* We could have also lost the interrupt the other way */
    if (vq->last_avail_idx != vring_avail_idx(vq))
        return (2);

    if (vq->last_avail_idx != vring_used_idx(vq))
        return (0);

    /*
     * Interrupts are enabled and we're at a point in time where we would
     * have stalled. Let's go ahead and inject the interrupt.
     */
    trace_virtio_notify(vq->vdev, vq);
    vq->vdev->isr |= 0x01;
    virtio_notify_vector(vq->vdev, vq->vector);
    return (1);
}