From e45e7ae281221de48148477fd4f066091995653e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Fri, 2 Jun 2017 18:12:21 +0400 Subject: [PATCH 01/41] exec: check kvm mmu notifiers earlier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move kvm mmu notifiers check before calling file_ram_alloc(), with the other xen precondition. (file_ram_alloc() will be reused in other cases than -mem-path). Signed-off-by: Marc-André Lureau Message-Id: <20170602141229.15326-2-marcandre.lureau@redhat.com> Signed-off-by: Paolo Bonzini --- exec.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/exec.c b/exec.c index a93e20962597..a5be851fb30b 100644 --- a/exec.c +++ b/exec.c @@ -1495,12 +1495,6 @@ static void *file_ram_alloc(RAMBlock *block, int fd = -1; int64_t file_size; - if (kvm_enabled() && !kvm_has_sync_mmu()) { - error_setg(errp, - "host lacks kvm mmu notifiers, -mem-path unsupported"); - return NULL; - } - for (;;) { fd = open(path, O_RDWR); if (fd >= 0) { @@ -1943,6 +1937,12 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, return NULL; } + if (kvm_enabled() && !kvm_has_sync_mmu()) { + error_setg(errp, + "host lacks kvm mmu notifiers, -mem-path unsupported"); + return NULL; + } + if (phys_mem_alloc != qemu_anon_ram_alloc) { /* * file_ram_alloc() needs to allocate just like From 8d37b030fe14917a95118db04ed898c1cd2c8f49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Fri, 2 Jun 2017 18:12:22 +0400 Subject: [PATCH 02/41] exec: split file_ram_alloc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move file opening part in a seperate function, file_ram_open(). This allows for reuse of file_ram_alloc() with a given fd. Signed-off-by: Marc-André Lureau Message-Id: <20170602141229.15326-3-marcandre.lureau@redhat.com> Signed-off-by: Paolo Bonzini --- exec.c | 83 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 46 insertions(+), 37 deletions(-) diff --git a/exec.c b/exec.c index a5be851fb30b..42a5a7c8f027 100644 --- a/exec.c +++ b/exec.c @@ -1482,19 +1482,17 @@ static int64_t get_file_size(int fd) return size; } -static void *file_ram_alloc(RAMBlock *block, - ram_addr_t memory, - const char *path, - Error **errp) +static int file_ram_open(const char *path, + const char *region_name, + bool *created, + Error **errp) { - bool unlink_on_error = false; char *filename; char *sanitized_name; char *c; - void *area = MAP_FAILED; int fd = -1; - int64_t file_size; + *created = false; for (;;) { fd = open(path, O_RDWR); if (fd >= 0) { @@ -1505,13 +1503,13 @@ static void *file_ram_alloc(RAMBlock *block, /* @path names a file that doesn't exist, create it */ fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644); if (fd >= 0) { - unlink_on_error = true; + *created = true; break; } } else if (errno == EISDIR) { /* @path names a directory, create a file there */ /* Make name safe to use with mkstemp by replacing '/' with '_'. */ - sanitized_name = g_strdup(memory_region_name(block->mr)); + sanitized_name = g_strdup(region_name); for (c = sanitized_name; *c != '\0'; c++) { if (*c == '/') { *c = '_'; @@ -1534,7 +1532,7 @@ static void *file_ram_alloc(RAMBlock *block, error_setg_errno(errp, errno, "can't open backing store %s for guest RAM", path); - goto error; + return -1; } /* * Try again on EINTR and EEXIST. The latter happens when @@ -1542,6 +1540,17 @@ static void *file_ram_alloc(RAMBlock *block, */ } + return fd; +} + +static void *file_ram_alloc(RAMBlock *block, + ram_addr_t memory, + int fd, + bool truncate, + Error **errp) +{ + void *area; + block->page_size = qemu_fd_getpagesize(fd); block->mr->align = block->page_size; #if defined(__s390x__) @@ -1550,20 +1559,11 @@ static void *file_ram_alloc(RAMBlock *block, } #endif - file_size = get_file_size(fd); - if (memory < block->page_size) { error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to " "or larger than page size 0x%zx", memory, block->page_size); - goto error; - } - - if (file_size > 0 && file_size < memory) { - error_setg(errp, "backing store %s size 0x%" PRIx64 - " does not match 'size' option 0x" RAM_ADDR_FMT, - path, file_size, memory); - goto error; + return NULL; } memory = ROUND_UP(memory, block->page_size); @@ -1582,7 +1582,7 @@ static void *file_ram_alloc(RAMBlock *block, * those labels. Therefore, extending the non-empty backend file * is disabled as well. */ - if (!file_size && ftruncate(fd, memory)) { + if (truncate && ftruncate(fd, memory)) { perror("ftruncate"); } @@ -1591,30 +1591,19 @@ static void *file_ram_alloc(RAMBlock *block, if (area == MAP_FAILED) { error_setg_errno(errp, errno, "unable to map backing store for guest RAM"); - goto error; + return NULL; } if (mem_prealloc) { os_mem_prealloc(fd, area, memory, smp_cpus, errp); if (errp && *errp) { - goto error; + qemu_ram_munmap(area, memory); + return NULL; } } block->fd = fd; return area; - -error: - if (area != MAP_FAILED) { - qemu_ram_munmap(area, memory); - } - if (unlink_on_error) { - unlink(path); - } - if (fd != -1) { - close(fd); - } - return NULL; } #endif @@ -1931,6 +1920,9 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, { RAMBlock *new_block; Error *local_err = NULL; + int fd; + bool created; + int64_t file_size; if (xen_enabled()) { error_setg(errp, "-mem-path not supported with Xen"); @@ -1954,15 +1946,32 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, return NULL; } + fd = file_ram_open(mem_path, memory_region_name(mr), &created, errp); + if (fd < 0) { + return NULL; + } + size = HOST_PAGE_ALIGN(size); + file_size = get_file_size(fd); + if (file_size > 0 && file_size < size) { + error_setg(errp, "backing store %s size 0x%" PRIx64 + " does not match 'size' option 0x" RAM_ADDR_FMT, + mem_path, file_size, size); + close(fd); + return NULL; + } + new_block = g_malloc0(sizeof(*new_block)); new_block->mr = mr; new_block->used_length = size; new_block->max_length = size; new_block->flags = share ? RAM_SHARED : 0; - new_block->host = file_ram_alloc(new_block, size, - mem_path, errp); + new_block->host = file_ram_alloc(new_block, size, fd, !file_size, errp); if (!new_block->host) { + if (created) { + unlink(mem_path); + } + close(fd); g_free(new_block); return NULL; } From 38b3362dd15c2dc4ad0edd105cde914e639d90fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Fri, 2 Jun 2017 18:12:23 +0400 Subject: [PATCH 03/41] exec: split qemu_ram_alloc_from_file() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add qemu_ram_alloc_from_fd(), which can be use to allocate ramblock from fd only. Signed-off-by: Marc-André Lureau Message-Id: <20170602141229.15326-4-marcandre.lureau@redhat.com> Signed-off-by: Paolo Bonzini --- exec.c | 45 +++++++++++++++++++++++++++-------------- include/exec/ram_addr.h | 3 +++ 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/exec.c b/exec.c index 42a5a7c8f027..42ad1eaedd50 100644 --- a/exec.c +++ b/exec.c @@ -1914,14 +1914,12 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) } #ifdef __linux__ -RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, - bool share, const char *mem_path, - Error **errp) +RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, + bool share, int fd, + Error **errp) { RAMBlock *new_block; Error *local_err = NULL; - int fd; - bool created; int64_t file_size; if (xen_enabled()) { @@ -1946,18 +1944,12 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, return NULL; } - fd = file_ram_open(mem_path, memory_region_name(mr), &created, errp); - if (fd < 0) { - return NULL; - } - size = HOST_PAGE_ALIGN(size); file_size = get_file_size(fd); if (file_size > 0 && file_size < size) { error_setg(errp, "backing store %s size 0x%" PRIx64 " does not match 'size' option 0x" RAM_ADDR_FMT, mem_path, file_size, size); - close(fd); return NULL; } @@ -1968,10 +1960,6 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, new_block->flags = share ? RAM_SHARED : 0; new_block->host = file_ram_alloc(new_block, size, fd, !file_size, errp); if (!new_block->host) { - if (created) { - unlink(mem_path); - } - close(fd); g_free(new_block); return NULL; } @@ -1983,6 +1971,33 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, return NULL; } return new_block; + +} + + +RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, + bool share, const char *mem_path, + Error **errp) +{ + int fd; + bool created; + RAMBlock *block; + + fd = file_ram_open(mem_path, memory_region_name(mr), &created, errp); + if (fd < 0) { + return NULL; + } + + block = qemu_ram_alloc_from_fd(size, mr, share, fd, errp); + if (!block) { + if (created) { + unlink(mem_path); + } + close(fd); + return NULL; + } + + return block; } #endif diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 140efa840c47..73d1bea8b627 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -65,6 +65,9 @@ unsigned long last_ram_page(void); RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, bool share, const char *mem_path, Error **errp); +RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, + bool share, int fd, + Error **errp); RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, MemoryRegion *mr, Error **errp); RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp); From fea617c58b777495b9285f3dc6b33f2898352886 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Fri, 2 Jun 2017 18:12:24 +0400 Subject: [PATCH 04/41] Add memory_region_init_ram_from_fd() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new function to initialize a RAM memory region with a file descriptor to be mmap-ed. Signed-off-by: Marc-André Lureau Message-Id: <20170602141229.15326-5-marcandre.lureau@redhat.com> Signed-off-by: Paolo Bonzini --- include/exec/memory.h | 20 ++++++++++++++++++++ memory.c | 16 ++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/include/exec/memory.h b/include/exec/memory.h index 80e605a96abd..51a54c92d5f7 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -456,6 +456,26 @@ void memory_region_init_ram_from_file(MemoryRegion *mr, bool share, const char *path, Error **errp); + +/** + * memory_region_init_ram_from_fd: Initialize RAM memory region with a + * mmap-ed backend. + * + * @mr: the #MemoryRegion to be initialized. + * @owner: the object that tracks the region's reference count + * @name: the name of the region. + * @size: size of the region. + * @share: %true if memory must be mmaped with the MAP_SHARED flag + * @fd: the fd to mmap. + * @errp: pointer to Error*, to store an error if it happens. + */ +void memory_region_init_ram_from_fd(MemoryRegion *mr, + struct Object *owner, + const char *name, + uint64_t size, + bool share, + int fd, + Error **errp); #endif /** diff --git a/memory.c b/memory.c index 0ddc4cc28deb..b2ace20bac98 100644 --- a/memory.c +++ b/memory.c @@ -1397,6 +1397,22 @@ void memory_region_init_ram_from_file(MemoryRegion *mr, mr->ram_block = qemu_ram_alloc_from_file(size, mr, share, path, errp); mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0; } + +void memory_region_init_ram_from_fd(MemoryRegion *mr, + struct Object *owner, + const char *name, + uint64_t size, + bool share, + int fd, + Error **errp) +{ + memory_region_init(mr, owner, name, size); + mr->ram = true; + mr->terminates = true; + mr->destructor = memory_region_destructor_ram; + mr->ram_block = qemu_ram_alloc_from_fd(size, mr, share, fd, errp); + mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0; +} #endif void memory_region_init_ram_ptr(MemoryRegion *mr, From 8381d89bec0ca233c45c03939735e6eb89aab022 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Fri, 2 Jun 2017 18:12:25 +0400 Subject: [PATCH 05/41] ivshmem: use ram_from_fd() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of having its own mmap handling code, reuse the code from exec.c. Note: memory_region_init_ram_from_fd() adds some restrictions (check for xen, kvm sync-mmu, etc) and changes (such as size alignment). This may actually be more correct. Signed-off-by: Marc-André Lureau Message-Id: <20170602141229.15326-6-marcandre.lureau@redhat.com> Signed-off-by: Paolo Bonzini --- hw/misc/ivshmem.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 6367d041f04e..2f0819d977bf 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -491,9 +491,9 @@ static void setup_interrupt(IVShmemState *s, int vector, Error **errp) static void process_msg_shmem(IVShmemState *s, int fd, Error **errp) { + Error *local_err = NULL; struct stat buf; size_t size; - void *ptr; if (s->ivshmem_bar2) { error_setg(errp, "server sent unexpected shared memory message"); @@ -522,15 +522,13 @@ static void process_msg_shmem(IVShmemState *s, int fd, Error **errp) } /* mmap the region and map into the BAR2 */ - ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if (ptr == MAP_FAILED) { - error_setg_errno(errp, errno, "Failed to mmap shared memory"); - close(fd); + memory_region_init_ram_from_fd(&s->server_bar2, OBJECT(s), + "ivshmem.bar2", size, true, fd, &local_err); + if (local_err) { + error_propagate(errp, local_err); return; } - memory_region_init_ram_ptr(&s->server_bar2, OBJECT(s), - "ivshmem.bar2", size, ptr); - memory_region_set_fd(&s->server_bar2, fd); + s->ivshmem_bar2 = &s->server_bar2; } From 6b9911d0b6508b5fe4efa55d4f3bc9bc851c73a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Fri, 2 Jun 2017 18:12:26 +0400 Subject: [PATCH 06/41] memory: remove memory_region_set_fd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now unnecessary since ivshmem uses memory_region_init_ram_from_fd. Signed-off-by: Marc-André Lureau Message-Id: <20170602141229.15326-7-marcandre.lureau@redhat.com> Signed-off-by: Paolo Bonzini --- include/exec/memory.h | 11 ----------- memory.c | 10 ---------- 2 files changed, 21 deletions(-) diff --git a/include/exec/memory.h b/include/exec/memory.h index 51a54c92d5f7..37f8e78e71dc 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -824,17 +824,6 @@ static inline bool memory_region_is_rom(MemoryRegion *mr) */ int memory_region_get_fd(MemoryRegion *mr); -/** - * memory_region_set_fd: Mark a RAM memory region as backed by a - * file descriptor. - * - * This function is typically used after memory_region_init_ram_ptr(). - * - * @mr: the memory region being queried. - * @fd: the file descriptor that backs @mr. - */ -void memory_region_set_fd(MemoryRegion *mr, int fd); - /** * memory_region_from_host: Convert a pointer into a RAM memory region * and an offset within it. diff --git a/memory.c b/memory.c index b2ace20bac98..e08fa0ae6c8a 100644 --- a/memory.c +++ b/memory.c @@ -1851,16 +1851,6 @@ int memory_region_get_fd(MemoryRegion *mr) return fd; } -void memory_region_set_fd(MemoryRegion *mr, int fd) -{ - rcu_read_lock(); - while (mr->alias) { - mr = mr->alias; - } - mr->ram_block->fd = fd; - rcu_read_unlock(); -} - void *memory_region_get_ram_ptr(MemoryRegion *mr) { void *ptr; From 660174fc1b346803b3f1d7c260e2a36329b66435 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 1 Jun 2017 16:45:16 +0200 Subject: [PATCH 07/41] megasas: add qtest Signed-off-by: Paolo Bonzini --- tests/Makefile.include | 3 +++ tests/megasas-test.c | 51 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 tests/megasas-test.c diff --git a/tests/Makefile.include b/tests/Makefile.include index f42f3dfa72fd..77da9b7f4b4d 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -205,6 +205,8 @@ check-qtest-pci-y += tests/intel-hda-test$(EXESUF) gcov-files-pci-y += hw/audio/intel-hda.c hw/audio/hda-codec.c check-qtest-pci-$(CONFIG_EVENTFD) += tests/ivshmem-test$(EXESUF) gcov-files-pci-y += hw/misc/ivshmem.c +check-qtest-pci-y += tests/megasas-test$(EXESUF) +gcov-files-pci-y += hw/scsi/megasas.c check-qtest-i386-y = tests/endianness-test$(EXESUF) check-qtest-i386-y += tests/fdc-test$(EXESUF) @@ -755,6 +757,7 @@ tests/test-filter-mirror$(EXESUF): tests/test-filter-mirror.o $(qtest-obj-y) tests/test-filter-redirector$(EXESUF): tests/test-filter-redirector.o $(qtest-obj-y) tests/test-x86-cpuid-compat$(EXESUF): tests/test-x86-cpuid-compat.o $(qtest-obj-y) tests/ivshmem-test$(EXESUF): tests/ivshmem-test.o contrib/ivshmem-server/ivshmem-server.o $(libqos-pc-obj-y) $(libqos-spapr-obj-y) +tests/megasas-test$(EXESUF): tests/megasas-test.o $(libqos-spapr-obj-y) $(libqos-pc-obj-y) tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o contrib/libvhost-user/libvhost-user.o $(test-util-obj-y) tests/test-uuid$(EXESUF): tests/test-uuid.o $(test-util-obj-y) tests/test-arm-mptimer$(EXESUF): tests/test-arm-mptimer.o diff --git a/tests/megasas-test.c b/tests/megasas-test.c new file mode 100644 index 000000000000..a9e56a238932 --- /dev/null +++ b/tests/megasas-test.c @@ -0,0 +1,51 @@ +/* + * QTest testcase for LSI MegaRAID + * + * Copyright (c) 2017 Red Hat Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "libqtest.h" +#include "qemu/bswap.h" +#include "libqos/libqos-pc.h" +#include "libqos/libqos-spapr.h" + +static QOSState *qmegasas_start(const char *extra_opts) +{ + const char *arch = qtest_get_arch(); + const char *cmd = "-drive id=hd0,if=none,file=null-co://,format=raw " + "-device megasas,id=scsi0,addr=04.0 " + "-device scsi-hd,bus=scsi0.0,drive=hd0 %s"; + + if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { + return qtest_pc_boot(cmd, extra_opts ? : ""); + } + + g_printerr("virtio-scsi tests are only available on x86 or ppc64\n"); + exit(EXIT_FAILURE); +} + +static void qmegasas_stop(QOSState *qs) +{ + qtest_shutdown(qs); +} + +/* Tests only initialization so far. TODO: Replace with functional tests */ +static void pci_nop(void) +{ + QOSState *qs; + + qs = qmegasas_start(NULL); + qmegasas_stop(qs); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + qtest_add_func("/megasas/pci/nop", pci_nop); + + return g_test_run(); +} From 134550bf81a026e18cf58b81e2c2cceaf516f92e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 1 Jun 2017 17:18:39 +0200 Subject: [PATCH 08/41] megasas: do not read sense length more than once from frame Avoid TOC-TOU bugs depending on how the compiler behaves. Signed-off-by: Paolo Bonzini --- hw/scsi/megasas.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c index 804122ab0519..1888118e5f29 100644 --- a/hw/scsi/megasas.c +++ b/hw/scsi/megasas.c @@ -309,9 +309,11 @@ static int megasas_build_sense(MegasasCmd *cmd, uint8_t *sense_ptr, PCIDevice *pcid = PCI_DEVICE(cmd->state); uint32_t pa_hi = 0, pa_lo; hwaddr pa; + int frame_sense_len; - if (sense_len > cmd->frame->header.sense_len) { - sense_len = cmd->frame->header.sense_len; + frame_sense_len = cmd->frame->header.sense_len; + if (sense_len > frame_sense_len) { + sense_len = frame_sense_len; } if (sense_len) { pa_lo = le32_to_cpu(cmd->frame->pass.sense_addr_lo); From 24c0c77af515acbf0f9705e8096f33ef24d37430 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 1 Jun 2017 17:18:57 +0200 Subject: [PATCH 09/41] megasas: do not read iovec count more than once from frame Avoid TOC-TOU bugs depending on how the compiler behaves. Signed-off-by: Paolo Bonzini --- hw/scsi/megasas.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c index 1888118e5f29..c3531188825c 100644 --- a/hw/scsi/megasas.c +++ b/hw/scsi/megasas.c @@ -675,15 +675,16 @@ static int megasas_init_firmware(MegasasState *s, MegasasCmd *cmd) static int megasas_map_dcmd(MegasasState *s, MegasasCmd *cmd) { dma_addr_t iov_pa, iov_size; + int iov_count; cmd->flags = le16_to_cpu(cmd->frame->header.flags); - if (!cmd->frame->header.sge_count) { + iov_count = cmd->frame->header.sge_count; + if (!iov_count) { trace_megasas_dcmd_zero_sge(cmd->index); cmd->iov_size = 0; return 0; - } else if (cmd->frame->header.sge_count > 1) { - trace_megasas_dcmd_invalid_sge(cmd->index, - cmd->frame->header.sge_count); + } else if (iov_count > 1) { + trace_megasas_dcmd_invalid_sge(cmd->index, iov_count); cmd->iov_size = 0; return -EINVAL; } From 5104fac8539eaf155fc6de93e164be43e1e62242 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 1 Jun 2017 17:18:23 +0200 Subject: [PATCH 10/41] megasas: do not read DCMD opcode more than once from frame Avoid TOC-TOU bugs by storing the DCMD opcode in the MegasasCmd Signed-off-by: Paolo Bonzini --- hw/scsi/megasas.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c index c3531188825c..a3f75c1650f5 100644 --- a/hw/scsi/megasas.c +++ b/hw/scsi/megasas.c @@ -63,6 +63,7 @@ typedef struct MegasasCmd { hwaddr pa; hwaddr pa_size; + uint32_t dcmd_opcode; union mfi_frame *frame; SCSIRequest *req; QEMUSGList qsg; @@ -513,6 +514,7 @@ static MegasasCmd *megasas_enqueue_frame(MegasasState *s, cmd->context &= (uint64_t)0xFFFFFFFF; } cmd->count = count; + cmd->dcmd_opcode = -1; s->busy++; if (s->consumer_pa) { @@ -1562,22 +1564,21 @@ static const struct dcmd_cmd_tbl_t { static int megasas_handle_dcmd(MegasasState *s, MegasasCmd *cmd) { - int opcode; int retval = 0; size_t len; const struct dcmd_cmd_tbl_t *cmdptr = dcmd_cmd_tbl; - opcode = le32_to_cpu(cmd->frame->dcmd.opcode); - trace_megasas_handle_dcmd(cmd->index, opcode); + cmd->dcmd_opcode = le32_to_cpu(cmd->frame->dcmd.opcode); + trace_megasas_handle_dcmd(cmd->index, cmd->dcmd_opcode); if (megasas_map_dcmd(s, cmd) < 0) { return MFI_STAT_MEMORY_NOT_AVAILABLE; } - while (cmdptr->opcode != -1 && cmdptr->opcode != opcode) { + while (cmdptr->opcode != -1 && cmdptr->opcode != cmd->dcmd_opcode) { cmdptr++; } len = cmd->iov_size; if (cmdptr->opcode == -1) { - trace_megasas_dcmd_unhandled(cmd->index, opcode, len); + trace_megasas_dcmd_unhandled(cmd->index, cmd->dcmd_opcode, len); retval = megasas_dcmd_dummy(s, cmd); } else { trace_megasas_dcmd_enter(cmd->index, cmdptr->desc, len); @@ -1592,13 +1593,11 @@ static int megasas_handle_dcmd(MegasasState *s, MegasasCmd *cmd) static int megasas_finish_internal_dcmd(MegasasCmd *cmd, SCSIRequest *req) { - int opcode; int retval = MFI_STAT_OK; int lun = req->lun; - opcode = le32_to_cpu(cmd->frame->dcmd.opcode); - trace_megasas_dcmd_internal_finish(cmd->index, opcode, lun); - switch (opcode) { + trace_megasas_dcmd_internal_finish(cmd->index, cmd->dcmd_opcode, lun); + switch (cmd->dcmd_opcode) { case MFI_DCMD_PD_GET_INFO: retval = megasas_pd_get_info_submit(req->dev, lun, cmd); break; @@ -1606,7 +1605,7 @@ static int megasas_finish_internal_dcmd(MegasasCmd *cmd, retval = megasas_ld_get_info_submit(req->dev, lun, cmd); break; default: - trace_megasas_dcmd_internal_invalid(cmd->index, opcode); + trace_megasas_dcmd_internal_invalid(cmd->index, cmd->dcmd_opcode); retval = MFI_STAT_INVALID_DCMD; break; } @@ -1827,7 +1826,6 @@ static void megasas_xfer_complete(SCSIRequest *req, uint32_t len) { MegasasCmd *cmd = req->hba_private; uint8_t *buf; - uint32_t opcode; trace_megasas_io_complete(cmd->index, len); @@ -1837,8 +1835,7 @@ static void megasas_xfer_complete(SCSIRequest *req, uint32_t len) } buf = scsi_req_get_buf(req); - opcode = le32_to_cpu(cmd->frame->dcmd.opcode); - if (opcode == MFI_DCMD_PD_GET_INFO && cmd->iov_buf) { + if (cmd->dcmd_opcode == MFI_DCMD_PD_GET_INFO && cmd->iov_buf) { struct mfi_pd_info *info = cmd->iov_buf; if (info->inquiry_data[0] == 0x7f) { @@ -1849,7 +1846,7 @@ static void megasas_xfer_complete(SCSIRequest *req, uint32_t len) memcpy(info->vpd_page83, buf, len); } scsi_req_continue(req); - } else if (opcode == MFI_DCMD_LD_GET_INFO) { + } else if (cmd->dcmd_opcode == MFI_DCMD_LD_GET_INFO) { struct mfi_ld_info *info = cmd->iov_buf; if (cmd->iov_buf) { From 36c327a69d723571f02a7691631667cdb1865ee1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 1 Jun 2017 17:23:13 +0200 Subject: [PATCH 11/41] megasas: do not read command more than once from frame Avoid TOC-TOU bugs by passing the frame_cmd down, and checking cmd->dcmd_opcode instead of cmd->frame->header.frame_cmd. Signed-off-by: Paolo Bonzini --- hw/scsi/megasas.c | 60 ++++++++++++++++++++--------------------------- 1 file changed, 25 insertions(+), 35 deletions(-) diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c index a3f75c1650f5..38e0a2f5eff3 100644 --- a/hw/scsi/megasas.c +++ b/hw/scsi/megasas.c @@ -1591,12 +1591,13 @@ static int megasas_handle_dcmd(MegasasState *s, MegasasCmd *cmd) } static int megasas_finish_internal_dcmd(MegasasCmd *cmd, - SCSIRequest *req) + SCSIRequest *req, size_t resid) { int retval = MFI_STAT_OK; int lun = req->lun; trace_megasas_dcmd_internal_finish(cmd->index, cmd->dcmd_opcode, lun); + cmd->iov_size -= resid; switch (cmd->dcmd_opcode) { case MFI_DCMD_PD_GET_INFO: retval = megasas_pd_get_info_submit(req->dev, lun, cmd); @@ -1649,11 +1650,12 @@ static int megasas_enqueue_req(MegasasCmd *cmd, bool is_write) } static int megasas_handle_scsi(MegasasState *s, MegasasCmd *cmd, - bool is_logical) + int frame_cmd) { uint8_t *cdb; bool is_write; struct SCSIDevice *sdev = NULL; + bool is_logical = (frame_cmd == MFI_CMD_LD_SCSI_IO); cdb = cmd->frame->pass.cdb; @@ -1661,7 +1663,7 @@ static int megasas_handle_scsi(MegasasState *s, MegasasCmd *cmd, if (cmd->frame->header.target_id >= MFI_MAX_LD || cmd->frame->header.lun_id != 0) { trace_megasas_scsi_target_not_present( - mfi_frame_desc[cmd->frame->header.frame_cmd], is_logical, + mfi_frame_desc[frame_cmd], is_logical, cmd->frame->header.target_id, cmd->frame->header.lun_id); return MFI_STAT_DEVICE_NOT_FOUND; } @@ -1671,19 +1673,20 @@ static int megasas_handle_scsi(MegasasState *s, MegasasCmd *cmd, cmd->iov_size = le32_to_cpu(cmd->frame->header.data_len); trace_megasas_handle_scsi(mfi_frame_desc[cmd->frame->header.frame_cmd], - is_logical, cmd->frame->header.target_id, + trace_megasas_handle_scsi(mfi_frame_desc[frame_cmd], is_logical, + cmd->frame->header.target_id, cmd->frame->header.lun_id, sdev, cmd->iov_size); if (!sdev || (megasas_is_jbod(s) && is_logical)) { trace_megasas_scsi_target_not_present( - mfi_frame_desc[cmd->frame->header.frame_cmd], is_logical, + mfi_frame_desc[frame_cmd], is_logical, cmd->frame->header.target_id, cmd->frame->header.lun_id); return MFI_STAT_DEVICE_NOT_FOUND; } if (cmd->frame->header.cdb_len > 16) { trace_megasas_scsi_invalid_cdb_len( - mfi_frame_desc[cmd->frame->header.frame_cmd], is_logical, + mfi_frame_desc[frame_cmd], is_logical, cmd->frame->header.target_id, cmd->frame->header.lun_id, cmd->frame->header.cdb_len); megasas_write_sense(cmd, SENSE_CODE(INVALID_OPCODE)); @@ -1703,7 +1706,7 @@ static int megasas_handle_scsi(MegasasState *s, MegasasCmd *cmd, cmd->frame->header.lun_id, cdb, cmd); if (!cmd->req) { trace_megasas_scsi_req_alloc_failed( - mfi_frame_desc[cmd->frame->header.frame_cmd], + mfi_frame_desc[frame_cmd], cmd->frame->header.target_id, cmd->frame->header.lun_id); megasas_write_sense(cmd, SENSE_CODE(NO_SENSE)); cmd->frame->header.scsi_status = BUSY; @@ -1725,11 +1728,11 @@ static int megasas_handle_scsi(MegasasState *s, MegasasCmd *cmd, return MFI_STAT_INVALID_STATUS; } -static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd) +static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd, int frame_cmd) { uint32_t lba_count, lba_start_hi, lba_start_lo; uint64_t lba_start; - bool is_write = (cmd->frame->header.frame_cmd == MFI_CMD_LD_WRITE); + bool is_write = (frame_cmd == MFI_CMD_LD_WRITE); uint8_t cdb[16]; int len; struct SCSIDevice *sdev = NULL; @@ -1746,20 +1749,20 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd) } trace_megasas_handle_io(cmd->index, - mfi_frame_desc[cmd->frame->header.frame_cmd], + mfi_frame_desc[frame_cmd], cmd->frame->header.target_id, cmd->frame->header.lun_id, (unsigned long)lba_start, (unsigned long)lba_count); if (!sdev) { trace_megasas_io_target_not_present(cmd->index, - mfi_frame_desc[cmd->frame->header.frame_cmd], + mfi_frame_desc[frame_cmd], cmd->frame->header.target_id, cmd->frame->header.lun_id); return MFI_STAT_DEVICE_NOT_FOUND; } if (cmd->frame->header.cdb_len > 16) { trace_megasas_scsi_invalid_cdb_len( - mfi_frame_desc[cmd->frame->header.frame_cmd], 1, + mfi_frame_desc[frame_cmd], 1, cmd->frame->header.target_id, cmd->frame->header.lun_id, cmd->frame->header.cdb_len); megasas_write_sense(cmd, SENSE_CODE(INVALID_OPCODE)); @@ -1781,7 +1784,7 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd) cmd->frame->header.lun_id, cdb, cmd); if (!cmd->req) { trace_megasas_scsi_req_alloc_failed( - mfi_frame_desc[cmd->frame->header.frame_cmd], + mfi_frame_desc[frame_cmd], cmd->frame->header.target_id, cmd->frame->header.lun_id); megasas_write_sense(cmd, SENSE_CODE(NO_SENSE)); cmd->frame->header.scsi_status = BUSY; @@ -1799,23 +1802,11 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd) return MFI_STAT_INVALID_STATUS; } -static int megasas_finish_internal_command(MegasasCmd *cmd, - SCSIRequest *req, size_t resid) -{ - int retval = MFI_STAT_INVALID_CMD; - - if (cmd->frame->header.frame_cmd == MFI_CMD_DCMD) { - cmd->iov_size -= resid; - retval = megasas_finish_internal_dcmd(cmd, req); - } - return retval; -} - static QEMUSGList *megasas_get_sg_list(SCSIRequest *req) { MegasasCmd *cmd = req->hba_private; - if (cmd->frame->header.frame_cmd == MFI_CMD_DCMD) { + if (cmd->dcmd_opcode != -1) { return NULL; } else { return &cmd->qsg; @@ -1829,7 +1820,7 @@ static void megasas_xfer_complete(SCSIRequest *req, uint32_t len) trace_megasas_io_complete(cmd->index, len); - if (cmd->frame->header.frame_cmd != MFI_CMD_DCMD) { + if (cmd->dcmd_opcode != -1) { scsi_req_continue(req); return; } @@ -1872,7 +1863,7 @@ static void megasas_command_complete(SCSIRequest *req, uint32_t status, /* * Internal command complete */ - cmd_status = megasas_finish_internal_command(cmd, req, resid); + cmd_status = megasas_finish_internal_dcmd(cmd, req, resid); if (cmd_status == MFI_STAT_INVALID_STATUS) { return; } @@ -1943,6 +1934,7 @@ static void megasas_handle_frame(MegasasState *s, uint64_t frame_addr, { uint8_t frame_status = MFI_STAT_INVALID_CMD; uint64_t frame_context; + int frame_cmd; MegasasCmd *cmd; /* @@ -1961,7 +1953,8 @@ static void megasas_handle_frame(MegasasState *s, uint64_t frame_addr, s->event_count++; return; } - switch (cmd->frame->header.frame_cmd) { + frame_cmd = cmd->frame->header.frame_cmd; + switch (frame_cmd) { case MFI_CMD_INIT: frame_status = megasas_init_firmware(s, cmd); break; @@ -1972,18 +1965,15 @@ static void megasas_handle_frame(MegasasState *s, uint64_t frame_addr, frame_status = megasas_handle_abort(s, cmd); break; case MFI_CMD_PD_SCSI_IO: - frame_status = megasas_handle_scsi(s, cmd, 0); - break; case MFI_CMD_LD_SCSI_IO: - frame_status = megasas_handle_scsi(s, cmd, 1); + frame_status = megasas_handle_scsi(s, cmd, frame_cmd); break; case MFI_CMD_LD_READ: case MFI_CMD_LD_WRITE: - frame_status = megasas_handle_io(s, cmd); + frame_status = megasas_handle_io(s, cmd, frame_cmd); break; default: - trace_megasas_unhandled_frame_cmd(cmd->index, - cmd->frame->header.frame_cmd); + trace_megasas_unhandled_frame_cmd(cmd->index, frame_cmd); s->event_count++; break; } From b356807fcdfc45583c437f761fc579ab2a8eab11 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 1 Jun 2017 17:25:03 +0200 Subject: [PATCH 12/41] megasas: do not read SCSI req parameters more than once from frame Signed-off-by: Paolo Bonzini --- hw/scsi/megasas.c | 60 ++++++++++++++++++++--------------------------- 1 file changed, 26 insertions(+), 34 deletions(-) diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c index 38e0a2f5eff3..135662df316d 100644 --- a/hw/scsi/megasas.c +++ b/hw/scsi/megasas.c @@ -1653,42 +1653,39 @@ static int megasas_handle_scsi(MegasasState *s, MegasasCmd *cmd, int frame_cmd) { uint8_t *cdb; + int target_id, lun_id, cdb_len; bool is_write; struct SCSIDevice *sdev = NULL; bool is_logical = (frame_cmd == MFI_CMD_LD_SCSI_IO); cdb = cmd->frame->pass.cdb; + target_id = cmd->frame->header.target_id; + lun_id = cmd->frame->header.lun_id; + cdb_len = cmd->frame->header.cdb_len; if (is_logical) { - if (cmd->frame->header.target_id >= MFI_MAX_LD || - cmd->frame->header.lun_id != 0) { + if (target_id >= MFI_MAX_LD || lun_id != 0) { trace_megasas_scsi_target_not_present( - mfi_frame_desc[frame_cmd], is_logical, - cmd->frame->header.target_id, cmd->frame->header.lun_id); + mfi_frame_desc[frame_cmd], is_logical, target_id, lun_id); return MFI_STAT_DEVICE_NOT_FOUND; } } - sdev = scsi_device_find(&s->bus, 0, cmd->frame->header.target_id, - cmd->frame->header.lun_id); + sdev = scsi_device_find(&s->bus, 0, target_id, lun_id); cmd->iov_size = le32_to_cpu(cmd->frame->header.data_len); - trace_megasas_handle_scsi(mfi_frame_desc[cmd->frame->header.frame_cmd], trace_megasas_handle_scsi(mfi_frame_desc[frame_cmd], is_logical, - cmd->frame->header.target_id, - cmd->frame->header.lun_id, sdev, cmd->iov_size); + target_id, lun_id, sdev, cmd->iov_size); if (!sdev || (megasas_is_jbod(s) && is_logical)) { trace_megasas_scsi_target_not_present( - mfi_frame_desc[frame_cmd], is_logical, - cmd->frame->header.target_id, cmd->frame->header.lun_id); + mfi_frame_desc[frame_cmd], is_logical, target_id, lun_id); return MFI_STAT_DEVICE_NOT_FOUND; } - if (cmd->frame->header.cdb_len > 16) { + if (cdb_len > 16) { trace_megasas_scsi_invalid_cdb_len( mfi_frame_desc[frame_cmd], is_logical, - cmd->frame->header.target_id, cmd->frame->header.lun_id, - cmd->frame->header.cdb_len); + target_id, lun_id, cdb_len); megasas_write_sense(cmd, SENSE_CODE(INVALID_OPCODE)); cmd->frame->header.scsi_status = CHECK_CONDITION; s->event_count++; @@ -1702,12 +1699,10 @@ static int megasas_handle_scsi(MegasasState *s, MegasasCmd *cmd, return MFI_STAT_SCSI_DONE_WITH_ERROR; } - cmd->req = scsi_req_new(sdev, cmd->index, - cmd->frame->header.lun_id, cdb, cmd); + cmd->req = scsi_req_new(sdev, cmd->index, lun_id, cdb, cmd); if (!cmd->req) { trace_megasas_scsi_req_alloc_failed( - mfi_frame_desc[frame_cmd], - cmd->frame->header.target_id, cmd->frame->header.lun_id); + mfi_frame_desc[frame_cmd], target_id, lun_id); megasas_write_sense(cmd, SENSE_CODE(NO_SENSE)); cmd->frame->header.scsi_status = BUSY; s->event_count++; @@ -1736,35 +1731,33 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd, int frame_cmd) uint8_t cdb[16]; int len; struct SCSIDevice *sdev = NULL; + int target_id, lun_id, cdb_len; lba_count = le32_to_cpu(cmd->frame->io.header.data_len); lba_start_lo = le32_to_cpu(cmd->frame->io.lba_lo); lba_start_hi = le32_to_cpu(cmd->frame->io.lba_hi); lba_start = ((uint64_t)lba_start_hi << 32) | lba_start_lo; - if (cmd->frame->header.target_id < MFI_MAX_LD && - cmd->frame->header.lun_id == 0) { - sdev = scsi_device_find(&s->bus, 0, cmd->frame->header.target_id, - cmd->frame->header.lun_id); + target_id = cmd->frame->header.target_id; + lun_id = cmd->frame->header.lun_id; + cdb_len = cmd->frame->header.cdb_len; + + if (target_id < MFI_MAX_LD && lun_id == 0) { + sdev = scsi_device_find(&s->bus, 0, target_id, lun_id); } trace_megasas_handle_io(cmd->index, - mfi_frame_desc[frame_cmd], - cmd->frame->header.target_id, - cmd->frame->header.lun_id, + mfi_frame_desc[frame_cmd], target_id, lun_id, (unsigned long)lba_start, (unsigned long)lba_count); if (!sdev) { trace_megasas_io_target_not_present(cmd->index, - mfi_frame_desc[frame_cmd], - cmd->frame->header.target_id, cmd->frame->header.lun_id); + mfi_frame_desc[frame_cmd], target_id, lun_id); return MFI_STAT_DEVICE_NOT_FOUND; } - if (cmd->frame->header.cdb_len > 16) { + if (cdb_len > 16) { trace_megasas_scsi_invalid_cdb_len( - mfi_frame_desc[frame_cmd], 1, - cmd->frame->header.target_id, cmd->frame->header.lun_id, - cmd->frame->header.cdb_len); + mfi_frame_desc[frame_cmd], 1, target_id, lun_id, cdb_len); megasas_write_sense(cmd, SENSE_CODE(INVALID_OPCODE)); cmd->frame->header.scsi_status = CHECK_CONDITION; s->event_count++; @@ -1781,11 +1774,10 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd, int frame_cmd) megasas_encode_lba(cdb, lba_start, lba_count, is_write); cmd->req = scsi_req_new(sdev, cmd->index, - cmd->frame->header.lun_id, cdb, cmd); + lun_id, cdb, cmd); if (!cmd->req) { trace_megasas_scsi_req_alloc_failed( - mfi_frame_desc[frame_cmd], - cmd->frame->header.target_id, cmd->frame->header.lun_id); + mfi_frame_desc[frame_cmd], target_id, lun_id); megasas_write_sense(cmd, SENSE_CODE(NO_SENSE)); cmd->frame->header.scsi_status = BUSY; s->event_count++; From 87e459a810d7b1ec1638085b5a80ea3d9b43119a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 1 Jun 2017 17:26:14 +0200 Subject: [PATCH 13/41] megasas: always store SCSIRequest* into MegasasCmd This ensures that the request is unref'ed properly, and avoids a segmentation fault in the new qtest testcase that is added. This is CVE-2017-9503. Reported-by: Zhangyanyu Signed-off-by: Paolo Bonzini --- hw/scsi/megasas.c | 31 ++++++++++++++++--------------- tests/megasas-test.c | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 15 deletions(-) diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c index 135662df316d..734fdaef90d5 100644 --- a/hw/scsi/megasas.c +++ b/hw/scsi/megasas.c @@ -609,6 +609,9 @@ static void megasas_reset_frames(MegasasState *s) static void megasas_abort_command(MegasasCmd *cmd) { /* Never abort internal commands. */ + if (cmd->dcmd_opcode != -1) { + return; + } if (cmd->req != NULL) { scsi_req_cancel(cmd->req); } @@ -1017,7 +1020,6 @@ static int megasas_pd_get_info_submit(SCSIDevice *sdev, int lun, uint64_t pd_size; uint16_t pd_id = ((sdev->id & 0xFF) << 8) | (lun & 0xFF); uint8_t cmdbuf[6]; - SCSIRequest *req; size_t len, resid; if (!cmd->iov_buf) { @@ -1026,8 +1028,8 @@ static int megasas_pd_get_info_submit(SCSIDevice *sdev, int lun, info->inquiry_data[0] = 0x7f; /* Force PQual 0x3, PType 0x1f */ info->vpd_page83[0] = 0x7f; megasas_setup_inquiry(cmdbuf, 0, sizeof(info->inquiry_data)); - req = scsi_req_new(sdev, cmd->index, lun, cmdbuf, cmd); - if (!req) { + cmd->req = scsi_req_new(sdev, cmd->index, lun, cmdbuf, cmd); + if (!cmd->req) { trace_megasas_dcmd_req_alloc_failed(cmd->index, "PD get info std inquiry"); g_free(cmd->iov_buf); @@ -1036,26 +1038,26 @@ static int megasas_pd_get_info_submit(SCSIDevice *sdev, int lun, } trace_megasas_dcmd_internal_submit(cmd->index, "PD get info std inquiry", lun); - len = scsi_req_enqueue(req); + len = scsi_req_enqueue(cmd->req); if (len > 0) { cmd->iov_size = len; - scsi_req_continue(req); + scsi_req_continue(cmd->req); } return MFI_STAT_INVALID_STATUS; } else if (info->inquiry_data[0] != 0x7f && info->vpd_page83[0] == 0x7f) { megasas_setup_inquiry(cmdbuf, 0x83, sizeof(info->vpd_page83)); - req = scsi_req_new(sdev, cmd->index, lun, cmdbuf, cmd); - if (!req) { + cmd->req = scsi_req_new(sdev, cmd->index, lun, cmdbuf, cmd); + if (!cmd->req) { trace_megasas_dcmd_req_alloc_failed(cmd->index, "PD get info vpd inquiry"); return MFI_STAT_FLASH_ALLOC_FAIL; } trace_megasas_dcmd_internal_submit(cmd->index, "PD get info vpd inquiry", lun); - len = scsi_req_enqueue(req); + len = scsi_req_enqueue(cmd->req); if (len > 0) { cmd->iov_size = len; - scsi_req_continue(req); + scsi_req_continue(cmd->req); } return MFI_STAT_INVALID_STATUS; } @@ -1217,7 +1219,6 @@ static int megasas_ld_get_info_submit(SCSIDevice *sdev, int lun, struct mfi_ld_info *info = cmd->iov_buf; size_t dcmd_size = sizeof(struct mfi_ld_info); uint8_t cdb[6]; - SCSIRequest *req; ssize_t len, resid; uint16_t sdev_id = ((sdev->id & 0xFF) << 8) | (lun & 0xFF); uint64_t ld_size; @@ -1226,8 +1227,8 @@ static int megasas_ld_get_info_submit(SCSIDevice *sdev, int lun, cmd->iov_buf = g_malloc0(dcmd_size); info = cmd->iov_buf; megasas_setup_inquiry(cdb, 0x83, sizeof(info->vpd_page83)); - req = scsi_req_new(sdev, cmd->index, lun, cdb, cmd); - if (!req) { + cmd->req = scsi_req_new(sdev, cmd->index, lun, cdb, cmd); + if (!cmd->req) { trace_megasas_dcmd_req_alloc_failed(cmd->index, "LD get info vpd inquiry"); g_free(cmd->iov_buf); @@ -1236,10 +1237,10 @@ static int megasas_ld_get_info_submit(SCSIDevice *sdev, int lun, } trace_megasas_dcmd_internal_submit(cmd->index, "LD get info vpd inquiry", lun); - len = scsi_req_enqueue(req); + len = scsi_req_enqueue(cmd->req); if (len > 0) { cmd->iov_size = len; - scsi_req_continue(req); + scsi_req_continue(cmd->req); } return MFI_STAT_INVALID_STATUS; } @@ -1851,7 +1852,7 @@ static void megasas_command_complete(SCSIRequest *req, uint32_t status, return; } - if (cmd->req == NULL) { + if (cmd->dcmd_opcode != -1) { /* * Internal command complete */ diff --git a/tests/megasas-test.c b/tests/megasas-test.c index a9e56a238932..ce960e7f8192 100644 --- a/tests/megasas-test.c +++ b/tests/megasas-test.c @@ -42,10 +42,45 @@ static void pci_nop(void) qmegasas_stop(qs); } +/* This used to cause a NULL pointer dereference. */ +static void megasas_pd_get_info_fuzz(void) +{ + QPCIDevice *dev; + QOSState *qs; + QPCIBar bar; + uint32_t context[256]; + uint64_t context_pa; + int i; + + qs = qmegasas_start(NULL); + dev = qpci_device_find(qs->pcibus, QPCI_DEVFN(4,0)); + g_assert(dev != NULL); + + qpci_device_enable(dev); + bar = qpci_iomap(dev, 0, NULL); + + memset(context, 0, sizeof(context)); + context[0] = cpu_to_le32(0x05050505); + context[1] = cpu_to_le32(0x01010101); + for (i = 2; i < ARRAY_SIZE(context); i++) { + context[i] = cpu_to_le32(0x41414141); + } + context[6] = cpu_to_le32(0x02020000); + context[7] = cpu_to_le32(0); + + context_pa = qmalloc(qs, sizeof(context)); + memwrite(context_pa, context, sizeof(context)); + qpci_io_writel(dev, bar, 0x40, context_pa); + + g_free(dev); + qmegasas_stop(qs); +} + int main(int argc, char **argv) { g_test_init(&argc, &argv, NULL); qtest_add_func("/megasas/pci/nop", pci_nop); + qtest_add_func("/megasas/dcmd/pd-get-info/fuzz", megasas_pd_get_info_fuzz); return g_test_run(); } From 428952cfa966f5af8c8f3d33267d30969851e4c5 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 8 Jun 2017 14:59:59 +0200 Subject: [PATCH 14/41] Makefile: Do not generate files if "configure" has not been run yet When doing a "make -j10" in the vanilla QEMU source tree (without running "configure" first), the Makefile currently generates two files already, qemu-version.h and qemu-options.def. This should not happen, so let's only build the generated files if config-host.mak is available (i.e. "configure" has been run already). Signed-off-by: Thomas Huth Message-Id: <1496926799-13040-1-git-send-email-thuth@redhat.com> Signed-off-by: Paolo Bonzini --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index c830d7a46c16..32d4441697e4 100644 --- a/Makefile +++ b/Makefile @@ -791,9 +791,11 @@ endif # CONFIG_WIN # Add a dependency on the generated files, so that they are always # rebuilt before other object files +ifneq ($(wildcard config-host.mak),) ifneq ($(filter-out $(UNCHECKED_GOALS),$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail)) Makefile: $(GENERATED_FILES) endif +endif .SECONDARY: $(TRACE_HEADERS) $(TRACE_HEADERS:%=%-timestamp) \ $(TRACE_SOURCES) $(TRACE_SOURCES:%=%-timestamp) \ From a20fa79fa5585add650264ab9de1ea01624af943 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 8 Jun 2017 07:20:57 +0200 Subject: [PATCH 15/41] vl: Fix broken thread=xxx option of the --accel parameter Commit bde4d9205 ("Fix the -accel parameter and the documentation for 'hax'") introduced a regression by adding a new local accel_opts variable which shadows the variable with the same name that is declared at the beginning of the main() scope. This causes the qemu_tcg_configure() call later to be always called with NULL, so that the thread=xxx option gets ignored. Fix it by removing the local accel_opts variable and use "opts" instead, which is meant for storing temporary QemuOpts values. And while we're at it, also change the exit(1) here to exit(0) since asking for help is not an error. Fixes: bde4d9205ee9def98852ff6054cdef4efd74e1f8 Reported-by: Markus Armbruster Reported-by: Emilio G. Cota Signed-off-by: Thomas Huth Message-Id: <1496899257-25800-1-git-send-email-thuth@redhat.com> Signed-off-by: Paolo Bonzini --- vl.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/vl.c b/vl.c index 32db19e3b912..59fea154885d 100644 --- a/vl.c +++ b/vl.c @@ -3757,21 +3757,18 @@ int main(int argc, char **argv, char **envp) qdev_prop_register_global(&kvm_pit_lost_tick_policy); break; } - case QEMU_OPTION_accel: { - QemuOpts *accel_opts; - + case QEMU_OPTION_accel: accel_opts = qemu_opts_parse_noisily(qemu_find_opts("accel"), optarg, true); optarg = qemu_opt_get(accel_opts, "accel"); if (!optarg || is_help_option(optarg)) { error_printf("Possible accelerators: kvm, xen, hax, tcg\n"); - exit(1); + exit(0); } - accel_opts = qemu_opts_create(qemu_find_opts("machine"), NULL, - false, &error_abort); - qemu_opt_set(accel_opts, "accel", optarg, &error_abort); + opts = qemu_opts_create(qemu_find_opts("machine"), NULL, + false, &error_abort); + qemu_opt_set(opts, "accel", optarg, &error_abort); break; - } case QEMU_OPTION_usb: olist = qemu_find_opts("machine"); qemu_opts_parse_noisily(olist, "usb=on", false); From 1d78a3c3ab8be0d7ea5509676aa38b96c7d551d4 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Tue, 6 Jun 2017 21:19:39 +0300 Subject: [PATCH 16/41] kvm-all: make async_safe_run_on_cpu safe on kvm too Wrap the bulk of kvm_cpu_exec with cpu_exec_start/end, so that kvm version can also enjoy performing certain operations while all vCPUs are quiescent. Signed-off-by: Roman Kagan Message-Id: <20170606181948.16238-15-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- kvm-all.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kvm-all.c b/kvm-all.c index ab8262f67287..98ad151375cb 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -1977,6 +1977,7 @@ int kvm_cpu_exec(CPUState *cpu) } qemu_mutex_unlock_iothread(); + cpu_exec_start(cpu); do { MemTxAttrs attrs; @@ -2106,6 +2107,7 @@ int kvm_cpu_exec(CPUState *cpu) } } while (ret == 0); + cpu_exec_end(cpu); qemu_mutex_lock_iothread(); if (ret < 0) { From 457e03559dc57859eb12f993380ddeaeb73e9017 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 8 Jun 2017 16:49:05 +0200 Subject: [PATCH 17/41] hax-all: make async_safe_run_on_cpu safe on HAX too While at it, drop the current_cpu assignment since this is a per-thread variable on modern QEMU. Cc: Vincent Palatin Signed-off-by: Paolo Bonzini --- target/i386/hax-all.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/target/i386/hax-all.c b/target/i386/hax-all.c index 097db5cae1cd..ba6117d7de5b 100644 --- a/target/i386/hax-all.c +++ b/target/i386/hax-all.c @@ -514,9 +514,10 @@ static int hax_vcpu_hax_exec(CPUArchState *env) hax_vcpu_interrupt(env); qemu_mutex_unlock_iothread(); + cpu_exec_start(cpu); hax_ret = hax_vcpu_run(vcpu); + cpu_exec_end(cpu); qemu_mutex_lock_iothread(); - current_cpu = cpu; /* Simply continue the vcpu_run if system call interrupted */ if (hax_ret == -EINTR || hax_ret == -EAGAIN) { From 0c9390d978cbf61e8f16c9f580fa96b305c43568 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Thu, 8 Jun 2017 17:26:17 -0500 Subject: [PATCH 18/41] nbd: Fix regression on resiliency to port scan Back in qemu 2.5, qemu-nbd was immune to port probes (a transient server would not quit, regardless of how many probe connections came and went, until a connection actually negotiated). But we broke that in commit ee7d7aa when removing the return value to nbd_client_new(), although that patch also introduced a bug causing an assertion failure on a client that fails negotiation. We then made it worse during refactoring in commit 1a6245a (a segfault before we could even assert); the (masked) assertion was cleaned up in d3780c2 (still in 2.6), and just recently we finally fixed the segfault ("nbd: Fully intialize client in case of failed negotiation"). But that still means that ever since we added TLS support to qemu-nbd, we have been vulnerable to an ill-timed port-scan being able to cause a denial of service by taking down qemu-nbd before a real client has a chance to connect. Since negotiation is now handled asynchronously via coroutines, we no longer have a synchronous point of return by re-adding a return value to nbd_client_new(). So this patch instead wires things up to pass the negotiation status through the close_fn callback function. Simple test across two terminals: $ qemu-nbd -f raw -p 30001 file $ nmap 127.0.0.1 -p 30001 && \ qemu-io -c 'r 0 512' -f raw nbd://localhost:30001 Note that this patch does not change what constitutes successful negotiation (thus, a client must enter transmission phase before that client can be considered as a reason to terminate the server when the connection ends). Perhaps we may want to tweak things in a later patch to also treat a client that uses NBD_OPT_ABORT as being a 'successful' negotiation (the client correctly talked the NBD protocol, and informed us it was not going to use our export after all), but that's a discussion for another day. Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1451614 Signed-off-by: Eric Blake Message-Id: <20170608222617.20376-1-eblake@redhat.com> Signed-off-by: Paolo Bonzini --- blockdev-nbd.c | 6 +++++- include/block/nbd.h | 2 +- nbd/server.c | 24 +++++++++++++++--------- qemu-nbd.c | 4 ++-- 4 files changed, 23 insertions(+), 13 deletions(-) diff --git a/blockdev-nbd.c b/blockdev-nbd.c index dd0860f4a6e4..28f551a7b044 100644 --- a/blockdev-nbd.c +++ b/blockdev-nbd.c @@ -27,6 +27,10 @@ typedef struct NBDServerData { static NBDServerData *nbd_server; +static void nbd_blockdev_client_closed(NBDClient *client, bool ignored) +{ + nbd_client_put(client); +} static gboolean nbd_accept(QIOChannel *ioc, GIOCondition condition, gpointer opaque) @@ -46,7 +50,7 @@ static gboolean nbd_accept(QIOChannel *ioc, GIOCondition condition, qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server"); nbd_client_new(NULL, cioc, nbd_server->tlscreds, NULL, - nbd_client_put); + nbd_blockdev_client_closed); object_unref(OBJECT(cioc)); return TRUE; } diff --git a/include/block/nbd.h b/include/block/nbd.h index 416257abcad2..8fa5ce51f39d 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -162,7 +162,7 @@ void nbd_client_new(NBDExport *exp, QIOChannelSocket *sioc, QCryptoTLSCreds *tlscreds, const char *tlsaclname, - void (*close)(NBDClient *)); + void (*close_fn)(NBDClient *, bool)); void nbd_client_get(NBDClient *client); void nbd_client_put(NBDClient *client); diff --git a/nbd/server.c b/nbd/server.c index 49b55f6ede19..f2b1aa47ce8c 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -81,7 +81,7 @@ static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); struct NBDClient { int refcount; - void (*close)(NBDClient *client); + void (*close_fn)(NBDClient *client, bool negotiated); bool no_zeroes; NBDExport *exp; @@ -778,7 +778,7 @@ void nbd_client_put(NBDClient *client) } } -static void client_close(NBDClient *client) +static void client_close(NBDClient *client, bool negotiated) { if (client->closing) { return; @@ -793,8 +793,8 @@ static void client_close(NBDClient *client) NULL); /* Also tell the client, so that they release their reference. */ - if (client->close) { - client->close(client); + if (client->close_fn) { + client->close_fn(client, negotiated); } } @@ -975,7 +975,7 @@ void nbd_export_close(NBDExport *exp) nbd_export_get(exp); QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) { - client_close(client); + client_close(client, true); } nbd_export_set_name(exp, NULL); nbd_export_set_description(exp, NULL); @@ -1337,7 +1337,7 @@ static coroutine_fn void nbd_trip(void *opaque) out: nbd_request_put(req); - client_close(client); + client_close(client, true); nbd_client_put(client); } @@ -1363,7 +1363,7 @@ static coroutine_fn void nbd_co_client_start(void *opaque) qemu_co_mutex_init(&client->send_lock); if (nbd_negotiate(data)) { - client_close(client); + client_close(client, false); goto out; } @@ -1373,11 +1373,17 @@ static coroutine_fn void nbd_co_client_start(void *opaque) g_free(data); } +/* + * Create a new client listener on the given export @exp, using the + * given channel @sioc. Begin servicing it in a coroutine. When the + * connection closes, call @close_fn with an indication of whether the + * client completed negotiation. + */ void nbd_client_new(NBDExport *exp, QIOChannelSocket *sioc, QCryptoTLSCreds *tlscreds, const char *tlsaclname, - void (*close_fn)(NBDClient *)) + void (*close_fn)(NBDClient *, bool)) { NBDClient *client; NBDClientNewData *data = g_new(NBDClientNewData, 1); @@ -1394,7 +1400,7 @@ void nbd_client_new(NBDExport *exp, object_ref(OBJECT(client->sioc)); client->ioc = QIO_CHANNEL(sioc); object_ref(OBJECT(client->ioc)); - client->close = close_fn; + client->close_fn = close_fn; data->client = client; data->co = qemu_coroutine_create(nbd_co_client_start, data); diff --git a/qemu-nbd.c b/qemu-nbd.c index 651f85ecc1f0..9464a0461c1a 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -336,10 +336,10 @@ static void nbd_export_closed(NBDExport *exp) static void nbd_update_server_watch(void); -static void nbd_client_closed(NBDClient *client) +static void nbd_client_closed(NBDClient *client, bool negotiated) { nb_fds--; - if (nb_fds == 0 && !persistent && state == RUNNING) { + if (negotiated && nb_fds == 0 && !persistent && state == RUNNING) { state = TERMINATE; } nbd_update_server_watch(); From 041e32b8d9d076980b4e35317c0339e57ab888f1 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Sun, 11 Jun 2017 14:37:14 +0200 Subject: [PATCH 19/41] qemu-nbd: Ignore SIGPIPE qemu proper has done so for 13 years (8a7ddc38a60648257dc0645ab4a05b33d6040063), qemu-img and qemu-io have done so for four years (526eda14a68d5b3596be715505289b541288ef2a). Ignoring this signal is especially important in qemu-nbd because otherwise a client can easily take down the qemu-nbd server by dropping the connection when the server wants to send something, for example: $ qemu-nbd -x foo -f raw -t null-co:// & [1] 12726 $ qemu-io -c quit nbd://localhost/bar can't open device nbd://localhost/bar: No export with name 'bar' available [1] + 12726 broken pipe qemu-nbd -x foo -f raw -t null-co:// In this case, the client sends an NBD_OPT_ABORT and closes the connection (because it is not required to wait for a reply), but the server replies with an NBD_REP_ACK (because it is required to reply). Signed-off-by: Max Reitz Message-Id: <20170611123714.31292-1-mreitz@redhat.com> Signed-off-by: Paolo Bonzini --- qemu-nbd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/qemu-nbd.c b/qemu-nbd.c index 9464a0461c1a..4dd3fd47328d 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -581,6 +581,10 @@ int main(int argc, char **argv) sa_sigterm.sa_handler = termsig_handler; sigaction(SIGTERM, &sa_sigterm, NULL); +#ifdef CONFIG_POSIX + signal(SIGPIPE, SIG_IGN); +#endif + module_call_init(MODULE_INIT_TRACE); qcrypto_init(&error_fatal); From a9ded6017ea39ea845fb1acf5187528bd74acb04 Mon Sep 17 00:00:00 2001 From: Yang Zhong Date: Fri, 2 Jun 2017 14:06:43 +0800 Subject: [PATCH 20/41] accel: split the tcg accelerator from accel.c file there are some types of accelerators in qemu, and all accelerators have their own file except tcg. tcg accelerator is also defined in accel.c file. tcg accelerator file will be splited from accel.c and re-name to tcg-all.c. accel/ directory will be created to include kvm and tcg related files. Signed-off-by: Yang Zhong Message-Id: <1496383606-18060-2-git-send-email-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini --- Makefile.objs | 1 - Makefile.target | 1 + accel/Makefile.objs | 2 ++ accel.c => accel/accel.c | 27 ------------------ accel/tcg/Makefile.objs | 1 + accel/tcg/tcg-all.c | 61 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 65 insertions(+), 28 deletions(-) create mode 100644 accel/Makefile.objs rename accel.c => accel/accel.c (87%) create mode 100644 accel/tcg/Makefile.objs create mode 100644 accel/tcg/tcg-all.c diff --git a/Makefile.objs b/Makefile.objs index 057580244061..d32f2db34d3a 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -52,7 +52,6 @@ common-obj-y += migration/ common-obj-y += audio/ common-obj-y += hw/ -common-obj-y += accel.o common-obj-y += replay/ diff --git a/Makefile.target b/Makefile.target index ce8dfe44a81b..c3c08ff7cc43 100644 --- a/Makefile.target +++ b/Makefile.target @@ -143,6 +143,7 @@ obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o obj-y += qtest.o bootdevice.o obj-y += hw/ obj-$(CONFIG_KVM) += kvm-all.o +obj-y += accel/ obj-y += memory.o cputlb.o obj-y += memory_mapping.o obj-y += dump.o diff --git a/accel/Makefile.objs b/accel/Makefile.objs new file mode 100644 index 000000000000..11ad8236090d --- /dev/null +++ b/accel/Makefile.objs @@ -0,0 +1,2 @@ +obj-$(CONFIG_SOFTMMU) += accel.o +obj-y += tcg/ diff --git a/accel.c b/accel/accel.c similarity index 87% rename from accel.c rename to accel/accel.c index 664bb88422a4..7c079a56115f 100644 --- a/accel.c +++ b/accel/accel.c @@ -34,15 +34,6 @@ #include "hw/xen/xen.h" #include "qom/object.h" -int tcg_tb_size; -static bool tcg_allowed = true; - -static int tcg_init(MachineState *ms) -{ - tcg_exec_init(tcg_tb_size * 1024 * 1024); - return 0; -} - static const TypeInfo accel_type = { .name = TYPE_ACCEL, .parent = TYPE_OBJECT, @@ -129,27 +120,9 @@ void configure_accelerator(MachineState *ms) } } - -static void tcg_accel_class_init(ObjectClass *oc, void *data) -{ - AccelClass *ac = ACCEL_CLASS(oc); - ac->name = "tcg"; - ac->init_machine = tcg_init; - ac->allowed = &tcg_allowed; -} - -#define TYPE_TCG_ACCEL ACCEL_CLASS_NAME("tcg") - -static const TypeInfo tcg_accel_type = { - .name = TYPE_TCG_ACCEL, - .parent = TYPE_ACCEL, - .class_init = tcg_accel_class_init, -}; - static void register_accel_types(void) { type_register_static(&accel_type); - type_register_static(&tcg_accel_type); } type_init(register_accel_types); diff --git a/accel/tcg/Makefile.objs b/accel/tcg/Makefile.objs new file mode 100644 index 000000000000..6e3211ade15d --- /dev/null +++ b/accel/tcg/Makefile.objs @@ -0,0 +1 @@ +obj-$(CONFIG_SOFTMMU) += tcg-all.o diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c new file mode 100644 index 000000000000..dba99315e37f --- /dev/null +++ b/accel/tcg/tcg-all.c @@ -0,0 +1,61 @@ +/* + * QEMU System Emulator, accelerator interfaces + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2014 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "sysemu/accel.h" +#include "sysemu/sysemu.h" +#include "qom/object.h" + +int tcg_tb_size; +static bool tcg_allowed = true; + +static int tcg_init(MachineState *ms) +{ + tcg_exec_init(tcg_tb_size * 1024 * 1024); + return 0; +} + +static void tcg_accel_class_init(ObjectClass *oc, void *data) +{ + AccelClass *ac = ACCEL_CLASS(oc); + ac->name = "tcg"; + ac->init_machine = tcg_init; + ac->allowed = &tcg_allowed; +} + +#define TYPE_TCG_ACCEL ACCEL_CLASS_NAME("tcg") + +static const TypeInfo tcg_accel_type = { + .name = TYPE_TCG_ACCEL, + .parent = TYPE_ACCEL, + .class_init = tcg_accel_class_init, +}; + +static void register_accel_types(void) +{ + type_register_static(&tcg_accel_type); +} + +type_init(register_accel_types); From d9bb58e51068dfc48746c6af0179926c8dc05bce Mon Sep 17 00:00:00 2001 From: Yang Zhong Date: Fri, 2 Jun 2017 14:06:44 +0800 Subject: [PATCH 21/41] tcg: move tcg related files into accel/tcg/ subdirectory move cputlb.c, cpu-exec-common.c and cpu-exec.c related tcg exec file into accel/tcg/ subdirectory. Signed-off-by: Yang Zhong Message-Id: <1496383606-18060-3-git-send-email-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini --- Makefile.objs | 1 + Makefile.target | 7 +++---- accel/tcg/Makefile.objs | 2 ++ cpu-exec-common.c => accel/tcg/cpu-exec-common.c | 0 cpu-exec.c => accel/tcg/cpu-exec.c | 2 +- cputlb.c => accel/tcg/cputlb.c | 0 accel/tcg/trace-events | 7 +++++++ trace-events | 5 ----- 8 files changed, 14 insertions(+), 10 deletions(-) rename cpu-exec-common.c => accel/tcg/cpu-exec-common.c (100%) rename cpu-exec.c => accel/tcg/cpu-exec.c (99%) rename cputlb.c => accel/tcg/cputlb.c (100%) create mode 100644 accel/tcg/trace-events diff --git a/Makefile.objs b/Makefile.objs index d32f2db34d3a..871eca80e954 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -162,6 +162,7 @@ trace-events-subdirs += target/ppc trace-events-subdirs += qom trace-events-subdirs += linux-user trace-events-subdirs += qapi +trace-events-subdirs += accel/tcg trace-events-files = $(SRC_PATH)/trace-events $(trace-events-subdirs:%=$(SRC_PATH)/%/trace-events) diff --git a/Makefile.target b/Makefile.target index c3c08ff7cc43..18470ddb1257 100644 --- a/Makefile.target +++ b/Makefile.target @@ -88,9 +88,9 @@ all: $(PROGS) stap ######################################################### # cpu emulator library -obj-y = exec.o translate-all.o cpu-exec.o +obj-y = exec.o translate-all.o +obj-y += accel/ obj-y += translate-common.o -obj-y += cpu-exec-common.o obj-y += tcg/tcg.o tcg/tcg-op.o tcg/optimize.o obj-$(CONFIG_TCG_INTERPRETER) += tci.o obj-y += tcg/tcg-common.o @@ -143,8 +143,7 @@ obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o obj-y += qtest.o bootdevice.o obj-y += hw/ obj-$(CONFIG_KVM) += kvm-all.o -obj-y += accel/ -obj-y += memory.o cputlb.o +obj-y += memory.o obj-y += memory_mapping.o obj-y += dump.o obj-y += migration/ram.o diff --git a/accel/tcg/Makefile.objs b/accel/tcg/Makefile.objs index 6e3211ade15d..940379beb8a3 100644 --- a/accel/tcg/Makefile.objs +++ b/accel/tcg/Makefile.objs @@ -1 +1,3 @@ obj-$(CONFIG_SOFTMMU) += tcg-all.o +obj-$(CONFIG_SOFTMMU) += cputlb.o +obj-y += cpu-exec.o cpu-exec-common.o diff --git a/cpu-exec-common.c b/accel/tcg/cpu-exec-common.c similarity index 100% rename from cpu-exec-common.c rename to accel/tcg/cpu-exec-common.c diff --git a/cpu-exec.c b/accel/tcg/cpu-exec.c similarity index 99% rename from cpu-exec.c rename to accel/tcg/cpu-exec.c index 5b181c18edbf..3581618bc05f 100644 --- a/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -18,7 +18,7 @@ */ #include "qemu/osdep.h" #include "cpu.h" -#include "trace-root.h" +#include "trace.h" #include "disas/disas.h" #include "exec/exec-all.h" #include "tcg.h" diff --git a/cputlb.c b/accel/tcg/cputlb.c similarity index 100% rename from cputlb.c rename to accel/tcg/cputlb.c diff --git a/accel/tcg/trace-events b/accel/tcg/trace-events new file mode 100644 index 000000000000..f2db388bdc54 --- /dev/null +++ b/accel/tcg/trace-events @@ -0,0 +1,7 @@ +# Trace events for debugging and performance instrumentation + +# TCG related tracing (mostly disabled by default) +# cpu-exec.c +disable exec_tb(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR +disable exec_tb_nocache(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR +disable exec_tb_exit(void *last_tb, unsigned int flags) "tb:%p flags=%x" diff --git a/trace-events b/trace-events index fd83087e3941..279aedee1f7c 100644 --- a/trace-events +++ b/trace-events @@ -69,11 +69,6 @@ kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" kvm_irqchip_release_virq(int virq) "virq %d" # TCG related tracing (mostly disabled by default) -# cpu-exec.c -disable exec_tb(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR -disable exec_tb_nocache(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR -disable exec_tb_exit(void *last_tb, unsigned int flags) "tb:%p flags=%x" - # translate-all.c translate_block(void *tb, uintptr_t pc, uint8_t *tb_code) "tb:%p, pc:0x%"PRIxPTR", tb_code:%p" From 244f144134d0dd182f1af8654e7f9a79fe770368 Mon Sep 17 00:00:00 2001 From: Yang Zhong Date: Fri, 2 Jun 2017 14:06:45 +0800 Subject: [PATCH 22/41] tcg: move tcg backend files into accel/tcg/ move tcg-runtime.c, translate-all.(ch) and translate-common.c into accel/tcg/ subdirectory and updated related trace-events file. Signed-off-by: Yang Zhong Message-Id: <1496383606-18060-4-git-send-email-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini --- Makefile.target | 6 ++---- accel/tcg/Makefile.objs | 2 +- accel/tcg/trace-events | 3 +++ translate-all.c => accel/tcg/translate-all.c | 2 +- translate-all.h => accel/tcg/translate-all.h | 0 translate-common.c => accel/tcg/translate-common.c | 0 configure | 2 +- tcg-runtime.c => tcg/tcg-runtime.c | 0 tci.c => tcg/tci.c | 0 trace-events | 4 ---- 10 files changed, 8 insertions(+), 11 deletions(-) rename translate-all.c => accel/tcg/translate-all.c (99%) rename translate-all.h => accel/tcg/translate-all.h (100%) rename translate-common.c => accel/tcg/translate-common.c (100%) rename tcg-runtime.c => tcg/tcg-runtime.c (100%) rename tci.c => tcg/tci.c (100%) diff --git a/Makefile.target b/Makefile.target index 18470ddb1257..73cd9c6ad72e 100644 --- a/Makefile.target +++ b/Makefile.target @@ -88,17 +88,15 @@ all: $(PROGS) stap ######################################################### # cpu emulator library -obj-y = exec.o translate-all.o +obj-y = exec.o obj-y += accel/ -obj-y += translate-common.o obj-y += tcg/tcg.o tcg/tcg-op.o tcg/optimize.o +obj-y += tcg/tcg-common.o tcg/tcg-runtime.o obj-$(CONFIG_TCG_INTERPRETER) += tci.o -obj-y += tcg/tcg-common.o obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o obj-y += fpu/softfloat.o obj-y += target/$(TARGET_BASE_ARCH)/ obj-y += disas.o -obj-y += tcg-runtime.o obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o obj-$(call lnot,$(CONFIG_HAX)) += hax-stub.o obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o diff --git a/accel/tcg/Makefile.objs b/accel/tcg/Makefile.objs index 940379beb8a3..f173cd5397c5 100644 --- a/accel/tcg/Makefile.objs +++ b/accel/tcg/Makefile.objs @@ -1,3 +1,3 @@ obj-$(CONFIG_SOFTMMU) += tcg-all.o obj-$(CONFIG_SOFTMMU) += cputlb.o -obj-y += cpu-exec.o cpu-exec-common.o +obj-y += cpu-exec.o cpu-exec-common.o translate-all.o translate-common.o diff --git a/accel/tcg/trace-events b/accel/tcg/trace-events index f2db388bdc54..2de8359670f7 100644 --- a/accel/tcg/trace-events +++ b/accel/tcg/trace-events @@ -5,3 +5,6 @@ disable exec_tb(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR disable exec_tb_nocache(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR disable exec_tb_exit(void *last_tb, unsigned int flags) "tb:%p flags=%x" + +# translate-all.c +translate_block(void *tb, uintptr_t pc, uint8_t *tb_code) "tb:%p, pc:0x%"PRIxPTR", tb_code:%p" diff --git a/translate-all.c b/accel/tcg/translate-all.c similarity index 99% rename from translate-all.c rename to accel/tcg/translate-all.c index b3ee876526a8..7b25a1624440 100644 --- a/translate-all.c +++ b/accel/tcg/translate-all.c @@ -25,7 +25,7 @@ #include "qemu-common.h" #define NO_CPU_IO_DEFS #include "cpu.h" -#include "trace-root.h" +#include "trace.h" #include "disas/disas.h" #include "exec/exec-all.h" #include "tcg.h" diff --git a/translate-all.h b/accel/tcg/translate-all.h similarity index 100% rename from translate-all.h rename to accel/tcg/translate-all.h diff --git a/translate-common.c b/accel/tcg/translate-common.c similarity index 100% rename from translate-common.c rename to accel/tcg/translate-common.c diff --git a/configure b/configure index b147191ae698..48d5ceb8c692 100755 --- a/configure +++ b/configure @@ -407,7 +407,7 @@ QEMU_CFLAGS="-fno-strict-aliasing -fno-common -fwrapv $QEMU_CFLAGS" QEMU_CFLAGS="-Wall -Wundef -Wwrite-strings -Wmissing-prototypes $QEMU_CFLAGS" QEMU_CFLAGS="-Wstrict-prototypes -Wredundant-decls $QEMU_CFLAGS" QEMU_CFLAGS="-D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE $QEMU_CFLAGS" -QEMU_INCLUDES="-I. -I\$(SRC_PATH) -I\$(SRC_PATH)/include" +QEMU_INCLUDES="-I. -I\$(SRC_PATH) -I\$(SRC_PATH)/accel/tcg -I\$(SRC_PATH)/include" if test "$debug_info" = "yes"; then CFLAGS="-g $CFLAGS" LDFLAGS="-g $LDFLAGS" diff --git a/tcg-runtime.c b/tcg/tcg-runtime.c similarity index 100% rename from tcg-runtime.c rename to tcg/tcg-runtime.c diff --git a/tci.c b/tcg/tci.c similarity index 100% rename from tci.c rename to tcg/tci.c diff --git a/trace-events b/trace-events index 279aedee1f7c..62d8100c8d70 100644 --- a/trace-events +++ b/trace-events @@ -68,10 +68,6 @@ kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d vi kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" kvm_irqchip_release_virq(int virq) "virq %d" -# TCG related tracing (mostly disabled by default) -# translate-all.c -translate_block(void *tb, uintptr_t pc, uint8_t *tb_code) "tb:%p, pc:0x%"PRIxPTR", tb_code:%p" - # memory.c memory_region_ops_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u" memory_region_ops_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u" From 92229a57bba10d5bbf469221c32a18d4bed642f4 Mon Sep 17 00:00:00 2001 From: Yang Zhong Date: Fri, 2 Jun 2017 14:06:46 +0800 Subject: [PATCH 23/41] accel: move kvm related accelerator files into accel/ move kvm related accelerator files into accel/ subdirectory, also create one stub subdirectory, which will include accelerator's stub files. Signed-off-by: Yang Zhong Message-Id: <1496383606-18060-5-git-send-email-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini --- Makefile.objs | 1 + Makefile.target | 4 +--- accel/Makefile.objs | 2 ++ accel/kvm/Makefile.objs | 1 + kvm-all.c => accel/kvm/kvm-all.c | 2 +- accel/kvm/trace-events | 15 +++++++++++++++ accel/stubs/Makefile.objs | 1 + kvm-stub.c => accel/stubs/kvm-stub.c | 0 trace-events | 13 ------------- 9 files changed, 22 insertions(+), 17 deletions(-) create mode 100644 accel/kvm/Makefile.objs rename kvm-all.c => accel/kvm/kvm-all.c (99%) create mode 100644 accel/kvm/trace-events create mode 100644 accel/stubs/Makefile.objs rename kvm-stub.c => accel/stubs/kvm-stub.c (100%) diff --git a/Makefile.objs b/Makefile.objs index 871eca80e954..6e5c7069942a 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -163,6 +163,7 @@ trace-events-subdirs += qom trace-events-subdirs += linux-user trace-events-subdirs += qapi trace-events-subdirs += accel/tcg +trace-events-subdirs += accel/kvm trace-events-files = $(SRC_PATH)/trace-events $(trace-events-subdirs:%=$(SRC_PATH)/%/trace-events) diff --git a/Makefile.target b/Makefile.target index 73cd9c6ad72e..3d49f2945741 100644 --- a/Makefile.target +++ b/Makefile.target @@ -88,7 +88,7 @@ all: $(PROGS) stap ######################################################### # cpu emulator library -obj-y = exec.o +obj-y += exec.o obj-y += accel/ obj-y += tcg/tcg.o tcg/tcg-op.o tcg/optimize.o obj-y += tcg/tcg-common.o tcg/tcg-runtime.o @@ -99,7 +99,6 @@ obj-y += target/$(TARGET_BASE_ARCH)/ obj-y += disas.o obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o obj-$(call lnot,$(CONFIG_HAX)) += hax-stub.o -obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/decContext.o obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/decNumber.o @@ -140,7 +139,6 @@ ifdef CONFIG_SOFTMMU obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o obj-y += qtest.o bootdevice.o obj-y += hw/ -obj-$(CONFIG_KVM) += kvm-all.o obj-y += memory.o obj-y += memory_mapping.o obj-y += dump.o diff --git a/accel/Makefile.objs b/accel/Makefile.objs index 11ad8236090d..cd5702f3477e 100644 --- a/accel/Makefile.objs +++ b/accel/Makefile.objs @@ -1,2 +1,4 @@ obj-$(CONFIG_SOFTMMU) += accel.o +obj-y += kvm/ obj-y += tcg/ +obj-y += stubs/ diff --git a/accel/kvm/Makefile.objs b/accel/kvm/Makefile.objs new file mode 100644 index 000000000000..85351e7de7e8 --- /dev/null +++ b/accel/kvm/Makefile.objs @@ -0,0 +1 @@ +obj-$(CONFIG_KVM) += kvm-all.o diff --git a/kvm-all.c b/accel/kvm/kvm-all.c similarity index 99% rename from kvm-all.c rename to accel/kvm/kvm-all.c index 98ad151375cb..75feffa5042f 100644 --- a/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -36,7 +36,7 @@ #include "exec/ram_addr.h" #include "exec/address-spaces.h" #include "qemu/event_notifier.h" -#include "trace-root.h" +#include "trace.h" #include "hw/irq.h" #include "hw/boards.h" diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events new file mode 100644 index 000000000000..f89ba5578dc1 --- /dev/null +++ b/accel/kvm/trace-events @@ -0,0 +1,15 @@ +# Trace events for debugging and performance instrumentation + +# kvm-all.c +kvm_ioctl(int type, void *arg) "type 0x%x, arg %p" +kvm_vm_ioctl(int type, void *arg) "type 0x%x, arg %p" +kvm_vcpu_ioctl(int cpu_index, int type, void *arg) "cpu_index %d, type 0x%x, arg %p" +kvm_run_exit(int cpu_index, uint32_t reason) "cpu_index %d, reason %d" +kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p" +kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s" +kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s" +kvm_irqchip_commit_routes(void) "" +kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d" +kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" +kvm_irqchip_release_virq(int virq) "virq %d" + diff --git a/accel/stubs/Makefile.objs b/accel/stubs/Makefile.objs new file mode 100644 index 000000000000..bd5794f22297 --- /dev/null +++ b/accel/stubs/Makefile.objs @@ -0,0 +1 @@ +obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o diff --git a/kvm-stub.c b/accel/stubs/kvm-stub.c similarity index 100% rename from kvm-stub.c rename to accel/stubs/kvm-stub.c diff --git a/trace-events b/trace-events index 62d8100c8d70..bae63fdb1db5 100644 --- a/trace-events +++ b/trace-events @@ -55,19 +55,6 @@ dma_complete(void *dbs, int ret, void *cb) "dbs=%p ret=%d cb=%p" dma_blk_cb(void *dbs, int ret) "dbs=%p ret=%d" dma_map_wait(void *dbs) "dbs=%p" -# kvm-all.c -kvm_ioctl(int type, void *arg) "type 0x%x, arg %p" -kvm_vm_ioctl(int type, void *arg) "type 0x%x, arg %p" -kvm_vcpu_ioctl(int cpu_index, int type, void *arg) "cpu_index %d, type 0x%x, arg %p" -kvm_run_exit(int cpu_index, uint32_t reason) "cpu_index %d, reason %d" -kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p" -kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s" -kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s" -kvm_irqchip_commit_routes(void) "" -kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d" -kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" -kvm_irqchip_release_virq(int virq) "virq %d" - # memory.c memory_region_ops_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u" memory_region_ops_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u" From d1fdf257d52822695f5ace6c586e059aa17d4b79 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 2 Jun 2017 18:01:39 +0300 Subject: [PATCH 24/41] nbd: rename read_sync and friends Rename nbd_wr_syncv -> nbd_rwv read_sync -> nbd_read read_sync_eof -> nbd_read_eof write_sync -> nbd_write drop_sync -> nbd_drop 1. nbd_ prefix read_sync and write_sync are already shared, so it is good to have a namespace prefix. drop_sync will be shared, and read_sync_eof is related to read_sync, so let's rename them all. 2. _sync suffix _sync is related to the fact that nbd_wr_syncv doesn't return if a write to socket returns EAGAIN. The first implementation of nbd_wr_syncv (was wr_sync in 7a5ca8648b) just loops while getting EAGAIN, the current implementation yields in this case. Why we want to get rid of it: - it is normal for r/w functions to be synchronous, so having an additional suffix for it looks redundant (contrariwise, we have _aio suffix for async functions) - _sync suffix in block layer is used when function does flush (so using it for other thing is confusing a bit) - keep function names short after adding nbd_ prefix 3. for nbd_wr_syncv let's use more common notation 'rw' Reviewed-by: Eric Blake Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20170602150150.258222-2-vsementsov@virtuozzo.com> Signed-off-by: Paolo Bonzini --- block/nbd-client.c | 8 ++++---- include/block/nbd.h | 8 ++------ nbd/client.c | 42 +++++++++++++++++++++--------------------- nbd/common.c | 8 ++------ nbd/nbd-internal.h | 26 +++++++++++++------------- nbd/server.c | 12 ++++++------ 6 files changed, 48 insertions(+), 56 deletions(-) diff --git a/block/nbd-client.c b/block/nbd-client.c index 87d19c72536c..d64e775385d9 100644 --- a/block/nbd-client.c +++ b/block/nbd-client.c @@ -144,8 +144,8 @@ static int nbd_co_send_request(BlockDriverState *bs, qio_channel_set_cork(s->ioc, true); rc = nbd_send_request(s->ioc, request); if (rc >= 0) { - ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len, - false, NULL); + ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, false, + NULL); if (ret != request->len) { rc = -EIO; } @@ -173,8 +173,8 @@ static void nbd_co_receive_reply(NBDClientSession *s, reply->error = EIO; } else { if (qiov && reply->error == 0) { - ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len, - true, NULL); + ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, true, + NULL); if (ret != request->len) { reply->error = EIO; } diff --git a/include/block/nbd.h b/include/block/nbd.h index 8fa5ce51f39d..6d75d5a67027 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -123,12 +123,8 @@ enum { * aren't overflowing some other buffer. */ #define NBD_MAX_NAME_SIZE 256 -ssize_t nbd_wr_syncv(QIOChannel *ioc, - struct iovec *iov, - size_t niov, - size_t length, - bool do_read, - Error **errp); +ssize_t nbd_rwv(QIOChannel *ioc, struct iovec *iov, size_t niov, size_t length, + bool do_read, Error **errp); int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, QCryptoTLSCreds *tlscreds, const char *hostname, QIOChannel **outioc, diff --git a/nbd/client.c b/nbd/client.c index 595d99ed3032..17ac85ff5739 100644 --- a/nbd/client.c +++ b/nbd/client.c @@ -88,7 +88,7 @@ static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); /* Discard length bytes from channel. Return -errno on failure and 0 on * success*/ -static int drop_sync(QIOChannel *ioc, size_t size, Error **errp) +static int nbd_drop(QIOChannel *ioc, size_t size, Error **errp) { ssize_t ret = 0; char small[1024]; @@ -97,7 +97,7 @@ static int drop_sync(QIOChannel *ioc, size_t size, Error **errp) buffer = sizeof(small) >= size ? small : g_malloc(MIN(65536, size)); while (size > 0) { ssize_t count = MIN(65536, size); - ret = read_sync(ioc, buffer, MIN(65536, size), errp); + ret = nbd_read(ioc, buffer, MIN(65536, size), errp); if (ret < 0) { goto cleanup; @@ -135,12 +135,12 @@ static int nbd_send_option_request(QIOChannel *ioc, uint32_t opt, stl_be_p(&req.option, opt); stl_be_p(&req.length, len); - if (write_sync(ioc, &req, sizeof(req), errp) < 0) { + if (nbd_write(ioc, &req, sizeof(req), errp) < 0) { error_prepend(errp, "Failed to send option request header"); return -1; } - if (len && write_sync(ioc, (char *) data, len, errp) < 0) { + if (len && nbd_write(ioc, (char *) data, len, errp) < 0) { error_prepend(errp, "Failed to send option request data"); return -1; } @@ -169,7 +169,7 @@ static int nbd_receive_option_reply(QIOChannel *ioc, uint32_t opt, nbd_opt_reply *reply, Error **errp) { QEMU_BUILD_BUG_ON(sizeof(*reply) != 20); - if (read_sync(ioc, reply, sizeof(*reply), errp) < 0) { + if (nbd_read(ioc, reply, sizeof(*reply), errp) < 0) { error_prepend(errp, "failed to read option reply"); nbd_send_opt_abort(ioc); return -1; @@ -218,7 +218,7 @@ static int nbd_handle_reply_err(QIOChannel *ioc, nbd_opt_reply *reply, goto cleanup; } msg = g_malloc(reply->length + 1); - if (read_sync(ioc, msg, reply->length, errp) < 0) { + if (nbd_read(ioc, msg, reply->length, errp) < 0) { error_prepend(errp, "failed to read option error message"); goto cleanup; } @@ -320,7 +320,7 @@ static int nbd_receive_list(QIOChannel *ioc, const char *want, bool *match, nbd_send_opt_abort(ioc); return -1; } - if (read_sync(ioc, &namelen, sizeof(namelen), errp) < 0) { + if (nbd_read(ioc, &namelen, sizeof(namelen), errp) < 0) { error_prepend(errp, "failed to read option name length"); nbd_send_opt_abort(ioc); return -1; @@ -333,7 +333,7 @@ static int nbd_receive_list(QIOChannel *ioc, const char *want, bool *match, return -1; } if (namelen != strlen(want)) { - if (drop_sync(ioc, len, errp) < 0) { + if (nbd_drop(ioc, len, errp) < 0) { error_prepend(errp, "failed to skip export name with wrong length"); nbd_send_opt_abort(ioc); return -1; @@ -342,14 +342,14 @@ static int nbd_receive_list(QIOChannel *ioc, const char *want, bool *match, } assert(namelen < sizeof(name)); - if (read_sync(ioc, name, namelen, errp) < 0) { + if (nbd_read(ioc, name, namelen, errp) < 0) { error_prepend(errp, "failed to read export name"); nbd_send_opt_abort(ioc); return -1; } name[namelen] = '\0'; len -= namelen; - if (drop_sync(ioc, len, errp) < 0) { + if (nbd_drop(ioc, len, errp) < 0) { error_prepend(errp, "failed to read export description"); nbd_send_opt_abort(ioc); return -1; @@ -476,7 +476,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, goto fail; } - if (read_sync(ioc, buf, 8, errp) < 0) { + if (nbd_read(ioc, buf, 8, errp) < 0) { error_prepend(errp, "Failed to read data"); goto fail; } @@ -502,7 +502,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, goto fail; } - if (read_sync(ioc, &magic, sizeof(magic), errp) < 0) { + if (nbd_read(ioc, &magic, sizeof(magic), errp) < 0) { error_prepend(errp, "Failed to read magic"); goto fail; } @@ -514,7 +514,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, uint16_t globalflags; bool fixedNewStyle = false; - if (read_sync(ioc, &globalflags, sizeof(globalflags), errp) < 0) { + if (nbd_read(ioc, &globalflags, sizeof(globalflags), errp) < 0) { error_prepend(errp, "Failed to read server flags"); goto fail; } @@ -532,7 +532,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, } /* client requested flags */ clientflags = cpu_to_be32(clientflags); - if (write_sync(ioc, &clientflags, sizeof(clientflags), errp) < 0) { + if (nbd_write(ioc, &clientflags, sizeof(clientflags), errp) < 0) { error_prepend(errp, "Failed to send clientflags field"); goto fail; } @@ -570,13 +570,13 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, } /* Read the response */ - if (read_sync(ioc, &s, sizeof(s), errp) < 0) { + if (nbd_read(ioc, &s, sizeof(s), errp) < 0) { error_prepend(errp, "Failed to read export length"); goto fail; } *size = be64_to_cpu(s); - if (read_sync(ioc, flags, sizeof(*flags), errp) < 0) { + if (nbd_read(ioc, flags, sizeof(*flags), errp) < 0) { error_prepend(errp, "Failed to read export flags"); goto fail; } @@ -593,14 +593,14 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, goto fail; } - if (read_sync(ioc, &s, sizeof(s), errp) < 0) { + if (nbd_read(ioc, &s, sizeof(s), errp) < 0) { error_prepend(errp, "Failed to read export length"); goto fail; } *size = be64_to_cpu(s); TRACE("Size is %" PRIu64, *size); - if (read_sync(ioc, &oldflags, sizeof(oldflags), errp) < 0) { + if (nbd_read(ioc, &oldflags, sizeof(oldflags), errp) < 0) { error_prepend(errp, "Failed to read export flags"); goto fail; } @@ -616,7 +616,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, } TRACE("Size is %" PRIu64 ", export flags %" PRIx16, *size, *flags); - if (zeroes && drop_sync(ioc, 124, errp) < 0) { + if (zeroes && nbd_drop(ioc, 124, errp) < 0) { error_prepend(errp, "Failed to read reserved block"); goto fail; } @@ -759,7 +759,7 @@ ssize_t nbd_send_request(QIOChannel *ioc, NBDRequest *request) stq_be_p(buf + 16, request->from); stl_be_p(buf + 24, request->len); - return write_sync(ioc, buf, sizeof(buf), NULL); + return nbd_write(ioc, buf, sizeof(buf), NULL); } ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp) @@ -768,7 +768,7 @@ ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp) uint32_t magic; ssize_t ret; - ret = read_sync_eof(ioc, buf, sizeof(buf), errp); + ret = nbd_read_eof(ioc, buf, sizeof(buf), errp); if (ret <= 0) { return ret; } diff --git a/nbd/common.c b/nbd/common.c index bd81637ab944..d6b719ddaa2e 100644 --- a/nbd/common.c +++ b/nbd/common.c @@ -24,12 +24,8 @@ * The function may be called from coroutine or from non-coroutine context. * When called from non-coroutine context @ioc must be in blocking mode. */ -ssize_t nbd_wr_syncv(QIOChannel *ioc, - struct iovec *iov, - size_t niov, - size_t length, - bool do_read, - Error **errp) +ssize_t nbd_rwv(QIOChannel *ioc, struct iovec *iov, size_t niov, size_t length, + bool do_read, Error **errp) { ssize_t done = 0; struct iovec *local_iov = g_new(struct iovec, niov); diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h index d6071640a0e3..753cb9d9c5d1 100644 --- a/nbd/nbd-internal.h +++ b/nbd/nbd-internal.h @@ -94,14 +94,14 @@ #define NBD_ENOSPC 28 #define NBD_ESHUTDOWN 108 -/* read_sync_eof +/* nbd_read_eof * Tries to read @size bytes from @ioc. Returns number of bytes actually read. * May return a value >= 0 and < size only on EOF, i.e. when iteratively called - * qio_channel_readv() returns 0. So, there are no needs to call read_sync_eof + * qio_channel_readv() returns 0. So, there is no need to call nbd_read_eof * iteratively. */ -static inline ssize_t read_sync_eof(QIOChannel *ioc, void *buffer, size_t size, - Error **errp) +static inline ssize_t nbd_read_eof(QIOChannel *ioc, void *buffer, size_t size, + Error **errp) { struct iovec iov = { .iov_base = buffer, .iov_len = size }; /* Sockets are kept in blocking mode in the negotiation phase. After @@ -109,16 +109,16 @@ static inline ssize_t read_sync_eof(QIOChannel *ioc, void *buffer, size_t size, * our request/reply. Synchronization is done with recv_coroutine, so * that this is coroutine-safe. */ - return nbd_wr_syncv(ioc, &iov, 1, size, true, errp); + return nbd_rwv(ioc, &iov, 1, size, true, errp); } -/* read_sync +/* nbd_read * Reads @size bytes from @ioc. Returns 0 on success. */ -static inline int read_sync(QIOChannel *ioc, void *buffer, size_t size, - Error **errp) +static inline int nbd_read(QIOChannel *ioc, void *buffer, size_t size, + Error **errp) { - ssize_t ret = read_sync_eof(ioc, buffer, size, errp); + ssize_t ret = nbd_read_eof(ioc, buffer, size, errp); if (ret >= 0 && ret != size) { ret = -EINVAL; @@ -128,15 +128,15 @@ static inline int read_sync(QIOChannel *ioc, void *buffer, size_t size, return ret < 0 ? ret : 0; } -/* write_sync +/* nbd_write * Writes @size bytes to @ioc. Returns 0 on success. */ -static inline int write_sync(QIOChannel *ioc, const void *buffer, size_t size, - Error **errp) +static inline int nbd_write(QIOChannel *ioc, const void *buffer, size_t size, + Error **errp) { struct iovec iov = { .iov_base = (void *) buffer, .iov_len = size }; - ssize_t ret = nbd_wr_syncv(ioc, &iov, 1, size, false, errp); + ssize_t ret = nbd_rwv(ioc, &iov, 1, size, false, errp); assert(ret < 0 || ret == size); diff --git a/nbd/server.c b/nbd/server.c index f2b1aa47ce8c..d8bd9270131c 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -124,7 +124,7 @@ static int nbd_negotiate_read(QIOChannel *ioc, void *buffer, size_t size) nbd_negotiate_continue, qemu_coroutine_self(), NULL); - ret = read_sync(ioc, buffer, size, NULL); + ret = nbd_read(ioc, buffer, size, NULL); g_source_remove(watch); return ret; @@ -142,7 +142,7 @@ static int nbd_negotiate_write(QIOChannel *ioc, const void *buffer, size_t size) nbd_negotiate_continue, qemu_coroutine_self(), NULL); - ret = write_sync(ioc, buffer, size, NULL); + ret = nbd_write(ioc, buffer, size, NULL); g_source_remove(watch); return ret; } @@ -694,7 +694,7 @@ static ssize_t nbd_receive_request(QIOChannel *ioc, NBDRequest *request) uint32_t magic; ssize_t ret; - ret = read_sync(ioc, buf, sizeof(buf), NULL); + ret = nbd_read(ioc, buf, sizeof(buf), NULL); if (ret < 0) { return ret; } @@ -745,7 +745,7 @@ static ssize_t nbd_send_reply(QIOChannel *ioc, NBDReply *reply) stl_be_p(buf + 4, reply->error); stq_be_p(buf + 8, reply->handle); - return write_sync(ioc, buf, sizeof(buf), NULL); + return nbd_write(ioc, buf, sizeof(buf), NULL); } #define MAX_NBD_REQUESTS 16 @@ -1048,7 +1048,7 @@ static ssize_t nbd_co_send_reply(NBDRequestData *req, NBDReply *reply, qio_channel_set_cork(client->ioc, true); rc = nbd_send_reply(client->ioc, reply); if (rc >= 0) { - ret = write_sync(client->ioc, req->data, len, NULL); + ret = nbd_write(client->ioc, req->data, len, NULL); if (ret < 0) { rc = -EIO; } @@ -1123,7 +1123,7 @@ static ssize_t nbd_co_receive_request(NBDRequestData *req, if (request->type == NBD_CMD_WRITE) { TRACE("Reading %" PRIu32 " byte(s)", request->len); - if (read_sync(client->ioc, req->data, request->len, NULL) < 0) { + if (nbd_read(client->ioc, req->data, request->len, NULL) < 0) { LOG("reading from socket failed"); rc = -EIO; goto out; From 44298024d30ad36439707b89715a76333f58791b Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 2 Jun 2017 18:01:40 +0300 Subject: [PATCH 25/41] nbd: make nbd_drop public Following commit will reuse it for nbd server too. Reviewed-by: Eric Blake Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20170602150150.258222-3-vsementsov@virtuozzo.com> Signed-off-by: Paolo Bonzini --- nbd/client.c | 26 -------------------------- nbd/common.c | 26 ++++++++++++++++++++++++++ nbd/nbd-internal.h | 2 ++ 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/nbd/client.c b/nbd/client.c index 17ac85ff5739..b97143fa609f 100644 --- a/nbd/client.c +++ b/nbd/client.c @@ -86,32 +86,6 @@ static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); */ -/* Discard length bytes from channel. Return -errno on failure and 0 on - * success*/ -static int nbd_drop(QIOChannel *ioc, size_t size, Error **errp) -{ - ssize_t ret = 0; - char small[1024]; - char *buffer; - - buffer = sizeof(small) >= size ? small : g_malloc(MIN(65536, size)); - while (size > 0) { - ssize_t count = MIN(65536, size); - ret = nbd_read(ioc, buffer, MIN(65536, size), errp); - - if (ret < 0) { - goto cleanup; - } - size -= count; - } - - cleanup: - if (buffer != small) { - g_free(buffer); - } - return ret; -} - /* Send an option request. * * The request is for option @opt, with @data containing @len bytes of diff --git a/nbd/common.c b/nbd/common.c index d6b719ddaa2e..6b5c1b7b0246 100644 --- a/nbd/common.c +++ b/nbd/common.c @@ -65,6 +65,32 @@ ssize_t nbd_rwv(QIOChannel *ioc, struct iovec *iov, size_t niov, size_t length, return done; } +/* Discard length bytes from channel. Return -errno on failure and 0 on + * success */ +int nbd_drop(QIOChannel *ioc, size_t size, Error **errp) +{ + ssize_t ret = 0; + char small[1024]; + char *buffer; + + buffer = sizeof(small) >= size ? small : g_malloc(MIN(65536, size)); + while (size > 0) { + ssize_t count = MIN(65536, size); + ret = nbd_read(ioc, buffer, MIN(65536, size), errp); + + if (ret < 0) { + goto cleanup; + } + size -= count; + } + + cleanup: + if (buffer != small) { + g_free(buffer); + } + return ret; +} + void nbd_tls_handshake(QIOTask *task, void *opaque) diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h index 753cb9d9c5d1..39bfed177c59 100644 --- a/nbd/nbd-internal.h +++ b/nbd/nbd-internal.h @@ -153,4 +153,6 @@ struct NBDTLSHandshakeData { void nbd_tls_handshake(QIOTask *task, void *opaque); +int nbd_drop(QIOChannel *ioc, size_t size, Error **errp); + #endif From 2b0bbc4f8809c972bad134bc1a2570dbb01dea0b Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 2 Jun 2017 18:01:41 +0300 Subject: [PATCH 26/41] nbd/server: get rid of nbd_negotiate_read and friends Functions nbd_negotiate_{read,write,drop_sync} were introduced in 1a6245a5b, when nbd_rwv (was nbd_wr_sync) was working through qemu_co_sendv_recvv (the path is nbd_wr_sync -> qemu_co_{recv/send} -> qemu_co_send_recv -> qemu_co_sendv_recvv), which just yields, without setting any handlers. But starting from ff82911cd nbd_rwv (was nbd_wr_syncv) works through qio_channel_yield() which sets handlers, so watchers are redundant in nbd_negotiate_{read,write,drop_sync}, then, let's just use nbd_{read,write,drop} functions. Functions nbd_{read,write,drop} has errp parameter, which is unused in this patch. This will be fixed later. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Message-Id: <20170602150150.258222-4-vsementsov@virtuozzo.com> Signed-off-by: Paolo Bonzini --- nbd/server.c | 107 +++++++++++---------------------------------------- 1 file changed, 22 insertions(+), 85 deletions(-) diff --git a/nbd/server.c b/nbd/server.c index d8bd9270131c..7f44ef0b1505 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -104,69 +104,6 @@ struct NBDClient { static void nbd_client_receive_next_request(NBDClient *client); -static gboolean nbd_negotiate_continue(QIOChannel *ioc, - GIOCondition condition, - void *opaque) -{ - qemu_coroutine_enter(opaque); - return TRUE; -} - -static int nbd_negotiate_read(QIOChannel *ioc, void *buffer, size_t size) -{ - ssize_t ret; - guint watch; - - assert(qemu_in_coroutine()); - /* Negotiation are always in main loop. */ - watch = qio_channel_add_watch(ioc, - G_IO_IN, - nbd_negotiate_continue, - qemu_coroutine_self(), - NULL); - ret = nbd_read(ioc, buffer, size, NULL); - g_source_remove(watch); - return ret; - -} - -static int nbd_negotiate_write(QIOChannel *ioc, const void *buffer, size_t size) -{ - ssize_t ret; - guint watch; - - assert(qemu_in_coroutine()); - /* Negotiation are always in main loop. */ - watch = qio_channel_add_watch(ioc, - G_IO_OUT, - nbd_negotiate_continue, - qemu_coroutine_self(), - NULL); - ret = nbd_write(ioc, buffer, size, NULL); - g_source_remove(watch); - return ret; -} - -static int nbd_negotiate_drop_sync(QIOChannel *ioc, size_t size) -{ - ssize_t ret; - uint8_t *buffer = g_malloc(MIN(65536, size)); - - while (size > 0) { - size_t count = MIN(65536, size); - ret = nbd_negotiate_read(ioc, buffer, count); - if (ret < 0) { - g_free(buffer); - return ret; - } - - size -= count; - } - - g_free(buffer); - return 0; -} - /* Basic flow for negotiation Server Client @@ -205,22 +142,22 @@ static int nbd_negotiate_send_rep_len(QIOChannel *ioc, uint32_t type, type, opt, len); magic = cpu_to_be64(NBD_REP_MAGIC); - if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) < 0) { + if (nbd_write(ioc, &magic, sizeof(magic), NULL) < 0) { LOG("write failed (rep magic)"); return -EINVAL; } opt = cpu_to_be32(opt); - if (nbd_negotiate_write(ioc, &opt, sizeof(opt)) < 0) { + if (nbd_write(ioc, &opt, sizeof(opt), NULL) < 0) { LOG("write failed (rep opt)"); return -EINVAL; } type = cpu_to_be32(type); - if (nbd_negotiate_write(ioc, &type, sizeof(type)) < 0) { + if (nbd_write(ioc, &type, sizeof(type), NULL) < 0) { LOG("write failed (rep type)"); return -EINVAL; } len = cpu_to_be32(len); - if (nbd_negotiate_write(ioc, &len, sizeof(len)) < 0) { + if (nbd_write(ioc, &len, sizeof(len), NULL) < 0) { LOG("write failed (rep data length)"); return -EINVAL; } @@ -255,7 +192,7 @@ nbd_negotiate_send_rep_err(QIOChannel *ioc, uint32_t type, if (ret < 0) { goto out; } - if (nbd_negotiate_write(ioc, msg, len) < 0) { + if (nbd_write(ioc, msg, len, NULL) < 0) { LOG("write failed (error message)"); ret = -EIO; } else { @@ -286,15 +223,15 @@ static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp) } len = cpu_to_be32(name_len); - if (nbd_negotiate_write(ioc, &len, sizeof(len)) < 0) { + if (nbd_write(ioc, &len, sizeof(len), NULL) < 0) { LOG("write failed (name length)"); return -EINVAL; } - if (nbd_negotiate_write(ioc, name, name_len) < 0) { + if (nbd_write(ioc, name, name_len, NULL) < 0) { LOG("write failed (name buffer)"); return -EINVAL; } - if (nbd_negotiate_write(ioc, desc, desc_len) < 0) { + if (nbd_write(ioc, desc, desc_len, NULL) < 0) { LOG("write failed (description buffer)"); return -EINVAL; } @@ -308,7 +245,7 @@ static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length) NBDExport *exp; if (length) { - if (nbd_negotiate_drop_sync(client->ioc, length) < 0) { + if (nbd_drop(client->ioc, length, NULL) < 0) { return -EIO; } return nbd_negotiate_send_rep_err(client->ioc, @@ -339,7 +276,7 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length) LOG("Bad length received"); goto fail; } - if (nbd_negotiate_read(client->ioc, name, length) < 0) { + if (nbd_read(client->ioc, name, length, NULL) < 0) { LOG("read failed"); goto fail; } @@ -372,7 +309,7 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, TRACE("Setting up TLS"); ioc = client->ioc; if (length) { - if (nbd_negotiate_drop_sync(ioc, length) < 0) { + if (nbd_drop(ioc, length, NULL) < 0) { return NULL; } nbd_negotiate_send_rep_err(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS, @@ -436,7 +373,7 @@ static int nbd_negotiate_options(NBDClient *client) ... Rest of request */ - if (nbd_negotiate_read(client->ioc, &flags, sizeof(flags)) < 0) { + if (nbd_read(client->ioc, &flags, sizeof(flags), NULL) < 0) { LOG("read failed"); return -EIO; } @@ -462,7 +399,7 @@ static int nbd_negotiate_options(NBDClient *client) uint32_t clientflags, length; uint64_t magic; - if (nbd_negotiate_read(client->ioc, &magic, sizeof(magic)) < 0) { + if (nbd_read(client->ioc, &magic, sizeof(magic), NULL) < 0) { LOG("read failed"); return -EINVAL; } @@ -472,15 +409,15 @@ static int nbd_negotiate_options(NBDClient *client) return -EINVAL; } - if (nbd_negotiate_read(client->ioc, &clientflags, - sizeof(clientflags)) < 0) + if (nbd_read(client->ioc, &clientflags, + sizeof(clientflags), NULL) < 0) { LOG("read failed"); return -EINVAL; } clientflags = be32_to_cpu(clientflags); - if (nbd_negotiate_read(client->ioc, &length, sizeof(length)) < 0) { + if (nbd_read(client->ioc, &length, sizeof(length), NULL) < 0) { LOG("read failed"); return -EINVAL; } @@ -510,7 +447,7 @@ static int nbd_negotiate_options(NBDClient *client) return -EINVAL; default: - if (nbd_negotiate_drop_sync(client->ioc, length) < 0) { + if (nbd_drop(client->ioc, length, NULL) < 0) { return -EIO; } ret = nbd_negotiate_send_rep_err(client->ioc, @@ -548,7 +485,7 @@ static int nbd_negotiate_options(NBDClient *client) return nbd_negotiate_handle_export_name(client, length); case NBD_OPT_STARTTLS: - if (nbd_negotiate_drop_sync(client->ioc, length) < 0) { + if (nbd_drop(client->ioc, length, NULL) < 0) { return -EIO; } if (client->tlscreds) { @@ -567,7 +504,7 @@ static int nbd_negotiate_options(NBDClient *client) } break; default: - if (nbd_negotiate_drop_sync(client->ioc, length) < 0) { + if (nbd_drop(client->ioc, length, NULL) < 0) { return -EIO; } ret = nbd_negotiate_send_rep_err(client->ioc, @@ -656,12 +593,12 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) TRACE("TLS cannot be enabled with oldstyle protocol"); goto fail; } - if (nbd_negotiate_write(client->ioc, buf, sizeof(buf)) < 0) { + if (nbd_write(client->ioc, buf, sizeof(buf), NULL) < 0) { LOG("write failed"); goto fail; } } else { - if (nbd_negotiate_write(client->ioc, buf, 18) < 0) { + if (nbd_write(client->ioc, buf, 18, NULL) < 0) { LOG("write failed"); goto fail; } @@ -676,7 +613,7 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) stq_be_p(buf + 18, client->exp->size); stw_be_p(buf + 26, client->exp->nbdflags | myflags); len = client->no_zeroes ? 10 : sizeof(buf) - 18; - if (nbd_negotiate_write(client->ioc, buf + 18, len) < 0) { + if (nbd_write(client->ioc, buf + 18, len, NULL) < 0) { LOG("write failed"); goto fail; } From a0dc63a6b76c3ee071d3300896039da434e1c2c9 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 2 Jun 2017 18:01:42 +0300 Subject: [PATCH 27/41] nbd/server: get rid of ssize_t Now nbd_read and friends return int, so get rid of ssize_t. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Message-Id: <20170602150150.258222-5-vsementsov@virtuozzo.com> Signed-off-by: Paolo Bonzini --- nbd/server.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/nbd/server.c b/nbd/server.c index 7f44ef0b1505..895cd7f132b4 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -625,11 +625,11 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) return rc; } -static ssize_t nbd_receive_request(QIOChannel *ioc, NBDRequest *request) +static int nbd_receive_request(QIOChannel *ioc, NBDRequest *request) { uint8_t buf[NBD_REQUEST_SIZE]; uint32_t magic; - ssize_t ret; + int ret; ret = nbd_read(ioc, buf, sizeof(buf), NULL); if (ret < 0) { @@ -663,7 +663,7 @@ static ssize_t nbd_receive_request(QIOChannel *ioc, NBDRequest *request) return 0; } -static ssize_t nbd_send_reply(QIOChannel *ioc, NBDReply *reply) +static int nbd_send_reply(QIOChannel *ioc, NBDReply *reply) { uint8_t buf[NBD_REPLY_SIZE]; @@ -969,11 +969,10 @@ void nbd_export_close_all(void) } } -static ssize_t nbd_co_send_reply(NBDRequestData *req, NBDReply *reply, - int len) +static int nbd_co_send_reply(NBDRequestData *req, NBDReply *reply, int len) { NBDClient *client = req->client; - ssize_t rc, ret; + int rc, ret; g_assert(qemu_in_coroutine()); qemu_co_mutex_lock(&client->send_lock); @@ -1003,11 +1002,10 @@ static ssize_t nbd_co_send_reply(NBDRequestData *req, NBDReply *reply, * and any other negative value to report an error to the client * (although the caller may still need to disconnect after reporting * the error). */ -static ssize_t nbd_co_receive_request(NBDRequestData *req, - NBDRequest *request) +static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request) { NBDClient *client = req->client; - ssize_t rc; + int rc; g_assert(qemu_in_coroutine()); assert(client->recv_coroutine == qemu_coroutine_self()); @@ -1105,7 +1103,7 @@ static coroutine_fn void nbd_trip(void *opaque) NBDRequestData *req; NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */ NBDReply reply; - ssize_t ret; + int ret; int flags; TRACE("Reading request."); From 572b97e7223a75c14e1620440c4f887b4e8b3da3 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 2 Jun 2017 18:01:43 +0300 Subject: [PATCH 28/41] nbd/server: refactor nbd_co_send_reply As nbd_write never returns value > 0, we can get rid of extra ret. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Message-Id: <20170602150150.258222-6-vsementsov@virtuozzo.com> Signed-off-by: Paolo Bonzini --- nbd/server.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nbd/server.c b/nbd/server.c index 895cd7f132b4..8ceac89a23be 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -972,7 +972,7 @@ void nbd_export_close_all(void) static int nbd_co_send_reply(NBDRequestData *req, NBDReply *reply, int len) { NBDClient *client = req->client; - int rc, ret; + int rc; g_assert(qemu_in_coroutine()); qemu_co_mutex_lock(&client->send_lock); @@ -983,9 +983,9 @@ static int nbd_co_send_reply(NBDRequestData *req, NBDReply *reply, int len) } else { qio_channel_set_cork(client->ioc, true); rc = nbd_send_reply(client->ioc, reply); - if (rc >= 0) { - ret = nbd_write(client->ioc, req->data, len, NULL); - if (ret < 0) { + if (rc == 0) { + rc = nbd_write(client->ioc, req->data, len, NULL); + if (rc < 0) { rc = -EIO; } } From 2a6e128bfaf4c6661dac2dfdca1162d275c88da2 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 2 Jun 2017 18:01:44 +0300 Subject: [PATCH 29/41] nbd/server: get rid of EAGAIN dead code For now nbd_read never returns EAGAIN. So, don't handle it. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Message-Id: <20170602150150.258222-7-vsementsov@virtuozzo.com> Signed-off-by: Paolo Bonzini --- nbd/server.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/nbd/server.c b/nbd/server.c index 8ceac89a23be..00a19f6abbdc 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -997,11 +997,12 @@ static int nbd_co_send_reply(NBDRequestData *req, NBDReply *reply, int len) return rc; } -/* Collect a client request. Return 0 if request looks valid, -EAGAIN - * to keep trying the collection, -EIO to drop connection right away, - * and any other negative value to report an error to the client - * (although the caller may still need to disconnect after reporting - * the error). */ +/* nbd_co_receive_request + * Collect a client request. Return 0 if request looks valid, -EIO to drop + * connection right away, and any other negative value to report an error to + * the client (although the caller may still need to disconnect after reporting + * the error). + */ static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request) { NBDClient *client = req->client; @@ -1011,9 +1012,7 @@ static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request) assert(client->recv_coroutine == qemu_coroutine_self()); rc = nbd_receive_request(client->ioc, request); if (rc < 0) { - if (rc != -EAGAIN) { - rc = -EIO; - } + rc = -EIO; goto out; } @@ -1114,9 +1113,6 @@ static coroutine_fn void nbd_trip(void *opaque) req = nbd_request_get(client); ret = nbd_co_receive_request(req, &request); - if (ret == -EAGAIN) { - goto done; - } if (ret == -EIO) { goto out; } From ee898b870fe864bf93370482aafbece1ca205dc3 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 2 Jun 2017 18:01:45 +0300 Subject: [PATCH 30/41] nbd/server: refactor nbd_co_receive_request Move function tail, about receiving next request out of the function. Error path is simplified and nbd_co_receive_request becomes more corresponding to its name. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Message-Id: <20170602150150.258222-8-vsementsov@virtuozzo.com> Signed-off-by: Paolo Bonzini --- nbd/server.c | 41 +++++++++++++---------------------------- 1 file changed, 13 insertions(+), 28 deletions(-) diff --git a/nbd/server.c b/nbd/server.c index 00a19f6abbdc..9fb03ce0a73e 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -1006,14 +1006,11 @@ static int nbd_co_send_reply(NBDRequestData *req, NBDReply *reply, int len) static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request) { NBDClient *client = req->client; - int rc; g_assert(qemu_in_coroutine()); assert(client->recv_coroutine == qemu_coroutine_self()); - rc = nbd_receive_request(client->ioc, request); - if (rc < 0) { - rc = -EIO; - goto out; + if (nbd_receive_request(client->ioc, request) < 0) { + return -EIO; } TRACE("Decoding type"); @@ -1027,8 +1024,7 @@ static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request) /* Special case: we're going to disconnect without a reply, * whether or not flags, from, or len are bogus */ TRACE("Request type is DISCONNECT"); - rc = -EIO; - goto out; + return -EIO; } /* Check for sanity in the parameters, part 1. Defer as many @@ -1036,22 +1032,19 @@ static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request) * payload, so we can try and keep the connection alive. */ if ((request->from + request->len) < request->from) { LOG("integer overflow detected, you're probably being attacked"); - rc = -EINVAL; - goto out; + return -EINVAL; } if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE) { if (request->len > NBD_MAX_BUFFER_SIZE) { LOG("len (%" PRIu32" ) is larger than max len (%u)", request->len, NBD_MAX_BUFFER_SIZE); - rc = -EINVAL; - goto out; + return -EINVAL; } req->data = blk_try_blockalign(client->exp->blk, request->len); if (req->data == NULL) { - rc = -ENOMEM; - goto out; + return -ENOMEM; } } if (request->type == NBD_CMD_WRITE) { @@ -1059,8 +1052,7 @@ static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request) if (nbd_read(client->ioc, req->data, request->len, NULL) < 0) { LOG("reading from socket failed"); - rc = -EIO; - goto out; + return -EIO; } req->complete = true; } @@ -1070,28 +1062,19 @@ static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request) LOG("operation past EOF; From: %" PRIu64 ", Len: %" PRIu32 ", Size: %" PRIu64, request->from, request->len, (uint64_t)client->exp->size); - rc = request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL; - goto out; + return request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL; } if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) { LOG("unsupported flags (got 0x%x)", request->flags); - rc = -EINVAL; - goto out; + return -EINVAL; } if (request->type != NBD_CMD_WRITE_ZEROES && (request->flags & NBD_CMD_FLAG_NO_HOLE)) { LOG("unexpected flags (got 0x%x)", request->flags); - rc = -EINVAL; - goto out; + return -EINVAL; } - rc = 0; - -out: - client->recv_coroutine = NULL; - nbd_client_receive_next_request(client); - - return rc; + return 0; } /* Owns a reference to the NBDClient passed as opaque. */ @@ -1113,6 +1096,8 @@ static coroutine_fn void nbd_trip(void *opaque) req = nbd_request_get(client); ret = nbd_co_receive_request(req, &request); + client->recv_coroutine = NULL; + nbd_client_receive_next_request(client); if (ret == -EIO) { goto out; } From c84087f2f52f6e87ad1cbd07e5502ab748c17039 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 2 Jun 2017 18:01:46 +0300 Subject: [PATCH 31/41] nbd/server: remove NBDClientNewData "co" field of NBDClientNewData has never been used, all the way back to its declaration in commit 1a6245a5. So let's just use client pointer instead of extra structure. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Message-Id: <20170602150150.258222-9-vsementsov@virtuozzo.com> Signed-off-by: Paolo Bonzini --- nbd/server.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/nbd/server.c b/nbd/server.c index 9fb03ce0a73e..d52682741f01 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -535,14 +535,8 @@ static int nbd_negotiate_options(NBDClient *client) } } -typedef struct { - NBDClient *client; - Coroutine *co; -} NBDClientNewData; - -static coroutine_fn int nbd_negotiate(NBDClientNewData *data) +static coroutine_fn int nbd_negotiate(NBDClient *client) { - NBDClient *client = data->client; char buf[8 + 8 + 8 + 128]; int rc; const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM | @@ -1268,8 +1262,7 @@ static void nbd_client_receive_next_request(NBDClient *client) static coroutine_fn void nbd_co_client_start(void *opaque) { - NBDClientNewData *data = opaque; - NBDClient *client = data->client; + NBDClient *client = opaque; NBDExport *exp = client->exp; if (exp) { @@ -1278,15 +1271,12 @@ static coroutine_fn void nbd_co_client_start(void *opaque) } qemu_co_mutex_init(&client->send_lock); - if (nbd_negotiate(data)) { + if (nbd_negotiate(client)) { client_close(client, false); - goto out; + return; } nbd_client_receive_next_request(client); - -out: - g_free(data); } /* @@ -1302,7 +1292,7 @@ void nbd_client_new(NBDExport *exp, void (*close_fn)(NBDClient *, bool)) { NBDClient *client; - NBDClientNewData *data = g_new(NBDClientNewData, 1); + Coroutine *co; client = g_malloc0(sizeof(NBDClient)); client->refcount = 1; @@ -1318,7 +1308,6 @@ void nbd_client_new(NBDExport *exp, object_ref(OBJECT(client->ioc)); client->close_fn = close_fn; - data->client = client; - data->co = qemu_coroutine_create(nbd_co_client_start, data); - qemu_coroutine_enter(data->co); + co = qemu_coroutine_create(nbd_co_client_start, client); + qemu_coroutine_enter(co); } From 7798d3aab981b1ccb29ba7343368a55a32a26d30 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 2 Jun 2017 18:01:47 +0300 Subject: [PATCH 32/41] nbd/server: nbd_negotiate: fix error path Current code will return 0 on this nbd_write fail, as rc is 0 after successful nbd_negotiate_options. Fix this. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Message-Id: <20170602150150.258222-10-vsementsov@virtuozzo.com> Signed-off-by: Paolo Bonzini --- nbd/server.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nbd/server.c b/nbd/server.c index d52682741f01..c9ca84702151 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -607,7 +607,8 @@ static coroutine_fn int nbd_negotiate(NBDClient *client) stq_be_p(buf + 18, client->exp->size); stw_be_p(buf + 26, client->exp->nbdflags | myflags); len = client->no_zeroes ? 10 : sizeof(buf) - 18; - if (nbd_write(client->ioc, buf + 18, len, NULL) < 0) { + rc = nbd_write(client->ioc, buf + 18, len, NULL); + if (rc < 0) { LOG("write failed"); goto fail; } From d9faeed854ddc635adebd26923ece7e90c1080c6 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 2 Jun 2017 18:01:48 +0300 Subject: [PATCH 33/41] nbd/server: get rid of fail: return rc "goto fail" error handling scheme is not needed for just returning error code. Better is return it immediately. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Message-Id: <20170602150150.258222-11-vsementsov@virtuozzo.com> Signed-off-by: Paolo Bonzini --- nbd/server.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/nbd/server.c b/nbd/server.c index c9ca84702151..3b46038d7155 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -265,7 +265,6 @@ static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length) static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length) { - int rc = -EINVAL; char name[NBD_MAX_NAME_SIZE + 1]; /* Client sends: @@ -274,11 +273,11 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length) TRACE("Checking length"); if (length >= sizeof(name)) { LOG("Bad length received"); - goto fail; + return -EINVAL; } if (nbd_read(client->ioc, name, length, NULL) < 0) { LOG("read failed"); - goto fail; + return -EINVAL; } name[length] = '\0'; @@ -287,14 +286,13 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length) client->exp = nbd_export_find(name); if (!client->exp) { LOG("export not found"); - goto fail; + return -EINVAL; } QTAILQ_INSERT_TAIL(&client->exp->clients, client, next); nbd_export_get(client->exp); - rc = 0; -fail: - return rc; + + return 0; } /* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the @@ -564,7 +562,6 @@ static coroutine_fn int nbd_negotiate(NBDClient *client) */ qio_channel_set_blocking(client->ioc, false, NULL); - rc = -EINVAL; TRACE("Beginning negotiation."); memset(buf, 0, sizeof(buf)); @@ -585,21 +582,21 @@ static coroutine_fn int nbd_negotiate(NBDClient *client) if (oldStyle) { if (client->tlscreds) { TRACE("TLS cannot be enabled with oldstyle protocol"); - goto fail; + return -EINVAL; } if (nbd_write(client->ioc, buf, sizeof(buf), NULL) < 0) { LOG("write failed"); - goto fail; + return -EINVAL; } } else { if (nbd_write(client->ioc, buf, 18, NULL) < 0) { LOG("write failed"); - goto fail; + return -EINVAL; } rc = nbd_negotiate_options(client); if (rc != 0) { LOG("option negotiation failed"); - goto fail; + return rc; } TRACE("advertising size %" PRIu64 " and flags %x", @@ -610,14 +607,13 @@ static coroutine_fn int nbd_negotiate(NBDClient *client) rc = nbd_write(client->ioc, buf + 18, len, NULL); if (rc < 0) { LOG("write failed"); - goto fail; + return rc; } } TRACE("Negotiation succeeded."); - rc = 0; -fail: - return rc; + + return 0; } static int nbd_receive_request(QIOChannel *ioc, NBDRequest *request) From 2e5c9ad6f4cf164b098312ffe2f07396a5d994d3 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 2 Jun 2017 18:01:49 +0300 Subject: [PATCH 34/41] nbd/server: rename rc to ret For consistency use 'ret' name for saving return code everywhere in the file. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Message-Id: <20170602150150.258222-12-vsementsov@virtuozzo.com> Signed-off-by: Paolo Bonzini --- nbd/server.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/nbd/server.c b/nbd/server.c index 3b46038d7155..f19f5fde65c7 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -211,15 +211,15 @@ static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp) uint32_t len; const char *name = exp->name ? exp->name : ""; const char *desc = exp->description ? exp->description : ""; - int rc; + int ret; TRACE("Advertising export name '%s' description '%s'", name, desc); name_len = strlen(name); desc_len = strlen(desc); len = name_len + desc_len + sizeof(len); - rc = nbd_negotiate_send_rep_len(ioc, NBD_REP_SERVER, NBD_OPT_LIST, len); - if (rc < 0) { - return rc; + ret = nbd_negotiate_send_rep_len(ioc, NBD_REP_SERVER, NBD_OPT_LIST, len); + if (ret < 0) { + return ret; } len = cpu_to_be32(name_len); @@ -536,7 +536,7 @@ static int nbd_negotiate_options(NBDClient *client) static coroutine_fn int nbd_negotiate(NBDClient *client) { char buf[8 + 8 + 8 + 128]; - int rc; + int ret; const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM | NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA | NBD_FLAG_SEND_WRITE_ZEROES); @@ -593,10 +593,10 @@ static coroutine_fn int nbd_negotiate(NBDClient *client) LOG("write failed"); return -EINVAL; } - rc = nbd_negotiate_options(client); - if (rc != 0) { + ret = nbd_negotiate_options(client); + if (ret != 0) { LOG("option negotiation failed"); - return rc; + return ret; } TRACE("advertising size %" PRIu64 " and flags %x", @@ -604,10 +604,10 @@ static coroutine_fn int nbd_negotiate(NBDClient *client) stq_be_p(buf + 18, client->exp->size); stw_be_p(buf + 26, client->exp->nbdflags | myflags); len = client->no_zeroes ? 10 : sizeof(buf) - 18; - rc = nbd_write(client->ioc, buf + 18, len, NULL); - if (rc < 0) { + ret = nbd_write(client->ioc, buf + 18, len, NULL); + if (ret < 0) { LOG("write failed"); - return rc; + return ret; } } @@ -963,21 +963,21 @@ void nbd_export_close_all(void) static int nbd_co_send_reply(NBDRequestData *req, NBDReply *reply, int len) { NBDClient *client = req->client; - int rc; + int ret; g_assert(qemu_in_coroutine()); qemu_co_mutex_lock(&client->send_lock); client->send_coroutine = qemu_coroutine_self(); if (!len) { - rc = nbd_send_reply(client->ioc, reply); + ret = nbd_send_reply(client->ioc, reply); } else { qio_channel_set_cork(client->ioc, true); - rc = nbd_send_reply(client->ioc, reply); - if (rc == 0) { - rc = nbd_write(client->ioc, req->data, len, NULL); - if (rc < 0) { - rc = -EIO; + ret = nbd_send_reply(client->ioc, reply); + if (ret == 0) { + ret = nbd_write(client->ioc, req->data, len, NULL); + if (ret < 0) { + ret = -EIO; } } qio_channel_set_cork(client->ioc, false); @@ -985,7 +985,7 @@ static int nbd_co_send_reply(NBDRequestData *req, NBDReply *reply, int len) client->send_coroutine = NULL; qemu_co_mutex_unlock(&client->send_lock); - return rc; + return ret; } /* nbd_co_receive_request From 8c372a02e0c36fbdea28434d7c04f4dce5d7368f Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 2 Jun 2017 18:01:50 +0300 Subject: [PATCH 35/41] nbd/server: refactor nbd_trip - do not use 'goto error_reply' outside a switch to jump into the middle of the switch's default case label - reduce code duplication Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Message-Id: <20170602150150.258222-13-vsementsov@virtuozzo.com> Signed-off-by: Paolo Bonzini --- nbd/server.c | 53 ++++++++++++++++++++-------------------------------- 1 file changed, 20 insertions(+), 33 deletions(-) diff --git a/nbd/server.c b/nbd/server.c index f19f5fde65c7..8a70c054a6b8 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -1078,6 +1078,7 @@ static coroutine_fn void nbd_trip(void *opaque) NBDReply reply; int ret; int flags; + int reply_data_len = 0; TRACE("Reading request."); if (client->closing) { @@ -1090,7 +1091,7 @@ static coroutine_fn void nbd_trip(void *opaque) client->recv_coroutine = NULL; nbd_client_receive_next_request(client); if (ret == -EIO) { - goto out; + goto disconnect; } reply.handle = request.handle; @@ -1098,7 +1099,7 @@ static coroutine_fn void nbd_trip(void *opaque) if (ret < 0) { reply.error = -ret; - goto error_reply; + goto reply; } if (client->closing) { @@ -1119,7 +1120,7 @@ static coroutine_fn void nbd_trip(void *opaque) if (ret < 0) { LOG("flush failed"); reply.error = -ret; - goto error_reply; + break; } } @@ -1128,12 +1129,12 @@ static coroutine_fn void nbd_trip(void *opaque) if (ret < 0) { LOG("reading from file failed"); reply.error = -ret; - goto error_reply; + break; } + reply_data_len = request.len; TRACE("Read %" PRIu32" byte(s)", request.len); - if (nbd_co_send_reply(req, &reply, request.len) < 0) - goto out; + break; case NBD_CMD_WRITE: TRACE("Request type is WRITE"); @@ -1141,7 +1142,7 @@ static coroutine_fn void nbd_trip(void *opaque) if (exp->nbdflags & NBD_FLAG_READ_ONLY) { TRACE("Server is read-only, return error"); reply.error = EROFS; - goto error_reply; + break; } TRACE("Writing to device"); @@ -1155,21 +1156,16 @@ static coroutine_fn void nbd_trip(void *opaque) if (ret < 0) { LOG("writing to file failed"); reply.error = -ret; - goto error_reply; } - if (nbd_co_send_reply(req, &reply, 0) < 0) { - goto out; - } break; - case NBD_CMD_WRITE_ZEROES: TRACE("Request type is WRITE_ZEROES"); if (exp->nbdflags & NBD_FLAG_READ_ONLY) { TRACE("Server is read-only, return error"); reply.error = EROFS; - goto error_reply; + break; } TRACE("Writing to device"); @@ -1186,14 +1182,9 @@ static coroutine_fn void nbd_trip(void *opaque) if (ret < 0) { LOG("writing to file failed"); reply.error = -ret; - goto error_reply; } - if (nbd_co_send_reply(req, &reply, 0) < 0) { - goto out; - } break; - case NBD_CMD_DISC: /* unreachable, thanks to special case in nbd_co_receive_request() */ abort(); @@ -1206,9 +1197,7 @@ static coroutine_fn void nbd_trip(void *opaque) LOG("flush failed"); reply.error = -ret; } - if (nbd_co_send_reply(req, &reply, 0) < 0) { - goto out; - } + break; case NBD_CMD_TRIM: TRACE("Request type is TRIM"); @@ -1218,21 +1207,19 @@ static coroutine_fn void nbd_trip(void *opaque) LOG("discard failed"); reply.error = -ret; } - if (nbd_co_send_reply(req, &reply, 0) < 0) { - goto out; - } + break; default: LOG("invalid request type (%" PRIu32 ") received", request.type); reply.error = EINVAL; - error_reply: - /* We must disconnect after NBD_CMD_WRITE if we did not - * read the payload. - */ - if (nbd_co_send_reply(req, &reply, 0) < 0 || !req->complete) { - goto out; - } - break; + } + +reply: + /* We must disconnect after NBD_CMD_WRITE if we did not + * read the payload. + */ + if (nbd_co_send_reply(req, &reply, reply_data_len) < 0 || !req->complete) { + goto disconnect; } TRACE("Request/Reply complete"); @@ -1242,7 +1229,7 @@ static coroutine_fn void nbd_trip(void *opaque) nbd_client_put(client); return; -out: +disconnect: nbd_request_put(req); client_close(client, true); nbd_client_put(client); From e947738e3840025704e6d037aec4c341c8008994 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 14 Jun 2017 21:21:50 +0200 Subject: [PATCH 36/41] include/exec/poison: Add missing TARGET defines Since we've got some new CPU targets in QEMU during the last months and years, we've got some new TARGET_xxx defines now which should be marked as poisoned for common code. Signed-off-by: Thomas Huth Message-Id: <1497468113-2874-2-git-send-email-thuth@redhat.com> Signed-off-by: Paolo Bonzini --- include/exec/poison.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/exec/poison.h b/include/exec/poison.h index 3ca7929ccea4..9356d5fff37c 100644 --- a/include/exec/poison.h +++ b/include/exec/poison.h @@ -12,17 +12,28 @@ #pragma GCC poison TARGET_CRIS #pragma GCC poison TARGET_LM32 #pragma GCC poison TARGET_M68K +#pragma GCC poison TARGET_MICROBLAZE #pragma GCC poison TARGET_MIPS +#pragma GCC poison TARGET_ABI_MIPSO32 #pragma GCC poison TARGET_MIPS64 +#pragma GCC poison TARGET_ABI_MIPSN64 +#pragma GCC poison TARGET_MOXIE +#pragma GCC poison TARGET_NIOS2 #pragma GCC poison TARGET_OPENRISC #pragma GCC poison TARGET_PPC #pragma GCC poison TARGET_PPCEMB #pragma GCC poison TARGET_PPC64 #pragma GCC poison TARGET_ABI32 +#pragma GCC poison TARGET_S390X #pragma GCC poison TARGET_SH4 #pragma GCC poison TARGET_SPARC #pragma GCC poison TARGET_SPARC64 +#pragma GCC poison TARGET_TRICORE +#pragma GCC poison TARGET_UNICORE32 +#pragma GCC poison TARGET_XTENSA +#pragma GCC poison TARGET_NAME +#pragma GCC poison TARGET_SUPPORTS_MTTCG #pragma GCC poison TARGET_WORDS_BIGENDIAN #pragma GCC poison BSWAP_NEEDED From 067b913619ac36299be5ab23921fd19a0347df60 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 14 Jun 2017 21:21:51 +0200 Subject: [PATCH 37/41] include/exec/poison: Mark some CONFIG defines as poisoned, too These are defined in config-target.h and thus should never be used in common code. Signed-off-by: Thomas Huth Message-Id: <1497468113-2874-3-git-send-email-thuth@redhat.com> Signed-off-by: Paolo Bonzini --- include/exec/poison.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/include/exec/poison.h b/include/exec/poison.h index 9356d5fff37c..5ffed4d56eaa 100644 --- a/include/exec/poison.h +++ b/include/exec/poison.h @@ -61,5 +61,25 @@ #pragma GCC poison CPU_INTERRUPT_TGT_INT_1 #pragma GCC poison CPU_INTERRUPT_TGT_INT_2 +#pragma GCC poison CONFIG_ALPHA_DIS +#pragma GCC poison CONFIG_ARM_A64_DIS +#pragma GCC poison CONFIG_ARM_DIS +#pragma GCC poison CONFIG_CRIS_DIS +#pragma GCC poison CONFIG_I386_DIS +#pragma GCC poison CONFIG_LM32_DIS +#pragma GCC poison CONFIG_M68K_DIS +#pragma GCC poison CONFIG_MICROBLAZE_DIS +#pragma GCC poison CONFIG_MIPS_DIS +#pragma GCC poison CONFIG_MOXIE_DIS +#pragma GCC poison CONFIG_NIOS2_DIS +#pragma GCC poison CONFIG_PPC_DIS +#pragma GCC poison CONFIG_S390_DIS +#pragma GCC poison CONFIG_SH4_DIS +#pragma GCC poison CONFIG_SPARC_DIS +#pragma GCC poison CONFIG_XTENSA_DIS + +#pragma GCC poison CONFIG_LINUX_USER +#pragma GCC poison CONFIG_VHOST_NET + #endif #endif From d59157ea058b55b95f27675b33275ffe0f4c7bd6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 6 Jun 2017 16:55:19 +0200 Subject: [PATCH 38/41] docs: create interop/ subdirectory This is for the future interoperability & management guide. It includes the QAPI docs, including the automatically generated ones, other socket protocols (vhost-user, VNC), and the qcow2 file format. Signed-off-by: Paolo Bonzini --- .gitignore | 16 ++--- Makefile | 69 +++++++++++-------- configure | 2 +- docs/{specs => interop}/parallels.txt | 0 docs/{specs => interop}/qcow2.txt | 0 docs/{specs => interop}/qed_spec.txt | 0 docs/{ => interop}/qemu-ga-ref.texi | 0 docs/{ => interop}/qemu-qmp-ref.texi | 0 docs/{ => interop}/qmp-intro.txt | 0 docs/{ => interop}/qmp-spec.txt | 0 docs/{specs => interop}/vhost-user.txt | 0 .../vnc-ledstate-Pseudo-encoding.txt | 0 rules.mak | 2 +- 13 files changed, 50 insertions(+), 39 deletions(-) rename docs/{specs => interop}/parallels.txt (100%) rename docs/{specs => interop}/qcow2.txt (100%) rename docs/{specs => interop}/qed_spec.txt (100%) rename docs/{ => interop}/qemu-ga-ref.texi (100%) rename docs/{ => interop}/qemu-qmp-ref.texi (100%) rename docs/{ => interop}/qmp-intro.txt (100%) rename docs/{ => interop}/qmp-spec.txt (100%) rename docs/{specs => interop}/vhost-user.txt (100%) rename docs/{ => interop}/vnc-ledstate-Pseudo-encoding.txt (100%) diff --git a/.gitignore b/.gitignore index 55a001e3b88f..11bb1b12dabf 100644 --- a/.gitignore +++ b/.gitignore @@ -99,14 +99,14 @@ /pc-bios/optionrom/kvmvapic.img /pc-bios/s390-ccw/s390-ccw.elf /pc-bios/s390-ccw/s390-ccw.img -/docs/qemu-ga-qapi.texi -/docs/qemu-ga-ref.html -/docs/qemu-ga-ref.info* -/docs/qemu-ga-ref.txt -/docs/qemu-qmp-qapi.texi -/docs/qemu-qmp-ref.html -/docs/qemu-qmp-ref.info* -/docs/qemu-qmp-ref.txt +/docs/interop/qemu-ga-qapi.texi +/docs/interop/qemu-ga-ref.html +/docs/interop/qemu-ga-ref.info* +/docs/interop/qemu-ga-ref.txt +/docs/interop/qemu-qmp-qapi.texi +/docs/interop/qemu-qmp-ref.html +/docs/interop/qemu-qmp-ref.info* +/docs/interop/qemu-qmp-ref.txt /docs/version.texi *.tps .stgit-* diff --git a/Makefile b/Makefile index 32d4441697e4..c27389af2827 100644 --- a/Makefile +++ b/Makefile @@ -207,8 +207,8 @@ HELPERS-$(CONFIG_LINUX) = qemu-bridge-helper$(EXESUF) ifdef BUILD_DOCS DOCS=qemu-doc.html qemu-doc.txt qemu.1 qemu-img.1 qemu-nbd.8 qemu-ga.8 -DOCS+=docs/qemu-qmp-ref.html docs/qemu-qmp-ref.txt docs/qemu-qmp-ref.7 -DOCS+=docs/qemu-ga-ref.html docs/qemu-ga-ref.txt docs/qemu-ga-ref.7 +DOCS+=docs/interop/qemu-qmp-ref.html docs/interop/qemu-qmp-ref.txt docs/interop/qemu-qmp-ref.7 +DOCS+=docs/interop/qemu-ga-ref.html docs/interop/qemu-ga-ref.txt docs/interop/qemu-ga-ref.7 ifdef CONFIG_VIRTFS DOCS+=fsdev/virtfs-proxy-helper.1 endif @@ -519,11 +519,12 @@ distclean: clean rm -f qemu-doc.vr qemu-doc.txt rm -f config.log rm -f linux-headers/asm - rm -f docs/qemu-ga-qapi.texi docs/qemu-qmp-qapi.texi docs/version.texi - rm -f docs/qemu-qmp-ref.7 docs/qemu-ga-ref.7 - rm -f docs/qemu-qmp-ref.txt docs/qemu-ga-ref.txt - rm -f docs/qemu-qmp-ref.pdf docs/qemu-ga-ref.pdf - rm -f docs/qemu-qmp-ref.html docs/qemu-ga-ref.html + rm -f docs/version.texi + rm -f docs/interop/qemu-ga-qapi.texi docs/interop/qemu-qmp-qapi.texi + rm -f docs/interop/qemu-qmp-ref.7 docs/interop/qemu-ga-ref.7 + rm -f docs/interop/qemu-qmp-ref.txt docs/interop/qemu-ga-ref.txt + rm -f docs/interop/qemu-qmp-ref.pdf docs/interop/qemu-ga-ref.pdf + rm -f docs/interop/qemu-qmp-ref.html docs/interop/qemu-ga-ref.html for d in $(TARGET_DIRS); do \ rm -rf $$d || exit 1 ; \ done @@ -562,13 +563,13 @@ install-doc: $(DOCS) $(INSTALL_DIR) "$(DESTDIR)$(qemu_docdir)" $(INSTALL_DATA) qemu-doc.html "$(DESTDIR)$(qemu_docdir)" $(INSTALL_DATA) qemu-doc.txt "$(DESTDIR)$(qemu_docdir)" - $(INSTALL_DATA) docs/qemu-qmp-ref.html "$(DESTDIR)$(qemu_docdir)" - $(INSTALL_DATA) docs/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)" + $(INSTALL_DATA) docs/interop/qemu-qmp-ref.html "$(DESTDIR)$(qemu_docdir)" + $(INSTALL_DATA) docs/interop/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)" ifdef CONFIG_POSIX $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" $(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1" $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man7" - $(INSTALL_DATA) docs/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7" + $(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7" ifneq ($(TOOLS),) $(INSTALL_DATA) qemu-img.1 "$(DESTDIR)$(mandir)/man1" $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man8" @@ -576,9 +577,9 @@ ifneq ($(TOOLS),) endif ifneq (,$(findstring qemu-ga,$(TOOLS))) $(INSTALL_DATA) qemu-ga.8 "$(DESTDIR)$(mandir)/man8" - $(INSTALL_DATA) docs/qemu-ga-ref.html "$(DESTDIR)$(qemu_docdir)" - $(INSTALL_DATA) docs/qemu-ga-ref.txt "$(DESTDIR)$(qemu_docdir)" - $(INSTALL_DATA) docs/qemu-ga-ref.7 "$(DESTDIR)$(mandir)/man7" + $(INSTALL_DATA) docs/interop/qemu-ga-ref.html "$(DESTDIR)$(qemu_docdir)" + $(INSTALL_DATA) docs/interop/qemu-ga-ref.txt "$(DESTDIR)$(qemu_docdir)" + $(INSTALL_DATA) docs/interop/qemu-ga-ref.7 "$(DESTDIR)$(mandir)/man7" endif endif ifdef CONFIG_VIRTFS @@ -666,8 +667,10 @@ ui/console-gl.o: $(SRC_PATH)/ui/console-gl.c \ # documentation MAKEINFO=makeinfo -MAKEINFOFLAGS=--no-split --number-sections -I docs -TEXIFLAG=$(if $(V),,--quiet) +MAKEINFOINCLUDES= -I docs -I $( $@,"GEN","$@") @@ -684,10 +687,14 @@ docs/version.texi: $(SRC_PATH)/VERSION --plaintext $< -o $@,"GEN","$@") %.pdf: %.texi - $(call quiet-command,texi2pdf $(TEXIFLAG) -I $(SRC_PATH) -I docs $< -o $@,"GEN","$@") + $(call quiet-command,texi2pdf $(TEXI2PDFFLAGS) $< -o $@,"GEN","$@") -docs/qemu-ga-ref.html docs/qemu-ga-ref.info docs/qemu-ga-ref.txt docs/qemu-ga-ref.pdf docs/qemu-ga-ref.7.pod: docs/version.texi -docs/qemu-qmp-ref.html docs/qemu-qmp-ref.info docs/qemu-qmp-ref.txt docs/qemu-qmp-ref.pdf docs/qemu-qmp-ref.pod: docs/version.texi +docs/interop/qemu-ga-ref.html docs/interop/qemu-ga-ref.info \ + docs/interop/qemu-ga-ref.txt docs/interop/qemu-ga-ref.pdf \ + docs/interop/qemu-ga-ref.7.pod: docs/version.texi +docs/interop/qemu-qmp-ref.html docs/interop/qemu-qmp-ref.info \ + docs/interop/qemu-qmp-ref.txt docs/interop/qemu-qmp-ref.pdf \ + docs/interop/qemu-qmp-ref.pod: docs/version.texi qemu-options.texi: $(SRC_PATH)/qemu-options.hx $(SRC_PATH)/scripts/hxtool $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@") @@ -701,12 +708,12 @@ qemu-monitor-info.texi: $(SRC_PATH)/hmp-commands-info.hx $(SRC_PATH)/scripts/hxt qemu-img-cmds.texi: $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/scripts/hxtool $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@") -docs/qemu-qmp-qapi.texi docs/qemu-ga-qapi.texi: $(SRC_PATH)/scripts/qapi2texi.py $(qapi-py) +docs/interop/qemu-qmp-qapi.texi docs/interop/qemu-ga-qapi.texi: $(SRC_PATH)/scripts/qapi2texi.py $(qapi-py) -docs/qemu-qmp-qapi.texi: $(qapi-modules) +docs/interop/qemu-qmp-qapi.texi: $(qapi-modules) $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi2texi.py $< > $@,"GEN","$@") -docs/qemu-ga-qapi.texi: $(SRC_PATH)/qga/qapi-schema.json +docs/interop/qemu-ga-qapi.texi: $(SRC_PATH)/qga/qapi-schema.json $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi2texi.py $< > $@,"GEN","$@") qemu.1: qemu-doc.texi qemu-options.texi qemu-monitor.texi qemu-monitor-info.texi @@ -716,21 +723,25 @@ fsdev/virtfs-proxy-helper.1: fsdev/virtfs-proxy-helper.texi qemu-nbd.8: qemu-nbd.texi qemu-option-trace.texi qemu-ga.8: qemu-ga.texi -html: qemu-doc.html docs/qemu-qmp-ref.html docs/qemu-ga-ref.html -info: qemu-doc.info docs/qemu-qmp-ref.info docs/qemu-ga-ref.info -pdf: qemu-doc.pdf docs/qemu-qmp-ref.pdf docs/qemu-ga-ref.pdf -txt: qemu-doc.txt docs/qemu-qmp-ref.txt docs/qemu-ga-ref.txt +html: qemu-doc.html docs/interop/qemu-qmp-ref.html docs/interop/qemu-ga-ref.html +info: qemu-doc.info docs/interop/qemu-qmp-ref.info docs/interop/qemu-ga-ref.info +pdf: qemu-doc.pdf docs/interop/qemu-qmp-ref.pdf docs/interop/qemu-ga-ref.pdf +txt: qemu-doc.txt docs/interop/qemu-qmp-ref.txt docs/interop/qemu-ga-ref.txt qemu-doc.html qemu-doc.info qemu-doc.pdf qemu-doc.txt: \ qemu-img.texi qemu-nbd.texi qemu-options.texi qemu-option-trace.texi \ qemu-monitor.texi qemu-img-cmds.texi qemu-ga.texi \ qemu-monitor-info.texi -docs/qemu-ga-ref.dvi docs/qemu-ga-ref.html docs/qemu-ga-ref.info docs/qemu-ga-ref.pdf docs/qemu-ga-ref.txt docs/qemu-ga-ref.7: \ -docs/qemu-ga-ref.texi docs/qemu-ga-qapi.texi +docs/interop/qemu-ga-ref.dvi docs/interop/qemu-ga-ref.html \ + docs/interop/qemu-ga-ref.info docs/interop/qemu-ga-ref.pdf \ + docs/interop/qemu-ga-ref.txt docs/interop/qemu-ga-ref.7: \ + docs/interop/qemu-ga-ref.texi docs/interop/qemu-ga-qapi.texi -docs/qemu-qmp-ref.dvi docs/qemu-qmp-ref.html docs/qemu-qmp-ref.info docs/qemu-qmp-ref.pdf docs/qemu-qmp-ref.txt docs/qemu-qmp-ref.7: \ -docs/qemu-qmp-ref.texi docs/qemu-qmp-qapi.texi +docs/interop/qemu-qmp-ref.dvi docs/interop/qemu-qmp-ref.html \ + docs/interop/qemu-qmp-ref.info docs/interop/qemu-qmp-ref.pdf \ + docs/interop/qemu-qmp-ref.txt docs/interop/qemu-qmp-ref.7: \ + docs/interop/qemu-qmp-ref.texi docs/interop/qemu-qmp-qapi.texi ifdef CONFIG_WIN32 diff --git a/configure b/configure index 48d5ceb8c692..f70a3b583843 100755 --- a/configure +++ b/configure @@ -6374,7 +6374,7 @@ fi # build tree in object directory in case the source is not in the current directory DIRS="tests tests/tcg tests/tcg/cris tests/tcg/lm32 tests/libqos tests/qapi-schema tests/tcg/xtensa tests/qemu-iotests" -DIRS="$DIRS docs fsdev" +DIRS="$DIRS docs docs/interop fsdev" DIRS="$DIRS pc-bios/optionrom pc-bios/spapr-rtas pc-bios/s390-ccw" DIRS="$DIRS roms/seabios roms/vgabios" DIRS="$DIRS qapi-generated" diff --git a/docs/specs/parallels.txt b/docs/interop/parallels.txt similarity index 100% rename from docs/specs/parallels.txt rename to docs/interop/parallels.txt diff --git a/docs/specs/qcow2.txt b/docs/interop/qcow2.txt similarity index 100% rename from docs/specs/qcow2.txt rename to docs/interop/qcow2.txt diff --git a/docs/specs/qed_spec.txt b/docs/interop/qed_spec.txt similarity index 100% rename from docs/specs/qed_spec.txt rename to docs/interop/qed_spec.txt diff --git a/docs/qemu-ga-ref.texi b/docs/interop/qemu-ga-ref.texi similarity index 100% rename from docs/qemu-ga-ref.texi rename to docs/interop/qemu-ga-ref.texi diff --git a/docs/qemu-qmp-ref.texi b/docs/interop/qemu-qmp-ref.texi similarity index 100% rename from docs/qemu-qmp-ref.texi rename to docs/interop/qemu-qmp-ref.texi diff --git a/docs/qmp-intro.txt b/docs/interop/qmp-intro.txt similarity index 100% rename from docs/qmp-intro.txt rename to docs/interop/qmp-intro.txt diff --git a/docs/qmp-spec.txt b/docs/interop/qmp-spec.txt similarity index 100% rename from docs/qmp-spec.txt rename to docs/interop/qmp-spec.txt diff --git a/docs/specs/vhost-user.txt b/docs/interop/vhost-user.txt similarity index 100% rename from docs/specs/vhost-user.txt rename to docs/interop/vhost-user.txt diff --git a/docs/vnc-ledstate-Pseudo-encoding.txt b/docs/interop/vnc-ledstate-Pseudo-encoding.txt similarity index 100% rename from docs/vnc-ledstate-Pseudo-encoding.txt rename to docs/interop/vnc-ledstate-Pseudo-encoding.txt diff --git a/rules.mak b/rules.mak index 2a2fb72e8535..6e943335f301 100644 --- a/rules.mak +++ b/rules.mak @@ -377,7 +377,7 @@ define unnest-vars endef TEXI2MAN = $(call quiet-command, \ - perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl -I docs $< $@.pod && \ + perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $(TEXI2PODFLAGS) $< $@.pod && \ $(POD2MAN) --section=$(subst .,,$(suffix $@)) --center=" " --release=" " $@.pod > $@, \ "GEN","$@") From 44cb280d3325395cbebad09e20622423055a4d7a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 6 Jun 2017 16:59:37 +0200 Subject: [PATCH 39/41] qemu-doc: include version number Signed-off-by: Paolo Bonzini --- Makefile | 17 +++++------------ qemu-doc.texi | 5 +++-- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/Makefile b/Makefile index c27389af2827..b017e3abf8e4 100644 --- a/Makefile +++ b/Makefile @@ -669,33 +669,26 @@ ui/console-gl.o: $(SRC_PATH)/ui/console-gl.c \ MAKEINFO=makeinfo MAKEINFOINCLUDES= -I docs -I $( $@,"GEN","$@") -%.html: %.texi +%.html: %.texi docs/version.texi $(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \ --html $< -o $@,"GEN","$@") -%.info: %.texi +%.info: %.texi docs/version.texi $(call quiet-command,$(MAKEINFO) $(MAKEINFOFLAGS) $< -o $@,"GEN","$@") -%.txt: %.texi +%.txt: %.texi docs/version.texi $(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \ --plaintext $< -o $@,"GEN","$@") -%.pdf: %.texi +%.pdf: %.texi docs/version.texi $(call quiet-command,texi2pdf $(TEXI2PDFFLAGS) $< -o $@,"GEN","$@") -docs/interop/qemu-ga-ref.html docs/interop/qemu-ga-ref.info \ - docs/interop/qemu-ga-ref.txt docs/interop/qemu-ga-ref.pdf \ - docs/interop/qemu-ga-ref.7.pod: docs/version.texi -docs/interop/qemu-qmp-ref.html docs/interop/qemu-qmp-ref.info \ - docs/interop/qemu-qmp-ref.txt docs/interop/qemu-qmp-ref.pdf \ - docs/interop/qemu-qmp-ref.pod: docs/version.texi - qemu-options.texi: $(SRC_PATH)/qemu-options.hx $(SRC_PATH)/scripts/hxtool $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@") diff --git a/qemu-doc.texi b/qemu-doc.texi index 965ba5929eb3..21079fd6752b 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -1,11 +1,12 @@ \input texinfo @c -*- texinfo -*- @c %**start of header @setfilename qemu-doc.info +@include version.texi @documentlanguage en @documentencoding UTF-8 -@settitle QEMU Emulator User Documentation +@settitle QEMU version @value{VERSION} User Documentation @exampleindent 0 @paragraphindent 0 @c %**end of header @@ -19,7 +20,7 @@ @iftex @titlepage @sp 7 -@center @titlefont{QEMU Emulator} +@center @titlefont{QEMU version @value{VERSION}} @sp 1 @center @titlefont{User Documentation} @sp 3 From f12c1ebddf79c965d4fdca752c6b316c889c7675 Mon Sep 17 00:00:00 2001 From: Felipe Franciosi Date: Thu, 2 Mar 2017 10:25:52 -0800 Subject: [PATCH 40/41] vhost-user-scsi: Introduce vhost-user-scsi host device This commit introduces a vhost-user device for SCSI. This is based on the existing vhost-scsi implementation, but done over vhost-user instead. It also uses a chardev to connect to the backend. Unlike vhost-scsi (today), VMs using vhost-user-scsi can be live migrated. To use it, start Qemu with a command line equivalent to: qemu-system-x86_64 \ -chardev socket,id=vus0,path=/tmp/vus.sock \ -device vhost-user-scsi-pci,chardev=vus0,bus=pci.0,addr=... A separate commit presents a sample application linked with libiscsi to provide a backend for vhost-user-scsi. Signed-off-by: Felipe Franciosi Message-Id: <1488479153-21203-4-git-send-email-felipe@nutanix.com> Signed-off-by: Paolo Bonzini --- .gitignore | 1 + default-configs/pci.mak | 1 + default-configs/s390x-softmmu.mak | 1 + hw/scsi/Makefile.objs | 1 + hw/scsi/vhost-scsi-common.c | 1 - hw/scsi/vhost-user-scsi.c | 205 ++++++++++++++++++++++++++++ hw/virtio/virtio-pci.c | 58 ++++++++ hw/virtio/virtio-pci.h | 11 ++ include/hw/virtio/vhost-user-scsi.h | 35 +++++ include/hw/virtio/virtio-scsi.h | 2 + 10 files changed, 315 insertions(+), 1 deletion(-) create mode 100644 hw/scsi/vhost-user-scsi.c create mode 100644 include/hw/virtio/vhost-user-scsi.h diff --git a/.gitignore b/.gitignore index 11bb1b12dabf..09c2363acf62 100644 --- a/.gitignore +++ b/.gitignore @@ -50,6 +50,7 @@ /qemu-version.h.tmp /module_block.h /vscclient +/vhost-user-scsi /fsdev/virtfs-proxy-helper *.[1-9] *.a diff --git a/default-configs/pci.mak b/default-configs/pci.mak index 3bbeb62d9a18..53ff10975c48 100644 --- a/default-configs/pci.mak +++ b/default-configs/pci.mak @@ -43,3 +43,4 @@ CONFIG_VGA=y CONFIG_VGA_PCI=y CONFIG_IVSHMEM=$(CONFIG_EVENTFD) CONFIG_ROCKER=y +CONFIG_VHOST_USER_SCSI=$(CONFIG_LINUX) diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak index 18aed56fc026..b227a361791e 100644 --- a/default-configs/s390x-softmmu.mak +++ b/default-configs/s390x-softmmu.mak @@ -1,5 +1,6 @@ CONFIG_PCI=y CONFIG_VIRTIO_PCI=y +CONFIG_VHOST_USER_SCSI=$(CONFIG_LINUX) CONFIG_VIRTIO=y CONFIG_SCLPCONSOLE=y CONFIG_TERMINAL3270=y diff --git a/hw/scsi/Makefile.objs b/hw/scsi/Makefile.objs index 54d8754e9a7e..b188f7242b82 100644 --- a/hw/scsi/Makefile.objs +++ b/hw/scsi/Makefile.objs @@ -11,4 +11,5 @@ obj-$(CONFIG_PSERIES) += spapr_vscsi.o ifeq ($(CONFIG_VIRTIO),y) obj-y += virtio-scsi.o virtio-scsi-dataplane.o obj-$(CONFIG_VHOST_SCSI) += vhost-scsi-common.o vhost-scsi.o +obj-$(CONFIG_VHOST_USER_SCSI) += vhost-scsi-common.o vhost-user-scsi.o endif diff --git a/hw/scsi/vhost-scsi-common.c b/hw/scsi/vhost-scsi-common.c index e41c0314db4b..d434b3e99a75 100644 --- a/hw/scsi/vhost-scsi-common.c +++ b/hw/scsi/vhost-scsi-common.c @@ -16,7 +16,6 @@ */ #include "qemu/osdep.h" -#include #include "qapi/error.h" #include "qemu/error-report.h" #include "migration/migration.h" diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c new file mode 100644 index 000000000000..500fa6a0674e --- /dev/null +++ b/hw/scsi/vhost-user-scsi.c @@ -0,0 +1,205 @@ +/* + * vhost-user-scsi host device + * + * Copyright (c) 2016 Nutanix Inc. All rights reserved. + * + * Author: + * Felipe Franciosi + * + * This work is largely based on the "vhost-scsi" implementation by: + * Stefan Hajnoczi + * Nicholas Bellinger + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/typedefs.h" +#include "qom/object.h" +#include "hw/fw-path-provider.h" +#include "hw/qdev-core.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-backend.h" +#include "hw/virtio/vhost-user-scsi.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-access.h" +#include "chardev/char-fe.h" + +/* Features supported by the host application */ +static const int user_feature_bits[] = { + VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_RING_F_INDIRECT_DESC, + VIRTIO_RING_F_EVENT_IDX, + VIRTIO_SCSI_F_HOTPLUG, + VHOST_INVALID_FEATURE_BIT +}; + +static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status) +{ + VHostUserSCSI *s = (VHostUserSCSI *)vdev; + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); + bool start = (status & VIRTIO_CONFIG_S_DRIVER_OK) && vdev->vm_running; + + if (vsc->dev.started == start) { + return; + } + + if (start) { + int ret; + + ret = vhost_scsi_common_start(vsc); + if (ret < 0) { + error_report("unable to start vhost-user-scsi: %s", strerror(-ret)); + exit(1); + } + } else { + vhost_scsi_common_stop(vsc); + } +} + +static void vhost_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ +} + +static void vhost_user_scsi_realize(DeviceState *dev, Error **errp) +{ + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); + VHostUserSCSI *s = VHOST_USER_SCSI(dev); + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); + Error *err = NULL; + int ret; + + if (!vs->conf.chardev.chr) { + error_setg(errp, "vhost-user-scsi: missing chardev"); + return; + } + + virtio_scsi_common_realize(dev, vhost_dummy_handle_output, + vhost_dummy_handle_output, + vhost_dummy_handle_output, &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } + + vsc->dev.nvqs = 2 + vs->conf.num_queues; + vsc->dev.vqs = g_new(struct vhost_virtqueue, vsc->dev.nvqs); + vsc->dev.vq_index = 0; + vsc->dev.backend_features = 0; + + ret = vhost_dev_init(&vsc->dev, (void *)&vs->conf.chardev, + VHOST_BACKEND_TYPE_USER, 0); + if (ret < 0) { + error_setg(errp, "vhost-user-scsi: vhost initialization failed: %s", + strerror(-ret)); + return; + } + + /* Channel and lun both are 0 for bootable vhost-user-scsi disk */ + vsc->channel = 0; + vsc->lun = 0; + vsc->target = vs->conf.boot_tpgt; +} + +static void vhost_user_scsi_unrealize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserSCSI *s = VHOST_USER_SCSI(dev); + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); + + /* This will stop the vhost backend. */ + vhost_user_scsi_set_status(vdev, 0); + + vhost_dev_cleanup(&vsc->dev); + g_free(vsc->dev.vqs); + + virtio_scsi_common_unrealize(dev, errp); +} + +static uint64_t vhost_user_scsi_get_features(VirtIODevice *vdev, + uint64_t features, Error **errp) +{ + VHostUserSCSI *s = VHOST_USER_SCSI(vdev); + + /* Turn on predefined features supported by this device */ + features |= s->host_features; + + return vhost_scsi_common_get_features(vdev, features, errp); +} + +static Property vhost_user_scsi_properties[] = { + DEFINE_PROP_CHR("chardev", VirtIOSCSICommon, conf.chardev), + DEFINE_PROP_UINT32("boot_tpgt", VirtIOSCSICommon, conf.boot_tpgt, 0), + DEFINE_PROP_UINT32("num_queues", VirtIOSCSICommon, conf.num_queues, 1), + DEFINE_PROP_UINT32("max_sectors", VirtIOSCSICommon, conf.max_sectors, + 0xFFFF), + DEFINE_PROP_UINT32("cmd_per_lun", VirtIOSCSICommon, conf.cmd_per_lun, 128), + DEFINE_PROP_BIT64("hotplug", VHostUserSCSI, host_features, + VIRTIO_SCSI_F_HOTPLUG, + true), + DEFINE_PROP_BIT64("param_change", VHostUserSCSI, host_features, + VIRTIO_SCSI_F_CHANGE, + true), + DEFINE_PROP_END_OF_LIST(), +}; + +static const VMStateDescription vmstate_vhost_scsi = { + .name = "virtio-scsi", + .minimum_version_id = 1, + .version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_VIRTIO_DEVICE, + VMSTATE_END_OF_LIST() + }, +}; + +static void vhost_user_scsi_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(klass); + + dc->props = vhost_user_scsi_properties; + dc->vmsd = &vmstate_vhost_scsi; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + vdc->realize = vhost_user_scsi_realize; + vdc->unrealize = vhost_user_scsi_unrealize; + vdc->get_features = vhost_user_scsi_get_features; + vdc->set_config = vhost_scsi_common_set_config; + vdc->set_status = vhost_user_scsi_set_status; + fwc->get_dev_path = vhost_scsi_common_get_fw_dev_path; +} + +static void vhost_user_scsi_instance_init(Object *obj) +{ + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(obj); + + vsc->feature_bits = user_feature_bits; + + /* Add the bootindex property for this object */ + device_add_bootindex_property(obj, &vsc->bootindex, "bootindex", NULL, + DEVICE(vsc), NULL); +} + +static const TypeInfo vhost_user_scsi_info = { + .name = TYPE_VHOST_USER_SCSI, + .parent = TYPE_VHOST_SCSI_COMMON, + .instance_size = sizeof(VHostUserSCSI), + .class_init = vhost_user_scsi_class_init, + .instance_init = vhost_user_scsi_instance_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_FW_PATH_PROVIDER }, + { } + }, +}; + +static void virtio_register_types(void) +{ + type_register_static(&vhost_user_scsi_info); +} + +type_init(virtio_register_types) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index f9b7244808b5..20d6a0861612 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -2135,6 +2135,61 @@ static const TypeInfo vhost_scsi_pci_info = { }; #endif +#ifdef CONFIG_LINUX +/* vhost-user-scsi-pci */ +static Property vhost_user_scsi_pci_properties[] = { + DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, + DEV_NVECTORS_UNSPECIFIED), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vhost_user_scsi_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ + VHostUserSCSIPCI *dev = VHOST_USER_SCSI_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vdev); + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev); + + if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) { + vpci_dev->nvectors = vs->conf.num_queues + 3; + } + + qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); + object_property_set_bool(OBJECT(vdev), true, "realized", errp); +} + +static void vhost_user_scsi_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + k->realize = vhost_user_scsi_pci_realize; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + dc->props = vhost_user_scsi_pci_properties; + pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; + pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_SCSI; + pcidev_k->revision = 0x00; + pcidev_k->class_id = PCI_CLASS_STORAGE_SCSI; +} + +static void vhost_user_scsi_pci_instance_init(Object *obj) +{ + VHostUserSCSIPCI *dev = VHOST_USER_SCSI_PCI(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VHOST_USER_SCSI); + object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev), + "bootindex", &error_abort); +} + +static const TypeInfo vhost_user_scsi_pci_info = { + .name = TYPE_VHOST_USER_SCSI_PCI, + .parent = TYPE_VIRTIO_PCI, + .instance_size = sizeof(VHostUserSCSIPCI), + .instance_init = vhost_user_scsi_pci_instance_init, + .class_init = vhost_user_scsi_pci_class_init, +}; +#endif + /* vhost-vsock-pci */ #ifdef CONFIG_VHOST_VSOCK @@ -2612,6 +2667,9 @@ static void virtio_pci_register_types(void) #ifdef CONFIG_VHOST_SCSI type_register_static(&vhost_scsi_pci_info); #endif +#ifdef CONFIG_LINUX + type_register_static(&vhost_user_scsi_pci_info); +#endif #ifdef CONFIG_VHOST_VSOCK type_register_static(&vhost_vsock_pci_info); #endif diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h index b095dfc6d9e7..69f5959623d1 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h @@ -26,6 +26,7 @@ #include "hw/virtio/virtio-input.h" #include "hw/virtio/virtio-gpu.h" #include "hw/virtio/virtio-crypto.h" +#include "hw/virtio/vhost-user-scsi.h" #ifdef CONFIG_VIRTFS #include "hw/9pfs/virtio-9p.h" @@ -44,6 +45,7 @@ typedef struct VirtIOBalloonPCI VirtIOBalloonPCI; typedef struct VirtIOSerialPCI VirtIOSerialPCI; typedef struct VirtIONetPCI VirtIONetPCI; typedef struct VHostSCSIPCI VHostSCSIPCI; +typedef struct VHostUserSCSIPCI VHostUserSCSIPCI; typedef struct VirtIORngPCI VirtIORngPCI; typedef struct VirtIOInputPCI VirtIOInputPCI; typedef struct VirtIOInputHIDPCI VirtIOInputHIDPCI; @@ -230,6 +232,15 @@ struct VHostSCSIPCI { }; #endif +#define TYPE_VHOST_USER_SCSI_PCI "vhost-user-scsi-pci" +#define VHOST_USER_SCSI_PCI(obj) \ + OBJECT_CHECK(VHostUserSCSIPCI, (obj), TYPE_VHOST_USER_SCSI_PCI) + +struct VHostUserSCSIPCI { + VirtIOPCIProxy parent_obj; + VHostUserSCSI vdev; +}; + /* * virtio-blk-pci: This extends VirtioPCIProxy. */ diff --git a/include/hw/virtio/vhost-user-scsi.h b/include/hw/virtio/vhost-user-scsi.h new file mode 100644 index 000000000000..01861f78d056 --- /dev/null +++ b/include/hw/virtio/vhost-user-scsi.h @@ -0,0 +1,35 @@ +/* + * vhost-user-scsi host device + * + * Copyright (c) 2016 Nutanix Inc. All rights reserved. + * + * Author: + * Felipe Franciosi + * + * This file is largely based on "vhost-scsi.h" by: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef VHOST_USER_SCSI_H +#define VHOST_USER_SCSI_H + +#include "qemu-common.h" +#include "hw/qdev.h" +#include "hw/virtio/virtio-scsi.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-scsi-common.h" + +#define TYPE_VHOST_USER_SCSI "vhost-user-scsi" +#define VHOST_USER_SCSI(obj) \ + OBJECT_CHECK(VHostUserSCSI, (obj), TYPE_VHOST_USER_SCSI) + +typedef struct VHostUserSCSI { + VHostSCSICommon parent_obj; + uint64_t host_features; +} VHostUserSCSI; + +#endif /* VHOST_USER_SCSI_H */ diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h index eac2013ddde5..de6ae5a9f6eb 100644 --- a/include/hw/virtio/virtio-scsi.h +++ b/include/hw/virtio/virtio-scsi.h @@ -21,6 +21,7 @@ #include "hw/virtio/virtio.h" #include "hw/pci/pci.h" #include "hw/scsi/scsi.h" +#include "chardev/char-fe.h" #include "sysemu/iothread.h" #define TYPE_VIRTIO_SCSI_COMMON "virtio-scsi-common" @@ -53,6 +54,7 @@ struct VirtIOSCSIConf { char *vhostfd; char *wwpn; #endif + CharBackend chardev; uint32_t boot_tpgt; IOThread *iothread; }; From 49cc0340f8be18871319ffec6efc72147e73ff0b Mon Sep 17 00:00:00 2001 From: Felipe Franciosi Date: Thu, 2 Mar 2017 10:25:53 -0800 Subject: [PATCH 41/41] vhost-user-scsi: Introduce a vhost-user-scsi sample application This commit introduces a vhost-user-scsi backend sample application. It must be linked with libiscsi and libvhost-user. To use it, compile with: $ make vhost-user-scsi And run as follows: $ ./vhost-user-scsi -u vus.sock -i iscsi://uri_to_target/ $ qemu-system-x86_64 --enable-kvm -m 512 \ -object memory-backend-file,id=mem,size=512m,share=on,mem-path=guestmem \ -numa node,memdev=mem \ -chardev socket,id=vhost-user-scsi,path=vus.sock \ -device vhost-user-scsi-pci,chardev=vhost-user-scsi \ The application is currently limited at one LUN only and it processes requests synchronously (therefore only achieving QD1). The purpose of the code is to show how a backend can be implemented and to test the vhost-user-scsi Qemu implementation. If a different instance of this vhost-user-scsi application is executed at a remote host, a VM can be live migrated to such a host. Signed-off-by: Felipe Franciosi Message-Id: <1488479153-21203-5-git-send-email-felipe@nutanix.com> --- Makefile | 3 + Makefile.objs | 4 + contrib/libvhost-user/libvhost-user.h | 11 +- contrib/vhost-user-scsi/Makefile.objs | 1 + contrib/vhost-user-scsi/vhost-user-scsi.c | 886 ++++++++++++++++++++++ 5 files changed, 900 insertions(+), 5 deletions(-) create mode 100644 contrib/vhost-user-scsi/Makefile.objs create mode 100644 contrib/vhost-user-scsi/vhost-user-scsi.c diff --git a/Makefile b/Makefile index b017e3abf8e4..16a0430c6caf 100644 --- a/Makefile +++ b/Makefile @@ -269,6 +269,7 @@ dummy := $(call unnest-vars,, \ ivshmem-client-obj-y \ ivshmem-server-obj-y \ libvhost-user-obj-y \ + vhost-user-scsi-obj-y \ qga-vss-dll-obj-y \ block-obj-y \ block-obj-m \ @@ -473,6 +474,8 @@ ivshmem-client$(EXESUF): $(ivshmem-client-obj-y) $(COMMON_LDADDS) $(call LINK, $^) ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) $(COMMON_LDADDS) $(call LINK, $^) +vhost-user-scsi$(EXESUF): $(vhost-user-scsi-obj-y) + $(call LINK, $^) module_block.h: $(SRC_PATH)/scripts/modules/module_block.py config-host.mak $(call quiet-command,$(PYTHON) $< $@ \ diff --git a/Makefile.objs b/Makefile.objs index 6e5c7069942a..b2e6322ef0de 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -110,6 +110,10 @@ qga-vss-dll-obj-y = qga/ ivshmem-client-obj-y = contrib/ivshmem-client/ ivshmem-server-obj-y = contrib/ivshmem-server/ libvhost-user-obj-y = contrib/libvhost-user/ +vhost-user-scsi.o-cflags := $(LIBISCSI_CFLAGS) +vhost-user-scsi.o-libs := $(LIBISCSI_LIBS) +vhost-user-scsi-obj-y = contrib/vhost-user-scsi/ +vhost-user-scsi-obj-y += contrib/libvhost-user/libvhost-user.o ###################################################################### trace-events-subdirs = diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h index af02a31ebe8e..53ef222c0b4f 100644 --- a/contrib/libvhost-user/libvhost-user.h +++ b/contrib/libvhost-user/libvhost-user.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "standard-headers/linux/virtio_ring.h" @@ -192,11 +193,11 @@ typedef struct VuVirtq { } VuVirtq; enum VuWatchCondtion { - VU_WATCH_IN = 1 << 0, - VU_WATCH_OUT = 1 << 1, - VU_WATCH_PRI = 1 << 2, - VU_WATCH_ERR = 1 << 3, - VU_WATCH_HUP = 1 << 4, + VU_WATCH_IN = POLLIN, + VU_WATCH_OUT = POLLOUT, + VU_WATCH_PRI = POLLPRI, + VU_WATCH_ERR = POLLERR, + VU_WATCH_HUP = POLLHUP, }; typedef void (*vu_panic_cb) (VuDev *dev, const char *err); diff --git a/contrib/vhost-user-scsi/Makefile.objs b/contrib/vhost-user-scsi/Makefile.objs new file mode 100644 index 000000000000..e83a38a85bc2 --- /dev/null +++ b/contrib/vhost-user-scsi/Makefile.objs @@ -0,0 +1 @@ +vhost-user-scsi-obj-y = vhost-user-scsi.o diff --git a/contrib/vhost-user-scsi/vhost-user-scsi.c b/contrib/vhost-user-scsi/vhost-user-scsi.c new file mode 100644 index 000000000000..b5ae02c96f07 --- /dev/null +++ b/contrib/vhost-user-scsi/vhost-user-scsi.c @@ -0,0 +1,886 @@ +/* + * vhost-user-scsi sample application + * + * Copyright (c) 2016 Nutanix Inc. All rights reserved. + * + * Author: + * Felipe Franciosi + * + * This work is licensed under the terms of the GNU GPL, version 2 only. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "contrib/libvhost-user/libvhost-user.h" +#include "hw/virtio/virtio-scsi.h" +#include "iscsi/iscsi.h" + +#include + +/* Small compat shim from glib 2.32 */ +#ifndef G_SOURCE_CONTINUE +#define G_SOURCE_CONTINUE TRUE +#endif +#ifndef G_SOURCE_REMOVE +#define G_SOURCE_REMOVE FALSE +#endif + +/* #define VUS_DEBUG 1 */ + +/** Log helpers **/ + +#define PPRE \ + struct timespec ts; \ + char timebuf[64]; \ + struct tm tm; \ + (void)clock_gettime(CLOCK_REALTIME, &ts); \ + (void)strftime(timebuf, 64, "%Y%m%d %T", gmtime_r(&ts.tv_sec, &tm)) + +#define PEXT(lvl, msg, ...) do { \ + PPRE; \ + fprintf(stderr, "%s.%06ld " lvl ": %s:%s():%d: " msg "\n", \ + timebuf, ts.tv_nsec / 1000, \ + __FILE__, __func__, __LINE__, ## __VA_ARGS__); \ +} while (0) + +#define PNOR(lvl, msg, ...) do { \ + PPRE; \ + fprintf(stderr, "%s.%06ld " lvl ": " msg "\n", \ + timebuf, ts.tv_nsec / 1000, ## __VA_ARGS__); \ +} while (0) + +#ifdef VUS_DEBUG +#define PDBG(msg, ...) PEXT("DBG", msg, ## __VA_ARGS__) +#define PERR(msg, ...) PEXT("ERR", msg, ## __VA_ARGS__) +#define PLOG(msg, ...) PEXT("LOG", msg, ## __VA_ARGS__) +#else +#define PDBG(msg, ...) { } +#define PERR(msg, ...) PNOR("ERR", msg, ## __VA_ARGS__) +#define PLOG(msg, ...) PNOR("LOG", msg, ## __VA_ARGS__) +#endif + +/** vhost-user-scsi specific definitions **/ + + /* Only 1 LUN and device supported today */ +#define VUS_MAX_LUNS 1 +#define VUS_MAX_DEVS 1 + +#define VUS_ISCSI_INITIATOR "iqn.2016-11.com.nutanix:vhost-user-scsi" + +typedef struct iscsi_lun { + struct iscsi_context *iscsi_ctx; + int iscsi_lun; +} iscsi_lun_t; + +typedef struct vhost_scsi_dev { + VuDev vu_dev; + int server_sock; + GMainLoop *loop; + GTree *fdmap; /* fd -> gsource context id */ + iscsi_lun_t luns[VUS_MAX_LUNS]; +} vhost_scsi_dev_t; + +static vhost_scsi_dev_t *vhost_scsi_devs[VUS_MAX_DEVS]; + +/** glib event loop integration for libvhost-user and misc callbacks **/ + +QEMU_BUILD_BUG_ON((int)G_IO_IN != (int)VU_WATCH_IN); +QEMU_BUILD_BUG_ON((int)G_IO_OUT != (int)VU_WATCH_OUT); +QEMU_BUILD_BUG_ON((int)G_IO_PRI != (int)VU_WATCH_PRI); +QEMU_BUILD_BUG_ON((int)G_IO_ERR != (int)VU_WATCH_ERR); +QEMU_BUILD_BUG_ON((int)G_IO_HUP != (int)VU_WATCH_HUP); + +typedef struct vus_gsrc { + GSource parent; + vhost_scsi_dev_t *vdev_scsi; + GPollFD gfd; + vu_watch_cb vu_cb; +} vus_gsrc_t; + +static gint vus_fdmap_compare(gconstpointer a, gconstpointer b) +{ + return (b > a) - (b < a); +} + +static gboolean vus_gsrc_prepare(GSource *src, gint *timeout) +{ + assert(timeout); + + *timeout = -1; + return FALSE; +} + +static gboolean vus_gsrc_check(GSource *src) +{ + vus_gsrc_t *vus_src = (vus_gsrc_t *)src; + + assert(vus_src); + + return vus_src->gfd.revents & vus_src->gfd.events; +} + +static gboolean vus_gsrc_dispatch(GSource *src, GSourceFunc cb, gpointer data) +{ + vhost_scsi_dev_t *vdev_scsi; + vus_gsrc_t *vus_src = (vus_gsrc_t *)src; + + assert(vus_src); + assert(!(vus_src->vu_cb && cb)); + + vdev_scsi = vus_src->vdev_scsi; + + assert(vdev_scsi); + + if (cb) { + return cb(data); + } + if (vus_src->vu_cb) { + vus_src->vu_cb(&vdev_scsi->vu_dev, vus_src->gfd.revents, data); + } + return G_SOURCE_CONTINUE; +} + +static GSourceFuncs vus_gsrc_funcs = { + vus_gsrc_prepare, + vus_gsrc_check, + vus_gsrc_dispatch, + NULL +}; + +static int vus_gsrc_new(vhost_scsi_dev_t *vdev_scsi, int fd, GIOCondition cond, + vu_watch_cb vu_cb, GSourceFunc gsrc_cb, gpointer data) +{ + GSource *vus_gsrc; + vus_gsrc_t *vus_src; + guint id; + + assert(vdev_scsi); + assert(fd >= 0); + assert(vu_cb || gsrc_cb); + assert(!(vu_cb && gsrc_cb)); + + vus_gsrc = g_source_new(&vus_gsrc_funcs, sizeof(vus_gsrc_t)); + if (!vus_gsrc) { + PERR("Error creating GSource for new watch"); + return -1; + } + vus_src = (vus_gsrc_t *)vus_gsrc; + + vus_src->vdev_scsi = vdev_scsi; + vus_src->gfd.fd = fd; + vus_src->gfd.events = cond; + vus_src->vu_cb = vu_cb; + + g_source_add_poll(vus_gsrc, &vus_src->gfd); + g_source_set_callback(vus_gsrc, gsrc_cb, data, NULL); + id = g_source_attach(vus_gsrc, NULL); + assert(id); + g_source_unref(vus_gsrc); + + g_tree_insert(vdev_scsi->fdmap, (gpointer)(uintptr_t)fd, + (gpointer)(uintptr_t)id); + + return 0; +} + +/* from libiscsi's scsi-lowlevel.h ** + * + * nb. We can't directly include scsi-lowlevel.h due to a namespace conflict: + * QEMU's scsi.h also defines "SCSI_XFER_NONE". + */ + +#define SCSI_CDB_MAX_SIZE 16 + +struct scsi_iovector { + struct scsi_iovec *iov; + int niov; + int nalloc; + size_t offset; + int consumed; +}; + +struct scsi_allocated_memory { + struct scsi_allocated_memory *next; + char buf[0]; +}; + +struct scsi_data { + int size; + unsigned char *data; +}; + +enum scsi_sense_key { + SCSI_SENSE_NO_SENSE = 0x00, + SCSI_SENSE_RECOVERED_ERROR = 0x01, + SCSI_SENSE_NOT_READY = 0x02, + SCSI_SENSE_MEDIUM_ERROR = 0x03, + SCSI_SENSE_HARDWARE_ERROR = 0x04, + SCSI_SENSE_ILLEGAL_REQUEST = 0x05, + SCSI_SENSE_UNIT_ATTENTION = 0x06, + SCSI_SENSE_DATA_PROTECTION = 0x07, + SCSI_SENSE_BLANK_CHECK = 0x08, + SCSI_SENSE_VENDOR_SPECIFIC = 0x09, + SCSI_SENSE_COPY_ABORTED = 0x0a, + SCSI_SENSE_COMMAND_ABORTED = 0x0b, + SCSI_SENSE_OBSOLETE_ERROR_CODE = 0x0c, + SCSI_SENSE_OVERFLOW_COMMAND = 0x0d, + SCSI_SENSE_MISCOMPARE = 0x0e +}; + +struct scsi_sense { + unsigned char error_type; + enum scsi_sense_key key; + int ascq; + unsigned sense_specific:1; + unsigned ill_param_in_cdb:1; + unsigned bit_pointer_valid:1; + unsigned char bit_pointer; + uint16_t field_pointer; +}; + +enum scsi_residual { + SCSI_RESIDUAL_NO_RESIDUAL = 0, + SCSI_RESIDUAL_UNDERFLOW, + SCSI_RESIDUAL_OVERFLOW +}; + +struct scsi_task { + int status; + int cdb_size; + int xfer_dir; + int expxferlen; + unsigned char cdb[SCSI_CDB_MAX_SIZE]; + enum scsi_residual residual_status; + size_t residual; + struct scsi_sense sense; + struct scsi_data datain; + struct scsi_allocated_memory *mem; + void *ptr; + + uint32_t itt; + uint32_t cmdsn; + uint32_t lun; + + struct scsi_iovector iovector_in; + struct scsi_iovector iovector_out; +}; + +/** libiscsi integration **/ + +static int iscsi_add_lun(iscsi_lun_t *lun, char *iscsi_uri) +{ + struct iscsi_url *iscsi_url; + struct iscsi_context *iscsi_ctx; + int ret = 0; + + assert(lun); + assert(iscsi_uri); + + iscsi_ctx = iscsi_create_context(VUS_ISCSI_INITIATOR); + if (!iscsi_ctx) { + PERR("Unable to create iSCSI context"); + return -1; + } + + iscsi_url = iscsi_parse_full_url(iscsi_ctx, iscsi_uri); + if (!iscsi_url) { + PERR("Unable to parse iSCSI URL: %s", iscsi_get_error(iscsi_ctx)); + goto fail; + } + + iscsi_set_session_type(iscsi_ctx, ISCSI_SESSION_NORMAL); + iscsi_set_header_digest(iscsi_ctx, ISCSI_HEADER_DIGEST_NONE_CRC32C); + if (iscsi_full_connect_sync(iscsi_ctx, iscsi_url->portal, iscsi_url->lun)) { + PERR("Unable to login to iSCSI portal: %s", iscsi_get_error(iscsi_ctx)); + goto fail; + } + + lun->iscsi_ctx = iscsi_ctx; + lun->iscsi_lun = iscsi_url->lun; + + PDBG("Context %p created for lun 0: %s", iscsi_ctx, iscsi_uri); + +out: + if (iscsi_url) { + iscsi_destroy_url(iscsi_url); + } + return ret; + +fail: + (void)iscsi_destroy_context(iscsi_ctx); + ret = -1; + goto out; +} + +static struct scsi_task *scsi_task_new(int cdb_len, uint8_t *cdb, int dir, + int xfer_len) { + struct scsi_task *task; + + assert(cdb_len > 0); + assert(cdb); + + task = calloc(1, sizeof(struct scsi_task)); + if (!task) { + PERR("Error allocating task: %s", strerror(errno)); + return NULL; + } + + memcpy(task->cdb, cdb, cdb_len); + task->cdb_size = cdb_len; + task->xfer_dir = dir; + task->expxferlen = xfer_len; + + return task; +} + +static int get_cdb_len(uint8_t *cdb) +{ + assert(cdb); + + switch (cdb[0] >> 5) { + case 0: return 6; + case 1: /* fall through */ + case 2: return 10; + case 4: return 16; + case 5: return 12; + } + PERR("Unable to determine cdb len (0x%02hhX)", cdb[0] >> 5); + return -1; +} + +static int handle_cmd_sync(struct iscsi_context *ctx, + VirtIOSCSICmdReq *req, + struct iovec *out, unsigned int out_len, + VirtIOSCSICmdResp *rsp, + struct iovec *in, unsigned int in_len) { + struct scsi_task *task; + uint32_t dir; + uint32_t len; + int cdb_len; + int i; + + assert(ctx); + assert(req); + assert(rsp); + + if (!(!req->lun[1] && req->lun[2] == 0x40 && !req->lun[3])) { + /* Ignore anything different than target=0, lun=0 */ + PDBG("Ignoring unconnected lun (0x%hhX, 0x%hhX)", + req->lun[1], req->lun[3]); + rsp->status = SCSI_STATUS_CHECK_CONDITION; + memset(rsp->sense, 0, sizeof(rsp->sense)); + rsp->sense_len = 18; + rsp->sense[0] = 0x70; + rsp->sense[2] = SCSI_SENSE_ILLEGAL_REQUEST; + rsp->sense[7] = 10; + rsp->sense[12] = 0x24; + + return 0; + } + + cdb_len = get_cdb_len(req->cdb); + if (cdb_len == -1) { + return -1; + } + + len = 0; + if (!out_len && !in_len) { + dir = SCSI_XFER_NONE; + } else if (out_len) { + dir = SCSI_XFER_TO_DEV; + for (i = 0; i < out_len; i++) { + len += out[i].iov_len; + } + } else { + dir = SCSI_XFER_FROM_DEV; + for (i = 0; i < in_len; i++) { + len += in[i].iov_len; + } + } + + task = scsi_task_new(cdb_len, req->cdb, dir, len); + if (!task) { + PERR("Unable to create iscsi task"); + return -1; + } + + if (dir == SCSI_XFER_TO_DEV) { + task->iovector_out.iov = (struct scsi_iovec *)out; + task->iovector_out.niov = out_len; + } else if (dir == SCSI_XFER_FROM_DEV) { + task->iovector_in.iov = (struct scsi_iovec *)in; + task->iovector_in.niov = in_len; + } + + PDBG("Sending iscsi cmd (cdb_len=%d, dir=%d, task=%p)", + cdb_len, dir, task); + if (!iscsi_scsi_command_sync(ctx, 0, task, NULL)) { + PERR("Error serving SCSI command"); + free(task); + return -1; + } + + memset(rsp, 0, sizeof(*rsp)); + + rsp->status = task->status; + rsp->resid = task->residual; + + if (task->status == SCSI_STATUS_CHECK_CONDITION) { + rsp->response = VIRTIO_SCSI_S_FAILURE; + rsp->sense_len = task->datain.size - 2; + memcpy(rsp->sense, &task->datain.data[2], rsp->sense_len); + } + + free(task); + + PDBG("Filled in rsp: status=%hhX, resid=%u, response=%hhX, sense_len=%u", + rsp->status, rsp->resid, rsp->response, rsp->sense_len); + + return 0; +} + +/** libvhost-user callbacks **/ + +static vhost_scsi_dev_t *vdev_scsi_find_by_vu(VuDev *vu_dev); + +static void vus_panic_cb(VuDev *vu_dev, const char *buf) +{ + vhost_scsi_dev_t *vdev_scsi; + + assert(vu_dev); + + vdev_scsi = vdev_scsi_find_by_vu(vu_dev); + + if (buf) { + PERR("vu_panic: %s", buf); + } + + if (vdev_scsi) { + assert(vdev_scsi->loop); + g_main_loop_quit(vdev_scsi->loop); + } +} + +static void vus_add_watch_cb(VuDev *vu_dev, int fd, int vu_evt, vu_watch_cb cb, + void *pvt) { + vhost_scsi_dev_t *vdev_scsi; + guint id; + + assert(vu_dev); + assert(fd >= 0); + assert(cb); + + vdev_scsi = vdev_scsi_find_by_vu(vu_dev); + if (!vdev_scsi) { + vus_panic_cb(vu_dev, NULL); + return; + } + + id = (guint)(uintptr_t)g_tree_lookup(vdev_scsi->fdmap, + (gpointer)(uintptr_t)fd); + if (id) { + GSource *vus_src = g_main_context_find_source_by_id(NULL, id); + assert(vus_src); + g_source_destroy(vus_src); + (void)g_tree_remove(vdev_scsi->fdmap, (gpointer)(uintptr_t)fd); + } + + if (vus_gsrc_new(vdev_scsi, fd, vu_evt, cb, NULL, pvt)) { + vus_panic_cb(vu_dev, NULL); + } +} + +static void vus_del_watch_cb(VuDev *vu_dev, int fd) +{ + vhost_scsi_dev_t *vdev_scsi; + guint id; + + assert(vu_dev); + assert(fd >= 0); + + vdev_scsi = vdev_scsi_find_by_vu(vu_dev); + if (!vdev_scsi) { + vus_panic_cb(vu_dev, NULL); + return; + } + + id = (guint)(uintptr_t)g_tree_lookup(vdev_scsi->fdmap, + (gpointer)(uintptr_t)fd); + if (id) { + GSource *vus_src = g_main_context_find_source_by_id(NULL, id); + assert(vus_src); + g_source_destroy(vus_src); + (void)g_tree_remove(vdev_scsi->fdmap, (gpointer)(uintptr_t)fd); + } +} + +static void vus_proc_ctl(VuDev *vu_dev, int idx) +{ + /* Control VQ not implemented */ +} + +static void vus_proc_evt(VuDev *vu_dev, int idx) +{ + /* Event VQ not implemented */ +} + +static void vus_proc_req(VuDev *vu_dev, int idx) +{ + vhost_scsi_dev_t *vdev_scsi; + VuVirtq *vq; + + assert(vu_dev); + + vdev_scsi = vdev_scsi_find_by_vu(vu_dev); + if (!vdev_scsi) { + vus_panic_cb(vu_dev, NULL); + return; + } + + if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) { + PERR("VQ Index out of range: %d", idx); + vus_panic_cb(vu_dev, NULL); + return; + } + + vq = vu_get_queue(vu_dev, idx); + if (!vq) { + PERR("Error fetching VQ (dev=%p, idx=%d)", vu_dev, idx); + vus_panic_cb(vu_dev, NULL); + return; + } + + PDBG("Got kicked on vq[%d]@%p", idx, vq); + + while (1) { + VuVirtqElement *elem; + VirtIOSCSICmdReq *req; + VirtIOSCSICmdResp *rsp; + + elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement)); + if (!elem) { + PDBG("No more elements pending on vq[%d]@%p", idx, vq); + break; + } + PDBG("Popped elem@%p", elem); + + assert(!((elem->out_num > 1) && (elem->in_num > 1))); + assert((elem->out_num > 0) && (elem->in_num > 0)); + + if (elem->out_sg[0].iov_len < sizeof(VirtIOSCSICmdReq)) { + PERR("Invalid virtio-scsi req header"); + vus_panic_cb(vu_dev, NULL); + break; + } + req = (VirtIOSCSICmdReq *)elem->out_sg[0].iov_base; + + if (elem->in_sg[0].iov_len < sizeof(VirtIOSCSICmdResp)) { + PERR("Invalid virtio-scsi rsp header"); + vus_panic_cb(vu_dev, NULL); + break; + } + rsp = (VirtIOSCSICmdResp *)elem->in_sg[0].iov_base; + + if (handle_cmd_sync(vdev_scsi->luns[0].iscsi_ctx, + req, &elem->out_sg[1], elem->out_num - 1, + rsp, &elem->in_sg[1], elem->in_num - 1) != 0) { + vus_panic_cb(vu_dev, NULL); + break; + } + + vu_queue_push(vu_dev, vq, elem, 0); + vu_queue_notify(vu_dev, vq); + + free(elem); + } +} + +static void vus_queue_set_started(VuDev *vu_dev, int idx, bool started) +{ + VuVirtq *vq; + + assert(vu_dev); + + if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) { + PERR("VQ Index out of range: %d", idx); + vus_panic_cb(vu_dev, NULL); + return; + } + + vq = vu_get_queue(vu_dev, idx); + + switch (idx) { + case 0: + vu_set_queue_handler(vu_dev, vq, started ? vus_proc_ctl : NULL); + break; + case 1: + vu_set_queue_handler(vu_dev, vq, started ? vus_proc_evt : NULL); + break; + default: + vu_set_queue_handler(vu_dev, vq, started ? vus_proc_req : NULL); + } +} + +static const VuDevIface vus_iface = { + .queue_set_started = vus_queue_set_started, +}; + +static gboolean vus_vhost_cb(gpointer data) +{ + VuDev *vu_dev = (VuDev *)data; + + assert(vu_dev); + + if (!vu_dispatch(vu_dev) != 0) { + PERR("Error processing vhost message"); + vus_panic_cb(vu_dev, NULL); + return G_SOURCE_REMOVE; + } + + return G_SOURCE_CONTINUE; +} + +/** misc helpers **/ + +static int unix_sock_new(char *unix_fn) +{ + int sock; + struct sockaddr_un un; + size_t len; + + assert(unix_fn); + + sock = socket(AF_UNIX, SOCK_STREAM, 0); + if (sock <= 0) { + perror("socket"); + return -1; + } + + un.sun_family = AF_UNIX; + (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn); + len = sizeof(un.sun_family) + strlen(un.sun_path); + + (void)unlink(unix_fn); + if (bind(sock, (struct sockaddr *)&un, len) < 0) { + perror("bind"); + goto fail; + } + + if (listen(sock, 1) < 0) { + perror("listen"); + goto fail; + } + + return sock; + +fail: + (void)close(sock); + + return -1; +} + +/** vhost-user-scsi **/ + +static vhost_scsi_dev_t *vdev_scsi_find_by_vu(VuDev *vu_dev) +{ + int i; + + assert(vu_dev); + + for (i = 0; i < VUS_MAX_DEVS; i++) { + if (&vhost_scsi_devs[i]->vu_dev == vu_dev) { + return vhost_scsi_devs[i]; + } + } + + PERR("Unknown VuDev %p", vu_dev); + return NULL; +} + +static void vdev_scsi_deinit(vhost_scsi_dev_t *vdev_scsi) +{ + if (!vdev_scsi) { + return; + } + + if (vdev_scsi->server_sock >= 0) { + struct sockaddr_storage ss; + socklen_t sslen = sizeof(ss); + + if (getsockname(vdev_scsi->server_sock, (struct sockaddr *)&ss, + &sslen) == 0) { + struct sockaddr_un *su = (struct sockaddr_un *)&ss; + (void)unlink(su->sun_path); + } + + (void)close(vdev_scsi->server_sock); + vdev_scsi->server_sock = -1; + } + + if (vdev_scsi->loop) { + g_main_loop_unref(vdev_scsi->loop); + vdev_scsi->loop = NULL; + } +} + +static vhost_scsi_dev_t *vdev_scsi_new(char *unix_fn) +{ + vhost_scsi_dev_t *vdev_scsi = NULL; + + assert(unix_fn); + + vdev_scsi = calloc(1, sizeof(vhost_scsi_dev_t)); + if (!vdev_scsi) { + PERR("calloc: %s", strerror(errno)); + return NULL; + } + + vdev_scsi->server_sock = unix_sock_new(unix_fn); + if (vdev_scsi->server_sock < 0) { + goto err; + } + + vdev_scsi->loop = g_main_loop_new(NULL, FALSE); + if (!vdev_scsi->loop) { + PERR("Error creating glib event loop"); + goto err; + } + + vdev_scsi->fdmap = g_tree_new(vus_fdmap_compare); + if (!vdev_scsi->fdmap) { + PERR("Error creating glib tree for fdmap"); + goto err; + } + + return vdev_scsi; + +err: + vdev_scsi_deinit(vdev_scsi); + free(vdev_scsi); + + return NULL; +} + +static int vdev_scsi_add_iscsi_lun(vhost_scsi_dev_t *vdev_scsi, + char *iscsi_uri, uint32_t lun) { + assert(vdev_scsi); + assert(iscsi_uri); + assert(lun < VUS_MAX_LUNS); + + if (vdev_scsi->luns[lun].iscsi_ctx) { + PERR("Lun %d already configured", lun); + return -1; + } + + if (iscsi_add_lun(&vdev_scsi->luns[lun], iscsi_uri) != 0) { + return -1; + } + + return 0; +} + +static int vdev_scsi_run(vhost_scsi_dev_t *vdev_scsi) +{ + int cli_sock; + int ret = 0; + + assert(vdev_scsi); + assert(vdev_scsi->server_sock >= 0); + assert(vdev_scsi->loop); + + cli_sock = accept(vdev_scsi->server_sock, (void *)0, (void *)0); + if (cli_sock < 0) { + perror("accept"); + return -1; + } + + vu_init(&vdev_scsi->vu_dev, + cli_sock, + vus_panic_cb, + vus_add_watch_cb, + vus_del_watch_cb, + &vus_iface); + + if (vus_gsrc_new(vdev_scsi, cli_sock, G_IO_IN, NULL, vus_vhost_cb, + &vdev_scsi->vu_dev)) { + goto fail; + } + + g_main_loop_run(vdev_scsi->loop); + +out: + vu_deinit(&vdev_scsi->vu_dev); + + return ret; + +fail: + ret = -1; + goto out; +} + +int main(int argc, char **argv) +{ + vhost_scsi_dev_t *vdev_scsi = NULL; + char *unix_fn = NULL; + char *iscsi_uri = NULL; + int opt, err = EXIT_SUCCESS; + + while ((opt = getopt(argc, argv, "u:i:")) != -1) { + switch (opt) { + case 'h': + goto help; + case 'u': + unix_fn = strdup(optarg); + break; + case 'i': + iscsi_uri = strdup(optarg); + break; + default: + goto help; + } + } + if (!unix_fn || !iscsi_uri) { + goto help; + } + + vdev_scsi = vdev_scsi_new(unix_fn); + if (!vdev_scsi) { + goto err; + } + vhost_scsi_devs[0] = vdev_scsi; + + if (vdev_scsi_add_iscsi_lun(vdev_scsi, iscsi_uri, 0) != 0) { + goto err; + } + + if (vdev_scsi_run(vdev_scsi) != 0) { + goto err; + } + +out: + if (vdev_scsi) { + vdev_scsi_deinit(vdev_scsi); + free(vdev_scsi); + } + if (unix_fn) { + free(unix_fn); + } + if (iscsi_uri) { + free(iscsi_uri); + } + + return err; + +err: + err = EXIT_FAILURE; + goto out; + +help: + fprintf(stderr, "Usage: %s [ -u unix_sock_path -i iscsi_uri ] | [ -h ]\n", + argv[0]); + fprintf(stderr, " -u path to unix socket\n"); + fprintf(stderr, " -i iscsi uri for lun 0\n"); + fprintf(stderr, " -h print help and quit\n"); + + goto err; +}