From 315a1309defd8ddf910c6c17e28cbbd7faf92f2e Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 10 Feb 2015 11:17:53 +0100 Subject: [PATCH 01/73] coroutine: Fix use after free with qemu_coroutine_yield() Instead of using the same function for entering and exiting coroutines, and hoping that it doesn't add any functionality that hurts with the parameters used for exiting, we can just directly call into the real task switch in qemu_coroutine_switch(). This fixes a use-after-free scenario where reentering a coroutine that has yielded still accesses the old parent coroutine (which may have meanwhile terminated) in the part of coroutine_swap() that follows qemu_coroutine_switch(). Cc: qemu-stable@nongnu.org Signed-off-by: Kevin Wolf Reviewed-by: Paolo Bonzini --- qemu-coroutine.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qemu-coroutine.c b/qemu-coroutine.c index 525247b0501d..5019b81dd547 100644 --- a/qemu-coroutine.c +++ b/qemu-coroutine.c @@ -148,5 +148,5 @@ void coroutine_fn qemu_coroutine_yield(void) } self->caller = NULL; - coroutine_swap(self, to); + qemu_coroutine_switch(self, to, COROUTINE_YIELD); } From cd12bb567cdcd9665a5acdecd6ac8afd9a977003 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 10 Feb 2015 11:31:52 +0100 Subject: [PATCH 02/73] coroutine: Clean up qemu_coroutine_enter() qemu_coroutine_enter() is now the only user of coroutine_swap(). Both functions are short, so inline it. Also, using COROUTINE_YIELD is now even more confusing because this code is never called during qemu_coroutine_yield() any more. In fact, this value is never read back, so we can just introduce a new COROUTINE_ENTER which documents the purpose of the task switch better. Signed-off-by: Kevin Wolf Reviewed-by: Paolo Bonzini --- include/block/coroutine_int.h | 1 + qemu-coroutine.c | 36 +++++++++++++++-------------------- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/include/block/coroutine_int.h b/include/block/coroutine_int.h index f133d65af86d..9aa1aae5d56d 100644 --- a/include/block/coroutine_int.h +++ b/include/block/coroutine_int.h @@ -31,6 +31,7 @@ typedef enum { COROUTINE_YIELD = 1, COROUTINE_TERMINATE = 2, + COROUTINE_ENTER = 3, } CoroutineAction; struct Coroutine { diff --git a/qemu-coroutine.c b/qemu-coroutine.c index 5019b81dd547..c17a92b107e8 100644 --- a/qemu-coroutine.c +++ b/qemu-coroutine.c @@ -99,29 +99,10 @@ static void coroutine_delete(Coroutine *co) qemu_coroutine_delete(co); } -static void coroutine_swap(Coroutine *from, Coroutine *to) -{ - CoroutineAction ret; - - ret = qemu_coroutine_switch(from, to, COROUTINE_YIELD); - - qemu_co_queue_run_restart(to); - - switch (ret) { - case COROUTINE_YIELD: - return; - case COROUTINE_TERMINATE: - trace_qemu_coroutine_terminate(to); - coroutine_delete(to); - return; - default: - abort(); - } -} - void qemu_coroutine_enter(Coroutine *co, void *opaque) { Coroutine *self = qemu_coroutine_self(); + CoroutineAction ret; trace_qemu_coroutine_enter(self, co, opaque); @@ -132,7 +113,20 @@ void qemu_coroutine_enter(Coroutine *co, void *opaque) co->caller = self; co->entry_arg = opaque; - coroutine_swap(self, co); + ret = qemu_coroutine_switch(self, co, COROUTINE_ENTER); + + qemu_co_queue_run_restart(co); + + switch (ret) { + case COROUTINE_YIELD: + return; + case COROUTINE_TERMINATE: + trace_qemu_coroutine_terminate(co); + coroutine_delete(co); + return; + default: + abort(); + } } void coroutine_fn qemu_coroutine_yield(void) From 7c2eed3efa86c19436bc33372bfa9d7fbb6fda2c Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 10 Feb 2015 11:15:59 +0100 Subject: [PATCH 03/73] test-coroutine: Regression test for yield bug This adds a test for reentering a coroutine that previously yielded to a coroutine that has meanwhile terminated. Signed-off-by: Stefan Hajnoczi Reviewed-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- tests/test-coroutine.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/test-coroutine.c b/tests/test-coroutine.c index 27d1b6f8e851..b552d9f5e90d 100644 --- a/tests/test-coroutine.c +++ b/tests/test-coroutine.c @@ -13,6 +13,7 @@ #include #include "block/coroutine.h" +#include "block/coroutine_int.h" /* * Check that qemu_in_coroutine() works @@ -122,6 +123,30 @@ static void test_yield(void) g_assert_cmpint(i, ==, 5); /* coroutine must yield 5 times */ } +static void coroutine_fn c2_fn(void *opaque) +{ + qemu_coroutine_yield(); +} + +static void coroutine_fn c1_fn(void *opaque) +{ + Coroutine *c2 = opaque; + qemu_coroutine_enter(c2, NULL); +} + +static void test_co_queue(void) +{ + Coroutine *c1; + Coroutine *c2; + + c1 = qemu_coroutine_create(c1_fn); + c2 = qemu_coroutine_create(c2_fn); + + qemu_coroutine_enter(c1, c2); + memset(c1, 0xff, sizeof(Coroutine)); + qemu_coroutine_enter(c2, NULL); +} + /* * Check that creation, enter, and return work */ @@ -343,6 +368,7 @@ static void perf_cost(void) int main(int argc, char **argv) { g_test_init(&argc, &argv, NULL); + g_test_add_func("/basic/co_queue", test_co_queue); g_test_add_func("/basic/lifecycle", test_lifecycle); g_test_add_func("/basic/yield", test_yield); g_test_add_func("/basic/nesting", test_nesting); From 3f3f20dcd34fc2fcf6dea2fe4e9b45d1c4d67288 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 11 Feb 2015 17:19:57 +0100 Subject: [PATCH 04/73] vpc: Fix size in fixed image creation If total_sectors is rounded to match the geometry, total_size needs to be changed as well. Otherwise we end up with an image whose geometry describes a disk larger than the image file, which doesn't end well. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz --- block/vpc.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/block/vpc.c b/block/vpc.c index 46803b14be3e..7fddbf0b7558 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -801,6 +801,7 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) } total_sectors = (int64_t) cyls * heads * secs_per_cyl; + total_size = total_sectors * BDRV_SECTOR_SIZE; /* Prepare the Hard Disk Footer */ memset(buf, 0, 1024); @@ -822,13 +823,8 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) /* Version of Virtual PC 2007 */ footer->major = cpu_to_be16(0x0005); footer->minor = cpu_to_be16(0x0003); - if (disk_type == VHD_DYNAMIC) { - footer->orig_size = cpu_to_be64(total_sectors * 512); - footer->size = cpu_to_be64(total_sectors * 512); - } else { - footer->orig_size = cpu_to_be64(total_size); - footer->size = cpu_to_be64(total_size); - } + footer->orig_size = cpu_to_be64(total_size); + footer->size = cpu_to_be64(total_size); footer->cyls = cpu_to_be16(cyls); footer->heads = heads; footer->secs_per_cyl = secs_per_cyl; From 0cc84887068eeb59eed84dbab6547b39e83d739c Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 11 Feb 2015 15:56:01 +0100 Subject: [PATCH 05/73] vpc: Implement bdrv_co_get_block_status() This implements bdrv_co_get_block_status() for VHD images. This can significantly speed up qemu-img convert operation because only with this function implemented sparseness can be considered. (Before, converting a 1 TB empty image took several minutes for me, now it's instantaneous.) Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz --- block/vpc.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/block/vpc.c b/block/vpc.c index 7fddbf0b7558..1533b6a64d64 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -597,6 +597,51 @@ static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num, return ret; } +static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, int *pnum) +{ + BDRVVPCState *s = bs->opaque; + VHDFooter *footer = (VHDFooter*) s->footer_buf; + int64_t start, offset, next; + bool allocated; + int n; + + if (be32_to_cpu(footer->type) == VHD_FIXED) { + *pnum = nb_sectors; + return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA | + (sector_num << BDRV_SECTOR_BITS); + } + + offset = get_sector_offset(bs, sector_num, 0); + start = offset; + allocated = (offset != -1); + *pnum = 0; + + do { + /* All sectors in a block are contiguous (without using the bitmap) */ + n = ROUND_UP(sector_num + 1, s->block_size / BDRV_SECTOR_SIZE) + - sector_num; + n = MIN(n, nb_sectors); + + *pnum += n; + sector_num += n; + nb_sectors -= n; + next = start + (*pnum * BDRV_SECTOR_SIZE); + + if (nb_sectors == 0) { + break; + } + + offset = get_sector_offset(bs, sector_num, 0); + } while ((allocated && offset == next) || (!allocated && offset == -1)); + + if (allocated) { + return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start; + } else { + return 0; + } +} + /* * Calculates the number of cylinders, heads and sectors per cylinder * based on a given number of sectors. This is the algorithm described @@ -903,8 +948,9 @@ static BlockDriver bdrv_vpc = { .bdrv_reopen_prepare = vpc_reopen_prepare, .bdrv_create = vpc_create, - .bdrv_read = vpc_co_read, - .bdrv_write = vpc_co_write, + .bdrv_read = vpc_co_read, + .bdrv_write = vpc_co_write, + .bdrv_co_get_block_status = vpc_co_get_block_status, .bdrv_get_info = vpc_get_info, From 876eb1b0cc2b04927739cba10e4e73e8b990d65e Mon Sep 17 00:00:00 2001 From: Teruaki Ishizaki Date: Fri, 13 Feb 2015 18:20:53 +0900 Subject: [PATCH 06/73] sheepdog: selectable object size support Previously, qemu block driver of sheepdog used hard-coded VDI object size. This patch enables users to handle VDI object size. When you start qemu, you don't need to specify additional command option. But when you create the VDI which doesn't have default object size with qemu-img command, you specify object_size option. If you want to create a VDI of 8MB object size, you need to specify following command option. # qemu-img create -o object_size=8M sheepdog:test1 100M In addition, when you don't specify qemu-img command option, a default value of sheepdog cluster is used for creating VDI. # qemu-img create sheepdog:test2 100M Signed-off-by: Teruaki Ishizaki Acked-by: Hitoshi Mitake Signed-off-by: Kevin Wolf --- block/sheepdog.c | 155 ++++++++++++++++++++++++++++++++------ include/block/block_int.h | 1 + 2 files changed, 134 insertions(+), 22 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index d17ee360c52a..a2679c280335 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -37,6 +37,7 @@ #define SD_OP_READ_VDIS 0x15 #define SD_OP_FLUSH_VDI 0x16 #define SD_OP_DEL_VDI 0x17 +#define SD_OP_GET_CLUSTER_DEFAULT 0x18 #define SD_FLAG_CMD_WRITE 0x01 #define SD_FLAG_CMD_COW 0x02 @@ -91,6 +92,7 @@ #define SD_NR_VDIS (1U << 24) #define SD_DATA_OBJ_SIZE (UINT64_C(1) << 22) #define SD_MAX_VDI_SIZE (SD_DATA_OBJ_SIZE * MAX_DATA_OBJS) +#define SD_DEFAULT_BLOCK_SIZE_SHIFT 22 /* * For erasure coding, we use at most SD_EC_MAX_STRIP for data strips and * (SD_EC_MAX_STRIP - 1) for parity strips @@ -167,7 +169,8 @@ typedef struct SheepdogVdiReq { uint32_t base_vdi_id; uint8_t copies; uint8_t copy_policy; - uint8_t reserved[2]; + uint8_t store_policy; + uint8_t block_size_shift; uint32_t snapid; uint32_t type; uint32_t pad[2]; @@ -186,6 +189,21 @@ typedef struct SheepdogVdiRsp { uint32_t pad[5]; } SheepdogVdiRsp; +typedef struct SheepdogClusterRsp { + uint8_t proto_ver; + uint8_t opcode; + uint16_t flags; + uint32_t epoch; + uint32_t id; + uint32_t data_length; + uint32_t result; + uint8_t nr_copies; + uint8_t copy_policy; + uint8_t block_size_shift; + uint8_t __pad1; + uint32_t __pad2[6]; +} SheepdogClusterRsp; + typedef struct SheepdogInode { char name[SD_MAX_VDI_LEN]; char tag[SD_MAX_VDI_TAG_LEN]; @@ -1541,6 +1559,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, hdr.vdi_size = s->inode.vdi_size; hdr.copy_policy = s->inode.copy_policy; hdr.copies = s->inode.nr_copies; + hdr.block_size_shift = s->inode.block_size_shift; ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen); @@ -1566,9 +1585,12 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, static int sd_prealloc(const char *filename, Error **errp) { BlockDriverState *bs = NULL; + BDRVSheepdogState *base = NULL; + unsigned long buf_size; uint32_t idx, max_idx; + uint32_t object_size; int64_t vdi_size; - void *buf = g_malloc0(SD_DATA_OBJ_SIZE); + void *buf = NULL; int ret; ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, @@ -1582,18 +1604,24 @@ static int sd_prealloc(const char *filename, Error **errp) ret = vdi_size; goto out; } - max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE); + + base = bs->opaque; + object_size = (UINT32_C(1) << base->inode.block_size_shift); + buf_size = MIN(object_size, SD_DATA_OBJ_SIZE); + buf = g_malloc0(buf_size); + + max_idx = DIV_ROUND_UP(vdi_size, buf_size); for (idx = 0; idx < max_idx; idx++) { /* * The created image can be a cloned image, so we need to read * a data from the source image. */ - ret = bdrv_pread(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE); + ret = bdrv_pread(bs, idx * buf_size, buf, buf_size); if (ret < 0) { goto out; } - ret = bdrv_pwrite(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE); + ret = bdrv_pwrite(bs, idx * buf_size, buf, buf_size); if (ret < 0) { goto out; } @@ -1666,6 +1694,27 @@ static int parse_redundancy(BDRVSheepdogState *s, const char *opt) return 0; } +static int parse_block_size_shift(BDRVSheepdogState *s, QemuOpts *opt) +{ + struct SheepdogInode *inode = &s->inode; + uint64_t object_size; + int obj_order; + + object_size = qemu_opt_get_size_del(opt, BLOCK_OPT_OBJECT_SIZE, 0); + if (object_size) { + if ((object_size - 1) & object_size) { /* not a power of 2? */ + return -EINVAL; + } + obj_order = ffs(object_size) - 1; + if (obj_order < 20 || obj_order > 31) { + return -EINVAL; + } + inode->block_size_shift = (uint8_t)obj_order; + } + + return 0; +} + static int sd_create(const char *filename, QemuOpts *opts, Error **errp) { @@ -1676,6 +1725,7 @@ static int sd_create(const char *filename, QemuOpts *opts, BDRVSheepdogState *s; char tag[SD_MAX_VDI_TAG_LEN]; uint32_t snapid; + uint64_t max_vdi_size; bool prealloc = false; s = g_new0(BDRVSheepdogState, 1); @@ -1714,10 +1764,11 @@ static int sd_create(const char *filename, QemuOpts *opts, goto out; } } - - if (s->inode.vdi_size > SD_MAX_VDI_SIZE) { - error_setg(errp, "too big image size"); - ret = -EINVAL; + ret = parse_block_size_shift(s, opts); + if (ret < 0) { + error_setg(errp, "Invalid object_size." + " obect_size needs to be power of 2" + " and be limited from 2^20 to 2^31"); goto out; } @@ -1754,6 +1805,51 @@ static int sd_create(const char *filename, QemuOpts *opts, } s->aio_context = qemu_get_aio_context(); + + /* if block_size_shift is not specified, get cluster default value */ + if (s->inode.block_size_shift == 0) { + SheepdogVdiReq hdr; + SheepdogClusterRsp *rsp = (SheepdogClusterRsp *)&hdr; + Error *local_err = NULL; + int fd; + unsigned int wlen = 0, rlen = 0; + + fd = connect_to_sdog(s, &local_err); + if (fd < 0) { + error_report("%s", error_get_pretty(local_err)); + error_free(local_err); + ret = -EIO; + goto out; + } + + memset(&hdr, 0, sizeof(hdr)); + hdr.opcode = SD_OP_GET_CLUSTER_DEFAULT; + hdr.proto_ver = SD_PROTO_VER; + + ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, + NULL, &wlen, &rlen); + closesocket(fd); + if (ret) { + error_setg_errno(errp, -ret, "failed to get cluster default"); + goto out; + } + if (rsp->result == SD_RES_SUCCESS) { + s->inode.block_size_shift = rsp->block_size_shift; + } else { + s->inode.block_size_shift = SD_DEFAULT_BLOCK_SIZE_SHIFT; + } + } + + max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) * MAX_DATA_OBJS; + + if (s->inode.vdi_size > max_vdi_size) { + error_setg(errp, "An image is too large." + " The maximum image size is %"PRIu64 "GB", + max_vdi_size / 1024 / 1024 / 1024); + ret = -EINVAL; + goto out; + } + ret = do_sd_create(s, &vid, 0, errp); if (ret) { goto out; @@ -1823,11 +1919,13 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset) BDRVSheepdogState *s = bs->opaque; int ret, fd; unsigned int datalen; + uint64_t max_vdi_size; + max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) * MAX_DATA_OBJS; if (offset < s->inode.vdi_size) { error_report("shrinking is not supported"); return -EINVAL; - } else if (offset > SD_MAX_VDI_SIZE) { + } else if (offset > max_vdi_size) { error_report("too big image size"); return -EINVAL; } @@ -2005,9 +2103,10 @@ static int coroutine_fn sd_co_rw_vector(void *p) SheepdogAIOCB *acb = p; int ret = 0; unsigned long len, done = 0, total = acb->nb_sectors * BDRV_SECTOR_SIZE; - unsigned long idx = acb->sector_num * BDRV_SECTOR_SIZE / SD_DATA_OBJ_SIZE; + unsigned long idx; + uint32_t object_size; uint64_t oid; - uint64_t offset = (acb->sector_num * BDRV_SECTOR_SIZE) % SD_DATA_OBJ_SIZE; + uint64_t offset; BDRVSheepdogState *s = acb->common.bs->opaque; SheepdogInode *inode = &s->inode; AIOReq *aio_req; @@ -2024,6 +2123,10 @@ static int coroutine_fn sd_co_rw_vector(void *p) } } + object_size = (UINT32_C(1) << inode->block_size_shift); + idx = acb->sector_num * BDRV_SECTOR_SIZE / object_size; + offset = (acb->sector_num * BDRV_SECTOR_SIZE) % object_size; + /* * Make sure we don't free the aiocb before we are done with all requests. * This additional reference is dropped at the end of this function. @@ -2037,7 +2140,7 @@ static int coroutine_fn sd_co_rw_vector(void *p) oid = vid_to_data_oid(inode->data_vdi_id[idx], idx); - len = MIN(total - done, SD_DATA_OBJ_SIZE - offset); + len = MIN(total - done, object_size - offset); switch (acb->aiocb_type) { case AIOCB_READ_UDATA: @@ -2061,7 +2164,7 @@ static int coroutine_fn sd_co_rw_vector(void *p) * We discard the object only when the whole object is * 1) allocated 2) trimmed. Otherwise, simply skip it. */ - if (len != SD_DATA_OBJ_SIZE || inode->data_vdi_id[idx] == 0) { + if (len != object_size || inode->data_vdi_id[idx] == 0) { goto done; } break; @@ -2414,6 +2517,7 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data, uint64_t offset; uint32_t vdi_index; uint32_t vdi_id = load ? s->inode.parent_vdi_id : s->inode.vdi_id; + uint32_t object_size = (UINT32_C(1) << s->inode.block_size_shift); fd = connect_to_sdog(s, &local_err); if (fd < 0) { @@ -2422,10 +2526,10 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data, } while (remaining) { - vdi_index = pos / SD_DATA_OBJ_SIZE; - offset = pos % SD_DATA_OBJ_SIZE; + vdi_index = pos / object_size; + offset = pos % object_size; - data_len = MIN(remaining, SD_DATA_OBJ_SIZE - offset); + data_len = MIN(remaining, object_size - offset); vmstate_oid = vid_to_vmstate_oid(vdi_id, vdi_index); @@ -2512,10 +2616,11 @@ sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, { BDRVSheepdogState *s = bs->opaque; SheepdogInode *inode = &s->inode; + uint32_t object_size = (UINT32_C(1) << inode->block_size_shift); uint64_t offset = sector_num * BDRV_SECTOR_SIZE; - unsigned long start = offset / SD_DATA_OBJ_SIZE, + unsigned long start = offset / object_size, end = DIV_ROUND_UP((sector_num + nb_sectors) * - BDRV_SECTOR_SIZE, SD_DATA_OBJ_SIZE); + BDRV_SECTOR_SIZE, object_size); unsigned long idx; int64_t ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset; @@ -2534,7 +2639,7 @@ sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, } } - *pnum = (idx - start) * SD_DATA_OBJ_SIZE / BDRV_SECTOR_SIZE; + *pnum = (idx - start) * object_size / BDRV_SECTOR_SIZE; if (*pnum > nb_sectors) { *pnum = nb_sectors; } @@ -2545,14 +2650,15 @@ static int64_t sd_get_allocated_file_size(BlockDriverState *bs) { BDRVSheepdogState *s = bs->opaque; SheepdogInode *inode = &s->inode; - unsigned long i, last = DIV_ROUND_UP(inode->vdi_size, SD_DATA_OBJ_SIZE); + uint32_t object_size = (UINT32_C(1) << inode->block_size_shift); + unsigned long i, last = DIV_ROUND_UP(inode->vdi_size, object_size); uint64_t size = 0; for (i = 0; i < last; i++) { if (inode->data_vdi_id[i] == 0) { continue; } - size += SD_DATA_OBJ_SIZE; + size += object_size; } return size; } @@ -2581,6 +2687,11 @@ static QemuOptsList sd_create_opts = { .type = QEMU_OPT_STRING, .help = "Redundancy of the image" }, + { + .name = BLOCK_OPT_OBJECT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Object size of the image" + }, { /* end of list */ } } }; diff --git a/include/block/block_int.h b/include/block/block_int.h index b340e7e140ce..9f7b5bd18917 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -56,6 +56,7 @@ #define BLOCK_OPT_ADAPTER_TYPE "adapter_type" #define BLOCK_OPT_REDUNDANCY "redundancy" #define BLOCK_OPT_NOCOW "nocow" +#define BLOCK_OPT_OBJECT_SIZE "object_size" #define BLOCK_PROBE_BUF_SIZE 512 From a6dcf097fad2773fdee9ea12f8452dcc259e9ee1 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 12 Feb 2015 08:35:49 +0300 Subject: [PATCH 07/73] block/raw-posix: fix compilation warning on OSX block/raw-posix.c:947:19: warning: unused variable 's' [-Wunused-variable] BDRVRawState *s = aiocb->bs->opaque; This variable is used only when on of the following macros are defined CONFIG_XFS, CONFIG_FALLOCATE, CONFIG_FALLOCATE_PUNCH_HOLE or CONFIG_FALLOCATE_ZERO_RANGE. Fortunately, CONFIG_FALLOCATE_PUNCH_HOLE and CONFIG_FALLOCATE_ZERO_RANGE could be defined only along with CONFIG_FALLOCATE. Therefore checking for CONFIG_XFS or CONFIG_FALLOCATE would be enough. Signed-off-by: Denis V. Lunev CC: Peter Maydell CC: Kevin Wolf Signed-off-by: Kevin Wolf --- block/raw-posix.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/raw-posix.c b/block/raw-posix.c index b5f077a8f15a..c0b46cac9e87 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -944,7 +944,9 @@ static ssize_t handle_aiocb_write_zeroes_block(RawPosixAIOData *aiocb) static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb) { +#if defined(CONFIG_FALLOCATE) || defined(CONFIG_XFS) BDRVRawState *s = aiocb->bs->opaque; +#endif if (aiocb->aio_type & QEMU_AIO_BLKDEV) { return handle_aiocb_write_zeroes_block(aiocb); From 20a1f9d07125bead22efd1dc208b4d14ae1b2a21 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 17 Feb 2015 14:47:54 +0100 Subject: [PATCH 08/73] qcow2: Remove unused struct QCowCreateState The only user went away five years ago with commit a9420734 ('qcow2: Simplify image creation'). It's about time to remove it. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz Reviewed-by: Stefan Hajnoczi --- block/qcow2.h | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/block/qcow2.h b/block/qcow2.h index 6e39a1b639ec..0fee29bb5e7f 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -275,17 +275,6 @@ typedef struct BDRVQcowState { bool cache_discards; } BDRVQcowState; -/* XXX: use std qcow open function ? */ -typedef struct QCowCreateState { - int cluster_size; - int cluster_bits; - uint16_t *refcount_block; - uint64_t *refcount_table; - int64_t l1_table_offset; - int64_t refcount_table_offset; - int64_t refcount_block_offset; -} QCowCreateState; - struct QCowAIOCB; typedef struct Qcow2COWRegion { From a209f4615c6853a226e847810b6c607c71b6a046 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Tue, 17 Feb 2015 17:55:53 +0800 Subject: [PATCH 09/73] virtio-blk: Check return value of blk_aio_ioctl Since commit 1dc936aa84 (virtio-blk: Use blk_aio_ioctl) we silently lose the request if blk_aio_ioctl returns NULL (not implemented). Fix it by directly returning VIRTIO_BLK_S_UNSUPP as we used to do. Signed-off-by: Fam Zheng Reviewed-by: Stefan Hajnoczi [ kwolf: Fixed build error on win32 ] Signed-off-by: Kevin Wolf --- hw/block/virtio-blk.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index cb71772f9567..3ad5fe44e2ad 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -201,6 +201,7 @@ static int virtio_blk_handle_scsi_req(VirtIOBlockReq *req) #ifdef __linux__ int i; VirtIOBlockIoctlReq *ioctl_req; + BlockAIOCB *acb; #endif /* @@ -278,8 +279,13 @@ static int virtio_blk_handle_scsi_req(VirtIOBlockReq *req) ioctl_req->hdr.sbp = elem->in_sg[elem->in_num - 3].iov_base; ioctl_req->hdr.mx_sb_len = elem->in_sg[elem->in_num - 3].iov_len; - blk_aio_ioctl(blk->blk, SG_IO, &ioctl_req->hdr, - virtio_blk_ioctl_complete, ioctl_req); + acb = blk_aio_ioctl(blk->blk, SG_IO, &ioctl_req->hdr, + virtio_blk_ioctl_complete, ioctl_req); + if (!acb) { + g_free(ioctl_req); + status = VIRTIO_BLK_S_UNSUPP; + goto fail; + } return -EINPROGRESS; #else abort(); From 728312b8c81564e437902aeca9f51fdb1e7c52b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc=20Mar=C3=AD?= Date: Tue, 24 Feb 2015 22:21:51 +0100 Subject: [PATCH 10/73] libqos: Change use of pointers to uint64_t in virtio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert use of pointers in functions of virtio to uint64_t in order to make it platform-independent. Add casting from pointers (in PCI functions) to uint64_t and vice versa through uintptr_t. Signed-off-by: Marc Marí Reviewed-by: Stefan Hajnoczi Message-id: 1424812915-25728-2-git-send-email-marc.mari.barcelo@gmail.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- tests/libqos/virtio-pci.c | 20 +++++++++++--------- tests/libqos/virtio.c | 8 ++++---- tests/libqos/virtio.h | 16 ++++++++-------- tests/virtio-blk-test.c | 21 ++++++++++++++------- 4 files changed, 37 insertions(+), 28 deletions(-) diff --git a/tests/libqos/virtio-pci.c b/tests/libqos/virtio-pci.c index 788ebaff467d..92bcac1478ab 100644 --- a/tests/libqos/virtio-pci.c +++ b/tests/libqos/virtio-pci.c @@ -60,25 +60,25 @@ static void qvirtio_pci_assign_device(QVirtioDevice *d, void *data) *vpcidev = (QVirtioPCIDevice *)d; } -static uint8_t qvirtio_pci_config_readb(QVirtioDevice *d, void *addr) +static uint8_t qvirtio_pci_config_readb(QVirtioDevice *d, uint64_t addr) { QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d; - return qpci_io_readb(dev->pdev, addr); + return qpci_io_readb(dev->pdev, (void *)(uintptr_t)addr); } -static uint16_t qvirtio_pci_config_readw(QVirtioDevice *d, void *addr) +static uint16_t qvirtio_pci_config_readw(QVirtioDevice *d, uint64_t addr) { QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d; - return qpci_io_readw(dev->pdev, addr); + return qpci_io_readw(dev->pdev, (void *)(uintptr_t)addr); } -static uint32_t qvirtio_pci_config_readl(QVirtioDevice *d, void *addr) +static uint32_t qvirtio_pci_config_readl(QVirtioDevice *d, uint64_t addr) { QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d; - return qpci_io_readl(dev->pdev, addr); + return qpci_io_readl(dev->pdev, (void *)(uintptr_t)addr); } -static uint64_t qvirtio_pci_config_readq(QVirtioDevice *d, void *addr) +static uint64_t qvirtio_pci_config_readq(QVirtioDevice *d, uint64_t addr) { QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d; int i; @@ -86,11 +86,13 @@ static uint64_t qvirtio_pci_config_readq(QVirtioDevice *d, void *addr) if (qtest_big_endian()) { for (i = 0; i < 8; ++i) { - u64 |= (uint64_t)qpci_io_readb(dev->pdev, addr + i) << (7 - i) * 8; + u64 |= (uint64_t)qpci_io_readb(dev->pdev, + (void *)(uintptr_t)addr + i) << (7 - i) * 8; } } else { for (i = 0; i < 8; ++i) { - u64 |= (uint64_t)qpci_io_readb(dev->pdev, addr + i) << i * 8; + u64 |= (uint64_t)qpci_io_readb(dev->pdev, + (void *)(uintptr_t)addr + i) << i * 8; } } diff --git a/tests/libqos/virtio.c b/tests/libqos/virtio.c index a0612892492b..3205b88d9010 100644 --- a/tests/libqos/virtio.c +++ b/tests/libqos/virtio.c @@ -12,25 +12,25 @@ #include "libqos/virtio.h" uint8_t qvirtio_config_readb(const QVirtioBus *bus, QVirtioDevice *d, - void *addr) + uint64_t addr) { return bus->config_readb(d, addr); } uint16_t qvirtio_config_readw(const QVirtioBus *bus, QVirtioDevice *d, - void *addr) + uint64_t addr) { return bus->config_readw(d, addr); } uint32_t qvirtio_config_readl(const QVirtioBus *bus, QVirtioDevice *d, - void *addr) + uint64_t addr) { return bus->config_readl(d, addr); } uint64_t qvirtio_config_readq(const QVirtioBus *bus, QVirtioDevice *d, - void *addr) + uint64_t addr) { return bus->config_readq(d, addr); } diff --git a/tests/libqos/virtio.h b/tests/libqos/virtio.h index 29fbacbc998a..2449feec5927 100644 --- a/tests/libqos/virtio.h +++ b/tests/libqos/virtio.h @@ -93,10 +93,10 @@ typedef struct QVRingIndirectDesc { } QVRingIndirectDesc; typedef struct QVirtioBus { - uint8_t (*config_readb)(QVirtioDevice *d, void *addr); - uint16_t (*config_readw)(QVirtioDevice *d, void *addr); - uint32_t (*config_readl)(QVirtioDevice *d, void *addr); - uint64_t (*config_readq)(QVirtioDevice *d, void *addr); + uint8_t (*config_readb)(QVirtioDevice *d, uint64_t addr); + uint16_t (*config_readw)(QVirtioDevice *d, uint64_t addr); + uint32_t (*config_readl)(QVirtioDevice *d, uint64_t addr); + uint64_t (*config_readq)(QVirtioDevice *d, uint64_t addr); /* Get features of the device */ uint32_t (*get_features)(QVirtioDevice *d); @@ -144,13 +144,13 @@ static inline uint32_t qvring_size(uint32_t num, uint32_t align) } uint8_t qvirtio_config_readb(const QVirtioBus *bus, QVirtioDevice *d, - void *addr); + uint64_t addr); uint16_t qvirtio_config_readw(const QVirtioBus *bus, QVirtioDevice *d, - void *addr); + uint64_t addr); uint32_t qvirtio_config_readl(const QVirtioBus *bus, QVirtioDevice *d, - void *addr); + uint64_t addr); uint64_t qvirtio_config_readq(const QVirtioBus *bus, QVirtioDevice *d, - void *addr); + uint64_t addr); uint32_t qvirtio_get_features(const QVirtioBus *bus, QVirtioDevice *d); void qvirtio_set_features(const QVirtioBus *bus, QVirtioDevice *d, uint32_t features); diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c index 89d7cbf919f1..2424c84161c4 100644 --- a/tests/virtio-blk-test.c +++ b/tests/virtio-blk-test.c @@ -157,7 +157,8 @@ static void pci_basic(void) /* MSI-X is not enabled */ addr = dev->addr + QVIRTIO_DEVICE_SPECIFIC_NO_MSIX; - capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, addr); + capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, + (uint64_t)(uintptr_t)addr); g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512); features = qvirtio_get_features(&qvirtio_pci, &dev->vdev); @@ -303,7 +304,8 @@ static void pci_indirect(void) /* MSI-X is not enabled */ addr = dev->addr + QVIRTIO_DEVICE_SPECIFIC_NO_MSIX; - capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, addr); + capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, + (uint64_t)(uintptr_t)addr); g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512); features = qvirtio_get_features(&qvirtio_pci, &dev->vdev); @@ -394,7 +396,8 @@ static void pci_config(void) /* MSI-X is not enabled */ addr = dev->addr + QVIRTIO_DEVICE_SPECIFIC_NO_MSIX; - capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, addr); + capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, + (uint64_t)(uintptr_t)addr); g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512); qvirtio_set_driver_ok(&qvirtio_pci, &dev->vdev); @@ -403,7 +406,8 @@ static void pci_config(void) " 'size': %d } }", n_size); qvirtio_wait_config_isr(&qvirtio_pci, &dev->vdev, QVIRTIO_BLK_TIMEOUT_US); - capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, addr); + capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, + (uint64_t)(uintptr_t)addr); g_assert_cmpint(capacity, ==, n_size / 512); qvirtio_pci_device_disable(dev); @@ -438,7 +442,8 @@ static void pci_msix(void) /* MSI-X is enabled */ addr = dev->addr + QVIRTIO_DEVICE_SPECIFIC_MSIX; - capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, addr); + capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, + (uint64_t)(uintptr_t)addr); g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512); features = qvirtio_get_features(&qvirtio_pci, &dev->vdev); @@ -458,7 +463,8 @@ static void pci_msix(void) qvirtio_wait_config_isr(&qvirtio_pci, &dev->vdev, QVIRTIO_BLK_TIMEOUT_US); - capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, addr); + capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, + (uint64_t)(uintptr_t)addr); g_assert_cmpint(capacity, ==, n_size / 512); /* Write request */ @@ -547,7 +553,8 @@ static void pci_idx(void) /* MSI-X is enabled */ addr = dev->addr + QVIRTIO_DEVICE_SPECIFIC_MSIX; - capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, addr); + capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, + (uint64_t)(uintptr_t)addr); g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512); features = qvirtio_get_features(&qvirtio_pci, &dev->vdev); From 38d8364f4f9388b9f3a593d2664d019cf6c72b38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc=20Mar=C3=AD?= Date: Tue, 24 Feb 2015 22:21:52 +0100 Subject: [PATCH 11/73] tests: Prepare virtio-blk-test for multi-arch implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Modularize functions in virtio-blk-test and add PCI suffix for PCI specific components. Signed-off-by: Marc Marí Reviewed-by: Stefan Hajnoczi Message-id: 1424812915-25728-3-git-send-email-marc.mari.barcelo@gmail.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- tests/virtio-blk-test.c | 154 +++++++++++++++++++++++----------------- 1 file changed, 89 insertions(+), 65 deletions(-) diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c index 2424c84161c4..143d54fa4e84 100644 --- a/tests/virtio-blk-test.c +++ b/tests/virtio-blk-test.c @@ -55,11 +55,10 @@ typedef struct QVirtioBlkReq { uint8_t status; } QVirtioBlkReq; -static QPCIBus *test_start(void) +static char *drive_create(void) { - char *cmdline; - char tmp_path[] = "/tmp/qtest.XXXXXX"; int fd, ret; + char *tmp_path = g_strdup("/tmp/qtest.XXXXXX"); /* Create a temporary raw image */ fd = mkstemp(tmp_path); @@ -68,13 +67,24 @@ static QPCIBus *test_start(void) g_assert_cmpint(ret, ==, 0); close(fd); + return tmp_path; +} + +static QPCIBus *pci_test_start(void) +{ + char *cmdline; + char *tmp_path; + + tmp_path = drive_create(); + cmdline = g_strdup_printf("-drive if=none,id=drive0,file=%s,format=raw " - "-drive if=none,id=drive1,file=/dev/null,format=raw " - "-device virtio-blk-pci,id=drv0,drive=drive0," - "addr=%x.%x", - tmp_path, PCI_SLOT, PCI_FN); + "-drive if=none,id=drive1,file=/dev/null,format=raw " + "-device virtio-blk-pci,id=drv0,drive=drive0," + "addr=%x.%x", + tmp_path, PCI_SLOT, PCI_FN); qtest_start(cmdline); unlink(tmp_path); + g_free(tmp_path); g_free(cmdline); return qpci_init_pc(); @@ -85,7 +95,7 @@ static void test_end(void) qtest_end(); } -static QVirtioPCIDevice *virtio_blk_init(QPCIBus *bus, int slot) +static QVirtioPCIDevice *virtio_blk_pci_init(QPCIBus *bus, int slot) { QVirtioPCIDevice *dev; @@ -135,14 +145,10 @@ static uint64_t virtio_blk_request(QGuestAllocator *alloc, QVirtioBlkReq *req, return addr; } -static void pci_basic(void) +static void test_basic(const QVirtioBus *bus, QVirtioDevice *dev, + QGuestAllocator *alloc, QVirtQueue *vq, uint64_t device_specific) { - QVirtioPCIDevice *dev; - QPCIBus *bus; - QVirtQueuePCI *vqpci; - QGuestAllocator *alloc; QVirtioBlkReq req; - void *addr; uint64_t req_addr; uint64_t capacity; uint32_t features; @@ -150,28 +156,16 @@ static void pci_basic(void) uint8_t status; char *data; - bus = test_start(); - - dev = virtio_blk_init(bus, PCI_SLOT); - - /* MSI-X is not enabled */ - addr = dev->addr + QVIRTIO_DEVICE_SPECIFIC_NO_MSIX; - - capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, - (uint64_t)(uintptr_t)addr); + capacity = qvirtio_config_readq(bus, dev, device_specific); g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512); - features = qvirtio_get_features(&qvirtio_pci, &dev->vdev); + features = qvirtio_get_features(bus, dev); features = features & ~(QVIRTIO_F_BAD_FEATURE | QVIRTIO_F_RING_INDIRECT_DESC | QVIRTIO_F_RING_EVENT_IDX | QVIRTIO_BLK_F_SCSI); - qvirtio_set_features(&qvirtio_pci, &dev->vdev, features); + qvirtio_set_features(bus, dev, features); - alloc = pc_alloc_init(); - vqpci = (QVirtQueuePCI *)qvirtqueue_setup(&qvirtio_pci, &dev->vdev, - alloc, 0); - - qvirtio_set_driver_ok(&qvirtio_pci, &dev->vdev); + qvirtio_set_driver_ok(bus, dev); /* Write and read with 2 descriptor layout */ /* Write request */ @@ -185,12 +179,11 @@ static void pci_basic(void) g_free(req.data); - free_head = qvirtqueue_add(&vqpci->vq, req_addr, 528, false, true); - qvirtqueue_add(&vqpci->vq, req_addr + 528, 1, true, false); - qvirtqueue_kick(&qvirtio_pci, &dev->vdev, &vqpci->vq, free_head); + free_head = qvirtqueue_add(vq, req_addr, 528, false, true); + qvirtqueue_add(vq, req_addr + 528, 1, true, false); + qvirtqueue_kick(bus, dev, vq, free_head); - qvirtio_wait_queue_isr(&qvirtio_pci, &dev->vdev, &vqpci->vq, - QVIRTIO_BLK_TIMEOUT_US); + qvirtio_wait_queue_isr(bus, dev, vq, QVIRTIO_BLK_TIMEOUT_US); status = readb(req_addr + 528); g_assert_cmpint(status, ==, 0); @@ -206,13 +199,12 @@ static void pci_basic(void) g_free(req.data); - free_head = qvirtqueue_add(&vqpci->vq, req_addr, 16, false, true); - qvirtqueue_add(&vqpci->vq, req_addr + 16, 513, true, false); + free_head = qvirtqueue_add(vq, req_addr, 16, false, true); + qvirtqueue_add(vq, req_addr + 16, 513, true, false); - qvirtqueue_kick(&qvirtio_pci, &dev->vdev, &vqpci->vq, free_head); + qvirtqueue_kick(bus, dev, vq, free_head); - qvirtio_wait_queue_isr(&qvirtio_pci, &dev->vdev, &vqpci->vq, - QVIRTIO_BLK_TIMEOUT_US); + qvirtio_wait_queue_isr(bus, dev, vq, QVIRTIO_BLK_TIMEOUT_US); status = readb(req_addr + 528); g_assert_cmpint(status, ==, 0); @@ -233,14 +225,15 @@ static void pci_basic(void) req_addr = virtio_blk_request(alloc, &req, 512); - free_head = qvirtqueue_add(&vqpci->vq, req_addr, 16, false, true); - qvirtqueue_add(&vqpci->vq, req_addr + 16, 512, false, true); - qvirtqueue_add(&vqpci->vq, req_addr + 528, 1, true, false); + g_free(req.data); - qvirtqueue_kick(&qvirtio_pci, &dev->vdev, &vqpci->vq, free_head); + free_head = qvirtqueue_add(vq, req_addr, 16, false, true); + qvirtqueue_add(vq, req_addr + 16, 512, false, true); + qvirtqueue_add(vq, req_addr + 528, 1, true, false); - qvirtio_wait_queue_isr(&qvirtio_pci, &dev->vdev, &vqpci->vq, - QVIRTIO_BLK_TIMEOUT_US); + qvirtqueue_kick(bus, dev, vq, free_head); + + qvirtio_wait_queue_isr(bus, dev, vq, QVIRTIO_BLK_TIMEOUT_US); status = readb(req_addr + 528); g_assert_cmpint(status, ==, 0); @@ -256,14 +249,13 @@ static void pci_basic(void) g_free(req.data); - free_head = qvirtqueue_add(&vqpci->vq, req_addr, 16, false, true); - qvirtqueue_add(&vqpci->vq, req_addr + 16, 512, true, true); - qvirtqueue_add(&vqpci->vq, req_addr + 528, 1, true, false); + free_head = qvirtqueue_add(vq, req_addr, 16, false, true); + qvirtqueue_add(vq, req_addr + 16, 512, true, true); + qvirtqueue_add(vq, req_addr + 528, 1, true, false); - qvirtqueue_kick(&qvirtio_pci, &dev->vdev, &vqpci->vq, free_head); + qvirtqueue_kick(bus, dev, vq, free_head); - qvirtio_wait_queue_isr(&qvirtio_pci, &dev->vdev, &vqpci->vq, - QVIRTIO_BLK_TIMEOUT_US); + qvirtio_wait_queue_isr(bus, dev, vq, QVIRTIO_BLK_TIMEOUT_US); status = readb(req_addr + 528); g_assert_cmpint(status, ==, 0); @@ -273,11 +265,35 @@ static void pci_basic(void) g_free(data); guest_free(alloc, req_addr); +} + +static void pci_basic(void) +{ + QVirtioPCIDevice *dev; + QPCIBus *bus; + QVirtQueuePCI *vqpci; + QGuestAllocator *alloc; + void *addr; + + bus = pci_test_start(); + dev = virtio_blk_pci_init(bus, PCI_SLOT); + + alloc = pc_alloc_init(); + vqpci = (QVirtQueuePCI *)qvirtqueue_setup(&qvirtio_pci, &dev->vdev, + alloc, 0); + + /* MSI-X is not enabled */ + addr = dev->addr + QVIRTIO_DEVICE_SPECIFIC_NO_MSIX; + + test_basic(&qvirtio_pci, &dev->vdev, alloc, &vqpci->vq, + (uint64_t)(uintptr_t)addr); /* End test */ guest_free(alloc, vqpci->vq.desc); + pc_alloc_uninit(alloc); qvirtio_pci_device_disable(dev); g_free(dev); + qpci_free_pc(bus); test_end(); } @@ -297,9 +313,9 @@ static void pci_indirect(void) uint8_t status; char *data; - bus = test_start(); + bus = pci_test_start(); - dev = virtio_blk_init(bus, PCI_SLOT); + dev = virtio_blk_pci_init(bus, PCI_SLOT); /* MSI-X is not enabled */ addr = dev->addr + QVIRTIO_DEVICE_SPECIFIC_NO_MSIX; @@ -376,8 +392,10 @@ static void pci_indirect(void) /* End test */ guest_free(alloc, vqpci->vq.desc); + pc_alloc_uninit(alloc); qvirtio_pci_device_disable(dev); g_free(dev); + qpci_free_pc(bus); test_end(); } @@ -389,9 +407,9 @@ static void pci_config(void) void *addr; uint64_t capacity; - bus = test_start(); + bus = pci_test_start(); - dev = virtio_blk_init(bus, PCI_SLOT); + dev = virtio_blk_pci_init(bus, PCI_SLOT); /* MSI-X is not enabled */ addr = dev->addr + QVIRTIO_DEVICE_SPECIFIC_NO_MSIX; @@ -412,6 +430,7 @@ static void pci_config(void) qvirtio_pci_device_disable(dev); g_free(dev); + qpci_free_pc(bus); test_end(); } @@ -431,10 +450,10 @@ static void pci_msix(void) uint8_t status; char *data; - bus = test_start(); + bus = pci_test_start(); alloc = pc_alloc_init(); - dev = virtio_blk_init(bus, PCI_SLOT); + dev = virtio_blk_pci_init(bus, PCI_SLOT); qpci_msix_enable(dev->pdev); qvirtio_pci_set_msix_configuration_vector(dev, alloc, 0); @@ -520,10 +539,12 @@ static void pci_msix(void) guest_free(alloc, req_addr); /* End test */ - guest_free(alloc, (uint64_t)vqpci->vq.desc); + guest_free(alloc, vqpci->vq.desc); + pc_alloc_uninit(alloc); qpci_msix_disable(dev->pdev); qvirtio_pci_device_disable(dev); g_free(dev); + qpci_free_pc(bus); test_end(); } @@ -542,10 +563,10 @@ static void pci_idx(void) uint8_t status; char *data; - bus = test_start(); + bus = pci_test_start(); alloc = pc_alloc_init(); - dev = virtio_blk_init(bus, PCI_SLOT); + dev = virtio_blk_pci_init(bus, PCI_SLOT); qpci_msix_enable(dev->pdev); qvirtio_pci_set_msix_configuration_vector(dev, alloc, 0); @@ -643,30 +664,33 @@ static void pci_idx(void) /* End test */ guest_free(alloc, vqpci->vq.desc); + pc_alloc_uninit(alloc); qpci_msix_disable(dev->pdev); qvirtio_pci_device_disable(dev); g_free(dev); + qpci_free_pc(bus); test_end(); } -static void hotplug(void) +static void pci_hotplug(void) { QPCIBus *bus; QVirtioPCIDevice *dev; - bus = test_start(); + bus = pci_test_start(); /* plug secondary disk */ qpci_plug_device_test("virtio-blk-pci", "drv1", PCI_SLOT_HP, "'drive': 'drive1'"); - dev = virtio_blk_init(bus, PCI_SLOT_HP); + dev = virtio_blk_pci_init(bus, PCI_SLOT_HP); g_assert(dev); qvirtio_pci_device_disable(dev); g_free(dev); /* unplug secondary disk */ qpci_unplug_acpi_device_test("drv1", PCI_SLOT_HP); + qpci_free_pc(bus); test_end(); } @@ -681,7 +705,7 @@ int main(int argc, char **argv) g_test_add_func("/virtio/blk/pci/config", pci_config); g_test_add_func("/virtio/blk/pci/msix", pci_msix); g_test_add_func("/virtio/blk/pci/idx", pci_idx); - g_test_add_func("/virtio/blk/pci/hotplug", hotplug); + g_test_add_func("/virtio/blk/pci/hotplug", pci_hotplug); ret = g_test_run(); From 50311a81f812d234ed0782df7a09971978e226a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc=20Mar=C3=AD?= Date: Tue, 24 Feb 2015 22:21:53 +0100 Subject: [PATCH 12/73] libqos: Remove PCI assumptions in constants of virtio driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert PCI-specific constants names of libqos virtio driver. Signed-off-by: Marc Marí Reviewed-by: Stefan Hajnoczi Message-id: 1424812915-25728-4-git-send-email-marc.mari.barcelo@gmail.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- tests/libqos/virtio-pci.c | 30 +++++++++++++++--------------- tests/libqos/virtio-pci.h | 24 ++++++++++++------------ tests/virtio-blk-test.c | 11 ++++++----- 3 files changed, 33 insertions(+), 32 deletions(-) diff --git a/tests/libqos/virtio-pci.c b/tests/libqos/virtio-pci.c index 92bcac1478ab..046a3160df26 100644 --- a/tests/libqos/virtio-pci.c +++ b/tests/libqos/virtio-pci.c @@ -102,31 +102,31 @@ static uint64_t qvirtio_pci_config_readq(QVirtioDevice *d, uint64_t addr) static uint32_t qvirtio_pci_get_features(QVirtioDevice *d) { QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d; - return qpci_io_readl(dev->pdev, dev->addr + QVIRTIO_DEVICE_FEATURES); + return qpci_io_readl(dev->pdev, dev->addr + QVIRTIO_PCI_DEVICE_FEATURES); } static void qvirtio_pci_set_features(QVirtioDevice *d, uint32_t features) { QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d; - qpci_io_writel(dev->pdev, dev->addr + QVIRTIO_GUEST_FEATURES, features); + qpci_io_writel(dev->pdev, dev->addr + QVIRTIO_PCI_GUEST_FEATURES, features); } static uint32_t qvirtio_pci_get_guest_features(QVirtioDevice *d) { QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d; - return qpci_io_readl(dev->pdev, dev->addr + QVIRTIO_GUEST_FEATURES); + return qpci_io_readl(dev->pdev, dev->addr + QVIRTIO_PCI_GUEST_FEATURES); } static uint8_t qvirtio_pci_get_status(QVirtioDevice *d) { QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d; - return qpci_io_readb(dev->pdev, dev->addr + QVIRTIO_DEVICE_STATUS); + return qpci_io_readb(dev->pdev, dev->addr + QVIRTIO_PCI_DEVICE_STATUS); } static void qvirtio_pci_set_status(QVirtioDevice *d, uint8_t status) { QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d; - qpci_io_writeb(dev->pdev, dev->addr + QVIRTIO_DEVICE_STATUS, status); + qpci_io_writeb(dev->pdev, dev->addr + QVIRTIO_PCI_DEVICE_STATUS, status); } static bool qvirtio_pci_get_queue_isr_status(QVirtioDevice *d, QVirtQueue *vq) @@ -146,7 +146,7 @@ static bool qvirtio_pci_get_queue_isr_status(QVirtioDevice *d, QVirtQueue *vq) return data == vqpci->msix_data; } } else { - return qpci_io_readb(dev->pdev, dev->addr + QVIRTIO_ISR_STATUS) & 1; + return qpci_io_readb(dev->pdev, dev->addr + QVIRTIO_PCI_ISR_STATUS) & 1; } } @@ -166,26 +166,26 @@ static bool qvirtio_pci_get_config_isr_status(QVirtioDevice *d) return data == dev->config_msix_data; } } else { - return qpci_io_readb(dev->pdev, dev->addr + QVIRTIO_ISR_STATUS) & 2; + return qpci_io_readb(dev->pdev, dev->addr + QVIRTIO_PCI_ISR_STATUS) & 2; } } static void qvirtio_pci_queue_select(QVirtioDevice *d, uint16_t index) { QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d; - qpci_io_writeb(dev->pdev, dev->addr + QVIRTIO_QUEUE_SELECT, index); + qpci_io_writeb(dev->pdev, dev->addr + QVIRTIO_PCI_QUEUE_SELECT, index); } static uint16_t qvirtio_pci_get_queue_size(QVirtioDevice *d) { QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d; - return qpci_io_readw(dev->pdev, dev->addr + QVIRTIO_QUEUE_SIZE); + return qpci_io_readw(dev->pdev, dev->addr + QVIRTIO_PCI_QUEUE_SIZE); } static void qvirtio_pci_set_queue_address(QVirtioDevice *d, uint32_t pfn) { QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d; - qpci_io_writel(dev->pdev, dev->addr + QVIRTIO_QUEUE_ADDRESS, pfn); + qpci_io_writel(dev->pdev, dev->addr + QVIRTIO_PCI_QUEUE_ADDRESS, pfn); } static QVirtQueue *qvirtio_pci_virtqueue_setup(QVirtioDevice *d, @@ -227,7 +227,7 @@ static QVirtQueue *qvirtio_pci_virtqueue_setup(QVirtioDevice *d, static void qvirtio_pci_virtqueue_kick(QVirtioDevice *d, QVirtQueue *vq) { QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d; - qpci_io_writew(dev->pdev, dev->addr + QVIRTIO_QUEUE_NOTIFY, vq->index); + qpci_io_writew(dev->pdev, dev->addr + QVIRTIO_PCI_QUEUE_NOTIFY, vq->index); } const QVirtioBus qvirtio_pci = { @@ -307,8 +307,8 @@ void qvirtqueue_pci_msix_setup(QVirtioPCIDevice *d, QVirtQueuePCI *vqpci, control & ~PCI_MSIX_ENTRY_CTRL_MASKBIT); qvirtio_pci_queue_select(&d->vdev, vqpci->vq.index); - qpci_io_writew(d->pdev, d->addr + QVIRTIO_MSIX_QUEUE_VECTOR, entry); - vector = qpci_io_readw(d->pdev, d->addr + QVIRTIO_MSIX_QUEUE_VECTOR); + qpci_io_writew(d->pdev, d->addr + QVIRTIO_PCI_MSIX_QUEUE_VECTOR, entry); + vector = qpci_io_readw(d->pdev, d->addr + QVIRTIO_PCI_MSIX_QUEUE_VECTOR); g_assert_cmphex(vector, !=, QVIRTIO_MSI_NO_VECTOR); } @@ -339,7 +339,7 @@ void qvirtio_pci_set_msix_configuration_vector(QVirtioPCIDevice *d, qpci_io_writel(d->pdev, addr + PCI_MSIX_ENTRY_VECTOR_CTRL, control & ~PCI_MSIX_ENTRY_CTRL_MASKBIT); - qpci_io_writew(d->pdev, d->addr + QVIRTIO_MSIX_CONF_VECTOR, entry); - vector = qpci_io_readw(d->pdev, d->addr + QVIRTIO_MSIX_CONF_VECTOR); + qpci_io_writew(d->pdev, d->addr + QVIRTIO_PCI_MSIX_CONF_VECTOR, entry); + vector = qpci_io_readw(d->pdev, d->addr + QVIRTIO_PCI_MSIX_CONF_VECTOR); g_assert_cmphex(vector, !=, QVIRTIO_MSI_NO_VECTOR); } diff --git a/tests/libqos/virtio-pci.h b/tests/libqos/virtio-pci.h index 883f7ff26796..8f0e52ad4749 100644 --- a/tests/libqos/virtio-pci.h +++ b/tests/libqos/virtio-pci.h @@ -13,18 +13,18 @@ #include "libqos/virtio.h" #include "libqos/pci.h" -#define QVIRTIO_DEVICE_FEATURES 0x00 -#define QVIRTIO_GUEST_FEATURES 0x04 -#define QVIRTIO_QUEUE_ADDRESS 0x08 -#define QVIRTIO_QUEUE_SIZE 0x0C -#define QVIRTIO_QUEUE_SELECT 0x0E -#define QVIRTIO_QUEUE_NOTIFY 0x10 -#define QVIRTIO_DEVICE_STATUS 0x12 -#define QVIRTIO_ISR_STATUS 0x13 -#define QVIRTIO_MSIX_CONF_VECTOR 0x14 -#define QVIRTIO_MSIX_QUEUE_VECTOR 0x16 -#define QVIRTIO_DEVICE_SPECIFIC_MSIX 0x18 -#define QVIRTIO_DEVICE_SPECIFIC_NO_MSIX 0x14 +#define QVIRTIO_PCI_DEVICE_FEATURES 0x00 +#define QVIRTIO_PCI_GUEST_FEATURES 0x04 +#define QVIRTIO_PCI_QUEUE_ADDRESS 0x08 +#define QVIRTIO_PCI_QUEUE_SIZE 0x0C +#define QVIRTIO_PCI_QUEUE_SELECT 0x0E +#define QVIRTIO_PCI_QUEUE_NOTIFY 0x10 +#define QVIRTIO_PCI_DEVICE_STATUS 0x12 +#define QVIRTIO_PCI_ISR_STATUS 0x13 +#define QVIRTIO_PCI_MSIX_CONF_VECTOR 0x14 +#define QVIRTIO_PCI_MSIX_QUEUE_VECTOR 0x16 +#define QVIRTIO_PCI_DEVICE_SPECIFIC_MSIX 0x18 +#define QVIRTIO_PCI_DEVICE_SPECIFIC_NO_MSIX 0x14 #define QVIRTIO_PCI_ALIGN 4096 diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c index 143d54fa4e84..e9030325b4e2 100644 --- a/tests/virtio-blk-test.c +++ b/tests/virtio-blk-test.c @@ -157,6 +157,7 @@ static void test_basic(const QVirtioBus *bus, QVirtioDevice *dev, char *data; capacity = qvirtio_config_readq(bus, dev, device_specific); + g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512); features = qvirtio_get_features(bus, dev); @@ -283,7 +284,7 @@ static void pci_basic(void) alloc, 0); /* MSI-X is not enabled */ - addr = dev->addr + QVIRTIO_DEVICE_SPECIFIC_NO_MSIX; + addr = dev->addr + QVIRTIO_PCI_DEVICE_SPECIFIC_NO_MSIX; test_basic(&qvirtio_pci, &dev->vdev, alloc, &vqpci->vq, (uint64_t)(uintptr_t)addr); @@ -318,7 +319,7 @@ static void pci_indirect(void) dev = virtio_blk_pci_init(bus, PCI_SLOT); /* MSI-X is not enabled */ - addr = dev->addr + QVIRTIO_DEVICE_SPECIFIC_NO_MSIX; + addr = dev->addr + QVIRTIO_PCI_DEVICE_SPECIFIC_NO_MSIX; capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, (uint64_t)(uintptr_t)addr); @@ -412,7 +413,7 @@ static void pci_config(void) dev = virtio_blk_pci_init(bus, PCI_SLOT); /* MSI-X is not enabled */ - addr = dev->addr + QVIRTIO_DEVICE_SPECIFIC_NO_MSIX; + addr = dev->addr + QVIRTIO_PCI_DEVICE_SPECIFIC_NO_MSIX; capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, (uint64_t)(uintptr_t)addr); @@ -459,7 +460,7 @@ static void pci_msix(void) qvirtio_pci_set_msix_configuration_vector(dev, alloc, 0); /* MSI-X is enabled */ - addr = dev->addr + QVIRTIO_DEVICE_SPECIFIC_MSIX; + addr = dev->addr + QVIRTIO_PCI_DEVICE_SPECIFIC_MSIX; capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, (uint64_t)(uintptr_t)addr); @@ -572,7 +573,7 @@ static void pci_idx(void) qvirtio_pci_set_msix_configuration_vector(dev, alloc, 0); /* MSI-X is enabled */ - addr = dev->addr + QVIRTIO_DEVICE_SPECIFIC_MSIX; + addr = dev->addr + QVIRTIO_PCI_DEVICE_SPECIFIC_MSIX; capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, (uint64_t)(uintptr_t)addr); From 870a306dec0cfdf32c8179acfd2c4d0bfd000d6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc=20Mar=C3=AD?= Date: Tue, 24 Feb 2015 22:21:54 +0100 Subject: [PATCH 13/73] libqos: Add malloc generic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This malloc is a basic interface implementation that works for any platform. It should be replaced in the future for a real malloc implementation for each of the platforms. Signed-off-by: Marc Marí Reviewed-by: Stefan Hajnoczi Message-id: 1424812915-25728-5-git-send-email-marc.mari.barcelo@gmail.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- tests/libqos/malloc-generic.c | 39 +++++++++++++++++++++++++++++++++++ tests/libqos/malloc-generic.h | 21 +++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 tests/libqos/malloc-generic.c create mode 100644 tests/libqos/malloc-generic.h diff --git a/tests/libqos/malloc-generic.c b/tests/libqos/malloc-generic.c new file mode 100644 index 000000000000..d30a2f424072 --- /dev/null +++ b/tests/libqos/malloc-generic.c @@ -0,0 +1,39 @@ +/* + * Basic libqos generic malloc support + * + * Copyright (c) 2014 Marc Marí + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include +#include "libqos/malloc-generic.h" +#include "libqos/malloc.h" + +/* + * Mostly for valgrind happiness, but it does offer + * a chokepoint for debugging guest memory leaks, too. + */ +void generic_alloc_uninit(QGuestAllocator *allocator) +{ + alloc_uninit(allocator); +} + +QGuestAllocator *generic_alloc_init_flags(uint64_t base_addr, uint64_t size, + uint32_t page_size, QAllocOpts flags) +{ + QGuestAllocator *s; + uint64_t start = base_addr + (1 << 20); /* Start at 1MB */ + + s = alloc_init_flags(flags, start, start + size); + alloc_set_page_size(s, page_size); + + return s; +} + +inline QGuestAllocator *generic_alloc_init(uint64_t base_addr, uint64_t size, + uint32_t page_size) +{ + return generic_alloc_init_flags(base_addr, size, page_size, ALLOC_NO_FLAGS); +} diff --git a/tests/libqos/malloc-generic.h b/tests/libqos/malloc-generic.h new file mode 100644 index 000000000000..90104ecec9eb --- /dev/null +++ b/tests/libqos/malloc-generic.h @@ -0,0 +1,21 @@ +/* + * Basic libqos generic malloc support + * + * Copyright (c) 2014 Marc Marí + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef LIBQOS_MALLOC_GENERIC_H +#define LIBQOS_MALLOC_GENERIC_H + +#include "libqos/malloc.h" + +QGuestAllocator *generic_alloc_init(uint64_t base_addr, uint64_t size, + uint32_t page_size); +QGuestAllocator *generic_alloc_init_flags(uint64_t base_addr, uint64_t size, + uint32_t page_size, QAllocOpts flags); +void generic_alloc_uninit(QGuestAllocator *allocator); + +#endif From 0a6ed700061fdd86b3c4b71366be42503ac2ce23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc=20Mar=C3=AD?= Date: Tue, 24 Feb 2015 22:21:55 +0100 Subject: [PATCH 14/73] libqos: Add virtio MMIO support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add virtio MMIO support. Add virtio-blk-test MMIO test case. Signed-off-by: Marc Marí Reviewed-by: Stefan Hajnoczi Message-id: 1424812915-25728-6-git-send-email-marc.mari.barcelo@gmail.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- tests/Makefile | 4 +- tests/libqos/virtio-mmio.c | 198 +++++++++++++++++++++++++++++++++++++ tests/libqos/virtio-mmio.h | 46 +++++++++ tests/virtio-blk-test.c | 83 ++++++++++++++-- 4 files changed, 323 insertions(+), 8 deletions(-) create mode 100644 tests/libqos/virtio-mmio.c create mode 100644 tests/libqos/virtio-mmio.h diff --git a/tests/Makefile b/tests/Makefile index 307035c26cda..fed8096e943f 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -191,6 +191,8 @@ gcov-files-sparc-y += hw/timer/m48t59.c gcov-files-sparc64-y += hw/timer/m48t59.c check-qtest-arm-y = tests/tmp105-test$(EXESUF) gcov-files-arm-y += hw/misc/tmp105.c +check-qtest-arm-y += tests/virtio-blk-test$(EXESUF) +gcov-files-arm-y += arm-softmmu/hw/block/virtio-blk.c check-qtest-ppc-y += tests/boot-order-test$(EXESUF) check-qtest-ppc64-y += tests/boot-order-test$(EXESUF) check-qtest-ppc64-y += tests/spapr-phb-test$(EXESUF) @@ -315,8 +317,8 @@ libqos-pc-obj-y = $(libqos-obj-y) tests/libqos/pci-pc.o libqos-pc-obj-y += tests/libqos/malloc-pc.o tests/libqos/libqos-pc.o libqos-pc-obj-y += tests/libqos/ahci.o libqos-omap-obj-y = $(libqos-obj-y) tests/libqos/i2c-omap.o -libqos-virtio-obj-y = $(libqos-obj-y) $(libqos-pc-obj-y) tests/libqos/virtio.o tests/libqos/virtio-pci.o libqos-usb-obj-y = $(libqos-pc-obj-y) tests/libqos/usb.o +libqos-virtio-obj-y = $(libqos-pc-obj-y) tests/libqos/virtio.o tests/libqos/virtio-pci.o tests/libqos/virtio-mmio.o tests/libqos/malloc-generic.o tests/rtc-test$(EXESUF): tests/rtc-test.o tests/m48t59-test$(EXESUF): tests/m48t59-test.o diff --git a/tests/libqos/virtio-mmio.c b/tests/libqos/virtio-mmio.c new file mode 100644 index 000000000000..b3e62e77d815 --- /dev/null +++ b/tests/libqos/virtio-mmio.c @@ -0,0 +1,198 @@ +/* + * libqos virtio MMIO driver + * + * Copyright (c) 2014 Marc Marí + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include +#include +#include "libqtest.h" +#include "libqos/virtio.h" +#include "libqos/virtio-mmio.h" +#include "libqos/malloc.h" +#include "libqos/malloc-generic.h" + +static uint8_t qvirtio_mmio_config_readb(QVirtioDevice *d, uint64_t addr) +{ + QVirtioMMIODevice *dev = (QVirtioMMIODevice *)d; + return readb(dev->addr + addr); +} + +static uint16_t qvirtio_mmio_config_readw(QVirtioDevice *d, uint64_t addr) +{ + QVirtioMMIODevice *dev = (QVirtioMMIODevice *)d; + return readw(dev->addr + addr); +} + +static uint32_t qvirtio_mmio_config_readl(QVirtioDevice *d, uint64_t addr) +{ + QVirtioMMIODevice *dev = (QVirtioMMIODevice *)d; + return readl(dev->addr + addr); +} + +static uint64_t qvirtio_mmio_config_readq(QVirtioDevice *d, uint64_t addr) +{ + QVirtioMMIODevice *dev = (QVirtioMMIODevice *)d; + return readq(dev->addr + addr); +} + +static uint32_t qvirtio_mmio_get_features(QVirtioDevice *d) +{ + QVirtioMMIODevice *dev = (QVirtioMMIODevice *)d; + writel(dev->addr + QVIRTIO_MMIO_HOST_FEATURES_SEL, 0); + return readl(dev->addr + QVIRTIO_MMIO_HOST_FEATURES); +} + +static void qvirtio_mmio_set_features(QVirtioDevice *d, uint32_t features) +{ + QVirtioMMIODevice *dev = (QVirtioMMIODevice *)d; + dev->features = features; + writel(dev->addr + QVIRTIO_MMIO_GUEST_FEATURES_SEL, 0); + writel(dev->addr + QVIRTIO_MMIO_GUEST_FEATURES, features); +} + +static uint32_t qvirtio_mmio_get_guest_features(QVirtioDevice *d) +{ + QVirtioMMIODevice *dev = (QVirtioMMIODevice *)d; + return dev->features; +} + +static uint8_t qvirtio_mmio_get_status(QVirtioDevice *d) +{ + QVirtioMMIODevice *dev = (QVirtioMMIODevice *)d; + return (uint8_t)readl(dev->addr + QVIRTIO_MMIO_DEVICE_STATUS); +} + +static void qvirtio_mmio_set_status(QVirtioDevice *d, uint8_t status) +{ + QVirtioMMIODevice *dev = (QVirtioMMIODevice *)d; + writel(dev->addr + QVIRTIO_MMIO_DEVICE_STATUS, (uint32_t)status); +} + +static bool qvirtio_mmio_get_queue_isr_status(QVirtioDevice *d, QVirtQueue *vq) +{ + QVirtioMMIODevice *dev = (QVirtioMMIODevice *)d; + uint32_t isr; + + isr = readl(dev->addr + QVIRTIO_MMIO_INTERRUPT_STATUS) & 1; + if (isr != 0) { + writel(dev->addr + QVIRTIO_MMIO_INTERRUPT_ACK, 1); + return true; + } + + return false; +} + +static bool qvirtio_mmio_get_config_isr_status(QVirtioDevice *d) +{ + QVirtioMMIODevice *dev = (QVirtioMMIODevice *)d; + uint32_t isr; + + isr = readl(dev->addr + QVIRTIO_MMIO_INTERRUPT_STATUS) & 2; + if (isr != 0) { + writel(dev->addr + QVIRTIO_MMIO_INTERRUPT_ACK, 2); + return true; + } + + return false; +} + +static void qvirtio_mmio_queue_select(QVirtioDevice *d, uint16_t index) +{ + QVirtioMMIODevice *dev = (QVirtioMMIODevice *)d; + writel(dev->addr + QVIRTIO_MMIO_QUEUE_SEL, (uint32_t)index); + + g_assert_cmphex(readl(dev->addr + QVIRTIO_MMIO_QUEUE_PFN), ==, 0); +} + +static uint16_t qvirtio_mmio_get_queue_size(QVirtioDevice *d) +{ + QVirtioMMIODevice *dev = (QVirtioMMIODevice *)d; + return (uint16_t)readl(dev->addr + QVIRTIO_MMIO_QUEUE_NUM_MAX); +} + +static void qvirtio_mmio_set_queue_address(QVirtioDevice *d, uint32_t pfn) +{ + QVirtioMMIODevice *dev = (QVirtioMMIODevice *)d; + writel(dev->addr + QVIRTIO_MMIO_QUEUE_PFN, pfn); +} + +static QVirtQueue *qvirtio_mmio_virtqueue_setup(QVirtioDevice *d, + QGuestAllocator *alloc, uint16_t index) +{ + QVirtioMMIODevice *dev = (QVirtioMMIODevice *)d; + QVirtQueue *vq; + uint64_t addr; + + vq = g_malloc0(sizeof(*vq)); + qvirtio_mmio_queue_select(d, index); + writel(dev->addr + QVIRTIO_MMIO_QUEUE_ALIGN, dev->page_size); + + vq->index = index; + vq->size = qvirtio_mmio_get_queue_size(d); + vq->free_head = 0; + vq->num_free = vq->size; + vq->align = dev->page_size; + vq->indirect = (dev->features & QVIRTIO_F_RING_INDIRECT_DESC) != 0; + vq->event = (dev->features & QVIRTIO_F_RING_EVENT_IDX) != 0; + + writel(dev->addr + QVIRTIO_MMIO_QUEUE_NUM, vq->size); + + /* Check different than 0 */ + g_assert_cmpint(vq->size, !=, 0); + + /* Check power of 2 */ + g_assert_cmpint(vq->size & (vq->size - 1), ==, 0); + + addr = guest_alloc(alloc, qvring_size(vq->size, dev->page_size)); + qvring_init(alloc, vq, addr); + qvirtio_mmio_set_queue_address(d, vq->desc / dev->page_size); + + return vq; +} + +static void qvirtio_mmio_virtqueue_kick(QVirtioDevice *d, QVirtQueue *vq) +{ + QVirtioMMIODevice *dev = (QVirtioMMIODevice *)d; + writel(dev->addr + QVIRTIO_MMIO_QUEUE_NOTIFY, vq->index); +} + +const QVirtioBus qvirtio_mmio = { + .config_readb = qvirtio_mmio_config_readb, + .config_readw = qvirtio_mmio_config_readw, + .config_readl = qvirtio_mmio_config_readl, + .config_readq = qvirtio_mmio_config_readq, + .get_features = qvirtio_mmio_get_features, + .set_features = qvirtio_mmio_set_features, + .get_guest_features = qvirtio_mmio_get_guest_features, + .get_status = qvirtio_mmio_get_status, + .set_status = qvirtio_mmio_set_status, + .get_queue_isr_status = qvirtio_mmio_get_queue_isr_status, + .get_config_isr_status = qvirtio_mmio_get_config_isr_status, + .queue_select = qvirtio_mmio_queue_select, + .get_queue_size = qvirtio_mmio_get_queue_size, + .set_queue_address = qvirtio_mmio_set_queue_address, + .virtqueue_setup = qvirtio_mmio_virtqueue_setup, + .virtqueue_kick = qvirtio_mmio_virtqueue_kick, +}; + +QVirtioMMIODevice *qvirtio_mmio_init_device(uint64_t addr, uint32_t page_size) +{ + QVirtioMMIODevice *dev; + uint32_t magic; + dev = g_malloc0(sizeof(*dev)); + + magic = readl(addr + QVIRTIO_MMIO_MAGIC_VALUE); + g_assert(magic == ('v' | 'i' << 8 | 'r' << 16 | 't' << 24)); + + dev->addr = addr; + dev->page_size = page_size; + dev->vdev.device_type = readl(addr + QVIRTIO_MMIO_DEVICE_ID); + + writel(addr + QVIRTIO_MMIO_GUEST_PAGE_SIZE, page_size); + + return dev; +} diff --git a/tests/libqos/virtio-mmio.h b/tests/libqos/virtio-mmio.h new file mode 100644 index 000000000000..e3e52b9ce199 --- /dev/null +++ b/tests/libqos/virtio-mmio.h @@ -0,0 +1,46 @@ +/* + * libqos virtio MMIO definitions + * + * Copyright (c) 2014 Marc Marí + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef LIBQOS_VIRTIO_MMIO_H +#define LIBQOS_VIRTIO_MMIO_H + +#include "libqos/virtio.h" + +#define QVIRTIO_MMIO_MAGIC_VALUE 0x000 +#define QVIRTIO_MMIO_VERSION 0x004 +#define QVIRTIO_MMIO_DEVICE_ID 0x008 +#define QVIRTIO_MMIO_VENDOR_ID 0x00C +#define QVIRTIO_MMIO_HOST_FEATURES 0x010 +#define QVIRTIO_MMIO_HOST_FEATURES_SEL 0x014 +#define QVIRTIO_MMIO_GUEST_FEATURES 0x020 +#define QVIRTIO_MMIO_GUEST_FEATURES_SEL 0x024 +#define QVIRTIO_MMIO_GUEST_PAGE_SIZE 0x028 +#define QVIRTIO_MMIO_QUEUE_SEL 0x030 +#define QVIRTIO_MMIO_QUEUE_NUM_MAX 0x034 +#define QVIRTIO_MMIO_QUEUE_NUM 0x038 +#define QVIRTIO_MMIO_QUEUE_ALIGN 0x03C +#define QVIRTIO_MMIO_QUEUE_PFN 0x040 +#define QVIRTIO_MMIO_QUEUE_NOTIFY 0x050 +#define QVIRTIO_MMIO_INTERRUPT_STATUS 0x060 +#define QVIRTIO_MMIO_INTERRUPT_ACK 0x064 +#define QVIRTIO_MMIO_DEVICE_STATUS 0x070 +#define QVIRTIO_MMIO_DEVICE_SPECIFIC 0x100 + +typedef struct QVirtioMMIODevice { + QVirtioDevice vdev; + uint64_t addr; + uint32_t page_size; + uint32_t features; /* As it cannot be read later, save it */ +} QVirtioMMIODevice; + +extern const QVirtioBus qvirtio_mmio; + +QVirtioMMIODevice *qvirtio_mmio_init_device(uint64_t addr, uint32_t page_size); + +#endif diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c index e9030325b4e2..3b943fc39617 100644 --- a/tests/virtio-blk-test.c +++ b/tests/virtio-blk-test.c @@ -16,9 +16,11 @@ #include "libqtest.h" #include "libqos/virtio.h" #include "libqos/virtio-pci.h" +#include "libqos/virtio-mmio.h" #include "libqos/pci-pc.h" #include "libqos/malloc.h" #include "libqos/malloc-pc.h" +#include "libqos/malloc-generic.h" #include "qemu/bswap.h" #define QVIRTIO_BLK_F_BARRIER 0x00000001 @@ -42,10 +44,14 @@ #define TEST_IMAGE_SIZE (64 * 1024 * 1024) #define QVIRTIO_BLK_TIMEOUT_US (30 * 1000 * 1000) +#define PCI_SLOT_HP 0x06 #define PCI_SLOT 0x04 #define PCI_FN 0x00 -#define PCI_SLOT_HP 0x06 +#define MMIO_PAGE_SIZE 4096 +#define MMIO_DEV_BASE_ADDR 0x0A003E00 +#define MMIO_RAM_ADDR 0x40000000 +#define MMIO_RAM_SIZE 0x20000000 typedef struct QVirtioBlkReq { uint32_t type; @@ -90,6 +96,23 @@ static QPCIBus *pci_test_start(void) return qpci_init_pc(); } +static void arm_test_start(void) +{ + char *cmdline; + char *tmp_path; + + tmp_path = drive_create(); + + cmdline = g_strdup_printf("-machine virt " + "-drive if=none,id=drive0,file=%s,format=raw " + "-device virtio-blk-device,drive=drive0", + tmp_path); + qtest_start(cmdline); + unlink(tmp_path); + g_free(tmp_path); + g_free(cmdline); +} + static void test_end(void) { qtest_end(); @@ -695,18 +718,64 @@ static void pci_hotplug(void) test_end(); } +static void mmio_basic(void) +{ + QVirtioMMIODevice *dev; + QVirtQueue *vq; + QGuestAllocator *alloc; + int n_size = TEST_IMAGE_SIZE / 2; + uint64_t capacity; + + arm_test_start(); + + dev = qvirtio_mmio_init_device(MMIO_DEV_BASE_ADDR, MMIO_PAGE_SIZE); + g_assert(dev != NULL); + g_assert_cmphex(dev->vdev.device_type, ==, QVIRTIO_BLK_DEVICE_ID); + + qvirtio_reset(&qvirtio_mmio, &dev->vdev); + qvirtio_set_acknowledge(&qvirtio_mmio, &dev->vdev); + qvirtio_set_driver(&qvirtio_mmio, &dev->vdev); + + alloc = generic_alloc_init(MMIO_RAM_ADDR, MMIO_RAM_SIZE, MMIO_PAGE_SIZE); + vq = qvirtqueue_setup(&qvirtio_mmio, &dev->vdev, alloc, 0); + + test_basic(&qvirtio_mmio, &dev->vdev, alloc, vq, + QVIRTIO_MMIO_DEVICE_SPECIFIC); + + qmp("{ 'execute': 'block_resize', 'arguments': { 'device': 'drive0', " + " 'size': %d } }", n_size); + + qvirtio_wait_queue_isr(&qvirtio_mmio, &dev->vdev, vq, + QVIRTIO_BLK_TIMEOUT_US); + + capacity = qvirtio_config_readq(&qvirtio_mmio, &dev->vdev, + QVIRTIO_MMIO_DEVICE_SPECIFIC); + g_assert_cmpint(capacity, ==, n_size / 512); + + /* End test */ + guest_free(alloc, vq->desc); + generic_alloc_uninit(alloc); + g_free(dev); + test_end(); +} + int main(int argc, char **argv) { int ret; + const char *arch = qtest_get_arch(); g_test_init(&argc, &argv, NULL); - g_test_add_func("/virtio/blk/pci/basic", pci_basic); - g_test_add_func("/virtio/blk/pci/indirect", pci_indirect); - g_test_add_func("/virtio/blk/pci/config", pci_config); - g_test_add_func("/virtio/blk/pci/msix", pci_msix); - g_test_add_func("/virtio/blk/pci/idx", pci_idx); - g_test_add_func("/virtio/blk/pci/hotplug", pci_hotplug); + if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { + qtest_add_func("/virtio/blk/pci/basic", pci_basic); + qtest_add_func("/virtio/blk/pci/indirect", pci_indirect); + qtest_add_func("/virtio/blk/pci/config", pci_config); + qtest_add_func("/virtio/blk/pci/msix", pci_msix); + qtest_add_func("/virtio/blk/pci/idx", pci_idx); + qtest_add_func("/virtio/blk/pci/hotplug", pci_hotplug); + } else if (strcmp(arch, "arm") == 0) { + qtest_add_func("/virtio/blk/mmio/basic", mmio_basic); + } ret = g_test_run(); From 346a53df38e29021e4a9e8c2a759b05744902857 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 10 Feb 2015 15:28:43 -0500 Subject: [PATCH 15/73] qcow2: Add two new fields to BDRVQcowState Add two new fields regarding refcount information (the bit width of every entry and the maximum refcount value) to the BDRVQcowState. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/qcow2-refcount.c | 4 ++-- block/qcow2.c | 3 +++ block/qcow2.h | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 9b80ca79ea68..e124a5418e98 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -584,7 +584,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, refcount = be16_to_cpu(refcount_block[block_index]); refcount += addend; - if (refcount < 0 || refcount > 0xffff) { + if (refcount < 0 || refcount > s->refcount_max) { ret = -EINVAL; goto fail; } @@ -775,7 +775,7 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size) return refcount; } - if (refcount == 0xffff) { + if (refcount == s->refcount_max) { offset = 0; } } diff --git a/block/qcow2.c b/block/qcow2.c index 50e0a947dfa6..e04ba6d56e01 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -684,6 +684,9 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } s->refcount_order = header.refcount_order; + s->refcount_bits = 1 << s->refcount_order; + s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1); + s->refcount_max += s->refcount_max - 1; if (header.crypt_method > QCOW_CRYPT_AES) { error_setg(errp, "Unsupported encryption method: %" PRIu32, diff --git a/block/qcow2.h b/block/qcow2.h index 0fee29bb5e7f..55138c9d9203 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -258,6 +258,8 @@ typedef struct BDRVQcowState { int qcow_version; bool use_lazy_refcounts; int refcount_order; + int refcount_bits; + uint64_t refcount_max; bool discard_passthrough[QCOW2_DISCARD_MAX]; From 0709c5a1530b046183b6e96d9631affcff76c1fc Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 10 Feb 2015 15:28:44 -0500 Subject: [PATCH 16/73] qcow2: Add refcount_bits to format-specific info Add the bit width of every refcount entry to the format-specific information. In contrast to lazy_refcounts and the corrupt flag, this should be always emitted, even for compat=0.10 although it does not support any refcount width other than 16 bits. This is because if a boolean is optional, one normally assumes it to be false when omitted; but if an integer is not specified, it is rather difficult to guess its value. This new field breaks some test outputs, fix them. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/qcow2.c | 4 +++- qapi/block-core.json | 5 ++++- tests/qemu-iotests/060.out | 1 + tests/qemu-iotests/065 | 23 +++++++++++++++-------- tests/qemu-iotests/067.out | 5 +++++ tests/qemu-iotests/082.out | 7 +++++++ tests/qemu-iotests/089.out | 2 ++ 7 files changed, 37 insertions(+), 10 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index e04ba6d56e01..5c92803fff29 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2482,7 +2482,8 @@ static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs) }; if (s->qcow_version == 2) { *spec_info->qcow2 = (ImageInfoSpecificQCow2){ - .compat = g_strdup("0.10"), + .compat = g_strdup("0.10"), + .refcount_bits = s->refcount_bits, }; } else if (s->qcow_version == 3) { *spec_info->qcow2 = (ImageInfoSpecificQCow2){ @@ -2493,6 +2494,7 @@ static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs) .corrupt = s->incompatible_features & QCOW2_INCOMPAT_CORRUPT, .has_corrupt = true, + .refcount_bits = s->refcount_bits, }; } diff --git a/qapi/block-core.json b/qapi/block-core.json index a3fdaf02ba13..90586a594994 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -41,13 +41,16 @@ # @corrupt: #optional true if the image has been marked corrupt; only valid for # compat >= 1.1 (since 2.2) # +# @refcount-bits: width of a refcount entry in bits (since 2.3) +# # Since: 1.7 ## { 'type': 'ImageInfoSpecificQCow2', 'data': { 'compat': 'str', '*lazy-refcounts': 'bool', - '*corrupt': 'bool' + '*corrupt': 'bool', + 'refcount-bits': 'int' } } ## diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out index dc9f6b7570b8..751118951fd2 100644 --- a/tests/qemu-iotests/060.out +++ b/tests/qemu-iotests/060.out @@ -18,6 +18,7 @@ cluster_size: 65536 Format specific information: compat: 1.1 lazy refcounts: false + refcount bits: 16 corrupt: true qemu-io: can't open device TEST_DIR/t.IMGFMT: IMGFMT: Image is corrupt; cannot be opened read/write read 512/512 bytes at offset 0 diff --git a/tests/qemu-iotests/065 b/tests/qemu-iotests/065 index 8d3a9c9afdb8..72aa9707c7d6 100755 --- a/tests/qemu-iotests/065 +++ b/tests/qemu-iotests/065 @@ -88,34 +88,41 @@ class TestQMP(TestImageInfoSpecific): class TestQCow2(TestQemuImgInfo): '''Testing a qcow2 version 2 image''' img_options = 'compat=0.10' - json_compare = { 'compat': '0.10' } - human_compare = [ 'compat: 0.10' ] + json_compare = { 'compat': '0.10', 'refcount-bits': 16 } + human_compare = [ 'compat: 0.10', 'refcount bits: 16' ] class TestQCow3NotLazy(TestQemuImgInfo): '''Testing a qcow2 version 3 image with lazy refcounts disabled''' img_options = 'compat=1.1,lazy_refcounts=off' - json_compare = { 'compat': '1.1', 'lazy-refcounts': False, 'corrupt': False } - human_compare = [ 'compat: 1.1', 'lazy refcounts: false', 'corrupt: false' ] + json_compare = { 'compat': '1.1', 'lazy-refcounts': False, + 'refcount-bits': 16, 'corrupt': False } + human_compare = [ 'compat: 1.1', 'lazy refcounts: false', + 'refcount bits: 16', 'corrupt: false' ] class TestQCow3Lazy(TestQemuImgInfo): '''Testing a qcow2 version 3 image with lazy refcounts enabled''' img_options = 'compat=1.1,lazy_refcounts=on' - json_compare = { 'compat': '1.1', 'lazy-refcounts': True, 'corrupt': False } - human_compare = [ 'compat: 1.1', 'lazy refcounts: true', 'corrupt: false' ] + json_compare = { 'compat': '1.1', 'lazy-refcounts': True, + 'refcount-bits': 16, 'corrupt': False } + human_compare = [ 'compat: 1.1', 'lazy refcounts: true', + 'refcount bits: 16', 'corrupt: false' ] class TestQCow3NotLazyQMP(TestQMP): '''Testing a qcow2 version 3 image with lazy refcounts disabled, opening with lazy refcounts enabled''' img_options = 'compat=1.1,lazy_refcounts=off' qemu_options = 'lazy-refcounts=on' - compare = { 'compat': '1.1', 'lazy-refcounts': False, 'corrupt': False } + compare = { 'compat': '1.1', 'lazy-refcounts': False, + 'refcount-bits': 16, 'corrupt': False } + class TestQCow3LazyQMP(TestQMP): '''Testing a qcow2 version 3 image with lazy refcounts enabled, opening with lazy refcounts disabled''' img_options = 'compat=1.1,lazy_refcounts=on' qemu_options = 'lazy-refcounts=off' - compare = { 'compat': '1.1', 'lazy-refcounts': True, 'corrupt': False } + compare = { 'compat': '1.1', 'lazy-refcounts': True, + 'refcount-bits': 16, 'corrupt': False } TestImageInfoSpecific = None TestQemuImgInfo = None diff --git a/tests/qemu-iotests/067.out b/tests/qemu-iotests/067.out index 00b3eaefc781..6ff41bc7a1fa 100644 --- a/tests/qemu-iotests/067.out +++ b/tests/qemu-iotests/067.out @@ -32,6 +32,7 @@ Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk -device virti "data": { "compat": "1.1", "lazy-refcounts": false, + "refcount-bits": 16, "corrupt": false } }, @@ -208,6 +209,7 @@ Testing: -drive file=TEST_DIR/t.qcow2,format=qcow2,if=none,id=disk "data": { "compat": "1.1", "lazy-refcounts": false, + "refcount-bits": 16, "corrupt": false } }, @@ -414,6 +416,7 @@ Testing: "data": { "compat": "1.1", "lazy-refcounts": false, + "refcount-bits": 16, "corrupt": false } }, @@ -599,6 +602,7 @@ Testing: "data": { "compat": "1.1", "lazy-refcounts": false, + "refcount-bits": 16, "corrupt": false } }, @@ -710,6 +714,7 @@ Testing: "data": { "compat": "1.1", "lazy-refcounts": false, + "refcount-bits": 16, "corrupt": false } }, diff --git a/tests/qemu-iotests/082.out b/tests/qemu-iotests/082.out index d0234e613718..38729dc2190c 100644 --- a/tests/qemu-iotests/082.out +++ b/tests/qemu-iotests/082.out @@ -18,6 +18,7 @@ cluster_size: 4096 Format specific information: compat: 1.1 lazy refcounts: true + refcount bits: 16 corrupt: false Testing: create -f qcow2 -o cluster_size=4k -o lazy_refcounts=on -o cluster_size=8k TEST_DIR/t.qcow2 128M @@ -29,6 +30,7 @@ cluster_size: 8192 Format specific information: compat: 1.1 lazy refcounts: true + refcount bits: 16 corrupt: false Testing: create -f qcow2 -o cluster_size=4k,cluster_size=8k TEST_DIR/t.qcow2 128M @@ -190,6 +192,7 @@ cluster_size: 4096 Format specific information: compat: 1.1 lazy refcounts: true + refcount bits: 16 corrupt: false Testing: convert -O qcow2 -o cluster_size=4k -o lazy_refcounts=on -o cluster_size=8k TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base @@ -200,6 +203,7 @@ cluster_size: 8192 Format specific information: compat: 1.1 lazy refcounts: true + refcount bits: 16 corrupt: false Testing: convert -O qcow2 -o cluster_size=4k,cluster_size=8k TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base @@ -346,6 +350,7 @@ cluster_size: 65536 Format specific information: compat: 1.1 lazy refcounts: true + refcount bits: 16 corrupt: false Testing: amend -f qcow2 -o size=130M -o lazy_refcounts=off TEST_DIR/t.qcow2 @@ -356,6 +361,7 @@ cluster_size: 65536 Format specific information: compat: 1.1 lazy refcounts: false + refcount bits: 16 corrupt: false Testing: amend -f qcow2 -o size=8M -o lazy_refcounts=on -o size=132M TEST_DIR/t.qcow2 @@ -366,6 +372,7 @@ cluster_size: 65536 Format specific information: compat: 1.1 lazy refcounts: true + refcount bits: 16 corrupt: false Testing: amend -f qcow2 -o size=4M,size=148M TEST_DIR/t.qcow2 diff --git a/tests/qemu-iotests/089.out b/tests/qemu-iotests/089.out index a6d37118c7c3..5b541a340c3c 100644 --- a/tests/qemu-iotests/089.out +++ b/tests/qemu-iotests/089.out @@ -43,6 +43,7 @@ vm state offset: 512 MiB Format specific information: compat: 1.1 lazy refcounts: false + refcount bits: 16 corrupt: false format name: IMGFMT cluster size: 64 KiB @@ -50,5 +51,6 @@ vm state offset: 512 MiB Format specific information: compat: 1.1 lazy refcounts: false + refcount bits: 16 corrupt: false *** done From c6e9d8ae6629aaf3c6d483032d219b988d78583f Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 10 Feb 2015 15:28:45 -0500 Subject: [PATCH 17/73] qcow2: Do not return new value after refcount update qcow2_update_cluster_refcount() does not have any quick access to the new refcount value, it has to call qcow2_get_refcount(). Some callers do not need that new value at all, others call qcow2_get_refcount() themselves anyway (albeit in a different code path, which can however be easily changed), therefore there is no advantage in making qcow2_update_cluster_refcount() return the new value. Drop it. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/qcow2-refcount.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index e124a5418e98..1a85bcd49c71 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -631,8 +631,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, /* * Increases or decreases the refcount of a given cluster. * - * If the return value is non-negative, it is the new refcount of the cluster. - * If it is negative, it is -errno and indicates an error. + * On success 0 is returned; on failure -errno is returned. */ int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index, @@ -648,7 +647,7 @@ int qcow2_update_cluster_refcount(BlockDriverState *bs, return ret; } - return qcow2_get_refcount(bs, cluster_index); + return 0; } @@ -976,13 +975,15 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, break; } if (addend != 0) { - refcount = qcow2_update_cluster_refcount(bs, + ret = qcow2_update_cluster_refcount(bs, cluster_index, addend, QCOW2_DISCARD_SNAPSHOT); - } else { - refcount = qcow2_get_refcount(bs, cluster_index); + if (ret < 0) { + goto fail; + } } + refcount = qcow2_get_refcount(bs, cluster_index); if (refcount < 0) { ret = refcount; goto fail; @@ -1017,11 +1018,15 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, if (addend != 0) { - refcount = qcow2_update_cluster_refcount(bs, l2_offset >> - s->cluster_bits, addend, QCOW2_DISCARD_SNAPSHOT); - } else { - refcount = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits); + ret = qcow2_update_cluster_refcount(bs, l2_offset >> + s->cluster_bits, + addend, + QCOW2_DISCARD_SNAPSHOT); + if (ret < 0) { + goto fail; + } } + refcount = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits); if (refcount < 0) { ret = refcount; goto fail; From 7324c10f96c821b00d691e2c8ced67d8536bf1d6 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 10 Feb 2015 15:28:46 -0500 Subject: [PATCH 18/73] qcow2: Only return status from qcow2_get_refcount Refcounts can theoretically be of type uint64_t; in order to be able to represent the full range, qcow2_get_refcount() cannot use a single variable to represent both all refcount values and also keep some values reserved for errors. One solution would be to add an Error pointer parameter to qcow2_get_refcount(); however, no caller could (currently) pass that error message, so it would have to be emitted immediately and be passed to the next caller by returning -EIO or something similar. Therefore, an Error parameter does not offer any advantages here. The solution applied by this patch is simpler to use. Because no caller would be able to pass the error message, they would have to print it and free it, whereas with this patch the caller only needs to pass the returned integer (which is often a no-op from the code perspective, because that integer will be stored in a variable "ret" which will be returned by the fail path of many callers). Signed-off-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 8 ++-- block/qcow2-refcount.c | 86 +++++++++++++++++++++++------------------- block/qcow2.h | 3 +- 3 files changed, 53 insertions(+), 44 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 183177d51804..ee50500a076d 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -1640,7 +1640,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, for (i = 0; i < l1_size; i++) { uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK; bool l2_dirty = false; - int l2_refcount; + uint16_t l2_refcount; if (!l2_offset) { /* unallocated */ @@ -1672,9 +1672,9 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, goto fail; } - l2_refcount = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits); - if (l2_refcount < 0) { - ret = l2_refcount; + ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits, + &l2_refcount); + if (ret < 0) { goto fail; } diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 1a85bcd49c71..d8534903c33b 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -87,26 +87,29 @@ static int load_refcount_block(BlockDriverState *bs, } /* - * Returns the refcount of the cluster given by its index. Any non-negative - * return value is the refcount of the cluster, negative values are -errno - * and indicate an error. + * Retrieves the refcount of the cluster given by its index and stores it in + * *refcount. Returns 0 on success and -errno on failure. */ -int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index) +int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index, + uint16_t *refcount) { BDRVQcowState *s = bs->opaque; uint64_t refcount_table_index, block_index; int64_t refcount_block_offset; int ret; uint16_t *refcount_block; - uint16_t refcount; refcount_table_index = cluster_index >> s->refcount_block_bits; - if (refcount_table_index >= s->refcount_table_size) + if (refcount_table_index >= s->refcount_table_size) { + *refcount = 0; return 0; + } refcount_block_offset = s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK; - if (!refcount_block_offset) + if (!refcount_block_offset) { + *refcount = 0; return 0; + } if (offset_into_cluster(s, refcount_block_offset)) { qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#" PRIx64 @@ -122,7 +125,7 @@ int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index) } block_index = cluster_index & (s->refcount_block_size - 1); - refcount = be16_to_cpu(refcount_block[block_index]); + *refcount = be16_to_cpu(refcount_block[block_index]); ret = qcow2_cache_put(bs, s->refcount_block_cache, (void**) &refcount_block); @@ -130,7 +133,7 @@ int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index) return ret; } - return refcount; + return 0; } /* @@ -662,16 +665,17 @@ static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size) { BDRVQcowState *s = bs->opaque; uint64_t i, nb_clusters; - int refcount; + uint16_t refcount; + int ret; nb_clusters = size_to_clusters(s, size); retry: for(i = 0; i < nb_clusters; i++) { uint64_t next_cluster_index = s->free_cluster_index++; - refcount = qcow2_get_refcount(bs, next_cluster_index); + ret = qcow2_get_refcount(bs, next_cluster_index, &refcount); - if (refcount < 0) { - return refcount; + if (ret < 0) { + return ret; } else if (refcount != 0) { goto retry; } @@ -721,7 +725,8 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, BDRVQcowState *s = bs->opaque; uint64_t cluster_index; uint64_t i; - int refcount, ret; + uint16_t refcount; + int ret; assert(nb_clusters >= 0); if (nb_clusters == 0) { @@ -732,10 +737,9 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, /* Check how many clusters there are free */ cluster_index = offset >> s->cluster_bits; for(i = 0; i < nb_clusters; i++) { - refcount = qcow2_get_refcount(bs, cluster_index++); - - if (refcount < 0) { - return refcount; + ret = qcow2_get_refcount(bs, cluster_index++, &refcount); + if (ret < 0) { + return ret; } else if (refcount != 0) { break; } @@ -769,9 +773,10 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size) offset = s->free_byte_offset; if (offset) { - int refcount = qcow2_get_refcount(bs, offset >> s->cluster_bits); - if (refcount < 0) { - return refcount; + uint16_t refcount; + ret = qcow2_get_refcount(bs, offset >> s->cluster_bits, &refcount); + if (ret < 0) { + return ret; } if (refcount == s->refcount_max) { @@ -878,7 +883,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2; bool l1_allocated = false; int64_t old_offset, old_l2_offset; - int i, j, l1_modified = 0, nb_csectors, refcount; + int i, j, l1_modified = 0, nb_csectors; + uint16_t refcount; int ret; l2_table = NULL; @@ -983,9 +989,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, } } - refcount = qcow2_get_refcount(bs, cluster_index); - if (refcount < 0) { - ret = refcount; + ret = qcow2_get_refcount(bs, cluster_index, &refcount); + if (ret < 0) { goto fail; } break; @@ -1026,9 +1031,9 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, goto fail; } } - refcount = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits); - if (refcount < 0) { - ret = refcount; + ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits, + &refcount); + if (ret < 0) { goto fail; } else if (refcount == 1) { l2_offset |= QCOW_OFLAG_COPIED; @@ -1346,7 +1351,7 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res, BDRVQcowState *s = bs->opaque; uint64_t *l2_table = qemu_blockalign(bs, s->cluster_size); int ret; - int refcount; + uint16_t refcount; int i, j; for (i = 0; i < s->l1_size; i++) { @@ -1358,8 +1363,9 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res, continue; } - refcount = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits); - if (refcount < 0) { + ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits, + &refcount); + if (ret < 0) { /* don't print message nor increment check_errors */ continue; } @@ -1400,9 +1406,10 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res, if ((cluster_type == QCOW2_CLUSTER_NORMAL) || ((cluster_type == QCOW2_CLUSTER_ZERO) && (data_offset != 0))) { - refcount = qcow2_get_refcount(bs, - data_offset >> s->cluster_bits); - if (refcount < 0) { + ret = qcow2_get_refcount(bs, + data_offset >> s->cluster_bits, + &refcount); + if (ret < 0) { /* don't print message nor increment check_errors */ continue; } @@ -1634,13 +1641,14 @@ static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res, { BDRVQcowState *s = bs->opaque; int64_t i; - int refcount1, refcount2, ret; + uint16_t refcount1, refcount2; + int ret; for (i = 0, *highest_cluster = 0; i < nb_clusters; i++) { - refcount1 = qcow2_get_refcount(bs, i); - if (refcount1 < 0) { + ret = qcow2_get_refcount(bs, i, &refcount1); + if (ret < 0) { fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n", - i, strerror(-refcount1)); + i, strerror(-ret)); res->check_errors++; continue; } @@ -1670,7 +1678,7 @@ static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res, if (num_fixed) { ret = update_refcount(bs, i << s->cluster_bits, 1, - refcount2 - refcount1, + (int)refcount2 - (int)refcount1, QCOW2_DISCARD_ALWAYS); if (ret >= 0) { (*num_fixed)++; diff --git a/block/qcow2.h b/block/qcow2.h index 55138c9d9203..a33431f05a91 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -478,7 +478,8 @@ void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset, int qcow2_refcount_init(BlockDriverState *bs); void qcow2_refcount_close(BlockDriverState *bs); -int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index); +int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index, + uint16_t *refcount); int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index, int addend, enum qcow2_discard_type type); From 2aabe7c7a16cee6b1b54592fa05b5f9c23c89bc0 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 10 Feb 2015 15:28:47 -0500 Subject: [PATCH 19/73] qcow2: Use unsigned addend for update_refcount() update_refcount() and qcow2_update_cluster_refcount() currently take a signed addend. At least one caller passes a value directly derived from an absolute refcount that should be reached ("l2_refcount - 1" in expand_zero_clusters_in_l1()). Therefore, the addend should be unsigned as well; this will be especially important for 64 bit refcounts. Because update_refcount() then no longer knows whether the refcount should be increased or decreased, it now requires an additional flag which specified exactly that. The same applies to qcow2_update_cluster_refcount(). Signed-off-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 3 +- block/qcow2-refcount.c | 65 ++++++++++++++++++++++++++++-------------- block/qcow2.h | 8 +++++- 3 files changed, 52 insertions(+), 24 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index ee50500a076d..405329adb560 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -1707,7 +1707,8 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, /* For shared L2 tables, set the refcount accordingly (it is * already 1 and needs to be l2_refcount) */ ret = qcow2_update_cluster_refcount(bs, - offset >> s->cluster_bits, l2_refcount - 1, + offset >> s->cluster_bits, + refcount_diff(1, l2_refcount), false, QCOW2_DISCARD_OTHER); if (ret < 0) { qcow2_free_clusters(bs, offset, s->cluster_size, diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index d8534903c33b..354a355acfbf 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -29,8 +29,8 @@ static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size); static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, - int64_t offset, int64_t length, - int addend, enum qcow2_discard_type type); + int64_t offset, int64_t length, uint16_t addend, + bool decrease, enum qcow2_discard_type type); /*********************************************************/ @@ -263,7 +263,7 @@ static int alloc_refcount_block(BlockDriverState *bs, } else { /* Described somewhere else. This can recurse at most twice before we * arrive at a block that describes itself. */ - ret = update_refcount(bs, new_block, s->cluster_size, 1, + ret = update_refcount(bs, new_block, s->cluster_size, 1, false, QCOW2_DISCARD_NEVER); if (ret < 0) { goto fail_block; @@ -530,8 +530,14 @@ static void update_refcount_discard(BlockDriverState *bs, } /* XXX: cache several refcount block clusters ? */ +/* @addend is the absolute value of the addend; if @decrease is set, @addend + * will be subtracted from the current refcount, otherwise it will be added */ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, - int64_t offset, int64_t length, int addend, enum qcow2_discard_type type) + int64_t offset, + int64_t length, + uint16_t addend, + bool decrease, + enum qcow2_discard_type type) { BDRVQcowState *s = bs->opaque; int64_t start, last, cluster_offset; @@ -540,8 +546,9 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, int ret; #ifdef DEBUG_ALLOC2 - fprintf(stderr, "update_refcount: offset=%" PRId64 " size=%" PRId64 " addend=%d\n", - offset, length, addend); + fprintf(stderr, "update_refcount: offset=%" PRId64 " size=%" PRId64 + " addend=%s%" PRIu16 "\n", offset, length, decrease ? "-" : "", + addend); #endif if (length < 0) { return -EINVAL; @@ -549,7 +556,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, return 0; } - if (addend < 0) { + if (decrease) { qcow2_cache_set_dependency(bs, s->refcount_block_cache, s->l2_table_cache); } @@ -559,7 +566,8 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, for(cluster_offset = start; cluster_offset <= last; cluster_offset += s->cluster_size) { - int block_index, refcount; + int block_index; + uint16_t refcount; int64_t cluster_index = cluster_offset >> s->cluster_bits; int64_t table_index = cluster_index >> s->refcount_block_bits; @@ -586,11 +594,18 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, block_index = cluster_index & (s->refcount_block_size - 1); refcount = be16_to_cpu(refcount_block[block_index]); - refcount += addend; - if (refcount < 0 || refcount > s->refcount_max) { + if (decrease ? ((uint16_t)(refcount - addend) > refcount) + : ((uint16_t)(refcount + addend) < refcount || + (uint16_t)(refcount + addend) > s->refcount_max)) + { ret = -EINVAL; goto fail; } + if (decrease) { + refcount -= addend; + } else { + refcount += addend; + } if (refcount == 0 && cluster_index < s->free_cluster_index) { s->free_cluster_index = cluster_index; } @@ -623,8 +638,8 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, */ if (ret < 0) { int dummy; - dummy = update_refcount(bs, offset, cluster_offset - offset, -addend, - QCOW2_DISCARD_NEVER); + dummy = update_refcount(bs, offset, cluster_offset - offset, addend, + !decrease, QCOW2_DISCARD_NEVER); (void)dummy; } @@ -634,18 +649,21 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, /* * Increases or decreases the refcount of a given cluster. * + * @addend is the absolute value of the addend; if @decrease is set, @addend + * will be subtracted from the current refcount, otherwise it will be added. + * * On success 0 is returned; on failure -errno is returned. */ int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index, - int addend, + uint16_t addend, bool decrease, enum qcow2_discard_type type) { BDRVQcowState *s = bs->opaque; int ret; ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend, - type); + decrease, type); if (ret < 0) { return ret; } @@ -709,7 +727,7 @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size) return offset; } - ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER); + ret = update_refcount(bs, offset, size, 1, false, QCOW2_DISCARD_NEVER); } while (ret == -EAGAIN); if (ret < 0) { @@ -746,7 +764,7 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, } /* And then allocate them */ - ret = update_refcount(bs, offset, i << s->cluster_bits, 1, + ret = update_refcount(bs, offset, i << s->cluster_bits, 1, false, QCOW2_DISCARD_NEVER); } while (ret == -EAGAIN); @@ -797,7 +815,7 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size) } assert(offset); - ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER); + ret = update_refcount(bs, offset, size, 1, false, QCOW2_DISCARD_NEVER); if (ret < 0) { return ret; } @@ -821,7 +839,7 @@ void qcow2_free_clusters(BlockDriverState *bs, int ret; BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_FREE); - ret = update_refcount(bs, offset, size, -1, type); + ret = update_refcount(bs, offset, size, 1, true, type); if (ret < 0) { fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-ret)); /* TODO Remember the clusters to free them later and avoid leaking */ @@ -887,6 +905,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, uint16_t refcount; int ret; + assert(addend >= -1 && addend <= 1); + l2_table = NULL; l1_table = NULL; l1_size2 = l1_size * sizeof(uint64_t); @@ -951,7 +971,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, if (addend != 0) { ret = update_refcount(bs, (offset & s->cluster_offset_mask) & ~511, - nb_csectors * 512, addend, + nb_csectors * 512, abs(addend), addend < 0, QCOW2_DISCARD_SNAPSHOT); if (ret < 0) { goto fail; @@ -982,7 +1002,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, } if (addend != 0) { ret = qcow2_update_cluster_refcount(bs, - cluster_index, addend, + cluster_index, abs(addend), addend < 0, QCOW2_DISCARD_SNAPSHOT); if (ret < 0) { goto fail; @@ -1025,7 +1045,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, if (addend != 0) { ret = qcow2_update_cluster_refcount(bs, l2_offset >> s->cluster_bits, - addend, + abs(addend), addend < 0, QCOW2_DISCARD_SNAPSHOT); if (ret < 0) { goto fail; @@ -1678,7 +1698,8 @@ static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res, if (num_fixed) { ret = update_refcount(bs, i << s->cluster_bits, 1, - (int)refcount2 - (int)refcount1, + refcount_diff(refcount1, refcount2), + refcount1 > refcount2, QCOW2_DISCARD_ALWAYS); if (ret >= 0) { (*num_fixed)++; diff --git a/block/qcow2.h b/block/qcow2.h index a33431f05a91..40910d817db4 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -459,6 +459,11 @@ static inline uint64_t l2meta_cow_end(QCowL2Meta *m) + (m->cow_end.nb_sectors << BDRV_SECTOR_BITS); } +static inline uint16_t refcount_diff(uint16_t r1, uint16_t r2) +{ + return r1 > r2 ? r1 - r2 : r2 - r1; +} + // FIXME Need qcow2_ prefix to global functions /* qcow2.c functions */ @@ -482,7 +487,8 @@ int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index, uint16_t *refcount); int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index, - int addend, enum qcow2_discard_type type); + uint16_t addend, bool decrease, + enum qcow2_discard_type type); int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size); int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, From 0e06528e980b8ac7695a219f8405d3cdc52a1381 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 10 Feb 2015 15:28:48 -0500 Subject: [PATCH 20/73] qcow2: Use 64 bits for refcount values Refcounts may have a width of up to 64 bits, so qemu should use the same width to represent refcount values internally. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 2 +- block/qcow2-refcount.c | 40 +++++++++++++++++++--------------------- block/qcow2.h | 6 +++--- 3 files changed, 23 insertions(+), 25 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 405329adb560..ed2b44d2911e 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -1640,7 +1640,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, for (i = 0; i < l1_size; i++) { uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK; bool l2_dirty = false; - uint16_t l2_refcount; + uint64_t l2_refcount; if (!l2_offset) { /* unallocated */ diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 354a355acfbf..e86a1d6ccf3e 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -29,7 +29,7 @@ static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size); static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, - int64_t offset, int64_t length, uint16_t addend, + int64_t offset, int64_t length, uint64_t addend, bool decrease, enum qcow2_discard_type type); @@ -91,7 +91,7 @@ static int load_refcount_block(BlockDriverState *bs, * *refcount. Returns 0 on success and -errno on failure. */ int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index, - uint16_t *refcount) + uint64_t *refcount) { BDRVQcowState *s = bs->opaque; uint64_t refcount_table_index, block_index; @@ -535,7 +535,7 @@ static void update_refcount_discard(BlockDriverState *bs, static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, int64_t offset, int64_t length, - uint16_t addend, + uint64_t addend, bool decrease, enum qcow2_discard_type type) { @@ -547,7 +547,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, #ifdef DEBUG_ALLOC2 fprintf(stderr, "update_refcount: offset=%" PRId64 " size=%" PRId64 - " addend=%s%" PRIu16 "\n", offset, length, decrease ? "-" : "", + " addend=%s%" PRIu64 "\n", offset, length, decrease ? "-" : "", addend); #endif if (length < 0) { @@ -567,7 +567,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, cluster_offset += s->cluster_size) { int block_index; - uint16_t refcount; + uint64_t refcount; int64_t cluster_index = cluster_offset >> s->cluster_bits; int64_t table_index = cluster_index >> s->refcount_block_bits; @@ -594,9 +594,9 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, block_index = cluster_index & (s->refcount_block_size - 1); refcount = be16_to_cpu(refcount_block[block_index]); - if (decrease ? ((uint16_t)(refcount - addend) > refcount) - : ((uint16_t)(refcount + addend) < refcount || - (uint16_t)(refcount + addend) > s->refcount_max)) + if (decrease ? (refcount - addend > refcount) + : (refcount + addend < refcount || + refcount + addend > s->refcount_max)) { ret = -EINVAL; goto fail; @@ -656,7 +656,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, */ int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index, - uint16_t addend, bool decrease, + uint64_t addend, bool decrease, enum qcow2_discard_type type) { BDRVQcowState *s = bs->opaque; @@ -682,8 +682,7 @@ int qcow2_update_cluster_refcount(BlockDriverState *bs, static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size) { BDRVQcowState *s = bs->opaque; - uint64_t i, nb_clusters; - uint16_t refcount; + uint64_t i, nb_clusters, refcount; int ret; nb_clusters = size_to_clusters(s, size); @@ -741,9 +740,8 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, int nb_clusters) { BDRVQcowState *s = bs->opaque; - uint64_t cluster_index; + uint64_t cluster_index, refcount; uint64_t i; - uint16_t refcount; int ret; assert(nb_clusters >= 0); @@ -791,7 +789,7 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size) offset = s->free_byte_offset; if (offset) { - uint16_t refcount; + uint64_t refcount; ret = qcow2_get_refcount(bs, offset >> s->cluster_bits, &refcount); if (ret < 0) { return ret; @@ -898,11 +896,10 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, int64_t l1_table_offset, int l1_size, int addend) { BDRVQcowState *s = bs->opaque; - uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2; + uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, refcount; bool l1_allocated = false; int64_t old_offset, old_l2_offset; int i, j, l1_modified = 0, nb_csectors; - uint16_t refcount; int ret; assert(addend >= -1 && addend <= 1); @@ -1371,7 +1368,7 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res, BDRVQcowState *s = bs->opaque; uint64_t *l2_table = qemu_blockalign(bs, s->cluster_size); int ret; - uint16_t refcount; + uint64_t refcount; int i, j; for (i = 0; i < s->l1_size; i++) { @@ -1391,7 +1388,7 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res, } if ((refcount == 1) != ((l1_entry & QCOW_OFLAG_COPIED) != 0)) { fprintf(stderr, "%s OFLAG_COPIED L2 cluster: l1_index=%d " - "l1_entry=%" PRIx64 " refcount=%d\n", + "l1_entry=%" PRIx64 " refcount=%" PRIu64 "\n", fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i, l1_entry, refcount); @@ -1435,7 +1432,7 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res, } if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) { fprintf(stderr, "%s OFLAG_COPIED data cluster: " - "l2_entry=%" PRIx64 " refcount=%d\n", + "l2_entry=%" PRIx64 " refcount=%" PRIu64 "\n", fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", l2_entry, refcount); @@ -1661,7 +1658,7 @@ static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res, { BDRVQcowState *s = bs->opaque; int64_t i; - uint16_t refcount1, refcount2; + uint64_t refcount1, refcount2; int ret; for (i = 0, *highest_cluster = 0; i < nb_clusters; i++) { @@ -1690,7 +1687,8 @@ static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res, num_fixed = &res->corruptions_fixed; } - fprintf(stderr, "%s cluster %" PRId64 " refcount=%d reference=%d\n", + fprintf(stderr, "%s cluster %" PRId64 " refcount=%" PRIu64 + " reference=%" PRIu64 "\n", num_fixed != NULL ? "Repairing" : refcount1 < refcount2 ? "ERROR" : "Leaked", diff --git a/block/qcow2.h b/block/qcow2.h index 40910d817db4..a9108f55cad0 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -459,7 +459,7 @@ static inline uint64_t l2meta_cow_end(QCowL2Meta *m) + (m->cow_end.nb_sectors << BDRV_SECTOR_BITS); } -static inline uint16_t refcount_diff(uint16_t r1, uint16_t r2) +static inline uint64_t refcount_diff(uint64_t r1, uint64_t r2) { return r1 > r2 ? r1 - r2 : r2 - r1; } @@ -484,10 +484,10 @@ int qcow2_refcount_init(BlockDriverState *bs); void qcow2_refcount_close(BlockDriverState *bs); int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index, - uint16_t *refcount); + uint64_t *refcount); int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index, - uint16_t addend, bool decrease, + uint64_t addend, bool decrease, enum qcow2_discard_type type); int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size); From 5fee192efdfe2161d392491e6f68bffb406b18e1 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 10 Feb 2015 15:28:49 -0500 Subject: [PATCH 21/73] qcow2: Helper for refcount array reallocation Add a helper function for reallocating a refcount array, independent of the refcount order. The newly allocated space is zeroed and the function handles failed reallocations gracefully. The helper function will always align the buffer size to a cluster boundary; if storing the refcounts in such an array in big endian byte order, this makes it possible to write parts of the array directly as refcount blocks into the image file. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/qcow2-refcount.c | 130 +++++++++++++++++++++++++---------------- 1 file changed, 81 insertions(+), 49 deletions(-) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index e86a1d6ccf3e..497364f673d3 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1095,6 +1095,63 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, /* refcount checking functions */ +static size_t refcount_array_byte_size(BDRVQcowState *s, uint64_t entries) +{ + /* This assertion holds because there is no way we can address more than + * 2^(64 - 9) clusters at once (with cluster size 512 = 2^9, and because + * offsets have to be representable in bytes); due to every cluster + * corresponding to one refcount entry, we are well below that limit */ + assert(entries < (UINT64_C(1) << (64 - 9))); + + /* Thanks to the assertion this will not overflow, because + * s->refcount_order < 7. + * (note: x << s->refcount_order == x * s->refcount_bits) */ + return DIV_ROUND_UP(entries << s->refcount_order, 8); +} + +/** + * Reallocates *array so that it can hold new_size entries. *size must contain + * the current number of entries in *array. If the reallocation fails, *array + * and *size will not be modified and -errno will be returned. If the + * reallocation is successful, *array will be set to the new buffer, *size + * will be set to new_size and 0 will be returned. The size of the reallocated + * refcount array buffer will be aligned to a cluster boundary, and the newly + * allocated area will be zeroed. + */ +static int realloc_refcount_array(BDRVQcowState *s, uint16_t **array, + int64_t *size, int64_t new_size) +{ + size_t old_byte_size, new_byte_size; + uint16_t *new_ptr; + + /* Round to clusters so the array can be directly written to disk */ + old_byte_size = size_to_clusters(s, refcount_array_byte_size(s, *size)) + * s->cluster_size; + new_byte_size = size_to_clusters(s, refcount_array_byte_size(s, new_size)) + * s->cluster_size; + + if (new_byte_size == old_byte_size) { + *size = new_size; + return 0; + } + + assert(new_byte_size > 0); + + new_ptr = g_try_realloc(*array, new_byte_size); + if (!new_ptr) { + return -ENOMEM; + } + + if (new_byte_size > old_byte_size) { + memset((void *)((uintptr_t)new_ptr + old_byte_size), 0, + new_byte_size - old_byte_size); + } + + *array = new_ptr; + *size = new_size; + + return 0; +} /* * Increases the refcount for a range of clusters in a given refcount table. @@ -1111,6 +1168,7 @@ static int inc_refcounts(BlockDriverState *bs, { BDRVQcowState *s = bs->opaque; uint64_t start, last, cluster_offset, k; + int ret; if (size <= 0) { return 0; @@ -1122,23 +1180,12 @@ static int inc_refcounts(BlockDriverState *bs, cluster_offset += s->cluster_size) { k = cluster_offset >> s->cluster_bits; if (k >= *refcount_table_size) { - int64_t old_refcount_table_size = *refcount_table_size; - uint16_t *new_refcount_table; - - *refcount_table_size = k + 1; - new_refcount_table = g_try_realloc(*refcount_table, - *refcount_table_size * - sizeof(**refcount_table)); - if (!new_refcount_table) { - *refcount_table_size = old_refcount_table_size; + ret = realloc_refcount_array(s, refcount_table, + refcount_table_size, k + 1); + if (ret < 0) { res->check_errors++; - return -ENOMEM; + return ret; } - *refcount_table = new_refcount_table; - - memset(*refcount_table + old_refcount_table_size, 0, - (*refcount_table_size - old_refcount_table_size) * - sizeof(**refcount_table)); } if (++(*refcount_table)[k] == 0) { @@ -1507,8 +1554,7 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res, fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); if (fix & BDRV_FIX_ERRORS) { - int64_t old_nb_clusters = *nb_clusters; - uint16_t *new_refcount_table; + int64_t new_nb_clusters; if (offset > INT64_MAX - s->cluster_size) { ret = -EINVAL; @@ -1525,22 +1571,15 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res, goto resize_fail; } - *nb_clusters = size_to_clusters(s, size); - assert(*nb_clusters >= old_nb_clusters); + new_nb_clusters = size_to_clusters(s, size); + assert(new_nb_clusters >= *nb_clusters); - new_refcount_table = g_try_realloc(*refcount_table, - *nb_clusters * - sizeof(**refcount_table)); - if (!new_refcount_table) { - *nb_clusters = old_nb_clusters; + ret = realloc_refcount_array(s, refcount_table, + nb_clusters, new_nb_clusters); + if (ret < 0) { res->check_errors++; - return -ENOMEM; + return ret; } - *refcount_table = new_refcount_table; - - memset(*refcount_table + old_nb_clusters, 0, - (*nb_clusters - old_nb_clusters) * - sizeof(**refcount_table)); if (cluster >= *nb_clusters) { ret = -EINVAL; @@ -1600,10 +1639,12 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res, int ret; if (!*refcount_table) { - *refcount_table = g_try_new0(uint16_t, *nb_clusters); - if (*nb_clusters && *refcount_table == NULL) { + int64_t old_size = 0; + ret = realloc_refcount_array(s, refcount_table, + &old_size, *nb_clusters); + if (ret < 0) { res->check_errors++; - return -ENOMEM; + return ret; } } @@ -1737,6 +1778,7 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs, int64_t cluster = *first_free_cluster, i; bool first_gap = true; int contiguous_free_clusters; + int ret; /* Starting at *first_free_cluster, find a range of at least cluster_count * continuously free clusters */ @@ -1766,28 +1808,18 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs, /* If no such range could be found, grow the in-memory refcount table * accordingly to append free clusters at the end of the image */ if (contiguous_free_clusters < cluster_count) { - int64_t old_imrt_nb_clusters = *imrt_nb_clusters; - uint16_t *new_refcount_table; - /* contiguous_free_clusters clusters are already empty at the image end; * we need cluster_count clusters; therefore, we have to allocate * cluster_count - contiguous_free_clusters new clusters at the end of * the image (which is the current value of cluster; note that cluster * may exceed old_imrt_nb_clusters if *first_free_cluster pointed beyond * the image end) */ - *imrt_nb_clusters = cluster + cluster_count - contiguous_free_clusters; - new_refcount_table = g_try_realloc(*refcount_table, - *imrt_nb_clusters * - sizeof(**refcount_table)); - if (!new_refcount_table) { - *imrt_nb_clusters = old_imrt_nb_clusters; - return -ENOMEM; - } - *refcount_table = new_refcount_table; - - memset(*refcount_table + old_imrt_nb_clusters, 0, - (*imrt_nb_clusters - old_imrt_nb_clusters) * - sizeof(**refcount_table)); + ret = realloc_refcount_array(s, refcount_table, imrt_nb_clusters, + cluster + cluster_count + - contiguous_free_clusters); + if (ret < 0) { + return ret; + } } /* Go back to the first free cluster */ From 7453c96b78c2b09aa72924f933bb9616e5474194 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 10 Feb 2015 15:28:50 -0500 Subject: [PATCH 22/73] qcow2: Helper function for refcount modification Since refcounts do not always have to be a uint16_t, all refcount blocks and arrays in memory should not have a specific type (thus they become pointers to void) and for accessing them, two helper functions are used (a getter and a setter). Those functions are called indirectly through function pointers in the BDRVQcowState so they may later be exchanged for different refcount orders. With the check and repair functions using this function, the refcount array they are creating will be in big endian byte order; additionally, using realloc_refcount_array() makes the size of this refcount array always cluster-aligned. Both combined allow rebuild_refcount_structure() to drop the bounce buffer which was used to convert parts of the refcount array to big endian byte order and store them on disk. Instead, those parts can now be written directly. [ kwolf: Fixed a build failure on 32 bit and another with old glib ] Signed-off-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/qcow2-refcount.c | 126 ++++++++++++++++++++++++----------------- block/qcow2.h | 8 +++ 2 files changed, 81 insertions(+), 53 deletions(-) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 497364f673d3..c0c1313ddb1a 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -32,6 +32,11 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, int64_t offset, int64_t length, uint64_t addend, bool decrease, enum qcow2_discard_type type); +static uint64_t get_refcount_ro4(const void *refcount_array, uint64_t index); + +static void set_refcount_ro4(void *refcount_array, uint64_t index, + uint64_t value); + /*********************************************************/ /* refcount handling */ @@ -42,6 +47,9 @@ int qcow2_refcount_init(BlockDriverState *bs) unsigned int refcount_table_size2, i; int ret; + s->get_refcount = &get_refcount_ro4; + s->set_refcount = &set_refcount_ro4; + assert(s->refcount_table_size <= INT_MAX / sizeof(uint64_t)); refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t); s->refcount_table = g_try_malloc(refcount_table_size2); @@ -72,6 +80,19 @@ void qcow2_refcount_close(BlockDriverState *bs) } +static uint64_t get_refcount_ro4(const void *refcount_array, uint64_t index) +{ + return be16_to_cpu(((const uint16_t *)refcount_array)[index]); +} + +static void set_refcount_ro4(void *refcount_array, uint64_t index, + uint64_t value) +{ + assert(!(value >> 16)); + ((uint16_t *)refcount_array)[index] = cpu_to_be16(value); +} + + static int load_refcount_block(BlockDriverState *bs, int64_t refcount_block_offset, void **refcount_block) @@ -97,7 +118,7 @@ int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index, uint64_t refcount_table_index, block_index; int64_t refcount_block_offset; int ret; - uint16_t *refcount_block; + void *refcount_block; refcount_table_index = cluster_index >> s->refcount_block_bits; if (refcount_table_index >= s->refcount_table_size) { @@ -119,16 +140,15 @@ int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index, } ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset, - (void**) &refcount_block); + &refcount_block); if (ret < 0) { return ret; } block_index = cluster_index & (s->refcount_block_size - 1); - *refcount = be16_to_cpu(refcount_block[block_index]); + *refcount = s->get_refcount(refcount_block, block_index); - ret = qcow2_cache_put(bs, s->refcount_block_cache, - (void**) &refcount_block); + ret = qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block); if (ret < 0) { return ret; } @@ -172,7 +192,7 @@ static int in_same_refcount_block(BDRVQcowState *s, uint64_t offset_a, * Returns 0 on success or -errno in error case */ static int alloc_refcount_block(BlockDriverState *bs, - int64_t cluster_index, uint16_t **refcount_block) + int64_t cluster_index, void **refcount_block) { BDRVQcowState *s = bs->opaque; unsigned int refcount_table_index; @@ -199,7 +219,7 @@ static int alloc_refcount_block(BlockDriverState *bs, } return load_refcount_block(bs, refcount_block_offset, - (void**) refcount_block); + refcount_block); } } @@ -249,7 +269,7 @@ static int alloc_refcount_block(BlockDriverState *bs, if (in_same_refcount_block(s, new_block, cluster_index << s->cluster_bits)) { /* Zero the new refcount block before updating it */ ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block, - (void**) refcount_block); + refcount_block); if (ret < 0) { goto fail_block; } @@ -259,7 +279,7 @@ static int alloc_refcount_block(BlockDriverState *bs, /* The block describes itself, need to update the cache */ int block_index = (new_block >> s->cluster_bits) & (s->refcount_block_size - 1); - (*refcount_block)[block_index] = cpu_to_be16(1); + s->set_refcount(*refcount_block, block_index, 1); } else { /* Described somewhere else. This can recurse at most twice before we * arrive at a block that describes itself. */ @@ -277,7 +297,7 @@ static int alloc_refcount_block(BlockDriverState *bs, /* Initialize the new refcount block only after updating its refcount, * update_refcount uses the refcount cache itself */ ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block, - (void**) refcount_block); + refcount_block); if (ret < 0) { goto fail_block; } @@ -311,7 +331,7 @@ static int alloc_refcount_block(BlockDriverState *bs, return -EAGAIN; } - ret = qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block); + ret = qcow2_cache_put(bs, s->refcount_block_cache, refcount_block); if (ret < 0) { goto fail_block; } @@ -365,7 +385,7 @@ static int alloc_refcount_block(BlockDriverState *bs, s->cluster_size; uint64_t table_offset = meta_offset + blocks_clusters * s->cluster_size; uint64_t *new_table = g_try_new0(uint64_t, table_size); - uint16_t *new_blocks = g_try_malloc0(blocks_clusters * s->cluster_size); + void *new_blocks = g_try_malloc0(blocks_clusters * s->cluster_size); assert(table_size > 0 && blocks_clusters > 0); if (new_table == NULL || new_blocks == NULL) { @@ -387,7 +407,7 @@ static int alloc_refcount_block(BlockDriverState *bs, uint64_t table_clusters = size_to_clusters(s, table_size * sizeof(uint64_t)); int block = 0; for (i = 0; i < table_clusters + blocks_clusters; i++) { - new_blocks[block++] = cpu_to_be16(1); + s->set_refcount(new_blocks, block++, 1); } /* Write refcount blocks to disk */ @@ -440,7 +460,7 @@ static int alloc_refcount_block(BlockDriverState *bs, qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t), QCOW2_DISCARD_OTHER); - ret = load_refcount_block(bs, new_block, (void**) refcount_block); + ret = load_refcount_block(bs, new_block, refcount_block); if (ret < 0) { return ret; } @@ -455,7 +475,7 @@ static int alloc_refcount_block(BlockDriverState *bs, g_free(new_table); fail_block: if (*refcount_block != NULL) { - qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block); + qcow2_cache_put(bs, s->refcount_block_cache, refcount_block); } return ret; } @@ -541,7 +561,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, { BDRVQcowState *s = bs->opaque; int64_t start, last, cluster_offset; - uint16_t *refcount_block = NULL; + void *refcount_block = NULL; int64_t old_table_index = -1; int ret; @@ -575,7 +595,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, if (table_index != old_table_index) { if (refcount_block) { ret = qcow2_cache_put(bs, s->refcount_block_cache, - (void**) &refcount_block); + &refcount_block); if (ret < 0) { goto fail; } @@ -593,7 +613,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, /* we can update the count and save it */ block_index = cluster_index & (s->refcount_block_size - 1); - refcount = be16_to_cpu(refcount_block[block_index]); + refcount = s->get_refcount(refcount_block, block_index); if (decrease ? (refcount - addend > refcount) : (refcount + addend < refcount || refcount + addend > s->refcount_max)) @@ -609,7 +629,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, if (refcount == 0 && cluster_index < s->free_cluster_index) { s->free_cluster_index = cluster_index; } - refcount_block[block_index] = cpu_to_be16(refcount); + s->set_refcount(refcount_block, block_index, refcount); if (refcount == 0 && s->discard_passthrough[type]) { update_refcount_discard(bs, cluster_offset, s->cluster_size); @@ -625,8 +645,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, /* Write last changed block to disk */ if (refcount_block) { int wret; - wret = qcow2_cache_put(bs, s->refcount_block_cache, - (void**) &refcount_block); + wret = qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block); if (wret < 0) { return ret < 0 ? ret : wret; } @@ -1118,11 +1137,11 @@ static size_t refcount_array_byte_size(BDRVQcowState *s, uint64_t entries) * refcount array buffer will be aligned to a cluster boundary, and the newly * allocated area will be zeroed. */ -static int realloc_refcount_array(BDRVQcowState *s, uint16_t **array, +static int realloc_refcount_array(BDRVQcowState *s, void **array, int64_t *size, int64_t new_size) { size_t old_byte_size, new_byte_size; - uint16_t *new_ptr; + void *new_ptr; /* Round to clusters so the array can be directly written to disk */ old_byte_size = size_to_clusters(s, refcount_array_byte_size(s, *size)) @@ -1162,12 +1181,12 @@ static int realloc_refcount_array(BDRVQcowState *s, uint16_t **array, */ static int inc_refcounts(BlockDriverState *bs, BdrvCheckResult *res, - uint16_t **refcount_table, + void **refcount_table, int64_t *refcount_table_size, int64_t offset, int64_t size) { BDRVQcowState *s = bs->opaque; - uint64_t start, last, cluster_offset, k; + uint64_t start, last, cluster_offset, k, refcount; int ret; if (size <= 0) { @@ -1188,11 +1207,14 @@ static int inc_refcounts(BlockDriverState *bs, } } - if (++(*refcount_table)[k] == 0) { + refcount = s->get_refcount(*refcount_table, k); + if (refcount == s->refcount_max) { fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64 "\n", cluster_offset); res->corruptions++; + continue; } + s->set_refcount(*refcount_table, k, refcount + 1); } return 0; @@ -1212,8 +1234,9 @@ enum { * error occurred. */ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, - uint16_t **refcount_table, int64_t *refcount_table_size, int64_t l2_offset, - int flags) + void **refcount_table, + int64_t *refcount_table_size, int64_t l2_offset, + int flags) { BDRVQcowState *s = bs->opaque; uint64_t *l2_table, l2_entry; @@ -1330,7 +1353,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, */ static int check_refcounts_l1(BlockDriverState *bs, BdrvCheckResult *res, - uint16_t **refcount_table, + void **refcount_table, int64_t *refcount_table_size, int64_t l1_table_offset, int l1_size, int flags) @@ -1529,7 +1552,7 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res, */ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix, bool *rebuild, - uint16_t **refcount_table, int64_t *nb_clusters) + void **refcount_table, int64_t *nb_clusters) { BDRVQcowState *s = bs->opaque; int64_t i, size; @@ -1614,9 +1637,10 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res, if (ret < 0) { return ret; } - if ((*refcount_table)[cluster] != 1) { + if (s->get_refcount(*refcount_table, cluster) != 1) { fprintf(stderr, "ERROR refcount block %" PRId64 - " refcount=%d\n", i, (*refcount_table)[cluster]); + " refcount=%" PRIu64 "\n", i, + s->get_refcount(*refcount_table, cluster)); res->corruptions++; *rebuild = true; } @@ -1631,7 +1655,7 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res, */ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix, bool *rebuild, - uint16_t **refcount_table, int64_t *nb_clusters) + void **refcount_table, int64_t *nb_clusters) { BDRVQcowState *s = bs->opaque; int64_t i; @@ -1695,7 +1719,7 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res, static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix, bool *rebuild, int64_t *highest_cluster, - uint16_t *refcount_table, int64_t nb_clusters) + void *refcount_table, int64_t nb_clusters) { BDRVQcowState *s = bs->opaque; int64_t i; @@ -1711,7 +1735,7 @@ static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res, continue; } - refcount2 = refcount_table[i]; + refcount2 = s->get_refcount(refcount_table, i); if (refcount1 > 0 || refcount2 > 0) { *highest_cluster = i; @@ -1770,7 +1794,7 @@ static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res, */ static int64_t alloc_clusters_imrt(BlockDriverState *bs, int cluster_count, - uint16_t **refcount_table, + void **refcount_table, int64_t *imrt_nb_clusters, int64_t *first_free_cluster) { @@ -1787,7 +1811,7 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs, contiguous_free_clusters < cluster_count; cluster++) { - if (!(*refcount_table)[cluster]) { + if (!s->get_refcount(*refcount_table, cluster)) { contiguous_free_clusters++; if (first_gap) { /* If this is the first free cluster found, update @@ -1825,7 +1849,7 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs, /* Go back to the first free cluster */ cluster -= contiguous_free_clusters; for (i = 0; i < cluster_count; i++) { - (*refcount_table)[cluster + i] = 1; + s->set_refcount(*refcount_table, cluster + i, 1); } return cluster << s->cluster_bits; @@ -1841,7 +1865,7 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs, */ static int rebuild_refcount_structure(BlockDriverState *bs, BdrvCheckResult *res, - uint16_t **refcount_table, + void **refcount_table, int64_t *nb_clusters) { BDRVQcowState *s = bs->opaque; @@ -1849,8 +1873,8 @@ static int rebuild_refcount_structure(BlockDriverState *bs, int64_t refblock_offset, refblock_start, refblock_index; uint32_t reftable_size = 0; uint64_t *on_disk_reftable = NULL; - uint16_t *on_disk_refblock; - int i, ret = 0; + void *on_disk_refblock; + int ret = 0; struct { uint64_t reftable_offset; uint32_t reftable_clusters; @@ -1860,7 +1884,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, write_refblocks: for (; cluster < *nb_clusters; cluster++) { - if (!(*refcount_table)[cluster]) { + if (!s->get_refcount(*refcount_table, cluster)) { continue; } @@ -1933,17 +1957,13 @@ static int rebuild_refcount_structure(BlockDriverState *bs, goto fail; } - on_disk_refblock = qemu_blockalign0(bs->file, s->cluster_size); - for (i = 0; i < s->refcount_block_size && - refblock_start + i < *nb_clusters; i++) - { - on_disk_refblock[i] = - cpu_to_be16((*refcount_table)[refblock_start + i]); - } + /* The size of *refcount_table is always cluster-aligned, therefore the + * write operation will not overflow */ + on_disk_refblock = (void *)((char *) *refcount_table + + refblock_index * s->cluster_size); ret = bdrv_write(bs->file, refblock_offset / BDRV_SECTOR_SIZE, - (void *)on_disk_refblock, s->cluster_sectors); - qemu_vfree(on_disk_refblock); + on_disk_refblock, s->cluster_sectors); if (ret < 0) { fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); goto fail; @@ -2038,7 +2058,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, BDRVQcowState *s = bs->opaque; BdrvCheckResult pre_compare_res; int64_t size, highest_cluster, nb_clusters; - uint16_t *refcount_table = NULL; + void *refcount_table = NULL; bool rebuild = false; int ret; @@ -2087,7 +2107,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, /* Because the old reftable has been exchanged for a new one the * references have to be recalculated */ rebuild = false; - memset(refcount_table, 0, nb_clusters * sizeof(uint16_t)); + memset(refcount_table, 0, refcount_array_byte_size(s, nb_clusters)); ret = calculate_refcounts(bs, res, 0, &rebuild, &refcount_table, &nb_clusters); if (ret < 0) { diff --git a/block/qcow2.h b/block/qcow2.h index a9108f55cad0..aa6d367818eb 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -213,6 +213,11 @@ typedef struct Qcow2DiscardRegion { QTAILQ_ENTRY(Qcow2DiscardRegion) next; } Qcow2DiscardRegion; +typedef uint64_t Qcow2GetRefcountFunc(const void *refcount_array, + uint64_t index); +typedef void Qcow2SetRefcountFunc(void *refcount_array, + uint64_t index, uint64_t value); + typedef struct BDRVQcowState { int cluster_bits; int cluster_size; @@ -261,6 +266,9 @@ typedef struct BDRVQcowState { int refcount_bits; uint64_t refcount_max; + Qcow2GetRefcountFunc *get_refcount; + Qcow2SetRefcountFunc *set_refcount; + bool discard_passthrough[QCOW2_DISCARD_MAX]; int overlap_check; /* bitmask of Qcow2MetadataOverlap values */ From 59c0cb7830be68080c6a0d1449253b954350e57d Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 10 Feb 2015 15:28:51 -0500 Subject: [PATCH 23/73] qcow2: More helpers for refcount modification Add helper functions for getting and setting refcounts in a refcount array for any possible refcount order, and choose the correct one during refcount initialization. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/qcow2-refcount.c | 121 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 119 insertions(+), 2 deletions(-) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index c0c1313ddb1a..dc8d186a825f 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -32,10 +32,49 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, int64_t offset, int64_t length, uint64_t addend, bool decrease, enum qcow2_discard_type type); +static uint64_t get_refcount_ro0(const void *refcount_array, uint64_t index); +static uint64_t get_refcount_ro1(const void *refcount_array, uint64_t index); +static uint64_t get_refcount_ro2(const void *refcount_array, uint64_t index); +static uint64_t get_refcount_ro3(const void *refcount_array, uint64_t index); static uint64_t get_refcount_ro4(const void *refcount_array, uint64_t index); +static uint64_t get_refcount_ro5(const void *refcount_array, uint64_t index); +static uint64_t get_refcount_ro6(const void *refcount_array, uint64_t index); +static void set_refcount_ro0(void *refcount_array, uint64_t index, + uint64_t value); +static void set_refcount_ro1(void *refcount_array, uint64_t index, + uint64_t value); +static void set_refcount_ro2(void *refcount_array, uint64_t index, + uint64_t value); +static void set_refcount_ro3(void *refcount_array, uint64_t index, + uint64_t value); static void set_refcount_ro4(void *refcount_array, uint64_t index, uint64_t value); +static void set_refcount_ro5(void *refcount_array, uint64_t index, + uint64_t value); +static void set_refcount_ro6(void *refcount_array, uint64_t index, + uint64_t value); + + +static Qcow2GetRefcountFunc *const get_refcount_funcs[] = { + &get_refcount_ro0, + &get_refcount_ro1, + &get_refcount_ro2, + &get_refcount_ro3, + &get_refcount_ro4, + &get_refcount_ro5, + &get_refcount_ro6 +}; + +static Qcow2SetRefcountFunc *const set_refcount_funcs[] = { + &set_refcount_ro0, + &set_refcount_ro1, + &set_refcount_ro2, + &set_refcount_ro3, + &set_refcount_ro4, + &set_refcount_ro5, + &set_refcount_ro6 +}; /*********************************************************/ @@ -47,8 +86,10 @@ int qcow2_refcount_init(BlockDriverState *bs) unsigned int refcount_table_size2, i; int ret; - s->get_refcount = &get_refcount_ro4; - s->set_refcount = &set_refcount_ro4; + assert(s->refcount_order >= 0 && s->refcount_order <= 6); + + s->get_refcount = get_refcount_funcs[s->refcount_order]; + s->set_refcount = set_refcount_funcs[s->refcount_order]; assert(s->refcount_table_size <= INT_MAX / sizeof(uint64_t)); refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t); @@ -80,6 +121,59 @@ void qcow2_refcount_close(BlockDriverState *bs) } +static uint64_t get_refcount_ro0(const void *refcount_array, uint64_t index) +{ + return (((const uint8_t *)refcount_array)[index / 8] >> (index % 8)) & 0x1; +} + +static void set_refcount_ro0(void *refcount_array, uint64_t index, + uint64_t value) +{ + assert(!(value >> 1)); + ((uint8_t *)refcount_array)[index / 8] &= ~(0x1 << (index % 8)); + ((uint8_t *)refcount_array)[index / 8] |= value << (index % 8); +} + +static uint64_t get_refcount_ro1(const void *refcount_array, uint64_t index) +{ + return (((const uint8_t *)refcount_array)[index / 4] >> (2 * (index % 4))) + & 0x3; +} + +static void set_refcount_ro1(void *refcount_array, uint64_t index, + uint64_t value) +{ + assert(!(value >> 2)); + ((uint8_t *)refcount_array)[index / 4] &= ~(0x3 << (2 * (index % 4))); + ((uint8_t *)refcount_array)[index / 4] |= value << (2 * (index % 4)); +} + +static uint64_t get_refcount_ro2(const void *refcount_array, uint64_t index) +{ + return (((const uint8_t *)refcount_array)[index / 2] >> (4 * (index % 2))) + & 0xf; +} + +static void set_refcount_ro2(void *refcount_array, uint64_t index, + uint64_t value) +{ + assert(!(value >> 4)); + ((uint8_t *)refcount_array)[index / 2] &= ~(0xf << (4 * (index % 2))); + ((uint8_t *)refcount_array)[index / 2] |= value << (4 * (index % 2)); +} + +static uint64_t get_refcount_ro3(const void *refcount_array, uint64_t index) +{ + return ((const uint8_t *)refcount_array)[index]; +} + +static void set_refcount_ro3(void *refcount_array, uint64_t index, + uint64_t value) +{ + assert(!(value >> 8)); + ((uint8_t *)refcount_array)[index] = value; +} + static uint64_t get_refcount_ro4(const void *refcount_array, uint64_t index) { return be16_to_cpu(((const uint16_t *)refcount_array)[index]); @@ -92,6 +186,29 @@ static void set_refcount_ro4(void *refcount_array, uint64_t index, ((uint16_t *)refcount_array)[index] = cpu_to_be16(value); } +static uint64_t get_refcount_ro5(const void *refcount_array, uint64_t index) +{ + return be32_to_cpu(((const uint32_t *)refcount_array)[index]); +} + +static void set_refcount_ro5(void *refcount_array, uint64_t index, + uint64_t value) +{ + assert(!(value >> 32)); + ((uint32_t *)refcount_array)[index] = cpu_to_be32(value); +} + +static uint64_t get_refcount_ro6(const void *refcount_array, uint64_t index) +{ + return be64_to_cpu(((const uint64_t *)refcount_array)[index]); +} + +static void set_refcount_ro6(void *refcount_array, uint64_t index, + uint64_t value) +{ + ((uint64_t *)refcount_array)[index] = cpu_to_be64(value); +} + static int load_refcount_block(BlockDriverState *bs, int64_t refcount_block_offset, From b72faf9f78377e38342f78ffacdfa0726f5e7511 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 10 Feb 2015 15:28:52 -0500 Subject: [PATCH 24/73] qcow2: Open images with refcount order != 4 No longer refuse to open images with a different refcount entry width than 16 bits; only reject images with a refcount width larger than 64 bits (which is prohibited by the specification). Signed-off-by: Max Reitz Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/qcow2.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index 5c92803fff29..52731ab67bb9 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -677,10 +677,10 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, } /* Check support for various header values */ - if (header.refcount_order != 4) { - report_unsupported(bs, errp, "%d bit reference counts", - 1 << header.refcount_order); - ret = -ENOTSUP; + if (header.refcount_order > 6) { + error_setg(errp, "Reference count entry width too large; may not " + "exceed 64 bits"); + ret = -EINVAL; goto fail; } s->refcount_order = header.refcount_order; From bd4b167f84840865b788e9f236585f77a77b10fc Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 18 Feb 2015 17:40:46 -0500 Subject: [PATCH 25/73] qcow2: refcount_order parameter for qcow2_create2 Add a refcount_order parameter to qcow2_create2(), use that value for the image header and for calculating the size required for preallocation. For now, always pass 4. This addition requires changes to the calculation of the file size for the "full" and "falloc" preallocation modes. That in turn is a nice opportunity to add a comment about that calculation not necessarily being exact (and that being intentional). Signed-off-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/qcow2.c | 47 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index 52731ab67bb9..7a35f972351c 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1783,7 +1783,7 @@ static int preallocate(BlockDriverState *bs) static int qcow2_create2(const char *filename, int64_t total_size, const char *backing_file, const char *backing_format, int flags, size_t cluster_size, PreallocMode prealloc, - QemuOpts *opts, int version, + QemuOpts *opts, int version, int refcount_order, Error **errp) { /* Calculate cluster_bits */ @@ -1816,9 +1816,21 @@ static int qcow2_create2(const char *filename, int64_t total_size, int ret; if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) { + /* Note: The following calculation does not need to be exact; if it is a + * bit off, either some bytes will be "leaked" (which is fine) or we + * will need to increase the file size by some bytes (which is fine, + * too, as long as the bulk is allocated here). Therefore, using + * floating point arithmetic is fine. */ int64_t meta_size = 0; uint64_t nreftablee, nrefblocke, nl1e, nl2e; int64_t aligned_total_size = align_offset(total_size, cluster_size); + int refblock_bits, refblock_size; + /* refcount entry size in bytes */ + double rces = (1 << refcount_order) / 8.; + + /* see qcow2_open() */ + refblock_bits = cluster_bits - (refcount_order - 3); + refblock_size = 1 << refblock_bits; /* header: 1 cluster */ meta_size += cluster_size; @@ -1843,20 +1855,20 @@ static int qcow2_create2(const char *filename, int64_t total_size, * c = cluster size * y1 = number of refcount blocks entries * y2 = meta size including everything + * rces = refcount entry size in bytes * then, * y1 = (y2 + a)/c - * y2 = y1 * sizeof(u16) + y1 * sizeof(u16) * sizeof(u64) / c + m + * y2 = y1 * rces + y1 * rces * sizeof(u64) / c + m * we can get y1: - * y1 = (a + m) / (c - sizeof(u16) - sizeof(u16) * sizeof(u64) / c) + * y1 = (a + m) / (c - rces - rces * sizeof(u64) / c) */ - nrefblocke = (aligned_total_size + meta_size + cluster_size) / - (cluster_size - sizeof(uint16_t) - - 1.0 * sizeof(uint16_t) * sizeof(uint64_t) / cluster_size); - nrefblocke = align_offset(nrefblocke, cluster_size / sizeof(uint16_t)); - meta_size += nrefblocke * sizeof(uint16_t); + nrefblocke = (aligned_total_size + meta_size + cluster_size) + / (cluster_size - rces - rces * sizeof(uint64_t) + / cluster_size); + meta_size += DIV_ROUND_UP(nrefblocke, refblock_size) * cluster_size; /* total size of refcount tables */ - nreftablee = nrefblocke * sizeof(uint16_t) / cluster_size; + nreftablee = nrefblocke / refblock_size; nreftablee = align_offset(nreftablee, cluster_size / sizeof(uint64_t)); meta_size += nreftablee * sizeof(uint64_t); @@ -1892,7 +1904,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, .l1_size = cpu_to_be32(0), .refcount_table_offset = cpu_to_be64(cluster_size), .refcount_table_clusters = cpu_to_be32(1), - .refcount_order = cpu_to_be32(4), + .refcount_order = cpu_to_be32(refcount_order), .header_length = cpu_to_be32(sizeof(*header)), }; @@ -2011,6 +2023,8 @@ static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp) size_t cluster_size = DEFAULT_CLUSTER_SIZE; PreallocMode prealloc; int version = 3; + uint64_t refcount_bits = 16; + int refcount_order; Error *local_err = NULL; int ret; @@ -2065,8 +2079,19 @@ static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp) goto finish; } + if (version < 3 && refcount_bits != 16) { + error_setg(errp, "Different refcount widths than 16 bits require " + "compatibility level 1.1 or above (use compat=1.1 or " + "greater)"); + ret = -EINVAL; + goto finish; + } + + refcount_order = ffs(refcount_bits) - 1; + ret = qcow2_create2(filename, size, backing_file, backing_fmt, flags, - cluster_size, prealloc, opts, version, &local_err); + cluster_size, prealloc, opts, version, refcount_order, + &local_err); if (local_err) { error_propagate(errp, local_err); } From 8a17b83cc3d4aa61ed24e2682aaf8ada7130f03d Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 18 Feb 2015 17:40:47 -0500 Subject: [PATCH 26/73] qcow2: Use symbolic macros in qcow2_amend_options qcow2_amend_options() should not compare options against some inline strings but rather use the symbolic macros available for each of the creation options. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/qcow2.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index 7a35f972351c..64bb7b8a1fae 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2672,8 +2672,8 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, continue; } - if (!strcmp(desc->name, "compat")) { - compat = qemu_opt_get(opts, "compat"); + if (!strcmp(desc->name, BLOCK_OPT_COMPAT_LEVEL)) { + compat = qemu_opt_get(opts, BLOCK_OPT_COMPAT_LEVEL); if (!compat) { /* preserve default */ } else if (!strcmp(compat, "0.10")) { @@ -2684,32 +2684,33 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, fprintf(stderr, "Unknown compatibility level %s.\n", compat); return -EINVAL; } - } else if (!strcmp(desc->name, "preallocation")) { + } else if (!strcmp(desc->name, BLOCK_OPT_PREALLOC)) { fprintf(stderr, "Cannot change preallocation mode.\n"); return -ENOTSUP; - } else if (!strcmp(desc->name, "size")) { - new_size = qemu_opt_get_size(opts, "size", 0); - } else if (!strcmp(desc->name, "backing_file")) { - backing_file = qemu_opt_get(opts, "backing_file"); - } else if (!strcmp(desc->name, "backing_fmt")) { - backing_format = qemu_opt_get(opts, "backing_fmt"); - } else if (!strcmp(desc->name, "encryption")) { - encrypt = qemu_opt_get_bool(opts, "encryption", s->crypt_method); + } else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) { + new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0); + } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FILE)) { + backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE); + } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FMT)) { + backing_format = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); + } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT)) { + encrypt = qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT, + s->crypt_method); if (encrypt != !!s->crypt_method) { fprintf(stderr, "Changing the encryption flag is not " "supported.\n"); return -ENOTSUP; } - } else if (!strcmp(desc->name, "cluster_size")) { - cluster_size = qemu_opt_get_size(opts, "cluster_size", + } else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) { + cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, cluster_size); if (cluster_size != s->cluster_size) { fprintf(stderr, "Changing the cluster size is not " "supported.\n"); return -ENOTSUP; } - } else if (!strcmp(desc->name, "lazy_refcounts")) { - lazy_refcounts = qemu_opt_get_bool(opts, "lazy_refcounts", + } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) { + lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS, lazy_refcounts); } else { /* if this assertion fails, this probably means a new option was From 5262caa75468adce44b57ed9c076b2f22bd9315f Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 18 Feb 2015 17:40:48 -0500 Subject: [PATCH 27/73] iotests: Prepare for refcount_bits option Some tests do not work well with certain refcount widths (i.e. you cannot create internal snapshots with refcount_bits=1), so make those widths unsupported. Furthermore, add another filter to _filter_img_create in common.filter which filters out the refcount_bits value. This is necessary for test 079, which does actually work with any refcount width, but invoking qemu-img directly leads to the refcount_bits value being visible in the output; use _make_test_img instead which will filter it out. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- tests/qemu-iotests/007 | 3 +++ tests/qemu-iotests/015 | 2 ++ tests/qemu-iotests/026 | 7 ++++++ tests/qemu-iotests/029 | 2 ++ tests/qemu-iotests/051 | 3 +++ tests/qemu-iotests/058 | 2 ++ tests/qemu-iotests/067 | 2 ++ tests/qemu-iotests/079 | 10 ++------- tests/qemu-iotests/079.out | 38 +++++++++----------------------- tests/qemu-iotests/080 | 2 ++ tests/qemu-iotests/089 | 2 ++ tests/qemu-iotests/108 | 2 ++ tests/qemu-iotests/common.filter | 3 ++- 13 files changed, 41 insertions(+), 37 deletions(-) diff --git a/tests/qemu-iotests/007 b/tests/qemu-iotests/007 index fe1a743806e7..7b5aff59b282 100755 --- a/tests/qemu-iotests/007 +++ b/tests/qemu-iotests/007 @@ -43,6 +43,9 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow2 _supported_proto generic _supported_os Linux +# refcount_bits must be at least 4 so we can create ten internal snapshots +# (1 bit supports none, 2 bits support two, 4 bits support 14) +_unsupported_imgopts 'refcount_bits=\(1\|2\)[^0-9]' echo echo "creating image" diff --git a/tests/qemu-iotests/015 b/tests/qemu-iotests/015 index 099d75723ca0..6f2609524367 100755 --- a/tests/qemu-iotests/015 +++ b/tests/qemu-iotests/015 @@ -43,6 +43,8 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow2 _supported_proto generic _supported_os Linux +# Internal snapshots are (currently) impossible with refcount_bits=1 +_unsupported_imgopts 'refcount_bits=1[^0-9]' echo echo "creating image" diff --git a/tests/qemu-iotests/026 b/tests/qemu-iotests/026 index df2884ba5148..0fc3244c7f2c 100755 --- a/tests/qemu-iotests/026 +++ b/tests/qemu-iotests/026 @@ -46,6 +46,13 @@ _supported_proto file _supported_os Linux _default_cache_mode "writethrough" _supported_cache_modes "writethrough" "none" +# The refcount table tests expect a certain minimum width for refcount entries +# (so that the refcount table actually needs to grow); that minimum is 16 bits, +# being the default refcount entry width. +# 32 and 64 bits do not work either, however, due to different leaked cluster +# count on error. +# Thus, the only remaining option is refcount_bits=16. +_unsupported_imgopts 'refcount_bits=\([^1]\|.\([^6]\|$\)\)' echo "Errors while writing 128 kB" echo diff --git a/tests/qemu-iotests/029 b/tests/qemu-iotests/029 index fa46ace67b4a..b9cd826c7ee5 100755 --- a/tests/qemu-iotests/029 +++ b/tests/qemu-iotests/029 @@ -44,6 +44,8 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow2 _supported_proto generic _supported_os Linux +# Internal snapshots are (currently) impossible with refcount_bits=1 +_unsupported_imgopts 'refcount_bits=1[^0-9]' offset_size=24 offset_l1_size=36 diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 index 27138a229964..2274d187a0ee 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 @@ -41,6 +41,9 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow2 _supported_proto file _supported_os Linux +# A compat=0.10 image is created in this test which does not support anything +# other than refcount_bits=16 +_unsupported_imgopts 'refcount_bits=\([^1]\|.\([^6]\|$\)\)' function do_run_qemu() { diff --git a/tests/qemu-iotests/058 b/tests/qemu-iotests/058 index a60b34b46cac..f2bdd0bffcd1 100755 --- a/tests/qemu-iotests/058 +++ b/tests/qemu-iotests/058 @@ -89,6 +89,8 @@ _supported_fmt qcow2 _supported_proto file _supported_os Linux _require_command QEMU_NBD +# Internal snapshots are (currently) impossible with refcount_bits=1 +_unsupported_imgopts 'refcount_bits=1[^0-9]' # Use -f raw instead of -f $IMGFMT for the NBD connection QEMU_IO_NBD="$QEMU_IO -f raw --cache=$CACHEMODE" diff --git a/tests/qemu-iotests/067 b/tests/qemu-iotests/067 index 0508c696a895..83eefa394eb4 100755 --- a/tests/qemu-iotests/067 +++ b/tests/qemu-iotests/067 @@ -35,6 +35,8 @@ status=1 # failure is the default! _supported_fmt qcow2 _supported_proto file _supported_os Linux +# Because anything other than 16 would change the output of query-block +_unsupported_imgopts 'refcount_bits=\([^1]\|.\([^6]\|$\)\)' function do_run_qemu() { diff --git a/tests/qemu-iotests/079 b/tests/qemu-iotests/079 index 6613cfb18426..ade6efa0d1ae 100755 --- a/tests/qemu-iotests/079 +++ b/tests/qemu-iotests/079 @@ -42,19 +42,13 @@ _supported_fmt qcow2 _supported_proto file nfs _supported_os Linux -function test_qemu_img() -{ - echo qemu-img "$@" | _filter_testdir - $QEMU_IMG "$@" 2>&1 | _filter_testdir - echo -} - echo "=== Check option preallocation and cluster_size ===" echo cluster_sizes="16384 32768 65536 131072 262144 524288 1048576 2097152 4194304" for s in $cluster_sizes; do - test_qemu_img create -f $IMGFMT -o preallocation=metadata,cluster_size=$s "$TEST_IMG" 4G + IMGOPTS=$(_optstr_add "$IMGOPTS" "preallocation=metadata,cluster_size=$s") \ + _make_test_img 4G done # success, all done diff --git a/tests/qemu-iotests/079.out b/tests/qemu-iotests/079.out index ef4b8c911788..6dc5d5763528 100644 --- a/tests/qemu-iotests/079.out +++ b/tests/qemu-iotests/079.out @@ -1,32 +1,14 @@ QA output created by 079 === Check option preallocation and cluster_size === -qemu-img create -f qcow2 -o preallocation=metadata,cluster_size=16384 TEST_DIR/t.qcow2 4G -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=4294967296 encryption=off cluster_size=16384 preallocation='metadata' lazy_refcounts=off - -qemu-img create -f qcow2 -o preallocation=metadata,cluster_size=32768 TEST_DIR/t.qcow2 4G -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=4294967296 encryption=off cluster_size=32768 preallocation='metadata' lazy_refcounts=off - -qemu-img create -f qcow2 -o preallocation=metadata,cluster_size=65536 TEST_DIR/t.qcow2 4G -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=4294967296 encryption=off cluster_size=65536 preallocation='metadata' lazy_refcounts=off - -qemu-img create -f qcow2 -o preallocation=metadata,cluster_size=131072 TEST_DIR/t.qcow2 4G -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=4294967296 encryption=off cluster_size=131072 preallocation='metadata' lazy_refcounts=off - -qemu-img create -f qcow2 -o preallocation=metadata,cluster_size=262144 TEST_DIR/t.qcow2 4G -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=4294967296 encryption=off cluster_size=262144 preallocation='metadata' lazy_refcounts=off - -qemu-img create -f qcow2 -o preallocation=metadata,cluster_size=524288 TEST_DIR/t.qcow2 4G -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=4294967296 encryption=off cluster_size=524288 preallocation='metadata' lazy_refcounts=off - -qemu-img create -f qcow2 -o preallocation=metadata,cluster_size=1048576 TEST_DIR/t.qcow2 4G -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=4294967296 encryption=off cluster_size=1048576 preallocation='metadata' lazy_refcounts=off - -qemu-img create -f qcow2 -o preallocation=metadata,cluster_size=2097152 TEST_DIR/t.qcow2 4G -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=4294967296 encryption=off cluster_size=2097152 preallocation='metadata' lazy_refcounts=off - -qemu-img create -f qcow2 -o preallocation=metadata,cluster_size=4194304 TEST_DIR/t.qcow2 4G -qemu-img: TEST_DIR/t.qcow2: Cluster size must be a power of two between 512 and 2048k -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=4294967296 encryption=off cluster_size=4194304 preallocation='metadata' lazy_refcounts=off - +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4294967296 preallocation='metadata' +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4294967296 preallocation='metadata' +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4294967296 preallocation='metadata' +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4294967296 preallocation='metadata' +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4294967296 preallocation='metadata' +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4294967296 preallocation='metadata' +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4294967296 preallocation='metadata' +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4294967296 preallocation='metadata' +qemu-img: TEST_DIR/t.IMGFMT: Cluster size must be a power of two between 512 and 2048k +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4294967296 preallocation='metadata' *** done diff --git a/tests/qemu-iotests/080 b/tests/qemu-iotests/080 index 73795f198f20..a2c58aebdbc7 100755 --- a/tests/qemu-iotests/080 +++ b/tests/qemu-iotests/080 @@ -42,6 +42,8 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow2 _supported_proto file _supported_os Linux +# Internal snapshots are (currently) impossible with refcount_bits=1 +_unsupported_imgopts 'refcount_bits=1[^0-9]' header_size=104 diff --git a/tests/qemu-iotests/089 b/tests/qemu-iotests/089 index 6f74cecde2da..3e0038dde15e 100755 --- a/tests/qemu-iotests/089 +++ b/tests/qemu-iotests/089 @@ -41,6 +41,8 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow2 _supported_proto file _supported_os Linux +# Because anything other than 16 would change the output of qemu_io -c info +_unsupported_imgopts 'refcount_bits=\([^1]\|.\([^6]\|$\)\)' # Using an image filename containing quotation marks will render the JSON data # below invalid. In that case, we have little choice but simply not to run this diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108 index 12fc92a63317..ce447498e9e0 100755 --- a/tests/qemu-iotests/108 +++ b/tests/qemu-iotests/108 @@ -43,6 +43,8 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow2 _supported_proto file _supported_os Linux +# This test directly modifies a refblock so it relies on refcount_bits being 16 +_unsupported_imgopts 'refcount_bits=\([^1]\|.\([^6]\|$\)\)' echo echo '=== Repairing an image without any refcount table ===' diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter index 06e1bb045f53..012a8122d19e 100644 --- a/tests/qemu-iotests/common.filter +++ b/tests/qemu-iotests/common.filter @@ -192,7 +192,8 @@ _filter_img_create() -e "s# block_size=[0-9]\\+##g" \ -e "s# block_state_zero=\\(on\\|off\\)##g" \ -e "s# log_size=[0-9]\\+##g" \ - -e "s/archipelago:a/TEST_DIR\//g" + -e "s/archipelago:a/TEST_DIR\//g" \ + -e "s# refcount_bits=[0-9]\\+##g" } _filter_img_info() From 06d05fa738915ab82577289a0b81b3e600c4e749 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 18 Feb 2015 17:40:49 -0500 Subject: [PATCH 28/73] qcow2: Allow creation with refcount order != 4 Add a creation option to qcow2 for setting the refcount order of images to be created, and respect that option's value. This breaks some test outputs, fix them. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/qcow2.c | 18 +++++++ include/block/block_int.h | 1 + tests/qemu-iotests/049.out | 108 ++++++++++++++++++------------------- tests/qemu-iotests/082.out | 41 +++++++++++--- tests/qemu-iotests/085.out | 38 ++++++------- 5 files changed, 126 insertions(+), 80 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index 64bb7b8a1fae..8bfb094e53ac 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2079,6 +2079,15 @@ static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp) goto finish; } + refcount_bits = qemu_opt_get_number_del(opts, BLOCK_OPT_REFCOUNT_BITS, + refcount_bits); + if (refcount_bits > 64 || !is_power_of_2(refcount_bits)) { + error_setg(errp, "Refcount width must be a power of two and may not " + "exceed 64 bits"); + ret = -EINVAL; + goto finish; + } + if (version < 3 && refcount_bits != 16) { error_setg(errp, "Different refcount widths than 16 bits require " "compatibility level 1.1 or above (use compat=1.1 or " @@ -2712,6 +2721,9 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) { lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS, lazy_refcounts); + } else if (!strcmp(desc->name, BLOCK_OPT_REFCOUNT_BITS)) { + error_report("Cannot change refcount entry width"); + return -ENOTSUP; } else { /* if this assertion fails, this probably means a new option was * added without having it covered here */ @@ -2881,6 +2893,12 @@ static QemuOptsList qcow2_create_opts = { .help = "Postpone refcount updates", .def_value_str = "off" }, + { + .name = BLOCK_OPT_REFCOUNT_BITS, + .type = QEMU_OPT_NUMBER, + .help = "Width of a reference count entry in bits", + .def_value_str = "16" + }, { /* end of list */ } } }; diff --git a/include/block/block_int.h b/include/block/block_int.h index 9f7b5bd18917..b88633d0c038 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -57,6 +57,7 @@ #define BLOCK_OPT_REDUNDANCY "redundancy" #define BLOCK_OPT_NOCOW "nocow" #define BLOCK_OPT_OBJECT_SIZE "object_size" +#define BLOCK_OPT_REFCOUNT_BITS "refcount_bits" #define BLOCK_PROBE_BUF_SIZE 512 diff --git a/tests/qemu-iotests/049.out b/tests/qemu-iotests/049.out index b6311b01b9e7..765afdd2a6bf 100644 --- a/tests/qemu-iotests/049.out +++ b/tests/qemu-iotests/049.out @@ -4,90 +4,90 @@ QA output created by 049 == 1. Traditional size parameter == qemu-img create -f qcow2 TEST_DIR/t.qcow2 1024 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 TEST_DIR/t.qcow2 1024b -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 TEST_DIR/t.qcow2 1k -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 TEST_DIR/t.qcow2 1K -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 TEST_DIR/t.qcow2 1M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1048576 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1048576 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 TEST_DIR/t.qcow2 1G -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1073741824 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1073741824 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 TEST_DIR/t.qcow2 1T -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1099511627776 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1099511627776 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 TEST_DIR/t.qcow2 1024.0 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 TEST_DIR/t.qcow2 1024.0b -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 TEST_DIR/t.qcow2 1.5k -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1536 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1536 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 TEST_DIR/t.qcow2 1.5K -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1536 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1536 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 TEST_DIR/t.qcow2 1.5M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1572864 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1572864 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 TEST_DIR/t.qcow2 1.5G -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1610612736 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1610612736 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 TEST_DIR/t.qcow2 1.5T -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1649267441664 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1649267441664 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 == 2. Specifying size via -o == qemu-img create -f qcow2 -o size=1024 TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o size=1024b TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o size=1k TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o size=1K TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o size=1M TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1048576 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1048576 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o size=1G TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1073741824 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1073741824 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o size=1T TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1099511627776 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1099511627776 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o size=1024.0 TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o size=1024.0b TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o size=1.5k TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1536 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1536 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o size=1.5K TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1536 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1536 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o size=1.5M TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1572864 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1572864 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o size=1.5G TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1610612736 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1610612736 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o size=1.5T TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1649267441664 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1649267441664 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 == 3. Invalid sizes == @@ -97,7 +97,7 @@ qemu-img: Image size must be less than 8 EiB! qemu-img create -f qcow2 -o size=-1024 TEST_DIR/t.qcow2 qemu-img: qcow2 doesn't support shrinking images yet qemu-img: TEST_DIR/t.qcow2: Could not resize image: Operation not supported -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=-1024 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=-1024 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- -1k qemu-img: Image size must be less than 8 EiB! @@ -105,14 +105,14 @@ qemu-img: Image size must be less than 8 EiB! qemu-img create -f qcow2 -o size=-1k TEST_DIR/t.qcow2 qemu-img: qcow2 doesn't support shrinking images yet qemu-img: TEST_DIR/t.qcow2: Could not resize image: Operation not supported -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=-1024 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=-1024 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- 1kilobyte qemu-img: Invalid image size specified! You may use k, M, G, T, P or E suffixes for qemu-img: kilobytes, megabytes, gigabytes, terabytes, petabytes and exabytes. qemu-img create -f qcow2 -o size=1kilobyte TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- foobar qemu-img: Invalid image size specified! You may use k, M, G, T, P or E suffixes for @@ -125,84 +125,84 @@ qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2' == Check correct interpretation of suffixes for cluster size == qemu-img create -f qcow2 -o cluster_size=1024 TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=1024 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=1024 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o cluster_size=1024b TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=1024 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=1024 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o cluster_size=1k TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=1024 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=1024 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o cluster_size=1K TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=1024 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=1024 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o cluster_size=1M TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=1048576 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=1048576 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o cluster_size=1024.0 TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=1024 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=1024 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o cluster_size=1024.0b TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=1024 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=1024 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o cluster_size=0.5k TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=512 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=512 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o cluster_size=0.5K TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=512 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=512 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o cluster_size=0.5M TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=524288 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=524288 lazy_refcounts=off refcount_bits=16 == Check compat level option == qemu-img create -f qcow2 -o compat=0.10 TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 compat='0.10' encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 compat='0.10' encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o compat=1.1 TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 compat='1.1' encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 compat='1.1' encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o compat=0.42 TEST_DIR/t.qcow2 64M qemu-img: TEST_DIR/t.qcow2: Invalid compatibility level: '0.42' -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 compat='0.42' encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 compat='0.42' encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o compat=foobar TEST_DIR/t.qcow2 64M qemu-img: TEST_DIR/t.qcow2: Invalid compatibility level: 'foobar' -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 compat='foobar' encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 compat='foobar' encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 == Check preallocation option == qemu-img create -f qcow2 -o preallocation=off TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=65536 preallocation='off' lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=65536 preallocation='off' lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o preallocation=metadata TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=65536 preallocation='metadata' lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=65536 preallocation='metadata' lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o preallocation=1234 TEST_DIR/t.qcow2 64M qemu-img: TEST_DIR/t.qcow2: invalid parameter value: 1234 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=65536 preallocation='1234' lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=65536 preallocation='1234' lazy_refcounts=off refcount_bits=16 == Check encryption option == qemu-img create -f qcow2 -o encryption=off TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o encryption=on TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=on cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 encryption=on cluster_size=65536 lazy_refcounts=off refcount_bits=16 == Check lazy_refcounts option (only with v3) == qemu-img create -f qcow2 -o compat=1.1,lazy_refcounts=off TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 compat='1.1' encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 compat='1.1' encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o compat=1.1,lazy_refcounts=on TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 compat='1.1' encryption=off cluster_size=65536 lazy_refcounts=on +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 compat='1.1' encryption=off cluster_size=65536 lazy_refcounts=on refcount_bits=16 qemu-img create -f qcow2 -o compat=0.10,lazy_refcounts=off TEST_DIR/t.qcow2 64M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 compat='0.10' encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 compat='0.10' encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 qemu-img create -f qcow2 -o compat=0.10,lazy_refcounts=on TEST_DIR/t.qcow2 64M qemu-img: TEST_DIR/t.qcow2: Lazy refcounts only supported with compatibility level 1.1 and above (use compat=1.1 or greater) -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 compat='0.10' encryption=off cluster_size=65536 lazy_refcounts=on +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=67108864 compat='0.10' encryption=off cluster_size=65536 lazy_refcounts=on refcount_bits=16 *** done diff --git a/tests/qemu-iotests/082.out b/tests/qemu-iotests/082.out index 38729dc2190c..3a749b8251c4 100644 --- a/tests/qemu-iotests/082.out +++ b/tests/qemu-iotests/082.out @@ -3,14 +3,14 @@ QA output created by 082 === create: Options specified more than once === Testing: create -f foo -f qcow2 TEST_DIR/t.qcow2 128M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=134217728 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=134217728 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 image: TEST_DIR/t.IMGFMT file format: IMGFMT virtual size: 128M (134217728 bytes) cluster_size: 65536 Testing: create -f qcow2 -o cluster_size=4k -o lazy_refcounts=on TEST_DIR/t.qcow2 128M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=134217728 encryption=off cluster_size=4096 lazy_refcounts=on +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=134217728 encryption=off cluster_size=4096 lazy_refcounts=on refcount_bits=16 image: TEST_DIR/t.IMGFMT file format: IMGFMT virtual size: 128M (134217728 bytes) @@ -22,7 +22,7 @@ Format specific information: corrupt: false Testing: create -f qcow2 -o cluster_size=4k -o lazy_refcounts=on -o cluster_size=8k TEST_DIR/t.qcow2 128M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=134217728 encryption=off cluster_size=8192 lazy_refcounts=on +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=134217728 encryption=off cluster_size=8192 lazy_refcounts=on refcount_bits=16 image: TEST_DIR/t.IMGFMT file format: IMGFMT virtual size: 128M (134217728 bytes) @@ -34,7 +34,7 @@ Format specific information: corrupt: false Testing: create -f qcow2 -o cluster_size=4k,cluster_size=8k TEST_DIR/t.qcow2 128M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=134217728 encryption=off cluster_size=8192 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=134217728 encryption=off cluster_size=8192 lazy_refcounts=off refcount_bits=16 image: TEST_DIR/t.IMGFMT file format: IMGFMT virtual size: 128M (134217728 bytes) @@ -52,6 +52,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata, falloc, full) lazy_refcounts Postpone refcount updates +refcount_bits Width of a reference count entry in bits nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o ? TEST_DIR/t.qcow2 128M @@ -64,6 +65,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata, falloc, full) lazy_refcounts Postpone refcount updates +refcount_bits Width of a reference count entry in bits nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o cluster_size=4k,help TEST_DIR/t.qcow2 128M @@ -76,6 +78,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata, falloc, full) lazy_refcounts Postpone refcount updates +refcount_bits Width of a reference count entry in bits nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o cluster_size=4k,? TEST_DIR/t.qcow2 128M @@ -88,6 +91,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata, falloc, full) lazy_refcounts Postpone refcount updates +refcount_bits Width of a reference count entry in bits nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o help,cluster_size=4k TEST_DIR/t.qcow2 128M @@ -100,6 +104,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata, falloc, full) lazy_refcounts Postpone refcount updates +refcount_bits Width of a reference count entry in bits nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o ?,cluster_size=4k TEST_DIR/t.qcow2 128M @@ -112,6 +117,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata, falloc, full) lazy_refcounts Postpone refcount updates +refcount_bits Width of a reference count entry in bits nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o cluster_size=4k -o help TEST_DIR/t.qcow2 128M @@ -124,6 +130,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata, falloc, full) lazy_refcounts Postpone refcount updates +refcount_bits Width of a reference count entry in bits nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o cluster_size=4k -o ? TEST_DIR/t.qcow2 128M @@ -136,13 +143,14 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata, falloc, full) lazy_refcounts Postpone refcount updates +refcount_bits Width of a reference count entry in bits nocow Turn off copy-on-write (valid only on btrfs) Testing: create -f qcow2 -o backing_file=TEST_DIR/t.qcow2,,help TEST_DIR/t.qcow2 128M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/t.qcow2,help' encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/t.qcow2,help' encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 Testing: create -f qcow2 -o backing_file=TEST_DIR/t.qcow2,,? TEST_DIR/t.qcow2 128M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/t.qcow2,?' encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/t.qcow2,?' encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 Testing: create -f qcow2 -o backing_file=TEST_DIR/t.qcow2, -o help TEST_DIR/t.qcow2 128M qemu-img: Invalid option list: backing_file=TEST_DIR/t.qcow2, @@ -163,6 +171,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata, falloc, full) lazy_refcounts Postpone refcount updates +refcount_bits Width of a reference count entry in bits Testing: create -o help Supported options: @@ -171,7 +180,7 @@ size Virtual disk size === convert: Options specified more than once === Testing: create -f qcow2 TEST_DIR/t.qcow2 128M -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=134217728 encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=134217728 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 Testing: convert -f foo -f qcow2 TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base image: TEST_DIR/t.IMGFMT.base @@ -224,6 +233,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata, falloc, full) lazy_refcounts Postpone refcount updates +refcount_bits Width of a reference count entry in bits nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o ? TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base @@ -236,6 +246,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata, falloc, full) lazy_refcounts Postpone refcount updates +refcount_bits Width of a reference count entry in bits nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o cluster_size=4k,help TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base @@ -248,6 +259,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata, falloc, full) lazy_refcounts Postpone refcount updates +refcount_bits Width of a reference count entry in bits nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o cluster_size=4k,? TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base @@ -260,6 +272,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata, falloc, full) lazy_refcounts Postpone refcount updates +refcount_bits Width of a reference count entry in bits nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o help,cluster_size=4k TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base @@ -272,6 +285,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata, falloc, full) lazy_refcounts Postpone refcount updates +refcount_bits Width of a reference count entry in bits nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o ?,cluster_size=4k TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base @@ -284,6 +298,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata, falloc, full) lazy_refcounts Postpone refcount updates +refcount_bits Width of a reference count entry in bits nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o cluster_size=4k -o help TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base @@ -296,6 +311,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata, falloc, full) lazy_refcounts Postpone refcount updates +refcount_bits Width of a reference count entry in bits nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o cluster_size=4k -o ? TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base @@ -308,6 +324,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata, falloc, full) lazy_refcounts Postpone refcount updates +refcount_bits Width of a reference count entry in bits nocow Turn off copy-on-write (valid only on btrfs) Testing: convert -O qcow2 -o backing_file=TEST_DIR/t.qcow2,,help TEST_DIR/t.qcow2 TEST_DIR/t.qcow2.base @@ -335,6 +352,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata, falloc, full) lazy_refcounts Postpone refcount updates +refcount_bits Width of a reference count entry in bits Testing: convert -o help Supported options: @@ -393,6 +411,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata, falloc, full) lazy_refcounts Postpone refcount updates +refcount_bits Width of a reference count entry in bits nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o ? TEST_DIR/t.qcow2 @@ -405,6 +424,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata, falloc, full) lazy_refcounts Postpone refcount updates +refcount_bits Width of a reference count entry in bits nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o cluster_size=4k,help TEST_DIR/t.qcow2 @@ -417,6 +437,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata, falloc, full) lazy_refcounts Postpone refcount updates +refcount_bits Width of a reference count entry in bits nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o cluster_size=4k,? TEST_DIR/t.qcow2 @@ -429,6 +450,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata, falloc, full) lazy_refcounts Postpone refcount updates +refcount_bits Width of a reference count entry in bits nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o help,cluster_size=4k TEST_DIR/t.qcow2 @@ -441,6 +463,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata, falloc, full) lazy_refcounts Postpone refcount updates +refcount_bits Width of a reference count entry in bits nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o ?,cluster_size=4k TEST_DIR/t.qcow2 @@ -453,6 +476,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata, falloc, full) lazy_refcounts Postpone refcount updates +refcount_bits Width of a reference count entry in bits nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o cluster_size=4k -o help TEST_DIR/t.qcow2 @@ -465,6 +489,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata, falloc, full) lazy_refcounts Postpone refcount updates +refcount_bits Width of a reference count entry in bits nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o cluster_size=4k -o ? TEST_DIR/t.qcow2 @@ -477,6 +502,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata, falloc, full) lazy_refcounts Postpone refcount updates +refcount_bits Width of a reference count entry in bits nocow Turn off copy-on-write (valid only on btrfs) Testing: amend -f qcow2 -o backing_file=TEST_DIR/t.qcow2,,help TEST_DIR/t.qcow2 @@ -506,6 +532,7 @@ encryption Encrypt the image cluster_size qcow2 cluster size preallocation Preallocation mode (allowed values: off, metadata, falloc, full) lazy_refcounts Postpone refcount updates +refcount_bits Width of a reference count entry in bits Testing: convert -o help Supported options: diff --git a/tests/qemu-iotests/085.out b/tests/qemu-iotests/085.out index 0f2b17f99e34..5eb8b947ca0f 100644 --- a/tests/qemu-iotests/085.out +++ b/tests/qemu-iotests/085.out @@ -11,7 +11,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 === Create a single snapshot on virtio0 === -Formatting 'TEST_DIR/1-snapshot-v0.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/t.qcow2.orig' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/1-snapshot-v0.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/t.qcow2.orig' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 {"return": {}} === Invalid command - missing device and nodename === @@ -25,31 +25,31 @@ Formatting 'TEST_DIR/1-snapshot-v0.qcow2', fmt=qcow2 size=134217728 backing_file === Create several transactional group snapshots === -Formatting 'TEST_DIR/2-snapshot-v0.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/1-snapshot-v0.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off -Formatting 'TEST_DIR/2-snapshot-v1.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/t.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/2-snapshot-v0.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/1-snapshot-v0.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/2-snapshot-v1.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/t.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 {"return": {}} -Formatting 'TEST_DIR/3-snapshot-v0.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/2-snapshot-v0.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off -Formatting 'TEST_DIR/3-snapshot-v1.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/2-snapshot-v1.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/3-snapshot-v0.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/2-snapshot-v0.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/3-snapshot-v1.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/2-snapshot-v1.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 {"return": {}} -Formatting 'TEST_DIR/4-snapshot-v0.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/3-snapshot-v0.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off -Formatting 'TEST_DIR/4-snapshot-v1.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/3-snapshot-v1.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/4-snapshot-v0.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/3-snapshot-v0.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/4-snapshot-v1.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/3-snapshot-v1.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 {"return": {}} -Formatting 'TEST_DIR/5-snapshot-v0.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/4-snapshot-v0.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off -Formatting 'TEST_DIR/5-snapshot-v1.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/4-snapshot-v1.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/5-snapshot-v0.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/4-snapshot-v0.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/5-snapshot-v1.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/4-snapshot-v1.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 {"return": {}} -Formatting 'TEST_DIR/6-snapshot-v0.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/5-snapshot-v0.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off -Formatting 'TEST_DIR/6-snapshot-v1.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/5-snapshot-v1.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/6-snapshot-v0.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/5-snapshot-v0.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/6-snapshot-v1.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/5-snapshot-v1.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 {"return": {}} -Formatting 'TEST_DIR/7-snapshot-v0.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/6-snapshot-v0.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off -Formatting 'TEST_DIR/7-snapshot-v1.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/6-snapshot-v1.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/7-snapshot-v0.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/6-snapshot-v0.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/7-snapshot-v1.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/6-snapshot-v1.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 {"return": {}} -Formatting 'TEST_DIR/8-snapshot-v0.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/7-snapshot-v0.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off -Formatting 'TEST_DIR/8-snapshot-v1.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/7-snapshot-v1.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/8-snapshot-v0.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/7-snapshot-v0.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/8-snapshot-v1.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/7-snapshot-v1.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 {"return": {}} -Formatting 'TEST_DIR/9-snapshot-v0.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/8-snapshot-v0.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off -Formatting 'TEST_DIR/9-snapshot-v1.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/8-snapshot-v1.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/9-snapshot-v0.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/8-snapshot-v0.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/9-snapshot-v1.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/8-snapshot-v1.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 {"return": {}} -Formatting 'TEST_DIR/10-snapshot-v0.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/9-snapshot-v0.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off -Formatting 'TEST_DIR/10-snapshot-v1.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/9-snapshot-v1.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off +Formatting 'TEST_DIR/10-snapshot-v0.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/9-snapshot-v0.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 +Formatting 'TEST_DIR/10-snapshot-v1.qcow2', fmt=qcow2 size=134217728 backing_file='TEST_DIR/9-snapshot-v1.qcow2' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 {"return": {}} *** done From d2eed8c6d9601cddc9dff2940d87bc022e9bd03c Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 18 Feb 2015 17:40:50 -0500 Subject: [PATCH 29/73] iotests: Add test for different refcount widths Add a test for errors specific to certain widths (i.e. snapshots with refcount_bits=1). Signed-off-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/112 | 187 +++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/112.out | 84 +++++++++++++++++ tests/qemu-iotests/group | 1 + 3 files changed, 272 insertions(+) create mode 100755 tests/qemu-iotests/112 create mode 100644 tests/qemu-iotests/112.out diff --git a/tests/qemu-iotests/112 b/tests/qemu-iotests/112 new file mode 100755 index 000000000000..3f054a3fcd09 --- /dev/null +++ b/tests/qemu-iotests/112 @@ -0,0 +1,187 @@ +#!/bin/bash +# +# Test cases for different refcount_bits values +# +# Copyright (C) 2015 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=mreitz@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +# This tests qcow2-specific low-level functionality +_supported_fmt qcow2 +_supported_proto file +_supported_os Linux +# This test will set refcount_bits on its own which would conflict with the +# manual setting; compat will be overridden as well +_unsupported_imgopts refcount_bits 'compat=0.10' + +function print_refcount_bits() +{ + $QEMU_IMG info "$TEST_IMG" | sed -n '/refcount bits:/ s/^ *//p' +} + +echo +echo '=== refcount_bits limits ===' +echo + +# Must be positive (non-zero) +IMGOPTS="$IMGOPTS,refcount_bits=0" _make_test_img 64M +# Must be positive (non-negative) +IMGOPTS="$IMGOPTS,refcount_bits=-1" _make_test_img 64M +# May not exceed 64 +IMGOPTS="$IMGOPTS,refcount_bits=128" _make_test_img 64M +# Must be a power of two +IMGOPTS="$IMGOPTS,refcount_bits=42" _make_test_img 64M + +# 1 is the minimum +IMGOPTS="$IMGOPTS,refcount_bits=1" _make_test_img 64M +print_refcount_bits + +# 64 is the maximum +IMGOPTS="$IMGOPTS,refcount_bits=64" _make_test_img 64M +print_refcount_bits + +# 16 is the default +_make_test_img 64M +print_refcount_bits + +echo +echo '=== refcount_bits and compat=0.10 ===' +echo + +# Should work +IMGOPTS="$IMGOPTS,compat=0.10,refcount_bits=16" _make_test_img 64M +print_refcount_bits + +# Should not work +IMGOPTS="$IMGOPTS,compat=0.10,refcount_bits=1" _make_test_img 64M +IMGOPTS="$IMGOPTS,compat=0.10,refcount_bits=64" _make_test_img 64M + + +echo +echo '=== Snapshot limit on refcount_bits=1 ===' +echo + +IMGOPTS="$IMGOPTS,refcount_bits=1" _make_test_img 64M +print_refcount_bits + +$QEMU_IO -c 'write 0 512' "$TEST_IMG" | _filter_qemu_io + +# Should fail for now; in the future, this might be supported by automatically +# copying all clusters with overflowing refcount +$QEMU_IMG snapshot -c foo "$TEST_IMG" + +# The new L1 table could/should be leaked +_check_test_img + +echo +echo '=== Snapshot limit on refcount_bits=2 ===' +echo + +IMGOPTS="$IMGOPTS,refcount_bits=2" _make_test_img 64M +print_refcount_bits + +$QEMU_IO -c 'write 0 512' "$TEST_IMG" | _filter_qemu_io + +# Should succeed +$QEMU_IMG snapshot -c foo "$TEST_IMG" +$QEMU_IMG snapshot -c bar "$TEST_IMG" +# Should fail (4th reference) +$QEMU_IMG snapshot -c baz "$TEST_IMG" + +# The new L1 table could/should be leaked +_check_test_img + +echo +echo '=== Compressed clusters with refcount_bits=1 ===' +echo + +IMGOPTS="$IMGOPTS,refcount_bits=1" _make_test_img 64M +print_refcount_bits + +# Both should fit into a single host cluster; instead of failing to increase the +# refcount of that cluster, qemu should just allocate a new cluster and make +# this operation succeed +$QEMU_IO -c 'write -P 0 -c 0 64k' \ + -c 'write -P 1 -c 64k 64k' \ + "$TEST_IMG" | _filter_qemu_io + +_check_test_img + +echo +echo '=== MSb set in 64 bit refcount ===' +echo + +IMGOPTS="$IMGOPTS,refcount_bits=64" _make_test_img 64M +print_refcount_bits + +$QEMU_IO -c 'write 0 512' "$TEST_IMG" | _filter_qemu_io + +# Set the MSb in the refblock entry of the data cluster +poke_file "$TEST_IMG" $((0x20028)) "\x80\x00\x00\x00\x00\x00\x00\x00" + +# Clear OFLAG_COPIED in the L2 entry of the data cluster +poke_file "$TEST_IMG" $((0x40000)) "\x00\x00\x00\x00\x00\x05\x00\x00" + +# Try to write to that cluster (should work, even though the MSb is set) +$QEMU_IO -c 'write 0 512' "$TEST_IMG" | _filter_qemu_io + +echo +echo '=== Snapshot on maximum 64 bit refcount value ===' +echo + +IMGOPTS="$IMGOPTS,refcount_bits=64" _make_test_img 64M +print_refcount_bits + +$QEMU_IO -c 'write 0 512' "$TEST_IMG" | _filter_qemu_io + +# Set the refblock entry to the maximum value possible +poke_file "$TEST_IMG" $((0x20028)) "\xff\xff\xff\xff\xff\xff\xff\xff" + +# Clear OFLAG_COPIED in the L2 entry of the data cluster +poke_file "$TEST_IMG" $((0x40000)) "\x00\x00\x00\x00\x00\x05\x00\x00" + +# Try a snapshot (should correctly identify the overflow; may work in the future +# by falling back to COW) +$QEMU_IMG snapshot -c foo "$TEST_IMG" + +# The new L1 table could/should be leaked; and obviously the data cluster is +# leaked (refcount=UINT64_MAX reference=1) +_check_test_img + + +# success, all done +echo '*** done' +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/112.out b/tests/qemu-iotests/112.out new file mode 100644 index 000000000000..9a98633f6a1a --- /dev/null +++ b/tests/qemu-iotests/112.out @@ -0,0 +1,84 @@ +QA output created by 112 + +=== refcount_bits limits === + +qemu-img: TEST_DIR/t.IMGFMT: Refcount width must be a power of two and may not exceed 64 bits +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +qemu-img: TEST_DIR/t.IMGFMT: Refcount width must be a power of two and may not exceed 64 bits +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 refcount_bits=-1 +qemu-img: TEST_DIR/t.IMGFMT: Refcount width must be a power of two and may not exceed 64 bits +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +qemu-img: TEST_DIR/t.IMGFMT: Refcount width must be a power of two and may not exceed 64 bits +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +refcount bits: 1 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +refcount bits: 64 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +refcount bits: 16 + +=== refcount_bits and compat=0.10 === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +refcount bits: 16 +qemu-img: TEST_DIR/t.IMGFMT: Different refcount widths than 16 bits require compatibility level 1.1 or above (use compat=1.1 or greater) +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +qemu-img: TEST_DIR/t.IMGFMT: Different refcount widths than 16 bits require compatibility level 1.1 or above (use compat=1.1 or greater) +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 + +=== Snapshot limit on refcount_bits=1 === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +refcount bits: 1 +wrote 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-img: Could not create snapshot 'foo': -22 (Invalid argument) +Leaked cluster 6 refcount=1 reference=0 + +1 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. + +=== Snapshot limit on refcount_bits=2 === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +refcount bits: 2 +wrote 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-img: Could not create snapshot 'baz': -22 (Invalid argument) +Leaked cluster 7 refcount=1 reference=0 + +1 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. + +=== Compressed clusters with refcount_bits=1 === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +refcount bits: 1 +wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 65536 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +No errors were found on the image. + +=== MSb set in 64 bit refcount === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +refcount bits: 64 +wrote 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +=== Snapshot on maximum 64 bit refcount value === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +refcount bits: 64 +wrote 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-img: Could not create snapshot 'foo': -22 (Invalid argument) +Leaked cluster 5 refcount=18446744073709551615 reference=1 +Leaked cluster 6 refcount=1 reference=0 + +2 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 0d3b95c25899..93a02502df6d 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -116,6 +116,7 @@ 109 rw auto 110 rw auto backing quick 111 rw auto quick +112 rw auto 113 rw auto quick 114 rw auto quick 116 rw auto quick From a069e2f1372a0a823ab506fc019852a2a652aa54 Mon Sep 17 00:00:00 2001 From: John Snow Date: Fri, 6 Feb 2015 16:26:17 -0500 Subject: [PATCH 30/73] blkdebug: fix "once" rule Background: The blkdebug scripts are currently engineered so that when a debug event occurs, a prefilter browses a master list of parsed rules for a certain event and adds them to an "active list" of rules to be used for the forthcoming action, provided the events and state numbers match. Then, once the request is received, the last active rule is used to inject an error if certain parameters match. This active list is cleared every time the prefilter injects a new rule for the first time during a debug event. The "once" rule currently causes the error injection, if it is triggered, to only clear the active list. This is insufficient for preventing future injections of the same rule. Remedy: This patch /deletes/ the rule from the list that the prefilter browses, so it is gone for good. In V2, we remove only the rule of interest from the active list instead of allowing the "once" rule to clear the entire list of active rules. Impact: This affects iotests 026. Several ENOSPC tests that used "once" can be seen to have output that shows multiple failure messages. After this patch, the error messages tend to be smaller and less severe, but the injection can still be seen to be working. I have patched the expected output to expect the smaller error messages. Signed-off-by: John Snow Message-id: 1423257977-25630-1-git-send-email-jsnow@redhat.com Reviewed-by: Max Reitz Signed-off-by: Max Reitz Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/blkdebug.c | 6 ++++-- tests/qemu-iotests/026.out | 24 ++++-------------------- 2 files changed, 8 insertions(+), 22 deletions(-) diff --git a/block/blkdebug.c b/block/blkdebug.c index 9ce35cd0356e..63611e0a33e9 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -472,12 +472,14 @@ static BlockAIOCB *inject_error(BlockDriverState *bs, int error = rule->options.inject.error; struct BlkdebugAIOCB *acb; QEMUBH *bh; + bool immediately = rule->options.inject.immediately; if (rule->options.inject.once) { - QSIMPLEQ_INIT(&s->active_rules); + QSIMPLEQ_REMOVE(&s->active_rules, rule, BlkdebugRule, active_next); + remove_rule(rule); } - if (rule->options.inject.immediately) { + if (immediately) { return NULL; } diff --git a/tests/qemu-iotests/026.out b/tests/qemu-iotests/026.out index 524f7ee21cd1..5e964fb5a51e 100644 --- a/tests/qemu-iotests/026.out +++ b/tests/qemu-iotests/026.out @@ -140,19 +140,13 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l2_update; errno: 5; imm: off; once: on; write -Failed to flush the L2 table cache: Input/output error write failed: Input/output error - -127 leaked clusters were found on the image. -This means waste of disk space, but no harm to data. +No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l2_update; errno: 5; imm: off; once: on; write -b -Failed to flush the L2 table cache: Input/output error write failed: Input/output error - -127 leaked clusters were found on the image. -This means waste of disk space, but no harm to data. +No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l2_update; errno: 5; imm: off; once: off; write @@ -174,19 +168,13 @@ This means waste of disk space, but no harm to data. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l2_update; errno: 28; imm: off; once: on; write -Failed to flush the L2 table cache: No space left on device write failed: No space left on device - -127 leaked clusters were found on the image. -This means waste of disk space, but no harm to data. +No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l2_update; errno: 28; imm: off; once: on; write -b -Failed to flush the L2 table cache: No space left on device write failed: No space left on device - -127 leaked clusters were found on the image. -This means waste of disk space, but no harm to data. +No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l2_update; errno: 28; imm: off; once: off; write @@ -356,13 +344,11 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_update_part; errno: 5; imm: off; once: on; write -Failed to flush the refcount block cache: Input/output error write failed: Input/output error No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_update_part; errno: 5; imm: off; once: on; write -b -Failed to flush the refcount block cache: Input/output error write failed: Input/output error No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 @@ -382,13 +368,11 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_update_part; errno: 28; imm: off; once: on; write -Failed to flush the refcount block cache: No space left on device write failed: No space left on device No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_update_part; errno: 28; imm: off; once: on; write -b -Failed to flush the refcount block cache: No space left on device write failed: No space left on device No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 From 892b7de832681cdfff22a6e96cb0280b59684024 Mon Sep 17 00:00:00 2001 From: Ekaterina Tumanova Date: Mon, 16 Feb 2015 12:47:54 +0100 Subject: [PATCH 31/73] block: add bdrv functions for geometry and blocksize Add driver functions for geometry and blocksize detection Signed-off-by: Ekaterina Tumanova Reviewed-by: Thomas Huth Reviewed-by: Markus Armbruster Reviewed-by: Stefan Hajnoczi Message-id: 1424087278-49393-2-git-send-email-tumanova@linux.vnet.ibm.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block.c | 34 ++++++++++++++++++++++++++++++++++ include/block/block.h | 13 +++++++++++++ include/block/block_int.h | 15 +++++++++++++++ 3 files changed, 62 insertions(+) diff --git a/block.c b/block.c index 28ea19aafba7..191a847940d7 100644 --- a/block.c +++ b/block.c @@ -568,6 +568,40 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) } } +/** + * Try to get @bs's logical and physical block size. + * On success, store them in @bsz struct and return 0. + * On failure return -errno. + * @bs must not be empty. + */ +int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) +{ + BlockDriver *drv = bs->drv; + + if (drv && drv->bdrv_probe_blocksizes) { + return drv->bdrv_probe_blocksizes(bs, bsz); + } + + return -ENOTSUP; +} + +/** + * Try to get @bs's geometry (cyls, heads, sectors). + * On success, store them in @geo struct and return 0. + * On failure return -errno. + * @bs must not be empty. + */ +int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo) +{ + BlockDriver *drv = bs->drv; + + if (drv && drv->bdrv_probe_geometry) { + return drv->bdrv_probe_geometry(bs, geo); + } + + return -ENOTSUP; +} + /* * Create a uniquely-named empty temporary file. * Return 0 upon success, otherwise a negative errno value. diff --git a/include/block/block.h b/include/block/block.h index 649c269ecdc8..473c746342f5 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -60,6 +60,17 @@ typedef enum { BDRV_REQ_MAY_UNMAP = 0x4, } BdrvRequestFlags; +typedef struct BlockSizes { + uint32_t phys; + uint32_t log; +} BlockSizes; + +typedef struct HDGeometry { + uint32_t heads; + uint32_t sectors; + uint32_t cylinders; +} HDGeometry; + #define BDRV_O_RDWR 0x0002 #define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */ #define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */ @@ -550,6 +561,8 @@ AioContext *bdrv_get_aio_context(BlockDriverState *bs); * This function must be called with iothread lock held. */ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context); +int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz); +int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo); void bdrv_io_plug(BlockDriverState *bs); void bdrv_io_unplug(BlockDriverState *bs); diff --git a/include/block/block_int.h b/include/block/block_int.h index b88633d0c038..dccb092df79d 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -275,6 +275,21 @@ struct BlockDriver { void (*bdrv_io_unplug)(BlockDriverState *bs); void (*bdrv_flush_io_queue)(BlockDriverState *bs); + /** + * Try to get @bs's logical and physical block size. + * On success, store them in @bsz and return zero. + * On failure, return negative errno. + */ + int (*bdrv_probe_blocksizes)(BlockDriverState *bs, BlockSizes *bsz); + /** + * Try to get @bs's geometry (cyls, heads, sectors) + * On success, store them in @geo and return 0. + * On failure return -errno. + * Only drivers that want to override guest geometry implement this + * callback; see hd_geometry_guess(). + */ + int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo); + QLIST_ENTRY(BlockDriver) list; }; From 8a4ed0d1b16d8932362ffecf8f6f79e6340a585f Mon Sep 17 00:00:00 2001 From: Ekaterina Tumanova Date: Mon, 16 Feb 2015 12:47:55 +0100 Subject: [PATCH 32/73] raw-posix: Factor block size detection out of raw_probe_alignment() Put it in new probe_logical_blocksize(). Signed-off-by: Ekaterina Tumanova Reviewed-by: Markus Armbruster Reviewed-by: Stefan Hajnoczi Message-id: 1424087278-49393-3-git-send-email-tumanova@linux.vnet.ibm.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/raw-posix.c | 51 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/block/raw-posix.c b/block/raw-posix.c index c0b46cac9e87..34d403d3917c 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -218,39 +218,58 @@ static int raw_normalize_devicepath(const char **filename) } #endif -static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp) +/* + * Get logical block size via ioctl. On success store it in @sector_size_p. + */ +static int probe_logical_blocksize(int fd, unsigned int *sector_size_p) { - BDRVRawState *s = bs->opaque; - char *buf; unsigned int sector_size; + bool success = false; - /* For /dev/sg devices the alignment is not really used. - With buffered I/O, we don't have any restrictions. */ - if (bs->sg || !s->needs_alignment) { - bs->request_alignment = 1; - s->buf_align = 1; - return; - } + errno = ENOTSUP; /* Try a few ioctls to get the right size */ - bs->request_alignment = 0; - s->buf_align = 0; - #ifdef BLKSSZGET if (ioctl(fd, BLKSSZGET, §or_size) >= 0) { - bs->request_alignment = sector_size; + *sector_size_p = sector_size; + success = true; } #endif #ifdef DKIOCGETBLOCKSIZE if (ioctl(fd, DKIOCGETBLOCKSIZE, §or_size) >= 0) { - bs->request_alignment = sector_size; + *sector_size_p = sector_size; + success = true; } #endif #ifdef DIOCGSECTORSIZE if (ioctl(fd, DIOCGSECTORSIZE, §or_size) >= 0) { - bs->request_alignment = sector_size; + *sector_size_p = sector_size; + success = true; } #endif + + return success ? 0 : -errno; +} + +static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp) +{ + BDRVRawState *s = bs->opaque; + char *buf; + + /* For /dev/sg devices the alignment is not really used. + With buffered I/O, we don't have any restrictions. */ + if (bs->sg || !s->needs_alignment) { + bs->request_alignment = 1; + s->buf_align = 1; + return; + } + + bs->request_alignment = 0; + s->buf_align = 0; + /* Let's try to use the logical blocksize for the alignment. */ + if (probe_logical_blocksize(fd, &bs->request_alignment) < 0) { + bs->request_alignment = 0; + } #ifdef CONFIG_XFS if (s->is_xfs) { struct dioattr da; From 1a9335e4a94d0f0c635a5284aa3ba5dc50d24f70 Mon Sep 17 00:00:00 2001 From: Ekaterina Tumanova Date: Mon, 16 Feb 2015 12:47:56 +0100 Subject: [PATCH 33/73] block: Add driver methods to probe blocksizes and geometry Introduce driver methods of defining disk blocksizes (physical and logical) and hard drive geometry. Methods are only implemented for "host_device". For "raw" devices driver calls child's method. For now geometry detection will only work for DASD devices. To check that a local check_for_dasd function was introduced. It calls BIODASDINFO2 ioctl and returns its rc. Blocksizes detection function will probe sizes for DASD devices. Signed-off-by: Ekaterina Tumanova Reviewed-by: Markus Armbruster Reviewed-by: Stefan Hajnoczi Message-id: 1424087278-49393-4-git-send-email-tumanova@linux.vnet.ibm.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/raw-posix.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++ block/raw_bsd.c | 12 ++++++ 2 files changed, 115 insertions(+) diff --git a/block/raw-posix.c b/block/raw-posix.c index 34d403d3917c..3263d2b5a14e 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -56,6 +56,10 @@ #include #include #include +#include +#ifdef __s390__ +#include +#endif #ifndef FS_NOCOW_FL #define FS_NOCOW_FL 0x00800000 /* Do not cow file */ #endif @@ -251,6 +255,23 @@ static int probe_logical_blocksize(int fd, unsigned int *sector_size_p) return success ? 0 : -errno; } +/** + * Get physical block size of @fd. + * On success, store it in @blk_size and return 0. + * On failure, return -errno. + */ +static int probe_physical_blocksize(int fd, unsigned int *blk_size) +{ +#ifdef BLKPBSZGET + if (ioctl(fd, BLKPBSZGET, blk_size) < 0) { + return -errno; + } + return 0; +#else + return -ENOTSUP; +#endif +} + static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp) { BDRVRawState *s = bs->opaque; @@ -674,6 +695,86 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) bs->bl.opt_mem_alignment = s->buf_align; } +static int check_for_dasd(int fd) +{ +#ifdef BIODASDINFO2 + struct dasd_information2_t info = {0}; + + return ioctl(fd, BIODASDINFO2, &info); +#else + return -1; +#endif +} + +/** + * Try to get @bs's logical and physical block size. + * On success, store them in @bsz and return zero. + * On failure, return negative errno. + */ +static int hdev_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) +{ + BDRVRawState *s = bs->opaque; + int ret; + + /* If DASD, get blocksizes */ + if (check_for_dasd(s->fd) < 0) { + return -ENOTSUP; + } + ret = probe_logical_blocksize(s->fd, &bsz->log); + if (ret < 0) { + return ret; + } + return probe_physical_blocksize(s->fd, &bsz->phys); +} + +/** + * Try to get @bs's geometry: cyls, heads, sectors. + * On success, store them in @geo and return 0. + * On failure return -errno. + * (Allows block driver to assign default geometry values that guest sees) + */ +#ifdef __linux__ +static int hdev_probe_geometry(BlockDriverState *bs, HDGeometry *geo) +{ + BDRVRawState *s = bs->opaque; + struct hd_geometry ioctl_geo = {0}; + uint32_t blksize; + + /* If DASD, get its geometry */ + if (check_for_dasd(s->fd) < 0) { + return -ENOTSUP; + } + if (ioctl(s->fd, HDIO_GETGEO, &ioctl_geo) < 0) { + return -errno; + } + /* HDIO_GETGEO may return success even though geo contains zeros + (e.g. certain multipath setups) */ + if (!ioctl_geo.heads || !ioctl_geo.sectors || !ioctl_geo.cylinders) { + return -ENOTSUP; + } + /* Do not return a geometry for partition */ + if (ioctl_geo.start != 0) { + return -ENOTSUP; + } + geo->heads = ioctl_geo.heads; + geo->sectors = ioctl_geo.sectors; + if (!probe_physical_blocksize(s->fd, &blksize)) { + /* overwrite cyls: HDIO_GETGEO result is incorrect for big drives */ + geo->cylinders = bdrv_nb_sectors(bs) / (blksize / BDRV_SECTOR_SIZE) + / (geo->heads * geo->sectors); + return 0; + } + geo->cylinders = ioctl_geo.cylinders; + + return 0; +} +#else /* __linux__ */ +static int hdev_probe_geometry(BlockDriverState *bs, HDGeometry *geo) +{ + return -ENOTSUP; +} +#endif + static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb) { int ret; @@ -2215,6 +2316,8 @@ static BlockDriver bdrv_host_device = { .bdrv_get_info = raw_get_info, .bdrv_get_allocated_file_size = raw_get_allocated_file_size, + .bdrv_probe_blocksizes = hdev_probe_blocksizes, + .bdrv_probe_geometry = hdev_probe_geometry, .bdrv_detach_aio_context = raw_detach_aio_context, .bdrv_attach_aio_context = raw_attach_aio_context, diff --git a/block/raw_bsd.c b/block/raw_bsd.c index 05b02c76d49a..e3d2d046813f 100644 --- a/block/raw_bsd.c +++ b/block/raw_bsd.c @@ -235,6 +235,16 @@ static int raw_probe(const uint8_t *buf, int buf_size, const char *filename) return 1; } +static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) +{ + return bdrv_probe_blocksizes(bs->file, bsz); +} + +static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo) +{ + return bdrv_probe_geometry(bs->file, geo); +} + BlockDriver bdrv_raw = { .format_name = "raw", .bdrv_probe = &raw_probe, @@ -252,6 +262,8 @@ BlockDriver bdrv_raw = { .has_variable_length = true, .bdrv_get_info = &raw_get_info, .bdrv_refresh_limits = &raw_refresh_limits, + .bdrv_probe_blocksizes = &raw_probe_blocksizes, + .bdrv_probe_geometry = &raw_probe_geometry, .bdrv_is_inserted = &raw_is_inserted, .bdrv_media_changed = &raw_media_changed, .bdrv_eject = &raw_eject, From f0272c4db2a914453a6254f80bfe3b708a0b2426 Mon Sep 17 00:00:00 2001 From: Ekaterina Tumanova Date: Mon, 16 Feb 2015 12:47:57 +0100 Subject: [PATCH 34/73] block-backend: Add wrappers for blocksizes and geometry probing Signed-off-by: Ekaterina Tumanova Reviewed-by: Markus Armbruster Reviewed-by: Stefan Hajnoczi Message-id: 1424087278-49393-5-git-send-email-tumanova@linux.vnet.ibm.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/block-backend.c | 10 ++++++++++ include/sysemu/block-backend.h | 2 ++ 2 files changed, 12 insertions(+) diff --git a/block/block-backend.c b/block/block-backend.c index bfb041823ee3..48b6e4c05c1c 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -892,3 +892,13 @@ int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size) { return bdrv_load_vmstate(blk->bs, buf, pos, size); } + +int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz) +{ + return bdrv_probe_blocksizes(blk->bs, bsz); +} + +int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo) +{ + return bdrv_probe_geometry(blk->bs, geo); +} diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 3ff9aeeb09b5..77e9b9c370ec 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -164,5 +164,7 @@ int blk_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors); int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, int64_t pos, int size); int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size); +int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz); +int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo); #endif From 0eb28a42284ec32e6f283985d2d638474a05eba4 Mon Sep 17 00:00:00 2001 From: Ekaterina Tumanova Date: Mon, 16 Feb 2015 12:47:58 +0100 Subject: [PATCH 35/73] BlockConf: Call backend functions to detect geometry and blocksizes geometry: hd_geometry_guess function autodetects the drive geometry. This patch adds a block backend call, that probes the backing device geometry. If the inner driver method is implemented and succeeds (currently only for DASDs), the blkconf_geometry will pass-through the backing device geometry. Otherwise will fallback to old logic. blocksize: This patch initializes blocksize properties to 0. In order to set the property a blkconf_blocksizes was introduced. If user didn't set physical or logical blocksize, it will retrieve its value from a driver (only succeeds for DASD), otherwise it will set default 512 value. The blkconf_blocksizes call was added to all users of BlkConf. Signed-off-by: Ekaterina Tumanova Reviewed-by: Markus Armbruster Reviewed-by: Stefan Hajnoczi Message-id: 1424087278-49393-6-git-send-email-tumanova@linux.vnet.ibm.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- hw/block/block.c | 24 ++++++++++++++++++++++++ hw/block/hd-geometry.c | 10 +++++++++- hw/block/nvme.c | 1 + hw/block/virtio-blk.c | 1 + hw/core/qdev-properties.c | 3 ++- hw/ide/qdev.c | 1 + hw/scsi/scsi-disk.c | 2 ++ hw/usb/dev-storage.c | 1 + include/hw/block/block.h | 5 +++-- include/hw/qdev-properties.h | 4 ++-- 10 files changed, 46 insertions(+), 6 deletions(-) diff --git a/hw/block/block.c b/hw/block/block.c index a625773d442c..f7243e5b948f 100644 --- a/hw/block/block.c +++ b/hw/block/block.c @@ -25,6 +25,30 @@ void blkconf_serial(BlockConf *conf, char **serial) } } +void blkconf_blocksizes(BlockConf *conf) +{ + BlockBackend *blk = conf->blk; + BlockSizes blocksizes; + int backend_ret; + + backend_ret = blk_probe_blocksizes(blk, &blocksizes); + /* fill in detected values if they are not defined via qemu command line */ + if (!conf->physical_block_size) { + if (!backend_ret) { + conf->physical_block_size = blocksizes.phys; + } else { + conf->physical_block_size = BDRV_SECTOR_SIZE; + } + } + if (!conf->logical_block_size) { + if (!backend_ret) { + conf->logical_block_size = blocksizes.log; + } else { + conf->logical_block_size = BDRV_SECTOR_SIZE; + } + } +} + void blkconf_geometry(BlockConf *conf, int *ptrans, unsigned cyls_max, unsigned heads_max, unsigned secs_max, Error **errp) diff --git a/hw/block/hd-geometry.c b/hw/block/hd-geometry.c index 6fcf74df44fc..b187878facfc 100644 --- a/hw/block/hd-geometry.c +++ b/hw/block/hd-geometry.c @@ -121,8 +121,16 @@ void hd_geometry_guess(BlockBackend *blk, int *ptrans) { int cylinders, heads, secs, translation; + HDGeometry geo; - if (guess_disk_lchs(blk, &cylinders, &heads, &secs) < 0) { + /* Try to probe the backing device geometry, otherwise fallback + to the old logic. (as of 12/2014 probing only succeeds on DASDs) */ + if (blk_probe_geometry(blk, &geo) == 0) { + *pcyls = geo.cylinders; + *psecs = geo.sectors; + *pheads = geo.heads; + translation = BIOS_ATA_TRANSLATION_NONE; + } else if (guess_disk_lchs(blk, &cylinders, &heads, &secs) < 0) { /* no LCHS guess: use a standard physical disk geometry */ guess_chs_for_size(blk, pcyls, pheads, psecs); translation = hd_bios_chs_auto_trans(*pcyls, *pheads, *psecs); diff --git a/hw/block/nvme.c b/hw/block/nvme.c index ce079aefdd61..0f3dfb90ea65 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -765,6 +765,7 @@ static int nvme_init(PCIDevice *pci_dev) if (!n->serial) { return -1; } + blkconf_blocksizes(&n->conf); pci_conf = pci_dev->config; pci_conf[PCI_INTERRUPT_PIN] = 1; diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 3ad5fe44e2ad..2acfef87eba2 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -893,6 +893,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) error_propagate(errp, err); return; } + blkconf_blocksizes(&conf->conf); virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK, sizeof(struct virtio_blk_config)); diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c index 2e47f70eb435..ba8170914256 100644 --- a/hw/core/qdev-properties.c +++ b/hw/core/qdev-properties.c @@ -580,7 +580,8 @@ static void set_blocksize(Object *obj, Visitor *v, void *opaque, error_propagate(errp, local_err); return; } - if (value < min || value > max) { + /* value of 0 means "unset" */ + if (value && (value < min || value > max)) { error_set(errp, QERR_PROPERTY_VALUE_OUT_OF_RANGE, dev->id?:"", name, (int64_t)value, min, max); return; diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c index b4103fa009bc..788b36133c04 100644 --- a/hw/ide/qdev.c +++ b/hw/ide/qdev.c @@ -163,6 +163,7 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind) return -1; } + blkconf_blocksizes(&dev->conf); if (dev->conf.logical_block_size != 512) { error_report("logical_block_size must be 512 for IDE"); return -1; diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index f65618d80245..2921728be45a 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -2251,6 +2251,7 @@ static void scsi_realize(SCSIDevice *dev, Error **errp) } blkconf_serial(&s->qdev.conf, &s->serial); + blkconf_blocksizes(&s->qdev.conf); if (dev->type == TYPE_DISK) { blkconf_geometry(&dev->conf, NULL, 65535, 255, 255, &err); if (err) { @@ -2290,6 +2291,7 @@ static void scsi_realize(SCSIDevice *dev, Error **errp) static void scsi_hd_realize(SCSIDevice *dev, Error **errp) { SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); + blkconf_blocksizes(&s->qdev.conf); s->qdev.blocksize = s->qdev.conf.logical_block_size; s->qdev.type = TYPE_DISK; if (!s->product) { diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c index 65d9aa6147c4..8edc314fab5d 100644 --- a/hw/usb/dev-storage.c +++ b/hw/usb/dev-storage.c @@ -611,6 +611,7 @@ static void usb_msd_realize_storage(USBDevice *dev, Error **errp) } blkconf_serial(&s->conf, &dev->serial); + blkconf_blocksizes(&s->conf); /* * Hack alert: this pretends to be a block device, but it's really diff --git a/include/hw/block/block.h b/include/hw/block/block.h index 0d0ce9a46763..8d7c4b413f26 100644 --- a/include/hw/block/block.h +++ b/include/hw/block/block.h @@ -44,9 +44,9 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf) #define DEFINE_BLOCK_PROPERTIES(_state, _conf) \ DEFINE_PROP_DRIVE("drive", _state, _conf.blk), \ DEFINE_PROP_BLOCKSIZE("logical_block_size", _state, \ - _conf.logical_block_size, 512), \ + _conf.logical_block_size), \ DEFINE_PROP_BLOCKSIZE("physical_block_size", _state, \ - _conf.physical_block_size, 512), \ + _conf.physical_block_size), \ DEFINE_PROP_UINT16("min_io_size", _state, _conf.min_io_size, 0), \ DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0), \ DEFINE_PROP_UINT32("discard_granularity", _state, \ @@ -63,6 +63,7 @@ void blkconf_serial(BlockConf *conf, char **serial); void blkconf_geometry(BlockConf *conf, int *trans, unsigned cyls_max, unsigned heads_max, unsigned secs_max, Error **errp); +void blkconf_blocksizes(BlockConf *conf); /* Hard disk geometry */ diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h index 070006c71561..2fdcbce0329d 100644 --- a/include/hw/qdev-properties.h +++ b/include/hw/qdev-properties.h @@ -149,8 +149,8 @@ extern PropertyInfo qdev_prop_arraylen; LostTickPolicy) #define DEFINE_PROP_BIOS_CHS_TRANS(_n, _s, _f, _d) \ DEFINE_PROP_DEFAULT(_n, _s, _f, _d, qdev_prop_bios_chs_trans, int) -#define DEFINE_PROP_BLOCKSIZE(_n, _s, _f, _d) \ - DEFINE_PROP_DEFAULT(_n, _s, _f, _d, qdev_prop_blocksize, uint16_t) +#define DEFINE_PROP_BLOCKSIZE(_n, _s, _f) \ + DEFINE_PROP_DEFAULT(_n, _s, _f, 0, qdev_prop_blocksize, uint16_t) #define DEFINE_PROP_PCI_HOST_DEVADDR(_n, _s, _f) \ DEFINE_PROP(_n, _s, _f, qdev_prop_pci_host_devaddr, PCIHostDeviceAddress) From 06b95b1ee75706592929e7e91cf18373fd0e6697 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 23 Feb 2015 11:17:50 -0500 Subject: [PATCH 36/73] ide: start extracting ide_restart_dma out of bmdma_restart_dma This patch begins refactoring the restart dma functions out of bmdma to be shared with AHCI and other future IDE HBA implementations. Signed-off-by: Paolo Bonzini Signed-off-by: John Snow Message-id: 1424708286-16483-2-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- hw/ide/pci.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/hw/ide/pci.c b/hw/ide/pci.c index e3f2054a9089..da3e392d0a23 100644 --- a/hw/ide/pci.c +++ b/hw/ide/pci.c @@ -184,18 +184,24 @@ static void bmdma_set_inactive(IDEDMA *dma, bool more) } } -static void bmdma_restart_dma(BMDMAState *bm, enum ide_dma_cmd dma_cmd) +static void bmdma_restart_dma(BMDMAState *bm) { IDEState *s = bmdma_active_if(bm); ide_set_sector(s, bm->sector_num); + s->nsector = bm->nsector; + bm->cur_addr = bm->addr; +} + +static void ide_restart_dma(IDEState *s, enum ide_dma_cmd dma_cmd) +{ + BMDMAState *bm = DO_UPCAST(BMDMAState, dma, s->bus->dma); + + bmdma_restart_dma(bm); s->io_buffer_index = 0; s->io_buffer_size = 0; - s->nsector = bm->nsector; s->dma_cmd = dma_cmd; - bm->cur_addr = bm->addr; - bm->dma_cb = ide_dma_cb; - bmdma_start_dma(&bm->dma, s, bm->dma_cb); + bmdma_start_dma(&bm->dma, s, ide_dma_cb); } /* TODO This should be common IDE code */ @@ -203,6 +209,7 @@ static void bmdma_restart_bh(void *opaque) { BMDMAState *bm = opaque; IDEBus *bus = bm->bus; + IDEState *s; bool is_read; int error_status; @@ -213,6 +220,7 @@ static void bmdma_restart_bh(void *opaque) return; } + s = bmdma_active_if(bm); is_read = (bus->error_status & IDE_RETRY_READ) != 0; /* The error status must be cleared before resubmitting the request: The @@ -223,18 +231,18 @@ static void bmdma_restart_bh(void *opaque) if (error_status & IDE_RETRY_DMA) { if (error_status & IDE_RETRY_TRIM) { - bmdma_restart_dma(bm, IDE_DMA_TRIM); + ide_restart_dma(s, IDE_DMA_TRIM); } else { - bmdma_restart_dma(bm, is_read ? IDE_DMA_READ : IDE_DMA_WRITE); + ide_restart_dma(s, is_read ? IDE_DMA_READ : IDE_DMA_WRITE); } } else if (error_status & IDE_RETRY_PIO) { if (is_read) { - ide_sector_read(bmdma_active_if(bm)); + ide_sector_read(s); } else { - ide_sector_write(bmdma_active_if(bm)); + ide_sector_write(s); } } else if (error_status & IDE_RETRY_FLUSH) { - ide_flush_cache(bmdma_active_if(bm)); + ide_flush_cache(s); } else { IDEState *s = bmdma_active_if(bm); From bd8892c4e6d681517c687ec5ac86eed46b3bb23b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 23 Feb 2015 11:17:51 -0500 Subject: [PATCH 37/73] ide: prepare to move restart to common code This patch adds the restart_dma callback and adjusts the ide_restart_dma function to utilize this callback to call the BMDMA-specific restart code instead of statically executing BMDMA-specific code. Signed-off-by: Paolo Bonzini Signed-off-by: John Snow Message-id: 1424708286-16483-3-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- hw/ide/internal.h | 1 + hw/ide/pci.c | 12 +++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/hw/ide/internal.h b/hw/ide/internal.h index ee9a57f03965..2566503e5cb2 100644 --- a/hw/ide/internal.h +++ b/hw/ide/internal.h @@ -437,6 +437,7 @@ struct IDEDMAOps { DMAu32Func *commit_buf; DMAIntFunc *rw_buf; DMAIntFunc *set_unit; + DMAVoidFunc *restart_dma; DMAStopFunc *set_inactive; DMAVoidFunc *cmd_done; DMARestartFunc *restart_cb; diff --git a/hw/ide/pci.c b/hw/ide/pci.c index da3e392d0a23..34fc4fbef90c 100644 --- a/hw/ide/pci.c +++ b/hw/ide/pci.c @@ -184,8 +184,9 @@ static void bmdma_set_inactive(IDEDMA *dma, bool more) } } -static void bmdma_restart_dma(BMDMAState *bm) +static void bmdma_restart_dma(IDEDMA *dma) { + BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma); IDEState *s = bmdma_active_if(bm); ide_set_sector(s, bm->sector_num); @@ -195,13 +196,13 @@ static void bmdma_restart_dma(BMDMAState *bm) static void ide_restart_dma(IDEState *s, enum ide_dma_cmd dma_cmd) { - BMDMAState *bm = DO_UPCAST(BMDMAState, dma, s->bus->dma); - - bmdma_restart_dma(bm); + if (s->bus->dma->ops->restart_dma) { + s->bus->dma->ops->restart_dma(s->bus->dma); + } s->io_buffer_index = 0; s->io_buffer_size = 0; s->dma_cmd = dma_cmd; - bmdma_start_dma(&bm->dma, s, ide_dma_cb); + ide_start_dma(s, ide_dma_cb); } /* TODO This should be common IDE code */ @@ -532,6 +533,7 @@ static const struct IDEDMAOps bmdma_ops = { .prepare_buf = bmdma_prepare_buf, .rw_buf = bmdma_rw_buf, .set_unit = bmdma_set_unit, + .restart_dma = bmdma_restart_dma, .set_inactive = bmdma_set_inactive, .restart_cb = bmdma_restart_cb, .reset = bmdma_reset, From f878c916077d510be3276dad7c36f606cdcea80a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 23 Feb 2015 11:17:52 -0500 Subject: [PATCH 38/73] ide: introduce ide_register_restart_cb A helper is added that registers the IDEDMAOp .restart_cb() via qemu_add_vm_change_state_handler instead of requiring each HBA to register the callback themselves. Signed-off-by: Paolo Bonzini Signed-off-by: John Snow Message-id: 1424708286-16483-4-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- hw/ide/cmd646.c | 3 +-- hw/ide/core.c | 5 +++++ hw/ide/internal.h | 1 + hw/ide/piix.c | 3 +-- hw/ide/via.c | 3 +-- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/hw/ide/cmd646.c b/hw/ide/cmd646.c index c8b032215510..0374653471a1 100644 --- a/hw/ide/cmd646.c +++ b/hw/ide/cmd646.c @@ -368,8 +368,7 @@ static int pci_cmd646_ide_initfn(PCIDevice *dev) bmdma_init(&d->bus[i], &d->bmdma[i], d); d->bmdma[i].bus = &d->bus[i]; - qemu_add_vm_change_state_handler(d->bus[i].dma->ops->restart_cb, - &d->bmdma[i].dma); + ide_register_restart_cb(&d->bus[i]); } vmstate_register(DEVICE(dev), 0, &vmstate_ide_pci, d); diff --git a/hw/ide/core.c b/hw/ide/core.c index ac3f015a8dae..5a42771420e3 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -2330,6 +2330,11 @@ static const IDEDMAOps ide_dma_nop_ops = { .restart_cb = ide_nop_restart, }; +void ide_register_restart_cb(IDEBus *bus) +{ + qemu_add_vm_change_state_handler(bus->dma->ops->restart_cb, bus->dma); +} + static IDEDMA ide_dma_nop = { .ops = &ide_dma_nop_ops, .aiocb = NULL, diff --git a/hw/ide/internal.h b/hw/ide/internal.h index 2566503e5cb2..57ef5ca4a43e 100644 --- a/hw/ide/internal.h +++ b/hw/ide/internal.h @@ -551,6 +551,7 @@ int ide_init_drive(IDEState *s, BlockBackend *blk, IDEDriveKind kind, int chs_trans); void ide_init2(IDEBus *bus, qemu_irq irq); void ide_init_ioport(IDEBus *bus, ISADevice *isa, int iobase, int iobase2); +void ide_register_restart_cb(IDEBus *bus); void ide_exec_cmd(IDEBus *bus, uint32_t val); void ide_dma_cb(void *opaque, int ret); diff --git a/hw/ide/piix.c b/hw/ide/piix.c index b0172fbf5991..247dd4f8b0c3 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c @@ -143,8 +143,7 @@ static void pci_piix_init_ports(PCIIDEState *d) { bmdma_init(&d->bus[i], &d->bmdma[i], d); d->bmdma[i].bus = &d->bus[i]; - qemu_add_vm_change_state_handler(d->bus[i].dma->ops->restart_cb, - &d->bmdma[i].dma); + ide_register_restart_cb(&d->bus[i]); } } diff --git a/hw/ide/via.c b/hw/ide/via.c index 4d8089de7509..fb3d8a086167 100644 --- a/hw/ide/via.c +++ b/hw/ide/via.c @@ -166,8 +166,7 @@ static void vt82c686b_init_ports(PCIIDEState *d) { bmdma_init(&d->bus[i], &d->bmdma[i], d); d->bmdma[i].bus = &d->bus[i]; - qemu_add_vm_change_state_handler(d->bus[i].dma->ops->restart_cb, - &d->bmdma[i].dma); + ide_register_restart_cb(&d->bus[i]); } } From 2745df813d82724f2633a9b2ba7f7c5717d7509b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 23 Feb 2015 11:17:53 -0500 Subject: [PATCH 39/73] ide: do not use BMDMA in restart callback Whenever an error stops the VM, ide_handle_rw_error does "s->bus->dma->unit = s->unit". So we can just use idebus_active_if. Signed-off-by: Paolo Bonzini Signed-off-by: John Snow Message-id: 1424708286-16483-5-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- hw/ide/pci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/ide/pci.c b/hw/ide/pci.c index 34fc4fbef90c..62c88d7f6f00 100644 --- a/hw/ide/pci.c +++ b/hw/ide/pci.c @@ -217,17 +217,17 @@ static void bmdma_restart_bh(void *opaque) qemu_bh_delete(bm->bh); bm->bh = NULL; - if (bm->unit == (uint8_t) -1) { + error_status = bus->error_status; + if (bus->error_status == 0) { return; } - s = bmdma_active_if(bm); + s = idebus_active_if(bus); is_read = (bus->error_status & IDE_RETRY_READ) != 0; /* The error status must be cleared before resubmitting the request: The * request may fail again, and this case can only be distinguished if the * called function can set a new error status. */ - error_status = bus->error_status; bus->error_status = 0; if (error_status & IDE_RETRY_DMA) { From d34fceda8feb6c36722f41f3e1db40cf2af82ea8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 23 Feb 2015 11:17:54 -0500 Subject: [PATCH 40/73] ide: pass IDEBus to the restart_cb Pass the containing IDEBus to the restart_cb instead of the more specific BMDMAState child. Signed-off-by: Paolo Bonzini Signed-off-by: John Snow Message-id: 1424708286-16483-6-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- hw/ide/core.c | 2 +- hw/ide/pci.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/ide/core.c b/hw/ide/core.c index 5a42771420e3..9d510b170911 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -2332,7 +2332,7 @@ static const IDEDMAOps ide_dma_nop_ops = { void ide_register_restart_cb(IDEBus *bus) { - qemu_add_vm_change_state_handler(bus->dma->ops->restart_cb, bus->dma); + qemu_add_vm_change_state_handler(bus->dma->ops->restart_cb, bus); } static IDEDMA ide_dma_nop = { diff --git a/hw/ide/pci.c b/hw/ide/pci.c index 62c88d7f6f00..45254d466cf8 100644 --- a/hw/ide/pci.c +++ b/hw/ide/pci.c @@ -208,8 +208,8 @@ static void ide_restart_dma(IDEState *s, enum ide_dma_cmd dma_cmd) /* TODO This should be common IDE code */ static void bmdma_restart_bh(void *opaque) { - BMDMAState *bm = opaque; - IDEBus *bus = bm->bus; + IDEBus *bus = opaque; + BMDMAState *bm = DO_UPCAST(BMDMAState, dma, bus->dma); IDEState *s; bool is_read; int error_status; @@ -260,14 +260,14 @@ static void bmdma_restart_bh(void *opaque) static void bmdma_restart_cb(void *opaque, int running, RunState state) { - IDEDMA *dma = opaque; - BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma); + IDEBus *bus = opaque; + BMDMAState *bm = DO_UPCAST(BMDMAState, dma, bus->dma); if (!running) return; if (!bm->bh) { - bm->bh = qemu_bh_new(bmdma_restart_bh, &bm->dma); + bm->bh = qemu_bh_new(bmdma_restart_bh, bus); qemu_bh_schedule(bm->bh); } } From 9898586d8929a7f1293fc64164b67a79289b074f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 23 Feb 2015 11:17:55 -0500 Subject: [PATCH 41/73] ide: move restart callback to common code With BMDMA specific excised from the restart functions, create a HBA-agnostic restart callback to be shared between the different HBAs. Change the callback registered with the vmstate_change handler to always point to ide_restart_cb instead of relying on the IDEDMAOps.restart_cb() member. Signed-off-by: Paolo Bonzini Signed-off-by: John Snow Message-id: 1424708286-16483-7-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- hw/ide/core.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++- hw/ide/internal.h | 2 ++ hw/ide/pci.c | 79 ---------------------------------------------- hw/ide/pci.h | 1 - 4 files changed, 81 insertions(+), 81 deletions(-) diff --git a/hw/ide/core.c b/hw/ide/core.c index 9d510b170911..a4250f605fa1 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -2314,6 +2314,10 @@ static int ide_nop_int(IDEDMA *dma, int x) return 0; } +static void ide_nop(IDEDMA *dma) +{ +} + static int32_t ide_nop_int32(IDEDMA *dma, int x) { return 0; @@ -2325,14 +2329,88 @@ static void ide_nop_restart(void *opaque, int x, RunState y) static const IDEDMAOps ide_dma_nop_ops = { .prepare_buf = ide_nop_int32, + .restart_dma = ide_nop, .rw_buf = ide_nop_int, .set_unit = ide_nop_int, .restart_cb = ide_nop_restart, }; +static void ide_restart_dma(IDEState *s, enum ide_dma_cmd dma_cmd) +{ + s->bus->dma->ops->restart_dma(s->bus->dma); + s->io_buffer_index = 0; + s->io_buffer_size = 0; + s->dma_cmd = dma_cmd; + ide_start_dma(s, ide_dma_cb); +} + +static void ide_restart_bh(void *opaque) +{ + IDEBus *bus = opaque; + IDEState *s; + bool is_read; + int error_status; + + qemu_bh_delete(bus->bh); + bus->bh = NULL; + + error_status = bus->error_status; + if (bus->error_status == 0) { + return; + } + + s = idebus_active_if(bus); + is_read = (bus->error_status & IDE_RETRY_READ) != 0; + + /* The error status must be cleared before resubmitting the request: The + * request may fail again, and this case can only be distinguished if the + * called function can set a new error status. */ + bus->error_status = 0; + + if (error_status & IDE_RETRY_DMA) { + if (error_status & IDE_RETRY_TRIM) { + ide_restart_dma(s, IDE_DMA_TRIM); + } else { + ide_restart_dma(s, is_read ? IDE_DMA_READ : IDE_DMA_WRITE); + } + } else if (error_status & IDE_RETRY_PIO) { + if (is_read) { + ide_sector_read(s); + } else { + ide_sector_write(s); + } + } else if (error_status & IDE_RETRY_FLUSH) { + ide_flush_cache(s); + } else { + /* + * We've not got any bits to tell us about ATAPI - but + * we do have the end_transfer_func that tells us what + * we're trying to do. + */ + if (s->end_transfer_func == ide_atapi_cmd) { + ide_atapi_dma_restart(s); + } + } +} + +static void ide_restart_cb(void *opaque, int running, RunState state) +{ + IDEBus *bus = opaque; + + if (!running) + return; + + if (!bus->bh) { + bus->bh = qemu_bh_new(ide_restart_bh, bus); + qemu_bh_schedule(bus->bh); + } +} + void ide_register_restart_cb(IDEBus *bus) { - qemu_add_vm_change_state_handler(bus->dma->ops->restart_cb, bus); + if (bus->dma->ops->restart_dma) { + qemu_add_vm_change_state_handler(ide_restart_cb, bus); + } } static IDEDMA ide_dma_nop = { diff --git a/hw/ide/internal.h b/hw/ide/internal.h index 57ef5ca4a43e..9a11c3c6f0d5 100644 --- a/hw/ide/internal.h +++ b/hw/ide/internal.h @@ -456,6 +456,8 @@ struct IDEBus { IDEDevice *master; IDEDevice *slave; IDEState ifs[2]; + QEMUBH *bh; + int bus_id; int max_units; IDEDMA *dma; diff --git a/hw/ide/pci.c b/hw/ide/pci.c index 45254d466cf8..cd1bbb076c73 100644 --- a/hw/ide/pci.c +++ b/hw/ide/pci.c @@ -194,84 +194,6 @@ static void bmdma_restart_dma(IDEDMA *dma) bm->cur_addr = bm->addr; } -static void ide_restart_dma(IDEState *s, enum ide_dma_cmd dma_cmd) -{ - if (s->bus->dma->ops->restart_dma) { - s->bus->dma->ops->restart_dma(s->bus->dma); - } - s->io_buffer_index = 0; - s->io_buffer_size = 0; - s->dma_cmd = dma_cmd; - ide_start_dma(s, ide_dma_cb); -} - -/* TODO This should be common IDE code */ -static void bmdma_restart_bh(void *opaque) -{ - IDEBus *bus = opaque; - BMDMAState *bm = DO_UPCAST(BMDMAState, dma, bus->dma); - IDEState *s; - bool is_read; - int error_status; - - qemu_bh_delete(bm->bh); - bm->bh = NULL; - - error_status = bus->error_status; - if (bus->error_status == 0) { - return; - } - - s = idebus_active_if(bus); - is_read = (bus->error_status & IDE_RETRY_READ) != 0; - - /* The error status must be cleared before resubmitting the request: The - * request may fail again, and this case can only be distinguished if the - * called function can set a new error status. */ - bus->error_status = 0; - - if (error_status & IDE_RETRY_DMA) { - if (error_status & IDE_RETRY_TRIM) { - ide_restart_dma(s, IDE_DMA_TRIM); - } else { - ide_restart_dma(s, is_read ? IDE_DMA_READ : IDE_DMA_WRITE); - } - } else if (error_status & IDE_RETRY_PIO) { - if (is_read) { - ide_sector_read(s); - } else { - ide_sector_write(s); - } - } else if (error_status & IDE_RETRY_FLUSH) { - ide_flush_cache(s); - } else { - IDEState *s = bmdma_active_if(bm); - - /* - * We've not got any bits to tell us about ATAPI - but - * we do have the end_transfer_func that tells us what - * we're trying to do. - */ - if (s->end_transfer_func == ide_atapi_cmd) { - ide_atapi_dma_restart(s); - } - } -} - -static void bmdma_restart_cb(void *opaque, int running, RunState state) -{ - IDEBus *bus = opaque; - BMDMAState *bm = DO_UPCAST(BMDMAState, dma, bus->dma); - - if (!running) - return; - - if (!bm->bh) { - bm->bh = qemu_bh_new(bmdma_restart_bh, bus); - qemu_bh_schedule(bm->bh); - } -} - static void bmdma_cancel(BMDMAState *bm) { if (bm->status & BM_STATUS_DMAING) { @@ -535,7 +457,6 @@ static const struct IDEDMAOps bmdma_ops = { .set_unit = bmdma_set_unit, .restart_dma = bmdma_restart_dma, .set_inactive = bmdma_set_inactive, - .restart_cb = bmdma_restart_cb, .reset = bmdma_reset, }; diff --git a/hw/ide/pci.h b/hw/ide/pci.h index 2e9314ad87f0..ea4e05160fc8 100644 --- a/hw/ide/pci.h +++ b/hw/ide/pci.h @@ -28,7 +28,6 @@ typedef struct BMDMAState { uint32_t nsector; MemoryRegion addr_ioport; MemoryRegion extra_io; - QEMUBH *bh; qemu_irq irq; /* Bit 0-2 and 7: BM status register From fe09c7c9f0e8ed8793e986cf616c8de0a9518fd7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 23 Feb 2015 11:17:56 -0500 Subject: [PATCH 42/73] ide: remove restart_cb callback With restarts now handled by ide_restart_cb and the IDEDMAOps.restart_dma() member, remove the old restart_cb callback. Signed-off-by: Paolo Bonzini Signed-off-by: John Snow Message-id: 1424708286-16483-8-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- hw/ide/ahci.c | 5 ----- hw/ide/core.c | 5 ----- hw/ide/internal.h | 1 - hw/ide/macio.c | 5 ----- 4 files changed, 16 deletions(-) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index 5651372be38f..3892025f5c9a 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -1252,10 +1252,6 @@ static void ahci_irq_set(void *opaque, int n, int level) { } -static void ahci_dma_restart_cb(void *opaque, int running, RunState state) -{ -} - static const IDEDMAOps ahci_dma_ops = { .start_dma = ahci_start_dma, .start_transfer = ahci_start_transfer, @@ -1264,7 +1260,6 @@ static const IDEDMAOps ahci_dma_ops = { .rw_buf = ahci_dma_rw_buf, .set_unit = ahci_dma_set_unit, .cmd_done = ahci_cmd_done, - .restart_cb = ahci_dma_restart_cb, }; void ahci_init(AHCIState *s, DeviceState *qdev, AddressSpace *as, int ports) diff --git a/hw/ide/core.c b/hw/ide/core.c index a4250f605fa1..0a3d244fe0df 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -2323,16 +2323,11 @@ static int32_t ide_nop_int32(IDEDMA *dma, int x) return 0; } -static void ide_nop_restart(void *opaque, int x, RunState y) -{ -} - static const IDEDMAOps ide_dma_nop_ops = { .prepare_buf = ide_nop_int32, .restart_dma = ide_nop, .rw_buf = ide_nop_int, .set_unit = ide_nop_int, - .restart_cb = ide_nop_restart, }; static void ide_restart_dma(IDEState *s, enum ide_dma_cmd dma_cmd) diff --git a/hw/ide/internal.h b/hw/ide/internal.h index 9a11c3c6f0d5..44cce5b1ba0a 100644 --- a/hw/ide/internal.h +++ b/hw/ide/internal.h @@ -440,7 +440,6 @@ struct IDEDMAOps { DMAVoidFunc *restart_dma; DMAStopFunc *set_inactive; DMAVoidFunc *cmd_done; - DMARestartFunc *restart_cb; DMAVoidFunc *reset; }; diff --git a/hw/ide/macio.c b/hw/ide/macio.c index f6074f202456..e7a5c997108a 100644 --- a/hw/ide/macio.c +++ b/hw/ide/macio.c @@ -558,10 +558,6 @@ static int32_t ide_nop_int32(IDEDMA *dma, int x) return 0; } -static void ide_nop_restart(void *opaque, int x, RunState y) -{ -} - static void ide_dbdma_start(IDEDMA *dma, IDEState *s, BlockCompletionFunc *cb) { @@ -577,7 +573,6 @@ static const IDEDMAOps dbdma_ops = { .prepare_buf = ide_nop_int32, .rw_buf = ide_nop_int, .set_unit = ide_nop_int, - .restart_cb = ide_nop_restart, }; static void macio_ide_realizefn(DeviceState *dev, Error **errp) From a96cb236290ef8e54b061b30c41835e435905d7a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 23 Feb 2015 11:17:57 -0500 Subject: [PATCH 43/73] ide: replace set_unit callback with more IDEBus state Start moving the initial state of the current request to IDEBus, so that AHCI can use it. The set_unit callback is not used anymore once this is done. Signed-off-by: Paolo Bonzini Signed-off-by: John Snow Message-id: 1424708286-16483-9-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- hw/ide/ahci.c | 7 ------- hw/ide/core.c | 6 ++++-- hw/ide/internal.h | 2 +- hw/ide/macio.c | 1 - hw/ide/pci.c | 19 ++++++------------- hw/ide/pci.h | 6 ++++-- 6 files changed, 15 insertions(+), 26 deletions(-) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index 3892025f5c9a..bc6d5cea5d0b 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -1226,12 +1226,6 @@ static int ahci_dma_rw_buf(IDEDMA *dma, int is_write) return 1; } -static int ahci_dma_set_unit(IDEDMA *dma, int unit) -{ - /* only a single unit per link */ - return 0; -} - static void ahci_cmd_done(IDEDMA *dma) { AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma); @@ -1258,7 +1252,6 @@ static const IDEDMAOps ahci_dma_ops = { .prepare_buf = ahci_dma_prepare_buf, .commit_buf = ahci_commit_buf, .rw_buf = ahci_dma_rw_buf, - .set_unit = ahci_dma_set_unit, .cmd_done = ahci_cmd_done, }; diff --git a/hw/ide/core.c b/hw/ide/core.c index 0a3d244fe0df..7607d03170e3 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -646,6 +646,7 @@ static void dma_buf_commit(IDEState *s, uint32_t tx_bytes) void ide_set_inactive(IDEState *s, bool more) { s->bus->dma->aiocb = NULL; + s->bus->retry_unit = -1; if (s->bus->dma->ops->set_inactive) { s->bus->dma->ops->set_inactive(s->bus->dma, more); } @@ -666,7 +667,7 @@ static int ide_handle_rw_error(IDEState *s, int error, int op) BlockErrorAction action = blk_get_error_action(s->blk, is_read, error); if (action == BLOCK_ERROR_ACTION_STOP) { - s->bus->dma->ops->set_unit(s->bus->dma, s->unit); + assert(s->bus->retry_unit == s->unit); s->bus->error_status = op; } else if (action == BLOCK_ERROR_ACTION_REPORT) { if (op & IDE_RETRY_DMA) { @@ -799,6 +800,7 @@ static void ide_sector_start_dma(IDEState *s, enum ide_dma_cmd dma_cmd) void ide_start_dma(IDEState *s, BlockCompletionFunc *cb) { + s->bus->retry_unit = s->unit; if (s->bus->dma->ops->start_dma) { s->bus->dma->ops->start_dma(s->bus->dma, s, cb); } @@ -2327,11 +2329,11 @@ static const IDEDMAOps ide_dma_nop_ops = { .prepare_buf = ide_nop_int32, .restart_dma = ide_nop, .rw_buf = ide_nop_int, - .set_unit = ide_nop_int, }; static void ide_restart_dma(IDEState *s, enum ide_dma_cmd dma_cmd) { + s->unit = s->bus->retry_unit; s->bus->dma->ops->restart_dma(s->bus->dma); s->io_buffer_index = 0; s->io_buffer_size = 0; diff --git a/hw/ide/internal.h b/hw/ide/internal.h index 44cce5b1ba0a..9630f6125b00 100644 --- a/hw/ide/internal.h +++ b/hw/ide/internal.h @@ -436,7 +436,6 @@ struct IDEDMAOps { DMAInt32Func *prepare_buf; DMAu32Func *commit_buf; DMAIntFunc *rw_buf; - DMAIntFunc *set_unit; DMAVoidFunc *restart_dma; DMAStopFunc *set_inactive; DMAVoidFunc *cmd_done; @@ -465,6 +464,7 @@ struct IDEBus { qemu_irq irq; int error_status; + uint8_t retry_unit; }; #define TYPE_IDE_DEVICE "ide-device" diff --git a/hw/ide/macio.c b/hw/ide/macio.c index e7a5c997108a..a009674f48aa 100644 --- a/hw/ide/macio.c +++ b/hw/ide/macio.c @@ -572,7 +572,6 @@ static const IDEDMAOps dbdma_ops = { .start_dma = ide_dbdma_start, .prepare_buf = ide_nop_int32, .rw_buf = ide_nop_int, - .set_unit = ide_nop_int, }; static void macio_ide_realizefn(DeviceState *dev, Error **errp) diff --git a/hw/ide/pci.c b/hw/ide/pci.c index cd1bbb076c73..2b0e886b66a4 100644 --- a/hw/ide/pci.c +++ b/hw/ide/pci.c @@ -42,7 +42,6 @@ static void bmdma_start_dma(IDEDMA *dma, IDEState *s, { BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma); - bm->unit = s->unit; bm->dma_cb = dma_cb; bm->cur_prd_last = 0; bm->cur_prd_addr = 0; @@ -163,20 +162,11 @@ static int bmdma_rw_buf(IDEDMA *dma, int is_write) return 1; } -static int bmdma_set_unit(IDEDMA *dma, int unit) -{ - BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma); - bm->unit = unit; - - return 0; -} - static void bmdma_set_inactive(IDEDMA *dma, bool more) { BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma); bm->dma_cb = NULL; - bm->unit = -1; if (more) { bm->status |= BM_STATUS_DMAING; } else { @@ -335,6 +325,7 @@ static void ide_bmdma_pre_save(void *opaque) BMDMAState *bm = opaque; uint8_t abused_bits = BM_MIGRATION_COMPAT_STATUS_BITS; + bm->migration_retry_unit = bm->bus->retry_unit; bm->migration_compat_status = (bm->status & ~abused_bits) | (bm->bus->error_status & abused_bits); } @@ -351,6 +342,9 @@ static int ide_bmdma_post_load(void *opaque, int version_id) bm->status = bm->migration_compat_status & ~abused_bits; bm->bus->error_status |= bm->migration_compat_status & abused_bits; } + if (bm->bus->error_status) { + bm->bus->retry_unit = bm->migration_retry_unit; + } return 0; } @@ -389,7 +383,7 @@ static const VMStateDescription vmstate_bmdma = { VMSTATE_UINT32(addr, BMDMAState), VMSTATE_INT64(sector_num, BMDMAState), VMSTATE_UINT32(nsector, BMDMAState), - VMSTATE_UINT8(unit, BMDMAState), + VMSTATE_UINT8(migration_retry_unit, BMDMAState), VMSTATE_END_OF_LIST() }, .subsections = (VMStateSubsection []) { @@ -413,7 +407,7 @@ static int ide_pci_post_load(void *opaque, int version_id) for(i = 0; i < 2; i++) { /* current versions always store 0/1, but older version stored bigger values. We only need last bit */ - d->bmdma[i].unit &= 1; + d->bmdma[i].migration_retry_unit &= 1; ide_bmdma_post_load(&d->bmdma[i], -1); } @@ -454,7 +448,6 @@ static const struct IDEDMAOps bmdma_ops = { .start_dma = bmdma_start_dma, .prepare_buf = bmdma_prepare_buf, .rw_buf = bmdma_rw_buf, - .set_unit = bmdma_set_unit, .restart_dma = bmdma_restart_dma, .set_inactive = bmdma_set_inactive, .reset = bmdma_reset, diff --git a/hw/ide/pci.h b/hw/ide/pci.h index ea4e05160fc8..222a16316c91 100644 --- a/hw/ide/pci.h +++ b/hw/ide/pci.h @@ -33,6 +33,8 @@ typedef struct BMDMAState { /* Bit 0-2 and 7: BM status register * Bit 3-6: bus->error_status */ uint8_t migration_compat_status; + uint8_t migration_retry_unit; + struct PCIIDEState *pci_dev; } BMDMAState; @@ -61,8 +63,8 @@ typedef struct PCIIDEState { static inline IDEState *bmdma_active_if(BMDMAState *bmdma) { - assert(bmdma->unit != (uint8_t)-1); - return bmdma->bus->ifs + bmdma->unit; + assert(bmdma->bus->retry_unit != (uint8_t)-1); + return bmdma->bus->ifs + bmdma->bus->retry_unit; } From dc5d0af49a90571813b9cbcea90c0bbbea0d4a22 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 23 Feb 2015 11:17:58 -0500 Subject: [PATCH 44/73] ide: place initial state of the current request to IDEBus This moves more common restarting logic to the core IDE code. Signed-off-by: Paolo Bonzini Signed-off-by: John Snow Message-id: 1424708286-16483-10-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- hw/ide/core.c | 6 ++++++ hw/ide/internal.h | 2 ++ hw/ide/pci.c | 15 ++++++--------- hw/ide/pci.h | 5 ++--- 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/hw/ide/core.c b/hw/ide/core.c index 7607d03170e3..71ec1e76dcf3 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -647,6 +647,8 @@ void ide_set_inactive(IDEState *s, bool more) { s->bus->dma->aiocb = NULL; s->bus->retry_unit = -1; + s->bus->retry_sector_num = 0; + s->bus->retry_nsector = 0; if (s->bus->dma->ops->set_inactive) { s->bus->dma->ops->set_inactive(s->bus->dma, more); } @@ -801,6 +803,8 @@ static void ide_sector_start_dma(IDEState *s, enum ide_dma_cmd dma_cmd) void ide_start_dma(IDEState *s, BlockCompletionFunc *cb) { s->bus->retry_unit = s->unit; + s->bus->retry_sector_num = ide_get_sector(s); + s->bus->retry_nsector = s->nsector; if (s->bus->dma->ops->start_dma) { s->bus->dma->ops->start_dma(s->bus->dma, s, cb); } @@ -2334,6 +2338,8 @@ static const IDEDMAOps ide_dma_nop_ops = { static void ide_restart_dma(IDEState *s, enum ide_dma_cmd dma_cmd) { s->unit = s->bus->retry_unit; + ide_set_sector(s, s->bus->retry_sector_num); + s->nsector = s->bus->retry_nsector; s->bus->dma->ops->restart_dma(s->bus->dma); s->io_buffer_index = 0; s->io_buffer_size = 0; diff --git a/hw/ide/internal.h b/hw/ide/internal.h index 9630f6125b00..02206a9f58f0 100644 --- a/hw/ide/internal.h +++ b/hw/ide/internal.h @@ -465,6 +465,8 @@ struct IDEBus { int error_status; uint8_t retry_unit; + int64_t retry_sector_num; + uint32_t retry_nsector; }; #define TYPE_IDE_DEVICE "ide-device" diff --git a/hw/ide/pci.c b/hw/ide/pci.c index 2b0e886b66a4..fab2abc01abb 100644 --- a/hw/ide/pci.c +++ b/hw/ide/pci.c @@ -46,8 +46,6 @@ static void bmdma_start_dma(IDEDMA *dma, IDEState *s, bm->cur_prd_last = 0; bm->cur_prd_addr = 0; bm->cur_prd_len = 0; - bm->sector_num = ide_get_sector(s); - bm->nsector = s->nsector; if (bm->status & BM_STATUS_DMAING) { bm->dma_cb(bmdma_active_if(bm), 0); @@ -177,10 +175,7 @@ static void bmdma_set_inactive(IDEDMA *dma, bool more) static void bmdma_restart_dma(IDEDMA *dma) { BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma); - IDEState *s = bmdma_active_if(bm); - ide_set_sector(s, bm->sector_num); - s->nsector = bm->nsector; bm->cur_addr = bm->addr; } @@ -207,8 +202,6 @@ static void bmdma_reset(IDEDMA *dma) bm->cur_prd_last = 0; bm->cur_prd_addr = 0; bm->cur_prd_len = 0; - bm->sector_num = 0; - bm->nsector = 0; } static void bmdma_irq(void *opaque, int n, int level) @@ -326,6 +319,8 @@ static void ide_bmdma_pre_save(void *opaque) uint8_t abused_bits = BM_MIGRATION_COMPAT_STATUS_BITS; bm->migration_retry_unit = bm->bus->retry_unit; + bm->migration_retry_sector_num = bm->bus->retry_sector_num; + bm->migration_retry_nsector = bm->bus->retry_nsector; bm->migration_compat_status = (bm->status & ~abused_bits) | (bm->bus->error_status & abused_bits); } @@ -343,6 +338,8 @@ static int ide_bmdma_post_load(void *opaque, int version_id) bm->bus->error_status |= bm->migration_compat_status & abused_bits; } if (bm->bus->error_status) { + bm->bus->retry_sector_num = bm->migration_retry_sector_num; + bm->bus->retry_nsector = bm->migration_retry_nsector; bm->bus->retry_unit = bm->migration_retry_unit; } @@ -381,8 +378,8 @@ static const VMStateDescription vmstate_bmdma = { VMSTATE_UINT8(cmd, BMDMAState), VMSTATE_UINT8(migration_compat_status, BMDMAState), VMSTATE_UINT32(addr, BMDMAState), - VMSTATE_INT64(sector_num, BMDMAState), - VMSTATE_UINT32(nsector, BMDMAState), + VMSTATE_INT64(migration_retry_sector_num, BMDMAState), + VMSTATE_UINT32(migration_retry_nsector, BMDMAState), VMSTATE_UINT8(migration_retry_unit, BMDMAState), VMSTATE_END_OF_LIST() }, diff --git a/hw/ide/pci.h b/hw/ide/pci.h index 222a16316c91..0f2d4b91a770 100644 --- a/hw/ide/pci.h +++ b/hw/ide/pci.h @@ -22,10 +22,7 @@ typedef struct BMDMAState { uint32_t cur_prd_last; uint32_t cur_prd_addr; uint32_t cur_prd_len; - uint8_t unit; BlockCompletionFunc *dma_cb; - int64_t sector_num; - uint32_t nsector; MemoryRegion addr_ioport; MemoryRegion extra_io; qemu_irq irq; @@ -34,6 +31,8 @@ typedef struct BMDMAState { * Bit 3-6: bus->error_status */ uint8_t migration_compat_status; uint8_t migration_retry_unit; + int64_t migration_retry_sector_num; + uint32_t migration_retry_nsector; struct PCIIDEState *pci_dev; } BMDMAState; From d12b9ff2a4e21587e5b180008c88a0e6e1ec8ac5 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 23 Feb 2015 11:17:59 -0500 Subject: [PATCH 45/73] ide: migrate initial request state via IDEBus This only breaks backwards migration compatibility if the bus is in an error state. It is in principle possible to avoid this by making two subsections (one for version 1, and one for version 2, but with the same name) with different "_needed" callbacks. The v1 callback would return true if error_status != 0 and the bus is PATA; the v2 callback would return true if error_status != 0 and the bus is AHCI. Forward migration keeps working. Signed-off-by: Paolo Bonzini Signed-off-by: John Snow Message-id: 1424708286-16483-11-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- hw/ide/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hw/ide/core.c b/hw/ide/core.c index 71ec1e76dcf3..b62a94aac28e 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -2643,10 +2643,13 @@ const VMStateDescription vmstate_ide_drive = { static const VMStateDescription vmstate_ide_error_status = { .name ="ide_bus/error", - .version_id = 1, + .version_id = 2, .minimum_version_id = 1, .fields = (VMStateField[]) { VMSTATE_INT32(error_status, IDEBus), + VMSTATE_INT64_V(retry_sector_num, IDEBus, 2), + VMSTATE_UINT32_V(retry_nsector, IDEBus, 2), + VMSTATE_UINT8_V(retry_unit, IDEBus, 2), VMSTATE_END_OF_LIST() } }; From c71c06d437d4cccf753cb41731e5a71f54317a01 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 23 Feb 2015 11:18:00 -0500 Subject: [PATCH 46/73] ide: commonize io_buffer_index initialization Resetting the io_buffer_index to 0 is commonized, with the exception of the case within ide_atapi_cmd_reply, where we need to reset this index to 0 prior to the ide_atapi_cmd_reply_end call. Note that not all calls to ide_atapi_cmd_reply_end expect the index to be 0, so setting it there is not appropriate. Signed-off-by: Paolo Bonzini Signed-off-by: John Snow Message-id: 1424708286-16483-12-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- hw/ide/atapi.c | 3 +-- hw/ide/core.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c index 1bf8b345280d..950e311d3139 100644 --- a/hw/ide/atapi.c +++ b/hw/ide/atapi.c @@ -252,7 +252,6 @@ static void ide_atapi_cmd_reply(IDEState *s, int size, int max_size) s->packet_transfer_size = size; s->io_buffer_size = size; /* dma: send the reply data as one chunk */ s->elementary_transfer_size = 0; - s->io_buffer_index = 0; if (s->atapi_dma) { block_acct_start(blk_get_stats(s->blk), &s->acct, size, @@ -261,6 +260,7 @@ static void ide_atapi_cmd_reply(IDEState *s, int size, int max_size) ide_start_dma(s, ide_atapi_cmd_read_dma_cb); } else { s->status = READY_STAT | SEEK_STAT; + s->io_buffer_index = 0; ide_atapi_cmd_reply_end(s); } } @@ -368,7 +368,6 @@ static void ide_atapi_cmd_read_dma(IDEState *s, int lba, int nb_sectors, { s->lba = lba; s->packet_transfer_size = nb_sectors * sector_size; - s->io_buffer_index = 0; s->io_buffer_size = 0; s->cd_sector_size = sector_size; diff --git a/hw/ide/core.c b/hw/ide/core.c index b62a94aac28e..ff28db0853fb 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -780,7 +780,6 @@ void ide_dma_cb(void *opaque, int ret) static void ide_sector_start_dma(IDEState *s, enum ide_dma_cmd dma_cmd) { s->status = READY_STAT | SEEK_STAT | DRQ_STAT | BUSY_STAT; - s->io_buffer_index = 0; s->io_buffer_size = 0; s->dma_cmd = dma_cmd; @@ -802,6 +801,7 @@ static void ide_sector_start_dma(IDEState *s, enum ide_dma_cmd dma_cmd) void ide_start_dma(IDEState *s, BlockCompletionFunc *cb) { + s->io_buffer_index = 0; s->bus->retry_unit = s->unit; s->bus->retry_sector_num = ide_get_sector(s); s->bus->retry_nsector = s->nsector; @@ -2341,7 +2341,6 @@ static void ide_restart_dma(IDEState *s, enum ide_dma_cmd dma_cmd) ide_set_sector(s, s->bus->retry_sector_num); s->nsector = s->bus->retry_nsector; s->bus->dma->ops->restart_dma(s->bus->dma); - s->io_buffer_index = 0; s->io_buffer_size = 0; s->dma_cmd = dma_cmd; ide_start_dma(s, ide_dma_cb); From 4e2b8b4a4e8fcc598f7b9df15aa8949a1ab096e8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 23 Feb 2015 11:18:01 -0500 Subject: [PATCH 47/73] ide: make more functions static Signed-off-by: Paolo Bonzini Signed-off-by: John Snow Message-id: 1424708286-16483-13-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- hw/ide/core.c | 12 ++++++++---- hw/ide/internal.h | 4 ---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/hw/ide/core.c b/hw/ide/core.c index ff28db0853fb..ef52f3516f0f 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -561,6 +561,8 @@ static bool ide_sect_range_ok(IDEState *s, return true; } +static void ide_sector_read(IDEState *s); + static void ide_sector_read_cb(void *opaque, int ret) { IDEState *s = opaque; @@ -595,7 +597,7 @@ static void ide_sector_read_cb(void *opaque, int ret) s->io_buffer_offset += 512 * n; } -void ide_sector_read(IDEState *s) +static void ide_sector_read(IDEState *s) { int64_t sector_num; int n; @@ -682,7 +684,7 @@ static int ide_handle_rw_error(IDEState *s, int error, int op) return action != BLOCK_ERROR_ACTION_IGNORE; } -void ide_dma_cb(void *opaque, int ret) +static void ide_dma_cb(void *opaque, int ret) { IDEState *s = opaque; int n; @@ -810,6 +812,8 @@ void ide_start_dma(IDEState *s, BlockCompletionFunc *cb) } } +static void ide_sector_write(IDEState *s); + static void ide_sector_write_timer_cb(void *opaque) { IDEState *s = opaque; @@ -869,7 +873,7 @@ static void ide_sector_write_cb(void *opaque, int ret) } } -void ide_sector_write(IDEState *s) +static void ide_sector_write(IDEState *s) { int64_t sector_num; int n; @@ -923,7 +927,7 @@ static void ide_flush_cb(void *opaque, int ret) ide_set_irq(s->bus); } -void ide_flush_cache(IDEState *s) +static void ide_flush_cache(IDEState *s) { if (s->blk == NULL) { ide_flush_cb(s, 0); diff --git a/hw/ide/internal.h b/hw/ide/internal.h index 02206a9f58f0..0d5f881cea2e 100644 --- a/hw/ide/internal.h +++ b/hw/ide/internal.h @@ -557,10 +557,6 @@ void ide_init_ioport(IDEBus *bus, ISADevice *isa, int iobase, int iobase2); void ide_register_restart_cb(IDEBus *bus); void ide_exec_cmd(IDEBus *bus, uint32_t val); -void ide_dma_cb(void *opaque, int ret); -void ide_sector_write(IDEState *s); -void ide_sector_read(IDEState *s); -void ide_flush_cache(IDEState *s); void ide_transfer_start(IDEState *s, uint8_t *buf, int size, EndTransferFunc *end_transfer_func); From d32c76b3add2cb5216ae6931ef06632b802a6f68 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 23 Feb 2015 11:18:02 -0500 Subject: [PATCH 48/73] ide: support PIO restart for the ISA controller Signed-off-by: Paolo Bonzini Signed-off-by: John Snow Message-id: 1424708286-16483-14-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- hw/ide/isa.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/ide/isa.c b/hw/ide/isa.c index c0c4e1b0981a..9f80503faae8 100644 --- a/hw/ide/isa.c +++ b/hw/ide/isa.c @@ -74,7 +74,8 @@ static void isa_ide_realizefn(DeviceState *dev, Error **errp) isa_init_irq(isadev, &s->irq, s->isairq); ide_init2(&s->bus, s->irq); vmstate_register(dev, 0, &vmstate_ide_isa, s); -}; + ide_register_restart_cb(&s->bus); +} ISADevice *isa_ide_init(ISABus *bus, int iobase, int iobase2, int isairq, DriveInfo *hd0, DriveInfo *hd1) From bd6649101d044df446c66949cb6c1462c8989726 Mon Sep 17 00:00:00 2001 From: John Snow Date: Mon, 23 Feb 2015 11:18:03 -0500 Subject: [PATCH 49/73] ahci: Migrate IDEStatus Amazingly, we weren't doing this before. Make sure we migrate the IDEState structure that belongs to the AHCIDevice.IDEBus structure during migrations. No version numbering changes because AHCI is not officially migratable (and we can all see with good reason why) so we do not impact any official builds by altering the stream and leaving it at version 1. This fixes the rerror=stop/werror=stop test case where we wish to migrate a halted job. Previously, the error code would not migrate, so even if the job completed successfully, AHCI would report an error because it would still have the placeholder error code from initialization time. Reviewed-by: Paolo Bonzini Signed-off-by: John Snow Message-id: 1424708286-16483-15-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- hw/ide/ahci.c | 1 + hw/ide/internal.h | 3 +++ 2 files changed, 4 insertions(+) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index bc6d5cea5d0b..3f4fc77ab6be 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -1321,6 +1321,7 @@ static const VMStateDescription vmstate_ahci_device = { .version_id = 1, .fields = (VMStateField[]) { VMSTATE_IDE_BUS(port, AHCIDevice), + VMSTATE_IDE_DRIVE(port.ifs[0], AHCIDevice), VMSTATE_UINT32(port_state, AHCIDevice), VMSTATE_UINT32(finished, AHCIDevice), VMSTATE_UINT32(port_regs.lst_addr, AHCIDevice), diff --git a/hw/ide/internal.h b/hw/ide/internal.h index 0d5f881cea2e..965cc55cb81a 100644 --- a/hw/ide/internal.h +++ b/hw/ide/internal.h @@ -526,6 +526,9 @@ extern const VMStateDescription vmstate_ide_drive; #define VMSTATE_IDE_DRIVES(_field, _state) \ VMSTATE_STRUCT_ARRAY(_field, _state, 2, 3, vmstate_ide_drive, IDEState) +#define VMSTATE_IDE_DRIVE(_field, _state) \ + VMSTATE_STRUCT(_field, _state, 1, vmstate_ide_drive, IDEState) + void ide_bus_reset(IDEBus *bus); int64_t ide_get_sector(IDEState *s); void ide_set_sector(IDEState *s, int64_t sector_num); From e8ef87433ee005d304ba19c56488474432eb0c53 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 23 Feb 2015 11:18:04 -0500 Subject: [PATCH 50/73] ahci: add support for restarting non-queued commands This is easy, since start_dma already restarts processing from the beginning of the PRDT. Migration is also easy to cover; the comment about busy_slot is wrong, busy_slot will only be set if there is an error. In this case we have nothing to do really. The core IDE code will restart the operation and command list processing will proceed after the erroring command has been completed. Signed-off-by: Paolo Bonzini Signed-off-by: John Snow Message-id: 1424708286-16483-16-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- hw/ide/ahci.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index 3f4fc77ab6be..56a4867cd09c 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -1160,6 +1160,11 @@ static void ahci_start_dma(IDEDMA *dma, IDEState *s, dma_cb(s, 0); } +static void ahci_restart_dma(IDEDMA *dma) +{ + /* Nothing to do, ahci_start_dma already resets s->io_buffer_offset. */ +} + /** * Called in DMA R/W chains to read the PRDT, utilizing ahci_populate_sglist. * Not currently invoked by PIO R/W chains, @@ -1248,6 +1253,7 @@ static void ahci_irq_set(void *opaque, int n, int level) static const IDEDMAOps ahci_dma_ops = { .start_dma = ahci_start_dma, + .restart_dma = ahci_restart_dma, .start_transfer = ahci_start_transfer, .prepare_buf = ahci_dma_prepare_buf, .commit_buf = ahci_commit_buf, @@ -1282,6 +1288,7 @@ void ahci_init(AHCIState *s, DeviceState *qdev, AddressSpace *as, int ports) ad->port_no = i; ad->port.dma = &ad->dma; ad->port.dma->ops = &ahci_dma_ops; + ide_register_restart_cb(&ad->port); } } @@ -1360,16 +1367,16 @@ static int ahci_state_post_load(void *opaque, int version_id) map_page(s->as, &ad->res_fis, ((uint64_t)pr->fis_addr_hi << 32) | pr->fis_addr, 256); /* - * All pending i/o should be flushed out on a migrate. However, - * we might not have cleared the busy_slot since this is done - * in a bh. Also, issue i/o against any slots that are pending. + * If an error is present, ad->busy_slot will be valid and not -1. + * In this case, an operation is waiting to resume and will re-check + * for additional AHCI commands to execute upon completion. + * + * In the case where no error was present, busy_slot will be -1, + * and we should check to see if there are additional commands waiting. */ - if ((ad->busy_slot != -1) && - !(ad->port.ifs[0].status & (BUSY_STAT|DRQ_STAT))) { - pr->cmd_issue &= ~(1 << ad->busy_slot); - ad->busy_slot = -1; + if (ad->busy_slot == -1) { + check_cmd(s, i); } - check_cmd(s, i); } return 0; From c27c73aaf924655b2cc2e74384a639805acededb Mon Sep 17 00:00:00 2001 From: John Snow Date: Mon, 23 Feb 2015 11:18:05 -0500 Subject: [PATCH 51/73] ahci: Recompute cur_cmd on migrate post load When the AHCI HBA device is migrated, all of the information that led to the request being created is stored in the AHCIDevice structures, except for pointers into guest data where return information needs to be stored. The "cur_cmd" field is usually responsible for this. To rebuild the cur_cmd pointer post-migration, we can utilize the busy_slot index to figure out where the command header we are still processing is. This allows a machine in a halted state from rerror=stop or werror=stop to be migrated and resume operations without issue. Signed-off-by: John Snow Message-id: 1424708286-16483-17-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- hw/ide/ahci.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index 56a4867cd09c..e1ae36f7cd51 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -1376,6 +1376,13 @@ static int ahci_state_post_load(void *opaque, int version_id) */ if (ad->busy_slot == -1) { check_cmd(s, i); + } else { + /* We are in the middle of a command, and may need to access + * the command header in guest memory again. */ + if (ad->busy_slot < 0 || ad->busy_slot >= AHCI_MAX_CMDS) { + return -1; + } + ad->cur_cmd = &((AHCICmdHdr *)ad->lst)[ad->busy_slot]; } } From baca2b9e3a94be1690fc4a842a97b64a4c8f892c Mon Sep 17 00:00:00 2001 From: John Snow Date: Mon, 23 Feb 2015 11:18:06 -0500 Subject: [PATCH 52/73] qtest/ide: Test flush / retry for ISA and PCI This patch adds tests for werror and rerror functionality for the PCI and ISA ide buses. Tests for the AHCI device are to be included at a later date after requisite patches have been merged upstream to support needed functionality by the tests. Signed-off-by: Paolo Bonzini Signed-off-by: John Snow Reviewed-by: Paolo Bonzini Message-id: 1424708286-16483-18-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- tests/ide-test.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/tests/ide-test.c b/tests/ide-test.c index 29f4039bd5ca..b28a3023c2e6 100644 --- a/tests/ide-test.c +++ b/tests/ide-test.c @@ -118,7 +118,6 @@ static void ide_test_start(const char *cmdline_fmt, ...) va_end(ap); qtest_start(cmdline); - qtest_irq_intercept_in(global_qtest, "ioapic"); guest_malloc = pc_alloc_init(); g_free(cmdline); @@ -388,6 +387,7 @@ static void test_bmdma_setup(void) "-drive file=%s,if=ide,serial=%s,cache=writeback,format=raw " "-global ide-hd.ver=%s", tmp_path, "testdisk", "version"); + qtest_irq_intercept_in(global_qtest, "ioapic"); } static void test_bmdma_teardown(void) @@ -516,7 +516,7 @@ static void prepare_blkdebug_script(const char *debug_fn, const char *event) g_assert(ret == 0); } -static void test_retry_flush(void) +static void test_retry_flush(const char *machine) { uint8_t data; const char *s; @@ -580,6 +580,16 @@ static void test_flush_nodev(void) ide_test_quit(); } +static void test_pci_retry_flush(const char *machine) +{ + test_retry_flush("pc"); +} + +static void test_isa_retry_flush(const char *machine) +{ + test_retry_flush("isapc"); +} + int main(int argc, char **argv) { const char *arch = qtest_get_arch(); @@ -617,9 +627,9 @@ int main(int argc, char **argv) qtest_add_func("/ide/bmdma/teardown", test_bmdma_teardown); qtest_add_func("/ide/flush", test_flush); - qtest_add_func("/ide/flush_nodev", test_flush_nodev); - - qtest_add_func("/ide/retry/flush", test_retry_flush); + qtest_add_func("/ide/flush/nodev", test_flush_nodev); + qtest_add_func("/ide/flush/retry_pci", test_pci_retry_flush); + qtest_add_func("/ide/flush/retry_isa", test_isa_retry_flush); ret = g_test_run(); From 4a42f6d408079830099dbffc1b0cb2869db69215 Mon Sep 17 00:00:00 2001 From: John Snow Date: Wed, 25 Feb 2015 18:06:35 -0500 Subject: [PATCH 53/73] libqos/ahci: Zero-fill AHCI headers Even though it's just the reserved space, make sure they're zeroes. Signed-off-by: John Snow Message-id: 1424905602-24715-2-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- tests/libqos/ahci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/libqos/ahci.c b/tests/libqos/ahci.c index a6105c750fe7..9dc505ce0a64 100644 --- a/tests/libqos/ahci.c +++ b/tests/libqos/ahci.c @@ -487,7 +487,7 @@ void ahci_get_command_header(AHCIQState *ahci, uint8_t port, void ahci_set_command_header(AHCIQState *ahci, uint8_t port, uint8_t slot, AHCICommandHeader *cmd) { - AHCICommandHeader tmp; + AHCICommandHeader tmp = { .flags = 0 }; uint64_t ba = ahci->port[port].clb; ba += slot * sizeof(AHCICommandHeader); From d63b4017c2dff9cf0f15d06d5f604a97a30ef4c5 Mon Sep 17 00:00:00 2001 From: John Snow Date: Wed, 25 Feb 2015 18:06:36 -0500 Subject: [PATCH 54/73] qtest/ahci: Add a macro bootup routine Add a routine that can be used to engage the AHCI device at a not-granular level so that bringing up the functionality of the HBA is easy in future tests that are not concerned with testing the bring-up process. Signed-off-by: John Snow Message-id: 1424905602-24715-3-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- tests/ahci-test.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/tests/ahci-test.c b/tests/ahci-test.c index 53fd068c8aad..9fe9fb5f44e6 100644 --- a/tests/ahci-test.c +++ b/tests/ahci-test.c @@ -107,6 +107,21 @@ static void ahci_shutdown(AHCIQState *ahci) qtest_shutdown(qs); } +/** + * Boot and fully enable the HBA device. + * @see ahci_boot, ahci_pci_enable and ahci_hba_enable. + */ +static AHCIQState *ahci_boot_and_enable(void) +{ + AHCIQState *ahci; + ahci = ahci_boot(); + + ahci_pci_enable(ahci); + ahci_hba_enable(ahci); + + return ahci; +} + /*** Specification Adherence Tests ***/ /** @@ -831,9 +846,7 @@ static void test_identify(void) { AHCIQState *ahci; - ahci = ahci_boot(); - ahci_pci_enable(ahci); - ahci_hba_enable(ahci); + ahci = ahci_boot_and_enable(); ahci_test_identify(ahci); ahci_shutdown(ahci); } @@ -845,9 +858,7 @@ static void test_dma_rw_simple(void) { AHCIQState *ahci; - ahci = ahci_boot(); - ahci_pci_enable(ahci); - ahci_hba_enable(ahci); + ahci = ahci_boot_and_enable(); ahci_test_dma_rw_simple(ahci); ahci_shutdown(ahci); } From f9f963e0fbba98d7ece75287c40d0c1b249dc9b4 Mon Sep 17 00:00:00 2001 From: John Snow Date: Wed, 25 Feb 2015 18:06:37 -0500 Subject: [PATCH 55/73] libqos/ahci: add ahci command helpers ahci_command_set_flags: Set additional flags in the command header. ahci_command_clr_flags: Clear flags from the command header. ahci_command_set_offset: Change the IO sector from 0. ahci_command_adjust: Adjust many values simultaneously. To be used to adjust the command header if the default values/guesses were incorrect or undesirable. Signed-off-by: John Snow Message-id: 1424905602-24715-4-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi [ kwolf: Fixed conflicting prototype for ahci_command_adjust() ] Signed-off-by: Kevin Wolf --- tests/libqos/ahci.c | 42 ++++++++++++++++++++++++++++++++++++++++++ tests/libqos/ahci.h | 5 +++++ 2 files changed, 47 insertions(+) diff --git a/tests/libqos/ahci.c b/tests/libqos/ahci.c index 9dc505ce0a64..b0f39a5e328b 100644 --- a/tests/libqos/ahci.c +++ b/tests/libqos/ahci.c @@ -713,6 +713,40 @@ void ahci_command_free(AHCICommand *cmd) g_free(cmd); } +void ahci_command_set_flags(AHCICommand *cmd, uint16_t cmdh_flags) +{ + cmd->header.flags |= cmdh_flags; +} + +void ahci_command_clr_flags(AHCICommand *cmd, uint16_t cmdh_flags) +{ + cmd->header.flags &= ~cmdh_flags; +} + +void ahci_command_set_offset(AHCICommand *cmd, uint64_t lba_sect) +{ + RegH2DFIS *fis = &(cmd->fis); + if (cmd->props->lba28) { + g_assert_cmphex(lba_sect, <=, 0xFFFFFFF); + } else if (cmd->props->lba48) { + g_assert_cmphex(lba_sect, <=, 0xFFFFFFFFFFFF); + } else { + /* Can't set offset if we don't know the format. */ + g_assert_not_reached(); + } + + /* LBA28 uses the low nibble of the device/control register for LBA24:27 */ + fis->lba_lo[0] = (lba_sect & 0xFF); + fis->lba_lo[1] = (lba_sect >> 8) & 0xFF; + fis->lba_lo[2] = (lba_sect >> 16) & 0xFF; + if (cmd->props->lba28) { + fis->device = (fis->device & 0xF0) || (lba_sect >> 24) & 0x0F; + } + fis->lba_hi[0] = (lba_sect >> 24) & 0xFF; + fis->lba_hi[1] = (lba_sect >> 32) & 0xFF; + fis->lba_hi[2] = (lba_sect >> 40) & 0xFF; +} + void ahci_command_set_buffer(AHCICommand *cmd, uint64_t buffer) { cmd->buffer = buffer; @@ -740,6 +774,14 @@ void ahci_command_set_prd_size(AHCICommand *cmd, unsigned prd_size) ahci_command_set_sizes(cmd, cmd->xbytes, prd_size); } +void ahci_command_adjust(AHCICommand *cmd, uint64_t offset, uint64_t buffer, + uint64_t xbytes, unsigned prd_size) +{ + ahci_command_set_sizes(cmd, xbytes, prd_size); + ahci_command_set_buffer(cmd, buffer); + ahci_command_set_offset(cmd, offset); +} + void ahci_command_commit(AHCIQState *ahci, AHCICommand *cmd, uint8_t port) { uint16_t i, prdtl; diff --git a/tests/libqos/ahci.h b/tests/libqos/ahci.h index 39b99d365817..888545d5a2a2 100644 --- a/tests/libqos/ahci.h +++ b/tests/libqos/ahci.h @@ -537,11 +537,16 @@ void ahci_command_verify(AHCIQState *ahci, AHCICommand *cmd); void ahci_command_free(AHCICommand *cmd); /* Command adjustments */ +void ahci_command_set_flags(AHCICommand *cmd, uint16_t cmdh_flags); +void ahci_command_clr_flags(AHCICommand *cmd, uint16_t cmdh_flags); +void ahci_command_set_offset(AHCICommand *cmd, uint64_t lba_sect); void ahci_command_set_buffer(AHCICommand *cmd, uint64_t buffer); void ahci_command_set_size(AHCICommand *cmd, uint64_t xbytes); void ahci_command_set_prd_size(AHCICommand *cmd, unsigned prd_size); void ahci_command_set_sizes(AHCICommand *cmd, uint64_t xbytes, unsigned prd_size); +void ahci_command_adjust(AHCICommand *cmd, uint64_t lba_sect, uint64_t gbuffer, + uint64_t xbytes, unsigned prd_size); /* Command Misc */ uint8_t ahci_command_slot(AHCICommand *cmd); From d6c403edc7ce890be12915f12eb02062bf275ca8 Mon Sep 17 00:00:00 2001 From: John Snow Date: Wed, 25 Feb 2015 18:06:38 -0500 Subject: [PATCH 56/73] qtest/ahci: Add DMA test variants These test a few different pathways in the AHCI code. short: Test the minimum transfer size, exactly one sector. simple: Test a transfer using a single PRD, in this case, 4K. double: Test transferring 8K, which we will split up as two PRDs. long: Test transferring a lot of data using many PRDs, 256K. Signed-off-by: John Snow Message-id: 1424905602-24715-5-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- tests/ahci-test.c | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/tests/ahci-test.c b/tests/ahci-test.c index 9fe9fb5f44e6..9394d8587296 100644 --- a/tests/ahci-test.c +++ b/tests/ahci-test.c @@ -731,12 +731,11 @@ static void ahci_test_identify(AHCIQState *ahci) g_assert_cmphex(sect_size, ==, 0x200); } -static void ahci_test_dma_rw_simple(AHCIQState *ahci) +static void ahci_test_dma_rw_simple(AHCIQState *ahci, unsigned bufsize) { uint64_t ptr; uint8_t port; unsigned i; - const unsigned bufsize = 4096; unsigned char *tx = g_malloc(bufsize); unsigned char *rx = g_malloc0(bufsize); @@ -751,7 +750,7 @@ static void ahci_test_dma_rw_simple(AHCIQState *ahci) ptr = ahci_alloc(ahci, bufsize); g_assert(ptr); - /* Write some indicative pattern to our 4K buffer. */ + /* Write some indicative pattern to our buffer. */ for (i = 0; i < bufsize; i++) { tx[i] = (bufsize - i); } @@ -852,17 +851,37 @@ static void test_identify(void) } /** - * Perform a simple DMA R/W test, using a single PRD and non-NCQ commands. + * Perform a simple DMA R/W test using non-NCQ commands. */ -static void test_dma_rw_simple(void) +static void test_dma_rw_interface(unsigned bufsize) { AHCIQState *ahci; ahci = ahci_boot_and_enable(); - ahci_test_dma_rw_simple(ahci); + ahci_test_dma_rw_simple(ahci, bufsize); ahci_shutdown(ahci); } +static void test_dma_rw_simple(void) +{ + test_dma_rw_interface(4096); +} + +static void test_dma_rw_double(void) +{ + test_dma_rw_interface(8192); +} + +static void test_dma_rw_long(void) +{ + test_dma_rw_interface(4096 * 64); +} + +static void test_dma_rw_short(void) +{ + test_dma_rw_interface(512); +} + /******************************************************************************/ int main(int argc, char **argv) @@ -919,6 +938,9 @@ int main(int argc, char **argv) qtest_add_func("/ahci/hba_enable", test_hba_enable); qtest_add_func("/ahci/identify", test_identify); qtest_add_func("/ahci/dma/simple", test_dma_rw_simple); + qtest_add_func("/ahci/dma/double", test_dma_rw_double); + qtest_add_func("/ahci/dma/long", test_dma_rw_long); + qtest_add_func("/ahci/dma/short", test_dma_rw_short); ret = g_test_run(); From bda39dc241b1c216158aa990936d08f0a1ad26e3 Mon Sep 17 00:00:00 2001 From: John Snow Date: Wed, 25 Feb 2015 18:06:39 -0500 Subject: [PATCH 57/73] qtest/ahci: Add PIO and LBA48 tests In addition to DMA tests, test PIO and LBA48 command pathways in AHCI. To accomplish this, a primitive multiplexer for gtest is added. Though guests may prefer not to issue PIO commands directly except for single sector cases during early boot and shutdown, these pathways are still used for the transfer of ATAPI commands as well, and should be behaving well. Signed-off-by: John Snow Message-id: 1424905602-24715-6-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- tests/ahci-test.c | 156 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 134 insertions(+), 22 deletions(-) diff --git a/tests/ahci-test.c b/tests/ahci-test.c index 9394d8587296..21f20f70029c 100644 --- a/tests/ahci-test.c +++ b/tests/ahci-test.c @@ -731,7 +731,8 @@ static void ahci_test_identify(AHCIQState *ahci) g_assert_cmphex(sect_size, ==, 0x200); } -static void ahci_test_dma_rw_simple(AHCIQState *ahci, unsigned bufsize) +static void ahci_test_io_rw_simple(AHCIQState *ahci, unsigned bufsize, + uint8_t read_cmd, uint8_t write_cmd) { uint64_t ptr; uint8_t port; @@ -757,9 +758,9 @@ static void ahci_test_dma_rw_simple(AHCIQState *ahci, unsigned bufsize) memwrite(ptr, tx, bufsize); /* Write this buffer to disk, then read it back to the DMA buffer. */ - ahci_guest_io(ahci, port, CMD_WRITE_DMA, ptr, bufsize); + ahci_guest_io(ahci, port, write_cmd, ptr, bufsize); qmemset(ptr, 0x00, bufsize); - ahci_guest_io(ahci, port, CMD_READ_DMA, ptr, bufsize); + ahci_guest_io(ahci, port, read_cmd, ptr, bufsize); /*** Read back the Data ***/ memread(ptr, rx, bufsize); @@ -850,36 +851,142 @@ static void test_identify(void) ahci_shutdown(ahci); } +/******************************************************************************/ +/* AHCI I/O Test Matrix Definitions */ + +enum BuffLen { + LEN_BEGIN = 0, + LEN_SIMPLE = LEN_BEGIN, + LEN_DOUBLE, + LEN_LONG, + LEN_SHORT, + NUM_LENGTHS +}; + +static const char *buff_len_str[NUM_LENGTHS] = { "simple", "double", + "long", "short" }; + +enum AddrMode { + ADDR_MODE_BEGIN = 0, + ADDR_MODE_LBA28 = ADDR_MODE_BEGIN, + ADDR_MODE_LBA48, + NUM_ADDR_MODES +}; + +static const char *addr_mode_str[NUM_ADDR_MODES] = { "lba28", "lba48" }; + +enum IOMode { + MODE_BEGIN = 0, + MODE_PIO = MODE_BEGIN, + MODE_DMA, + NUM_MODES +}; + +static const char *io_mode_str[NUM_MODES] = { "pio", "dma" }; + +enum IOOps { + IO_BEGIN = 0, + IO_READ = IO_BEGIN, + IO_WRITE, + NUM_IO_OPS +}; + +typedef struct AHCIIOTestOptions { + enum BuffLen length; + enum AddrMode address_type; + enum IOMode io_type; +} AHCIIOTestOptions; + /** - * Perform a simple DMA R/W test using non-NCQ commands. + * Table of possible I/O ATA commands given a set of enumerations. */ -static void test_dma_rw_interface(unsigned bufsize) +static const uint8_t io_cmds[NUM_MODES][NUM_ADDR_MODES][NUM_IO_OPS] = { + [MODE_PIO] = { + [ADDR_MODE_LBA28] = { + [IO_READ] = CMD_READ_PIO, + [IO_WRITE] = CMD_WRITE_PIO }, + [ADDR_MODE_LBA48] = { + [IO_READ] = CMD_READ_PIO_EXT, + [IO_WRITE] = CMD_WRITE_PIO_EXT } + }, + [MODE_DMA] = { + [ADDR_MODE_LBA28] = { + [IO_READ] = CMD_READ_DMA, + [IO_WRITE] = CMD_WRITE_DMA }, + [ADDR_MODE_LBA48] = { + [IO_READ] = CMD_READ_DMA_EXT, + [IO_WRITE] = CMD_WRITE_DMA_EXT } + } +}; + +/** + * Test a Read/Write pattern using various commands, addressing modes, + * transfer modes, and buffer sizes. + */ +static void test_io_rw_interface(enum AddrMode lba48, enum IOMode dma, + unsigned bufsize) { AHCIQState *ahci; ahci = ahci_boot_and_enable(); - ahci_test_dma_rw_simple(ahci, bufsize); + ahci_test_io_rw_simple(ahci, bufsize, + io_cmds[dma][lba48][IO_READ], + io_cmds[dma][lba48][IO_WRITE]); ahci_shutdown(ahci); } -static void test_dma_rw_simple(void) +/** + * Demultiplex the test data and invoke the actual test routine. + */ +static void test_io_interface(gconstpointer opaque) { - test_dma_rw_interface(4096); -} + AHCIIOTestOptions *opts = (AHCIIOTestOptions *)opaque; + unsigned bufsize; -static void test_dma_rw_double(void) -{ - test_dma_rw_interface(8192); -} + switch (opts->length) { + case LEN_SIMPLE: + bufsize = 4096; + break; + case LEN_DOUBLE: + bufsize = 8192; + break; + case LEN_LONG: + bufsize = 4096 * 64; + break; + case LEN_SHORT: + bufsize = 512; + break; + default: + g_assert_not_reached(); + } -static void test_dma_rw_long(void) -{ - test_dma_rw_interface(4096 * 64); + test_io_rw_interface(opts->address_type, opts->io_type, bufsize); + g_free(opts); + return; } -static void test_dma_rw_short(void) +static void create_ahci_io_test(enum IOMode type, enum AddrMode addr, + enum BuffLen len) { - test_dma_rw_interface(512); + static const char *arch; + char *name; + AHCIIOTestOptions *opts = g_malloc(sizeof(AHCIIOTestOptions)); + + opts->length = len; + opts->address_type = addr; + opts->io_type = type; + + if (!arch) { + arch = qtest_get_arch(); + } + + name = g_strdup_printf("/%s/ahci/io/%s/%s/%s", arch, + io_mode_str[type], + addr_mode_str[addr], + buff_len_str[len]); + + g_test_add_data_func(name, opts, test_io_interface); + g_free(name); } /******************************************************************************/ @@ -890,6 +997,7 @@ int main(int argc, char **argv) int fd; int ret; int c; + int i, j, k; static struct option long_options[] = { {"pedantic", no_argument, 0, 'p' }, @@ -937,10 +1045,14 @@ int main(int argc, char **argv) qtest_add_func("/ahci/hba_spec", test_hba_spec); qtest_add_func("/ahci/hba_enable", test_hba_enable); qtest_add_func("/ahci/identify", test_identify); - qtest_add_func("/ahci/dma/simple", test_dma_rw_simple); - qtest_add_func("/ahci/dma/double", test_dma_rw_double); - qtest_add_func("/ahci/dma/long", test_dma_rw_long); - qtest_add_func("/ahci/dma/short", test_dma_rw_short); + + for (i = MODE_BEGIN; i < NUM_MODES; i++) { + for (j = ADDR_MODE_BEGIN; j < NUM_ADDR_MODES; j++) { + for (k = LEN_BEGIN; k < NUM_LENGTHS; k++) { + create_ahci_io_test(i, j, k); + } + } + } ret = g_test_run(); From e0c59cc7608f84fcaddc827e05d38af8d10447a3 Mon Sep 17 00:00:00 2001 From: John Snow Date: Wed, 25 Feb 2015 18:06:40 -0500 Subject: [PATCH 58/73] qtest/ahci: add fragmented dma test Test what happens when we try to use extremely short PRDTs to accomplish a small data transfer. Signed-off-by: John Snow Message-id: 1424905602-24715-7-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- tests/ahci-test.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/tests/ahci-test.c b/tests/ahci-test.c index 21f20f70029c..cf0b98b962ed 100644 --- a/tests/ahci-test.c +++ b/tests/ahci-test.c @@ -851,6 +851,63 @@ static void test_identify(void) ahci_shutdown(ahci); } +/** + * Fragmented DMA test: Perform a standard 4K DMA read/write + * test, but make sure the physical regions are fragmented to + * be very small, each just 32 bytes, to see how AHCI performs + * with chunks defined to be much less than a sector. + */ +static void test_dma_fragmented(void) +{ + AHCIQState *ahci; + AHCICommand *cmd; + uint8_t px; + size_t bufsize = 4096; + unsigned char *tx = g_malloc(bufsize); + unsigned char *rx = g_malloc0(bufsize); + unsigned i; + uint64_t ptr; + + ahci = ahci_boot_and_enable(); + px = ahci_port_select(ahci); + ahci_port_clear(ahci, px); + + /* create pattern */ + for (i = 0; i < bufsize; i++) { + tx[i] = (bufsize - i); + } + + /* Create a DMA buffer in guest memory, and write our pattern to it. */ + ptr = guest_alloc(ahci->parent->alloc, bufsize); + g_assert(ptr); + memwrite(ptr, tx, bufsize); + + cmd = ahci_command_create(CMD_WRITE_DMA); + ahci_command_adjust(cmd, 0, ptr, bufsize, 32); + ahci_command_commit(ahci, cmd, px); + ahci_command_issue(ahci, cmd); + ahci_command_verify(ahci, cmd); + g_free(cmd); + + cmd = ahci_command_create(CMD_READ_DMA); + ahci_command_adjust(cmd, 0, ptr, bufsize, 32); + ahci_command_commit(ahci, cmd, px); + ahci_command_issue(ahci, cmd); + ahci_command_verify(ahci, cmd); + g_free(cmd); + + /* Read back the guest's receive buffer into local memory */ + memread(ptr, rx, bufsize); + guest_free(ahci->parent->alloc, ptr); + + g_assert_cmphex(memcmp(tx, rx, bufsize), ==, 0); + + ahci_shutdown(ahci); + + g_free(rx); + g_free(tx); +} + /******************************************************************************/ /* AHCI I/O Test Matrix Definitions */ @@ -1054,6 +1111,8 @@ int main(int argc, char **argv) } } + qtest_add_func("/ahci/io/dma/lba28/fragmented", test_dma_fragmented); + ret = g_test_run(); /* Cleanup */ From 833a7cc36e63653641558ba27148076f9a32062f Mon Sep 17 00:00:00 2001 From: Liu Yuan Date: Wed, 18 Feb 2015 11:57:55 +0800 Subject: [PATCH 59/73] sheepdog: fix confused return values These functions mix up -1 and -errno in return values and would might cause trouble error handling in the call chain. This patch let them return -errno and add some comments. Cc: qemu-devel@nongnu.org Cc: Markus Armbruster Cc: Kevin Wolf Cc: Stefan Hajnoczi Reported-by: Markus Armbruster Signed-off-by: Liu Yuan Message-id: 1424231875-7131-1-git-send-email-namei.unix@gmail.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/sheepdog.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/block/sheepdog.c b/block/sheepdog.c index a2679c280335..60a4853b640f 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -545,6 +545,7 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov, return acb; } +/* Return -EIO in case of error, file descriptor on success */ static int connect_to_sdog(BDRVSheepdogState *s, Error **errp) { int fd; @@ -564,11 +565,14 @@ static int connect_to_sdog(BDRVSheepdogState *s, Error **errp) if (fd >= 0) { qemu_set_nonblock(fd); + } else { + fd = -EIO; } return fd; } +/* Return 0 on success and -errno in case of error */ static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data, unsigned int *wlen) { @@ -577,11 +581,13 @@ static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data, ret = qemu_co_send(sockfd, hdr, sizeof(*hdr)); if (ret != sizeof(*hdr)) { error_report("failed to send a req, %s", strerror(errno)); + ret = -socket_error(); return ret; } ret = qemu_co_send(sockfd, data, *wlen); if (ret != *wlen) { + ret = -socket_error(); error_report("failed to send a req, %s", strerror(errno)); } @@ -656,6 +662,11 @@ static coroutine_fn void do_co_req(void *opaque) srco->finished = true; } +/* + * Send the request to the sheep in a synchronous manner. + * + * Return 0 on success, -errno in case of error. + */ static int do_req(int sockfd, AioContext *aio_context, SheepdogReq *hdr, void *data, unsigned int *wlen, unsigned int *rlen) { From 1e34cf9681ec549e26f30daaabc1ce58d60446f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc=20Mar=C3=AD?= Date: Tue, 24 Feb 2015 17:34:14 +0100 Subject: [PATCH 60/73] libqos: Solve bug in interrupt checking when using MSIX in virtio-pci.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MSIX interrupt was always acked without checking its value, which caused a race condition. If the ISR was raised between the read and the acking, the ISR was never detected and it timed out. Signed-off-by: Marc Marí Reviewed-by: John Snow Tested-by: John Snow Message-id: 1424795655-16952-1-git-send-email-marc.mari.barcelo@gmail.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- tests/libqos/virtio-pci.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/libqos/virtio-pci.c b/tests/libqos/virtio-pci.c index 046a3160df26..f9fb924b8ee4 100644 --- a/tests/libqos/virtio-pci.c +++ b/tests/libqos/virtio-pci.c @@ -142,8 +142,12 @@ static bool qvirtio_pci_get_queue_isr_status(QVirtioDevice *d, QVirtQueue *vq) return qpci_msix_pending(dev->pdev, vqpci->msix_entry); } else { data = readl(vqpci->msix_addr); - writel(vqpci->msix_addr, 0); - return data == vqpci->msix_data; + if (data == vqpci->msix_data) { + writel(vqpci->msix_addr, 0); + return true; + } else { + return false; + } } } else { return qpci_io_readb(dev->pdev, dev->addr + QVIRTIO_PCI_ISR_STATUS) & 1; @@ -162,8 +166,12 @@ static bool qvirtio_pci_get_config_isr_status(QVirtioDevice *d) return qpci_msix_pending(dev->pdev, dev->config_msix_entry); } else { data = readl(dev->config_msix_addr); - writel(dev->config_msix_addr, 0); - return data == dev->config_msix_data; + if (data == dev->config_msix_data) { + writel(dev->config_msix_addr, 0); + return true; + } else { + return false; + } } } else { return qpci_io_readb(dev->pdev, dev->addr + QVIRTIO_PCI_ISR_STATUS) & 2; From 9b7d2d8b076ff12f0d7b4246aec8f421b4150aae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc=20Mar=C3=AD?= Date: Tue, 24 Feb 2015 22:59:14 +0100 Subject: [PATCH 61/73] tests: Check QVIRTIO_F_ANY_LAYOUT flag in virtio-blk test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check the QVIRTIO_F_ANY_LAYOUT flag before performing operations with 2 descriptor layout. This is to follow the specification strictly. This patch depends on: [PATCH v5 0/5] libqos: Virtio MMIO driver Signed-off-by: Marc Marí Message-id: 1424815154-27243-1-git-send-email-marc.mari.barcelo@gmail.com Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- tests/virtio-blk-test.c | 98 ++++++++++++++++++++++------------------- 1 file changed, 52 insertions(+), 46 deletions(-) diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c index 3b943fc39617..4078321a20c4 100644 --- a/tests/virtio-blk-test.c +++ b/tests/virtio-blk-test.c @@ -191,7 +191,7 @@ static void test_basic(const QVirtioBus *bus, QVirtioDevice *dev, qvirtio_set_driver_ok(bus, dev); - /* Write and read with 2 descriptor layout */ + /* Write and read with 3 descriptor layout */ /* Write request */ req.type = QVIRTIO_BLK_T_OUT; req.ioprio = 1; @@ -203,8 +203,10 @@ static void test_basic(const QVirtioBus *bus, QVirtioDevice *dev, g_free(req.data); - free_head = qvirtqueue_add(vq, req_addr, 528, false, true); + free_head = qvirtqueue_add(vq, req_addr, 16, false, true); + qvirtqueue_add(vq, req_addr + 16, 512, false, true); qvirtqueue_add(vq, req_addr + 528, 1, true, false); + qvirtqueue_kick(bus, dev, vq, free_head); qvirtio_wait_queue_isr(bus, dev, vq, QVIRTIO_BLK_TIMEOUT_US); @@ -224,7 +226,8 @@ static void test_basic(const QVirtioBus *bus, QVirtioDevice *dev, g_free(req.data); free_head = qvirtqueue_add(vq, req_addr, 16, false, true); - qvirtqueue_add(vq, req_addr + 16, 513, true, false); + qvirtqueue_add(vq, req_addr + 16, 512, true, true); + qvirtqueue_add(vq, req_addr + 528, 1, true, false); qvirtqueue_kick(bus, dev, vq, free_head); @@ -239,56 +242,55 @@ static void test_basic(const QVirtioBus *bus, QVirtioDevice *dev, guest_free(alloc, req_addr); - /* Write and read with 3 descriptor layout */ - /* Write request */ - req.type = QVIRTIO_BLK_T_OUT; - req.ioprio = 1; - req.sector = 1; - req.data = g_malloc0(512); - strcpy(req.data, "TEST"); + if (features & QVIRTIO_F_ANY_LAYOUT) { + /* Write and read with 2 descriptor layout */ + /* Write request */ + req.type = QVIRTIO_BLK_T_OUT; + req.ioprio = 1; + req.sector = 1; + req.data = g_malloc0(512); + strcpy(req.data, "TEST"); - req_addr = virtio_blk_request(alloc, &req, 512); - - g_free(req.data); + req_addr = virtio_blk_request(alloc, &req, 512); - free_head = qvirtqueue_add(vq, req_addr, 16, false, true); - qvirtqueue_add(vq, req_addr + 16, 512, false, true); - qvirtqueue_add(vq, req_addr + 528, 1, true, false); + g_free(req.data); - qvirtqueue_kick(bus, dev, vq, free_head); + free_head = qvirtqueue_add(vq, req_addr, 528, false, true); + qvirtqueue_add(vq, req_addr + 528, 1, true, false); + qvirtqueue_kick(bus, dev, vq, free_head); - qvirtio_wait_queue_isr(bus, dev, vq, QVIRTIO_BLK_TIMEOUT_US); - status = readb(req_addr + 528); - g_assert_cmpint(status, ==, 0); + qvirtio_wait_queue_isr(bus, dev, vq, QVIRTIO_BLK_TIMEOUT_US); + status = readb(req_addr + 528); + g_assert_cmpint(status, ==, 0); - guest_free(alloc, req_addr); + guest_free(alloc, req_addr); - /* Read request */ - req.type = QVIRTIO_BLK_T_IN; - req.ioprio = 1; - req.sector = 1; - req.data = g_malloc0(512); + /* Read request */ + req.type = QVIRTIO_BLK_T_IN; + req.ioprio = 1; + req.sector = 1; + req.data = g_malloc0(512); - req_addr = virtio_blk_request(alloc, &req, 512); + req_addr = virtio_blk_request(alloc, &req, 512); - g_free(req.data); + g_free(req.data); - free_head = qvirtqueue_add(vq, req_addr, 16, false, true); - qvirtqueue_add(vq, req_addr + 16, 512, true, true); - qvirtqueue_add(vq, req_addr + 528, 1, true, false); + free_head = qvirtqueue_add(vq, req_addr, 16, false, true); + qvirtqueue_add(vq, req_addr + 16, 513, true, false); - qvirtqueue_kick(bus, dev, vq, free_head); + qvirtqueue_kick(bus, dev, vq, free_head); - qvirtio_wait_queue_isr(bus, dev, vq, QVIRTIO_BLK_TIMEOUT_US); - status = readb(req_addr + 528); - g_assert_cmpint(status, ==, 0); + qvirtio_wait_queue_isr(bus, dev, vq, QVIRTIO_BLK_TIMEOUT_US); + status = readb(req_addr + 528); + g_assert_cmpint(status, ==, 0); - data = g_malloc0(512); - memread(req_addr + 16, data, 512); - g_assert_cmpstr(data, ==, "TEST"); - g_free(data); + data = g_malloc0(512); + memread(req_addr + 16, data, 512); + g_assert_cmpstr(data, ==, "TEST"); + g_free(data); - guest_free(alloc, req_addr); + guest_free(alloc, req_addr); + } } static void pci_basic(void) @@ -521,7 +523,8 @@ static void pci_msix(void) g_free(req.data); - free_head = qvirtqueue_add(&vqpci->vq, req_addr, 528, false, true); + free_head = qvirtqueue_add(&vqpci->vq, req_addr, 16, false, true); + qvirtqueue_add(&vqpci->vq, req_addr + 16, 512, false, true); qvirtqueue_add(&vqpci->vq, req_addr + 528, 1, true, false); qvirtqueue_kick(&qvirtio_pci, &dev->vdev, &vqpci->vq, free_head); @@ -544,7 +547,8 @@ static void pci_msix(void) g_free(req.data); free_head = qvirtqueue_add(&vqpci->vq, req_addr, 16, false, true); - qvirtqueue_add(&vqpci->vq, req_addr + 16, 513, true, false); + qvirtqueue_add(&vqpci->vq, req_addr + 16, 512, true, true); + qvirtqueue_add(&vqpci->vq, req_addr + 528, 1, true, false); qvirtqueue_kick(&qvirtio_pci, &dev->vdev, &vqpci->vq, free_head); @@ -625,7 +629,8 @@ static void pci_idx(void) g_free(req.data); - free_head = qvirtqueue_add(&vqpci->vq, req_addr, 528, false, true); + free_head = qvirtqueue_add(&vqpci->vq, req_addr, 16, false, true); + qvirtqueue_add(&vqpci->vq, req_addr + 16, 512, false, true); qvirtqueue_add(&vqpci->vq, req_addr + 528, 1, true, false); qvirtqueue_kick(&qvirtio_pci, &dev->vdev, &vqpci->vq, free_head); @@ -645,7 +650,8 @@ static void pci_idx(void) /* Notify after processing the third request */ qvirtqueue_set_used_event(&vqpci->vq, 2); - free_head = qvirtqueue_add(&vqpci->vq, req_addr, 528, false, true); + free_head = qvirtqueue_add(&vqpci->vq, req_addr, 16, false, true); + qvirtqueue_add(&vqpci->vq, req_addr + 16, 512, false, true); qvirtqueue_add(&vqpci->vq, req_addr + 528, 1, true, false); qvirtqueue_kick(&qvirtio_pci, &dev->vdev, &vqpci->vq, free_head); @@ -668,11 +674,11 @@ static void pci_idx(void) g_free(req.data); free_head = qvirtqueue_add(&vqpci->vq, req_addr, 16, false, true); - qvirtqueue_add(&vqpci->vq, req_addr + 16, 513, true, false); + qvirtqueue_add(&vqpci->vq, req_addr + 16, 512, true, true); + qvirtqueue_add(&vqpci->vq, req_addr + 528, 1, true, false); qvirtqueue_kick(&qvirtio_pci, &dev->vdev, &vqpci->vq, free_head); - qvirtio_wait_queue_isr(&qvirtio_pci, &dev->vdev, &vqpci->vq, QVIRTIO_BLK_TIMEOUT_US); From 4c0542ea60d21b56a9157d3f6331d2df8990d3bb Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Mon, 16 Feb 2015 12:09:40 +0800 Subject: [PATCH 62/73] virtio-blk: Remove the stale FIXME comment By default, we have ioeventfd enabled, so the IO request processing is in IO thread; in the vcpu thread, guest mode is returned to as quickly as possible, and completion is delivered via irqfd. Therefore this comment from the initial implementation is barely relevant. Signed-off-by: Fam Zheng Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- hw/block/virtio-blk.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 2acfef87eba2..046829768853 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -597,12 +597,6 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) if (mrb.num_reqs) { virtio_blk_submit_multireq(s->blk, &mrb); } - - /* - * FIXME: Want to check for completions before returning to guest mode, - * so cached reads and writes are reported as quickly as possible. But - * that should be done in the generic block layer. - */ } static void virtio_blk_dma_restart_bh(void *opaque) From 2726958d5eaa3e8242a56014a33e2420280cdd57 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 3 Mar 2015 09:38:32 -0500 Subject: [PATCH 63/73] iotests: Fix 051's reference output Commit c4bacafb717de55538311b80a185ce9166b8daa2 changed (improved) qdev_init_nofail()'s error reporting, which affects iotest 051. Fix the reference output. Signed-off-by: Max Reitz Reviewed-by: Markus Armbruster Signed-off-by: Kevin Wolf --- tests/qemu-iotests/051.out | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out index bf52bf02d429..88add8f05673 100644 --- a/tests/qemu-iotests/051.out +++ b/tests/qemu-iotests/051.out @@ -115,8 +115,7 @@ QEMU X.Y.Z monitor - type 'help' for more information Testing: -drive if=ide QEMU X.Y.Z monitor - type 'help' for more information (qemu) QEMU_PROG: Device needs media, but drive is empty -QEMU_PROG: Device initialization failed. -QEMU_PROG: Initialization of device ide-hd failed +QEMU_PROG: Initialization of device ide-hd failed: Device initialization failed. Testing: -drive if=virtio QEMU X.Y.Z monitor - type 'help' for more information @@ -127,8 +126,7 @@ QEMU_PROG: -drive if=virtio: Device 'virtio-blk-pci' could not be initialized Testing: -drive if=scsi QEMU X.Y.Z monitor - type 'help' for more information (qemu) QEMU_PROG: -drive if=scsi: Device needs media, but drive is empty -QEMU_PROG: Device initialization failed. -QEMU_PROG: Initialization of device lsi53c895a failed +QEMU_PROG: Initialization of device lsi53c895a failed: Device initialization failed. Testing: -drive if=none,id=disk -device ide-cd,drive=disk QEMU X.Y.Z monitor - type 'help' for more information @@ -178,8 +176,7 @@ QEMU X.Y.Z monitor - type 'help' for more information Testing: -drive file=TEST_DIR/t.qcow2,if=ide,readonly=on QEMU X.Y.Z monitor - type 'help' for more information (qemu) QEMU_PROG: Can't use a read-only drive -QEMU_PROG: Device initialization failed. -QEMU_PROG: Initialization of device ide-hd failed +QEMU_PROG: Initialization of device ide-hd failed: Device initialization failed. Testing: -drive file=TEST_DIR/t.qcow2,if=virtio,readonly=on QEMU X.Y.Z monitor - type 'help' for more information From 005628bc8a1693c3dcb00ff2dc3a67cf92c61a13 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 3 Mar 2015 16:22:38 -0500 Subject: [PATCH 64/73] iotests: Remove 006 vpc does support images > 127 GB if done correctly. qemu does it correctly. Remove the test pretending otherwise. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/006 | 54 -------------------------------------- tests/qemu-iotests/006.out | 6 ----- tests/qemu-iotests/group | 2 +- 3 files changed, 1 insertion(+), 61 deletions(-) delete mode 100755 tests/qemu-iotests/006 delete mode 100644 tests/qemu-iotests/006.out diff --git a/tests/qemu-iotests/006 b/tests/qemu-iotests/006 deleted file mode 100755 index 0c0cf5d32fd4..000000000000 --- a/tests/qemu-iotests/006 +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash -# -# Make sure qemu-img rejects > 127GB images for the vpc format as the format -# doesn't support this. -# -# Copyright (C) 2009 Red Hat, Inc. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -# - -# creator -owner=hch@lst.de - -seq=`basename $0` -echo "QA output created by $seq" - -here=`pwd` -tmp=/tmp/$$ -status=1 # failure is the default! - -_cleanup() -{ - _cleanup_test_img -} -trap "_cleanup; exit \$status" 0 1 2 3 15 - -# get standard environment, filters and checks -. ./common.rc -. ./common.filter - -_supported_fmt vpc -_supported_proto generic -_supported_os Linux - - -echo -echo "creating 128GB image" -_make_test_img 128G - -# success, all done -echo "*** done" -rm -f $seq.full -status=0 diff --git a/tests/qemu-iotests/006.out b/tests/qemu-iotests/006.out deleted file mode 100644 index d135841b707c..000000000000 --- a/tests/qemu-iotests/006.out +++ /dev/null @@ -1,6 +0,0 @@ -QA output created by 006 - -creating 128GB image -qemu-img: The image size is too large for file format 'vpc' -Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=137438953472 -*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 93a02502df6d..87eec393a80e 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -12,7 +12,7 @@ 003 rw auto 004 rw auto quick 005 img auto quick -006 img auto +# 006 was removed, do not reuse 007 snapshot auto 008 rw auto quick 009 rw auto quick From aef58bdc1e7d8f37cdf838b49f160c520399c84f Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Tue, 3 Mar 2015 16:22:39 -0500 Subject: [PATCH 65/73] iotests: Drop vpc from 004's and 104's format list Both tests require the test image to have a specific size; this cannot be guaranteed by vpc (unless tuning the test specifically for that format). It is safe to exclude vpc from 004 because what is tested there is implemented in a generic part in the block layer and not format-specific. It is safe to exclude vpc from 104 because for vpc basically every image size is "unaligned", so if that would break at some point in time, we would quickly notice just by running the generic tests. Signed-off-by: Max Reitz Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/004 | 2 +- tests/qemu-iotests/104 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/qemu-iotests/004 b/tests/qemu-iotests/004 index 651072ef896b..2ad77ed51410 100755 --- a/tests/qemu-iotests/004 +++ b/tests/qemu-iotests/004 @@ -38,7 +38,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 . ./common.rc . ./common.filter -_supported_fmt generic +_supported_fmt raw qcow qcow2 qed vdi vmdk vhdx _supported_proto generic _supported_os Linux diff --git a/tests/qemu-iotests/104 b/tests/qemu-iotests/104 index f32752bb6f44..2e35ea80df91 100755 --- a/tests/qemu-iotests/104 +++ b/tests/qemu-iotests/104 @@ -34,7 +34,7 @@ trap "exit \$status" 0 1 2 3 15 . ./common.rc . ./common.filter -_supported_fmt generic +_supported_fmt raw qcow qcow2 qed vdi vmdk vhdx _supported_proto generic _supported_os Linux From f0ab6f109630940146cbaf47d0cd99993ddba824 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 27 Feb 2015 14:54:39 -0500 Subject: [PATCH 66/73] block/vdi: Add locking for parallel requests When allocating a new cluster, the first write to it must be the one doing the allocation, because that one pads its write request to the cluster size; if another write to that cluster is executed before it, that write will be overwritten due to the padding. See https://bugs.launchpad.net/qemu/+bug/1422307 for what can go wrong without this patch. Cc: qemu-stable Signed-off-by: Max Reitz Reviewed-by: Stefan Hajnoczi Reviewed-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block/vdi.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/block/vdi.c b/block/vdi.c index 74030c6e30e6..53bd02fe227e 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -53,6 +53,7 @@ #include "block/block_int.h" #include "qemu/module.h" #include "migration/migration.h" +#include "block/coroutine.h" #if defined(CONFIG_UUID) #include @@ -196,6 +197,8 @@ typedef struct { /* VDI header (converted to host endianness). */ VdiHeader header; + CoMutex write_lock; + Error *migration_blocker; } BDRVVdiState; @@ -504,6 +507,8 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags, "vdi", bdrv_get_device_name(bs), "live migration"); migrate_add_blocker(s->migration_blocker); + qemu_co_mutex_init(&s->write_lock); + return 0; fail_free_bmap: @@ -639,11 +644,31 @@ static int vdi_co_write(BlockDriverState *bs, buf, n_sectors * SECTOR_SIZE); memset(block + (sector_in_block + n_sectors) * SECTOR_SIZE, 0, (s->block_sectors - n_sectors - sector_in_block) * SECTOR_SIZE); + + /* Note that this coroutine does not yield anywhere from reading the + * bmap entry until here, so in regards to all the coroutines trying + * to write to this cluster, the one doing the allocation will + * always be the first to try to acquire the lock. + * Therefore, it is also the first that will actually be able to + * acquire the lock and thus the padded cluster is written before + * the other coroutines can write to the affected area. */ + qemu_co_mutex_lock(&s->write_lock); ret = bdrv_write(bs->file, offset, block, s->block_sectors); + qemu_co_mutex_unlock(&s->write_lock); } else { uint64_t offset = s->header.offset_data / SECTOR_SIZE + (uint64_t)bmap_entry * s->block_sectors + sector_in_block; + qemu_co_mutex_lock(&s->write_lock); + /* This lock is only used to make sure the following write operation + * is executed after the write issued by the coroutine allocating + * this cluster, therefore we do not need to keep it locked. + * As stated above, the allocating coroutine will always try to lock + * the mutex before all the other concurrent accesses to that + * cluster, therefore at this point we can be absolutely certain + * that that write operation has returned (there may be other writes + * in flight, but they do not concern this very operation). */ + qemu_co_mutex_unlock(&s->write_lock); ret = bdrv_write(bs->file, offset, buf, n_sectors); } From df1d4c341a735334de23513f17bf110c8c49b3e7 Mon Sep 17 00:00:00 2001 From: Ekaterina Tumanova Date: Wed, 4 Mar 2015 11:02:16 +0100 Subject: [PATCH 67/73] scsi-hd: fix property unset case Commit c53659f0 ("BlockConf: Call backend functions to detect geometry and blocksizes") causes a segmentation fault on the invalid configuration of a scsi device without a drive. Let's check for conf.blk before calling blkconf_blocksizes. The error will be handled later on in scsi_realize anyway. Reported-by: Max Reitz Reviewed-by: Stefan Hajnoczi Reviewed-by: Max Reitz Signed-off-by: Ekaterina Tumanova Signed-off-by: Kevin Wolf --- hw/scsi/scsi-disk.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 2921728be45a..54d71f4c036c 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -2291,7 +2291,12 @@ static void scsi_realize(SCSIDevice *dev, Error **errp) static void scsi_hd_realize(SCSIDevice *dev, Error **errp) { SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); - blkconf_blocksizes(&s->qdev.conf); + /* can happen for devices without drive. The error message for missing + * backend will be issued in scsi_realize + */ + if (s->qdev.conf.blk) { + blkconf_blocksizes(&s->qdev.conf); + } s->qdev.blocksize = s->qdev.conf.logical_block_size; s->qdev.type = TYPE_DISK; if (!s->product) { From 354483e50726140d9c6cb5ff0e1698bd5f17a2d5 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 3 Mar 2015 15:59:26 +0100 Subject: [PATCH 68/73] Add testcase for scsi-hd devices without drive property Lets add a test for scsi devices without a drive. This was broken by a recent block patch, thus indicating that we need a testcase. Suggested-by: Kevin Wolf Signed-off-by: Christian Borntraeger Reviewed-by: Stefan Hajnoczi Reviewed-by: Max Reitz Signed-off-by: Kevin Wolf --- tests/qemu-iotests/051 | 6 ++++++ tests/qemu-iotests/051.out | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 index 2274d187a0ee..0360f37e5a9e 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 @@ -98,6 +98,12 @@ run_qemu -drive file="$TEST_IMG",driver=foo run_qemu -drive file="$TEST_IMG",driver=raw,format=qcow2 run_qemu -drive file="$TEST_IMG",driver=qcow2,format=qcow2 +echo +echo === Device without drive === +echo + +run_qemu -device virtio-scsi-pci -device scsi-hd + echo echo === Overriding backing file === echo diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out index 88add8f05673..09895e613621 100644 --- a/tests/qemu-iotests/051.out +++ b/tests/qemu-iotests/051.out @@ -47,6 +47,14 @@ Testing: -drive file=TEST_DIR/t.qcow2,driver=qcow2,format=qcow2 QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=qcow2,format=qcow2: Cannot specify both 'driver' and 'format' +=== Device without drive === + +Testing: -device virtio-scsi-pci -device scsi-hd +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) QEMU_PROG: -device scsi-hd: drive property not set +QEMU_PROG: -device scsi-hd: Device 'scsi-hd' could not be initialized + + === Overriding backing file === Testing: -drive file=TEST_DIR/t.qcow2,driver=qcow2,backing.file.filename=TEST_DIR/t.qcow2.orig -nodefaults From 27994d587940b0c72d5f1d69f6e1a62a02f26dc9 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Thu, 12 Feb 2015 14:49:50 +0100 Subject: [PATCH 69/73] sheepdog: Fix misleading error messages in sd_snapshot_create() If do_sd_create() fails, it first reports the error returned, then reports a another one with strerror(errno). errno is meaningless at that point. Report just one error combining the valid information from both messages. Reported-by: Eric Blake Signed-off-by: Markus Armbruster Reviewed-by: Liu Yuan Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/sheepdog.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/block/sheepdog.c b/block/sheepdog.c index 60a4853b640f..c14172cfa6c4 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -2339,9 +2339,8 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) ret = do_sd_create(s, &new_vid, 1, &local_err); if (ret < 0) { - error_report_err(local_err); - error_report("failed to create inode for snapshot. %s", - strerror(errno)); + error_report("failed to create inode for snapshot: %s", + error_get_pretty(local_err)); goto cleanup; } From c095348b914b740d8364a2ccf748e4284de3b68b Mon Sep 17 00:00:00 2001 From: John Snow Date: Wed, 4 Mar 2015 23:37:55 -0500 Subject: [PATCH 70/73] MAINTAINERS: Add jsnow as IDE maintainer It has been proposed that the block layer be split up into smaller, more manageable portions to help speed up the review and merging of block layer patches. As part of this process, I propose that I take over the IDE, ATA, ATAPI and FD devices. As we split out the block layer, we will begin using the qemu-block mailing list as a catchall for all of the block layer subcomponents. Please CC qemu-block@nongnu.org for all block layer patches, including any that touch the IDE/Floppy devices. Signed-off-by: John Snow Signed-off-by: Kevin Wolf --- MAINTAINERS | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8c06739ff973..d7c55b926bf0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -605,11 +605,25 @@ S: Maintained F: hw/misc/edu.c IDE -M: Kevin Wolf -M: Stefan Hajnoczi -S: Odd Fixes +M: John Snow +L: qemu-block@nongnu.org +S: Supported F: include/hw/ide.h F: hw/ide/ +F: hw/block/block.c +F: hw/block/cdrom.c +F: hw/block/hd-geometry.c +F: tests/ide-test.c +F: tests/ahci-test.c +T: git git://github.com/jnsnow/qemu.git ide + +Floppy +M: John Snow +L: qemu-block@nongnu.org +S: Supported +F: hw/block/fdc.c +F: include/hw/block/fdc.h +T: git git://github.com/jnsnow/qemu.git ide OMAP M: Peter Maydell From 22d182e82b4ba2fb78b2cc22bcec4e6a440b0ad6 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 5 Mar 2015 15:38:17 -0600 Subject: [PATCH 71/73] block/raw-posix: fix launching with failed disks Since commit c25f53b06eba1575d5d0e92a0132455c97825b83 ("raw: Probe required direct I/O alignment") QEMU has failed to launch if image files produce I/O errors. Previously, QEMU would launch successfully and the guest would see the errors when attempting I/O. This is a regression and may prevent multipath I/O inside the guest, where QEMU must launch and let the guest figure out by itself which disks are online. Tweak the alignment probing code in raw-posix.c to explicitly look for EINVAL on Linux instead of bailing. The kernel refuses misaligned requests with this error code and other error codes can be ignored. Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/raw-posix.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/block/raw-posix.c b/block/raw-posix.c index 3263d2b5a14e..f0b4488d1eb0 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -272,6 +272,31 @@ static int probe_physical_blocksize(int fd, unsigned int *blk_size) #endif } +/* Check if read is allowed with given memory buffer and length. + * + * This function is used to check O_DIRECT memory buffer and request alignment. + */ +static bool raw_is_io_aligned(int fd, void *buf, size_t len) +{ + ssize_t ret = pread(fd, buf, len, 0); + + if (ret >= 0) { + return true; + } + +#ifdef __linux__ + /* The Linux kernel returns EINVAL for misaligned O_DIRECT reads. Ignore + * other errors (e.g. real I/O error), which could happen on a failed + * drive, since we only care about probing alignment. + */ + if (errno != EINVAL) { + return true; + } +#endif + + return false; +} + static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp) { BDRVRawState *s = bs->opaque; @@ -307,7 +332,7 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp) size_t align; buf = qemu_memalign(MAX_BLOCKSIZE, 2 * MAX_BLOCKSIZE); for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) { - if (pread(fd, buf + align, MAX_BLOCKSIZE, 0) >= 0) { + if (raw_is_io_aligned(fd, buf + align, MAX_BLOCKSIZE)) { s->buf_align = align; break; } @@ -319,7 +344,7 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp) size_t align; buf = qemu_memalign(s->buf_align, MAX_BLOCKSIZE); for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) { - if (pread(fd, buf, align, 0) >= 0) { + if (raw_is_io_aligned(fd, buf, align)) { bs->request_alignment = align; break; } From 1a6e597995c8c0125d1f305b583d6de083e44c73 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 5 Mar 2015 15:38:18 -0600 Subject: [PATCH 72/73] iotests: add O_DIRECT alignment probing test This test case checks that image files can be opened even if I/O produces EIO errors. QEMU should not refuse opening failed disks since the guest may be configured for multipath I/O where accessing failed disks is expected. Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- tests/qemu-iotests/128 | 82 ++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/128.out | 5 +++ tests/qemu-iotests/group | 1 + 3 files changed, 88 insertions(+) create mode 100755 tests/qemu-iotests/128 create mode 100644 tests/qemu-iotests/128.out diff --git a/tests/qemu-iotests/128 b/tests/qemu-iotests/128 new file mode 100755 index 000000000000..249a8655817f --- /dev/null +++ b/tests/qemu-iotests/128 @@ -0,0 +1,82 @@ +#!/bin/bash +# +# Test that opening O_DIRECT succeeds when image file I/O produces EIO +# +# Copyright (C) 2015 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=stefanha@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +tmp=/tmp/$$ +status=1 # failure is the default! + +devname="eiodev$$" + +_setup_eiodev() +{ + # This test should either be run as root or with passwordless sudo + for cmd in "" "sudo -n"; do + echo "0 $((1024 * 1024 * 1024 / 512)) error" | \ + $cmd dmsetup create "$devname" 2>/dev/null + if [ "$?" -eq 0 ]; then + return + fi + done + _notrun "root privileges required to run dmsetup" +} + +_cleanup_eiodev() +{ + for cmd in "" "sudo -n"; do + $cmd dmsetup remove "$devname" 2>/dev/null + if [ "$?" -eq 0 ]; then + return + fi + done +} + +_cleanup() +{ + _cleanup_eiodev +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt raw +_supported_proto file +_supported_os Linux + +_setup_eiodev + +TEST_IMG="/dev/mapper/$devname" + +echo +echo "== reading from error device ==" +# Opening image should succeed but the read operation should fail +$QEMU_IO --format "$IMGFMT" --nocache -c "read 0 65536" "$TEST_IMG" | _filter_qemu_io + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/128.out b/tests/qemu-iotests/128.out new file mode 100644 index 000000000000..4e43f5faa041 --- /dev/null +++ b/tests/qemu-iotests/128.out @@ -0,0 +1,5 @@ +QA output created by 128 + +== reading from error device == +read failed: Input/output error +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 87eec393a80e..71f19d4ecea2 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -121,3 +121,4 @@ 114 rw auto quick 116 rw auto quick 123 rw auto quick +128 rw auto quick From 280458a34abcca2ba70843a089a35468c81e3740 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Fri, 6 Mar 2015 12:16:06 -0500 Subject: [PATCH 73/73] MAINTAINERS: Add jcody as blockjobs, block devices maintainer The block layer maintainership is being split up into smaller, more manageable pieces. I propose that I take over / assist with the following areas: * blockjobs * archipelago * curl * gluster * nfs * rbd * sheepdog * ssh * vhdx As John Snow noted in a different patch: As we split out the block layer, we will begin using the qemu-block mailing list as a catchall for all of the block layer subcomponents. Please CC qemu-block@nongnu.org for all block layer patches, including any that touch the above listed areas. Signed-off-by: Jeff Cody Signed-off-by: Kevin Wolf --- MAINTAINERS | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index d7c55b926bf0..08443c20e485 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -784,6 +784,18 @@ F: tests/qemu-iotests/ T: git git://repo.or.cz/qemu/kevin.git block T: git git://github.com/stefanha/qemu.git block +Block Jobs +M: Jeff Cody +L: qemu-block@nongnu.org +S: Supported +F: blockjob.c +F: include/block/blockjob.h +F: block/backup.c +F: block/commit.c +F: block/stream.h +F: block/mirror.c +T: git git://github.com/codyprime/qemu-kvm-jtc.git block + Character Devices M: Anthony Liguori M: Paolo Bonzini @@ -1093,20 +1105,28 @@ F: block/vmdk.c RBD M: Josh Durgin +M: Jeff Cody +L: qemu-block@nongnu.org S: Supported F: block/rbd.c +T: git git://github.com/codyprime/qemu-kvm-jtc.git block Sheepdog M: Hitoshi Mitake M: Liu Yuan +M: Jeff Cody +L: qemu-block@nongnu.org L: sheepdog@lists.wpkg.org S: Supported F: block/sheepdog.c +T: git git://github.com/codyprime/qemu-kvm-jtc.git block VHDX M: Jeff Cody +L: qemu-block@nongnu.org S: Supported F: block/vhdx* +T: git git://github.com/codyprime/qemu-kvm-jtc.git block VDI M: Stefan Weil @@ -1121,19 +1141,42 @@ S: Supported F: block/iscsi.c NFS +M: Jeff Cody M: Peter Lieven +L: qemu-block@nongnu.org S: Maintained F: block/nfs.c +T: git git://github.com/codyprime/qemu-kvm-jtc.git block SSH M: Richard W.M. Jones +M: Jeff Cody +L: qemu-block@nongnu.org S: Supported F: block/ssh.c +T: git git://github.com/codyprime/qemu-kvm-jtc.git block ARCHIPELAGO M: Chrysostomos Nanakos +M: Jeff Cody +L: qemu-block@nongnu.org S: Maintained F: block/archipelago.c +T: git git://github.com/codyprime/qemu-kvm-jtc.git block + +CURL +M: Jeff Cody +L: qemu-block@nongnu.org +S: Supported +F: block/curl.c +T: git git://github.com/codyprime/qemu-kvm-jtc.git block + +GLUSTER +M: Jeff Cody +L: qemu-block@nongnu.org +S: Supported +F: block/gluster.c +T: git git://github.com/codyprime/qemu-kvm-jtc.git block Bootdevice M: Gonglei