From 947eb2030eef73b7a66167a6244b11eeb4f109ff Mon Sep 17 00:00:00 2001 From: Niels de Vos Date: Thu, 10 Mar 2016 19:38:00 +0100 Subject: [PATCH 01/10] block/gluster: add support for SEEK_DATA/SEEK_HOLE GlusterFS 3.8 contains support for SEEK_DATA and SEEK_HOLE. This makes it possible to detect sparse areas in files. Signed-off-by: Niels de Vos Reviewed-by: Jeff Cody --- block/gluster.c | 182 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) diff --git a/block/gluster.c b/block/gluster.c index d361d8e8479f..38fce9ee2cdd 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -24,6 +24,7 @@ typedef struct GlusterAIOCB { typedef struct BDRVGlusterState { struct glfs *glfs; struct glfs_fd *fd; + bool supports_seek_data; } BDRVGlusterState; typedef struct GlusterConf { @@ -287,6 +288,28 @@ static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags) } } +/* + * Do SEEK_DATA/HOLE to detect if it is functional. Older broken versions of + * gfapi incorrectly return the current offset when SEEK_DATA/HOLE is used. + * - Corrected versions return -1 and set errno to EINVAL. + * - Versions that support SEEK_DATA/HOLE correctly, will return -1 and set + * errno to ENXIO when SEEK_DATA is called with a position of EOF. + */ +static bool qemu_gluster_test_seek(struct glfs_fd *fd) +{ + off_t ret, eof; + + eof = glfs_lseek(fd, 0, SEEK_END); + if (eof < 0) { + /* this should never occur */ + return false; + } + + /* this should always fail with ENXIO if SEEK_DATA is supported */ + ret = glfs_lseek(fd, eof, SEEK_DATA); + return (ret < 0) && (errno == ENXIO); +} + static int qemu_gluster_open(BlockDriverState *bs, QDict *options, int bdrv_flags, Error **errp) { @@ -338,6 +361,8 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options, ret = -errno; } + s->supports_seek_data = qemu_gluster_test_seek(s->fd); + out: qemu_opts_del(opts); qemu_gluster_gconf_free(gconf); @@ -727,6 +752,159 @@ static int qemu_gluster_has_zero_init(BlockDriverState *bs) return 0; } +/* + * Find allocation range in @bs around offset @start. + * May change underlying file descriptor's file offset. + * If @start is not in a hole, store @start in @data, and the + * beginning of the next hole in @hole, and return 0. + * If @start is in a non-trailing hole, store @start in @hole and the + * beginning of the next non-hole in @data, and return 0. + * If @start is in a trailing hole or beyond EOF, return -ENXIO. + * If we can't find out, return a negative errno other than -ENXIO. + * + * (Shamefully copied from raw-posix.c, only miniscule adaptions.) + */ +static int find_allocation(BlockDriverState *bs, off_t start, + off_t *data, off_t *hole) +{ + BDRVGlusterState *s = bs->opaque; + off_t offs; + + if (!s->supports_seek_data) { + return -ENOTSUP; + } + + /* + * SEEK_DATA cases: + * D1. offs == start: start is in data + * D2. offs > start: start is in a hole, next data at offs + * D3. offs < 0, errno = ENXIO: either start is in a trailing hole + * or start is beyond EOF + * If the latter happens, the file has been truncated behind + * our back since we opened it. All bets are off then. + * Treating like a trailing hole is simplest. + * D4. 
offs < 0, errno != ENXIO: we learned nothing + */ + offs = glfs_lseek(s->fd, start, SEEK_DATA); + if (offs < 0) { + return -errno; /* D3 or D4 */ + } + assert(offs >= start); + + if (offs > start) { + /* D2: in hole, next data at offs */ + *hole = start; + *data = offs; + return 0; + } + + /* D1: in data, end not yet known */ + + /* + * SEEK_HOLE cases: + * H1. offs == start: start is in a hole + * If this happens here, a hole has been dug behind our back + * since the previous lseek(). + * H2. offs > start: either start is in data, next hole at offs, + * or start is in trailing hole, EOF at offs + * Linux treats trailing holes like any other hole: offs == + * start. Solaris seeks to EOF instead: offs > start (blech). + * If that happens here, a hole has been dug behind our back + * since the previous lseek(). + * H3. offs < 0, errno = ENXIO: start is beyond EOF + * If this happens, the file has been truncated behind our + * back since we opened it. Treat it like a trailing hole. + * H4. offs < 0, errno != ENXIO: we learned nothing + * Pretend we know nothing at all, i.e. "forget" about D1. + */ + offs = glfs_lseek(s->fd, start, SEEK_HOLE); + if (offs < 0) { + return -errno; /* D1 and (H3 or H4) */ + } + assert(offs >= start); + + if (offs > start) { + /* + * D1 and H2: either in data, next hole at offs, or it was in + * data but is now in a trailing hole. In the latter case, + * all bets are off. Treating it as if it there was data all + * the way to EOF is safe, so simply do that. + */ + *data = start; + *hole = offs; + return 0; + } + + /* D1 and H1 */ + return -EBUSY; +} + +/* + * Returns the allocation status of the specified sectors. + * + * If 'sector_num' is beyond the end of the disk image the return value is 0 + * and 'pnum' is set to 0. + * + * 'pnum' is set to the number of sectors (including and immediately following + * the specified sector) that are known to be in the same + * allocated/unallocated state. + * + * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes + * beyond the end of the disk image it will be clamped. + * + * (Based on raw_co_get_block_status() from raw-posix.c.) + */ +static int64_t coroutine_fn qemu_gluster_co_get_block_status( + BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum, + BlockDriverState **file) +{ + BDRVGlusterState *s = bs->opaque; + off_t start, data = 0, hole = 0; + int64_t total_size; + int ret = -EINVAL; + + if (!s->fd) { + return ret; + } + + start = sector_num * BDRV_SECTOR_SIZE; + total_size = bdrv_getlength(bs); + if (total_size < 0) { + return total_size; + } else if (start >= total_size) { + *pnum = 0; + return 0; + } else if (start + nb_sectors * BDRV_SECTOR_SIZE > total_size) { + nb_sectors = DIV_ROUND_UP(total_size - start, BDRV_SECTOR_SIZE); + } + + ret = find_allocation(bs, start, &data, &hole); + if (ret == -ENXIO) { + /* Trailing hole */ + *pnum = nb_sectors; + ret = BDRV_BLOCK_ZERO; + } else if (ret < 0) { + /* No info available, so pretend there are no holes */ + *pnum = nb_sectors; + ret = BDRV_BLOCK_DATA; + } else if (data == start) { + /* On a data extent, compute sectors to the end of the extent, + * possibly including a partial sector at EOF. */ + *pnum = MIN(nb_sectors, DIV_ROUND_UP(hole - start, BDRV_SECTOR_SIZE)); + ret = BDRV_BLOCK_DATA; + } else { + /* On a hole, compute sectors to the beginning of the next extent. 
*/ + assert(hole == start); + *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE); + ret = BDRV_BLOCK_ZERO; + } + + *file = bs; + + return ret | BDRV_BLOCK_OFFSET_VALID | start; +} + + static QemuOptsList qemu_gluster_create_opts = { .name = "qemu-gluster-create-opts", .head = QTAILQ_HEAD_INITIALIZER(qemu_gluster_create_opts.head), @@ -769,6 +947,7 @@ static BlockDriver bdrv_gluster = { #ifdef CONFIG_GLUSTERFS_ZEROFILL .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes, #endif + .bdrv_co_get_block_status = qemu_gluster_co_get_block_status, .create_opts = &qemu_gluster_create_opts, }; @@ -796,6 +975,7 @@ static BlockDriver bdrv_gluster_tcp = { #ifdef CONFIG_GLUSTERFS_ZEROFILL .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes, #endif + .bdrv_co_get_block_status = qemu_gluster_co_get_block_status, .create_opts = &qemu_gluster_create_opts, }; @@ -823,6 +1003,7 @@ static BlockDriver bdrv_gluster_unix = { #ifdef CONFIG_GLUSTERFS_ZEROFILL .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes, #endif + .bdrv_co_get_block_status = qemu_gluster_co_get_block_status, .create_opts = &qemu_gluster_create_opts, }; @@ -850,6 +1031,7 @@ static BlockDriver bdrv_gluster_rdma = { #ifdef CONFIG_GLUSTERFS_ZEROFILL .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes, #endif + .bdrv_co_get_block_status = qemu_gluster_co_get_block_status, .create_opts = &qemu_gluster_create_opts, }; From 38f8d5e0251ae7d8257cf099cb3e5a375ef60378 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Thu, 19 May 2016 14:48:02 +0200 Subject: [PATCH 02/10] block/nfs: refuse readahead if cache.direct is on if we open a NFS export with disabled cache we should refuse the readahead feature as it will cache data inside libnfs. If a export was opened with readahead enabled it should futher not be allowed to disable the cache while running. 
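
For illustration (the export URL and readahead size below are made up, not
taken from the patch), an open that combines libnfs readahead with
cache.direct=on is now rejected up front:

  -drive file=nfs://127.0.0.1/export/test.qcow2?readahead=131072,cache.direct=on,if=virtio

fails with "Cannot enable NFS readahead if cache.direct = on", and a later
reopen that tries to switch such an image to cache.direct=on is refused with
"Cannot disable cache if libnfs readahead is enabled", since data would
otherwise keep being cached inside libnfs behind cache.direct's back.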
Cc: qemu-stable@nongnu.org Signed-off-by: Peter Lieven Reviewed-by: Jeff Cody Message-id: 1463662083-20814-2-git-send-email-pl@kamp.de Signed-off-by: Jeff Cody --- block/nfs.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/block/nfs.c b/block/nfs.c index 9f51cc3f10b9..60be45e8d94b 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -1,7 +1,7 @@ /* * QEMU Block driver for native access to files on NFS shares * - * Copyright (c) 2014 Peter Lieven + * Copyright (c) 2014-2016 Peter Lieven * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -47,6 +47,7 @@ typedef struct NFSClient { bool has_zero_init; AioContext *aio_context; blkcnt_t st_blocks; + bool cache_used; } NFSClient; typedef struct NFSRPC { @@ -278,7 +279,7 @@ static void nfs_file_close(BlockDriverState *bs) } static int64_t nfs_client_open(NFSClient *client, const char *filename, - int flags, Error **errp) + int flags, Error **errp, int open_flags) { int ret = -EINVAL, i; struct stat st; @@ -330,12 +331,18 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename, nfs_set_tcp_syncnt(client->context, val); #ifdef LIBNFS_FEATURE_READAHEAD } else if (!strcmp(qp->p[i].name, "readahead")) { + if (open_flags & BDRV_O_NOCACHE) { + error_setg(errp, "Cannot enable NFS readahead " + "if cache.direct = on"); + goto fail; + } if (val > QEMU_NFS_MAX_READAHEAD_SIZE) { error_report("NFS Warning: Truncating NFS readahead" " size to %d", QEMU_NFS_MAX_READAHEAD_SIZE); val = QEMU_NFS_MAX_READAHEAD_SIZE; } nfs_set_readahead(client->context, val); + client->cache_used = true; #endif #ifdef LIBNFS_FEATURE_DEBUG } else if (!strcmp(qp->p[i].name, "debug")) { @@ -418,7 +425,7 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags, } ret = nfs_client_open(client, qemu_opt_get(opts, "filename"), (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY, - errp); + errp, bs->open_flags); if (ret < 0) { goto out; } @@ -454,7 +461,7 @@ static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp) total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), BDRV_SECTOR_SIZE); - ret = nfs_client_open(client, url, O_CREAT, errp); + ret = nfs_client_open(client, url, O_CREAT, errp, 0); if (ret < 0) { goto out; } @@ -516,6 +523,11 @@ static int nfs_reopen_prepare(BDRVReopenState *state, return -EACCES; } + if ((state->flags & BDRV_O_NOCACHE) && client->cache_used) { + error_setg(errp, "Cannot disable cache if libnfs readahead is enabled"); + return -EINVAL; + } + /* Update cache for read-only reopens */ if (!(state->flags & BDRV_O_RDWR)) { ret = nfs_fstat(client->context, client->fh, &st); From d99b26c42a1375e41404b69c3214459e929af4e3 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Thu, 19 May 2016 14:48:03 +0200 Subject: [PATCH 03/10] block/nfs: add support for libnfs pagecache upcoming libnfs will have support for a read cache that can significantly help to speed up requests since libnfs by design circumvents the kernel cache. Example: qemu -cdrom nfs://127.0.0.1/iso/my.iso?pagecache=1024 The pagecache parameters takes the maximum amount of pages to cache. A page in libnfs is always the NFS_BLKSIZE which is 4KB. 
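
As a rough sizing example (assuming the 4KB NFS_BLKSIZE mentioned above),
pagecache=1024 caches at most 1024 * 4KB = 4MB of file data per client.
Requests above QEMU_NFS_MAX_PAGECACHE_SIZE, i.e. 8388608 / NFS_BLKSIZE =
2048 pages (8MB), are truncated with a warning, matching the existing
readahead limit handling.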
Signed-off-by: Peter Lieven Reviewed-by: Jeff Cody Message-id: 1463662083-20814-3-git-send-email-pl@kamp.de Signed-off-by: Jeff Cody --- block/nfs.c | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/block/nfs.c b/block/nfs.c index 60be45e8d94b..15d6832c4ce4 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -38,6 +38,7 @@ #include #define QEMU_NFS_MAX_READAHEAD_SIZE 1048576 +#define QEMU_NFS_MAX_PAGECACHE_SIZE (8388608 / NFS_BLKSIZE) #define QEMU_NFS_MAX_DEBUG_LEVEL 2 typedef struct NFSClient { @@ -342,6 +343,26 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename, val = QEMU_NFS_MAX_READAHEAD_SIZE; } nfs_set_readahead(client->context, val); +#ifdef LIBNFS_FEATURE_PAGECACHE + nfs_set_pagecache_ttl(client->context, 0); +#endif + client->cache_used = true; +#endif +#ifdef LIBNFS_FEATURE_PAGECACHE + nfs_set_pagecache_ttl(client->context, 0); + } else if (!strcmp(qp->p[i].name, "pagecache")) { + if (open_flags & BDRV_O_NOCACHE) { + error_setg(errp, "Cannot enable NFS pagecache " + "if cache.direct = on"); + goto fail; + } + if (val > QEMU_NFS_MAX_PAGECACHE_SIZE) { + error_report("NFS Warning: Truncating NFS pagecache" + " size to %d pages", QEMU_NFS_MAX_PAGECACHE_SIZE); + val = QEMU_NFS_MAX_PAGECACHE_SIZE; + } + nfs_set_pagecache(client->context, val); + nfs_set_pagecache_ttl(client->context, 0); client->cache_used = true; #endif #ifdef LIBNFS_FEATURE_DEBUG @@ -524,7 +545,8 @@ static int nfs_reopen_prepare(BDRVReopenState *state, } if ((state->flags & BDRV_O_NOCACHE) && client->cache_used) { - error_setg(errp, "Cannot disable cache if libnfs readahead is enabled"); + error_setg(errp, "Cannot disable cache if libnfs readahead or" + " pagecache is enabled"); return -EINVAL; } @@ -542,6 +564,15 @@ static int nfs_reopen_prepare(BDRVReopenState *state, return 0; } +#ifdef LIBNFS_FEATURE_PAGECACHE +static void nfs_invalidate_cache(BlockDriverState *bs, + Error **errp) +{ + NFSClient *client = bs->opaque; + nfs_pagecache_invalidate(client->context, client->fh); +} +#endif + static BlockDriver bdrv_nfs = { .format_name = "nfs", .protocol_name = "nfs", @@ -565,6 +596,10 @@ static BlockDriver bdrv_nfs = { .bdrv_detach_aio_context = nfs_detach_aio_context, .bdrv_attach_aio_context = nfs_attach_aio_context, + +#ifdef LIBNFS_FEATURE_PAGECACHE + .bdrv_invalidate_cache = nfs_invalidate_cache, +#endif }; static void nfs_block_init(void) From ff04198bf576beac5793c445c008ccc9d48f87d5 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 21 Jun 2016 17:09:17 +0300 Subject: [PATCH 04/10] mirror: fix trace_mirror_yield_in_flight usage in mirror_iteration() trace_mirror_yield_in_flight accepts 2nd arguments in sectors while here we pass chunks instead. Signed-off-by: Denis V. 
Lunev Reviewed-by: Eric Blake Message-id: 1466518157-27140-1-git-send-email-den@openvz.org CC: Jeff Cody CC: Kevin Wolf CC: Max Reitz Signed-off-by: Jeff Cody --- block/mirror.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/mirror.c b/block/mirror.c index a04ed9c7a448..930ac96a8606 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -327,7 +327,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) first_chunk = sector_num / sectors_per_chunk; while (test_bit(first_chunk, s->in_flight_bitmap)) { - trace_mirror_yield_in_flight(s, first_chunk, s->in_flight); + trace_mirror_yield_in_flight(s, sector_num, s->in_flight); mirror_wait_for_io(s); } From 7eac868a508cdbf4cccef5c2084941b63fa3aded Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Thu, 7 Apr 2016 17:24:19 -0400 Subject: [PATCH 05/10] block/gluster: add support for selecting debug logging level This adds commandline support for the logging level of the gluster protocol driver, output to stdout. The option is 'debug', e.g.: -drive filename=gluster://192.168.15.180/gv2/test.qcow2,debug=9 Debug levels are 0-9, with 9 being the most verbose, and 0 representing no debugging output. The default is the same as it was before, which is a level of 4. The current logging levels defined in the gluster source are: 0 - None 1 - Emergency 2 - Alert 3 - Critical 4 - Error 5 - Warning 6 - Notice 7 - Info 8 - Debug 9 - Trace (From: glusterfs/logging.h) Reviewed-by: Niels de Vos Signed-off-by: Jeff Cody --- block/gluster.c | 48 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/block/gluster.c b/block/gluster.c index 38fce9ee2cdd..16f7778a5079 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -25,6 +25,7 @@ typedef struct BDRVGlusterState { struct glfs *glfs; struct glfs_fd *fd; bool supports_seek_data; + int debug_level; } BDRVGlusterState; typedef struct GlusterConf { @@ -33,6 +34,7 @@ typedef struct GlusterConf { char *volname; char *image; char *transport; + int debug_level; } GlusterConf; static void qemu_gluster_gconf_free(GlusterConf *gconf) @@ -195,11 +197,7 @@ static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename, goto out; } - /* - * TODO: Use GF_LOG_ERROR instead of hard code value of 4 here when - * GlusterFS makes GF_LOG_* macros available to libgfapi users. 
- */ - ret = glfs_set_logging(glfs, "-", 4); + ret = glfs_set_logging(glfs, "-", gconf->debug_level); if (ret < 0) { goto out; } @@ -257,16 +255,26 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg) qemu_bh_schedule(acb->bh); } +#define GLUSTER_OPT_FILENAME "filename" +#define GLUSTER_OPT_DEBUG "debug" +#define GLUSTER_DEBUG_DEFAULT 4 +#define GLUSTER_DEBUG_MAX 9 + /* TODO Convert to fine grained options */ static QemuOptsList runtime_opts = { .name = "gluster", .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), .desc = { { - .name = "filename", + .name = GLUSTER_OPT_FILENAME, .type = QEMU_OPT_STRING, .help = "URL to the gluster image", }, + { + .name = GLUSTER_OPT_DEBUG, + .type = QEMU_OPT_NUMBER, + .help = "Gluster log level, valid range is 0-9", + }, { /* end of list */ } }, }; @@ -329,8 +337,17 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options, goto out; } - filename = qemu_opt_get(opts, "filename"); + filename = qemu_opt_get(opts, GLUSTER_OPT_FILENAME); + + s->debug_level = qemu_opt_get_number(opts, GLUSTER_OPT_DEBUG, + GLUSTER_DEBUG_DEFAULT); + if (s->debug_level < 0) { + s->debug_level = 0; + } else if (s->debug_level > GLUSTER_DEBUG_MAX) { + s->debug_level = GLUSTER_DEBUG_MAX; + } + gconf->debug_level = s->debug_level; s->glfs = qemu_gluster_init(gconf, filename, errp); if (!s->glfs) { ret = -errno; @@ -388,6 +405,7 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue, Error **errp) { int ret = 0; + BDRVGlusterState *s; BDRVGlusterReopenState *reop_s; GlusterConf *gconf = NULL; int open_flags = 0; @@ -395,6 +413,8 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state, assert(state != NULL); assert(state->bs != NULL); + s = state->bs->opaque; + state->opaque = g_new0(BDRVGlusterReopenState, 1); reop_s = state->opaque; @@ -402,6 +422,7 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state, gconf = g_new0(GlusterConf, 1); + gconf->debug_level = s->debug_level; reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, errp); if (reop_s->glfs == NULL) { ret = -errno; @@ -535,6 +556,14 @@ static int qemu_gluster_create(const char *filename, char *tmp = NULL; GlusterConf *gconf = g_new0(GlusterConf, 1); + gconf->debug_level = qemu_opt_get_number_del(opts, GLUSTER_OPT_DEBUG, + GLUSTER_DEBUG_DEFAULT); + if (gconf->debug_level < 0) { + gconf->debug_level = 0; + } else if (gconf->debug_level > GLUSTER_DEBUG_MAX) { + gconf->debug_level = GLUSTER_DEBUG_MAX; + } + glfs = qemu_gluster_init(gconf, filename, errp); if (!glfs) { ret = -errno; @@ -919,6 +948,11 @@ static QemuOptsList qemu_gluster_create_opts = { .type = QEMU_OPT_STRING, .help = "Preallocation mode (allowed values: off, full)" }, + { + .name = GLUSTER_OPT_DEBUG, + .type = QEMU_OPT_NUMBER, + .help = "Gluster log level, valid range is 0-9", + }, { /* end of list */ } } }; From 176129552f78bcb99022036d3293c6593c9716c3 Mon Sep 17 00:00:00 2001 From: John Snow Date: Wed, 22 Jun 2016 15:51:02 -0400 Subject: [PATCH 06/10] mirror: clarify mirror_do_read return code mirror_do_read intends to return the number of sectors processed after the starting sector, without regard to how many sectors were processed before the starting sector due to alignment. Clean up the comments and code to hopefully illustrate this more clearly. This also fixes an issue in initialization where if the mirror buffer size is initialized to smaller than the number of sectors being requested for transfer, we report back an incorrectly large number to the caller. 
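
A made-up example of the initialization issue fixed here: if the caller asks
for 2048 sectors but s->buf_size only allows 1024 sectors per operation, the
old code initialized ret to 2048 before the MIN() clamp, so 2048 was reported
even though only 1024 sectors were actually submitted; moving the assignment
after the clamp makes the reported count (plus any later alignment
adjustment) match what was really issued at or after sector_num.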
Signed-off-by: John Snow Reviewed-by: Eric Blake Reviewed-by: Fam Zheng Message-id: 1466625064-11280-2-git-send-email-jsnow@redhat.com Signed-off-by: Jeff Cody --- block/mirror.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index 930ac96a8606..42ebc3b9e9ee 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -218,7 +218,9 @@ static inline void mirror_wait_for_io(MirrorBlockJob *s) } /* Submit async read while handling COW. - * Returns: nb_sectors if no alignment is necessary, or + * Returns: The number of sectors copied after and including sector_num, + * excluding any sectors copied prior to sector_num due to alignment. + * This will be nb_sectors if no alignment is necessary, or * (new_end - sector_num) if tail is rounded up or down due to * alignment or buffer limit. */ @@ -227,7 +229,7 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num, { BlockBackend *source = s->common.blk; int sectors_per_chunk, nb_chunks; - int ret = nb_sectors; + int ret; MirrorOp *op; sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; @@ -235,6 +237,7 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num, /* We can only handle as much as buf_size at a time. */ nb_sectors = MIN(s->buf_size >> BDRV_SECTOR_BITS, nb_sectors); assert(nb_sectors); + ret = nb_sectors; if (s->cow_bitmap) { ret += mirror_cow_align(s, §or_num, &nb_sectors); From e4808881cbbe8ee9b587a49dd5fabd88375affd6 Mon Sep 17 00:00:00 2001 From: John Snow Date: Wed, 22 Jun 2016 15:51:03 -0400 Subject: [PATCH 07/10] mirror: limit niov to IOV_MAX elements, again During the refactor of mirror_iteration in e5b43573, we regressed the fix introduced in cae98cb8. This patch re-adds IOV_MAX checking to cases where we aren't checking alignment (and size) already. Signed-off-by: John Snow Reviewed-by: Eric Blake Reviewed-by: Fam Zheng Message-id: 1466625064-11280-3-git-send-email-jsnow@redhat.com Signed-off-by: Jeff Cody --- block/mirror.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/mirror.c b/block/mirror.c index 42ebc3b9e9ee..5bac906d82fb 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -231,11 +231,14 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num, int sectors_per_chunk, nb_chunks; int ret; MirrorOp *op; + int max_sectors; sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; + max_sectors = sectors_per_chunk * s->max_iov; /* We can only handle as much as buf_size at a time. 
*/ nb_sectors = MIN(s->buf_size >> BDRV_SECTOR_BITS, nb_sectors); + nb_sectors = MIN(max_sectors, nb_sectors); assert(nb_sectors); ret = nb_sectors; From ccee3d8f97781515ed40356f1e6a7a233dd466b8 Mon Sep 17 00:00:00 2001 From: John Snow Date: Wed, 22 Jun 2016 15:51:04 -0400 Subject: [PATCH 08/10] iotests: add small-granularity mirror test Signed-off-by: John Snow Reviewed-by: Eric Blake Reviewed-by: Fam Zheng Message-id: 1466625064-11280-4-git-send-email-jsnow@redhat.com Signed-off-by: Jeff Cody --- tests/qemu-iotests/041 | 30 ++++++++++++++++++++++++++++++ tests/qemu-iotests/041.out | 4 ++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 index ed1d9d464c7e..cbf5e0ba5c60 100755 --- a/tests/qemu-iotests/041 +++ b/tests/qemu-iotests/041 @@ -727,6 +727,36 @@ class TestUnbackedSource(iotests.QMPTestCase): self.complete_and_wait() self.assert_no_active_block_jobs() +class TestGranularity(iotests.QMPTestCase): + image_len = 10 * 1024 * 1024 # MB + + def setUp(self): + qemu_img('create', '-f', iotests.imgfmt, test_img, + str(TestGranularity.image_len)) + qemu_io('-c', 'write 0 %d' % (self.image_len), + test_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + self.assertTrue(iotests.compare_images(test_img, target_img), + 'target image does not match source after mirroring') + os.remove(test_img) + os.remove(target_img) + + def test_granularity(self): + self.assert_no_active_block_jobs() + result = self.vm.qmp('drive-mirror', device='drive0', + sync='full', target=target_img, + mode='absolute-paths', granularity=8192) + self.assert_qmp(result, 'return', {}) + event = self.vm.get_qmp_event(wait=60.0) + # Failures will manifest as COMPLETED/ERROR. + self.assert_qmp(event, 'event', 'BLOCK_JOB_READY') + self.complete_and_wait(drive='drive0', wait_ready=False) + self.assert_no_active_block_jobs() + class TestRepairQuorum(iotests.QMPTestCase): """ This class test quorum file repair using drive-mirror. It's mostly a fork of TestSingleDrive """ diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out index b0cadc8245e5..b67d0504a6d9 100644 --- a/tests/qemu-iotests/041.out +++ b/tests/qemu-iotests/041.out @@ -1,5 +1,5 @@ -........................................................................... +............................................................................ 
 ----------------------------------------------------------------------
-Ran 75 tests
+Ran 76 tests
 
 OK

From b48100cf07c94f66feef6886d3697eac8635bce4 Mon Sep 17 00:00:00 2001
From: Changlong Xie
Date: Thu, 23 Jun 2016 16:57:20 +0800
Subject: [PATCH 09/10] blockjob: assert(cb) when create job

Callback for block job should always exist

Suggested-by: Paolo Bonzini
Suggested-by: Kevin Wolf
Signed-off-by: Changlong Xie
Reviewed-by: Fam Zheng
Reviewed-by: Stefan Hajnoczi
Reviewed-by: Jeff Cody
Message-id: 1466672241-22485-2-git-send-email-xiecl.fnst@cn.fujitsu.com
Signed-off-by: Jeff Cody
---
 block/backup.c | 1 -
 blockjob.c     | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/backup.c b/block/backup.c
index 581269b29aef..f87f8d539b52 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -489,7 +489,6 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
 
     assert(bs);
     assert(target);
-    assert(cb);
 
     if (bs == target) {
         error_setg(errp, "Source and target cannot be the same");
diff --git a/blockjob.c b/blockjob.c
index 90c4e262b090..205da9df4e03 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -110,6 +110,7 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
     BlockBackend *blk;
     BlockJob *job;
 
+    assert(cb);
     if (bs->job) {
         error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
         return NULL;

From 15d6729850728ee49859711dd40b00d8d85d94ee Mon Sep 17 00:00:00 2001
From: Changlong Xie
Date: Thu, 23 Jun 2016 16:57:21 +0800
Subject: [PATCH 10/10] mirror: fix misleading comments

s/target bs/to_replace/, also we check to_replace bs is not blocked in
qmp_drive_mirror() not here

Signed-off-by: Changlong Xie
Reviewed-by: Fam Zheng
Reviewed-by: Stefan Hajnoczi
Reviewed-by: Jeff Cody
Message-id: 1466672241-22485-3-git-send-email-xiecl.fnst@cn.fujitsu.com
Signed-off-by: Jeff Cody
---
 block/mirror.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/mirror.c b/block/mirror.c
index 5bac906d82fb..8d96049555fc 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -775,7 +775,7 @@ static void mirror_complete(BlockJob *job, Error **errp)
         }
     }
 
-    /* check the target bs is not blocked and block all operations on it */
+    /* block all operations on to_replace bs */
     if (s->replaces) {
         AioContext *replace_aio_context;