From 947eb2030eef73b7a66167a6244b11eeb4f109ff Mon Sep 17 00:00:00 2001 From: Niels de Vos Date: Thu, 10 Mar 2016 19:38:00 +0100 Subject: [PATCH 01/10] block/gluster: add support for SEEK_DATA/SEEK_HOLE GlusterFS 3.8 contains support for SEEK_DATA and SEEK_HOLE. This makes it possible to detect sparse areas in files. Signed-off-by: Niels de Vos Reviewed-by: Jeff Cody --- block/gluster.c | 182 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) diff --git a/block/gluster.c b/block/gluster.c index d361d8e8479f..38fce9ee2cdd 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -24,6 +24,7 @@ typedef struct GlusterAIOCB { typedef struct BDRVGlusterState { struct glfs *glfs; struct glfs_fd *fd; + bool supports_seek_data; } BDRVGlusterState; typedef struct GlusterConf { @@ -287,6 +288,28 @@ static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags) } } +/* + * Do SEEK_DATA/HOLE to detect if it is functional. Older broken versions of + * gfapi incorrectly return the current offset when SEEK_DATA/HOLE is used. + * - Corrected versions return -1 and set errno to EINVAL. + * - Versions that support SEEK_DATA/HOLE correctly, will return -1 and set + * errno to ENXIO when SEEK_DATA is called with a position of EOF. + */ +static bool qemu_gluster_test_seek(struct glfs_fd *fd) +{ + off_t ret, eof; + + eof = glfs_lseek(fd, 0, SEEK_END); + if (eof < 0) { + /* this should never occur */ + return false; + } + + /* this should always fail with ENXIO if SEEK_DATA is supported */ + ret = glfs_lseek(fd, eof, SEEK_DATA); + return (ret < 0) && (errno == ENXIO); +} + static int qemu_gluster_open(BlockDriverState *bs, QDict *options, int bdrv_flags, Error **errp) { @@ -338,6 +361,8 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options, ret = -errno; } + s->supports_seek_data = qemu_gluster_test_seek(s->fd); + out: qemu_opts_del(opts); qemu_gluster_gconf_free(gconf); @@ -727,6 +752,159 @@ static int qemu_gluster_has_zero_init(BlockDriverState *bs) return 0; } +/* + * Find allocation range in @bs around offset @start. + * May change underlying file descriptor's file offset. + * If @start is not in a hole, store @start in @data, and the + * beginning of the next hole in @hole, and return 0. + * If @start is in a non-trailing hole, store @start in @hole and the + * beginning of the next non-hole in @data, and return 0. + * If @start is in a trailing hole or beyond EOF, return -ENXIO. + * If we can't find out, return a negative errno other than -ENXIO. + * + * (Shamefully copied from raw-posix.c, only miniscule adaptions.) + */ +static int find_allocation(BlockDriverState *bs, off_t start, + off_t *data, off_t *hole) +{ + BDRVGlusterState *s = bs->opaque; + off_t offs; + + if (!s->supports_seek_data) { + return -ENOTSUP; + } + + /* + * SEEK_DATA cases: + * D1. offs == start: start is in data + * D2. offs > start: start is in a hole, next data at offs + * D3. offs < 0, errno = ENXIO: either start is in a trailing hole + * or start is beyond EOF + * If the latter happens, the file has been truncated behind + * our back since we opened it. All bets are off then. + * Treating like a trailing hole is simplest. + * D4. 
offs < 0, errno != ENXIO: we learned nothing + */ + offs = glfs_lseek(s->fd, start, SEEK_DATA); + if (offs < 0) { + return -errno; /* D3 or D4 */ + } + assert(offs >= start); + + if (offs > start) { + /* D2: in hole, next data at offs */ + *hole = start; + *data = offs; + return 0; + } + + /* D1: in data, end not yet known */ + + /* + * SEEK_HOLE cases: + * H1. offs == start: start is in a hole + * If this happens here, a hole has been dug behind our back + * since the previous lseek(). + * H2. offs > start: either start is in data, next hole at offs, + * or start is in trailing hole, EOF at offs + * Linux treats trailing holes like any other hole: offs == + * start. Solaris seeks to EOF instead: offs > start (blech). + * If that happens here, a hole has been dug behind our back + * since the previous lseek(). + * H3. offs < 0, errno = ENXIO: start is beyond EOF + * If this happens, the file has been truncated behind our + * back since we opened it. Treat it like a trailing hole. + * H4. offs < 0, errno != ENXIO: we learned nothing + * Pretend we know nothing at all, i.e. "forget" about D1. + */ + offs = glfs_lseek(s->fd, start, SEEK_HOLE); + if (offs < 0) { + return -errno; /* D1 and (H3 or H4) */ + } + assert(offs >= start); + + if (offs > start) { + /* + * D1 and H2: either in data, next hole at offs, or it was in + * data but is now in a trailing hole. In the latter case, + * all bets are off. Treating it as if it there was data all + * the way to EOF is safe, so simply do that. + */ + *data = start; + *hole = offs; + return 0; + } + + /* D1 and H1 */ + return -EBUSY; +} + +/* + * Returns the allocation status of the specified sectors. + * + * If 'sector_num' is beyond the end of the disk image the return value is 0 + * and 'pnum' is set to 0. + * + * 'pnum' is set to the number of sectors (including and immediately following + * the specified sector) that are known to be in the same + * allocated/unallocated state. + * + * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes + * beyond the end of the disk image it will be clamped. + * + * (Based on raw_co_get_block_status() from raw-posix.c.) + */ +static int64_t coroutine_fn qemu_gluster_co_get_block_status( + BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum, + BlockDriverState **file) +{ + BDRVGlusterState *s = bs->opaque; + off_t start, data = 0, hole = 0; + int64_t total_size; + int ret = -EINVAL; + + if (!s->fd) { + return ret; + } + + start = sector_num * BDRV_SECTOR_SIZE; + total_size = bdrv_getlength(bs); + if (total_size < 0) { + return total_size; + } else if (start >= total_size) { + *pnum = 0; + return 0; + } else if (start + nb_sectors * BDRV_SECTOR_SIZE > total_size) { + nb_sectors = DIV_ROUND_UP(total_size - start, BDRV_SECTOR_SIZE); + } + + ret = find_allocation(bs, start, &data, &hole); + if (ret == -ENXIO) { + /* Trailing hole */ + *pnum = nb_sectors; + ret = BDRV_BLOCK_ZERO; + } else if (ret < 0) { + /* No info available, so pretend there are no holes */ + *pnum = nb_sectors; + ret = BDRV_BLOCK_DATA; + } else if (data == start) { + /* On a data extent, compute sectors to the end of the extent, + * possibly including a partial sector at EOF. */ + *pnum = MIN(nb_sectors, DIV_ROUND_UP(hole - start, BDRV_SECTOR_SIZE)); + ret = BDRV_BLOCK_DATA; + } else { + /* On a hole, compute sectors to the beginning of the next extent. 
*/ + assert(hole == start); + *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE); + ret = BDRV_BLOCK_ZERO; + } + + *file = bs; + + return ret | BDRV_BLOCK_OFFSET_VALID | start; +} + + static QemuOptsList qemu_gluster_create_opts = { .name = "qemu-gluster-create-opts", .head = QTAILQ_HEAD_INITIALIZER(qemu_gluster_create_opts.head), @@ -769,6 +947,7 @@ static BlockDriver bdrv_gluster = { #ifdef CONFIG_GLUSTERFS_ZEROFILL .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes, #endif + .bdrv_co_get_block_status = qemu_gluster_co_get_block_status, .create_opts = &qemu_gluster_create_opts, }; @@ -796,6 +975,7 @@ static BlockDriver bdrv_gluster_tcp = { #ifdef CONFIG_GLUSTERFS_ZEROFILL .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes, #endif + .bdrv_co_get_block_status = qemu_gluster_co_get_block_status, .create_opts = &qemu_gluster_create_opts, }; @@ -823,6 +1003,7 @@ static BlockDriver bdrv_gluster_unix = { #ifdef CONFIG_GLUSTERFS_ZEROFILL .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes, #endif + .bdrv_co_get_block_status = qemu_gluster_co_get_block_status, .create_opts = &qemu_gluster_create_opts, }; @@ -850,6 +1031,7 @@ static BlockDriver bdrv_gluster_rdma = { #ifdef CONFIG_GLUSTERFS_ZEROFILL .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes, #endif + .bdrv_co_get_block_status = qemu_gluster_co_get_block_status, .create_opts = &qemu_gluster_create_opts, }; From 38f8d5e0251ae7d8257cf099cb3e5a375ef60378 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Thu, 19 May 2016 14:48:02 +0200 Subject: [PATCH 02/10] block/nfs: refuse readahead if cache.direct is on if we open a NFS export with disabled cache we should refuse the readahead feature as it will cache data inside libnfs. If a export was opened with readahead enabled it should futher not be allowed to disable the cache while running. 
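
For illustration (the export URL and readahead size below are made up, not
taken from the patch), an open that combines libnfs readahead with
cache.direct=on is now rejected up front:

  -drive file=nfs://127.0.0.1/export/test.qcow2?readahead=131072,cache.direct=on,if=virtio

fails with "Cannot enable NFS readahead if cache.direct = on", and a later
reopen that tries to switch such an image to cache.direct=on is refused with
"Cannot disable cache if libnfs readahead is enabled", since data would
otherwise keep being cached inside libnfs behind cache.direct's back.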
Cc: qemu-stable@nongnu.org Signed-off-by: Peter Lieven Reviewed-by: Jeff Cody Message-id: 1463662083-20814-2-git-send-email-pl@kamp.de Signed-off-by: Jeff Cody --- block/nfs.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/block/nfs.c b/block/nfs.c index 9f51cc3f10b9..60be45e8d94b 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -1,7 +1,7 @@ /* * QEMU Block driver for native access to files on NFS shares * - * Copyright (c) 2014 Peter Lieven + * Copyright (c) 2014-2016 Peter Lieven * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -47,6 +47,7 @@ typedef struct NFSClient { bool has_zero_init; AioContext *aio_context; blkcnt_t st_blocks; + bool cache_used; } NFSClient; typedef struct NFSRPC { @@ -278,7 +279,7 @@ static void nfs_file_close(BlockDriverState *bs) } static int64_t nfs_client_open(NFSClient *client, const char *filename, - int flags, Error **errp) + int flags, Error **errp, int open_flags) { int ret = -EINVAL, i; struct stat st; @@ -330,12 +331,18 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename, nfs_set_tcp_syncnt(client->context, val); #ifdef LIBNFS_FEATURE_READAHEAD } else if (!strcmp(qp->p[i].name, "readahead")) { + if (open_flags & BDRV_O_NOCACHE) { + error_setg(errp, "Cannot enable NFS readahead " + "if cache.direct = on"); + goto fail; + } if (val > QEMU_NFS_MAX_READAHEAD_SIZE) { error_report("NFS Warning: Truncating NFS readahead" " size to %d", QEMU_NFS_MAX_READAHEAD_SIZE); val = QEMU_NFS_MAX_READAHEAD_SIZE; } nfs_set_readahead(client->context, val); + client->cache_used = true; #endif #ifdef LIBNFS_FEATURE_DEBUG } else if (!strcmp(qp->p[i].name, "debug")) { @@ -418,7 +425,7 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags, } ret = nfs_client_open(client, qemu_opt_get(opts, "filename"), (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY, - errp); + errp, bs->open_flags); if (ret < 0) { goto out; } @@ -454,7 +461,7 @@ static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp) total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), BDRV_SECTOR_SIZE); - ret = nfs_client_open(client, url, O_CREAT, errp); + ret = nfs_client_open(client, url, O_CREAT, errp, 0); if (ret < 0) { goto out; } @@ -516,6 +523,11 @@ static int nfs_reopen_prepare(BDRVReopenState *state, return -EACCES; } + if ((state->flags & BDRV_O_NOCACHE) && client->cache_used) { + error_setg(errp, "Cannot disable cache if libnfs readahead is enabled"); + return -EINVAL; + } + /* Update cache for read-only reopens */ if (!(state->flags & BDRV_O_RDWR)) { ret = nfs_fstat(client->context, client->fh, &st); From d99b26c42a1375e41404b69c3214459e929af4e3 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Thu, 19 May 2016 14:48:03 +0200 Subject: [PATCH 03/10] block/nfs: add support for libnfs pagecache upcoming libnfs will have support for a read cache that can significantly help to speed up requests since libnfs by design circumvents the kernel cache. Example: qemu -cdrom nfs://127.0.0.1/iso/my.iso?pagecache=1024 The pagecache parameters takes the maximum amount of pages to cache. A page in libnfs is always the NFS_BLKSIZE which is 4KB. 
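
As a rough sizing example (assuming the 4KB NFS_BLKSIZE mentioned above),
pagecache=1024 caches at most 1024 * 4KB = 4MB of file data per client.
Requests above QEMU_NFS_MAX_PAGECACHE_SIZE, i.e. 8388608 / NFS_BLKSIZE =
2048 pages (8MB), are truncated with a warning, matching the existing
readahead limit handling.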
Signed-off-by: Peter Lieven Reviewed-by: Jeff Cody Message-id: 1463662083-20814-3-git-send-email-pl@kamp.de Signed-off-by: Jeff Cody --- block/nfs.c | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/block/nfs.c b/block/nfs.c index 60be45e8d94b..15d6832c4ce4 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -38,6 +38,7 @@ #include #define QEMU_NFS_MAX_READAHEAD_SIZE 1048576 +#define QEMU_NFS_MAX_PAGECACHE_SIZE (8388608 / NFS_BLKSIZE) #define QEMU_NFS_MAX_DEBUG_LEVEL 2 typedef struct NFSClient { @@ -342,6 +343,26 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename, val = QEMU_NFS_MAX_READAHEAD_SIZE; } nfs_set_readahead(client->context, val); +#ifdef LIBNFS_FEATURE_PAGECACHE + nfs_set_pagecache_ttl(client->context, 0); +#endif + client->cache_used = true; +#endif +#ifdef LIBNFS_FEATURE_PAGECACHE + nfs_set_pagecache_ttl(client->context, 0); + } else if (!strcmp(qp->p[i].name, "pagecache")) { + if (open_flags & BDRV_O_NOCACHE) { + error_setg(errp, "Cannot enable NFS pagecache " + "if cache.direct = on"); + goto fail; + } + if (val > QEMU_NFS_MAX_PAGECACHE_SIZE) { + error_report("NFS Warning: Truncating NFS pagecache" + " size to %d pages", QEMU_NFS_MAX_PAGECACHE_SIZE); + val = QEMU_NFS_MAX_PAGECACHE_SIZE; + } + nfs_set_pagecache(client->context, val); + nfs_set_pagecache_ttl(client->context, 0); client->cache_used = true; #endif #ifdef LIBNFS_FEATURE_DEBUG @@ -524,7 +545,8 @@ static int nfs_reopen_prepare(BDRVReopenState *state, } if ((state->flags & BDRV_O_NOCACHE) && client->cache_used) { - error_setg(errp, "Cannot disable cache if libnfs readahead is enabled"); + error_setg(errp, "Cannot disable cache if libnfs readahead or" + " pagecache is enabled"); return -EINVAL; } @@ -542,6 +564,15 @@ static int nfs_reopen_prepare(BDRVReopenState *state, return 0; } +#ifdef LIBNFS_FEATURE_PAGECACHE +static void nfs_invalidate_cache(BlockDriverState *bs, + Error **errp) +{ + NFSClient *client = bs->opaque; + nfs_pagecache_invalidate(client->context, client->fh); +} +#endif + static BlockDriver bdrv_nfs = { .format_name = "nfs", .protocol_name = "nfs", @@ -565,6 +596,10 @@ static BlockDriver bdrv_nfs = { .bdrv_detach_aio_context = nfs_detach_aio_context, .bdrv_attach_aio_context = nfs_attach_aio_context, + +#ifdef LIBNFS_FEATURE_PAGECACHE + .bdrv_invalidate_cache = nfs_invalidate_cache, +#endif }; static void nfs_block_init(void) From ff04198bf576beac5793c445c008ccc9d48f87d5 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 21 Jun 2016 17:09:17 +0300 Subject: [PATCH 04/10] mirror: fix trace_mirror_yield_in_flight usage in mirror_iteration() trace_mirror_yield_in_flight accepts 2nd arguments in sectors while here we pass chunks instead. Signed-off-by: Denis V. 
Lunev Reviewed-by: Eric Blake Message-id: 1466518157-27140-1-git-send-email-den@openvz.org CC: Jeff Cody CC: Kevin Wolf CC: Max Reitz Signed-off-by: Jeff Cody --- block/mirror.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/mirror.c b/block/mirror.c index a04ed9c7a448..930ac96a8606 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -327,7 +327,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) first_chunk = sector_num / sectors_per_chunk; while (test_bit(first_chunk, s->in_flight_bitmap)) { - trace_mirror_yield_in_flight(s, first_chunk, s->in_flight); + trace_mirror_yield_in_flight(s, sector_num, s->in_flight); mirror_wait_for_io(s); } From 7eac868a508cdbf4cccef5c2084941b63fa3aded Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Thu, 7 Apr 2016 17:24:19 -0400 Subject: [PATCH 05/10] block/gluster: add support for selecting debug logging level This adds commandline support for the logging level of the gluster protocol driver, output to stdout. The option is 'debug', e.g.: -drive filename=gluster://192.168.15.180/gv2/test.qcow2,debug=9 Debug levels are 0-9, with 9 being the most verbose, and 0 representing no debugging output. The default is the same as it was before, which is a level of 4. The current logging levels defined in the gluster source are: 0 - None 1 - Emergency 2 - Alert 3 - Critical 4 - Error 5 - Warning 6 - Notice 7 - Info 8 - Debug 9 - Trace (From: glusterfs/logging.h) Reviewed-by: Niels de Vos Signed-off-by: Jeff Cody --- block/gluster.c | 48 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/block/gluster.c b/block/gluster.c index 38fce9ee2cdd..16f7778a5079 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -25,6 +25,7 @@ typedef struct BDRVGlusterState { struct glfs *glfs; struct glfs_fd *fd; bool supports_seek_data; + int debug_level; } BDRVGlusterState; typedef struct GlusterConf { @@ -33,6 +34,7 @@ typedef struct GlusterConf { char *volname; char *image; char *transport; + int debug_level; } GlusterConf; static void qemu_gluster_gconf_free(GlusterConf *gconf) @@ -195,11 +197,7 @@ static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename, goto out; } - /* - * TODO: Use GF_LOG_ERROR instead of hard code value of 4 here when - * GlusterFS makes GF_LOG_* macros available to libgfapi users. 
- */ - ret = glfs_set_logging(glfs, "-", 4); + ret = glfs_set_logging(glfs, "-", gconf->debug_level); if (ret < 0) { goto out; } @@ -257,16 +255,26 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg) qemu_bh_schedule(acb->bh); } +#define GLUSTER_OPT_FILENAME "filename" +#define GLUSTER_OPT_DEBUG "debug" +#define GLUSTER_DEBUG_DEFAULT 4 +#define GLUSTER_DEBUG_MAX 9 + /* TODO Convert to fine grained options */ static QemuOptsList runtime_opts = { .name = "gluster", .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), .desc = { { - .name = "filename", + .name = GLUSTER_OPT_FILENAME, .type = QEMU_OPT_STRING, .help = "URL to the gluster image", }, + { + .name = GLUSTER_OPT_DEBUG, + .type = QEMU_OPT_NUMBER, + .help = "Gluster log level, valid range is 0-9", + }, { /* end of list */ } }, }; @@ -329,8 +337,17 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options, goto out; } - filename = qemu_opt_get(opts, "filename"); + filename = qemu_opt_get(opts, GLUSTER_OPT_FILENAME); + + s->debug_level = qemu_opt_get_number(opts, GLUSTER_OPT_DEBUG, + GLUSTER_DEBUG_DEFAULT); + if (s->debug_level < 0) { + s->debug_level = 0; + } else if (s->debug_level > GLUSTER_DEBUG_MAX) { + s->debug_level = GLUSTER_DEBUG_MAX; + } + gconf->debug_level = s->debug_level; s->glfs = qemu_gluster_init(gconf, filename, errp); if (!s->glfs) { ret = -errno; @@ -388,6 +405,7 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue, Error **errp) { int ret = 0; + BDRVGlusterState *s; BDRVGlusterReopenState *reop_s; GlusterConf *gconf = NULL; int open_flags = 0; @@ -395,6 +413,8 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state, assert(state != NULL); assert(state->bs != NULL); + s = state->bs->opaque; + state->opaque = g_new0(BDRVGlusterReopenState, 1); reop_s = state->opaque; @@ -402,6 +422,7 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state, gconf = g_new0(GlusterConf, 1); + gconf->debug_level = s->debug_level; reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, errp); if (reop_s->glfs == NULL) { ret = -errno; @@ -535,6 +556,14 @@ static int qemu_gluster_create(const char *filename, char *tmp = NULL; GlusterConf *gconf = g_new0(GlusterConf, 1); + gconf->debug_level = qemu_opt_get_number_del(opts, GLUSTER_OPT_DEBUG, + GLUSTER_DEBUG_DEFAULT); + if (gconf->debug_level < 0) { + gconf->debug_level = 0; + } else if (gconf->debug_level > GLUSTER_DEBUG_MAX) { + gconf->debug_level = GLUSTER_DEBUG_MAX; + } + glfs = qemu_gluster_init(gconf, filename, errp); if (!glfs) { ret = -errno; @@ -919,6 +948,11 @@ static QemuOptsList qemu_gluster_create_opts = { .type = QEMU_OPT_STRING, .help = "Preallocation mode (allowed values: off, full)" }, + { + .name = GLUSTER_OPT_DEBUG, + .type = QEMU_OPT_NUMBER, + .help = "Gluster log level, valid range is 0-9", + }, { /* end of list */ } } }; From 176129552f78bcb99022036d3293c6593c9716c3 Mon Sep 17 00:00:00 2001 From: John Snow Date: Wed, 22 Jun 2016 15:51:02 -0400 Subject: [PATCH 06/10] mirror: clarify mirror_do_read return code mirror_do_read intends to return the number of sectors processed after the starting sector, without regard to how many sectors were processed before the starting sector due to alignment. Clean up the comments and code to hopefully illustrate this more clearly. This also fixes an issue in initialization where if the mirror buffer size is initialized to smaller than the number of sectors being requested for transfer, we report back an incorrectly large number to the caller. 
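
A made-up example of the initialization issue fixed here: if the caller asks
for 2048 sectors but s->buf_size only allows 1024 sectors per operation, the
old code initialized ret to 2048 before the MIN() clamp, so 2048 was reported
even though only 1024 sectors were actually submitted; moving the assignment
after the clamp makes the reported count (plus any later alignment
adjustment) match what was really issued at or after sector_num.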
Signed-off-by: John Snow Reviewed-by: Eric Blake Reviewed-by: Fam Zheng Message-id: 1466625064-11280-2-git-send-email-jsnow@redhat.com Signed-off-by: Jeff Cody --- block/mirror.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index 930ac96a8606..42ebc3b9e9ee 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -218,7 +218,9 @@ static inline void mirror_wait_for_io(MirrorBlockJob *s) } /* Submit async read while handling COW. - * Returns: nb_sectors if no alignment is necessary, or + * Returns: The number of sectors copied after and including sector_num, + * excluding any sectors copied prior to sector_num due to alignment. + * This will be nb_sectors if no alignment is necessary, or * (new_end - sector_num) if tail is rounded up or down due to * alignment or buffer limit. */ @@ -227,7 +229,7 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num, { BlockBackend *source = s->common.blk; int sectors_per_chunk, nb_chunks; - int ret = nb_sectors; + int ret; MirrorOp *op; sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; @@ -235,6 +237,7 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num, /* We can only handle as much as buf_size at a time. */ nb_sectors = MIN(s->buf_size >> BDRV_SECTOR_BITS, nb_sectors); assert(nb_sectors); + ret = nb_sectors; if (s->cow_bitmap) { ret += mirror_cow_align(s, §or_num, &nb_sectors); From e4808881cbbe8ee9b587a49dd5fabd88375affd6 Mon Sep 17 00:00:00 2001 From: John Snow Date: Wed, 22 Jun 2016 15:51:03 -0400 Subject: [PATCH 07/10] mirror: limit niov to IOV_MAX elements, again During the refactor of mirror_iteration in e5b43573, we regressed the fix introduced in cae98cb8. This patch re-adds IOV_MAX checking to cases where we aren't checking alignment (and size) already. Signed-off-by: John Snow Reviewed-by: Eric Blake Reviewed-by: Fam Zheng Message-id: 1466625064-11280-3-git-send-email-jsnow@redhat.com Signed-off-by: Jeff Cody --- block/mirror.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/mirror.c b/block/mirror.c index 42ebc3b9e9ee..5bac906d82fb 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -231,11 +231,14 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num, int sectors_per_chunk, nb_chunks; int ret; MirrorOp *op; + int max_sectors; sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; + max_sectors = sectors_per_chunk * s->max_iov; /* We can only handle as much as buf_size at a time. 
*/ nb_sectors = MIN(s->buf_size >> BDRV_SECTOR_BITS, nb_sectors); + nb_sectors = MIN(max_sectors, nb_sectors); assert(nb_sectors); ret = nb_sectors; From ccee3d8f97781515ed40356f1e6a7a233dd466b8 Mon Sep 17 00:00:00 2001 From: John Snow Date: Wed, 22 Jun 2016 15:51:04 -0400 Subject: [PATCH 08/10] iotests: add small-granularity mirror test Signed-off-by: John Snow Reviewed-by: Eric Blake Reviewed-by: Fam Zheng Message-id: 1466625064-11280-4-git-send-email-jsnow@redhat.com Signed-off-by: Jeff Cody --- tests/qemu-iotests/041 | 30 ++++++++++++++++++++++++++++++ tests/qemu-iotests/041.out | 4 ++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 index ed1d9d464c7e..cbf5e0ba5c60 100755 --- a/tests/qemu-iotests/041 +++ b/tests/qemu-iotests/041 @@ -727,6 +727,36 @@ class TestUnbackedSource(iotests.QMPTestCase): self.complete_and_wait() self.assert_no_active_block_jobs() +class TestGranularity(iotests.QMPTestCase): + image_len = 10 * 1024 * 1024 # MB + + def setUp(self): + qemu_img('create', '-f', iotests.imgfmt, test_img, + str(TestGranularity.image_len)) + qemu_io('-c', 'write 0 %d' % (self.image_len), + test_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + self.assertTrue(iotests.compare_images(test_img, target_img), + 'target image does not match source after mirroring') + os.remove(test_img) + os.remove(target_img) + + def test_granularity(self): + self.assert_no_active_block_jobs() + result = self.vm.qmp('drive-mirror', device='drive0', + sync='full', target=target_img, + mode='absolute-paths', granularity=8192) + self.assert_qmp(result, 'return', {}) + event = self.vm.get_qmp_event(wait=60.0) + # Failures will manifest as COMPLETED/ERROR. + self.assert_qmp(event, 'event', 'BLOCK_JOB_READY') + self.complete_and_wait(drive='drive0', wait_ready=False) + self.assert_no_active_block_jobs() + class TestRepairQuorum(iotests.QMPTestCase): """ This class test quorum file repair using drive-mirror. It's mostly a fork of TestSingleDrive """ diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out index b0cadc8245e5..b67d0504a6d9 100644 --- a/tests/qemu-iotests/041.out +++ b/tests/qemu-iotests/041.out @@ -1,5 +1,5 @@ -........................................................................... +............................................................................ 
 ----------------------------------------------------------------------
-Ran 75 tests
+Ran 76 tests
 
 OK

From b48100cf07c94f66feef6886d3697eac8635bce4 Mon Sep 17 00:00:00 2001
From: Changlong Xie
Date: Thu, 23 Jun 2016 16:57:20 +0800
Subject: [PATCH 09/10] blockjob: assert(cb) when create job

Callback for block job should always exist

Suggested-by: Paolo Bonzini
Suggested-by: Kevin Wolf
Signed-off-by: Changlong Xie
Reviewed-by: Fam Zheng
Reviewed-by: Stefan Hajnoczi
Reviewed-by: Jeff Cody
Message-id: 1466672241-22485-2-git-send-email-xiecl.fnst@cn.fujitsu.com
Signed-off-by: Jeff Cody
---
 block/backup.c | 1 -
 blockjob.c     | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/backup.c b/block/backup.c
index 581269b29aef..f87f8d539b52 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -489,7 +489,6 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
 
     assert(bs);
     assert(target);
-    assert(cb);
 
     if (bs == target) {
         error_setg(errp, "Source and target cannot be the same");
diff --git a/blockjob.c b/blockjob.c
index 90c4e262b090..205da9df4e03 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -110,6 +110,7 @@ void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
     BlockBackend *blk;
     BlockJob *job;
 
+    assert(cb);
     if (bs->job) {
         error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
         return NULL;

From 15d6729850728ee49859711dd40b00d8d85d94ee Mon Sep 17 00:00:00 2001
From: Changlong Xie
Date: Thu, 23 Jun 2016 16:57:21 +0800
Subject: [PATCH 10/10] mirror: fix misleading comments

s/target bs/to_replace/, also we check to_replace bs is not blocked in
qmp_drive_mirror() not here

Signed-off-by: Changlong Xie
Reviewed-by: Fam Zheng
Reviewed-by: Stefan Hajnoczi
Reviewed-by: Jeff Cody
Message-id: 1466672241-22485-3-git-send-email-xiecl.fnst@cn.fujitsu.com
Signed-off-by: Jeff Cody
---
 block/mirror.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/mirror.c b/block/mirror.c
index 5bac906d82fb..8d96049555fc 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -775,7 +775,7 @@ static void mirror_complete(BlockJob *job, Error **errp)
         }
     }
 
-    /* check the target bs is not blocked and block all operations on it */
+    /* block all operations on to_replace bs */
     if (s->replaces) {
         AioContext *replace_aio_context;