Skip to content

Commit

Permalink
Merge remote-tracking branch 'mreitz/tags/pull-block-2018-05-15' into…
Browse files Browse the repository at this point in the history
… queue-block

- Copy-on-read block driver
- The qcow2 default refcount cache size has been decreased
- Various bug fixes

# gpg: Signature made Tue May 15 16:18:25 2018 CEST
# gpg:                using RSA key F407DB0061D5CF40
# gpg: Good signature from "Max Reitz <mreitz@redhat.com>"
# Primary key fingerprint: 91BE B60A 30DB 3E88 57D1  1829 F407 DB00 61D5 CF40

* mreitz/tags/pull-block-2018-05-15: (21 commits)
  iotests: Add test for -U/force-share conflicts
  qemu-img: Use only string options in img_open_opts
  qemu-io: Use purely string blockdev options
  block: Document BDRV_REQ_WRITE_UNCHANGED support
  qemu-img: Check post-truncation size
  iotests: Add test for COR across nodes
  iotests: Copy 197 for COR filter driver
  iotests: Clean up wrap image in 197
  block: Support BDRV_REQ_WRITE_UNCHANGED in filters
  block/quorum: Support BDRV_REQ_WRITE_UNCHANGED
  block: Set BDRV_REQ_WRITE_UNCHANGED for COR writes
  block: Add BDRV_REQ_WRITE_UNCHANGED flag
  block: BLK_PERM_WRITE includes ..._UNCHANGED
  block: Add COR filter driver
  iotests: Skip 181 and 201 without userfaultfd
  iotests: Add failure matching to common.qemu
  docs: Document the new default sizes of the qcow2 caches
  qcow2: Give the refcount cache the minimum possible size by default
  specs/qcow2: Clarify that compressed clusters have the COPIED bit reset
  Fix error message about compressed clusters with OFLAG_COPIED
  ...

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
  • Loading branch information
kevmw committed May 15, 2018
2 parents bd21935 + 4e7d73c commit 1fce860
Show file tree
Hide file tree
Showing 36 changed files with 862 additions and 159 deletions.
2 changes: 1 addition & 1 deletion block/Makefile.objs
Expand Up @@ -26,7 +26,7 @@ block-obj-y += accounting.o dirty-bitmap.o
block-obj-y += write-threshold.o
block-obj-y += backup.o
block-obj-$(CONFIG_REPLICATION) += replication.o
block-obj-y += throttle.o
block-obj-y += throttle.o copy-on-read.o

block-obj-y += crypto.o

Expand Down
9 changes: 5 additions & 4 deletions block/blkdebug.c
Expand Up @@ -398,10 +398,11 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
goto out;
}

bs->supported_write_flags = BDRV_REQ_FUA &
bs->file->bs->supported_write_flags;
bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
bs->file->bs->supported_zero_flags;
bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
(BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
bs->file->bs->supported_zero_flags);
ret = -EINVAL;

/* Set alignment overrides */
Expand Down
3 changes: 3 additions & 0 deletions block/blkreplay.c
Expand Up @@ -35,6 +35,9 @@ static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}

bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED;

ret = 0;
fail:
return ret;
Expand Down
3 changes: 3 additions & 0 deletions block/blkverify.c
Expand Up @@ -141,6 +141,9 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}

bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED;

ret = 0;
fail:
qemu_opts_del(opts);
Expand Down
173 changes: 173 additions & 0 deletions block/copy-on-read.c
@@ -0,0 +1,173 @@
/*
* Copy-on-read filter block driver
*
* Copyright (c) 2018 Red Hat, Inc.
*
* Author:
* Max Reitz <mreitz@redhat.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 or
* (at your option) version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/

#include "qemu/osdep.h"
#include "block/block_int.h"


static int cor_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, false,
errp);
if (!bs->file) {
return -EINVAL;
}

bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
(BDRV_REQ_FUA &
bs->file->bs->supported_write_flags);

bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
bs->file->bs->supported_zero_flags);

return 0;
}


static void cor_close(BlockDriverState *bs)
{
}


#define PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \
| BLK_PERM_WRITE \
| BLK_PERM_RESIZE)
#define PERM_UNCHANGED (BLK_PERM_ALL & ~PERM_PASSTHROUGH)

static void cor_child_perm(BlockDriverState *bs, BdrvChild *c,
const BdrvChildRole *role,
BlockReopenQueue *reopen_queue,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
if (c == NULL) {
*nperm = (perm & PERM_PASSTHROUGH) | BLK_PERM_WRITE_UNCHANGED;
*nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED;
return;
}

*nperm = (perm & PERM_PASSTHROUGH) |
(c->perm & PERM_UNCHANGED);
*nshared = (shared & PERM_PASSTHROUGH) |
(c->shared_perm & PERM_UNCHANGED);
}


static int64_t cor_getlength(BlockDriverState *bs)
{
return bdrv_getlength(bs->file->bs);
}


static int cor_truncate(BlockDriverState *bs, int64_t offset,
PreallocMode prealloc, Error **errp)
{
return bdrv_truncate(bs->file, offset, prealloc, errp);
}


static int coroutine_fn cor_co_preadv(BlockDriverState *bs,
uint64_t offset, uint64_t bytes,
QEMUIOVector *qiov, int flags)
{
return bdrv_co_preadv(bs->file, offset, bytes, qiov,
flags | BDRV_REQ_COPY_ON_READ);
}


static int coroutine_fn cor_co_pwritev(BlockDriverState *bs,
uint64_t offset, uint64_t bytes,
QEMUIOVector *qiov, int flags)
{

return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
}


static int coroutine_fn cor_co_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int bytes,
BdrvRequestFlags flags)
{
return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
}


static int coroutine_fn cor_co_pdiscard(BlockDriverState *bs,
int64_t offset, int bytes)
{
return bdrv_co_pdiscard(bs->file->bs, offset, bytes);
}


static void cor_eject(BlockDriverState *bs, bool eject_flag)
{
bdrv_eject(bs->file->bs, eject_flag);
}


static void cor_lock_medium(BlockDriverState *bs, bool locked)
{
bdrv_lock_medium(bs->file->bs, locked);
}


static bool cor_recurse_is_first_non_filter(BlockDriverState *bs,
BlockDriverState *candidate)
{
return bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
}


BlockDriver bdrv_copy_on_read = {
.format_name = "copy-on-read",

.bdrv_open = cor_open,
.bdrv_close = cor_close,
.bdrv_child_perm = cor_child_perm,

.bdrv_getlength = cor_getlength,
.bdrv_truncate = cor_truncate,

.bdrv_co_preadv = cor_co_preadv,
.bdrv_co_pwritev = cor_co_pwritev,
.bdrv_co_pwrite_zeroes = cor_co_pwrite_zeroes,
.bdrv_co_pdiscard = cor_co_pdiscard,

.bdrv_eject = cor_eject,
.bdrv_lock_medium = cor_lock_medium,

.bdrv_co_block_status = bdrv_co_block_status_from_file,

.bdrv_recurse_is_first_non_filter = cor_recurse_is_first_non_filter,

.has_variable_length = true,
.is_filter = true,
};

static void bdrv_copy_on_read_init(void)
{
bdrv_register(&bdrv_copy_on_read);
}

block_init(bdrv_copy_on_read_init);
12 changes: 9 additions & 3 deletions block/io.c
Expand Up @@ -1118,13 +1118,15 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
/* FIXME: Should we (perhaps conditionally) be setting
* BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
* that still correctly reads as zero? */
ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum, 0);
ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum,
BDRV_REQ_WRITE_UNCHANGED);
} else {
/* This does not change the data on the disk, it is not
* necessary to flush even in cache=writethrough mode.
*/
ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
&local_qiov, 0);
&local_qiov,
BDRV_REQ_WRITE_UNCHANGED);
}

if (ret < 0) {
Expand Down Expand Up @@ -1504,7 +1506,11 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
assert(!waited || !req->serialising);
assert(req->overlap_offset <= offset);
assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
assert(child->perm & BLK_PERM_WRITE);
if (flags & BDRV_REQ_WRITE_UNCHANGED) {
assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
} else {
assert(child->perm & BLK_PERM_WRITE);
}
assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);

ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
Expand Down
2 changes: 2 additions & 0 deletions block/mirror.c
Expand Up @@ -1134,6 +1134,8 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
mirror_top_bs->implicit = true;
}
mirror_top_bs->total_sectors = bs->total_sectors;
mirror_top_bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
mirror_top_bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED;
bdrv_set_aio_context(mirror_top_bs, bdrv_get_aio_context(bs));

/* bdrv_append takes ownership of the mirror_top_bs reference, need to keep
Expand Down
4 changes: 2 additions & 2 deletions block/qcow2-refcount.c
Expand Up @@ -1577,9 +1577,9 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
case QCOW2_CLUSTER_COMPRESSED:
/* Compressed clusters don't have QCOW_OFLAG_COPIED */
if (l2_entry & QCOW_OFLAG_COPIED) {
fprintf(stderr, "ERROR: cluster %" PRId64 ": "
fprintf(stderr, "ERROR: coffset=0x%" PRIx64 ": "
"copied flag must never be set for compressed "
"clusters\n", l2_entry >> s->cluster_bits);
"clusters\n", l2_entry & s->cluster_offset_mask);
l2_entry &= ~QCOW_OFLAG_COPIED;
res->corruptions++;
}
Expand Down
31 changes: 19 additions & 12 deletions block/qcow2.c
Expand Up @@ -802,23 +802,30 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
} else if (refcount_cache_size_set) {
*l2_cache_size = combined_cache_size - *refcount_cache_size;
} else {
*refcount_cache_size = combined_cache_size
/ (DEFAULT_L2_REFCOUNT_SIZE_RATIO + 1);
*l2_cache_size = combined_cache_size - *refcount_cache_size;
uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8);
uint64_t min_refcount_cache =
(uint64_t) MIN_REFCOUNT_CACHE_SIZE * s->cluster_size;

/* Assign as much memory as possible to the L2 cache, and
* use the remainder for the refcount cache */
if (combined_cache_size >= max_l2_cache + min_refcount_cache) {
*l2_cache_size = max_l2_cache;
*refcount_cache_size = combined_cache_size - *l2_cache_size;
} else {
*refcount_cache_size =
MIN(combined_cache_size, min_refcount_cache);
*l2_cache_size = combined_cache_size - *refcount_cache_size;
}
}
} else {
if (!l2_cache_size_set && !refcount_cache_size_set) {
if (!l2_cache_size_set) {
*l2_cache_size = MAX(DEFAULT_L2_CACHE_BYTE_SIZE,
(uint64_t)DEFAULT_L2_CACHE_CLUSTERS
* s->cluster_size);
*refcount_cache_size = *l2_cache_size
/ DEFAULT_L2_REFCOUNT_SIZE_RATIO;
} else if (!l2_cache_size_set) {
*l2_cache_size = *refcount_cache_size
* DEFAULT_L2_REFCOUNT_SIZE_RATIO;
} else if (!refcount_cache_size_set) {
*refcount_cache_size = *l2_cache_size
/ DEFAULT_L2_REFCOUNT_SIZE_RATIO;
}
if (!refcount_cache_size_set) {
*refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size;
}
}

Expand Down
4 changes: 0 additions & 4 deletions block/qcow2.h
Expand Up @@ -77,10 +77,6 @@
#define DEFAULT_L2_CACHE_CLUSTERS 8 /* clusters */
#define DEFAULT_L2_CACHE_BYTE_SIZE 1048576 /* bytes */

/* The refblock cache needs only a fourth of the L2 cache size to cover as many
* clusters */
#define DEFAULT_L2_REFCOUNT_SIZE_RATIO 4

#define DEFAULT_CLUSTER_SIZE 65536


Expand Down

0 comments on commit 1fce860

Please sign in to comment.