Skip to content

Commit

Permalink
mirror: allow customizing the granularity
Browse files Browse the repository at this point in the history
The desired granularity may be very different depending on the kind of
operation (e.g. continuous replication vs. collapse-to-raw) and whether
the VM is expected to perform lots of I/O while mirroring is in progress.

Allow the user to customize it, while providing a sane default so that
in general there will be no extra allocated space in the target compared
to the source.

Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
  • Loading branch information
bonzini authored and kevmw committed Jan 25, 2013
1 parent 50717e9 commit eee13df
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 24 deletions.
52 changes: 33 additions & 19 deletions block/mirror.c
Expand Up @@ -17,9 +17,6 @@
#include "qemu/ratelimit.h"
#include "qemu/bitmap.h"

#define BLOCK_SIZE (1 << 20)
#define BDRV_SECTORS_PER_DIRTY_CHUNK (BLOCK_SIZE >> BDRV_SECTOR_BITS)

#define SLICE_TIME 100000000ULL /* ns */

typedef struct MirrorBlockJob {
Expand All @@ -31,6 +28,7 @@ typedef struct MirrorBlockJob {
bool synced;
bool should_complete;
int64_t sector_num;
int64_t granularity;
size_t buf_size;
unsigned long *cow_bitmap;
HBitmapIter hbi;
Expand All @@ -56,7 +54,7 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
BlockDriverState *source = s->common.bs;
BlockDriverState *target = s->target;
QEMUIOVector qiov;
int ret, nb_sectors;
int ret, nb_sectors, sectors_per_chunk;
int64_t end, sector_num, chunk_num;
struct iovec iov;

Expand All @@ -72,16 +70,16 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
* is very large, we need to do COW ourselves. The first time a cluster is
* copied, copy it entirely.
*
* Because both BDRV_SECTORS_PER_DIRTY_CHUNK and the cluster size are
* powers of two, the number of sectors to copy cannot exceed one cluster.
* Because both the granularity and the cluster size are powers of two, the
* number of sectors to copy cannot exceed one cluster.
*/
sector_num = s->sector_num;
nb_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
chunk_num = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
sectors_per_chunk = nb_sectors = s->granularity >> BDRV_SECTOR_BITS;
chunk_num = sector_num / sectors_per_chunk;
if (s->cow_bitmap && !test_bit(chunk_num, s->cow_bitmap)) {
trace_mirror_cow(s, sector_num);
bdrv_round_to_clusters(s->target,
sector_num, BDRV_SECTORS_PER_DIRTY_CHUNK,
sector_num, sectors_per_chunk,
&sector_num, &nb_sectors);
}

Expand All @@ -107,8 +105,8 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
goto fail;
}
if (s->cow_bitmap) {
bitmap_set(s->cow_bitmap, sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK,
nb_sectors / BDRV_SECTORS_PER_DIRTY_CHUNK);
bitmap_set(s->cow_bitmap, sector_num / sectors_per_chunk,
nb_sectors / sectors_per_chunk);
}
return 0;

Expand All @@ -122,7 +120,7 @@ static void coroutine_fn mirror_run(void *opaque)
{
MirrorBlockJob *s = opaque;
BlockDriverState *bs = s->common.bs;
int64_t sector_num, end, length;
int64_t sector_num, end, sectors_per_chunk, length;
BlockDriverInfo bdi;
char backing_filename[1024];
int ret = 0;
Expand All @@ -146,22 +144,23 @@ static void coroutine_fn mirror_run(void *opaque)
sizeof(backing_filename));
if (backing_filename[0] && !s->target->backing_hd) {
bdrv_get_info(s->target, &bdi);
if (s->buf_size < bdi.cluster_size) {
if (s->granularity < bdi.cluster_size) {
s->buf_size = bdi.cluster_size;
length = (bdrv_getlength(bs) + BLOCK_SIZE - 1) / BLOCK_SIZE;
length = (bdrv_getlength(bs) + s->granularity - 1) / s->granularity;
s->cow_bitmap = bitmap_new(length);
}
}

end = s->common.len >> BDRV_SECTOR_BITS;
s->buf = qemu_blockalign(bs, s->buf_size);
sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;

if (s->mode != MIRROR_SYNC_MODE_NONE) {
/* First part, loop on the sectors and initialize the dirty bitmap. */
BlockDriverState *base;
base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
for (sector_num = 0; sector_num < end; ) {
int64_t next = (sector_num | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1;
int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1;
ret = bdrv_co_is_allocated_above(bs, base,
sector_num, next - sector_num, &n);

Expand Down Expand Up @@ -242,7 +241,7 @@ static void coroutine_fn mirror_run(void *opaque)
s->common.offset = (end - cnt) * BDRV_SECTOR_SIZE;

if (s->common.speed) {
delay_ns = ratelimit_calculate_delay(&s->limit, BDRV_SECTORS_PER_DIRTY_CHUNK);
delay_ns = ratelimit_calculate_delay(&s->limit, sectors_per_chunk);
} else {
delay_ns = 0;
}
Expand Down Expand Up @@ -332,14 +331,28 @@ static BlockJobType mirror_job_type = {
};

void mirror_start(BlockDriverState *bs, BlockDriverState *target,
int64_t speed, MirrorSyncMode mode,
int64_t speed, int64_t granularity, MirrorSyncMode mode,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
BlockDriverCompletionFunc *cb,
void *opaque, Error **errp)
{
MirrorBlockJob *s;

if (granularity == 0) {
/* Choose the default granularity based on the target file's cluster
* size, clamped between 4k and 64k. */
BlockDriverInfo bdi;
if (bdrv_get_info(target, &bdi) >= 0 && bdi.cluster_size != 0) {
granularity = MAX(4096, bdi.cluster_size);
granularity = MIN(65536, granularity);
} else {
granularity = 65536;
}
}

assert ((granularity & (granularity - 1)) == 0);

if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
!bdrv_iostatus_is_enabled(bs)) {
Expand All @@ -356,9 +369,10 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
s->on_target_error = on_target_error;
s->target = target;
s->mode = mode;
s->buf_size = BLOCK_SIZE;
s->granularity = granularity;
s->buf_size = granularity;

bdrv_set_dirty_tracking(bs, BLOCK_SIZE);
bdrv_set_dirty_tracking(bs, granularity);
bdrv_set_enable_write_cache(s->target, true);
bdrv_set_on_error(s->target, on_target_error, on_target_error);
bdrv_iostatus_enable(s->target);
Expand Down
15 changes: 14 additions & 1 deletion blockdev.c
Expand Up @@ -1193,6 +1193,7 @@ void qmp_drive_mirror(const char *device, const char *target,
enum MirrorSyncMode sync,
bool has_mode, enum NewImageMode mode,
bool has_speed, int64_t speed,
bool has_granularity, uint32_t granularity,
bool has_on_source_error, BlockdevOnError on_source_error,
bool has_on_target_error, BlockdevOnError on_target_error,
Error **errp)
Expand All @@ -1218,6 +1219,17 @@ void qmp_drive_mirror(const char *device, const char *target,
if (!has_mode) {
mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
}
if (!has_granularity) {
granularity = 0;
}
if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) {
error_set(errp, QERR_INVALID_PARAMETER, device);
return;
}
if (granularity & (granularity - 1)) {
error_set(errp, QERR_INVALID_PARAMETER, device);
return;
}

bs = bdrv_find(device);
if (!bs) {
Expand Down Expand Up @@ -1299,7 +1311,8 @@ void qmp_drive_mirror(const char *device, const char *target,
return;
}

mirror_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
mirror_start(bs, target_bs, speed, granularity, sync,
on_source_error, on_target_error,
block_job_cb, bs, &local_err);
if (local_err != NULL) {
bdrv_delete(target_bs);
Expand Down
2 changes: 1 addition & 1 deletion hmp.c
Expand Up @@ -796,7 +796,7 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict)

qmp_drive_mirror(device, filename, !!format, format,
full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
true, mode, false, 0,
true, mode, false, 0, false, 0,
false, 0, false, 0, &errp);
hmp_handle_error(mon, &errp);
}
Expand Down
3 changes: 2 additions & 1 deletion include/block/block_int.h
Expand Up @@ -344,6 +344,7 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
* @bs: Block device to operate on.
* @target: Block device to write to.
* @speed: The maximum speed, in bytes per second, or 0 for unlimited.
* @granularity: The chosen granularity for the dirty bitmap, in bytes; must be
* a power of two, or 0 to derive a default from the target's cluster size.
* @mode: Whether to collapse all images in the chain to the target.
* @on_source_error: The action to take upon error reading from the source.
* @on_target_error: The action to take upon error writing to the target.
Expand All @@ -357,7 +358,7 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
* @bs will be switched to read from @target.
*/
void mirror_start(BlockDriverState *bs, BlockDriverState *target,
int64_t speed, MirrorSyncMode mode,
int64_t speed, int64_t granularity, MirrorSyncMode mode,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
BlockDriverCompletionFunc *cb,
Expand Down
8 changes: 7 additions & 1 deletion qapi-schema.json
Expand Up @@ -1636,6 +1636,11 @@
# (all the disk, only the sectors allocated in the topmost image, or
# only new I/O).
#
# @granularity: #optional granularity of the dirty bitmap, default is 64K
# if the image format doesn't have clusters, 4K if the clusters
# are smaller than that, else the cluster size capped at 64K.
# Must be a power of 2 between 512 and 64M (since 1.4).
#
# @on-source-error: #optional the action to take on an error on the source,
# default 'report'. 'stop' and 'enospc' can only be used
# if the block device supports io-status (see BlockInfo).
Expand All @@ -1652,7 +1657,8 @@
{ 'command': 'drive-mirror',
'data': { 'device': 'str', 'target': 'str', '*format': 'str',
'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
'*speed': 'int', '*on-source-error': 'BlockdevOnError',
'*speed': 'int', '*granularity': 'uint32',
'*on-source-error': 'BlockdevOnError',
'*on-target-error': 'BlockdevOnError' } }

##
Expand Down
8 changes: 7 additions & 1 deletion qmp-commands.hx
Expand Up @@ -938,7 +938,8 @@ EQMP
{
.name = "drive-mirror",
.args_type = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?,"
"on-source-error:s?,on-target-error:s?",
"on-source-error:s?,on-target-error:s?,"
"granularity:i?",
.mhandler.cmd_new = qmp_marshal_input_drive_mirror,
},

Expand All @@ -962,6 +963,7 @@ Arguments:
file/device (NewImageMode, optional, default 'absolute-paths')
- "speed": maximum speed of the streaming job, in bytes per second
(json-int)
- "granularity": granularity of the dirty bitmap, in bytes (json-int, optional)
- "sync": what parts of the disk image should be copied to the destination;
possibilities include "full" for all the disk, "top" for only the sectors
allocated in the topmost image, or "none" to only replicate new I/O
Expand All @@ -971,6 +973,10 @@ Arguments:
- "on-target-error": the action to take on an error on the target
(BlockdevOnError, default 'report')

The default value of the granularity is the image cluster size clamped
between 4096 and 65536, if the image format defines one. If the format
does not define a cluster size, the default value of the granularity
is 65536.


Example:
Expand Down

0 comments on commit eee13df

Please sign in to comment.