Skip to content

Commit

Permalink
backup: Use copy offloading
Browse files Browse the repository at this point in the history
The implementation is similar to the 'qemu-img convert'. In the
beginning of the job, offloaded copy is attempted. If it fails, further
I/O will go through the existing bounce buffer code path.

Then, as Kevin pointed out, both this and qemu-img convert can benefit
from a local check if one request fails because of, for example, the
offset is beyond EOF, but another may well be accepted by the protocol
layer. This will be implemented separately.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 20180703023758.14422-4-famz@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
  • Loading branch information
Fam Zheng authored and codyprime committed Jul 3, 2018
1 parent dee12de commit 9ded4a0
Show file tree
Hide file tree
Showing 2 changed files with 110 additions and 41 deletions.
150 changes: 109 additions & 41 deletions block/backup.c
Expand Up @@ -45,6 +45,8 @@ typedef struct BackupBlockJob {
QLIST_HEAD(, CowRequest) inflight_reqs;

HBitmap *copy_bitmap;
bool use_copy_range;
int64_t copy_range_size;
} BackupBlockJob;

static const BlockJobDriver backup_job_driver;
Expand Down Expand Up @@ -86,19 +88,101 @@ static void cow_request_end(CowRequest *req)
qemu_co_queue_restart_all(&req->wait_queue);
}

/* Copy range to target with a bounce buffer and return the bytes copied. If
* error occured, return a negative error number */
static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
int64_t start,
int64_t end,
bool is_write_notifier,
bool *error_is_read,
void **bounce_buffer)
{
int ret;
struct iovec iov;
QEMUIOVector qiov;
BlockBackend *blk = job->common.blk;
int nbytes;

hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
nbytes = MIN(job->cluster_size, job->len - start);
if (!*bounce_buffer) {
*bounce_buffer = blk_blockalign(blk, job->cluster_size);
}
iov.iov_base = *bounce_buffer;
iov.iov_len = nbytes;
qemu_iovec_init_external(&qiov, &iov, 1);

ret = blk_co_preadv(blk, start, qiov.size, &qiov,
is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
if (ret < 0) {
trace_backup_do_cow_read_fail(job, start, ret);
if (error_is_read) {
*error_is_read = true;
}
goto fail;
}

if (qemu_iovec_is_zero(&qiov)) {
ret = blk_co_pwrite_zeroes(job->target, start,
qiov.size, BDRV_REQ_MAY_UNMAP);
} else {
ret = blk_co_pwritev(job->target, start,
qiov.size, &qiov,
job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
}
if (ret < 0) {
trace_backup_do_cow_write_fail(job, start, ret);
if (error_is_read) {
*error_is_read = false;
}
goto fail;
}

return nbytes;
fail:
hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
return ret;

}

/* Copy range to target and return the bytes copied. If error occured, return a
* negative error number. */
static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job,
int64_t start,
int64_t end,
bool is_write_notifier)
{
int ret;
int nr_clusters;
BlockBackend *blk = job->common.blk;
int nbytes;

assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size));
nbytes = MIN(job->copy_range_size, end - start);
nr_clusters = DIV_ROUND_UP(nbytes, job->cluster_size);
hbitmap_reset(job->copy_bitmap, start / job->cluster_size,
nr_clusters);
ret = blk_co_copy_range(blk, start, job->target, start, nbytes,
is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
if (ret < 0) {
trace_backup_do_cow_copy_range_fail(job, start, ret);
hbitmap_set(job->copy_bitmap, start / job->cluster_size,
nr_clusters);
return ret;
}

return nbytes;
}

static int coroutine_fn backup_do_cow(BackupBlockJob *job,
int64_t offset, uint64_t bytes,
bool *error_is_read,
bool is_write_notifier)
{
BlockBackend *blk = job->common.blk;
CowRequest cow_request;
struct iovec iov;
QEMUIOVector bounce_qiov;
void *bounce_buffer = NULL;
int ret = 0;
int64_t start, end; /* bytes */
int n; /* bytes */
void *bounce_buffer = NULL;

qemu_co_rwlock_rdlock(&job->flush_rwlock);

Expand All @@ -110,60 +194,38 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
wait_for_overlapping_requests(job, start, end);
cow_request_begin(&cow_request, job, start, end);

for (; start < end; start += job->cluster_size) {
while (start < end) {
if (!hbitmap_get(job->copy_bitmap, start / job->cluster_size)) {
trace_backup_do_cow_skip(job, start);
start += job->cluster_size;
continue; /* already copied */
}
hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);

trace_backup_do_cow_process(job, start);

n = MIN(job->cluster_size, job->len - start);

if (!bounce_buffer) {
bounce_buffer = blk_blockalign(blk, job->cluster_size);
}
iov.iov_base = bounce_buffer;
iov.iov_len = n;
qemu_iovec_init_external(&bounce_qiov, &iov, 1);

ret = blk_co_preadv(blk, start, bounce_qiov.size, &bounce_qiov,
is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
if (ret < 0) {
trace_backup_do_cow_read_fail(job, start, ret);
if (error_is_read) {
*error_is_read = true;
if (job->use_copy_range) {
ret = backup_cow_with_offload(job, start, end, is_write_notifier);
if (ret < 0) {
job->use_copy_range = false;
}
hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
goto out;
}

if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
ret = blk_co_pwrite_zeroes(job->target, start,
bounce_qiov.size, BDRV_REQ_MAY_UNMAP);
} else {
ret = blk_co_pwritev(job->target, start,
bounce_qiov.size, &bounce_qiov,
job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
if (!job->use_copy_range) {
ret = backup_cow_with_bounce_buffer(job, start, end, is_write_notifier,
error_is_read, &bounce_buffer);
}
if (ret < 0) {
trace_backup_do_cow_write_fail(job, start, ret);
if (error_is_read) {
*error_is_read = false;
}
hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
goto out;
break;
}

/* Publish progress, guest I/O counts as progress too. Note that the
* offset field is an opaque progress value, it is not a disk offset.
*/
job->bytes_read += n;
job_progress_update(&job->common.job, n);
start += ret;
job->bytes_read += ret;
job_progress_update(&job->common.job, ret);
ret = 0;
}

out:
if (bounce_buffer) {
qemu_vfree(bounce_buffer);
}
Expand Down Expand Up @@ -665,6 +727,12 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
} else {
job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
}
job->use_copy_range = true;
job->copy_range_size = MIN_NON_ZERO(blk_get_max_transfer(job->common.blk),
blk_get_max_transfer(job->target));
job->copy_range_size = MAX(job->cluster_size,
QEMU_ALIGN_UP(job->copy_range_size,
job->cluster_size));

/* Required permissions are already taken with target's blk_new() */
block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
Expand Down
1 change: 1 addition & 0 deletions block/trace-events
Expand Up @@ -42,6 +42,7 @@ backup_do_cow_skip(void *job, int64_t start) "job %p start %"PRId64
backup_do_cow_process(void *job, int64_t start) "job %p start %"PRId64
backup_do_cow_read_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
backup_do_cow_write_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
backup_do_cow_copy_range_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"

# blockdev.c
qmp_block_job_cancel(void *job) "job %p"
Expand Down

0 comments on commit 9ded4a0

Please sign in to comment.