Skip to content

Commit

Permalink
block: Make bdrv_is_allocated() byte-based
Browse files Browse the repository at this point in the history
We are gradually moving away from sector-based interfaces, towards
byte-based.  In the common case, allocation is unlikely to ever use
values that are not naturally sector-aligned, but it is possible
that byte-based values will let us be more precise about allocation
at the end of an unaligned file that can do byte-based access.

Changing the signature of the function to use int64_t *pnum ensures
that the compiler enforces that all callers are updated.  For now,
the io.c layer still assert()s that all callers are sector-aligned
on input and that *pnum is sector-aligned on return to the caller,
but that can be relaxed when a later patch implements byte-based
block status.  Therefore, this code adds usages like
DIV_ROUND_UP(,BDRV_SECTOR_SIZE) to callers that still want aligned
values, where the call might reasonbly give non-aligned results
in the future; on the other hand, no rounding is needed for callers
that should just continue to work with byte alignment.

For the most part this patch is just the addition of scaling at the
callers followed by inverse scaling at bdrv_is_allocated().  But
some code, particularly bdrv_commit(), gets a lot simpler because it
no longer has to mess with sectors; also, it is now possible to pass
NULL if the caller does not care how much of the image is allocated
beyond the initial offset.  Leave comments where we can further
simplify once a later patch eliminates the need for sector-aligned
requests through bdrv_is_allocated().

For ease of review, bdrv_is_allocated_above() will be tackled
separately.

Signed-off-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
  • Loading branch information
ebblake authored and kevmw committed Jul 10, 2017
1 parent 6f8e35e commit d6a644b
Show file tree
Hide file tree
Showing 9 changed files with 126 additions and 105 deletions.
17 changes: 5 additions & 12 deletions block/backup.c
Expand Up @@ -47,12 +47,6 @@ typedef struct BackupBlockJob {
QLIST_HEAD(, CowRequest) inflight_reqs;
} BackupBlockJob;

/* Size of a cluster in sectors, instead of bytes. */
static inline int64_t cluster_size_sectors(BackupBlockJob *job)
{
return job->cluster_size / BDRV_SECTOR_SIZE;
}

/* See if in-flight requests overlap and wait for them to complete */
static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
int64_t start,
Expand Down Expand Up @@ -433,7 +427,6 @@ static void coroutine_fn backup_run(void *opaque)
BackupCompleteData *data;
BlockDriverState *bs = blk_bs(job->common.blk);
int64_t offset;
int64_t sectors_per_cluster = cluster_size_sectors(job);
int ret = 0;

QLIST_INIT(&job->inflight_reqs);
Expand Down Expand Up @@ -465,22 +458,22 @@ static void coroutine_fn backup_run(void *opaque)
}

if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
int i, n;
int i;
int64_t n;

/* Check to see if these blocks are already in the
* backing file. */

for (i = 0; i < sectors_per_cluster;) {
for (i = 0; i < job->cluster_size;) {
/* bdrv_is_allocated() only returns true/false based
* on the first set of sectors it comes across that
* are are all in the same state.
* For that reason we must verify each sector in the
* backup cluster length. We end up copying more than
* needed but at some point that is always the case. */
alloced =
bdrv_is_allocated(bs,
(offset >> BDRV_SECTOR_BITS) + i,
sectors_per_cluster - i, &n);
bdrv_is_allocated(bs, offset + i,
job->cluster_size - i, &n);
i += n;

if (alloced || n == 0) {
Expand Down
21 changes: 9 additions & 12 deletions block/commit.c
Expand Up @@ -443,7 +443,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
}


#define COMMIT_BUF_SECTORS 2048
#define COMMIT_BUF_SIZE (2048 * BDRV_SECTOR_SIZE)

/* commit COW file into the raw image */
int bdrv_commit(BlockDriverState *bs)
Expand All @@ -452,8 +452,9 @@ int bdrv_commit(BlockDriverState *bs)
BlockDriverState *backing_file_bs = NULL;
BlockDriverState *commit_top_bs = NULL;
BlockDriver *drv = bs->drv;
int64_t sector, total_sectors, length, backing_length;
int n, ro, open_flags;
int64_t offset, length, backing_length;
int ro, open_flags;
int64_t n;
int ret = 0;
uint8_t *buf = NULL;
Error *local_err = NULL;
Expand Down Expand Up @@ -531,30 +532,26 @@ int bdrv_commit(BlockDriverState *bs)
}
}

total_sectors = length >> BDRV_SECTOR_BITS;

/* blk_try_blockalign() for src will choose an alignment that works for
* backing as well, so no need to compare the alignment manually. */
buf = blk_try_blockalign(src, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
buf = blk_try_blockalign(src, COMMIT_BUF_SIZE);
if (buf == NULL) {
ret = -ENOMEM;
goto ro_cleanup;
}

for (sector = 0; sector < total_sectors; sector += n) {
ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
for (offset = 0; offset < length; offset += n) {
ret = bdrv_is_allocated(bs, offset, COMMIT_BUF_SIZE, &n);
if (ret < 0) {
goto ro_cleanup;
}
if (ret) {
ret = blk_pread(src, sector * BDRV_SECTOR_SIZE, buf,
n * BDRV_SECTOR_SIZE);
ret = blk_pread(src, offset, buf, n);
if (ret < 0) {
goto ro_cleanup;
}

ret = blk_pwrite(backing, sector * BDRV_SECTOR_SIZE, buf,
n * BDRV_SECTOR_SIZE, 0);
ret = blk_pwrite(backing, offset, buf, n, 0);
if (ret < 0) {
goto ro_cleanup;
}
Expand Down
54 changes: 36 additions & 18 deletions block/io.c
Expand Up @@ -1033,17 +1033,18 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
}

if (flags & BDRV_REQ_COPY_ON_READ) {
int64_t start_sector = offset >> BDRV_SECTOR_BITS;
int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
unsigned int nb_sectors = end_sector - start_sector;
int pnum;
/* TODO: Simplify further once bdrv_is_allocated no longer
* requires sector alignment */
int64_t start = QEMU_ALIGN_DOWN(offset, BDRV_SECTOR_SIZE);
int64_t end = QEMU_ALIGN_UP(offset + bytes, BDRV_SECTOR_SIZE);
int64_t pnum;

ret = bdrv_is_allocated(bs, start_sector, nb_sectors, &pnum);
ret = bdrv_is_allocated(bs, start, end - start, &pnum);
if (ret < 0) {
goto out;
}

if (!ret || pnum != nb_sectors) {
if (!ret || pnum != end - start) {
ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov);
goto out;
}
Expand Down Expand Up @@ -1900,15 +1901,25 @@ int64_t bdrv_get_block_status(BlockDriverState *bs,
sector_num, nb_sectors, pnum, file);
}

int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, int *pnum)
int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
int64_t bytes, int64_t *pnum)
{
BlockDriverState *file;
int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum,
&file);
int64_t sector_num = offset >> BDRV_SECTOR_BITS;
int nb_sectors = bytes >> BDRV_SECTOR_BITS;
int64_t ret;
int psectors;

assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE) && bytes < INT_MAX);
ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &psectors,
&file);
if (ret < 0) {
return ret;
}
if (pnum) {
*pnum = psectors * BDRV_SECTOR_SIZE;
}
return !!(ret & BDRV_BLOCK_ALLOCATED);
}

Expand All @@ -1917,7 +1928,8 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
*
* Return true if the given sector is allocated in any image between
* BASE and TOP (inclusive). BASE can be NULL to check if the given
* sector is allocated in any image of the chain. Return false otherwise.
* sector is allocated in any image of the chain. Return false otherwise,
* or negative errno on failure.
*
* 'pnum' is set to the number of sectors (including and immediately following
* the specified sector) that are known to be in the same
Expand All @@ -1934,13 +1946,19 @@ int bdrv_is_allocated_above(BlockDriverState *top,

intermediate = top;
while (intermediate && intermediate != base) {
int pnum_inter;
ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
int64_t pnum_inter;
int psectors_inter;

ret = bdrv_is_allocated(intermediate, sector_num * BDRV_SECTOR_SIZE,
nb_sectors * BDRV_SECTOR_SIZE,
&pnum_inter);
if (ret < 0) {
return ret;
} else if (ret) {
*pnum = pnum_inter;
}
assert(pnum_inter < INT_MAX * BDRV_SECTOR_SIZE);
psectors_inter = pnum_inter >> BDRV_SECTOR_BITS;
if (ret) {
*pnum = psectors_inter;
return 1;
}

Expand All @@ -1950,10 +1968,10 @@ int bdrv_is_allocated_above(BlockDriverState *top,
*
* [sector_num+x, nr_sectors] allocated.
*/
if (n > pnum_inter &&
if (n > psectors_inter &&
(intermediate == top ||
sector_num + pnum_inter < intermediate->total_sectors)) {
n = pnum_inter;
sector_num + psectors_inter < intermediate->total_sectors)) {
n = psectors_inter;
}

intermediate = backing_bs(intermediate);
Expand Down
7 changes: 5 additions & 2 deletions block/stream.c
Expand Up @@ -137,6 +137,7 @@ static void coroutine_fn stream_run(void *opaque)

for ( ; offset < s->common.len; offset += n * BDRV_SECTOR_SIZE) {
bool copy;
int64_t count = 0;

/* Note that even when no rate limit is applied we need to yield
* with no pending I/O here so that bdrv_drain_all() returns.
Expand All @@ -148,8 +149,10 @@ static void coroutine_fn stream_run(void *opaque)

copy = false;

ret = bdrv_is_allocated(bs, offset / BDRV_SECTOR_SIZE,
STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n);
ret = bdrv_is_allocated(bs, offset, STREAM_BUFFER_SIZE, &count);
/* TODO relax this once bdrv_is_allocated does not enforce sectors */
assert(QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE));
n = count >> BDRV_SECTOR_BITS;
if (ret == 1) {
/* Allocated in the top, no need to copy. */
} else if (ret >= 0) {
Expand Down
34 changes: 20 additions & 14 deletions block/vvfat.c
Expand Up @@ -1482,24 +1482,27 @@ static int vvfat_read(BlockDriverState *bs, int64_t sector_num,
if (sector_num >= bs->total_sectors)
return -1;
if (s->qcow) {
int n;
int64_t n;
int ret;
ret = bdrv_is_allocated(s->qcow->bs, sector_num,
nb_sectors - i, &n);
ret = bdrv_is_allocated(s->qcow->bs, sector_num * BDRV_SECTOR_SIZE,
(nb_sectors - i) * BDRV_SECTOR_SIZE, &n);
if (ret < 0) {
return ret;
}
if (ret) {
DLOG(fprintf(stderr, "sectors %d+%d allocated\n",
(int)sector_num, n));
if (bdrv_read(s->qcow, sector_num, buf + i * 0x200, n)) {
DLOG(fprintf(stderr, "sectors %" PRId64 "+%" PRId64
" allocated\n", sector_num,
n >> BDRV_SECTOR_BITS));
if (bdrv_read(s->qcow, sector_num, buf + i * 0x200,
n >> BDRV_SECTOR_BITS)) {
return -1;
}
i += n - 1;
sector_num += n - 1;
i += (n >> BDRV_SECTOR_BITS) - 1;
sector_num += (n >> BDRV_SECTOR_BITS) - 1;
continue;
}
DLOG(fprintf(stderr, "sector %d not allocated\n", (int)sector_num));
DLOG(fprintf(stderr, "sector %" PRId64 " not allocated\n",
sector_num));
}
if (sector_num < s->offset_to_root_dir) {
if (sector_num < s->offset_to_fat) {
Expand Down Expand Up @@ -1779,16 +1782,17 @@ static inline bool cluster_was_modified(BDRVVVFATState *s,
uint32_t cluster_num)
{
int was_modified = 0;
int i, dummy;
int i;

if (s->qcow == NULL) {
return 0;
}

for (i = 0; !was_modified && i < s->sectors_per_cluster; i++) {
was_modified = bdrv_is_allocated(s->qcow->bs,
cluster2sector(s, cluster_num) + i,
1, &dummy);
(cluster2sector(s, cluster_num) +
i) * BDRV_SECTOR_SIZE,
BDRV_SECTOR_SIZE, NULL);
}

/*
Expand Down Expand Up @@ -1935,7 +1939,7 @@ static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s,
}

if (copy_it) {
int i, dummy;
int i;
/*
* This is horribly inefficient, but that is okay, since
* it is rarely executed, if at all.
Expand All @@ -1946,7 +1950,9 @@ static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s,
for (i = 0; i < s->sectors_per_cluster; i++) {
int res;

res = bdrv_is_allocated(s->qcow->bs, offset + i, 1, &dummy);
res = bdrv_is_allocated(s->qcow->bs,
(offset + i) * BDRV_SECTOR_SIZE,
BDRV_SECTOR_SIZE, NULL);
if (res < 0) {
return -1;
}
Expand Down
4 changes: 2 additions & 2 deletions include/block/block.h
Expand Up @@ -427,8 +427,8 @@ int64_t bdrv_get_block_status_above(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors, int *pnum,
BlockDriverState **file);
int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
int *pnum);
int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
int64_t *pnum);
int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
int64_t sector_num, int nb_sectors, int *pnum);

Expand Down
16 changes: 10 additions & 6 deletions migration/block.c
Expand Up @@ -34,7 +34,7 @@
#define BLK_MIG_FLAG_PROGRESS 0x04
#define BLK_MIG_FLAG_ZERO_BLOCK 0x08

#define MAX_IS_ALLOCATED_SEARCH 65536
#define MAX_IS_ALLOCATED_SEARCH (65536 * BDRV_SECTOR_SIZE)

#define MAX_INFLIGHT_IO 512

Expand Down Expand Up @@ -267,16 +267,20 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
BlockBackend *bb = bmds->blk;
BlkMigBlock *blk;
int nr_sectors;
int64_t count;

if (bmds->shared_base) {
qemu_mutex_lock_iothread();
aio_context_acquire(blk_get_aio_context(bb));
/* Skip unallocated sectors; intentionally treats failure as
* an allocated sector */
/* Skip unallocated sectors; intentionally treats failure or
* partial sector as an allocated sector */
while (cur_sector < total_sectors &&
!bdrv_is_allocated(blk_bs(bb), cur_sector,
MAX_IS_ALLOCATED_SEARCH, &nr_sectors)) {
cur_sector += nr_sectors;
!bdrv_is_allocated(blk_bs(bb), cur_sector * BDRV_SECTOR_SIZE,
MAX_IS_ALLOCATED_SEARCH, &count)) {
if (count < BDRV_SECTOR_SIZE) {
break;
}
cur_sector += count >> BDRV_SECTOR_BITS;
}
aio_context_release(blk_get_aio_context(bb));
qemu_mutex_unlock_iothread();
Expand Down
8 changes: 7 additions & 1 deletion qemu-img.c
Expand Up @@ -3258,6 +3258,7 @@ static int img_rebase(int argc, char **argv)
int64_t new_backing_num_sectors = 0;
uint64_t sector;
int n;
int64_t count;
float local_progress = 0;

buf_old = blk_blockalign(blk, IO_BUF_SIZE);
Expand Down Expand Up @@ -3305,12 +3306,17 @@ static int img_rebase(int argc, char **argv)
}

/* If the cluster is allocated, we don't need to take action */
ret = bdrv_is_allocated(bs, sector, n, &n);
ret = bdrv_is_allocated(bs, sector << BDRV_SECTOR_BITS,
n << BDRV_SECTOR_BITS, &count);
if (ret < 0) {
error_report("error while reading image metadata: %s",
strerror(-ret));
goto out;
}
/* TODO relax this once bdrv_is_allocated does not enforce
* sector alignment */
assert(QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE));
n = count >> BDRV_SECTOR_BITS;
if (ret) {
continue;
}
Expand Down

0 comments on commit d6a644b

Please sign in to comment.