Skip to content

Commit

Permalink
qcow2: Don't rely on free_cluster_index in alloc_refcount_block() (CV…
Browse files Browse the repository at this point in the history
…E-2014-0147)

free_cluster_index is only correct if update_refcount() was called from
an allocation function, and even there it's brittle because it's used to
protect unfinished allocations which still have a refcount of 0 - if it
moves in the wrong place, the unfinished allocation can be corrupted.

So not using it any more seems to be a good idea. Instead, use the
first requested cluster to do the calculations. Return -EAGAIN if
unfinished allocations could become invalid and let the caller restart
its search for some free clusters.

The context of creating a snapsnot is one situation where
update_refcount() is called outside of a cluster allocation. For this
case, the change fixes a buffer overflow if a cluster is referenced in
an L2 table that cannot be represented by an existing refcount block.
(new_table[refcount_table_index] was out of bounds)

[Bump the qemu-iotests 026 refblock_alloc.write leak count from 10 to
11.
--Stefan]

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit b106ad9)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
  • Loading branch information
kevmw authored and mdroth committed Jul 3, 2014
1 parent aeba415 commit ffa3ab0
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 44 deletions.
72 changes: 37 additions & 35 deletions block/qcow2-refcount.c
Expand Up @@ -193,10 +193,11 @@ static int alloc_refcount_block(BlockDriverState *bs,
* they can describe them themselves.
*
* - We need to consider that at this point we are inside update_refcounts
* and doing the initial refcount increase. This means that some clusters
* have already been allocated by the caller, but their refcount isn't
* accurate yet. free_cluster_index tells us where this allocation ends
* as long as we don't overwrite it by freeing clusters.
* and potentially doing an initial refcount increase. This means that
* some clusters have already been allocated by the caller, but their
* refcount isn't accurate yet. If we allocate clusters for metadata, we
* need to return -EAGAIN to signal the caller that it needs to restart
* the search for free clusters.
*
* - alloc_clusters_noref and qcow2_free_clusters may load a different
* refcount block into the cache
Expand Down Expand Up @@ -281,7 +282,10 @@ static int alloc_refcount_block(BlockDriverState *bs,
}

s->refcount_table[refcount_table_index] = new_block;
return 0;

/* The new refcount block may be where the caller intended to put its
* data, so let it restart the search. */
return -EAGAIN;
}

ret = qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
Expand All @@ -304,8 +308,7 @@ static int alloc_refcount_block(BlockDriverState *bs,

/* Calculate the number of refcount blocks needed so far */
uint64_t refcount_block_clusters = 1 << (s->cluster_bits - REFCOUNT_SHIFT);
uint64_t blocks_used = (s->free_cluster_index +
refcount_block_clusters - 1) / refcount_block_clusters;
uint64_t blocks_used = DIV_ROUND_UP(cluster_index, refcount_block_clusters);

/* And now we need at least one block more for the new metadata */
uint64_t table_size = next_refcount_table_size(s, blocks_used + 1);
Expand Down Expand Up @@ -338,8 +341,6 @@ static int alloc_refcount_block(BlockDriverState *bs,
uint16_t *new_blocks = g_malloc0(blocks_clusters * s->cluster_size);
uint64_t *new_table = g_malloc0(table_size * sizeof(uint64_t));

assert(meta_offset >= (s->free_cluster_index * s->cluster_size));

/* Fill the new refcount table */
memcpy(new_table, s->refcount_table,
s->refcount_table_size * sizeof(uint64_t));
Expand Down Expand Up @@ -402,18 +403,19 @@ static int alloc_refcount_block(BlockDriverState *bs,
s->refcount_table_size = table_size;
s->refcount_table_offset = table_offset;

/* Free old table. Remember, we must not change free_cluster_index */
uint64_t old_free_cluster_index = s->free_cluster_index;
/* Free old table. */
qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t),
QCOW2_DISCARD_OTHER);
s->free_cluster_index = old_free_cluster_index;

ret = load_refcount_block(bs, new_block, (void**) refcount_block);
if (ret < 0) {
return ret;
}

return 0;
/* If we were trying to do the initial refcount update for some cluster
* allocation, we might have used the same clusters to store newly
* allocated metadata. Make the caller search some new space. */
return -EAGAIN;

fail_table:
g_free(new_table);
Expand Down Expand Up @@ -659,12 +661,15 @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
int ret;

BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
offset = alloc_clusters_noref(bs, size);
if (offset < 0) {
return offset;
}
do {
offset = alloc_clusters_noref(bs, size);
if (offset < 0) {
return offset;
}

ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER);
} while (ret == -EAGAIN);

ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER);
if (ret < 0) {
return ret;
}
Expand All @@ -677,7 +682,6 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
{
BDRVQcowState *s = bs->opaque;
uint64_t cluster_index;
uint64_t old_free_cluster_index;
uint64_t i;
int refcount, ret;

Expand All @@ -686,30 +690,28 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
return 0;
}

/* Check how many clusters there are free */
cluster_index = offset >> s->cluster_bits;
for(i = 0; i < nb_clusters; i++) {
refcount = get_refcount(bs, cluster_index++);
do {
/* Check how many clusters there are free */
cluster_index = offset >> s->cluster_bits;
for(i = 0; i < nb_clusters; i++) {
refcount = get_refcount(bs, cluster_index++);

if (refcount < 0) {
return refcount;
} else if (refcount != 0) {
break;
if (refcount < 0) {
return refcount;
} else if (refcount != 0) {
break;
}
}
}

/* And then allocate them */
old_free_cluster_index = s->free_cluster_index;
s->free_cluster_index = cluster_index + i;
/* And then allocate them */
ret = update_refcount(bs, offset, i << s->cluster_bits, 1,
QCOW2_DISCARD_NEVER);
} while (ret == -EAGAIN);

ret = update_refcount(bs, offset, i << s->cluster_bits, 1,
QCOW2_DISCARD_NEVER);
if (ret < 0) {
return ret;
}

s->free_cluster_index = old_free_cluster_index;

return i;
}

Expand Down
11 changes: 6 additions & 5 deletions block/qcow2.c
Expand Up @@ -1587,7 +1587,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
*/
BlockDriverState* bs;
QCowHeader *header;
uint8_t* refcount_table;
uint64_t* refcount_table;
Error *local_err = NULL;
int ret;

Expand Down Expand Up @@ -1637,9 +1637,10 @@ static int qcow2_create2(const char *filename, int64_t total_size,
goto out;
}

/* Write an empty refcount table */
refcount_table = g_malloc0(cluster_size);
ret = bdrv_pwrite(bs, cluster_size, refcount_table, cluster_size);
/* Write a refcount table with one refcount block */
refcount_table = g_malloc0(2 * cluster_size);
refcount_table[0] = cpu_to_be64(2 * cluster_size);
ret = bdrv_pwrite(bs, cluster_size, refcount_table, 2 * cluster_size);
g_free(refcount_table);

if (ret < 0) {
Expand All @@ -1663,7 +1664,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
goto out;
}

ret = qcow2_alloc_clusters(bs, 2 * cluster_size);
ret = qcow2_alloc_clusters(bs, 3 * cluster_size);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
"header and refcount table");
Expand Down
6 changes: 3 additions & 3 deletions tests/qemu-iotests/026.out
Expand Up @@ -475,7 +475,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Event: refblock_alloc.write_blocks; errno: 28; imm: off; once: off; write
write failed: No space left on device

10 leaked clusters were found on the image.
11 leaked clusters were found on the image.
This means waste of disk space, but no harm to data.
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824

Expand All @@ -499,7 +499,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Event: refblock_alloc.write_table; errno: 28; imm: off; once: off; write
write failed: No space left on device

10 leaked clusters were found on the image.
11 leaked clusters were found on the image.
This means waste of disk space, but no harm to data.
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824

Expand All @@ -523,7 +523,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Event: refblock_alloc.switch_table; errno: 28; imm: off; once: off; write
write failed: No space left on device

10 leaked clusters were found on the image.
11 leaked clusters were found on the image.
This means waste of disk space, but no harm to data.
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824

Expand Down
2 changes: 1 addition & 1 deletion tests/qemu-iotests/044.out
@@ -1,6 +1,6 @@
No errors were found on the image.
7292415/33554432 = 21.73% allocated, 0.00% fragmented, 0.00% compressed clusters
Image end offset: 4296448000
Image end offset: 4296152064
.
----------------------------------------------------------------------
Ran 1 tests
Expand Down
11 changes: 11 additions & 0 deletions tests/qemu-iotests/080
Expand Up @@ -56,6 +56,8 @@ offset_header_size=100
offset_ext_magic=$header_size
offset_ext_size=$((header_size + 4))

offset_l2_table_0=$((0x40000))

echo
echo "== Huge header size =="
_make_test_img 64M
Expand Down Expand Up @@ -143,6 +145,15 @@ poke_file "$TEST_IMG" "$offset_backing_file_offset" "\x00\x00\x00\x00\x00\x00\x1
poke_file "$TEST_IMG" "$offset_backing_file_size" "\xff\xff\xff\xff"
{ $QEMU_IO -c "read 0 512" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir

echo
echo "== Invalid L2 entry (huge physical offset) =="
_make_test_img 64M
{ $QEMU_IO -c "write 0 512" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir
poke_file "$TEST_IMG" "$offset_l2_table_0" "\xbf\xff\xff\xff\xff\xff\x00\x00"
{ $QEMU_IMG snapshot -c test $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir
poke_file "$TEST_IMG" "$offset_l2_table_0" "\x80\x00\x00\xff\xff\xff\x00\x00"
{ $QEMU_IMG snapshot -c test $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir

# success, all done
echo "*** done"
rm -f $seq.full
Expand Down
7 changes: 7 additions & 0 deletions tests/qemu-iotests/080.out
Expand Up @@ -63,4 +63,11 @@ no file open, try 'help open'
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
qemu-io: can't open device TEST_DIR/t.qcow2: Backing file name too long
no file open, try 'help open'

== Invalid L2 entry (huge physical offset) ==
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
wrote 512/512 bytes at offset 0
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
qemu-img: Could not create snapshot 'test': -27 (File too large)
qemu-img: Could not create snapshot 'test': -11 (Resource temporarily unavailable)
*** done

0 comments on commit ffa3ab0

Please sign in to comment.