Skip to content

Commit

Permalink
Merge remote-tracking branch 'remotes/ericb/tags/pull-nbd-2019-04-01'…
Browse files Browse the repository at this point in the history
… into staging

nbd patches for 2019-04-01

- Better behavior of qemu-img map on NBD images
- Fixes for NBD protocol alignment corner cases:
 - the server has fewer places where it sends reads or block status
   not aligned to its advertised block size
 - the client has more cases where it can work around server
   non-compliance present in qemu 3.1
 - the client now avoids non-compliant requests when interoperating
   with nbdkit or other servers not advertising block size

# gpg: Signature made Mon 01 Apr 2019 15:06:54 BST
# gpg:                using RSA key A7A16B4A2527436A
# gpg: Good signature from "Eric Blake <eblake@redhat.com>" [full]
# gpg:                 aka "Eric Blake (Free Software Programmer) <ebb9@byu.net>" [full]
# gpg:                 aka "[jpeg image of size 6874]" [full]
# Primary key fingerprint: 71C2 CC22 B1C4 6029 27D2  F3AA A7A1 6B4A 2527 436A

* remotes/ericb/tags/pull-nbd-2019-04-01:
  nbd/client: Trace server noncompliance on structured reads
  nbd/server: Advertise actual minimum block size
  block: Add bdrv_get_request_alignment()
  nbd/client: Support qemu-img convert from unaligned size
  nbd/client: Reject inaccessible tail of inconsistent server
  nbd/client: Report offsets in bdrv_block_status
  nbd/client: Lower min_block for block-status, unaligned size
  iotests: Add 241 to test NBD on unaligned images
  nbd-client: Work around server BLOCK_STATUS misalignment at EOF
  qemu-img: Gracefully shutdown when map can't finish
  nbd: Permit simple error to NBD_CMD_BLOCK_STATUS
  nbd: Don't lose server's error to NBD_CMD_BLOCK_STATUS
  nbd: Tolerate some server non-compliance in NBD_CMD_BLOCK_STATUS
  qemu-img: Report bdrv_block_status failures

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
  • Loading branch information
pm215 committed Apr 2, 2019
2 parents 230ce19 + 75d34eb commit 4717595
Show file tree
Hide file tree
Showing 14 changed files with 299 additions and 48 deletions.
7 changes: 7 additions & 0 deletions block/block-backend.c
Expand Up @@ -1764,6 +1764,13 @@ int blk_get_flags(BlockBackend *blk)
}
}

/* Returns the minimum request alignment, in bytes; guaranteed nonzero */
uint32_t blk_get_request_alignment(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
return bs ? bs->bl.request_alignment : BDRV_SECTOR_SIZE;
}

/* Returns the maximum transfer length, in bytes; guaranteed nonzero */
uint32_t blk_get_max_transfer(BlockBackend *blk)
{
Expand Down
124 changes: 101 additions & 23 deletions block/nbd-client.c
Expand Up @@ -211,7 +211,8 @@ static inline uint64_t payload_advance64(uint8_t **payload)
return ldq_be_p(*payload - 8);
}

static int nbd_parse_offset_hole_payload(NBDStructuredReplyChunk *chunk,
static int nbd_parse_offset_hole_payload(NBDClientSession *client,
NBDStructuredReplyChunk *chunk,
uint8_t *payload, uint64_t orig_offset,
QEMUIOVector *qiov, Error **errp)
{
Expand All @@ -233,15 +234,19 @@ static int nbd_parse_offset_hole_payload(NBDStructuredReplyChunk *chunk,
" region");
return -EINVAL;
}
if (client->info.min_block &&
!QEMU_IS_ALIGNED(hole_size, client->info.min_block)) {
trace_nbd_structured_read_compliance("hole");
}

qemu_iovec_memset(qiov, offset - orig_offset, 0, hole_size);

return 0;
}

/* nbd_parse_blockstatus_payload
* support only one extent in reply and only for
* base:allocation context
* Based on our request, we expect only one extent in reply, for the
* base:allocation context.
*/
static int nbd_parse_blockstatus_payload(NBDClientSession *client,
NBDStructuredReplyChunk *chunk,
Expand All @@ -250,7 +255,8 @@ static int nbd_parse_blockstatus_payload(NBDClientSession *client,
{
uint32_t context_id;

if (chunk->length != sizeof(context_id) + sizeof(*extent)) {
/* The server succeeded, so it must have sent [at least] one extent */
if (chunk->length < sizeof(context_id) + sizeof(*extent)) {
error_setg(errp, "Protocol error: invalid payload for "
"NBD_REPLY_TYPE_BLOCK_STATUS");
return -EINVAL;
Expand All @@ -268,18 +274,50 @@ static int nbd_parse_blockstatus_payload(NBDClientSession *client,
extent->length = payload_advance32(&payload);
extent->flags = payload_advance32(&payload);

if (extent->length == 0 ||
(client->info.min_block && !QEMU_IS_ALIGNED(extent->length,
client->info.min_block))) {
if (extent->length == 0) {
error_setg(errp, "Protocol error: server sent status chunk with "
"invalid length");
"zero length");
return -EINVAL;
}

/* The server is allowed to send us extra information on the final
* extent; just clamp it to the length we requested. */
/*
* A server sending unaligned block status is in violation of the
* protocol, but as qemu-nbd 3.1 is such a server (at least for
* POSIX files that are not a multiple of 512 bytes, since qemu
* rounds files up to 512-byte multiples but lseek(SEEK_HOLE)
* still sees an implicit hole beyond the real EOF), it's nicer to
* work around the misbehaving server. If the request included
* more than the final unaligned block, truncate it back to an
* aligned result; if the request was only the final block, round
* up to the full block and change the status to fully-allocated
* (always a safe status, even if it loses information).
*/
if (client->info.min_block && !QEMU_IS_ALIGNED(extent->length,
client->info.min_block)) {
trace_nbd_parse_blockstatus_compliance("extent length is unaligned");
if (extent->length > client->info.min_block) {
extent->length = QEMU_ALIGN_DOWN(extent->length,
client->info.min_block);
} else {
extent->length = client->info.min_block;
extent->flags = 0;
}
}

/*
* We used NBD_CMD_FLAG_REQ_ONE, so the server should not have
* sent us any more than one extent, nor should it have included
* status beyond our request in that extent. However, it's easy
* enough to ignore the server's noncompliance without killing the
* connection; just ignore trailing extents, and clamp things to
* the length of our request.
*/
if (chunk->length > sizeof(context_id) + sizeof(*extent)) {
trace_nbd_parse_blockstatus_compliance("more than one extent");
}
if (extent->length > orig_length) {
extent->length = orig_length;
trace_nbd_parse_blockstatus_compliance("extent length too large");
}

return 0;
Expand Down Expand Up @@ -357,6 +395,9 @@ static int nbd_co_receive_offset_data_payload(NBDClientSession *s,
" region");
return -EINVAL;
}
if (s->info.min_block && !QEMU_IS_ALIGNED(data_size, s->info.min_block)) {
trace_nbd_structured_read_compliance("data");
}

qemu_iovec_init(&sub_qiov, qiov->niov);
qemu_iovec_concat(&sub_qiov, qiov, offset - orig_offset, data_size);
Expand Down Expand Up @@ -679,7 +720,7 @@ static int nbd_co_receive_cmdread_reply(NBDClientSession *s, uint64_t handle,
* in qiov */
break;
case NBD_REPLY_TYPE_OFFSET_HOLE:
ret = nbd_parse_offset_hole_payload(&reply.structured, payload,
ret = nbd_parse_offset_hole_payload(s, &reply.structured, payload,
offset, qiov, &local_err);
if (ret < 0) {
s->quit = true;
Expand Down Expand Up @@ -718,9 +759,7 @@ static int nbd_co_receive_blockstatus_reply(NBDClientSession *s,
bool received = false;

assert(!extent->length);
NBD_FOREACH_REPLY_CHUNK(s, iter, handle, s->info.structured_reply,
NULL, &reply, &payload)
{
NBD_FOREACH_REPLY_CHUNK(s, iter, handle, false, NULL, &reply, &payload) {
int ret;
NBDStructuredReplyChunk *chunk = &reply.structured;

Expand Down Expand Up @@ -758,12 +797,9 @@ static int nbd_co_receive_blockstatus_reply(NBDClientSession *s,
payload = NULL;
}

if (!extent->length && !iter.err) {
error_setg(&iter.err,
"Server did not reply with any status extents");
if (!iter.ret) {
iter.ret = -EIO;
}
if (!extent->length && !iter.request_ret) {
error_setg(&local_err, "Server did not reply with any status extents");
nbd_iter_channel_error(&iter, -EIO, &local_err);
}

error_propagate(errp, iter.err);
Expand Down Expand Up @@ -820,6 +856,25 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
if (!bytes) {
return 0;
}
/*
* Work around the fact that the block layer doesn't do
* byte-accurate sizing yet - if the read exceeds the server's
* advertised size because the block layer rounded size up, then
* truncate the request to the server and tail-pad with zero.
*/
if (offset >= client->info.size) {
assert(bytes < BDRV_SECTOR_SIZE);
qemu_iovec_memset(qiov, 0, 0, bytes);
return 0;
}
if (offset + bytes > client->info.size) {
uint64_t slop = offset + bytes - client->info.size;

assert(slop < BDRV_SECTOR_SIZE);
qemu_iovec_memset(qiov, bytes - slop, 0, slop);
request.len -= slop;
}

ret = nbd_co_send_request(bs, &request, NULL);
if (ret < 0) {
return ret;
Expand Down Expand Up @@ -938,15 +993,35 @@ int coroutine_fn nbd_client_co_block_status(BlockDriverState *bs,
.from = offset,
.len = MIN(MIN_NON_ZERO(QEMU_ALIGN_DOWN(INT_MAX,
bs->bl.request_alignment),
client->info.max_block), bytes),
client->info.max_block),
MIN(bytes, client->info.size - offset)),
.flags = NBD_CMD_FLAG_REQ_ONE,
};

if (!client->info.base_allocation) {
*pnum = bytes;
return BDRV_BLOCK_DATA;
*map = offset;
*file = bs;
return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
}

/*
* Work around the fact that the block layer doesn't do
* byte-accurate sizing yet - if the status request exceeds the
* server's advertised size because the block layer rounded size
* up, we truncated the request to the server (above), or are
* called on just the hole.
*/
if (offset >= client->info.size) {
*pnum = bytes;
assert(bytes < BDRV_SECTOR_SIZE);
/* Intentionally don't report offset_valid for the hole */
return BDRV_BLOCK_ZERO;
}

if (client->info.min_block) {
assert(QEMU_IS_ALIGNED(request.len, client->info.min_block));
}
ret = nbd_co_send_request(bs, &request, NULL);
if (ret < 0) {
return ret;
Expand All @@ -967,8 +1042,11 @@ int coroutine_fn nbd_client_co_block_status(BlockDriverState *bs,

assert(extent.length);
*pnum = extent.length;
*map = offset;
*file = bs;
return (extent.flags & NBD_STATE_HOLE ? 0 : BDRV_BLOCK_DATA) |
(extent.flags & NBD_STATE_ZERO ? BDRV_BLOCK_ZERO : 0);
(extent.flags & NBD_STATE_ZERO ? BDRV_BLOCK_ZERO : 0) |
BDRV_BLOCK_OFFSET_VALID;
}

void nbd_client_detach_aio_context(BlockDriverState *bs)
Expand Down
19 changes: 18 additions & 1 deletion block/nbd.c
Expand Up @@ -437,7 +437,24 @@ static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
uint32_t min = s->info.min_block;
uint32_t max = MIN_NON_ZERO(NBD_MAX_BUFFER_SIZE, s->info.max_block);

bs->bl.request_alignment = min ? min : BDRV_SECTOR_SIZE;
/*
* If the server did not advertise an alignment:
* - a size that is not sector-aligned implies that an alignment
* of 1 can be used to access those tail bytes
* - advertisement of block status requires an alignment of 1, so
* that we don't violate block layer constraints that block
* status is always aligned (as we can't control whether the
* server will report sub-sector extents, such as a hole at EOF
* on an unaligned POSIX file)
* - otherwise, assume the server is so old that we are safer avoiding
* sub-sector requests
*/
if (!min) {
min = (!QEMU_IS_ALIGNED(s->info.size, BDRV_SECTOR_SIZE) ||
s->info.base_allocation) ? 1 : BDRV_SECTOR_SIZE;
}

bs->bl.request_alignment = min;
bs->bl.max_pdiscard = max;
bs->bl.max_pwrite_zeroes = max;
bs->bl.max_transfer = max;
Expand Down
2 changes: 2 additions & 0 deletions block/trace-events
Expand Up @@ -157,6 +157,8 @@ nvme_cmd_map_qiov_iov(void *s, int i, void *page, int pages) "s %p iov[%d] %p pa
iscsi_xcopy(void *src_lun, uint64_t src_off, void *dst_lun, uint64_t dst_off, uint64_t bytes, int ret) "src_lun %p offset %"PRIu64" dst_lun %p offset %"PRIu64" bytes %"PRIu64" ret %d"

# nbd-client.c
nbd_parse_blockstatus_compliance(const char *err) "ignoring extra data from non-compliant server: %s"
nbd_structured_read_compliance(const char *type) "server sent non-compliant unaligned read %s chunk"
nbd_read_reply_entry_fail(int ret, const char *err) "ret = %d, err: %s"
nbd_co_request_fail(uint64_t from, uint32_t len, uint64_t handle, uint16_t flags, uint16_t type, const char *name, int ret, const char *err) "Request failed { .from = %" PRIu64", .len = %" PRIu32 ", .handle = %" PRIu64 ", .flags = 0x%" PRIx16 ", .type = %" PRIu16 " (%s) } ret = %d, err: %s"

Expand Down
1 change: 1 addition & 0 deletions include/sysemu/block-backend.h
Expand Up @@ -177,6 +177,7 @@ bool blk_is_available(BlockBackend *blk);
void blk_lock_medium(BlockBackend *blk, bool locked);
void blk_eject(BlockBackend *blk, bool eject_flag);
int blk_get_flags(BlockBackend *blk);
uint32_t blk_get_request_alignment(BlockBackend *blk);
uint32_t blk_get_max_transfer(BlockBackend *blk);
int blk_get_max_iov(BlockBackend *blk);
void blk_set_guest_block_size(BlockBackend *blk, int align);
Expand Down
8 changes: 8 additions & 0 deletions nbd/client.c
Expand Up @@ -426,6 +426,14 @@ static int nbd_opt_info_or_go(QIOChannel *ioc, uint32_t opt,
nbd_send_opt_abort(ioc);
return -1;
}
if (info->min_block &&
!QEMU_IS_ALIGNED(info->size, info->min_block)) {
error_setg(errp, "export size %" PRIu64 "is not multiple of "
"minimum block size %" PRIu32, info->size,
info->min_block);
nbd_send_opt_abort(ioc);
return -1;
}
trace_nbd_receive_negotiate_size_flags(info->size, info->flags);
break;

Expand Down
13 changes: 8 additions & 5 deletions nbd/server.c
Expand Up @@ -607,13 +607,16 @@ static int nbd_negotiate_handle_info(NBDClient *client, uint16_t myflags,
/* Send NBD_INFO_BLOCK_SIZE always, but tweak the minimum size
* according to whether the client requested it, and according to
* whether this is OPT_INFO or OPT_GO. */
/* minimum - 1 for back-compat, or 512 if client is new enough.
* TODO: consult blk_bs(blk)->bl.request_alignment? */
sizes[0] =
(client->opt == NBD_OPT_INFO || blocksize) ? BDRV_SECTOR_SIZE : 1;
/* minimum - 1 for back-compat, or actual if client will obey it. */
if (client->opt == NBD_OPT_INFO || blocksize) {
sizes[0] = blk_get_request_alignment(exp->blk);
} else {
sizes[0] = 1;
}
assert(sizes[0] <= NBD_MAX_BUFFER_SIZE);
/* preferred - Hard-code to 4096 for now.
* TODO: is blk_bs(blk)->bl.opt_transfer appropriate? */
sizes[1] = 4096;
sizes[1] = MAX(4096, sizes[0]);
/* maximum - At most 32M, but smaller as appropriate. */
sizes[2] = MIN(blk_get_max_transfer(exp->blk), NBD_MAX_BUFFER_SIZE);
trace_nbd_negotiate_handle_info_block_size(sizes[0], sizes[1], sizes[2]);
Expand Down
16 changes: 11 additions & 5 deletions qemu-img.c
Expand Up @@ -1630,6 +1630,8 @@ static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
count, &count, NULL, NULL);
}
if (ret < 0) {
error_report("error while reading block status of sector %" PRId64
": %s", sector_num, strerror(-ret));
return ret;
}
n = DIV_ROUND_UP(count, BDRV_SECTOR_SIZE);
Expand Down Expand Up @@ -2734,14 +2736,14 @@ static int img_info(int argc, char **argv)
return 0;
}

static void dump_map_entry(OutputFormat output_format, MapEntry *e,
MapEntry *next)
static int dump_map_entry(OutputFormat output_format, MapEntry *e,
MapEntry *next)
{
switch (output_format) {
case OFORMAT_HUMAN:
if (e->data && !e->has_offset) {
error_report("File contains external, encrypted or compressed clusters.");
exit(1);
return -1;
}
if (e->data && !e->zero) {
printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n",
Expand Down Expand Up @@ -2774,6 +2776,7 @@ static void dump_map_entry(OutputFormat output_format, MapEntry *e,
}
break;
}
return 0;
}

static int get_block_status(BlockDriverState *bs, int64_t offset,
Expand Down Expand Up @@ -2966,12 +2969,15 @@ static int img_map(int argc, char **argv)
}

if (curr.length > 0) {
dump_map_entry(output_format, &curr, &next);
ret = dump_map_entry(output_format, &curr, &next);
if (ret < 0) {
goto out;
}
}
curr = next;
}

dump_map_entry(output_format, &curr, NULL);
ret = dump_map_entry(output_format, &curr, NULL);

out:
blk_unref(blk);
Expand Down
4 changes: 2 additions & 2 deletions tests/qemu-iotests/209.out
@@ -1,2 +1,2 @@
[{ "start": 0, "length": 524288, "depth": 0, "zero": false, "data": true},
{ "start": 524288, "length": 524288, "depth": 0, "zero": true, "data": false}]
[{ "start": 0, "length": 524288, "depth": 0, "zero": false, "data": true, "offset": 0},
{ "start": 524288, "length": 524288, "depth": 0, "zero": true, "data": false, "offset": 524288}]

0 comments on commit 4717595

Please sign in to comment.