Merge tag 'pull-block-2023-06-05' of https://gitlab.com/hreitz/qemu into staging

Block patches

- Fix padding of unaligned vectored requests to match the host alignment
  for vectors with 1023 or 1024 buffers (see the sketch below this list)
- Refactor and fix bugs in parallels's image check functionality
- Add an option to the qcow2 driver to retain (qcow2-level) allocations
  on discard requests from the guest (while still forwarding the discard
  to the lower level and marking the range as zero)
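
As a rough illustration of the first item, the following standalone sketch
(not part of the commit) walks through the collapse arithmetic that the new
bdrv_create_padded_qiov() in block/io.c below performs when head and tail
padding would push a guest I/O vector past IOV_MAX. The variable names mirror
that function; IOV_MAX is assumed to be 1024, as on Linux.

    /*
     * Sketch of the collapse arithmetic from bdrv_create_padded_qiov():
     * an unaligned request with 1024 guest buffers plus head and tail padding.
     */
    #include <assert.h>
    #include <stdio.h>

    #ifndef IOV_MAX
    #define IOV_MAX 1024    /* assumed: the usual Linux value */
    #endif

    int main(void)
    {
        int niov = 1024;    /* guest vector already at the IOV_MAX limit */
        int head = 512;     /* bytes of head padding (request unaligned) */
        int tail = 512;     /* bytes of tail padding */

        int padded_niov = !!head + niov + !!tail;    /* 1026 elements */
        int surplus_count = padded_niov - IOV_MAX;   /* 2 elements too many */
        int collapse_count = surplus_count + 1;      /* merge 3 into 1 bounce buffer */

        /* Only head and tail can push the vector over the limit */
        assert(surplus_count <= !!head + !!tail);
        /* After collapsing, the padded vector fits exactly within IOV_MAX */
        assert(padded_niov - collapse_count + 1 <= IOV_MAX);

        printf("collapse %d guest elements into one bounce buffer\n",
               collapse_count);
        return 0;
    }

With 1024 guest buffers and padding on both ends, the padded vector would have
1026 elements, so the first three guest elements are merged into a single
bounce buffer, bringing the total back to exactly IOV_MAX.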

# -----BEGIN PGP SIGNATURE-----
#
# iQJGBAABCAAwFiEEy2LXoO44KeRfAE00ofpA0JgBnN8FAmR+AT4SHGhyZWl0ekBy
# ZWRoYXQuY29tAAoJEKH6QNCYAZzfnboQAKD6YrreZLoseomRfqOAoApSf6yOdcHk
# 6kfsvzwzjosomsF1Pkzm4851vX5PyDqTdeu0iViM+pxanVO1b494q1P4VcAERqMB
# iZVs68R6M0l6HV9btWFGm+ibHJf4FapdntkIdwog1ka5TIhw5oDWCVNLigjhIoRv
# sM37Bgf14kC3sFTR++0HESsyU1eUP5gJjwJbPZ2IgJBmzYay0is1z5nHA/3VUswu
# 8dKnGQDsv62EtlK7PK8cU2BhLOeNi6Wr3bAb6Wf2QLB5e0qRb7oAkqNx5/UcTznk
# a3XMC1aiWhYvM/+DaYIpQUcIPgA8xQ1KHKeD6WjbGfLgZBqseX0aGWMByUsiY8Bo
# +BPIBnUDrbiPnAKB/XLQfnzlE+s7121/JpEbB7AkZqVFRGuw8Wur4tbc2fzvy8Pw
# x/uQfv3ZPi/2Lf6u7hv/TVHubXi8jucVgx3Ubu5Jeo3901S4/KOQBQ4BQ/GYIGQX
# 38ijSROcEd0eQJ1mTKPEctouxjSZCghNSbrn9DfsL1V3VWqWNKKGCU3hM+RQ1SJT
# 688qvnyYt8QZfTsiDSHR/GfKsufG0DkoqE7c9IhSEPohecAH8Rrc3HcLut7fuwD2
# gCFQhm68CPwwRmBjPCY6Zi1RDzeOyFBSWN31T6t0yTb4OHJ/3/cSZVBJtwwkOVbx
# zwabHDNdY5Kw
# =GuoL
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 05 Jun 2023 08:37:34 AM PDT
# gpg:                using RSA key CB62D7A0EE3829E45F004D34A1FA40D098019CDF
# gpg:                issuer "hreitz@redhat.com"
# gpg: Good signature from "Hanna Reitz <hreitz@redhat.com>" [unknown]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: CB62 D7A0 EE38 29E4 5F00  4D34 A1FA 40D0 9801 9CDF

* tag 'pull-block-2023-06-05' of https://gitlab.com/hreitz/qemu:
  qcow2: add discard-no-unref option
  parallels: Incorrect condition in out-of-image check
  parallels: Replace qemu_co_mutex_lock by WITH_QEMU_LOCK_GUARD
  parallels: Move statistic collection to a separate function
  parallels: Move check of leaks to a separate function
  parallels: Fix statistics calculation
  parallels: Move check of cluster outside image to a separate function
  parallels: Move check of unclean image to a separate function
  parallels: Use generic infrastructure for BAT writing in parallels_co_check()
  parallels: create parallels_set_bat_entry_helper() to assign BAT value
  parallels: Fix image_end_offset and data_end after out-of-image check
  parallels: Fix high_off calculation in parallels_co_check()
  parallels: Out of image offset in BAT leads to image inflation
  iotests/iov-padding: New test
  util/iov: Remove qemu_iovec_init_extended()
  block: Collapse padded I/O vecs exceeding IOV_MAX
  util/iov: Make qiov_slice() public

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
rth7680 committed Jun 5, 2023
2 parents afa351f + 42a2890 commit b52daaf
Showing 11 changed files with 523 additions and 151 deletions.
166 changes: 151 additions & 15 deletions block/io.c
@@ -1441,6 +1441,14 @@ bdrv_aligned_preadv(BdrvChild *child, BdrvTrackedRequest *req,
* @merge_reads is true for small requests,
* if @buf_len == @head + bytes + @tail. In this case it is possible that both
* head and tail exist but @buf_len == align and @tail_buf == @buf.
*
* @write is true for write requests, false for read requests.
*
* If padding makes the vector too long (exceeding IOV_MAX), then we need to
* merge existing vector elements into a single one. @collapse_bounce_buf acts
* as the bounce buffer in such cases. @pre_collapse_qiov has the pre-collapse
* I/O vector elements so for read requests, the data can be copied back after
* the read is done.
*/
typedef struct BdrvRequestPadding {
uint8_t *buf;
@@ -1449,11 +1457,17 @@ typedef struct BdrvRequestPadding {
size_t head;
size_t tail;
bool merge_reads;
bool write;
QEMUIOVector local_qiov;

uint8_t *collapse_bounce_buf;
size_t collapse_len;
QEMUIOVector pre_collapse_qiov;
} BdrvRequestPadding;

static bool bdrv_init_padding(BlockDriverState *bs,
int64_t offset, int64_t bytes,
bool write,
BdrvRequestPadding *pad)
{
int64_t align = bs->bl.request_alignment;
@@ -1485,6 +1499,8 @@ static bool bdrv_init_padding(BlockDriverState *bs,
pad->tail_buf = pad->buf + pad->buf_len - align;
}

pad->write = write;

return true;
}

@@ -1549,22 +1565,134 @@ bdrv_padding_rmw_read(BdrvChild *child, BdrvTrackedRequest *req,
return 0;
}

static void bdrv_padding_destroy(BdrvRequestPadding *pad)
/**
* Free *pad's associated buffers, and perform any necessary finalization steps.
*/
static void bdrv_padding_finalize(BdrvRequestPadding *pad)
{
if (pad->collapse_bounce_buf) {
if (!pad->write) {
/*
* If padding required elements in the vector to be collapsed into a
* bounce buffer, copy the bounce buffer content back
*/
qemu_iovec_from_buf(&pad->pre_collapse_qiov, 0,
pad->collapse_bounce_buf, pad->collapse_len);
}
qemu_vfree(pad->collapse_bounce_buf);
qemu_iovec_destroy(&pad->pre_collapse_qiov);
}
if (pad->buf) {
qemu_vfree(pad->buf);
qemu_iovec_destroy(&pad->local_qiov);
}
memset(pad, 0, sizeof(*pad));
}

/*
* Create pad->local_qiov by wrapping @iov in the padding head and tail, while
* ensuring that the resulting vector will not exceed IOV_MAX elements.
*
* To ensure this, when necessary, the first two or three elements of @iov are
* merged into pad->collapse_bounce_buf and replaced by a reference to that
* bounce buffer in pad->local_qiov.
*
* After performing a read request, the data from the bounce buffer must be
* copied back into pad->pre_collapse_qiov (e.g. by bdrv_padding_finalize()).
*/
static int bdrv_create_padded_qiov(BlockDriverState *bs,
BdrvRequestPadding *pad,
struct iovec *iov, int niov,
size_t iov_offset, size_t bytes)
{
int padded_niov, surplus_count, collapse_count;

/* Assert this invariant */
assert(niov <= IOV_MAX);

/*
* Cannot pad if resulting length would exceed SIZE_MAX. Returning an error
* to the guest is not ideal, but there is little else we can do. At least
* this will practically never happen on 64-bit systems.
*/
if (SIZE_MAX - pad->head < bytes ||
SIZE_MAX - pad->head - bytes < pad->tail)
{
return -EINVAL;
}

/* Length of the resulting IOV if we just concatenated everything */
padded_niov = !!pad->head + niov + !!pad->tail;

qemu_iovec_init(&pad->local_qiov, MIN(padded_niov, IOV_MAX));

if (pad->head) {
qemu_iovec_add(&pad->local_qiov, pad->buf, pad->head);
}

/*
* If padded_niov > IOV_MAX, we cannot just concatenate everything.
* Instead, merge the first two or three elements of @iov to reduce the
* number of vector elements as necessary.
*/
if (padded_niov > IOV_MAX) {
/*
* Only the head and tail can have led to the number of entries exceeding
* IOV_MAX, so we can exceed it by the head and tail at most. We need
* to reduce the number of elements by `surplus_count`, so we merge that
* many elements plus one into one element.
*/
surplus_count = padded_niov - IOV_MAX;
assert(surplus_count <= !!pad->head + !!pad->tail);
collapse_count = surplus_count + 1;

/*
* Move the elements to collapse into `pad->pre_collapse_qiov`, then
* advance `iov` (and associated variables) by those elements.
*/
qemu_iovec_init(&pad->pre_collapse_qiov, collapse_count);
qemu_iovec_concat_iov(&pad->pre_collapse_qiov, iov,
collapse_count, iov_offset, SIZE_MAX);
iov += collapse_count;
iov_offset = 0;
niov -= collapse_count;
bytes -= pad->pre_collapse_qiov.size;

/*
* Construct the bounce buffer to match the length of the to-collapse
* vector elements, and for write requests, initialize it with the data
* from those elements. Then add it to `pad->local_qiov`.
*/
pad->collapse_len = pad->pre_collapse_qiov.size;
pad->collapse_bounce_buf = qemu_blockalign(bs, pad->collapse_len);
if (pad->write) {
qemu_iovec_to_buf(&pad->pre_collapse_qiov, 0,
pad->collapse_bounce_buf, pad->collapse_len);
}
qemu_iovec_add(&pad->local_qiov,
pad->collapse_bounce_buf, pad->collapse_len);
}

qemu_iovec_concat_iov(&pad->local_qiov, iov, niov, iov_offset, bytes);

if (pad->tail) {
qemu_iovec_add(&pad->local_qiov,
pad->buf + pad->buf_len - pad->tail, pad->tail);
}

assert(pad->local_qiov.niov == MIN(padded_niov, IOV_MAX));
return 0;
}

/*
* bdrv_pad_request
*
* Exchange request parameters with padded request if needed. Don't include RMW
* read of padding, bdrv_padding_rmw_read() should be called separately if
* needed.
*
* @write is true for write requests, false for read requests.
*
* Request parameters (@qiov, &qiov_offset, &offset, &bytes) are in-out:
* - on function start they represent original request
* - on failure or when padding is not needed they are unchanged
@@ -1573,26 +1701,34 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
static int bdrv_pad_request(BlockDriverState *bs,
QEMUIOVector **qiov, size_t *qiov_offset,
int64_t *offset, int64_t *bytes,
bool write,
BdrvRequestPadding *pad, bool *padded,
BdrvRequestFlags *flags)
{
int ret;
struct iovec *sliced_iov;
int sliced_niov;
size_t sliced_head, sliced_tail;

bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);

if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) {
if (padded) {
*padded = false;
}
return 0;
}

ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
*qiov, *qiov_offset, *bytes,
pad->buf + pad->buf_len - pad->tail,
pad->tail);
sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes,
&sliced_head, &sliced_tail,
&sliced_niov);

/* Guaranteed by bdrv_check_qiov_request() */
assert(*bytes <= SIZE_MAX);
ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
sliced_head, *bytes);
if (ret < 0) {
bdrv_padding_destroy(pad);
bdrv_padding_finalize(pad);
return ret;
}
*bytes += pad->head + pad->tail;
@@ -1659,8 +1795,8 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
flags |= BDRV_REQ_COPY_ON_READ;
}

ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
NULL, &flags);
ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, false,
&pad, NULL, &flags);
if (ret < 0) {
goto fail;
}
@@ -1670,7 +1806,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
bs->bl.request_alignment,
qiov, qiov_offset, flags);
tracked_request_end(&req);
bdrv_padding_destroy(&pad);
bdrv_padding_finalize(&pad);

fail:
bdrv_dec_in_flight(bs);
@@ -2002,7 +2138,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
/* This flag doesn't make sense for padding or zero writes */
flags &= ~BDRV_REQ_REGISTERED_BUF;

padding = bdrv_init_padding(bs, offset, bytes, &pad);
padding = bdrv_init_padding(bs, offset, bytes, true, &pad);
if (padding) {
assert(!(flags & BDRV_REQ_NO_WAIT));
bdrv_make_request_serialising(req, align);
@@ -2050,7 +2186,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
}

out:
bdrv_padding_destroy(&pad);
bdrv_padding_finalize(&pad);

return ret;
}
@@ -2118,8 +2254,8 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
* bdrv_co_do_zero_pwritev() does aligning by itself, so, we do
* alignment only if there is no ZERO flag.
*/
ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
&padded, &flags);
ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, true,
&pad, &padded, &flags);
if (ret < 0) {
return ret;
}
@@ -2149,7 +2285,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
qiov, qiov_offset, flags);

bdrv_padding_destroy(&pad);
bdrv_padding_finalize(&pad);

out:
tracked_request_end(&req);
