Skip to content

Commit

Permalink
Merge tag 'for-upstream' of https://repo.or.cz/qemu/kevin into staging
Browse files Browse the repository at this point in the history
Block layer patches

- virtio-blk: Multiqueue fixes and cleanups
- blklogwrites: Fixes for write_zeroes and superblock update races
- commit/stream: Allow users to request only format driver names in
  backing file format
- monitor: only run coroutine commands in qemu_aio_context

# -----BEGIN PGP SIGNATURE-----
#
# iQJFBAABCAAvFiEE3D3rFZqa+V09dFb+fwmycsiPL9YFAmWqu4ARHGt3b2xmQHJl
# ZGhhdC5jb20ACgkQfwmycsiPL9YrkBAAhCblSyeS+M/BXljSgDB7tyXeHmfYJlIT
# PUBaikRkE4I6ccsosewriGyyBeteEOq8uWc2dYhRkLAED9KVG9Oxg5gQt3GS+Yhn
# UKN4F3+OSa1yJNm+iIb1i7tYoM19F70oQ/LtDgpcqV0wF9lof1nwdLuUcHVRE1g1
# 4WXDGNAy9cISG0pbZ94H3uG4OvQR68BIB3x/SH4iywsxZsMJXm6960c/meRvZcbX
# dDqCSC3Dn5mg/2ozSGL4uUT2t0Bv6l/fv3uGO9vsPEQ9d+TIjeORSW4ISuRA2jry
# rCZq2hN0rh6Lx2YROsnCfqBmhc+JW3BzqTcWmboqVuvBblT7CZJuRExYid+/UlWR
# d6jKfyme09Aiysgksyox87h3GmSClSA6WF2ZXmAilb1xhOV9mZk2naWHuNJfz9pD
# KTSpNeKYXx+UJfDTbhgtPctpYDjhDa8gB/xzXp+m3I7ia2mwYo8cCiXjeN9WwhGR
# WZV/mFcNApmLJQgP53xDG63KxyS7Q+nTZa6L+wd/ISAsgNrJQbi0+/Dpjw89GbkV
# pYzcaM94MauuIjikyDYjgT8wSLu+GuVN9CaEnGXmAPOsEt5agxmcTgadttftvVKh
# ZuqsV6kTGei6ZwN5UAbK9Y+sXpBVq5DGGCpKukh1654Uu0x3vIPJOi5ru848khM9
# l1ZfxFSu614=
# =wym5
# -----END PGP SIGNATURE-----
# gpg: Signature made Fri 19 Jan 2024 18:12:16 GMT
# gpg:                using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6
# gpg:                issuer "kwolf@redhat.com"
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* tag 'for-upstream' of https://repo.or.cz/qemu/kevin:
  block/blklogwrites: Protect mutable driver state with a mutex.
  virtio-blk: always set ioeventfd during startup
  virtio-blk: tolerate failure to set BlockBackend AioContext
  virtio-blk: restart s->rq reqs in vq AioContexts
  virtio-blk: rename dataplane to ioeventfd
  virtio-blk: rename dataplane create/destroy functions
  virtio-blk: move dataplane code into virtio-blk.c
  monitor: only run coroutine commands in qemu_aio_context
  iotests: port 141 to Python for reliable QMP testing
  iotests: add filter_qmp_generated_node_ids()
  stream: Allow users to request only format driver names in backing file format
  commit: Allow users to request only format driver names in backing file format
  string-output-visitor: Fix (pseudo) struct handling
  block/blklogwrites: Fix a bug when logging "write zeroes" operations.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
  • Loading branch information
pm215 committed Jan 20, 2024
2 parents 313aa10 + ced0d71 commit 84b1da2
Show file tree
Hide file tree
Showing 55 changed files with 1,014 additions and 1,024 deletions.
37 changes: 30 additions & 7 deletions block.c
Original file line number Diff line number Diff line change
Expand Up @@ -1309,11 +1309,14 @@ static void bdrv_backing_detach(BdrvChild *c)
}

static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base,
const char *filename, Error **errp)
const char *filename,
bool backing_mask_protocol,
Error **errp)
{
BlockDriverState *parent = c->opaque;
bool read_only = bdrv_is_read_only(parent);
int ret;
const char *format_name;
GLOBAL_STATE_CODE();

if (read_only) {
Expand All @@ -1323,9 +1326,23 @@ static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base,
}
}

ret = bdrv_change_backing_file(parent, filename,
base->drv ? base->drv->format_name : "",
false);
if (base->drv) {
/*
* If the new base image doesn't have a format driver layer, which we
* detect by the fact that @base is a protocol driver, we record
* 'raw' as the format instead of putting the protocol name as the
* backing format
*/
if (backing_mask_protocol && base->drv->protocol_name) {
format_name = "raw";
} else {
format_name = base->drv->format_name;
}
} else {
format_name = "";
}

ret = bdrv_change_backing_file(parent, filename, format_name, false);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not update backing file link");
}
Expand Down Expand Up @@ -1479,10 +1496,14 @@ static void GRAPH_WRLOCK bdrv_child_cb_detach(BdrvChild *child)
}

static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base,
const char *filename, Error **errp)
const char *filename,
bool backing_mask_protocol,
Error **errp)
{
if (c->role & BDRV_CHILD_COW) {
return bdrv_backing_update_filename(c, base, filename, errp);
return bdrv_backing_update_filename(c, base, filename,
backing_mask_protocol,
errp);
}
return 0;
}
Expand Down Expand Up @@ -5803,7 +5824,8 @@ void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base)
*
*/
int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
const char *backing_file_str)
const char *backing_file_str,
bool backing_mask_protocol)
{
BlockDriverState *explicit_top = top;
bool update_inherits_from;
Expand Down Expand Up @@ -5869,6 +5891,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,

if (c->klass->update_filename) {
ret = c->klass->update_filename(c, base, backing_file_str,
backing_mask_protocol,
&local_err);
if (ret < 0) {
/*
Expand Down
120 changes: 103 additions & 17 deletions block/blklogwrites.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
*
* Copyright (c) 2017 Tuomas Tynkkynen <tuomas@tuxera.com>
* Copyright (c) 2018 Aapo Vienamo <aapo@tuxera.com>
* Copyright (c) 2018 Ari Sundholm <ari@tuxera.com>
* Copyright (c) 2018-2024 Ari Sundholm <ari@tuxera.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
Expand Down Expand Up @@ -55,9 +55,34 @@ typedef struct {
BdrvChild *log_file;
uint32_t sectorsize;
uint32_t sectorbits;
uint64_t update_interval;

/*
* The mutable state of the driver, consisting of the current log sector
* and the number of log entries.
*
* May be read and/or written from multiple threads, and the mutex must be
* held when accessing these fields.
*/
uint64_t cur_log_sector;
uint64_t nr_entries;
uint64_t update_interval;
QemuMutex mutex;

/*
* The super block sequence number. Non-zero if a super block update is in
* progress.
*
* The mutex must be held when accessing this field.
*/
uint64_t super_update_seq;

/*
* A coroutine-aware queue to serialize super block updates.
*
* Used with the mutex to ensure that only one thread be updating the super
* block at a time.
*/
CoQueue super_update_queue;
} BDRVBlkLogWritesState;

static QemuOptsList runtime_opts = {
Expand Down Expand Up @@ -169,6 +194,9 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}

qemu_mutex_init(&s->mutex);
qemu_co_queue_init(&s->super_update_queue);

log_append = qemu_opt_get_bool(opts, "log-append", false);

if (log_append) {
Expand Down Expand Up @@ -231,6 +259,8 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
s->nr_entries = 0;
}

s->super_update_seq = 0;

if (!blk_log_writes_sector_size_valid(log_sector_size)) {
ret = -EINVAL;
error_setg(errp, "Invalid log sector size %"PRIu64, log_sector_size);
Expand All @@ -255,6 +285,7 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
bdrv_unref_child(bs, s->log_file);
bdrv_graph_wrunlock();
s->log_file = NULL;
qemu_mutex_destroy(&s->mutex);
}
fail:
qemu_opts_del(opts);
Expand All @@ -269,6 +300,7 @@ static void blk_log_writes_close(BlockDriverState *bs)
bdrv_unref_child(bs, s->log_file);
s->log_file = NULL;
bdrv_graph_wrunlock();
qemu_mutex_destroy(&s->mutex);
}

static int64_t coroutine_fn GRAPH_RDLOCK
Expand All @@ -295,7 +327,7 @@ static void blk_log_writes_child_perm(BlockDriverState *bs, BdrvChild *c,

static void blk_log_writes_refresh_limits(BlockDriverState *bs, Error **errp)
{
BDRVBlkLogWritesState *s = bs->opaque;
const BDRVBlkLogWritesState *s = bs->opaque;
bs->bl.request_alignment = s->sectorsize;
}

Expand Down Expand Up @@ -328,38 +360,85 @@ static void coroutine_fn GRAPH_RDLOCK
blk_log_writes_co_do_log(BlkLogWritesLogReq *lr)
{
BDRVBlkLogWritesState *s = lr->bs->opaque;
uint64_t cur_log_offset = s->cur_log_sector << s->sectorbits;

s->nr_entries++;
s->cur_log_sector +=
ROUND_UP(lr->qiov->size, s->sectorsize) >> s->sectorbits;

lr->log_ret = bdrv_co_pwritev(s->log_file, cur_log_offset, lr->qiov->size,
/*
* Determine the offsets and sizes of different parts of the entry, and
* update the state of the driver.
*
* This needs to be done in one go, before any actual I/O is done, as the
* log entry may have to be written in two parts, and the state of the
* driver may be modified by other driver operations while waiting for the
* I/O to complete.
*/
qemu_mutex_lock(&s->mutex);
const uint64_t entry_start_sector = s->cur_log_sector;
const uint64_t entry_offset = entry_start_sector << s->sectorbits;
const uint64_t qiov_aligned_size = ROUND_UP(lr->qiov->size, s->sectorsize);
const uint64_t entry_aligned_size = qiov_aligned_size +
ROUND_UP(lr->zero_size, s->sectorsize);
const uint64_t entry_nr_sectors = entry_aligned_size >> s->sectorbits;
const uint64_t entry_seq = s->nr_entries + 1;

s->nr_entries = entry_seq;
s->cur_log_sector += entry_nr_sectors;
qemu_mutex_unlock(&s->mutex);

/*
* Write the log entry. Note that if this is a "write zeroes" operation,
* only the entry header is written here, with the zeroing being done
* separately below.
*/
lr->log_ret = bdrv_co_pwritev(s->log_file, entry_offset, lr->qiov->size,
lr->qiov, 0);

/* Logging for the "write zeroes" operation */
if (lr->log_ret == 0 && lr->zero_size) {
cur_log_offset = s->cur_log_sector << s->sectorbits;
s->cur_log_sector +=
ROUND_UP(lr->zero_size, s->sectorsize) >> s->sectorbits;
const uint64_t zeroes_offset = entry_offset + qiov_aligned_size;

lr->log_ret = bdrv_co_pwrite_zeroes(s->log_file, cur_log_offset,
lr->log_ret = bdrv_co_pwrite_zeroes(s->log_file, zeroes_offset,
lr->zero_size, 0);
}

/* Update super block on flush or every update interval */
if (lr->log_ret == 0 && ((lr->entry.flags & LOG_FLUSH_FLAG)
|| (s->nr_entries % s->update_interval == 0)))
|| (entry_seq % s->update_interval == 0)))
{
struct log_write_super super = {
.magic = cpu_to_le64(WRITE_LOG_MAGIC),
.version = cpu_to_le64(WRITE_LOG_VERSION),
.nr_entries = cpu_to_le64(s->nr_entries),
.nr_entries = const_le64(0),
.sectorsize = cpu_to_le32(s->sectorsize),
};
void *zeroes = g_malloc0(s->sectorsize - sizeof(super));
void *zeroes;
QEMUIOVector qiov;

/*
* Wait if a super block update is already in progress.
* Bail out if a newer update got its turn before us.
*/
WITH_QEMU_LOCK_GUARD(&s->mutex) {
CoQueueWaitFlags wait_flags = 0;
while (s->super_update_seq) {
if (entry_seq < s->super_update_seq) {
return;
}
qemu_co_queue_wait_flags(&s->super_update_queue,
&s->mutex, wait_flags);

/*
* In case the wait condition remains true after wakeup,
* to avoid starvation, make sure that this request is
* scheduled to rerun next by pushing it to the front of the
* queue.
*/
wait_flags = CO_QUEUE_WAIT_FRONT;
}
s->super_update_seq = entry_seq;
super.nr_entries = cpu_to_le64(s->nr_entries);
}

zeroes = g_malloc0(s->sectorsize - sizeof(super));

qemu_iovec_init(&qiov, 2);
qemu_iovec_add(&qiov, &super, sizeof(super));
qemu_iovec_add(&qiov, zeroes, s->sectorsize - sizeof(super));
Expand All @@ -369,6 +448,13 @@ blk_log_writes_co_do_log(BlkLogWritesLogReq *lr)
if (lr->log_ret == 0) {
lr->log_ret = bdrv_co_flush(s->log_file->bs);
}

/* The super block has been updated. Let another request have a go. */
qemu_mutex_lock(&s->mutex);
s->super_update_seq = 0;
(void) qemu_co_queue_next(&s->super_update_queue);
qemu_mutex_unlock(&s->mutex);

qemu_iovec_destroy(&qiov);
g_free(zeroes);
}
Expand All @@ -388,7 +474,7 @@ blk_log_writes_co_log(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
{
QEMUIOVector log_qiov;
size_t niov = qiov ? qiov->niov : 0;
BDRVBlkLogWritesState *s = bs->opaque;
const BDRVBlkLogWritesState *s = bs->opaque;
BlkLogWritesFileReq fr = {
.bs = bs,
.offset = offset,
Expand Down
6 changes: 5 additions & 1 deletion block/commit.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ typedef struct CommitBlockJob {
bool base_read_only;
bool chain_frozen;
char *backing_file_str;
bool backing_mask_protocol;
} CommitBlockJob;

static int commit_prepare(Job *job)
Expand All @@ -61,7 +62,8 @@ static int commit_prepare(Job *job)
/* FIXME: bdrv_drop_intermediate treats total failures and partial failures
* identically. Further work is needed to disambiguate these cases. */
return bdrv_drop_intermediate(s->commit_top_bs, s->base_bs,
s->backing_file_str);
s->backing_file_str,
s->backing_mask_protocol);
}

static void commit_abort(Job *job)
Expand Down Expand Up @@ -254,6 +256,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *base, BlockDriverState *top,
int creation_flags, int64_t speed,
BlockdevOnError on_error, const char *backing_file_str,
bool backing_mask_protocol,
const char *filter_node_name, Error **errp)
{
CommitBlockJob *s;
Expand Down Expand Up @@ -408,6 +411,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
blk_set_disable_request_queuing(s->top, true);

s->backing_file_str = g_strdup(backing_file_str);
s->backing_mask_protocol = backing_mask_protocol;
s->on_error = on_error;

trace_commit_start(bs, base, top, s);
Expand Down
2 changes: 1 addition & 1 deletion block/monitor/block-hmp-cmds.c
Original file line number Diff line number Diff line change
Expand Up @@ -496,7 +496,7 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict)
const char *base = qdict_get_try_str(qdict, "base");
int64_t speed = qdict_get_try_int(qdict, "speed", 0);

qmp_block_stream(device, device, base, NULL, NULL, NULL,
qmp_block_stream(device, device, base, NULL, NULL, false, false, NULL,
qdict_haskey(qdict, "speed"), speed,
true, BLOCKDEV_ON_ERROR_REPORT, NULL,
false, false, false, false, &error);
Expand Down
10 changes: 9 additions & 1 deletion block/stream.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ typedef struct StreamBlockJob {
BlockDriverState *target_bs;
BlockdevOnError on_error;
char *backing_file_str;
bool backing_mask_protocol;
bool bs_read_only;
} StreamBlockJob;

Expand Down Expand Up @@ -95,7 +96,12 @@ static int stream_prepare(Job *job)
if (unfiltered_base) {
base_id = s->backing_file_str ?: unfiltered_base->filename;
if (unfiltered_base->drv) {
base_fmt = unfiltered_base->drv->format_name;
if (s->backing_mask_protocol &&
unfiltered_base->drv->protocol_name) {
base_fmt = "raw";
} else {
base_fmt = unfiltered_base->drv->format_name;
}
}
}

Expand Down Expand Up @@ -247,6 +253,7 @@ static const BlockJobDriver stream_job_driver = {

void stream_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *base, const char *backing_file_str,
bool backing_mask_protocol,
BlockDriverState *bottom,
int creation_flags, int64_t speed,
BlockdevOnError on_error,
Expand Down Expand Up @@ -398,6 +405,7 @@ void stream_start(const char *job_id, BlockDriverState *bs,
s->base_overlay = base_overlay;
s->above_base = above_base;
s->backing_file_str = g_strdup(backing_file_str);
s->backing_mask_protocol = backing_mask_protocol;
s->cor_filter_bs = cor_filter_bs;
s->target_bs = bs;
s->bs_read_only = bs_read_only;
Expand Down

0 comments on commit 84b1da2

Please sign in to comment.