Skip to content

Commit

Permalink
Merge tag 'migration-20240126-pull-request' of https://gitlab.com/pet…
Browse files Browse the repository at this point in the history
…erx/qemu into staging

Migration Pull

[dropped fabiano's patch on modifying cpu model for arm migration tests for
 now]

- Fabiano's patchset to fix migration state references in BHs
- Fabiano's new 'n-1' migration test for CI
- Het's fix on making "uri" optional in QMP migrate cmd
- Markus's HMP leak fix reported by Coverity
- Paolo's cleanup on uffd to replace u64 usage
- Peter's small migration cleanup series all over the places

# -----BEGIN PGP SIGNATURE-----
#
# iIgEABYKADAWIQS5GE3CDMRX2s990ak7X8zN86vXBgUCZbcVeBIccGV0ZXJ4QHJl
# ZGhhdC5jb20ACgkQO1/MzfOr1wYHjgD9F2Fnrf4EuPNC/gF3yUvHVz1mgHqevb/g
# pw/ThcJF31wBALuWmwuUaNWm+VNtRc10YH6bY7HZW8oa1RefRN6QZn0L
# =JGTX
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 29 Jan 2024 03:03:20 GMT
# gpg:                using EDDSA key B9184DC20CC457DACF7DD1A93B5FCCCDF3ABD706
# gpg:                issuer "peterx@redhat.com"
# gpg: Good signature from "Peter Xu <xzpeter@gmail.com>" [marginal]
# gpg:                 aka "Peter Xu <peterx@redhat.com>" [marginal]
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: B918 4DC2 0CC4 57DA CF7D  D1A9 3B5F CCCD F3AB D706

* tag 'migration-20240126-pull-request' of https://gitlab.com/peterx/qemu:
  Make 'uri' optional for migrate QAPI
  migration: Centralize BH creation and dispatch
  migration: Add a wrapper to qemu_bh_schedule
  migration: Reference migration state around loadvm_postcopy_handle_run_bh
  migration: Take reference to migration state around bg_migration_vm_start_bh
  migration: Fix use-after-free of migration state object
  migration/yank: Use channel features
  ci: Disable migration compatibility tests for aarch64
  ci: Add a migration compatibility test job
  analyze-migration.py: Remove trick on parsing ramblocks
  migration: Drop unnecessary check in ram's pending_exact()
  migration: Make threshold_size an uint64_t
  migration: Plug memory leak on HMP migrate error path
  userfaultfd: use 1ULL to build ioctl masks

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
  • Loading branch information
pm215 committed Jan 29, 2024
2 parents 7a1dc45 + 57fd4b4 commit e739015
Show file tree
Hide file tree
Showing 12 changed files with 134 additions and 78 deletions.
64 changes: 64 additions & 0 deletions .gitlab-ci.d/buildtest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,70 @@ build-system-centos:
x86_64-softmmu rx-softmmu sh4-softmmu nios2-softmmu
MAKE_CHECK_ARGS: check-build

# Previous QEMU release. Used for cross-version migration tests.
build-previous-qemu:
extends: .native_build_job_template
artifacts:
when: on_success
expire_in: 2 days
paths:
- build-previous
exclude:
- build-previous/**/*.p
- build-previous/**/*.a.p
- build-previous/**/*.fa.p
- build-previous/**/*.c.o
- build-previous/**/*.c.o.d
- build-previous/**/*.fa
needs:
job: amd64-opensuse-leap-container
variables:
IMAGE: opensuse-leap
TARGETS: x86_64-softmmu aarch64-softmmu
before_script:
- export QEMU_PREV_VERSION="$(sed 's/\([0-9.]*\)\.[0-9]*/v\1.0/' VERSION)"
- git checkout $QEMU_PREV_VERSION
after_script:
- mv build build-previous

.migration-compat-common:
extends: .common_test_job_template
needs:
- job: build-previous-qemu
- job: build-system-opensuse
# The old QEMU could have bugs unrelated to migration that are
# already fixed in the current development branch, so this test
# might fail.
allow_failure: true
variables:
IMAGE: opensuse-leap
MAKE_CHECK_ARGS: check-build
script:
# Use the migration-tests from the older QEMU tree. This avoids
# testing an old QEMU against new features/tests that it is not
# compatible with.
- cd build-previous
# old to new
- QTEST_QEMU_BINARY_SRC=./qemu-system-${TARGET}
QTEST_QEMU_BINARY=../build/qemu-system-${TARGET} ./tests/qtest/migration-test
# new to old
- QTEST_QEMU_BINARY_DST=./qemu-system-${TARGET}
QTEST_QEMU_BINARY=../build/qemu-system-${TARGET} ./tests/qtest/migration-test

# This job is disabled until we release 9.0. The existing
# migration-test in 8.2 is broken on aarch64. The fix was already
# commited, but it will only take effect once 9.0 is out.
migration-compat-aarch64:
extends: .migration-compat-common
variables:
TARGET: aarch64
QEMU_JOB_SKIPPED: 1

migration-compat-x86_64:
extends: .migration-compat-common
variables:
TARGET: x86_64

check-system-centos:
extends: .native_test_job_template
needs:
Expand Down
4 changes: 1 addition & 3 deletions migration/migration-hmp-cmds.c
Original file line number Diff line number Diff line change
Expand Up @@ -764,7 +764,7 @@ void hmp_migrate(Monitor *mon, const QDict *qdict)
bool resume = qdict_get_try_bool(qdict, "resume", false);
const char *uri = qdict_get_str(qdict, "uri");
Error *err = NULL;
MigrationChannelList *caps = NULL;
g_autoptr(MigrationChannelList) caps = NULL;
g_autoptr(MigrationChannel) channel = NULL;

if (inc) {
Expand All @@ -789,8 +789,6 @@ void hmp_migrate(Monitor *mon, const QDict *qdict)
return;
}

qapi_free_MigrationChannelList(caps);

if (!detach) {
HMPMigrationStatus *status;

Expand Down
82 changes: 47 additions & 35 deletions migration/migration.c
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,47 @@ void migration_object_init(void)
dirty_bitmap_mig_init();
}

typedef struct {
QEMUBH *bh;
QEMUBHFunc *cb;
void *opaque;
} MigrationBH;

static void migration_bh_dispatch_bh(void *opaque)
{
MigrationState *s = migrate_get_current();
MigrationBH *migbh = opaque;

/* cleanup this BH */
qemu_bh_delete(migbh->bh);
migbh->bh = NULL;

/* dispatch the other one */
migbh->cb(migbh->opaque);
object_unref(OBJECT(s));

g_free(migbh);
}

void migration_bh_schedule(QEMUBHFunc *cb, void *opaque)
{
MigrationState *s = migrate_get_current();
MigrationBH *migbh = g_new0(MigrationBH, 1);
QEMUBH *bh = qemu_bh_new(migration_bh_dispatch_bh, migbh);

/* Store these to dispatch when the BH runs */
migbh->bh = bh;
migbh->cb = cb;
migbh->opaque = opaque;

/*
* Ref the state for bh, because it may be called when
* there're already no other refs
*/
object_ref(OBJECT(s));
qemu_bh_schedule(bh);
}

void migration_cancel(const Error *error)
{
if (error) {
Expand Down Expand Up @@ -646,7 +687,6 @@ static void process_incoming_migration_bh(void *opaque)
*/
migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_COMPLETED);
qemu_bh_delete(mis->bh);
migration_incoming_state_destroy();
}

Expand Down Expand Up @@ -712,8 +752,7 @@ process_incoming_migration_co(void *opaque)
goto fail;
}

mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
qemu_bh_schedule(mis->bh);
migration_bh_schedule(process_incoming_migration_bh, mis);
return;
fail:
migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
Expand Down Expand Up @@ -1274,9 +1313,6 @@ void migrate_set_state(int *state, int old_state, int new_state)

static void migrate_fd_cleanup(MigrationState *s)
{
qemu_bh_delete(s->cleanup_bh);
s->cleanup_bh = NULL;

g_free(s->hostname);
s->hostname = NULL;
json_writer_free(s->vmdesc);
Expand Down Expand Up @@ -1330,21 +1366,9 @@ static void migrate_fd_cleanup(MigrationState *s)
yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}

static void migrate_fd_cleanup_schedule(MigrationState *s)
{
/*
* Ref the state for bh, because it may be called when
* there're already no other refs
*/
object_ref(OBJECT(s));
qemu_bh_schedule(s->cleanup_bh);
}

static void migrate_fd_cleanup_bh(void *opaque)
{
MigrationState *s = opaque;
migrate_fd_cleanup(s);
object_unref(OBJECT(s));
migrate_fd_cleanup(opaque);
}

void migrate_set_error(MigrationState *s, const Error *error)
Expand Down Expand Up @@ -1567,8 +1591,6 @@ int migrate_init(MigrationState *s, Error **errp)
* parameters/capabilities that the user set, and
* locks.
*/
s->cleanup_bh = 0;
s->vm_start_bh = 0;
s->to_dst_file = NULL;
s->state = MIGRATION_STATUS_NONE;
s->rp_state.from_dst_file = NULL;
Expand Down Expand Up @@ -3138,7 +3160,8 @@ static void migration_iteration_finish(MigrationState *s)
error_report("%s: Unknown ending state %d", __func__, s->state);
break;
}
migrate_fd_cleanup_schedule(s);

migration_bh_schedule(migrate_fd_cleanup_bh, s);
bql_unlock();
}

Expand Down Expand Up @@ -3169,7 +3192,7 @@ static void bg_migration_iteration_finish(MigrationState *s)
break;
}

migrate_fd_cleanup_schedule(s);
migration_bh_schedule(migrate_fd_cleanup_bh, s);
bql_unlock();
}

Expand Down Expand Up @@ -3375,9 +3398,6 @@ static void bg_migration_vm_start_bh(void *opaque)
{
MigrationState *s = opaque;

qemu_bh_delete(s->vm_start_bh);
s->vm_start_bh = NULL;

vm_resume(s->vm_old_state);
migration_downtime_end(s);
}
Expand Down Expand Up @@ -3483,9 +3503,7 @@ static void *bg_migration_thread(void *opaque)
* calling VM state change notifiers from vm_start() would initiate
* writes to virtio VQs memory which is in write-protected region.
*/
s->vm_start_bh = qemu_bh_new(bg_migration_vm_start_bh, s);
qemu_bh_schedule(s->vm_start_bh);

migration_bh_schedule(bg_migration_vm_start_bh, s);
bql_unlock();

while (migration_is_active(s)) {
Expand Down Expand Up @@ -3541,12 +3559,6 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
migrate_error_free(s);

s->expected_downtime = migrate_downtime_limit();
if (resume) {
assert(s->cleanup_bh);
} else {
assert(!s->cleanup_bh);
s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup_bh, s);
}
if (error_in) {
migrate_fd_error(s, error_in);
if (resume) {
Expand Down
7 changes: 2 additions & 5 deletions migration/migration.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,6 @@ struct MigrationIncomingState {
/* PostCopyFD's for external userfaultfds & handlers of shared memory */
GArray *postcopy_remote_fds;

QEMUBH *bh;

int state;

/*
Expand Down Expand Up @@ -255,8 +253,6 @@ struct MigrationState {

/*< public >*/
QemuThread thread;
QEMUBH *vm_start_bh;
QEMUBH *cleanup_bh;
/* Protected by qemu_file_lock */
QEMUFile *to_dst_file;
/* Postcopy specific transfer channel */
Expand Down Expand Up @@ -296,7 +292,7 @@ struct MigrationState {
* this threshold; it's calculated from the requested downtime and
* measured bandwidth, or avail-switchover-bandwidth if specified.
*/
int64_t threshold_size;
uint64_t threshold_size;

/* params from 'migrate-set-parameters' */
MigrationParameters parameters;
Expand Down Expand Up @@ -528,6 +524,7 @@ int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque);
void migration_make_urgent_request(void);
void migration_consume_urgent_request(void);
bool migration_rate_limit(void);
void migration_bh_schedule(QEMUBHFunc *cb, void *opaque);
void migration_cancel(const Error *error);

void migration_populate_vfio_info(MigrationInfo *info);
Expand Down
16 changes: 7 additions & 9 deletions migration/postcopy-ram.c
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,9 @@ void postcopy_thread_create(MigrationIncomingState *mis,
* are target OS specific.
*/
#if defined(__linux__)

#include <poll.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <asm/types.h> /* for __u64 */
#endif

#if defined(__linux__) && defined(__NR_userfaultfd) && defined(CONFIG_EVENTFD)
Expand Down Expand Up @@ -272,8 +270,8 @@ static bool request_ufd_features(int ufd, uint64_t features)
return false;
}

ioctl_mask = (__u64)1 << _UFFDIO_REGISTER |
(__u64)1 << _UFFDIO_UNREGISTER;
ioctl_mask = 1ULL << _UFFDIO_REGISTER |
1ULL << _UFFDIO_UNREGISTER;
if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) {
error_report("Missing userfault features: %" PRIx64,
(uint64_t)(~api_struct.ioctls & ioctl_mask));
Expand Down Expand Up @@ -462,9 +460,9 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, Error **errp)
goto out;
}

feature_mask = (__u64)1 << _UFFDIO_WAKE |
(__u64)1 << _UFFDIO_COPY |
(__u64)1 << _UFFDIO_ZEROPAGE;
feature_mask = 1ULL << _UFFDIO_WAKE |
1ULL << _UFFDIO_COPY |
1ULL << _UFFDIO_ZEROPAGE;
if ((reg_struct.ioctls & feature_mask) != feature_mask) {
error_setg(errp, "Missing userfault map features: %" PRIx64,
(uint64_t)(~reg_struct.ioctls & feature_mask));
Expand Down Expand Up @@ -733,11 +731,11 @@ static int ram_block_enable_notify(RAMBlock *rb, void *opaque)
error_report("%s userfault register: %s", __func__, strerror(errno));
return -1;
}
if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) {
if (!(reg_struct.ioctls & (1ULL << _UFFDIO_COPY))) {
error_report("%s userfault: Region doesn't support COPY", __func__);
return -1;
}
if (reg_struct.ioctls & ((__u64)1 << _UFFDIO_ZEROPAGE)) {
if (reg_struct.ioctls & (1ULL << _UFFDIO_ZEROPAGE)) {
qemu_ram_set_uf_zeroable(rb);
}

Expand Down
9 changes: 4 additions & 5 deletions migration/ram.c
Original file line number Diff line number Diff line change
Expand Up @@ -3213,21 +3213,20 @@ static void ram_state_pending_estimate(void *opaque, uint64_t *must_precopy,
static void ram_state_pending_exact(void *opaque, uint64_t *must_precopy,
uint64_t *can_postcopy)
{
MigrationState *s = migrate_get_current();
RAMState **temp = opaque;
RAMState *rs = *temp;
uint64_t remaining_size;

uint64_t remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;

if (!migration_in_postcopy() && remaining_size < s->threshold_size) {
if (!migration_in_postcopy()) {
bql_lock();
WITH_RCU_READ_LOCK_GUARD() {
migration_bitmap_sync_precopy(rs, false);
}
bql_unlock();
remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
}

remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;

if (migrate_postcopy_ram()) {
/* We can do postcopy, and all the data is postcopiable */
*can_postcopy += remaining_size;
Expand Down
5 changes: 1 addition & 4 deletions migration/savevm.c
Original file line number Diff line number Diff line change
Expand Up @@ -2171,8 +2171,6 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
runstate_set(RUN_STATE_PAUSED);
}

qemu_bh_delete(mis->bh);

trace_vmstate_downtime_checkpoint("dst-postcopy-bh-vm-started");
}

Expand All @@ -2188,8 +2186,7 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis)
}

postcopy_state_set(POSTCOPY_INCOMING_RUNNING);
mis->bh = qemu_bh_new(loadvm_postcopy_handle_run_bh, mis);
qemu_bh_schedule(mis->bh);
migration_bh_schedule(loadvm_postcopy_handle_run_bh, mis);

/* We need to finish reading the stream from the package
* and also stop reading anything more from the stream that loaded the
Expand Down

0 comments on commit e739015

Please sign in to comment.