Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge tag 'migration-20230509-pull-request' of https://gitlab.com/juan.quintela/qemu into staging

Migration Pull request (20230509 vintage) take 2

Hi

In this take 2:
- Change uint -> uint32_t to fix mingw32 compilation.

Please apply.
[take 1]
In this PULL request:
- 1st part of colo support for multifd (lukas)
- 1st part of disabling colo option (vladimir)

Please, apply.

# -----BEGIN PGP SIGNATURE-----
#
# iQIzBAABCAAdFiEEGJn/jt6/WMzuA0uC9IfvGFhy1yMFAmRb3dgACgkQ9IfvGFhy
# 1yNLBxAAwHiAOdSPS7TqJXH2/PkBKsd42XMtWzC9UowZ6SUdQi0Q2bQUBnygJ8BA
# 59yLOTPdwUhaPWk4KsyKM2znOCJ+f9MF5V4QXbyILf1WCAq6d+mtPwArnYF1TRwi
# XIewVDeRopdOO5lnWGcfAKZZ5WIDzA/bn6NiGLi+pQa5HGyk84Bk+tFa8kJI6xBL
# 5CWfhNTcxDNYRFg/z/9YVirkuxIXEEL6VEeRFV+pmFuj05q9bysWJkLFoEcFNawO
# gp1foHDkU7wHmHDJ3D4AVTm3TW641ft1wdlHIHZRoOiIIu3EUOoDEVVsaCfdxrY8
# pPJZ5m37wb52GIaCJmigG8rkHxIJ8xKLk4HKu4umDqFq5jZQ2krnnj7AkQhpp7p2
# aEIOXJQQq7XCsKpuvSUIexPv4gbN5SEYKi7XKoOPe3sZ03Rkn0I5xY3KSyMQMamP
# jtk8tNlRA+9Wug82eb/FtIKDj3//4SbuQOJEdRXjKJBldd3mtWTT/FRj/8oo96/p
# hmTu/cGDrP5qgtWpz0kKI/xaBf8at1nwpDgdEzOjRw4zf6xQHFjbXgJ7tQBH/JUI
# T3A9pdiXN6QdRupcWUSV0iJsfS/5i3mOUTA/C529qGXabSnZzfMK+unL/I8N02yt
# 83o7jSg22etMjaS1c+VuDmzKCAfuZloDZv2Bms/+yM/8k8Xe5S4=
# =vbqf
# -----END PGP SIGNATURE-----
# gpg: Signature made Wed 10 May 2023 07:09:28 PM BST
# gpg:                using RSA key 1899FF8EDEBF58CCEE034B82F487EF185872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>" [undefined]
# gpg:                 aka "Juan Quintela <quintela@trasno.org>" [undefined]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 1899 FF8E DEBF 58CC EE03  4B82 F487 EF18 5872 D723

* tag 'migration-20230509-pull-request' of https://gitlab.com/juan.quintela/qemu:
  migration: block incoming colo when capability is disabled
  migration: disallow change capabilities in COLO state
  migration: process_incoming_migration_co: simplify code flow around ret
  migration: drop colo_incoming_thread from MigrationIncomingState
  build: move COLO under CONFIG_REPLICATION
  colo: make colo_checkpoint_notify static and provide simpler API
  block/meson.build: prefer positive condition for replication
  multifd: Add the ramblock to MultiFDRecvParams
  ram: Let colo_flush_ram_cache take the bitmap_mutex
  ram: Add public helper to set colo bitmap

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
  • Loading branch information
rth7680 committed May 11, 2023
2 parents d530697 + 121cced commit fff86d4
Show file tree
Hide file tree
Showing 17 changed files with 131 additions and 73 deletions.
2 changes: 1 addition & 1 deletion block/meson.build
Expand Up @@ -92,7 +92,7 @@ block_ss.add(when: 'CONFIG_WIN32', if_true: files('file-win32.c', 'win32-aio.c')
block_ss.add(when: 'CONFIG_POSIX', if_true: [files('file-posix.c'), coref, iokit])
block_ss.add(when: libiscsi, if_true: files('iscsi-opts.c'))
block_ss.add(when: 'CONFIG_LINUX', if_true: files('nvme.c'))
if not get_option('replication').disabled()
if get_option('replication').allowed()
block_ss.add(files('replication.c'))
endif
block_ss.add(when: libaio, if_true: files('linux-aio.c'))
Expand Down
1 change: 1 addition & 0 deletions docs/COLO-FT.txt
Expand Up @@ -210,6 +210,7 @@ children.0=childs0 \

3. On Secondary VM's QEMU monitor, issue command
{"execute":"qmp_capabilities"}
{"execute": "migrate-set-capabilities", "arguments": {"capabilities": [ {"capability": "x-colo", "state": true } ] } }
{"execute": "nbd-server-start", "arguments": {"addr": {"type": "inet", "data": {"host": "0.0.0.0", "port": "9999"} } } }
{"execute": "nbd-server-add", "arguments": {"device": "parent0", "writable": true } }

Expand Down
2 changes: 2 additions & 0 deletions hmp-commands.hx
Expand Up @@ -1052,6 +1052,7 @@ SRST
migration (or once already in postcopy).
ERST

#ifdef CONFIG_REPLICATION
{
.name = "x_colo_lost_heartbeat",
.args_type = "",
Expand All @@ -1060,6 +1061,7 @@ ERST
"a failover or takeover is needed.",
.cmd = hmp_x_colo_lost_heartbeat,
},
#endif

SRST
``x_colo_lost_heartbeat``
Expand Down
9 changes: 8 additions & 1 deletion include/migration/colo.h
Expand Up @@ -36,6 +36,13 @@ COLOMode get_colo_mode(void);
/* failover */
void colo_do_failover(void);

void colo_checkpoint_notify(void *opaque);
/*
* colo_checkpoint_delay_set
*
* Handles change of x-checkpoint-delay migration parameter, called from
* migrate_params_apply() to notify COLO module about the change.
*/
void colo_checkpoint_delay_set(void);

void colo_shutdown(void);
#endif
57 changes: 18 additions & 39 deletions migration/colo.c
Expand Up @@ -26,9 +26,7 @@
#include "qemu/rcu.h"
#include "migration/failover.h"
#include "migration/ram.h"
#ifdef CONFIG_REPLICATION
#include "block/replication.h"
#endif
#include "net/colo-compare.h"
#include "net/colo.h"
#include "block/block.h"
Expand Down Expand Up @@ -65,10 +63,27 @@ static bool colo_runstate_is_stopped(void)
return runstate_check(RUN_STATE_COLO) || !runstate_is_running();
}

/*
 * colo_checkpoint_notify
 *
 * Sets the COLO checkpoint event, records the current host-clock time (ms)
 * as the checkpoint time, and re-arms the COLO delay timer to expire one
 * checkpoint delay after that time.
 *
 * @opaque: pointer to the MigrationState to operate on.
 */
static void colo_checkpoint_notify(void *opaque)
{
    MigrationState *ms = opaque;

    qemu_event_set(&ms->colo_checkpoint_event);

    /* Remember when this notification was issued. */
    ms->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);

    /* Next expiry = this notification time + configured checkpoint delay. */
    timer_mod(ms->colo_delay_timer,
              ms->colo_checkpoint_time + migrate_checkpoint_delay());
}

/*
 * colo_checkpoint_delay_set
 *
 * Entry point used when the checkpoint delay has changed. If migration is
 * currently in COLO state, issue a checkpoint notification for the current
 * migration state; otherwise do nothing.
 */
void colo_checkpoint_delay_set(void)
{
    /* Outside of COLO state there is nothing to notify. */
    if (!migration_in_colo_state()) {
        return;
    }

    colo_checkpoint_notify(migrate_get_current());
}

static void secondary_vm_do_failover(void)
{
/* COLO needs enable block-replication */
#ifdef CONFIG_REPLICATION
int old_state;
MigrationIncomingState *mis = migration_incoming_get_current();
Error *local_err = NULL;
Expand Down Expand Up @@ -133,14 +148,10 @@ static void secondary_vm_do_failover(void)
if (mis->migration_incoming_co) {
qemu_coroutine_enter(mis->migration_incoming_co);
}
#else
abort();
#endif
}

static void primary_vm_do_failover(void)
{
#ifdef CONFIG_REPLICATION
MigrationState *s = migrate_get_current();
int old_state;
Error *local_err = NULL;
Expand Down Expand Up @@ -181,9 +192,6 @@ static void primary_vm_do_failover(void)

/* Notify COLO thread that failover work is finished */
qemu_sem_post(&s->colo_exit_sem);
#else
abort();
#endif
}

COLOMode get_colo_mode(void)
Expand Down Expand Up @@ -217,7 +225,6 @@ void colo_do_failover(void)
}
}

#ifdef CONFIG_REPLICATION
void qmp_xen_set_replication(bool enable, bool primary,
bool has_failover, bool failover,
Error **errp)
Expand Down Expand Up @@ -271,7 +278,6 @@ void qmp_xen_colo_do_checkpoint(Error **errp)
/* Notify all filters of all NIC to do checkpoint */
colo_notify_filters_event(COLO_EVENT_CHECKPOINT, errp);
}
#endif

COLOStatus *qmp_query_colo_status(Error **errp)
{
Expand Down Expand Up @@ -435,15 +441,11 @@ static int colo_do_checkpoint_transaction(MigrationState *s,
}
qemu_mutex_lock_iothread();

#ifdef CONFIG_REPLICATION
replication_do_checkpoint_all(&local_err);
if (local_err) {
qemu_mutex_unlock_iothread();
goto out;
}
#else
abort();
#endif

colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err);
if (local_err) {
Expand Down Expand Up @@ -561,15 +563,11 @@ static void colo_process_checkpoint(MigrationState *s)
object_unref(OBJECT(bioc));

qemu_mutex_lock_iothread();
#ifdef CONFIG_REPLICATION
replication_start_all(REPLICATION_MODE_PRIMARY, &local_err);
if (local_err) {
qemu_mutex_unlock_iothread();
goto out;
}
#else
abort();
#endif

vm_start();
qemu_mutex_unlock_iothread();
Expand Down Expand Up @@ -644,17 +642,6 @@ static void colo_process_checkpoint(MigrationState *s)
}
}

/*
 * colo_checkpoint_notify
 *
 * Sets the COLO checkpoint event, stores the current host-clock time (ms)
 * as the checkpoint time, and moves the COLO delay timer so that it expires
 * one checkpoint delay after that time.
 *
 * @opaque: pointer to the MigrationState to operate on.
 */
void colo_checkpoint_notify(void *opaque)
{
    MigrationState *ms = opaque;

    qemu_event_set(&ms->colo_checkpoint_event);

    /* Remember when this notification was issued. */
    ms->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);

    /* Next expiry = this notification time + configured checkpoint delay. */
    timer_mod(ms->colo_delay_timer,
              ms->colo_checkpoint_time + migrate_checkpoint_delay());
}

void migrate_start_colo_process(MigrationState *s)
{
qemu_mutex_unlock_iothread();
Expand Down Expand Up @@ -748,7 +735,6 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
return;
}

#ifdef CONFIG_REPLICATION
replication_get_error_all(&local_err);
if (local_err) {
error_propagate(errp, local_err);
Expand All @@ -765,9 +751,6 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
qemu_mutex_unlock_iothread();
return;
}
#else
abort();
#endif
/* Notify all filters of all NIC to do checkpoint */
colo_notify_filters_event(COLO_EVENT_CHECKPOINT, &local_err);

Expand Down Expand Up @@ -874,15 +857,11 @@ void *colo_process_incoming_thread(void *opaque)
object_unref(OBJECT(bioc));

qemu_mutex_lock_iothread();
#ifdef CONFIG_REPLICATION
replication_start_all(REPLICATION_MODE_SECONDARY, &local_err);
if (local_err) {
qemu_mutex_unlock_iothread();
goto out;
}
#else
abort();
#endif
vm_start();
qemu_mutex_unlock_iothread();
trace_colo_vm_state_change("stop", "run");
Expand Down
6 changes: 4 additions & 2 deletions migration/meson.build
Expand Up @@ -13,8 +13,6 @@ softmmu_ss.add(files(
'block-dirty-bitmap.c',
'channel.c',
'channel-block.c',
'colo-failover.c',
'colo.c',
'exec.c',
'fd.c',
'global_state.c',
Expand All @@ -33,6 +31,10 @@ softmmu_ss.add(files(
'threadinfo.c',
), gnutls)

if get_option('replication').allowed()
softmmu_ss.add(files('colo-failover.c', 'colo.c'))
endif

softmmu_ss.add(when: rdma, if_true: files('rdma.c'))
if get_option('live_block_migration').allowed()
softmmu_ss.add(files('block.c'))
Expand Down
2 changes: 2 additions & 0 deletions migration/migration-hmp-cmds.c
Expand Up @@ -643,13 +643,15 @@ void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict)
hmp_handle_error(mon, err);
}

#ifdef CONFIG_REPLICATION
/*
 * HMP handler for the "x_colo_lost_heartbeat" command.
 *
 * Invokes qmp_x_colo_lost_heartbeat() and hands any resulting error to
 * hmp_handle_error() together with the monitor.
 *
 * @mon:   monitor the command was issued on.
 * @qdict: command arguments (not read here; the command takes none).
 */
void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict)
{
    Error *local_err = NULL;

    qmp_x_colo_lost_heartbeat(&local_err);

    /* local_err is still NULL if the QMP call did not set an error. */
    hmp_handle_error(mon, local_err);
}
#endif

typedef struct HMPMigrationStatus {
QEMUTimer *timer;
Expand Down
35 changes: 23 additions & 12 deletions migration/migration.c
Expand Up @@ -392,6 +392,18 @@ void migration_incoming_disable_colo(void)

int migration_incoming_enable_colo(void)
{
#ifndef CONFIG_REPLICATION
error_report("ENABLE_COLO command come in migration stream, but COLO "
"module is not built in");
return -ENOTSUP;
#endif

if (!migrate_colo()) {
error_report("ENABLE_COLO command come in migration stream, but c-colo "
"capability is not set");
return -EINVAL;
}

if (ram_block_discard_disable(true)) {
error_report("COLO: cannot disable RAM discard");
return -EBUSY;
Expand Down Expand Up @@ -536,32 +548,34 @@ process_incoming_migration_co(void *opaque)
/* Else if something went wrong then just fall out of the normal exit */
}

if (ret < 0) {
error_report("load of migration failed: %s", strerror(-ret));
goto fail;
}

/* we get COLO info, and know if we are in COLO mode */
if (!ret && migration_incoming_colo_enabled()) {
if (migration_incoming_colo_enabled()) {
QemuThread colo_incoming_thread;

/* Make sure all file formats throw away their mutable metadata */
bdrv_activate_all(&local_err);
if (local_err) {
error_report_err(local_err);
goto fail;
}

qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
qemu_thread_create(&colo_incoming_thread, "COLO incoming",
colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
mis->have_colo_incoming_thread = true;
qemu_coroutine_yield();

qemu_mutex_unlock_iothread();
/* Wait checkpoint incoming thread exit before free resource */
qemu_thread_join(&mis->colo_incoming_thread);
qemu_thread_join(&colo_incoming_thread);
qemu_mutex_lock_iothread();
/* We hold the global iothread lock, so it is safe here */
colo_release_ram_cache();
}

if (ret < 0) {
error_report("load of migration failed: %s", strerror(-ret));
goto fail;
}
mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
qemu_bh_schedule(mis->bh);
mis->migration_incoming_co = NULL;
Expand Down Expand Up @@ -2777,10 +2791,7 @@ static void migration_iteration_finish(MigrationState *s)
runstate_set(RUN_STATE_POSTMIGRATE);
break;
case MIGRATION_STATUS_COLO:
if (!migrate_colo()) {
error_report("%s: critical error: calling COLO code without "
"COLO enabled", __func__);
}
assert(migrate_colo());
migrate_start_colo_process(s);
s->vm_was_running = true;
/* Fallthrough */
Expand Down
2 changes: 0 additions & 2 deletions migration/migration.h
Expand Up @@ -162,8 +162,6 @@ struct MigrationIncomingState {

int state;

bool have_colo_incoming_thread;
QemuThread colo_incoming_thread;
/* The coroutine we should enter (back) after failover */
Coroutine *migration_incoming_co;
QemuSemaphore colo_incoming_sem;
Expand Down
11 changes: 5 additions & 6 deletions migration/multifd.c
Expand Up @@ -281,7 +281,6 @@ static void multifd_send_fill_packet(MultiFDSendParams *p)
static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
{
MultiFDPacket_t *packet = p->packet;
RAMBlock *block;
int i;

packet->magic = be32_to_cpu(packet->magic);
Expand Down Expand Up @@ -331,21 +330,21 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)

/* make sure that ramblock is 0 terminated */
packet->ramblock[255] = 0;
block = qemu_ram_block_by_name(packet->ramblock);
if (!block) {
p->block = qemu_ram_block_by_name(packet->ramblock);
if (!p->block) {
error_setg(errp, "multifd: unknown ram block %s",
packet->ramblock);
return -1;
}

p->host = block->host;
p->host = p->block->host;
for (i = 0; i < p->normal_num; i++) {
uint64_t offset = be64_to_cpu(packet->offset[i]);

if (offset > (block->used_length - p->page_size)) {
if (offset > (p->block->used_length - p->page_size)) {
error_setg(errp, "multifd: offset too long %" PRIu64
" (max " RAM_ADDR_FMT ")",
offset, block->used_length);
offset, p->block->used_length);
return -1;
}
p->normal[i] = offset;
Expand Down
2 changes: 2 additions & 0 deletions migration/multifd.h
Expand Up @@ -175,6 +175,8 @@ typedef struct {
uint32_t next_packet_size;
/* packets sent through this channel */
uint64_t num_packets;
/* ramblock */
RAMBlock *block;
/* ramblock host address */
uint8_t *host;
/* non zero pages recv through this channel */
Expand Down
6 changes: 2 additions & 4 deletions migration/options.c
Expand Up @@ -598,7 +598,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
MigrationCapabilityStatusList *cap;
bool new_caps[MIGRATION_CAPABILITY__MAX];

if (migration_is_running(s->state)) {
if (migration_is_running(s->state) || migration_in_colo_state()) {
error_setg(errp, QERR_MIGRATION_ACTIVE);
return;
}
Expand Down Expand Up @@ -1253,9 +1253,7 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp)

if (params->has_x_checkpoint_delay) {
s->parameters.x_checkpoint_delay = params->x_checkpoint_delay;
if (migration_in_colo_state()) {
colo_checkpoint_notify(s);
}
colo_checkpoint_delay_set();
}

if (params->has_block_incremental) {
Expand Down

0 comments on commit fff86d4

Please sign in to comment.