Skip to content

Commit

Permalink
Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20180214a' into staging
Browse files Browse the repository at this point in the history

Migration pull 20180214

Note that the 'Add test for migration to bad destination' test prints
a 'Connection refused' message while it runs, but it still exits with the
correct code and reports OK. (It checks that the source doesn't fail when
it can't connect, so that is the expected error.)
If that output is particularly disliked, that patch can be skipped individually.

# gpg: Signature made Wed 14 Feb 2018 15:33:04 GMT
# gpg:                using RSA key 0516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>"
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A  9FA9 0516 331E BC5B FDE7

* remotes/dgilbert/tags/pull-migration-20180214a:
  migration: pass MigrationState to migrate_init()
  migration: allow send_rq to fail
  migration: provide postcopy_fault_thread_notify()
  migration: reuse mis->userfault_quit_fd
  migration: better error handling with QEMUFile
  tests/migration: Add test for migration to bad destination
  migration: Fix early failure cleanup
  tests/migration: Add source to PC boot block
  migration: improve documentation of postcopy-ram
  migration/xen: Check return value of qemu_fclose

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
  • Loading branch information
pm215 committed Feb 16, 2018
2 parents 0402ca3 + 3e0c805 commit 648ba91
Show file tree
Hide file tree
Showing 13 changed files with 388 additions and 122 deletions.
4 changes: 2 additions & 2 deletions docs/devel/migration.rst
Expand Up @@ -387,8 +387,8 @@ doesn't finish in a given time the switch is made to postcopy.
Enabling postcopy
-----------------

To enable postcopy, issue this command on the monitor prior to the
start of migration:
To enable postcopy, issue this command on the monitor (both source and
destination) prior to the start of migration:

``migrate_set_capability postcopy-ram on``

Expand Down
3 changes: 2 additions & 1 deletion hmp-commands.hx
Expand Up @@ -1041,7 +1041,8 @@ ETEXI
.params = "",
.help = "Followup to a migration command to switch the migration"
" to postcopy mode. The postcopy-ram capability must "
"be set before the original migration command.",
"be set on both source and destination before the "
"original migration command .",
.cmd = hmp_migrate_start_postcopy,
},

Expand Down
47 changes: 35 additions & 12 deletions migration/migration.c
Expand Up @@ -205,17 +205,35 @@ static void deferred_incoming_migration(Error **errp)
* Send a message on the return channel back to the source
* of the migration.
*/
static void migrate_send_rp_message(MigrationIncomingState *mis,
enum mig_rp_message_type message_type,
uint16_t len, void *data)
/*
 * Send one message on the return path towards the migration source.
 *
 * With mis->rp_mutex held, the message type and @len are each written
 * with qemu_put_be16(), followed by @len bytes of @data, and the file
 * is flushed.
 *
 * Returns -EIO when mis->to_src_file is not set; otherwise whatever
 * qemu_file_get_error() reports for that file after the flush.
 */
static int migrate_send_rp_message(MigrationIncomingState *mis,
                                   enum mig_rp_message_type message_type,
                                   uint16_t len, void *data)
{
    int err;

    trace_migrate_send_rp_message((int)message_type, len);
    qemu_mutex_lock(&mis->rp_mutex);

    if (mis->to_src_file) {
        qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
        qemu_put_be16(mis->to_src_file, len);
        qemu_put_buffer(mis->to_src_file, data, len);
        qemu_fflush(mis->to_src_file);

        /* The QEMUFile may have hit an error while sending */
        err = qemu_file_get_error(mis->to_src_file);
    } else {
        /* The file handle can get lost, e.g. due to network failures */
        err = -EIO;
    }

    qemu_mutex_unlock(&mis->rp_mutex);
    return err;
}

/* Request a range of pages from the source VM at the given
Expand All @@ -225,11 +243,12 @@ static void migrate_send_rp_message(MigrationIncomingState *mis,
* Start: Address offset within the RB
* Len: Length in bytes required - must be a multiple of pagesize
*/
void migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rbname,
ram_addr_t start, size_t len)
int migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rbname,
ram_addr_t start, size_t len)
{
uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */
size_t msglen = 12; /* start + len */
enum mig_rp_message_type msg_type;

*(uint64_t *)bufc = cpu_to_be64((uint64_t)start);
*(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len);
Expand All @@ -241,10 +260,12 @@ void migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rbname,
bufc[msglen++] = rbname_len;
memcpy(bufc + msglen, rbname, rbname_len);
msglen += rbname_len;
migrate_send_rp_message(mis, MIG_RP_MSG_REQ_PAGES_ID, msglen, bufc);
msg_type = MIG_RP_MSG_REQ_PAGES_ID;
} else {
migrate_send_rp_message(mis, MIG_RP_MSG_REQ_PAGES, msglen, bufc);
msg_type = MIG_RP_MSG_REQ_PAGES;
}

return migrate_send_rp_message(mis, msg_type, msglen, bufc);
}

void qemu_start_incoming_migration(const char *uri, Error **errp)
Expand Down Expand Up @@ -1237,10 +1258,8 @@ bool migration_is_idle(void)
return false;
}

MigrationState *migrate_init(void)
void migrate_init(MigrationState *s)
{
MigrationState *s = migrate_get_current();

/*
* Reinitialise all migration state, except
* parameters/capabilities that the user set, and
Expand Down Expand Up @@ -1270,7 +1289,6 @@ MigrationState *migrate_init(void)
s->vm_was_running = false;
s->iteration_initial_bytes = 0;
s->threshold_size = 0;
return s;
}

static GSList *migration_blockers;
Expand Down Expand Up @@ -1378,7 +1396,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
migrate_set_block_incremental(s, true);
}

s = migrate_init();
migrate_init(s);

if (strstart(uri, "tcp:", &p)) {
tcp_start_outgoing_migration(s, p, &local_err);
Expand Down Expand Up @@ -1709,6 +1727,11 @@ static void *source_return_path_thread(void *opaque)
header_type = qemu_get_be16(rp);
header_len = qemu_get_be16(rp);

if (qemu_file_get_error(rp)) {
mark_source_rp_bad(ms);
goto out;
}

if (header_type >= MIG_RP_MSG_MAX ||
header_type == MIG_RP_MSG_INVALID) {
error_report("RP: Received invalid message 0x%04x length 0x%04x",
Expand Down
10 changes: 6 additions & 4 deletions migration/migration.h
Expand Up @@ -35,15 +35,17 @@ struct MigrationIncomingState {
bool have_fault_thread;
QemuThread fault_thread;
QemuSemaphore fault_thread_sem;
/* Set this when we want the fault thread to quit */
bool fault_thread_quit;

bool have_listen_thread;
QemuThread listen_thread;
QemuSemaphore listen_thread_sem;

/* For the kernel to send us notifications */
int userfault_fd;
/* To tell the fault_thread to quit */
int userfault_quit_fd;
/* To notify the fault_thread to wake, e.g., when need to quit */
int userfault_event_fd;
QEMUFile *to_src_file;
QemuMutex rp_mutex; /* We send replies from multiple threads */
void *postcopy_tmp_page;
Expand Down Expand Up @@ -191,7 +193,7 @@ void migrate_fd_error(MigrationState *s, const Error *error);

void migrate_fd_connect(MigrationState *s, Error *error_in);

MigrationState *migrate_init(void);
void migrate_init(MigrationState *s);
bool migration_is_blocked(Error **errp);
/* True if outgoing migration has entered postcopy phase */
bool migration_in_postcopy(void);
Expand Down Expand Up @@ -228,7 +230,7 @@ void migrate_send_rp_shut(MigrationIncomingState *mis,
uint32_t value);
void migrate_send_rp_pong(MigrationIncomingState *mis,
uint32_t value);
void migrate_send_rp_req_pages(MigrationIncomingState *mis, const char* rbname,
int migrate_send_rp_req_pages(MigrationIncomingState *mis, const char* rbname,
ram_addr_t start, size_t len);

#endif
60 changes: 38 additions & 22 deletions migration/postcopy-ram.c
Expand Up @@ -377,27 +377,18 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
trace_postcopy_ram_incoming_cleanup_entry();

if (mis->have_fault_thread) {
uint64_t tmp64;

if (qemu_ram_foreach_block(cleanup_range, mis)) {
return -1;
}
/*
* Tell the fault_thread to exit, it's an eventfd that should
* currently be at 0, we're going to increment it to 1
*/
tmp64 = 1;
if (write(mis->userfault_quit_fd, &tmp64, 8) == 8) {
trace_postcopy_ram_incoming_cleanup_join();
qemu_thread_join(&mis->fault_thread);
} else {
/* Not much we can do here, but may as well report it */
error_report("%s: incrementing userfault_quit_fd: %s", __func__,
strerror(errno));
}
/* Let the fault thread quit */
atomic_set(&mis->fault_thread_quit, 1);
postcopy_fault_thread_notify(mis);
trace_postcopy_ram_incoming_cleanup_join();
qemu_thread_join(&mis->fault_thread);

trace_postcopy_ram_incoming_cleanup_closeuf();
close(mis->userfault_fd);
close(mis->userfault_quit_fd);
close(mis->userfault_event_fd);
mis->have_fault_thread = false;
}

Expand Down Expand Up @@ -520,7 +511,7 @@ static void *postcopy_ram_fault_thread(void *opaque)
pfd[0].fd = mis->userfault_fd;
pfd[0].events = POLLIN;
pfd[0].revents = 0;
pfd[1].fd = mis->userfault_quit_fd;
pfd[1].fd = mis->userfault_event_fd;
pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */
pfd[1].revents = 0;

Expand All @@ -530,8 +521,18 @@ static void *postcopy_ram_fault_thread(void *opaque)
}

if (pfd[1].revents) {
trace_postcopy_ram_fault_thread_quit();
break;
uint64_t tmp64 = 0;

/* Consume the signal */
if (read(mis->userfault_event_fd, &tmp64, 8) != 8) {
/* Nothing obviously nicer than posting this error. */
error_report("%s: read() failed", __func__);
}

if (atomic_read(&mis->fault_thread_quit)) {
trace_postcopy_ram_fault_thread_quit();
break;
}
}

ret = read(mis->userfault_fd, &msg, sizeof(msg));
Expand Down Expand Up @@ -610,9 +611,9 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
}

/* Now an eventfd we use to tell the fault-thread to quit */
mis->userfault_quit_fd = eventfd(0, EFD_CLOEXEC);
if (mis->userfault_quit_fd == -1) {
error_report("%s: Opening userfault_quit_fd: %s", __func__,
mis->userfault_event_fd = eventfd(0, EFD_CLOEXEC);
if (mis->userfault_event_fd == -1) {
error_report("%s: Opening userfault_event_fd: %s", __func__,
strerror(errno));
close(mis->userfault_fd);
return -1;
Expand Down Expand Up @@ -813,6 +814,21 @@ void *postcopy_get_tmp_page(MigrationIncomingState *mis)

/* ------------------------------------------------------------------------- */

/*
 * Wake the fault thread by writing the 64-bit value 1 to
 * mis->userfault_event_fd.  A failed or short write is only reported
 * through error_report(); nothing is returned to the caller.
 */
void postcopy_fault_thread_notify(MigrationIncomingState *mis)
{
    const uint64_t increment = 1;
    ssize_t written;

    /*
     * Wakeup the fault_thread.  It's an eventfd that should currently
     * be at 0, we're going to increment it to 1
     */
    written = write(mis->userfault_event_fd, &increment, sizeof(increment));
    if (written == (ssize_t)sizeof(increment)) {
        return;
    }

    /* Not much we can do here, but may as well report it */
    error_report("%s: incrementing failed: %s", __func__,
                 strerror(errno));
}

/**
* postcopy_discard_send_init: Called at the start of each RAMBlock before
* asking to discard individual ranges.
Expand Down
2 changes: 2 additions & 0 deletions migration/postcopy-ram.h
Expand Up @@ -114,4 +114,6 @@ PostcopyState postcopy_state_get(void);
/* Set the state and return the old state */
PostcopyState postcopy_state_set(PostcopyState new_state);

void postcopy_fault_thread_notify(MigrationIncomingState *mis);

#endif
33 changes: 24 additions & 9 deletions migration/ram.c
Expand Up @@ -1602,11 +1602,13 @@ static void xbzrle_load_cleanup(void)

static void ram_state_cleanup(RAMState **rsp)
{
migration_page_queue_free(*rsp);
qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
g_free(*rsp);
*rsp = NULL;
if (*rsp) {
migration_page_queue_free(*rsp);
qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
g_free(*rsp);
*rsp = NULL;
}
}

static void xbzrle_cleanup(void)
Expand Down Expand Up @@ -2698,6 +2700,16 @@ static int ram_load_postcopy(QEMUFile *f)
uint8_t ch;

addr = qemu_get_be64(f);

/*
* If qemu file error, we should stop here, and then "addr"
* may be invalid
*/
ret = qemu_file_get_error(f);
if (ret) {
break;
}

flags = addr & ~TARGET_PAGE_MASK;
addr &= TARGET_PAGE_MASK;

Expand Down Expand Up @@ -2778,9 +2790,15 @@ static int ram_load_postcopy(QEMUFile *f)
error_report("Unknown combination of migration flags: %#x"
" (postcopy mode)", flags);
ret = -EINVAL;
break;
}

/* Detect for any possible file errors */
if (!ret && qemu_file_get_error(f)) {
ret = qemu_file_get_error(f);
}

if (place_needed) {
if (!ret && place_needed) {
/* This gets called at the last target page in the host page */
void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;

Expand All @@ -2792,9 +2810,6 @@ static int ram_load_postcopy(QEMUFile *f)
place_source, block);
}
}
if (!ret) {
ret = qemu_file_get_error(f);
}
}

return ret;
Expand Down

0 comments on commit 648ba91

Please sign in to comment.