Skip to content

Commit

Permalink
migration: add reporting of errors for outgoing migration
Browse files Browse the repository at this point in the history
Currently if an application initiates an outgoing migration,
it may or may not, get an error reported back on failure. If
the error occurs synchronously to the 'migrate' command
execution, the client app will see the error message. This
is the case for DNS lookup failures. If the error occurs
asynchronously to the monitor command though, the error
will be thrown away and the client left guessing about
what went wrong. This is the case for failure to connect
to the TCP server (eg due to wrong port, or firewall
rules, or other similar errors).

In the future we'll be adding more scope for errors to
happen asynchronously with the TLS protocol handshake.
TLS errors are hard to diagnose even when they are well
reported, so discarding errors entirely will make it
impossible to debug TLS connection problems.

Management apps which do migration are already using
'query-migrate' / 'info migrate' to check up on progress
of background migration operations and to see their end
status. This is a fine place to also include the error
message when things go wrong.

This patch thus adds an 'error-desc' field to the
MigrationInfo struct, which will be populated when
the 'status' is set to 'failed':

(qemu) migrate -d tcp:localhost:9001
(qemu) info migrate
capabilities: xbzrle: off rdma-pin-all: off auto-converge: off zero-blocks: off compress: off events: off x-postcopy-ram: off
Migration status: failed (Error connecting to socket: Connection refused)
total time: 0 milliseconds

In the HMP, when doing non-detached migration, it is
also possible to display this error message directly
to the app.

(qemu) migrate tcp:localhost:9001
Error connecting to socket: Connection refused

Or with QMP

  {
    "execute": "query-migrate",
    "arguments": {}
  }
  {
    "return": {
      "status": "failed",
      "error-desc": "address resolution failed for myhost:9000: No address associated with hostname"
    }
  }

Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <1461751518-12128-11-git-send-email-berrange@redhat.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
  • Loading branch information
berrange authored and Amit Shah committed May 26, 2016
1 parent 48f0748 commit d59ce6f
Show file tree
Hide file tree
Showing 10 changed files with 42 additions and 18 deletions.
13 changes: 12 additions & 1 deletion hmp.c
Expand Up @@ -35,6 +35,7 @@
#include "block/qapi.h"
#include "qemu-io.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"

#ifdef CONFIG_SPICE
#include <spice/enums.h>
Expand Down Expand Up @@ -168,8 +169,15 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
}

if (info->has_status) {
monitor_printf(mon, "Migration status: %s\n",
monitor_printf(mon, "Migration status: %s",
MigrationStatus_lookup[info->status]);
if (info->status == MIGRATION_STATUS_FAILED &&
info->has_error_desc) {
monitor_printf(mon, " (%s)\n", info->error_desc);
} else {
monitor_printf(mon, "\n");
}

monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
info->total_time);
if (info->has_expected_downtime) {
Expand Down Expand Up @@ -1533,6 +1541,9 @@ static void hmp_migrate_status_cb(void *opaque)
if (status->is_block_migration) {
monitor_printf(status->mon, "\n");
}
if (info->has_error_desc) {
error_report("%s", info->error_desc);
}
monitor_resume(status->mon);
timer_del(status->timer);
g_free(status);
Expand Down
5 changes: 4 additions & 1 deletion include/migration/migration.h
Expand Up @@ -171,6 +171,9 @@ struct MigrationState
QSIMPLEQ_HEAD(src_page_requests, MigrationSrcPageRequest) src_page_requests;
/* The RAMBlock used in the last src_page_request */
RAMBlock *last_req_rb;

/* The last error that occurred */
Error *error;
};

void migrate_set_state(int *state, int old_state, int new_state);
Expand Down Expand Up @@ -207,7 +210,7 @@ void rdma_start_outgoing_migration(void *opaque, const char *host_port, Error **

void rdma_start_incoming_migration(const char *host_port, Error **errp);

void migrate_fd_error(MigrationState *s);
void migrate_fd_error(MigrationState *s, const Error *error);

void migrate_fd_connect(MigrationState *s);

Expand Down
2 changes: 1 addition & 1 deletion include/qapi/error.h
Expand Up @@ -134,7 +134,7 @@ typedef enum ErrorClass {
/*
* Get @err's human-readable error message.
*/
const char *error_get_pretty(Error *err);
const char *error_get_pretty(const Error *err);

/*
* Get @err's error class.
Expand Down
15 changes: 12 additions & 3 deletions migration/migration.c
Expand Up @@ -691,6 +691,10 @@ MigrationInfo *qmp_query_migrate(Error **errp)
break;
case MIGRATION_STATUS_FAILED:
info->has_status = true;
if (s->error) {
info->has_error_desc = true;
info->error_desc = g_strdup(error_get_pretty(s->error));
}
break;
case MIGRATION_STATUS_CANCELLED:
info->has_status = true;
Expand Down Expand Up @@ -863,12 +867,15 @@ static void migrate_fd_cleanup(void *opaque)
notifier_list_notify(&migration_state_notifiers, s);
}

void migrate_fd_error(MigrationState *s)
void migrate_fd_error(MigrationState *s, const Error *error)
{
trace_migrate_fd_error();
trace_migrate_fd_error(error ? error_get_pretty(error) : "");
assert(s->to_dst_file == NULL);
migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
MIGRATION_STATUS_FAILED);
if (!s->error) {
s->error = error_copy(error);
}
notifier_list_notify(&migration_state_notifiers, s);
}

Expand Down Expand Up @@ -967,6 +974,8 @@ MigrationState *migrate_init(const MigrationParams *params)
s->postcopy_after_devices = false;
s->migration_thread_running = false;
s->last_req_rb = NULL;
error_free(s->error);
s->error = NULL;

migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);

Expand Down Expand Up @@ -1076,7 +1085,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
}

if (local_err) {
migrate_fd_error(s);
migrate_fd_error(s, local_err);
error_propagate(errp, local_err);
return;
}
Expand Down
10 changes: 3 additions & 7 deletions migration/rdma.c
Expand Up @@ -3489,24 +3489,22 @@ void rdma_start_outgoing_migration(void *opaque,
const char *host_port, Error **errp)
{
MigrationState *s = opaque;
Error *local_err = NULL, **temp = &local_err;
RDMAContext *rdma = qemu_rdma_data_init(host_port, &local_err);
RDMAContext *rdma = qemu_rdma_data_init(host_port, errp);
int ret = 0;

if (rdma == NULL) {
ERROR(temp, "Failed to initialize RDMA data structures! %d", ret);
goto err;
}

ret = qemu_rdma_source_init(rdma, &local_err,
ret = qemu_rdma_source_init(rdma, errp,
s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL]);

if (ret) {
goto err;
}

trace_rdma_start_outgoing_migration_after_rdma_source_init();
ret = qemu_rdma_connect(rdma, &local_err);
ret = qemu_rdma_connect(rdma, errp);

if (ret) {
goto err;
Expand All @@ -3518,7 +3516,5 @@ void rdma_start_outgoing_migration(void *opaque,
migrate_fd_connect(s);
return;
err:
error_propagate(errp, local_err);
g_free(rdma);
migrate_fd_error(s);
}
2 changes: 1 addition & 1 deletion migration/tcp.c
Expand Up @@ -40,7 +40,7 @@ static void tcp_wait_for_connect(int fd, Error *err, void *opaque)
if (fd < 0) {
DPRINTF("migrate connect error: %s\n", error_get_pretty(err));
s->to_dst_file = NULL;
migrate_fd_error(s);
migrate_fd_error(s, err);
} else {
DPRINTF("migrate connect success\n");
s->to_dst_file = qemu_fopen_socket(fd, "wb");
Expand Down
2 changes: 1 addition & 1 deletion migration/unix.c
Expand Up @@ -40,7 +40,7 @@ static void unix_wait_for_connect(int fd, Error *err, void *opaque)
if (fd < 0) {
DPRINTF("migrate connect error: %s\n", error_get_pretty(err));
s->to_dst_file = NULL;
migrate_fd_error(s);
migrate_fd_error(s, err);
} else {
DPRINTF("migrate connect success\n");
s->to_dst_file = qemu_fopen_socket(fd, "wb");
Expand Down
7 changes: 6 additions & 1 deletion qapi-schema.json
Expand Up @@ -484,6 +484,10 @@
# throttled during auto-converge. This is only present when auto-converge
# has started throttling guest cpus. (Since 2.7)
#
# @error-desc: #optional the human readable error description string, when
# @status is 'failed'. Clients should not attempt to parse the
# error strings. (Since 2.6)
#
# Since: 0.14.0
##
{ 'struct': 'MigrationInfo',
Expand All @@ -494,7 +498,8 @@
'*expected-downtime': 'int',
'*downtime': 'int',
'*setup-time': 'int',
'*cpu-throttle-percentage': 'int'} }
'*cpu-throttle-percentage': 'int',
'*error-desc': 'str'} }

##
# @query-migrate
Expand Down
2 changes: 1 addition & 1 deletion trace-events
Expand Up @@ -1481,7 +1481,7 @@ await_return_path_close_on_source_close(void) ""
await_return_path_close_on_source_joining(void) ""
migrate_set_state(int new_state) "new state %d"
migrate_fd_cleanup(void) ""
migrate_fd_error(void) ""
migrate_fd_error(const char *error_desc) "error=%s"
migrate_fd_cancel(void) ""
migrate_handle_rp_req_pages(const char *rbname, size_t start, size_t len) "in %s at %zx len %zx"
migrate_pending(uint64_t size, uint64_t max, uint64_t post, uint64_t nonpost) "pending size %" PRIu64 " max %" PRIu64 " (post=%" PRIu64 " nonpost=%" PRIu64 ")"
Expand Down
2 changes: 1 addition & 1 deletion util/error.c
Expand Up @@ -217,7 +217,7 @@ ErrorClass error_get_class(const Error *err)
return err->err_class;
}

const char *error_get_pretty(Error *err)
const char *error_get_pretty(const Error *err)
{
return err->msg;
}
Expand Down

0 comments on commit d59ce6f

Please sign in to comment.