diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst index e88918f7639e..2eb08624fc36 100644 --- a/docs/devel/migration.rst +++ b/docs/devel/migration.rst @@ -50,6 +50,26 @@ All these migration protocols use the same infrastructure to save/restore state devices. This infrastructure is shared with the savevm/loadvm functionality. +Debugging +========= + +The migration stream can be analyzed thanks to `scripts/analyze_migration.py`. + +Example usage: + +.. code-block:: shell + + $ qemu-system-x86_64 + (qemu) migrate "exec:cat > mig" + $ ./scripts/analyze_migration.py -f mig + { + "ram (3)": { + "section sizes": { + "pc.ram": "0x0000000008000000", + ... + +See also ``analyze_migration.py -h`` help for more options. + Common infrastructure ===================== diff --git a/migration/colo.c b/migration/colo.c index 1b3493729b03..d015d4f84e2f 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -443,6 +443,9 @@ static int colo_do_checkpoint_transaction(MigrationState *s, /* Disable block migration */ migrate_set_block_enabled(false, &local_err); + if (local_err) { + goto out; + } qemu_mutex_lock_iothread(); #ifdef CONFIG_REPLICATION diff --git a/migration/migration.c b/migration/migration.c index 177cce9e951f..0bb042a0f78b 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -785,6 +785,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) params->cpu_throttle_initial = s->parameters.cpu_throttle_initial; params->has_cpu_throttle_increment = true; params->cpu_throttle_increment = s->parameters.cpu_throttle_increment; + params->has_cpu_throttle_tailslow = true; + params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow; params->has_tls_creds = true; params->tls_creds = g_strdup(s->parameters.tls_creds); params->has_tls_hostname = true; @@ -930,6 +932,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) info->xbzrle_cache->pages = xbzrle_counters.pages; info->xbzrle_cache->cache_miss = 
xbzrle_counters.cache_miss; info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate; + info->xbzrle_cache->encoding_rate = xbzrle_counters.encoding_rate; info->xbzrle_cache->overflow = xbzrle_counters.overflow; } @@ -1202,16 +1205,19 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) } if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) { - error_setg(errp, "Parameter 'max_bandwidth' expects an integer in the" - " range of 0 to %zu bytes/second", SIZE_MAX); + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, + "max_bandwidth", + "an integer in the range of 0 to "stringify(SIZE_MAX) + " bytes/second"); return false; } if (params->has_downtime_limit && (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) { - error_setg(errp, "Parameter 'downtime_limit' expects an integer in " - "the range of 0 to %d milliseconds", - MAX_MIGRATE_DOWNTIME); + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, + "downtime_limit", + "an integer in the range of 0 to " + stringify(MAX_MIGRATE_DOWNTIME)" ms"); return false; } @@ -1324,6 +1330,10 @@ static void migrate_params_test_apply(MigrateSetParameters *params, dest->cpu_throttle_increment = params->cpu_throttle_increment; } + if (params->has_cpu_throttle_tailslow) { + dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow; + } + if (params->has_tls_creds) { assert(params->tls_creds->type == QTYPE_QSTRING); dest->tls_creds = g_strdup(params->tls_creds->u.s); @@ -1412,6 +1422,10 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) s->parameters.cpu_throttle_increment = params->cpu_throttle_increment; } + if (params->has_cpu_throttle_tailslow) { + s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow; + } + if (params->has_tls_creds) { g_free(s->parameters.tls_creds); assert(params->tls_creds->type == QTYPE_QSTRING); @@ -2107,9 +2121,10 @@ void qmp_migrate_set_speed(int64_t value, Error **errp) void qmp_migrate_set_downtime(double value, Error 
**errp) { if (value < 0 || value > MAX_MIGRATE_DOWNTIME_SECONDS) { - error_setg(errp, "Parameter 'downtime_limit' expects an integer in " - "the range of 0 to %d seconds", - MAX_MIGRATE_DOWNTIME_SECONDS); + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, + "downtime_limit", + "an integer in the range of 0 to " + stringify(MAX_MIGRATE_DOWNTIME_SECONDS)" seconds"); return; } @@ -2494,7 +2509,7 @@ static void *source_return_path_thread(void *opaque) if (header_type >= MIG_RP_MSG_MAX || header_type == MIG_RP_MSG_INVALID) { error_report("RP: Received invalid message 0x%04x length 0x%04x", - header_type, header_len); + header_type, header_len); mark_source_rp_bad(ms); goto out; } @@ -2503,9 +2518,9 @@ static void *source_return_path_thread(void *opaque) header_len != rp_cmd_args[header_type].len) || header_len > sizeof(buf)) { error_report("RP: Received '%s' message (0x%04x) with" - "incorrect length %d expecting %zu", - rp_cmd_args[header_type].name, header_type, header_len, - (size_t)rp_cmd_args[header_type].len); + "incorrect length %d expecting %zu", + rp_cmd_args[header_type].name, header_type, header_len, + (size_t)rp_cmd_args[header_type].len); mark_source_rp_bad(ms); goto out; } @@ -2560,7 +2575,7 @@ static void *source_return_path_thread(void *opaque) } if (header_len != expected_len) { error_report("RP: Req_Page_id with length %d expecting %zd", - header_len, expected_len); + header_len, expected_len); mark_source_rp_bad(ms); goto out; } @@ -3593,6 +3608,8 @@ static Property migration_properties[] = { DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState, parameters.cpu_throttle_increment, DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT), + DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState, + parameters.cpu_throttle_tailslow, false), DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState, parameters.max_bandwidth, MAX_THROTTLE), DEFINE_PROP_UINT64("x-downtime-limit", MigrationState, @@ -3699,6 +3716,7 @@ static void migration_instance_init(Object *obj) 
params->has_throttle_trigger_threshold = true; params->has_cpu_throttle_initial = true; params->has_cpu_throttle_increment = true; + params->has_cpu_throttle_tailslow = true; params->has_max_bandwidth = true; params->has_downtime_limit = true; params->has_x_checkpoint_delay = true; diff --git a/migration/multifd.c b/migration/multifd.c index 9123c111a38c..5a3e4d0d46d1 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -550,6 +550,7 @@ void multifd_save_cleanup(void) multifd_send_state->ops->send_cleanup(p, &local_err); if (local_err) { migrate_set_error(migrate_get_current(), local_err); + error_free(local_err); } } qemu_sem_destroy(&multifd_send_state->channels_ready); @@ -688,6 +689,7 @@ static void *multifd_send_thread(void *opaque) if (local_err) { trace_multifd_send_error(p->id); multifd_send_terminate_threads(local_err); + error_free(local_err); } /* @@ -727,6 +729,8 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) * its status. */ p->quit = true; + object_unref(OBJECT(sioc)); + error_free(local_err); } else { p->c = QIO_CHANNEL(sioc); qio_channel_set_delay(p->c, false); @@ -963,6 +967,7 @@ static void *multifd_recv_thread(void *opaque) if (local_err) { multifd_recv_terminate_threads(local_err); + error_free(local_err); } qemu_mutex_lock(&p->mutex); p->running = false; diff --git a/migration/ram.c b/migration/ram.c index 53166fc279ca..859f835f1a55 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -327,6 +327,10 @@ struct RAMState { uint64_t num_dirty_pages_period; /* xbzrle misses since the beginning of the period */ uint64_t xbzrle_cache_miss_prev; + /* Amount of xbzrle pages since the beginning of the period */ + uint64_t xbzrle_pages_prev; + /* Amount of xbzrle encoded bytes since the beginning of the period */ + uint64_t xbzrle_bytes_prev; /* compression statistics since the beginning of the period */ /* amount of count that no free thread to compress data */ @@ -616,20 +620,34 @@ static size_t 
save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block, * able to complete migration. Some workloads dirty memory way too * fast and will not effectively converge, even with auto-converge. */ -static void mig_throttle_guest_down(void) +static void mig_throttle_guest_down(uint64_t bytes_dirty_period, + uint64_t bytes_dirty_threshold) { MigrationState *s = migrate_get_current(); uint64_t pct_initial = s->parameters.cpu_throttle_initial; - uint64_t pct_icrement = s->parameters.cpu_throttle_increment; + uint64_t pct_increment = s->parameters.cpu_throttle_increment; + bool pct_tailslow = s->parameters.cpu_throttle_tailslow; int pct_max = s->parameters.max_cpu_throttle; + uint64_t throttle_now = cpu_throttle_get_percentage(); + uint64_t cpu_now, cpu_ideal, throttle_inc; + /* We have not started throttling yet. Let's start it. */ if (!cpu_throttle_active()) { cpu_throttle_set(pct_initial); } else { /* Throttling already on, just increase the rate */ - cpu_throttle_set(MIN(cpu_throttle_get_percentage() + pct_icrement, - pct_max)); + if (!pct_tailslow) { + throttle_inc = pct_increment; + } else { + /* Compute the ideal CPU percentage used by Guest, which may + * make the dirty rate match the dirty rate threshold. */ + cpu_now = 100 - throttle_now; + cpu_ideal = cpu_now * (bytes_dirty_threshold * 1.0 / + bytes_dirty_period); + throttle_inc = MIN(cpu_now - cpu_ideal, pct_increment); + } + cpu_throttle_set(MIN(throttle_now + throttle_inc, pct_max)); } } @@ -696,6 +714,18 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, return -1; } + /* + * Reaching here means the page has hit the xbzrle cache, no matter what + * encoding result it is (normal encoding, overflow or skipping the page), + * count the page as encoded. This is used to calculate the encoding rate. + * + * Example: 2 pages (8KB) being encoded, first page encoding generates 2KB, + * 2nd page turns out to be skipped (i.e. 
no new bytes written to the + * page), the overall encoding rate will be 8KB / 2KB = 4, which has the + * skipped page included. In this way, the encoding rate can tell if the + * guest page is good for xbzrle encoding. + */ + xbzrle_counters.pages++; prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); /* save current buffer into memory */ @@ -726,6 +756,7 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, } else if (encoded_len == -1) { trace_save_xbzrle_page_overflow(); xbzrle_counters.overflow++; + xbzrle_counters.bytes += TARGET_PAGE_SIZE; return -1; } @@ -736,8 +767,12 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, qemu_put_be16(rs->f, encoded_len); qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len); bytes_xbzrle += encoded_len + 1 + 2; - xbzrle_counters.pages++; - xbzrle_counters.bytes += bytes_xbzrle; + /* + * Like compressed_size (please see update_compress_thread_counts), + * the xbzrle encoded bytes don't count the 8 byte header with + * RAM_SAVE_FLAG_CONTINUE. 
+ */ + xbzrle_counters.bytes += bytes_xbzrle - 8; ram_counters.transferred += bytes_xbzrle; return 1; @@ -870,9 +905,23 @@ static void migration_update_rates(RAMState *rs, int64_t end_time) } if (migrate_use_xbzrle()) { + double encoded_size, unencoded_size; + xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss - rs->xbzrle_cache_miss_prev) / page_count; rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss; + unencoded_size = (xbzrle_counters.pages - rs->xbzrle_pages_prev) * + TARGET_PAGE_SIZE; + encoded_size = xbzrle_counters.bytes - rs->xbzrle_bytes_prev; + if (xbzrle_counters.pages == rs->xbzrle_pages_prev) { + xbzrle_counters.encoding_rate = 0; + } else if (!encoded_size) { + xbzrle_counters.encoding_rate = UINT64_MAX; + } else { + xbzrle_counters.encoding_rate = unencoded_size / encoded_size; + } + rs->xbzrle_pages_prev = xbzrle_counters.pages; + rs->xbzrle_bytes_prev = xbzrle_counters.bytes; } if (migrate_use_compression()) { @@ -919,7 +968,8 @@ static void migration_trigger_throttle(RAMState *rs) (++rs->dirty_rate_high_cnt >= 2)) { trace_migration_throttle(); rs->dirty_rate_high_cnt = 0; - mig_throttle_guest_down(); + mig_throttle_guest_down(bytes_dirty_period, + bytes_dirty_threshold); } } } @@ -3132,7 +3182,7 @@ static int ram_load_postcopy(QEMUFile *f) /* Temporary page that is later 'placed' */ void *postcopy_host_page = mis->postcopy_tmp_page; void *this_host = NULL; - bool all_zero = false; + bool all_zero = true; int target_pages = 0; while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) { @@ -3159,7 +3209,6 @@ static int ram_load_postcopy(QEMUFile *f) addr &= TARGET_PAGE_MASK; trace_ram_load_postcopy_loop((uint64_t)addr, flags); - place_needed = false; if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE | RAM_SAVE_FLAG_COMPRESS_PAGE)) { block = ram_block_from_stream(f, flags); @@ -3184,9 +3233,7 @@ static int ram_load_postcopy(QEMUFile *f) */ page_buffer = postcopy_host_page + ((uintptr_t)host & (block->page_size - 1)); - /* If all TP 
are zero then we can optimise the place */ if (target_pages == 1) { - all_zero = true; this_host = (void *)QEMU_ALIGN_DOWN((uintptr_t)host, block->page_size); } else { @@ -3206,7 +3253,6 @@ static int ram_load_postcopy(QEMUFile *f) */ if (target_pages == (block->page_size / TARGET_PAGE_SIZE)) { place_needed = true; - target_pages = 0; } place_source = postcopy_host_page; } @@ -3288,6 +3334,10 @@ static int ram_load_postcopy(QEMUFile *f) ret = postcopy_place_page(mis, place_dest, place_source, block); } + place_needed = false; + target_pages = 0; + /* Assume we have a zero page until we detect something different */ + all_zero = true; } } diff --git a/migration/rdma.c b/migration/rdma.c index f61587891b1f..967fda5b0c75 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -4056,6 +4056,7 @@ void rdma_start_incoming_migration(const char *host_port, Error **errp) return; err: error_propagate(errp, local_err); + g_free(rdma->host); g_free(rdma); g_free(rdma_return_path); } diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c index 7f6e982dc834..9c61e769ca9a 100644 --- a/monitor/hmp-cmds.c +++ b/monitor/hmp-cmds.c @@ -231,18 +231,18 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) monitor_printf(mon, "\n"); } - monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n", + monitor_printf(mon, "total time: %" PRIu64 " ms\n", info->total_time); if (info->has_expected_downtime) { - monitor_printf(mon, "expected downtime: %" PRIu64 " milliseconds\n", + monitor_printf(mon, "expected downtime: %" PRIu64 " ms\n", info->expected_downtime); } if (info->has_downtime) { - monitor_printf(mon, "downtime: %" PRIu64 " milliseconds\n", + monitor_printf(mon, "downtime: %" PRIu64 " ms\n", info->downtime); } if (info->has_setup_time) { - monitor_printf(mon, "setup: %" PRIu64 " milliseconds\n", + monitor_printf(mon, "setup: %" PRIu64 " ms\n", info->setup_time); } } @@ -303,6 +303,8 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) info->xbzrle_cache->cache_miss); 
monitor_printf(mon, "xbzrle cache miss rate: %0.2f\n", info->xbzrle_cache->cache_miss_rate); + monitor_printf(mon, "xbzrle encoding rate: %0.2f\n", + info->xbzrle_cache->encoding_rate); monitor_printf(mon, "xbzrle overflow: %" PRIu64 "\n", info->xbzrle_cache->overflow); } @@ -420,6 +422,10 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) monitor_printf(mon, "%s: %u\n", MigrationParameter_str(MIGRATION_PARAMETER_CPU_THROTTLE_INCREMENT), params->cpu_throttle_increment); + assert(params->has_cpu_throttle_tailslow); + monitor_printf(mon, "%s: %s\n", + MigrationParameter_str(MIGRATION_PARAMETER_CPU_THROTTLE_TAILSLOW), + params->cpu_throttle_tailslow ? "on" : "off"); assert(params->has_max_cpu_throttle); monitor_printf(mon, "%s: %u\n", MigrationParameter_str(MIGRATION_PARAMETER_MAX_CPU_THROTTLE), @@ -1198,8 +1204,11 @@ void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict) /* Kept for backwards compatibility */ void hmp_migrate_set_speed(Monitor *mon, const QDict *qdict) { + Error *err = NULL; + int64_t value = qdict_get_int(qdict, "value"); - qmp_migrate_set_speed(value, NULL); + qmp_migrate_set_speed(value, &err); + hmp_handle_error(mon, err); } void hmp_migrate_set_capability(Monitor *mon, const QDict *qdict) @@ -1272,6 +1281,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) p->has_cpu_throttle_increment = true; visit_type_int(v, param, &p->cpu_throttle_increment, &err); break; + case MIGRATION_PARAMETER_CPU_THROTTLE_TAILSLOW: + p->has_cpu_throttle_tailslow = true; + visit_type_bool(v, param, &p->cpu_throttle_tailslow, &err); + break; case MIGRATION_PARAMETER_MAX_CPU_THROTTLE: p->has_max_cpu_throttle = true; visit_type_int(v, param, &p->max_cpu_throttle, &err); diff --git a/qapi/migration.json b/qapi/migration.json index eca2981d0a33..d5000558c6c9 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -70,6 +70,8 @@ # # @cache-miss-rate: rate of cache miss (since 2.1) # +# @encoding-rate: rate of encoded 
bytes (since 5.1) +# # @overflow: number of overflows # # Since: 1.2 @@ -77,7 +79,7 @@ { 'struct': 'XBZRLECacheStats', 'data': {'cache-size': 'int', 'bytes': 'int', 'pages': 'int', 'cache-miss': 'int', 'cache-miss-rate': 'number', - 'overflow': 'int' } } + 'encoding-rate': 'number', 'overflow': 'int' } } ## # @CompressionStats: @@ -337,6 +339,7 @@ # "pages":2444343, # "cache-miss":2244, # "cache-miss-rate":0.123, +# "encoding-rate":80.1, # "overflow":34434 # } # } @@ -552,6 +555,21 @@ # auto-converge detects that migration is not making # progress. The default value is 10. (Since 2.7) # +# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage +# At the tail stage of throttling, the Guest is very +# sensitive to CPU percentage while the +# @cpu-throttle-increment is usually excessive at tail stage. +# If this parameter is true, we will compute the ideal +# CPU percentage used by the Guest, which may exactly make +# the dirty rate match the dirty rate threshold. Then we +# will choose a smaller throttle increment between the +# one specified by @cpu-throttle-increment and the one +# generated by ideal CPU percentage. +# Therefore, it is compatible with traditional throttling, +# meanwhile the throttle increment won't be excessive +# at tail stage. +# The default value is false. (Since 5.1) +# # @tls-creds: ID of the 'tls-creds' object that provides credentials for # establishing a TLS connection over the migration data channel. # On the outgoing side of the migration, the credentials must @@ -631,6 +649,7 @@ 'compress-level', 'compress-threads', 'decompress-threads', 'compress-wait-thread', 'throttle-trigger-threshold', 'cpu-throttle-initial', 'cpu-throttle-increment', + 'cpu-throttle-tailslow', 'tls-creds', 'tls-hostname', 'tls-authz', 'max-bandwidth', 'downtime-limit', 'x-checkpoint-delay', 'block-incremental', 'multifd-channels', @@ -676,6 +695,21 @@ # auto-converge detects that migration is not making # progress. The default value is 10. 
(Since 2.7) # +# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage +# At the tail stage of throttling, the Guest is very +# sensitive to CPU percentage while the +# @cpu-throttle-increment is usually excessive at tail stage. +# If this parameter is true, we will compute the ideal +# CPU percentage used by the Guest, which may exactly make +# the dirty rate match the dirty rate threshold. Then we +# will choose a smaller throttle increment between the +# one specified by @cpu-throttle-increment and the one +# generated by ideal CPU percentage. +# Therefore, it is compatible with traditional throttling, +# meanwhile the throttle increment won't be excessive +# at tail stage. +# The default value is false. (Since 5.1) +# # @tls-creds: ID of the 'tls-creds' object that provides credentials # for establishing a TLS connection over the migration data # channel. On the outgoing side of the migration, the credentials @@ -763,6 +797,7 @@ '*throttle-trigger-threshold': 'int', '*cpu-throttle-initial': 'int', '*cpu-throttle-increment': 'int', + '*cpu-throttle-tailslow': 'bool', '*tls-creds': 'StrOrNull', '*tls-hostname': 'StrOrNull', '*tls-authz': 'StrOrNull', @@ -834,6 +869,21 @@ # auto-converge detects that migration is not making # progress. (Since 2.7) # +# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage +# At the tail stage of throttling, the Guest is very +# sensitive to CPU percentage while the +# @cpu-throttle-increment is usually excessive at tail stage. +# If this parameter is true, we will compute the ideal +# CPU percentage used by the Guest, which may exactly make +# the dirty rate match the dirty rate threshold. Then we +# will choose a smaller throttle increment between the +# one specified by @cpu-throttle-increment and the one +# generated by ideal CPU percentage. +# Therefore, it is compatible with traditional throttling, +# meanwhile the throttle increment won't be excessive +# at tail stage. +# The default value is false. 
(Since 5.1) +# # @tls-creds: ID of the 'tls-creds' object that provides credentials # for establishing a TLS connection over the migration data # channel. On the outgoing side of the migration, the credentials @@ -921,6 +971,7 @@ '*throttle-trigger-threshold': 'uint8', '*cpu-throttle-initial': 'uint8', '*cpu-throttle-increment': 'uint8', + '*cpu-throttle-tailslow': 'bool', '*tls-creds': 'str', '*tls-hostname': 'str', '*tls-authz': 'str',