migration: Fix race of image locking between src and dst
Previously, the dst side would immediately try to lock the write byte
upon receiving QEMU_VM_EOF, but on the src side bdrv_inactivate_all()
is only done after sending it. If the src host is under load, dst may
fail to acquire the lock because it races with the src unlocking it.

Fix this by hoisting the bdrv_inactivate_all() operation so that it
happens before QEMU_VM_EOF is sent.
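
Roughly, the source-side ordering changes as follows (simplified
sketch, not the literal call chain):

    /* before this patch (source side): */
    /* ... device state written to f ... */
    qemu_put_byte(f, QEMU_VM_EOF);
    qemu_fflush(f);             /* dst sees EOF and tries to lock the images */
    bdrv_inactivate_all();      /* src only now releases its image locks */

    /* after this patch (source side): */
    /* ... device state written to f ... */
    bdrv_inactivate_all();      /* release the image locks first */
    qemu_put_byte(f, QEMU_VM_EOF);
    qemu_fflush(f);             /* dst re-locks the images; src has let go */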

N.B. A further improvement could be to cleanly hand over the locks
between src and dst, so that there is no window in which a third QEMU
could steal the locks and prevent src and dst from running.

N.B. This commit includes a minor improvement to the error handling
by using qemu_file_set_error().
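
The resulting pattern for callers looks roughly like this (sketch, not
a verbatim excerpt; an error is both latched on the QEMUFile and
returned):

    ret = qemu_savevm_state_complete_precopy(f, false, inactivate);
    if (ret < 0) {
        /* the same error was recorded with qemu_file_set_error(), so a
         * later qemu_file_get_error(f) will also report it */
    }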

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Fam Zheng <famz@redhat.com>
Message-id: 20170616160658.32290-1-famz@redhat.com
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
[PMM: noted qemu_file_set_error() use in commit as suggested by Daniel]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Fam Zheng authored and pm215 committed Jun 19, 2017
1 parent edf8bc9 commit a1fbe75
Showing 4 changed files with 25 additions and 18 deletions.
migration/colo.c: 2 changes (1 addition, 1 deletion)
@@ -352,7 +352,7 @@ static int colo_do_checkpoint_transaction(MigrationState *s,
     qemu_savevm_state_header(fb);
     qemu_savevm_state_begin(fb);
     qemu_mutex_lock_iothread();
-    qemu_savevm_state_complete_precopy(fb, false);
+    qemu_savevm_state_complete_precopy(fb, false, false);
     qemu_mutex_unlock_iothread();
 
     qemu_fflush(fb);
migration/migration.c: 19 changes (7 additions, 12 deletions)
@@ -1553,7 +1553,7 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)
      * Cause any non-postcopiable, but iterative devices to
      * send out their final data.
      */
-    qemu_savevm_state_complete_precopy(ms->to_dst_file, true);
+    qemu_savevm_state_complete_precopy(ms->to_dst_file, true, false);
 
     /*
      * in Finish migrate and with the io-lock held everything should
@@ -1597,7 +1597,7 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)
      */
     qemu_savevm_send_postcopy_listen(fb);
 
-    qemu_savevm_state_complete_precopy(fb, false);
+    qemu_savevm_state_complete_precopy(fb, false, false);
     qemu_savevm_send_ping(fb, 3);
 
     qemu_savevm_send_postcopy_run(fb);
@@ -1695,20 +1695,15 @@ static void migration_completion(MigrationState *s, int current_active_state,
         ret = global_state_store();
 
         if (!ret) {
+            bool inactivate = !migrate_colo_enabled();
             ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
             if (ret >= 0) {
                 qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
-                qemu_savevm_state_complete_precopy(s->to_dst_file, false);
+                ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
+                                                         inactivate);
             }
-            /*
-             * Don't mark the image with BDRV_O_INACTIVE flag if
-             * we will go into COLO stage later.
-             */
-            if (ret >= 0 && !migrate_colo_enabled()) {
-                ret = bdrv_inactivate_all();
-                if (ret >= 0) {
-                    s->block_inactive = true;
-                }
+            if (inactivate && ret >= 0) {
+                s->block_inactive = true;
             }
         }
         qemu_mutex_unlock_iothread();
migration/savevm.c: 19 changes (15 additions, 4 deletions)
@@ -1104,7 +1104,8 @@ void qemu_savevm_state_complete_postcopy(QEMUFile *f)
     qemu_fflush(f);
 }
 
-void qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only)
+int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
+                                       bool inactivate_disks)
 {
     QJSON *vmdesc;
     int vmdesc_len;
@@ -1138,12 +1139,12 @@ void qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only)
         save_section_footer(f, se);
         if (ret < 0) {
             qemu_file_set_error(f, ret);
-            return;
+            return -1;
         }
     }
 
     if (iterable_only) {
-        return;
+        return 0;
     }
 
     vmdesc = qjson_new();
@@ -1173,6 +1174,15 @@ void qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only)
         json_end_object(vmdesc);
     }
 
+    if (inactivate_disks) {
+        /* Inactivate before sending QEMU_VM_EOF so that the
+         * bdrv_invalidate_cache_all() on the other end won't fail. */
+        ret = bdrv_inactivate_all();
+        if (ret) {
+            qemu_file_set_error(f, ret);
+            return ret;
+        }
+    }
     if (!in_postcopy) {
         /* Postcopy stream will still be going */
         qemu_put_byte(f, QEMU_VM_EOF);
@@ -1190,6 +1200,7 @@ void qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only)
     qjson_destroy(vmdesc);
 
     qemu_fflush(f);
+    return 0;
 }
 
 /* Give an estimate of the amount left to be transferred,
@@ -1263,7 +1274,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
 
     ret = qemu_file_get_error(f);
     if (ret == 0) {
-        qemu_savevm_state_complete_precopy(f, false);
+        qemu_savevm_state_complete_precopy(f, false, false);
         ret = qemu_file_get_error(f);
     }
     qemu_savevm_state_cleanup();
migration/savevm.h: 3 changes (2 additions, 1 deletion)
@@ -35,7 +35,8 @@ void qemu_savevm_state_header(QEMUFile *f);
 int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy);
 void qemu_savevm_state_cleanup(void);
 void qemu_savevm_state_complete_postcopy(QEMUFile *f);
-void qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only);
+int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
+                                       bool inactivate_disks);
 void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size,
                                uint64_t *res_non_postcopiable,
                                uint64_t *res_postcopiable);
