Skip to content

Commit

Permalink
libvirt: split out code for determining if migration should abort
Browse files Browse the repository at this point in the history
There are two timeouts that can cause a migration to be aborted:
one checks whether forward progress is being made, while the
other enforces an overall time limit. Split out the code for
checking these limits into a separate method and add some
explicit unit testing for the scenarios they encounter.

Implements: blueprint libvirt-clean-driver
Change-Id: I115fc7f0b41840244f7e60dcb733062209f9460b
  • Loading branch information
berrange committed Jun 24, 2016
1 parent 66b7771 commit ab90751
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 15 deletions.
42 changes: 42 additions & 0 deletions nova/tests/unit/virt/libvirt/test_migration.py
Expand Up @@ -270,3 +270,45 @@ def test_live_migration_find_type_bad_err(self, mock_active):

self.assertEqual(migration.find_job_type(self.guest, self.instance),
fakelibvirt.VIR_DOMAIN_JOB_FAILED)

def test_live_migration_abort_stuck(self):
    """Abort when the time since last progress exceeds the timeout."""
    # 5000 - 1000 = 4000 sec without progress, against a 2000 sec limit
    verdict = migration.should_abort(self.instance,
                                     now=5000,
                                     progress_time=1000,
                                     progress_timeout=2000,
                                     elapsed=4500,
                                     completion_timeout=9000)
    self.assertTrue(verdict)

def test_live_migration_abort_no_prog_timeout(self):
    """Do not abort for lack of progress when that timeout is 0."""
    # progress_timeout=0 disables the progress check, and the elapsed
    # time (4500) is still under the completion timeout (9000)
    verdict = migration.should_abort(self.instance,
                                     now=5000,
                                     progress_time=1000,
                                     progress_timeout=0,
                                     elapsed=4500,
                                     completion_timeout=9000)
    self.assertFalse(verdict)

def test_live_migration_abort_not_stuck(self):
    """Do not abort while the time since last progress is within limit."""
    # 5000 - 4500 = 500 sec without progress, against a 2000 sec limit
    verdict = migration.should_abort(self.instance,
                                     now=5000,
                                     progress_time=4500,
                                     progress_timeout=2000,
                                     elapsed=4500,
                                     completion_timeout=9000)
    self.assertFalse(verdict)

def test_live_migration_abort_too_long(self):
    """Abort when the elapsed time exceeds the completion timeout."""
    # Progress is recent (500 sec ago), but 4500 sec have elapsed
    # against a 2000 sec completion timeout
    verdict = migration.should_abort(self.instance,
                                     now=5000,
                                     progress_time=4500,
                                     progress_timeout=2000,
                                     elapsed=4500,
                                     completion_timeout=2000)
    self.assertTrue(verdict)

def test_live_migration_abort_no_comp_timeout(self):
    """Do not abort on elapsed time when the completion timeout is 0."""
    # completion_timeout=0 disables the completion check, and progress
    # was made 500 sec ago, inside the 2000 sec progress timeout
    verdict = migration.should_abort(self.instance,
                                     now=5000,
                                     progress_time=4500,
                                     progress_timeout=2000,
                                     elapsed=4500,
                                     completion_timeout=0)
    self.assertFalse(verdict)

def test_live_migration_abort_still_working(self):
    """Do not abort while elapsed time is under the completion timeout."""
    # 4500 sec elapsed, against a 9000 sec completion timeout
    verdict = migration.should_abort(self.instance,
                                     now=5000,
                                     progress_time=4500,
                                     progress_timeout=2000,
                                     elapsed=4500,
                                     completion_timeout=9000)
    self.assertFalse(verdict)
18 changes: 3 additions & 15 deletions nova/virt/libvirt/driver.py
Expand Up @@ -6042,27 +6042,15 @@ def _recover_scheduled_migration_task():
_check_scheduled_migration_task()
now = time.time()
elapsed = now - start
abort = False

if ((progress_watermark is None) or
(progress_watermark > info.data_remaining)):
progress_watermark = info.data_remaining
progress_time = now

if (progress_timeout != 0 and
(now - progress_time) > progress_timeout):
LOG.warning(_LW("Live migration stuck for %d sec"),
(now - progress_time), instance=instance)
abort = True

if (completion_timeout != 0 and
elapsed > completion_timeout):
LOG.warning(
_LW("Live migration not completed after %d sec"),
completion_timeout, instance=instance)
abort = True

if abort:
if libvirt_migrate.should_abort(instance, now, progress_time,
progress_timeout, elapsed,
completion_timeout):
try:
guest.abort_job()
except libvirt.libvirtError as e:
Expand Down
34 changes: 34 additions & 0 deletions nova/virt/libvirt/migration.py
Expand Up @@ -21,6 +21,7 @@
from oslo_log import log as logging

from nova.i18n import _LI
from nova.i18n import _LW

LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -163,3 +164,36 @@ def find_job_type(guest, instance):
LOG.info(_LI("Error %(ex)s, migration failed"),
{"ex": ex}, instance=instance)
return libvirt.VIR_DOMAIN_JOB_FAILED


def should_abort(instance, now,
                 progress_time, progress_timeout,
                 elapsed, completion_timeout):
    """Decide whether an in-flight live migration must be aborted.

    Two independent limits are examined, the progress timeout first and
    the completion timeout second. A timeout value of 0 disables the
    corresponding check. A warning is logged for the first limit found
    to be exceeded.

    :param instance: a nova.objects.Instance
    :param now: current time in secs since epoch
    :param progress_time: when progress was last made in secs since epoch
    :param progress_timeout: time in secs to allow for progress
    :param elapsed: total elapsed time of migration in secs
    :param completion_timeout: time in secs to allow for completion

    :returns: True if migration should be aborted, False otherwise
    """
    if progress_timeout != 0:
        # Only computed when the check is enabled, as in the original
        # short-circuit expression.
        stalled_for = now - progress_time
        if stalled_for > progress_timeout:
            LOG.warning(_LW("Live migration stuck for %d sec"),
                        stalled_for, instance=instance)
            return True

    over_time_limit = (completion_timeout != 0 and
                       elapsed > completion_timeout)
    if over_time_limit:
        # The message reports the configured limit, not the elapsed time.
        LOG.warning(
            _LW("Live migration not completed after %d sec"),
            completion_timeout, instance=instance)
        return True

    return False

0 comments on commit ab90751

Please sign in to comment.