From 30d5d805c10b0cc6e474fe1292b2c6549fc07d33 Mon Sep 17 00:00:00 2001 From: ShaoHe Feng Date: Tue, 2 Feb 2016 09:41:33 +0000 Subject: [PATCH] reset task_state after select_destinations failed. During live migration, an exception may be raised when the scheduler is asked to select a destination, and the live migration then aborts. However, the task state of the instance remains 'migrating', so no further action can be taken on the instance. We need to reset the task state to None. We should also restore the vm_state. Change-Id: If1cae8f4c9037f7821554a94d4440f66d9538794 Closes-bug: #1536916 --- nova/conductor/manager.py | 3 ++- nova/conductor/tasks/live_migrate.py | 14 +++++++++++-- nova/exception.py | 4 ++++ .../unit/conductor/tasks/test_live_migrate.py | 20 +++++++++++++++++++ nova/tests/unit/conductor/test_conductor.py | 3 ++- 5 files changed, 40 insertions(+), 4 deletions(-) diff --git a/nova/conductor/manager.py b/nova/conductor/manager.py index c2817435b11..1bdb97e4c8d 100644 --- a/nova/conductor/manager.py +++ b/nova/conductor/manager.py @@ -318,7 +318,8 @@ def _set_vm_state(context, instance, ex, vm_state=None, exception.HypervisorUnavailable, exception.InstanceInvalidState, exception.MigrationPreCheckError, - exception.LiveMigrationWithOldNovaNotSafe) as ex: + exception.LiveMigrationWithOldNovaNotSafe, + exception.MigrationSchedulerRPCError) as ex: with excutils.save_and_reraise_exception(): # TODO(johngarbutt) - eventually need instance actions here _set_vm_state(context, instance, ex, instance.vm_state) diff --git a/nova/conductor/tasks/live_migrate.py b/nova/conductor/tasks/live_migrate.py index 1974fc2dd6f..9065de27831 100644 --- a/nova/conductor/tasks/live_migrate.py +++ b/nova/conductor/tasks/live_migrate.py @@ -13,6 +13,7 @@ from oslo_config import cfg from oslo_log import log as logging import oslo_messaging as messaging +import six from nova.compute import power_state from nova.conductor.tasks import base @@ -177,8 +178,17 @@ def _find_destination(self): # 
scheduler.utils methods to directly use the RequestSpec object spec_obj = objects.RequestSpec.from_primitives( self.context, request_spec, filter_properties) - host = self.scheduler_client.select_destinations(self.context, - spec_obj)[0]['host'] + try: + host = self.scheduler_client.select_destinations(self.context, + spec_obj)[0]['host'] + except messaging.RemoteError as ex: + # TODO(ShaoHe Feng) There maybe multi-scheduler, and the + # scheduling algorithm is R-R, we can let other scheduler try. + # Note(ShaoHe Feng) There are types of RemoteError, such as + # NoSuchMethod, UnsupportedVersion, we can distinguish it by + # ex.exc_type. + raise exception.MigrationSchedulerRPCError( + reason=six.text_type(ex)) try: self._check_compatible_with_source_hypervisor(host) self._call_livem_checks_on_host(host) diff --git a/nova/exception.py b/nova/exception.py index 4025f9a9357..43a92d280bd 100644 --- a/nova/exception.py +++ b/nova/exception.py @@ -1321,6 +1321,10 @@ class MigrationPreCheckError(MigrationError): msg_fmt = _("Migration pre-check error: %(reason)s") +class MigrationSchedulerRPCError(MigrationError): + msg_fmt = _("Migration select destinations error: %(reason)s") + + class MalformedRequestBody(NovaException): msg_fmt = _("Malformed message body: %(reason)s") diff --git a/nova/tests/unit/conductor/tasks/test_live_migrate.py b/nova/tests/unit/conductor/tasks/test_live_migrate.py index d3b35496357..a6c399d3702 100644 --- a/nova/tests/unit/conductor/tasks/test_live_migrate.py +++ b/nova/tests/unit/conductor/tasks/test_live_migrate.py @@ -16,6 +16,7 @@ from nova.compute import power_state from nova.compute import rpcapi as compute_rpcapi +from nova.compute import vm_states from nova.conductor.tasks import live_migrate from nova import exception from nova import objects @@ -38,6 +39,7 @@ def setUp(self): host=self.instance_host, uuid=self.instance_uuid, power_state=power_state.RUNNING, + vm_state = vm_states.ACTIVE, memory_mb=512, image_ref=self.instance_image) 
self.instance = objects.Instance._from_db_object( @@ -501,6 +503,24 @@ def test_find_destination_when_runs_out_of_hosts(self): self.mox.ReplayAll() self.assertRaises(exception.NoValidHost, self.task._find_destination) + @mock.patch("nova.utils.get_image_from_system_metadata") + @mock.patch("nova.scheduler.utils.build_request_spec") + @mock.patch("nova.scheduler.utils.setup_instance_group") + @mock.patch("nova.objects.RequestSpec.from_primitives") + def test_find_destination_with_remoteError(self, + m_from_primitives, m_setup_instance_group, + m_build_request_spec, m_get_image_from_system_metadata): + m_get_image_from_system_metadata.return_value = {'properties': {}} + m_build_request_spec.return_value = {} + fake_spec = objects.RequestSpec() + m_from_primitives.return_value = fake_spec + with mock.patch.object(self.task.scheduler_client, + 'select_destinations') as m_select_destinations: + error = messaging.RemoteError() + m_select_destinations.side_effect = error + self.assertRaises(exception.MigrationSchedulerRPCError, + self.task._find_destination) + def test_call_livem_checks_on_host(self): with mock.patch.object(self.task.compute_rpcapi, 'check_can_live_migrate_destination', diff --git a/nova/tests/unit/conductor/test_conductor.py b/nova/tests/unit/conductor/test_conductor.py index dcd91368351..a9fb9e086e6 100644 --- a/nova/tests/unit/conductor/test_conductor.py +++ b/nova/tests/unit/conductor/test_conductor.py @@ -1109,7 +1109,8 @@ def test_migrate_server_deals_with_expected_exception(self): exc.InvalidHypervisorType(), exc.InvalidCPUInfo(reason='dummy'), exc.UnableToMigrateToSelf(instance_id='dummy', host='dummy'), - exc.InvalidLocalStorage(path='dummy', reason='dummy')] + exc.InvalidLocalStorage(path='dummy', reason='dummy'), + exc.MigrationSchedulerRPCError(reason='dummy')] for ex in exs: self._test_migrate_server_deals_with_expected_exceptions(ex)