Clean up network resources when reschedule fails
During the instance boot (spawn/run) process, neutron ports are
allocated for the instance if necessary. If the instance fails
to spawn (say as a result of a compute host failure), the default
behaviour is to reschedule the instance and leave its networking
resources intact for potential reuse on the rescheduled host.
All is good if the instance is successfully rescheduled, but if
the reschedule fails (say, no more applicable hosts), the allocated
ports are left as-is and effectively orphaned.

This commit adds code to clean up allocated network resources
when the reschedule fails.

Change-Id: Ic670dd4dc192603c2faecf18e14ef59ebca9e420
Closes-Bug: #1510979
yuywz committed Jan 14, 2016
1 parent 523e8ed commit 08d24b7
Showing 2 changed files with 62 additions and 32 deletions.
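
Before the diff, a minimal, runnable sketch of the behaviour being added.
Every name here except deallocate_for_instance() is invented for
illustration; the real change lives in ComputeTaskManager.build_instances,
shown in the manager.py hunks below:

    # Sketch of the failure path this commit changes. Only
    # deallocate_for_instance() mirrors real Nova API; everything
    # else stands in for conductor internals.

    class NoValidHost(Exception):
        """The scheduler has no applicable hosts left."""


    class StubNetworkAPI(object):
        """Stands in for nova.network.API(); records deallocations."""

        def __init__(self):
            self.deallocated = []

        def deallocate_for_instance(self, context, instance,
                                    requested_networks=None):
            self.deallocated.append(instance)


    class ConductorSketch(object):
        def __init__(self, network_api):
            self.network_api = network_api

        def _schedule_instances(self, context, instances):
            # Simulate "no more applicable hosts" on reschedule.
            raise NoValidHost()

        def _set_vm_state_and_notify(self, context, instance, exc):
            pass  # real code sets vm_state=ERROR and emits a notification

        def _cleanup_allocated_networks(self, context, instance,
                                        requested_networks):
            # The helper added by this commit: release the instance's
            # neutron ports instead of leaving them orphaned.
            self.network_api.deallocate_for_instance(
                context, instance, requested_networks=requested_networks)

        def build_instances(self, context, instances,
                            requested_networks=None):
            try:
                self._schedule_instances(context, instances)
            except NoValidHost as exc:
                for instance in instances:
                    self._set_vm_state_and_notify(context, instance, exc)
                    # New behaviour: clean up per instance, then give up.
                    self._cleanup_allocated_networks(
                        context, instance, requested_networks)
                return


    network_api = StubNetworkAPI()
    ConductorSketch(network_api).build_instances(None, ['vm-1', 'vm-2'])
    assert network_api.deallocated == ['vm-1', 'vm-2']
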
9 changes: 9 additions & 0 deletions nova/conductor/manager.py
@@ -35,6 +35,7 @@
from nova.i18n import _, _LE, _LI, _LW
from nova import image
from nova import manager
from nova import network
from nova import objects
from nova.objects import base as nova_object
from nova import rpc
@@ -150,6 +151,7 @@ def __init__(self):
super(ComputeTaskManager, self).__init__()
self.compute_rpcapi = compute_rpcapi.ComputeAPI()
self.image_api = image.API()
self.network_api = network.API()
self.servicegroup_api = servicegroup.API()
self.scheduler_client = scheduler_client.SchedulerClient()
self.notifier = rpc.get_notifier('compute', CONF.host)
@@ -251,6 +253,11 @@ def _set_vm_state_and_notify(self, context, instance_uuid, method, updates,
context, instance_uuid, 'compute_task', method, updates,
ex, request_spec, self.db)

def _cleanup_allocated_networks(
self, context, instance, requested_networks):
self.network_api.deallocate_for_instance(
context, instance, requested_networks=requested_networks)

def _live_migrate(self, context, instance, scheduler_hint,
block_migration, disk_over_commit):
destination = scheduler_hint.get("host")
@@ -370,6 +377,8 @@ def build_instances(self, context, instances, image, filter_properties,
self._set_vm_state_and_notify(
context, instance.uuid, 'build_instances', updates,
exc, request_spec)
self._cleanup_allocated_networks(
context, instance, requested_networks)
return

for (instance, host) in itertools.izip(instances, hosts):
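Note on the new helper: as the hunks above show,
_cleanup_allocated_networks is a thin wrapper over the network API. A
rough sketch of the delegated call (ctxt and instance are assumed to be a
RequestContext and an Instance object already in hand; this mirrors the
signature shown in the diff, nothing more):

    from nova import network

    network_api = network.API()
    # requested_networks carries what was asked for at boot time, so the
    # network API can release those resources appropriately; passing None
    # (as build_instances may) means releasing whatever networking was
    # allocated for the instance.
    network_api.deallocate_for_instance(
        ctxt, instance, requested_networks=None)
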
85 changes: 53 additions & 32 deletions nova/tests/unit/conductor/test_conductor.py
@@ -488,52 +488,65 @@ def test_build_instances(self, mock_refresh):
block_device_mapping='block_device_mapping',
legacy_bdm=False)

def test_build_instances_scheduler_failure(self):
@mock.patch.object(scheduler_utils, 'build_request_spec')
@mock.patch.object(scheduler_utils, 'setup_instance_group')
@mock.patch.object(scheduler_utils, 'set_vm_state_and_notify')
@mock.patch.object(scheduler_client.SchedulerClient,
'select_destinations')
@mock.patch.object(conductor_manager.ComputeTaskManager,
'_cleanup_allocated_networks')
def test_build_instances_scheduler_failure(
self, cleanup_mock, sd_mock, state_mock,
sig_mock, bs_mock):
instances = [fake_instance.fake_instance_obj(self.context)
for i in range(2)]
for i in range(2)]
image = {'fake-data': 'should_pass_silently'}
spec = {'fake': 'specs',
'instance_properties': instances[0]}
exception = exc.NoValidHost(reason='fake-reason')
self.mox.StubOutWithMock(scheduler_utils, 'build_request_spec')
self.mox.StubOutWithMock(self.conductor_manager, '_schedule_instances')
self.mox.StubOutWithMock(scheduler_utils, 'set_vm_state_and_notify')

scheduler_utils.build_request_spec(self.context, image,
mox.IgnoreArg()).AndReturn(spec)
filter_properties = {'retry': {'num_attempts': 1, 'hosts': []}}
self.conductor_manager._schedule_instances(self.context,
spec, filter_properties).AndRaise(exception)
bs_mock.return_value = spec
sd_mock.side_effect = exception
updates = {'vm_state': vm_states.ERROR, 'task_state': None}
for instance in instances:
scheduler_utils.set_vm_state_and_notify(
self.context, instance.uuid, 'compute_task', 'build_instances',
updates, exception, spec, self.conductor_manager.db)
self.mox.ReplayAll()

# build_instances() is a cast, we need to wait for it to complete
self.useFixture(cast_as_call.CastAsCall(self.stubs))

self.conductor.build_instances(self.context,
instances=instances,
image=image,
filter_properties={},
admin_password='admin_password',
injected_files='injected_files',
requested_networks=None,
security_groups='security_groups',
block_device_mapping='block_device_mapping',
legacy_bdm=False)
self.conductor.build_instances(
self.context,
instances=instances,
image=image,
filter_properties={},
admin_password='admin_password',
injected_files='injected_files',
requested_networks=None,
security_groups='security_groups',
block_device_mapping='block_device_mapping',
legacy_bdm=False)

set_state_calls = []
cleanup_network_calls = []
for instance in instances:
set_state_calls.append(mock.call(
self.context, instance.uuid, 'compute_task', 'build_instances',
updates, exception, spec, self.conductor_manager.db))
cleanup_network_calls.append(mock.call(
self.context, mock.ANY, None))
state_mock.assert_has_calls(set_state_calls)
cleanup_mock.assert_has_calls(cleanup_network_calls)

def test_build_instances_retry_exceeded(self):
instances = [fake_instance.fake_instance_obj(self.context)]
image = {'fake-data': 'should_pass_silently'}
filter_properties = {'retry': {'num_attempts': 10, 'hosts': []}}
updates = {'vm_state': vm_states.ERROR, 'task_state': None}

@mock.patch.object(conductor_manager.ComputeTaskManager,
'_cleanup_allocated_networks')
@mock.patch.object(scheduler_utils, 'set_vm_state_and_notify')
@mock.patch.object(scheduler_utils, 'populate_retry')
def _test(populate_retry, set_vm_state_and_notify):
def _test(populate_retry,
set_vm_state_and_notify, cleanup_mock):
# build_instances() is a cast, we need to wait for it to
# complete
self.useFixture(cast_as_call.CastAsCall(self.stubs))
@@ -559,15 +572,18 @@ def _test(populate_retry, set_vm_state_and_notify):
self.context, instances[0].uuid, 'compute_task',
'build_instances', updates, mock.ANY, {},
self.conductor_manager.db)
cleanup_mock.assert_called_once_with(self.context, mock.ANY, None)

_test()

@mock.patch.object(scheduler_utils, 'build_request_spec')
@mock.patch.object(scheduler_utils, 'setup_instance_group')
@mock.patch.object(conductor_manager.ComputeTaskManager,
'_set_vm_state_and_notify')
def test_build_instances_scheduler_group_failure(self, state_mock,
sig_mock, bs_mock):
@mock.patch.object(conductor_manager.ComputeTaskManager,
'_cleanup_allocated_networks')
def test_build_instances_scheduler_group_failure(
self, cleanup_mock, state_mock, sig_mock, bs_mock):
instances = [fake_instance.fake_instance_obj(self.context)
for i in range(2)]
image = {'fake-data': 'should_pass_silently'}
@@ -594,11 +610,16 @@ def test_build_instances_scheduler_group_failure(self, state_mock,
security_groups='security_groups',
block_device_mapping='block_device_mapping',
legacy_bdm=False)
calls = []
set_state_calls = []
cleanup_network_calls = []
for instance in instances:
calls.append(mock.call(self.context, instance.uuid,
'build_instances', updates, exception, spec))
state_mock.assert_has_calls(calls)
set_state_calls.append(mock.call(
self.context, instance.uuid, 'build_instances', updates,
exception, spec))
cleanup_network_calls.append(mock.call(
self.context, mock.ANY, None))
state_mock.assert_has_calls(set_state_calls)
cleanup_mock.assert_has_calls(cleanup_network_calls)

def test_unshelve_instance_on_host(self):
instance = self._create_fake_instance_obj()
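
The updated tests above also replace the old mox stubs with
mock.patch.object decorators. A self-contained sketch of that assertion
pattern (all names here are invented; only the mock calls mirror the
tests):

    from unittest import mock  # Nova tests of this era use the external
                               # 'mock' package instead

    class ManagerSketch(object):
        def _cleanup_allocated_networks(self, context, instance, nets):
            raise AssertionError('patched out in the test')

        def run(self, context, instances):
            for instance in instances:
                self._cleanup_allocated_networks(context, instance, None)

    @mock.patch.object(ManagerSketch, '_cleanup_allocated_networks')
    def _test(cleanup_mock):
        ManagerSketch().run('ctxt', ['vm-a', 'vm-b'])
        # Build the expected call list the same way the tests above do,
        # then assert the mock saw those calls in order.
        expected = [mock.call('ctxt', 'vm-a', None),
                    mock.call('ctxt', 'vm-b', None)]
        cleanup_mock.assert_has_calls(expected)

    _test()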
