Clean up network resources when reschedule fails
During the instance boot (spawn/run) process, neutron ports are
allocated for the instance if necessary. If the instance fails
to spawn (say as a result of a compute host failure), the default
behaviour is to reschedule the instance and leave its networking
resources intact for potential reuse on the rescheduled host.
All is good if the instance is successfully rescheduled, but if
the reschedule fails (say, no more applicable hosts), the allocated
ports are left as-is and effectively orphaned.

This commit adds code to clean up allocated network resources
when the reschedule fails.

Change-Id: Ic670dd4dc192603c2faecf18e14ef59ebca9e420
Closes-Bug: #1510979
yuywz committed Jan 14, 2016
1 parent 523e8ed commit 08d24b7
Showing 2 changed files with 62 additions and 32 deletions.
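
Before the diff, a minimal, runnable sketch of the behaviour being added.
Every name here except deallocate_for_instance() is invented for
illustration; the real change lives in ComputeTaskManager.build_instances,
shown in the manager.py hunks below:

    # Sketch of the failure path this commit changes. Only
    # deallocate_for_instance() mirrors real Nova API; everything
    # else stands in for conductor internals.

    class NoValidHost(Exception):
        """The scheduler has no applicable hosts left."""


    class StubNetworkAPI(object):
        """Stands in for nova.network.API(); records deallocations."""

        def __init__(self):
            self.deallocated = []

        def deallocate_for_instance(self, context, instance,
                                    requested_networks=None):
            self.deallocated.append(instance)


    class ConductorSketch(object):
        def __init__(self, network_api):
            self.network_api = network_api

        def _schedule_instances(self, context, instances):
            # Simulate "no more applicable hosts" on reschedule.
            raise NoValidHost()

        def _set_vm_state_and_notify(self, context, instance, exc):
            pass  # real code sets vm_state=ERROR and emits a notification

        def _cleanup_allocated_networks(self, context, instance,
                                        requested_networks):
            # The helper added by this commit: release the instance's
            # neutron ports instead of leaving them orphaned.
            self.network_api.deallocate_for_instance(
                context, instance, requested_networks=requested_networks)

        def build_instances(self, context, instances,
                            requested_networks=None):
            try:
                self._schedule_instances(context, instances)
            except NoValidHost as exc:
                for instance in instances:
                    self._set_vm_state_and_notify(context, instance, exc)
                    # New behaviour: clean up per instance, then give up.
                    self._cleanup_allocated_networks(
                        context, instance, requested_networks)
                return


    network_api = StubNetworkAPI()
    ConductorSketch(network_api).build_instances(None, ['vm-1', 'vm-2'])
    assert network_api.deallocated == ['vm-1', 'vm-2']
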
9 changes: 9 additions & 0 deletions nova/conductor/manager.py
@@ -35,6 +35,7 @@
from nova.i18n import _, _LE, _LI, _LW
from nova import image
from nova import manager
from nova import network
from nova import objects
from nova.objects import base as nova_object
from nova import rpc
@@ -150,6 +151,7 @@ def __init__(self):
super(ComputeTaskManager, self).__init__()
self.compute_rpcapi = compute_rpcapi.ComputeAPI()
self.image_api = image.API()
self.network_api = network.API()
self.servicegroup_api = servicegroup.API()
self.scheduler_client = scheduler_client.SchedulerClient()
self.notifier = rpc.get_notifier('compute', CONF.host)
@@ -251,6 +253,11 @@ def _set_vm_state_and_notify(self, context, instance_uuid, method, updates,
context, instance_uuid, 'compute_task', method, updates,
ex, request_spec, self.db)

def _cleanup_allocated_networks(
self, context, instance, requested_networks):
self.network_api.deallocate_for_instance(
context, instance, requested_networks=requested_networks)

def _live_migrate(self, context, instance, scheduler_hint,
block_migration, disk_over_commit):
destination = scheduler_hint.get("host")
@@ -370,6 +377,8 @@ def build_instances(self, context, instances, image, filter_properties,
self._set_vm_state_and_notify(
context, instance.uuid, 'build_instances', updates,
exc, request_spec)
self._cleanup_allocated_networks(
context, instance, requested_networks)
return

for (instance, host) in itertools.izip(instances, hosts):
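Note on the new helper: as the hunks above show,
_cleanup_allocated_networks is a thin wrapper over the network API. A
rough sketch of the delegated call (ctxt and instance are assumed to be a
RequestContext and an Instance object already in hand; this mirrors the
signature shown in the diff, nothing more):

    from nova import network

    network_api = network.API()
    # requested_networks carries what was asked for at boot time, so the
    # network API can release those resources appropriately; passing None
    # (as build_instances may) means releasing whatever networking was
    # allocated for the instance.
    network_api.deallocate_for_instance(
        ctxt, instance, requested_networks=None)
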
85 changes: 53 additions & 32 deletions nova/tests/unit/conductor/test_conductor.py
@@ -488,52 +488,65 @@ def test_build_instances(self, mock_refresh):
block_device_mapping='block_device_mapping',
legacy_bdm=False)

def test_build_instances_scheduler_failure(self):
@mock.patch.object(scheduler_utils, 'build_request_spec')
@mock.patch.object(scheduler_utils, 'setup_instance_group')
@mock.patch.object(scheduler_utils, 'set_vm_state_and_notify')
@mock.patch.object(scheduler_client.SchedulerClient,
'select_destinations')
@mock.patch.object(conductor_manager.ComputeTaskManager,
'_cleanup_allocated_networks')
def test_build_instances_scheduler_failure(
self, cleanup_mock, sd_mock, state_mock,
sig_mock, bs_mock):
instances = [fake_instance.fake_instance_obj(self.context)
for i in range(2)]
for i in range(2)]
image = {'fake-data': 'should_pass_silently'}
spec = {'fake': 'specs',
'instance_properties': instances[0]}
exception = exc.NoValidHost(reason='fake-reason')
self.mox.StubOutWithMock(scheduler_utils, 'build_request_spec')
self.mox.StubOutWithMock(self.conductor_manager, '_schedule_instances')
self.mox.StubOutWithMock(scheduler_utils, 'set_vm_state_and_notify')

scheduler_utils.build_request_spec(self.context, image,
mox.IgnoreArg()).AndReturn(spec)
filter_properties = {'retry': {'num_attempts': 1, 'hosts': []}}
self.conductor_manager._schedule_instances(self.context,
spec, filter_properties).AndRaise(exception)
bs_mock.return_value = spec
sd_mock.side_effect = exception
updates = {'vm_state': vm_states.ERROR, 'task_state': None}
for instance in instances:
scheduler_utils.set_vm_state_and_notify(
self.context, instance.uuid, 'compute_task', 'build_instances',
updates, exception, spec, self.conductor_manager.db)
self.mox.ReplayAll()

# build_instances() is a cast, we need to wait for it to complete
self.useFixture(cast_as_call.CastAsCall(self.stubs))

self.conductor.build_instances(self.context,
instances=instances,
image=image,
filter_properties={},
admin_password='admin_password',
injected_files='injected_files',
requested_networks=None,
security_groups='security_groups',
block_device_mapping='block_device_mapping',
legacy_bdm=False)
self.conductor.build_instances(
self.context,
instances=instances,
image=image,
filter_properties={},
admin_password='admin_password',
injected_files='injected_files',
requested_networks=None,
security_groups='security_groups',
block_device_mapping='block_device_mapping',
legacy_bdm=False)

set_state_calls = []
cleanup_network_calls = []
for instance in instances:
set_state_calls.append(mock.call(
self.context, instance.uuid, 'compute_task', 'build_instances',
updates, exception, spec, self.conductor_manager.db))
cleanup_network_calls.append(mock.call(
self.context, mock.ANY, None))
state_mock.assert_has_calls(set_state_calls)
cleanup_mock.assert_has_calls(cleanup_network_calls)

def test_build_instances_retry_exceeded(self):
instances = [fake_instance.fake_instance_obj(self.context)]
image = {'fake-data': 'should_pass_silently'}
filter_properties = {'retry': {'num_attempts': 10, 'hosts': []}}
updates = {'vm_state': vm_states.ERROR, 'task_state': None}

@mock.patch.object(conductor_manager.ComputeTaskManager,
'_cleanup_allocated_networks')
@mock.patch.object(scheduler_utils, 'set_vm_state_and_notify')
@mock.patch.object(scheduler_utils, 'populate_retry')
def _test(populate_retry, set_vm_state_and_notify):
def _test(populate_retry,
set_vm_state_and_notify, cleanup_mock):
# build_instances() is a cast, we need to wait for it to
# complete
self.useFixture(cast_as_call.CastAsCall(self.stubs))
@@ -559,15 +572,18 @@ def _test(populate_retry, set_vm_state_and_notify):
self.context, instances[0].uuid, 'compute_task',
'build_instances', updates, mock.ANY, {},
self.conductor_manager.db)
cleanup_mock.assert_called_once_with(self.context, mock.ANY, None)

_test()

@mock.patch.object(scheduler_utils, 'build_request_spec')
@mock.patch.object(scheduler_utils, 'setup_instance_group')
@mock.patch.object(conductor_manager.ComputeTaskManager,
'_set_vm_state_and_notify')
def test_build_instances_scheduler_group_failure(self, state_mock,
sig_mock, bs_mock):
@mock.patch.object(conductor_manager.ComputeTaskManager,
'_cleanup_allocated_networks')
def test_build_instances_scheduler_group_failure(
self, cleanup_mock, state_mock, sig_mock, bs_mock):
instances = [fake_instance.fake_instance_obj(self.context)
for i in range(2)]
image = {'fake-data': 'should_pass_silently'}
@@ -594,11 +610,16 @@ def test_build_instances_scheduler_group_failure(self, state_mock,
security_groups='security_groups',
block_device_mapping='block_device_mapping',
legacy_bdm=False)
calls = []
set_state_calls = []
cleanup_network_calls = []
for instance in instances:
calls.append(mock.call(self.context, instance.uuid,
'build_instances', updates, exception, spec))
state_mock.assert_has_calls(calls)
set_state_calls.append(mock.call(
self.context, instance.uuid, 'build_instances', updates,
exception, spec))
cleanup_network_calls.append(mock.call(
self.context, mock.ANY, None))
state_mock.assert_has_calls(set_state_calls)
cleanup_mock.assert_has_calls(cleanup_network_calls)

def test_unshelve_instance_on_host(self):
instance = self._create_fake_instance_obj()
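
The updated tests above also replace the old mox stubs with
mock.patch.object decorators. A self-contained sketch of that assertion
pattern (all names here are invented; only the mock calls mirror the
tests):

    from unittest import mock  # Nova tests of this era use the external
                               # 'mock' package instead

    class ManagerSketch(object):
        def _cleanup_allocated_networks(self, context, instance, nets):
            raise AssertionError('patched out in the test')

        def run(self, context, instances):
            for instance in instances:
                self._cleanup_allocated_networks(context, instance, None)

    @mock.patch.object(ManagerSketch, '_cleanup_allocated_networks')
    def _test(cleanup_mock):
        ManagerSketch().run('ctxt', ['vm-a', 'vm-b'])
        # Build the expected call list the same way the tests above do,
        # then assert the mock saw those calls in order.
        expected = [mock.call('ctxt', 'vm-a', None),
                    mock.call('ctxt', 'vm-b', None)]
        cleanup_mock.assert_has_calls(expected)

    _test()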
