From 55414580c24384df8bb2854b2c71249848dfbdf6 Mon Sep 17 00:00:00 2001 From: Balazs Gibizer Date: Tue, 5 Oct 2021 11:22:30 +0200 Subject: [PATCH] Remove race from wait_for_interface_detach waiter Nova list the interfaces attached to a server based on list of ports bound to the server. However during detach interface nova unbounds the port first and then deallocates the resources used by that port in placement. The current detach waiter only waits until the interface disappears from the interface list. This can cause that waiter returns before the resource allocation is removed from placement cause a race in the test asserting such allocation. So this patch changes the waiter to wait for the successful detach os-instance-action event for the server as that is only recorded after the port is fully deallocated. blueprint: qos-minimum-guaranteed-packet-rate Change-Id: I8740f8e0cc18ffea31a9a068bccee50bf4e6fe28 --- .../compute/servers/test_device_tagging.py | 13 ++- tempest/common/waiters.py | 32 ++++-- tempest/tests/common/test_waiters.py | 99 +++++++++++++++---- 3 files changed, 111 insertions(+), 33 deletions(-) diff --git a/tempest/api/compute/servers/test_device_tagging.py b/tempest/api/compute/servers/test_device_tagging.py index 58d4d7d060..56456f4546 100644 --- a/tempest/api/compute/servers/test_device_tagging.py +++ b/tempest/api/compute/servers/test_device_tagging.py @@ -35,6 +35,8 @@ class DeviceTaggingBase(base.BaseV2ComputeTest): + credentials = ['primary', 'admin'] + @classmethod def skip_checks(cls): super(DeviceTaggingBase, cls).skip_checks() @@ -54,6 +56,7 @@ def setup_clients(cls): cls.ports_client = cls.os_primary.ports_client cls.subnets_client = cls.os_primary.subnets_client cls.interfaces_client = cls.os_primary.interfaces_client + cls.servers_admin_client = cls.os_admin.servers_client @classmethod def setup_credentials(cls): @@ -422,11 +425,13 @@ def test_tagged_attachment(self): self.servers_client.detach_volume(server['id'], volume['id']) waiters.wait_for_volume_resource_status(self.volumes_client, volume['id'], 'available') - self.interfaces_client.delete_interface(server['id'], - interface['port_id']) - waiters.wait_for_interface_detach(self.interfaces_client, + req_id = self.interfaces_client.delete_interface( + server['id'], interface['port_id'] + ).response['x-openstack-request-id'] + waiters.wait_for_interface_detach(self.servers_admin_client, server['id'], - interface['port_id']) + interface['port_id'], + req_id) # FIXME(mriedem): The assertion that the tagged devices are removed # from the metadata for the server is being skipped until bug 1775947 # is fixed. diff --git a/tempest/common/waiters.py b/tempest/common/waiters.py index f6a4555fe8..33ed1532cf 100644 --- a/tempest/common/waiters.py +++ b/tempest/common/waiters.py @@ -489,18 +489,34 @@ def wait_for_interface_status(client, server_id, port_id, status): return body -def wait_for_interface_detach(client, server_id, port_id): +def wait_for_interface_detach(client, server_id, port_id, detach_request_id): """Waits for an interface to be detached from a server.""" - body = client.list_interfaces(server_id)['interfaceAttachments'] - ports = [iface['port_id'] for iface in body] + def _get_detach_event_results(): + # NOTE(gibi): The obvious choice for this waiter would be to wait + # until the interface disappears from the client.list_interfaces() + # response. However that response is based on the binding status of the + # port in Neutron. Nova deallocates the port resources _after the port + # was unbound in Neutron. This can cause that the naive waiter would + # return before the port is fully deallocated. Wait instead of the + # os-instance-action to succeed as that is recorded after both the + # port is fully deallocated. + events = client.show_instance_action( + server_id, detach_request_id)['instanceAction'].get('events', []) + return [ + event['result'] for event in events + if event['event'] == 'compute_detach_interface' + ] + + detach_event_results = _get_detach_event_results() + start = int(time.time()) - while port_id in ports: + while "Success" not in detach_event_results: time.sleep(client.build_interval) - body = client.list_interfaces(server_id)['interfaceAttachments'] - ports = [iface['port_id'] for iface in body] - if port_id not in ports: - return body + detach_event_results = _get_detach_event_results() + if "Success" in detach_event_results: + return client.show_instance_action( + server_id, detach_request_id)['instanceAction'] timed_out = int(time.time()) - start >= client.build_timeout if timed_out: diff --git a/tempest/tests/common/test_waiters.py b/tempest/tests/common/test_waiters.py index 5cdbfbf7c2..a5016e2d8f 100755 --- a/tempest/tests/common/test_waiters.py +++ b/tempest/tests/common/test_waiters.py @@ -186,37 +186,94 @@ def test_wait_for_interface_status_timeout(self): mock.call('server_id', 'port_id')]) sleep.assert_called_once_with(client.build_interval) - one_interface = {'interfaceAttachments': [{'port_id': 'port_one'}]} - two_interfaces = {'interfaceAttachments': [{'port_id': 'port_one'}, - {'port_id': 'port_two'}]} - def test_wait_for_interface_detach(self): - list_interfaces = mock.MagicMock( - side_effect=[self.two_interfaces, self.one_interface]) - client = self.mock_client(list_interfaces=list_interfaces) + no_event = { + 'instanceAction': { + 'events': [] + } + } + one_event_without_result = { + 'instanceAction': { + 'events': [ + { + 'event': 'compute_detach_interface', + 'result': None + } + + ] + } + } + one_event_successful = { + 'instanceAction': { + 'events': [ + { + 'event': 'compute_detach_interface', + 'result': 'Success' + } + ] + } + } + + show_instance_action = mock.MagicMock( + # there is an extra call to return the result from the waiter + side_effect=[ + no_event, + one_event_without_result, + one_event_successful, + one_event_successful, + ] + ) + client = self.mock_client(show_instance_action=show_instance_action) self.patch('time.time', return_value=0.) sleep = self.patch('time.sleep') result = waiters.wait_for_interface_detach( - client, 'server_id', 'port_two') - - self.assertIs(self.one_interface['interfaceAttachments'], result) - list_interfaces.assert_has_calls([mock.call('server_id'), - mock.call('server_id')]) - sleep.assert_called_once_with(client.build_interval) + client, mock.sentinel.server_id, mock.sentinel.port_id, + mock.sentinel.detach_request_id + ) + + self.assertIs(one_event_successful['instanceAction'], result) + show_instance_action.assert_has_calls( + # there is an extra call to return the result from the waiter + [ + mock.call( + mock.sentinel.server_id, mock.sentinel.detach_request_id) + ] * 4 + ) + sleep.assert_has_calls([mock.call(client.build_interval)] * 2) def test_wait_for_interface_detach_timeout(self): - list_interfaces = mock.MagicMock(return_value=self.one_interface) - client = self.mock_client(list_interfaces=list_interfaces) + one_event_without_result = { + 'instanceAction': { + 'events': [ + { + 'event': 'compute_detach_interface', + 'result': None + } + + ] + } + } + + show_instance_action = mock.MagicMock( + return_value=one_event_without_result) + client = self.mock_client(show_instance_action=show_instance_action) self.patch('time.time', side_effect=[0., client.build_timeout + 1.]) sleep = self.patch('time.sleep') - self.assertRaises(lib_exc.TimeoutException, - waiters.wait_for_interface_detach, - client, 'server_id', 'port_one') - - list_interfaces.assert_has_calls([mock.call('server_id'), - mock.call('server_id')]) + self.assertRaises( + lib_exc.TimeoutException, + waiters.wait_for_interface_detach, + client, mock.sentinel.server_id, mock.sentinel.port_id, + mock.sentinel.detach_request_id + ) + + show_instance_action.assert_has_calls( + [ + mock.call( + mock.sentinel.server_id, mock.sentinel.detach_request_id) + ] * 2 + ) sleep.assert_called_once_with(client.build_interval) def test_wait_for_guest_os_boot(self):