Remove race from wait_for_interface_detach waiter

Nova list the interfaces attached to a server based on list of ports bound to the server. However during detach interface nova unbounds the port first and then deallocates the resources used by that port in placement. The current detach waiter only waits until the interface disappears from the interface list. This can cause that waiter returns before the resource allocation is removed from placement cause a race in the test asserting such allocation. So this patch changes the waiter to wait for the successful detach os-instance-action event for the server as that is only recorded after the port is fully deallocated. blueprint: qos-minimum-guaranteed-packet-rate Change-Id: I8740f8e0cc18ffea31a9a068bccee50bf4e6fe28
openstack · Nov 4, 2021 · 5541458 · 5541458
1 parent f46bcdf
commit 5541458
Show file tree

Hide file tree

Showing 3 changed files with 111 additions and 33 deletions.
diff --git a/tempest/api/compute/servers/test_device_tagging.py b/tempest/api/compute/servers/test_device_tagging.py
@@ -35,6 +35,8 @@
 
 class DeviceTaggingBase(base.BaseV2ComputeTest):
 
+    credentials = ['primary', 'admin']
+
     @classmethod
     def skip_checks(cls):
         super(DeviceTaggingBase, cls).skip_checks()
@@ -54,6 +56,7 @@ def setup_clients(cls):
         cls.ports_client = cls.os_primary.ports_client
         cls.subnets_client = cls.os_primary.subnets_client
         cls.interfaces_client = cls.os_primary.interfaces_client
+        cls.servers_admin_client = cls.os_admin.servers_client
 
     @classmethod
     def setup_credentials(cls):
@@ -422,11 +425,13 @@ def test_tagged_attachment(self):
         self.servers_client.detach_volume(server['id'], volume['id'])
         waiters.wait_for_volume_resource_status(self.volumes_client,
                                                 volume['id'], 'available')
-        self.interfaces_client.delete_interface(server['id'],
-                                                interface['port_id'])
-        waiters.wait_for_interface_detach(self.interfaces_client,
+        req_id = self.interfaces_client.delete_interface(
+            server['id'], interface['port_id']
+        ).response['x-openstack-request-id']
+        waiters.wait_for_interface_detach(self.servers_admin_client,
                                           server['id'],
-                                          interface['port_id'])
+                                          interface['port_id'],
+                                          req_id)
         # FIXME(mriedem): The assertion that the tagged devices are removed
         # from the metadata for the server is being skipped until bug 1775947
         # is fixed.

diff --git a/tempest/common/waiters.py b/tempest/common/waiters.py
@@ -489,18 +489,34 @@ def wait_for_interface_status(client, server_id, port_id, status):
     return body
 
 
-def wait_for_interface_detach(client, server_id, port_id):
+def wait_for_interface_detach(client, server_id, port_id, detach_request_id):
     """Waits for an interface to be detached from a server."""
-    body = client.list_interfaces(server_id)['interfaceAttachments']
-    ports = [iface['port_id'] for iface in body]
+    def _get_detach_event_results():
+        # NOTE(gibi): The obvious choice for this waiter would be to wait
+        # until the interface disappears from the client.list_interfaces()
+        # response. However that response is based on the binding status of the
+        # port in Neutron. Nova deallocates the port resources _after the port
+        # was  unbound in Neutron. This can cause that the naive waiter would
+        # return before the port is fully deallocated. Wait instead of the
+        # os-instance-action to succeed as that is recorded after both the
+        # port is fully deallocated.
+        events = client.show_instance_action(
+            server_id, detach_request_id)['instanceAction'].get('events', [])
+        return [
+            event['result'] for event in events
+            if event['event'] == 'compute_detach_interface'
+        ]
+
+    detach_event_results = _get_detach_event_results()
+
     start = int(time.time())
 
-    while port_id in ports:
+    while "Success" not in detach_event_results:
         time.sleep(client.build_interval)
-        body = client.list_interfaces(server_id)['interfaceAttachments']
-        ports = [iface['port_id'] for iface in body]
-        if port_id not in ports:
-            return body
+        detach_event_results = _get_detach_event_results()
+        if "Success" in detach_event_results:
+            return client.show_instance_action(
+                server_id, detach_request_id)['instanceAction']
 
         timed_out = int(time.time()) - start >= client.build_timeout
         if timed_out:

diff --git a/tempest/tests/common/test_waiters.py b/tempest/tests/common/test_waiters.py
@@ -186,37 +186,94 @@ def test_wait_for_interface_status_timeout(self):
                                          mock.call('server_id', 'port_id')])
         sleep.assert_called_once_with(client.build_interval)
 
-    one_interface = {'interfaceAttachments': [{'port_id': 'port_one'}]}
-    two_interfaces = {'interfaceAttachments': [{'port_id': 'port_one'},
-                                               {'port_id': 'port_two'}]}
-
     def test_wait_for_interface_detach(self):
-        list_interfaces = mock.MagicMock(
-            side_effect=[self.two_interfaces, self.one_interface])
-        client = self.mock_client(list_interfaces=list_interfaces)
+        no_event = {
+            'instanceAction': {
+                'events': []
+            }
+        }
+        one_event_without_result = {
+            'instanceAction': {
+                'events': [
+                    {
+                        'event': 'compute_detach_interface',
+                        'result': None
+                    }
+
+                ]
+            }
+        }
+        one_event_successful = {
+            'instanceAction': {
+                'events': [
+                    {
+                        'event': 'compute_detach_interface',
+                        'result': 'Success'
+                    }
+                ]
+            }
+        }
+
+        show_instance_action = mock.MagicMock(
+            # there is an extra call to return the result from the waiter
+            side_effect=[
+                no_event,
+                one_event_without_result,
+                one_event_successful,
+                one_event_successful,
+            ]
+        )
+        client = self.mock_client(show_instance_action=show_instance_action)
         self.patch('time.time', return_value=0.)
         sleep = self.patch('time.sleep')
 
         result = waiters.wait_for_interface_detach(
-            client, 'server_id', 'port_two')
-
-        self.assertIs(self.one_interface['interfaceAttachments'], result)
-        list_interfaces.assert_has_calls([mock.call('server_id'),
-                                          mock.call('server_id')])
-        sleep.assert_called_once_with(client.build_interval)
+            client, mock.sentinel.server_id, mock.sentinel.port_id,
+            mock.sentinel.detach_request_id
+        )
+
+        self.assertIs(one_event_successful['instanceAction'], result)
+        show_instance_action.assert_has_calls(
+            # there is an extra call to return the result from the waiter
+            [
+                mock.call(
+                    mock.sentinel.server_id, mock.sentinel.detach_request_id)
+            ] * 4
+        )
+        sleep.assert_has_calls([mock.call(client.build_interval)] * 2)
 
     def test_wait_for_interface_detach_timeout(self):
-        list_interfaces = mock.MagicMock(return_value=self.one_interface)
-        client = self.mock_client(list_interfaces=list_interfaces)
+        one_event_without_result = {
+            'instanceAction': {
+                'events': [
+                    {
+                        'event': 'compute_detach_interface',
+                        'result': None
+                    }
+
+                ]
+            }
+        }
+
+        show_instance_action = mock.MagicMock(
+            return_value=one_event_without_result)
+        client = self.mock_client(show_instance_action=show_instance_action)
         self.patch('time.time', side_effect=[0., client.build_timeout + 1.])
         sleep = self.patch('time.sleep')
 
-        self.assertRaises(lib_exc.TimeoutException,
-                          waiters.wait_for_interface_detach,
-                          client, 'server_id', 'port_one')
-
-        list_interfaces.assert_has_calls([mock.call('server_id'),
-                                          mock.call('server_id')])
+        self.assertRaises(
+            lib_exc.TimeoutException,
+            waiters.wait_for_interface_detach,
+            client, mock.sentinel.server_id, mock.sentinel.port_id,
+            mock.sentinel.detach_request_id
+        )
+
+        show_instance_action.assert_has_calls(
+            [
+                mock.call(
+                    mock.sentinel.server_id, mock.sentinel.detach_request_id)
+            ] * 2
+        )
         sleep.assert_called_once_with(client.build_interval)
 
     def test_wait_for_guest_os_boot(self):