From 824a8257a88d7192ecd4a5efdcd1dba46d63d82b Mon Sep 17 00:00:00 2001 From: Miro Tomaska Date: Wed, 9 Aug 2023 17:34:34 -0400 Subject: [PATCH 1/2] Metadata: handle process exceptions Both metadata agents (OVN and non-OVN) should handle process exceptions when spawning haproxy processes such that the agent can continue its operation for other haproxy processes. Closes-Bug: #2033305 Change-Id: I6da1b135c83ecfc41ec91e907ebf8500325a7a80 (cherry picked from commit 4698dd899fa149088160372b6f2efcd3ce973cbb) --- neutron/agent/metadata/driver.py | 8 +++- neutron/agent/ovn/metadata/driver.py | 8 +++- neutron/tests/unit/agent/dhcp/test_agent.py | 43 +++++++++++++------ .../tests/unit/agent/metadata/test_driver.py | 21 +++++++++ .../unit/agent/ovn/metadata/test_driver.py | 24 +++++++++++ 5 files changed, 90 insertions(+), 14 deletions(-) diff --git a/neutron/agent/metadata/driver.py b/neutron/agent/metadata/driver.py index 2c9af24d5e1..5f1d6190fee 100644 --- a/neutron/agent/metadata/driver.py +++ b/neutron/agent/metadata/driver.py @@ -294,7 +294,13 @@ def spawn_monitored_metadata_proxy(cls, monitor, ns_name, port, conf, pm = cls._get_metadata_proxy_process_manager(uuid, conf, ns_name=ns_name, callback=callback) - pm.enable() + try: + pm.enable() + except exceptions.ProcessExecutionError as exec_err: + LOG.error("Encountered process execution error %(err)s while " + "starting process in namespace %(ns)s", + {"err": exec_err, "ns": ns_name}) + return monitor.register(uuid, METADATA_SERVICE_NAME, pm) cls.monitors[router_id] = pm diff --git a/neutron/agent/ovn/metadata/driver.py b/neutron/agent/ovn/metadata/driver.py index 472ba9ea456..8b8ff0deb3f 100644 --- a/neutron/agent/ovn/metadata/driver.py +++ b/neutron/agent/ovn/metadata/driver.py @@ -197,7 +197,13 @@ def spawn_monitored_metadata_proxy(cls, monitor, ns_name, port, conf, pm = cls._get_metadata_proxy_process_manager(uuid, conf, ns_name=ns_name, callback=callback) - pm.enable() + try: + pm.enable() + except exceptions.ProcessExecutionError as exec_err: + LOG.error("Encountered process execution error %(err)s while " + "starting process in namespace %(ns)s", + {"err": exec_err, "ns": ns_name}) + return monitor.register(uuid, METADATA_SERVICE_NAME, pm) cls.monitors[router_id] = pm diff --git a/neutron/tests/unit/agent/dhcp/test_agent.py b/neutron/tests/unit/agent/dhcp/test_agent.py index 367eebc156b..980f8ae0047 100644 --- a/neutron/tests/unit/agent/dhcp/test_agent.py +++ b/neutron/tests/unit/agent/dhcp/test_agent.py @@ -875,23 +875,37 @@ def _process_manager_constructor_call(self, ns=FAKE_NETWORK_DHCP_NS): def _enable_dhcp_helper(self, network, enable_isolated_metadata=False, is_isolated_network=False, is_ovn_network=False): self.dhcp._process_monitor = mock.Mock() + # The disable() call + gmppm_expected_calls = [mock.call(FAKE_NETWORK_UUID, cfg.CONF, + ns_name=FAKE_NETWORK_DHCP_NS)] if enable_isolated_metadata: cfg.CONF.set_override('enable_isolated_metadata', True) + if is_isolated_network: + # The enable() call + gmppm_expected_calls.append( + mock.call(FAKE_NETWORK_UUID, cfg.CONF, + ns_name=FAKE_NETWORK_DHCP_NS, + callback=mock.ANY)) self.plugin.get_network_info.return_value = network - self.dhcp.enable_dhcp_helper(network.id) + process_instance = mock.Mock(active=False) + with mock.patch.object(metadata_driver.MetadataDriver, + '_get_metadata_proxy_process_manager', + return_value=process_instance) as gmppm: + self.dhcp.enable_dhcp_helper(network.id) + gmppm.assert_has_calls(gmppm_expected_calls) self.plugin.assert_has_calls([ 
mock.call.get_network_info(network.id)]) self.call_driver.assert_called_once_with('enable', network) self.cache.assert_has_calls([mock.call.put(network)]) if (is_isolated_network and enable_isolated_metadata and not is_ovn_network): - self.external_process.assert_has_calls([ - self._process_manager_constructor_call(), - mock.call().enable()], any_order=True) + process_instance.assert_has_calls([ + mock.call.disable(sig=str(int(signal.SIGTERM))), + mock.call.get_pid_file_name(), + mock.call.enable()]) else: - self.external_process.assert_has_calls([ - self._process_manager_constructor_call(), - mock.call().disable(sig=str(int(signal.SIGTERM)))]) + process_instance.assert_has_calls([ + mock.call.disable(sig=str(int(signal.SIGTERM)))]) def test_enable_dhcp_helper_enable_metadata_isolated_network(self): self._enable_dhcp_helper(isolated_network, @@ -1065,11 +1079,16 @@ def test_disable_dhcp_helper_driver_failure(self): def test_enable_isolated_metadata_proxy(self): self.dhcp._process_monitor = mock.Mock() - self.dhcp.enable_isolated_metadata_proxy(fake_network) - self.external_process.assert_has_calls([ - self._process_manager_constructor_call(), - mock.call().enable() - ], any_order=True) + process_instance = mock.Mock(active=False) + with mock.patch.object(metadata_driver.MetadataDriver, + '_get_metadata_proxy_process_manager', + return_value=process_instance) as gmppm: + self.dhcp.enable_isolated_metadata_proxy(fake_network) + gmppm.assert_called_with(FAKE_NETWORK_UUID, + cfg.CONF, + ns_name=FAKE_NETWORK_DHCP_NS, + callback=mock.ANY) + process_instance.enable.assert_called_once() def test_disable_isolated_metadata_proxy(self): method_path = ('neutron.agent.metadata.driver.MetadataDriver' diff --git a/neutron/tests/unit/agent/metadata/test_driver.py b/neutron/tests/unit/agent/metadata/test_driver.py index e01300f7b83..aad6cf36a23 100644 --- a/neutron/tests/unit/agent/metadata/test_driver.py +++ b/neutron/tests/unit/agent/metadata/test_driver.py @@ -18,6 +18,7 @@ from unittest import mock from neutron_lib import constants +from neutron_lib import exceptions as lib_exceptions from neutron_lib import fixture as lib_fixtures from oslo_config import cfg from oslo_utils import uuidutils @@ -241,6 +242,26 @@ def test_spawn_metadata_proxy(self): def test_spawn_metadata_proxy_dad_failed(self): self._test_spawn_metadata_proxy(dad_failed=True) + @mock.patch.object(metadata_driver.LOG, 'error') + def test_spawn_metadata_proxy_handles_process_exception(self, error_log): + process_instance = mock.Mock(active=False) + process_instance.enable.side_effect = ( + lib_exceptions.ProcessExecutionError('Something happened', -1)) + with mock.patch.object(metadata_driver.MetadataDriver, + '_get_metadata_proxy_process_manager', + return_value=process_instance): + process_monitor = mock.Mock() + network_id = 123456 + metadata_driver.MetadataDriver.spawn_monitored_metadata_proxy( + process_monitor, + 'dummy_namespace', + self.METADATA_PORT, + cfg.CONF, + network_id=network_id) + error_log.assert_called_once() + process_monitor.register.assert_not_called() + self.assertNotIn(network_id, metadata_driver.MetadataDriver.monitors) + def test_create_config_file_wrong_user(self): with mock.patch('pwd.getpwnam', side_effect=KeyError): config = metadata_driver.HaproxyConfigurator(_uuid(), diff --git a/neutron/tests/unit/agent/ovn/metadata/test_driver.py b/neutron/tests/unit/agent/ovn/metadata/test_driver.py index e5b2abd28d4..340bde4aeb0 100644 --- a/neutron/tests/unit/agent/ovn/metadata/test_driver.py +++ 
b/neutron/tests/unit/agent/ovn/metadata/test_driver.py @@ -16,6 +16,7 @@ import os from unittest import mock +from neutron_lib import exceptions as lib_exceptions from neutron_lib import fixture as lib_fixtures from oslo_config import cfg from oslo_utils import uuidutils @@ -108,6 +109,29 @@ def test_spawn_metadata_proxy(self): run_as_root=True) ]) + @mock.patch.object(metadata_driver.LOG, 'error') + def test_spawn_metadata_proxy_handles_process_exception(self, error_log): + process_instance = mock.Mock(active=False) + process_instance.enable.side_effect = ( + lib_exceptions.ProcessExecutionError('Something happened', -1)) + + with mock.patch.object(metadata_driver.MetadataDriver, + '_get_metadata_proxy_process_manager', + return_value=process_instance): + process_monitor = mock.Mock() + network_id = 123456 + + metadata_driver.MetadataDriver.spawn_monitored_metadata_proxy( + process_monitor, + 'dummy_namespace', + self.METADATA_PORT, + cfg.CONF, + network_id=network_id) + + error_log.assert_called_once() + process_monitor.register.assert_not_called() + self.assertNotIn(network_id, metadata_driver.MetadataDriver.monitors) + def test_create_config_file_wrong_user(self): with mock.patch('pwd.getpwnam', side_effect=KeyError): config = metadata_driver.HaproxyConfigurator(mock.ANY, mock.ANY, From 7ac9edac80132577ab285484cd48f9a859153350 Mon Sep 17 00:00:00 2001 From: Rodolfo Alonso Hernandez Date: Thu, 14 Dec 2023 15:45:48 +0000 Subject: [PATCH 2/2] Improve the SG RPC callback ``security_group_info_for_ports`` This method populates the SG rules in a dictionary. Each SG rule inherits the "stateful" value of its SG. Prior to this patch, each SG rule issued a database call to retrieve the SG record. With this patch, the SG "stateful" flags are retrieved in a single database query for all SGs, which improves the performance of this method by reducing the database access to a single call. As noted in the LP bug, this improvement affects ML2/LinuxBridge; the ML2/OVS agent uses a cached RPC implementation that does not require any RPC call or database query.
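To make the optimization concrete, here is a minimal, self-contained sketch of the single-query lookup pattern described above, using SQLAlchemy against an in-memory SQLite database. The ``SecurityGroup`` model, engine, and data below are stand-ins invented for this illustration only; they are not Neutron's actual ``db_model`` or session setup:

    # One IN-clause query replaces N per-SG lookups; the result maps
    # each SG ID to its "stateful" flag, mirroring what the new
    # get_sgs_stateful_flag() classmethod does in the patch below.
    from sqlalchemy import Boolean, Column, String, create_engine, select
    from sqlalchemy.orm import Session, declarative_base

    Base = declarative_base()

    class SecurityGroup(Base):
        __tablename__ = 'securitygroups'
        id = Column(String(36), primary_key=True)
        stateful = Column(Boolean, nullable=False)

    engine = create_engine('sqlite://')
    Base.metadata.create_all(engine)

    with Session(engine) as session:
        session.add_all([SecurityGroup(id='sg-1', stateful=True),
                         SecurityGroup(id='sg-2', stateful=False)])
        session.commit()

        sg_ids = ['sg-1', 'sg-2', 'sg-unknown']
        rows = session.execute(
            select(SecurityGroup.id, SecurityGroup.stateful)
            .where(SecurityGroup.id.in_(sg_ids))).all()
        # IDs without a matching row simply do not appear in the dict.
        stateful_by_sg = dict(rows)
        assert stateful_by_sg == {'sg-1': True, 'sg-2': False}

With the dictionary in hand, ``security_group_info_for_ports`` can annotate every rule of every SG in one pass instead of issuing one query per SG.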
Conflicts: neutron/objects/securitygroup.py Closes-Bug: #2045950 Change-Id: Iafd0419a1d1eeb25d5589edc2570ebf287450957 (cherry picked from commit 6b6abb9698318a0b5db09f0c4d30a47438a94643) --- .../api/rpc/handlers/securitygroups_rpc.py | 10 +++++--- neutron/db/securitygroups_rpc_base.py | 25 +++++++++++-------- neutron/objects/securitygroup.py | 8 ++++++ .../tests/unit/objects/test_securitygroup.py | 16 ++++++++++++ 4 files changed, 46 insertions(+), 13 deletions(-) diff --git a/neutron/api/rpc/handlers/securitygroups_rpc.py b/neutron/api/rpc/handlers/securitygroups_rpc.py index f46d5e4b967..23beac95814 100644 --- a/neutron/api/rpc/handlers/securitygroups_rpc.py +++ b/neutron/api/rpc/handlers/securitygroups_rpc.py @@ -431,6 +431,10 @@ def _select_sg_ids_for_ports(self, context, ports): for sg_id in p['security_group_ids'])) return [(sg_id, ) for sg_id in sg_ids] - def _is_security_group_stateful(self, context, sg_id): - sg = self.rcache.get_resource_by_id(resources.SECURITYGROUP, sg_id) - return sg.stateful + def _get_sgs_stateful_flag(self, context, sg_ids): + sgs_stateful = {} + for sg_id in sg_ids: + sg = self.rcache.get_resource_by_id(resources.SECURITYGROUP, sg_id) + sgs_stateful[sg_id] = sg.stateful + + return sgs_stateful diff --git a/neutron/db/securitygroups_rpc_base.py b/neutron/db/securitygroups_rpc_base.py index e970f6d4a54..3f8df31e2a6 100644 --- a/neutron/db/securitygroups_rpc_base.py +++ b/neutron/db/securitygroups_rpc_base.py @@ -211,12 +211,10 @@ def security_group_info_for_ports(self, context, ports): # this set will be serialized into a list by rpc code remote_address_group_info[remote_ag_id][ethertype] = set() direction = rule_in_db['direction'] - stateful = self._is_security_group_stateful(context, - security_group_id) rule_dict = { 'direction': direction, 'ethertype': ethertype, - 'stateful': stateful} + } for key in ('protocol', 'port_range_min', 'port_range_max', 'remote_ip_prefix', 'remote_group_id', @@ -234,6 +232,13 @@ def security_group_info_for_ports(self, context, ports): if rule_dict not in sg_info['security_groups'][security_group_id]: sg_info['security_groups'][security_group_id].append( rule_dict) + + # Populate the security group "stateful" flag in the SGs list of rules. + for sg_id, stateful in self._get_sgs_stateful_flag( + context, sg_info['security_groups'].keys()).items(): + for rule in sg_info['security_groups'][sg_id]: + rule['stateful'] = stateful + # Update the security groups info if they don't have any rules sg_ids = self._select_sg_ids_for_ports(context, ports) for (sg_id, ) in sg_ids: @@ -427,13 +432,13 @@ def _select_sg_ids_for_ports(self, context, ports): """ raise NotImplementedError() - def _is_security_group_stateful(self, context, sg_id): - """Return whether the security group is stateful or not. + def _get_sgs_stateful_flag(self, context, sg_id): + """Return the security groups stateful flag. - Return True if the security group associated with the given ID - is stateful, else False. 
+ Returns a dictionary with the SG ID as key and the stateful flag: + {sg_1: True, sg_2: False, ...} """ - return True + raise NotImplementedError() class SecurityGroupServerRpcMixin(SecurityGroupInfoAPIMixin, @@ -526,5 +531,5 @@ def _select_ips_for_remote_address_group(self, context, return ips_by_group @db_api.retry_if_session_inactive() - def _is_security_group_stateful(self, context, sg_id): - return sg_obj.SecurityGroup.get_sg_by_id(context, sg_id).stateful + def _get_sgs_stateful_flag(self, context, sg_ids): + return sg_obj.SecurityGroup.get_sgs_stateful_flag(context, sg_ids) diff --git a/neutron/objects/securitygroup.py b/neutron/objects/securitygroup.py index 4e245c84e7e..960cc769cc0 100644 --- a/neutron/objects/securitygroup.py +++ b/neutron/objects/securitygroup.py @@ -11,6 +11,7 @@ # under the License. from neutron_lib import context as context_lib +from neutron_lib.db import api as db_api from neutron_lib.objects import common_types from neutron_lib.utils import net as net_utils from oslo_utils import versionutils @@ -132,6 +133,13 @@ def get_bound_project_ids(cls, context, obj_id): security_group_ids=[obj_id]) return {port.project_id for port in port_objs} + @classmethod + @db_api.CONTEXT_READER + def get_sgs_stateful_flag(cls, context, sg_ids): + query = context.session.query(cls.db_model.id, cls.db_model.stateful) + query = query.filter(cls.db_model.id.in_(sg_ids)) + return dict(query.all()) + @base.NeutronObjectRegistry.register class DefaultSecurityGroup(base.NeutronDbObject): diff --git a/neutron/tests/unit/objects/test_securitygroup.py b/neutron/tests/unit/objects/test_securitygroup.py index 53a7901d3ac..011cabeb810 100644 --- a/neutron/tests/unit/objects/test_securitygroup.py +++ b/neutron/tests/unit/objects/test_securitygroup.py @@ -210,6 +210,22 @@ def test_get_objects_no_synth(self): self.assertEqual(len(sg_obj.rules), 0) self.assertIsNone(listed_objs[0].rules) + def test_get_sgs_stateful_flag(self): + for obj in self.objs: + obj.create() + + sg_ids = tuple(sg.id for sg in self.objs) + sgs_stateful = securitygroup.SecurityGroup.get_sgs_stateful_flag( + self.context, sg_ids) + for sg_id, stateful in sgs_stateful.items(): + for obj in (obj for obj in self.objs if obj.id == sg_id): + self.assertEqual(obj.stateful, stateful) + + sg_ids = sg_ids + ('random_id_not_present', ) + sgs_stateful = securitygroup.SecurityGroup.get_sgs_stateful_flag( + self.context, sg_ids) + self.assertEqual(len(self.objs), len(sgs_stateful)) + class DefaultSecurityGroupIfaceObjTestCase(test_base.BaseObjectIfaceTestCase):
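For completeness, the error-handling pattern introduced in PATCH 1/2 can also be illustrated with a minimal, self-contained sketch. The ``ProcessManager`` and ``ProcessExecutionError`` classes below are stand-ins invented for this example, not Neutron's or neutron_lib's actual objects; the point is only that one failed haproxy spawn is logged and skipped so the remaining proxies are still registered:

    import logging

    LOG = logging.getLogger(__name__)

    class ProcessExecutionError(RuntimeError):
        """Stand-in for neutron_lib.exceptions.ProcessExecutionError."""

    class ProcessManager:
        """Stand-in for the agent's external process manager."""
        def __init__(self, uuid, ns_name):
            self.uuid = uuid
            self.ns_name = ns_name

        def enable(self):
            # Simulate a haproxy spawn failure in one namespace.
            if self.ns_name == 'broken-ns':
                raise ProcessExecutionError('haproxy failed to start')

    monitors = {}
    for uuid, ns_name in [('r1', 'ns-1'), ('r2', 'broken-ns'), ('r3', 'ns-3')]:
        pm = ProcessManager(uuid, ns_name)
        try:
            pm.enable()
        except ProcessExecutionError as exec_err:
            LOG.error('Encountered process execution error %(err)s while '
                      'starting process in namespace %(ns)s',
                      {'err': exec_err, 'ns': ns_name})
            continue  # skip registration; keep serving the other proxies
        monitors[uuid] = pm

    assert sorted(monitors) == ['r1', 'r3']  # 'r2' skipped, agent kept going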