From 2c8076dd28c1e51e7d340d3ee5f9056b40396c0c Mon Sep 17 00:00:00 2001 From: Lewis Denny Date: Mon, 31 Jul 2023 16:38:22 +1000 Subject: [PATCH 1/2] Add max limit to agent_down_time The agent_down_time ends up being passed to an eventlet green-thread; under the hood, this uses a CPython C-types interface with a limitation of (2^32 / 2 - 1) INT_MAX (as defined in C) where int is usually 32 bits I have set the max value to (2^32 / 2 - 1)/1000 as agent_down_time configured in seconds, this ends up being 2147483. This patch is required as passing a larger number causes this error: OverflowError: timeout is too large If a user currently has a value larger than (2^32 / 2 - 1)/1000 set, Neutron Server will fail to start and will print out a very helpful error message. Conflicts: neutron/conf/agent/database/agents_db.py Closes-Bug: #2028724 Change-Id: Ib5b943344cddbd468c00768461ba1ee00a2b4c58 (cherry picked from commit 6fef1e65250dbda057206e1c2ee64f59b21d490f) --- neutron/conf/agent/database/agents_db.py | 5 +++++ .../notes/agent_down_time_max-af3b62763aaa2fe5.yaml | 6 ++++++ 2 files changed, 11 insertions(+) create mode 100644 releasenotes/notes/agent_down_time_max-af3b62763aaa2fe5.yaml diff --git a/neutron/conf/agent/database/agents_db.py b/neutron/conf/agent/database/agents_db.py index aad0582bf5a..90569e104d2 100644 --- a/neutron/conf/agent/database/agents_db.py +++ b/neutron/conf/agent/database/agents_db.py @@ -16,7 +16,12 @@ from neutron.common import _constants AGENT_OPTS = [ + # The agent_down_time value can only be a max of INT_MAX (as defined in C), + # where int is usually 32 bits. The agent_down_time will be passed to + # eventlet in milliseconds and any number higher will produce an OverFlow + # error. More details here: https://bugs.launchpad.net/neutron/+bug/2028724 cfg.IntOpt('agent_down_time', default=75, + max=((2**32 / 2 - 1) // 1000), help=_("Seconds to regard the agent is down; should be at " "least twice report_interval, to be sure the " "agent is down for good.")), diff --git a/releasenotes/notes/agent_down_time_max-af3b62763aaa2fe5.yaml b/releasenotes/notes/agent_down_time_max-af3b62763aaa2fe5.yaml new file mode 100644 index 00000000000..ec221e2e05a --- /dev/null +++ b/releasenotes/notes/agent_down_time_max-af3b62763aaa2fe5.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + The config option ``agent_down_time`` is now limited to a maximum value of + `2147483`, as neutron-server will fail to start if it is configured higher. + See bug `2028724 `_ for more information. From 0396b33bc4db43798490fefaf97a3bfcbdba29d0 Mon Sep 17 00:00:00 2001 From: Rodolfo Alonso Date: Wed, 24 Jan 2024 07:39:09 +0000 Subject: [PATCH 2/2] Revert "Add sleep before checking if ovs port is in the namespace" This reverts commit 222c997022392561c2de2cb493f0f5214eb20dfc. Reason for revert: This patch seems to be breaking the test "test_multiple_agents_for_network(Open vSwitch agent)" in the fullstack job. Change-Id: Ib93ebdcceb177c297b3b287fda01e3d57a275cb4 Related-Bug: #1961740 --- neutron/agent/linux/interface.py | 2 +- neutron/agent/linux/ip_lib.py | 11 +++-------- neutron/tests/unit/agent/linux/test_interface.py | 6 +++--- neutron/tests/unit/agent/linux/test_ip_lib.py | 3 +-- 4 files changed, 8 insertions(+), 14 deletions(-) diff --git a/neutron/agent/linux/interface.py b/neutron/agent/linux/interface.py index 4eea826082c..311e7f5e9c9 100644 --- a/neutron/agent/linux/interface.py +++ b/neutron/agent/linux/interface.py @@ -355,7 +355,7 @@ def _add_device_to_namespace(self, ip_wrapper, device, namespace): namespace_obj = ip_wrapper.ensure_namespace(namespace) for i in range(9): try: - namespace_obj.add_device_to_namespace(device, is_ovs_port=True) + namespace_obj.add_device_to_namespace(device) break except ip_lib.NetworkInterfaceNotFound: # NOTE(slaweq): if the exception was NetworkInterfaceNotFound diff --git a/neutron/agent/linux/ip_lib.py b/neutron/agent/linux/ip_lib.py index 29f64a3c452..4f88e887297 100644 --- a/neutron/agent/linux/ip_lib.py +++ b/neutron/agent/linux/ip_lib.py @@ -270,9 +270,9 @@ def garbage_collect_namespace(self): return True return False - def add_device_to_namespace(self, device, is_ovs_port=False): + def add_device_to_namespace(self, device): if self.namespace: - device.link.set_netns(self.namespace, is_ovs_port=is_ovs_port) + device.link.set_netns(self.namespace) def add_vlan(self, name, physical_interface, vlan_id): privileged.create_interface(name, @@ -462,15 +462,10 @@ def set_down(self): privileged.set_link_attribute( self.name, self._parent.namespace, state='down') - def set_netns(self, namespace, is_ovs_port=False): + def set_netns(self, namespace): privileged.set_link_attribute( self.name, self._parent.namespace, net_ns_fd=namespace) self._parent.namespace = namespace - if is_ovs_port: - # NOTE(slaweq): because of the "shy port" which may dissapear for - # short time after it's moved to the namespace we need to wait - # a bit before checking if port really exists in the namespace - time.sleep(1) common_utils.wait_until_true(lambda: self.exists, timeout=5, sleep=0.5) diff --git a/neutron/tests/unit/agent/linux/test_interface.py b/neutron/tests/unit/agent/linux/test_interface.py index 314a8f89da7..9b3b5ca13eb 100644 --- a/neutron/tests/unit/agent/linux/test_interface.py +++ b/neutron/tests/unit/agent/linux/test_interface.py @@ -467,11 +467,11 @@ def device_exists(dev, namespace=None): expected.extend( [mock.call().ensure_namespace(namespace), mock.call().ensure_namespace().add_device_to_namespace( - mock.ANY, is_ovs_port=True), + mock.ANY), mock.call().ensure_namespace().add_device_to_namespace( - mock.ANY, is_ovs_port=True), + mock.ANY), mock.call().ensure_namespace().add_device_to_namespace( - mock.ANY, is_ovs_port=True)]) + mock.ANY)]) expected.extend([ mock.call(namespace=namespace), mock.call().device('tap0'), diff --git a/neutron/tests/unit/agent/linux/test_ip_lib.py b/neutron/tests/unit/agent/linux/test_ip_lib.py index aeda0fe58f5..72ec748a65a 100644 --- a/neutron/tests/unit/agent/linux/test_ip_lib.py +++ b/neutron/tests/unit/agent/linux/test_ip_lib.py @@ -507,8 +507,7 @@ def fake_create_interface(ifname, namespace, kind, **kwargs): def test_add_device_to_namespace(self): dev = mock.Mock() ip_lib.IPWrapper(namespace='ns').add_device_to_namespace(dev) - dev.assert_has_calls( - [mock.call.link.set_netns('ns', is_ovs_port=False)]) + dev.assert_has_calls([mock.call.link.set_netns('ns')]) def test_add_device_to_namespace_is_none(self): dev = mock.Mock()