From b992d639b974f35612d6bb0057f35c452129aed3 Mon Sep 17 00:00:00 2001 From: Terry Wilson Date: Fri, 15 Dec 2023 21:00:43 +0000 Subject: [PATCH 1/4] Handle creation of Port_Binding with chassis set When there is a backlog of notifications to be sent, it is possible that ovsdb-server will merge insert and update notifications. Due to this, we need to handle the situation where we see a Port_Binding created with the chassis set. Closes-Bug: #2017748 Change-Id: Idfae87cf6c60e9e18ede91ea20857cea5322738c (cherry picked from commit a641e8aec09c1e33a15a34b19d92675ed2c85682) --- neutron/agent/ovn/extensions/qos_hwol.py | 4 ++-- neutron/agent/ovn/metadata/agent.py | 21 +++++++++++++++++---- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/neutron/agent/ovn/extensions/qos_hwol.py b/neutron/agent/ovn/extensions/qos_hwol.py index abe0783e2bd..8b03dd8c01e 100644 --- a/neutron/agent/ovn/extensions/qos_hwol.py +++ b/neutron/agent/ovn/extensions/qos_hwol.py @@ -158,13 +158,13 @@ class PortBindingChassisCreatedEvent(_PortBindingChassisEvent): LOG_MSG = 'Port ID %s, datapath %s, OVS port name: %s (event: %s)' def __init__(self, ovn_agent): - events = (self.ROW_UPDATE,) + events = (self.ROW_CREATE, self.ROW_UPDATE,) super().__init__(ovn_agent, events) def match_fn(self, event, row, old): try: return (row.chassis[0].name == self.ovn_agent.chassis and - not old.chassis) + (event == self.ROW_CREATE or not old.chassis)) except (IndexError, AttributeError): return False diff --git a/neutron/agent/ovn/metadata/agent.py b/neutron/agent/ovn/metadata/agent.py index bb65b33894f..1745239701b 100644 --- a/neutron/agent/ovn/metadata/agent.py +++ b/neutron/agent/ovn/metadata/agent.py @@ -109,6 +109,16 @@ def run(self, event, row, old): self.agent.resync() +class PortBindingCreateWithChassis(PortBindingEvent): + EVENT = PortBindingEvent.ROW_CREATE + + def match_fn(self, event, row, old): + self._log_msg = "Port %s in datapath %s bound to our chassis on insert" + if not (super().match_fn(event, row, old) and row.chassis): + return False + return row.chassis[0].name == self.agent.chassis + + class PortBindingUpdatedEvent(PortBindingEvent): EVENT = PortBindingEvent.ROW_UPDATE @@ -316,6 +326,7 @@ def start(self): tables = ('Encap', 'Port_Binding', 'Datapath_Binding', 'SB_Global', 'Chassis') events = (PortBindingUpdatedEvent(self), + PortBindingCreateWithChassis(self), PortBindingDeletedEvent(self), SbGlobalUpdateEvent(self), ) @@ -345,7 +356,8 @@ def start(self): self._proxy.run() # Do the initial sync. - self.sync() + # Provisioning handled by PortBindingCreateWithChassis + self.sync(provision=False) # Register the agent with its corresponding Chassis self.register_metadata_agent() @@ -396,7 +408,7 @@ def get_networks_datapaths(self): return set(p.datapath for p in self._vif_ports(ports)) @_sync_lock - def sync(self): + def sync(self, provision=True): """Agent sync. This function will make sure that all networks with ports in our @@ -423,8 +435,9 @@ def sync(self): # resync all network namespaces based on the associated datapaths, # even those that are already running. This is to make sure # everything within each namespace is up to date. - for datapath in net_datapaths: - self.provision_datapath(datapath) + if provision: + for datapath in net_datapaths: + self.provision_datapath(datapath) @staticmethod def _get_veth_name(datapath): From 696b2f4d4bc6c7b187020ef2b7ed3edc513549a6 Mon Sep 17 00:00:00 2001 From: Rodolfo Alonso Hernandez Date: Wed, 8 Feb 2023 15:40:07 +0100 Subject: [PATCH 2/4] Increase port name size and type to internal This patch increases the port name size in "test_port_creation_and_deletion" and adds explicit exceptions with debugging information. The port is now type=internal. This patch also reverts [1] that marked the test as unstable. [1]https://review.opendev.org/c/openstack/neutron/+/873941 Closes-Bug: #2006603 Change-Id: If85b487dc2db7466c4848043911772ef79140d51 (cherry picked from commit 4b8e484e1ded99012a8e332fa1c9ac660f74a17e) --- .../agent/ovn/extensions/test_qos_hwol.py | 49 ++++++++++++------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/neutron/tests/functional/agent/ovn/extensions/test_qos_hwol.py b/neutron/tests/functional/agent/ovn/extensions/test_qos_hwol.py index 83d5c58ab90..55d9329a88c 100644 --- a/neutron/tests/functional/agent/ovn/extensions/test_qos_hwol.py +++ b/neutron/tests/functional/agent/ovn/extensions/test_qos_hwol.py @@ -17,27 +17,26 @@ from oslo_utils import uuidutils -from neutron.agent.common import ovs_lib from neutron.agent.ovn.agent import ovsdb as agent_ovsdb from neutron.agent.ovn.extensions import qos_hwol from neutron.common.ovn import constants as ovn_const from neutron.common.ovn import utils from neutron.common import utils as n_utils -from neutron.tests import base as test_base -from neutron.tests.common import net_helpers from neutron.tests.functional import base class OVSInterfaceEventTestCase(base.TestOVNFunctionalBase): - @test_base.unstable_test( - 'LP#2006603, it is being addressed in ' - 'https://review.opendev.org/c/openstack/neutron/+/873118') + def _cleanup(self): + self.ovs_idl.del_port(self.port_name, bridge=self.br_name).execute( + check_error=False) + self.ovs_idl.del_br(self.br_name).execute(check_error=False) + def test_port_creation_and_deletion(self): def check_add_port_called(): try: mock_agent.qos_hwol_ext.add_port.assert_has_calls( - [mock.call('port_iface-id', port_name)]) + [mock.call(port_iface_id, self.port_name)]) return True except AssertionError: return False @@ -45,24 +44,36 @@ def check_add_port_called(): def check_remove_egress_called(): try: mock_agent.qos_hwol_ext.remove_egress.assert_has_calls( - [mock.call('port_iface-id')]) + [mock.call(port_iface_id)]) return True except AssertionError: return False + port_iface_id = 'port_iface-id' mock_agent = mock.Mock() events = [qos_hwol.OVSInterfaceEvent(mock_agent)] - agent_ovsdb.MonitorAgentOvsIdl(events=events).start() - br = self.useFixture(net_helpers.OVSBridgeFixture()).bridge - self.ovs_bridge = ovs_lib.OVSBridge(br.br_name) - port_name = ('port-' + uuidutils.generate_uuid())[:8] - - self.ovs_bridge.add_port( - port_name, ('external_ids', {'iface-id': 'port_iface-id'})) - n_utils.wait_until_true(check_add_port_called, timeout=5) - - self.ovs_bridge.delete_port(port_name) - n_utils.wait_until_true(check_remove_egress_called, timeout=5) + self.ovs_idl = agent_ovsdb.MonitorAgentOvsIdl(events=events).start() + self.br_name = ('brtest-' + uuidutils.generate_uuid())[:13] + self.port_name = ('port-' + uuidutils.generate_uuid())[:13] + self.addCleanup(self._cleanup) + with self.ovs_idl.transaction() as txn: + txn.add(self.ovs_idl.add_br(self.br_name)) + txn.add(self.ovs_idl.add_port(self.br_name, self.port_name)) + txn.add(self.ovs_idl.iface_set_external_id( + self.port_name, 'iface-id', port_iface_id)) + txn.add(self.ovs_idl.db_set( + 'Interface', self.port_name, ('type', 'internal'))) + + exc = Exception('Port %s was not added to the bridge %s' % + (self.port_name, self.br_name)) + n_utils.wait_until_true(check_add_port_called, timeout=5, + exception=exc) + + self.ovs_idl.del_port(self.port_name).execute(check_error=True) + exc = Exception('Port %s was not deleted from the bridge %s' % + (self.port_name, self.br_name)) + n_utils.wait_until_true(check_remove_egress_called, timeout=5, + exception=exc) class QoSBandwidthLimitEventTestCase(base.TestOVNFunctionalBase): From 5ff38217c77785b69be19c709eecd4f9579dca25 Mon Sep 17 00:00:00 2001 From: Rodolfo Alonso Hernandez Date: Fri, 28 Apr 2023 18:27:11 +0200 Subject: [PATCH 3/4] Mark "test_port_creation_and_deletion" as unstable Related-Bug: #2006603 Change-Id: Icbd0bb46a8a8c169279301b26ac92e0f459e1d87 (cherry picked from commit 4ad979a534296ab7df4571bc149756aa0cb7d94f) --- neutron/tests/functional/agent/ovn/extensions/test_qos_hwol.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/neutron/tests/functional/agent/ovn/extensions/test_qos_hwol.py b/neutron/tests/functional/agent/ovn/extensions/test_qos_hwol.py index 55d9329a88c..a6457dcd0ee 100644 --- a/neutron/tests/functional/agent/ovn/extensions/test_qos_hwol.py +++ b/neutron/tests/functional/agent/ovn/extensions/test_qos_hwol.py @@ -22,6 +22,7 @@ from neutron.common.ovn import constants as ovn_const from neutron.common.ovn import utils from neutron.common import utils as n_utils +from neutron.tests import base as test_base from neutron.tests.functional import base @@ -32,6 +33,7 @@ def _cleanup(self): check_error=False) self.ovs_idl.del_br(self.br_name).execute(check_error=False) + @test_base.unstable_test('bug 2006603') def test_port_creation_and_deletion(self): def check_add_port_called(): try: From 94cf7a4c281413b18ff90cf16c45d2f0df436b44 Mon Sep 17 00:00:00 2001 From: Lewis Denny Date: Mon, 31 Jul 2023 16:38:22 +1000 Subject: [PATCH 4/4] Add max limit to agent_down_time The agent_down_time ends up being passed to an eventlet green-thread; under the hood, this uses a CPython C-types interface with a limitation of (2^32 / 2 - 1) INT_MAX (as defined in C) where int is usually 32 bits I have set the max value to (2^32 / 2 - 1)/1000 as agent_down_time configured in seconds, this ends up being 2147483. This patch is required as passing a larger number causes this error: OverflowError: timeout is too large If a user currently has a value larger than (2^32 / 2 - 1)/1000 set, Neutron Server will fail to start and will print out a very helpful error message. Conflicts: neutron/conf/agent/database/agents_db.py Closes-Bug: #2028724 Change-Id: Ib5b943344cddbd468c00768461ba1ee00a2b4c58 (cherry picked from commit 6fef1e65250dbda057206e1c2ee64f59b21d490f) --- neutron/conf/agent/database/agents_db.py | 5 +++++ .../notes/agent_down_time_max-af3b62763aaa2fe5.yaml | 6 ++++++ 2 files changed, 11 insertions(+) create mode 100644 releasenotes/notes/agent_down_time_max-af3b62763aaa2fe5.yaml diff --git a/neutron/conf/agent/database/agents_db.py b/neutron/conf/agent/database/agents_db.py index aad0582bf5a..90569e104d2 100644 --- a/neutron/conf/agent/database/agents_db.py +++ b/neutron/conf/agent/database/agents_db.py @@ -16,7 +16,12 @@ from neutron.common import _constants AGENT_OPTS = [ + # The agent_down_time value can only be a max of INT_MAX (as defined in C), + # where int is usually 32 bits. The agent_down_time will be passed to + # eventlet in milliseconds and any number higher will produce an OverFlow + # error. More details here: https://bugs.launchpad.net/neutron/+bug/2028724 cfg.IntOpt('agent_down_time', default=75, + max=((2**32 / 2 - 1) // 1000), help=_("Seconds to regard the agent is down; should be at " "least twice report_interval, to be sure the " "agent is down for good.")), diff --git a/releasenotes/notes/agent_down_time_max-af3b62763aaa2fe5.yaml b/releasenotes/notes/agent_down_time_max-af3b62763aaa2fe5.yaml new file mode 100644 index 00000000000..ec221e2e05a --- /dev/null +++ b/releasenotes/notes/agent_down_time_max-af3b62763aaa2fe5.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + The config option ``agent_down_time`` is now limited to a maximum value of + `2147483`, as neutron-server will fail to start if it is configured higher. + See bug `2028724 `_ for more information.