From 3cade62952590f65ed50f576604387b360019107 Mon Sep 17 00:00:00 2001 From: Will Szumski Date: Fri, 20 Sep 2024 18:39:38 +0100 Subject: [PATCH 1/6] Fix tox --- .../unit/linux/interface_drivers/test_interface.py | 2 +- tox.ini | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/networking_mlnx/tests/unit/linux/interface_drivers/test_interface.py b/networking_mlnx/tests/unit/linux/interface_drivers/test_interface.py index 7737c94..d58f220 100644 --- a/networking_mlnx/tests/unit/linux/interface_drivers/test_interface.py +++ b/networking_mlnx/tests/unit/linux/interface_drivers/test_interface.py @@ -183,7 +183,7 @@ def test__init_network_cache_with_cache_maintenance(self, conf, self._get_networks_cb, self.fields) looping_mock.assert_called_once_with( net_cache_mock.remove_stale_networks) - loop_obj.start.called_once_with() + loop_obj.start.assert_called() # Make sure consecutive calls dont re-spawn cleanup thread looping_mock.reset_mock() loop_obj.start.reset_mock() diff --git a/tox.ini b/tox.ini index eb3ce26..b8c5c37 100644 --- a/tox.ini +++ b/tox.ini @@ -5,22 +5,25 @@ skipsdist = True ignore_basepython_conflict = True [testenv] -basepython = python3 +basepython = python3.8 setenv = VIRTUAL_ENV={envdir} OS_LOG_CAPTURE={env:OS_LOG_CAPTURE:true} OS_STDOUT_CAPTURE={env:OS_STDOUT_CAPTURE:true} OS_STDERR_CAPTURE={env:OS_STDERR_CAPTURE:true} + # "AttributeError: install_layout". + SETUPTOOLS_USE_DISTUTILS=stdlib PYTHONWARNINGS=default::DeprecationWarning,ignore::DeprecationWarning:distutils,ignore::DeprecationWarning:site -passenv = TRACE_FAILONLY GENERATE_HASHES http_proxy HTTP_PROXY https_proxy HTTPS_PROXY no_proxy NO_PROXY TOX_ENV_SRC_MODULES +passenv = TRACE_FAILONLY,GENERATE_HASHES,http_proxy,HTTP_PROXY,https_proxy,HTTPS_PROXY,no_proxy,NO_PROXY,TOX_ENV_SRC_MODULES usedevelop = True install_command = pip install {opts} {packages} deps = - -c{env:UPPER_CONSTRAINTS_FILE:https://releases.openstack.org/constraints/upper/2023.1} + -c{env:UPPER_CONSTRAINTS_FILE:https://raw.githubusercontent.com/stackhpc/requirements/refs/heads/bugfix/2023.1/networking-mlnx/upper-constraints.txt} -r{toxinidir}/requirements.txt -r{toxinidir}/test-requirements.txt + {toxinidir} hacking>=3.0.1,<3.1.0 # Apache-2.0 -whitelist_externals = sh +allowlist_externals = sh,env,{toxinidir}/tools/pip_install_src_modules.sh commands = {toxinidir}/tools/pip_install_src_modules.sh "{toxinidir}" stestr run {posargs} @@ -48,7 +51,7 @@ commands= #{[testenv:genconfig]commands} {[testenv:bashate]commands} {[testenv:bandit]commands} -whitelist_externals = +allowlist_externals = sh bash From 5d1637aa730e11a214bf200e320fd72a22aceafa Mon Sep 17 00:00:00 2001 From: Will Szumski Date: Fri, 20 Sep 2024 18:42:39 +0100 Subject: [PATCH 2/6] Remove unused poller No functional change as this wasn't actually in use. --- networking_mlnx/eswitchd/eswitch_daemon.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/networking_mlnx/eswitchd/eswitch_daemon.py b/networking_mlnx/eswitchd/eswitch_daemon.py index c0b527c..ef8de47 100644 --- a/networking_mlnx/eswitchd/eswitch_daemon.py +++ b/networking_mlnx/eswitchd/eswitch_daemon.py @@ -70,8 +70,6 @@ def _init_connections(self): self.conn_os_url = set_conn_url(os_transport, os_addr, os_port) self.socket_os.bind(self.conn_os_url) - self.poller = zmq.Poller() - self.poller.register(self.socket_os, zmq.POLLIN) def _handle_msg(self): data = None From fd83797376873a3f0421e1f168146feab5244125 Mon Sep 17 00:00:00 2001 From: Will Szumski Date: Fri, 20 Sep 2024 18:46:33 +0100 Subject: [PATCH 3/6] Set ZMQ_LINGER This will discard any pending messages immediately when the socket is closed. The default is to wait forever for any pending messages which can cause the the daemon to hang on shutdown. See: http://api.zeromq.org/4-2:zmq-setsockopt --- networking_mlnx/eswitchd/eswitch_daemon.py | 1 + 1 file changed, 1 insertion(+) diff --git a/networking_mlnx/eswitchd/eswitch_daemon.py b/networking_mlnx/eswitchd/eswitch_daemon.py index ef8de47..26ee66d 100644 --- a/networking_mlnx/eswitchd/eswitch_daemon.py +++ b/networking_mlnx/eswitchd/eswitch_daemon.py @@ -64,6 +64,7 @@ def _parse_physical_mapping(self): def _init_connections(self): context = zmq.Context() self.socket_os = context.socket(zmq.REP) + self.socket.setsockopt(zmq.LINGER, 0) os_transport = constants.SOCKET_OS_TRANSPORT os_port = constants.SOCKET_OS_PORT os_addr = constants.SOCKET_OS_ADDR From 3efa8c449a2f075816d9dc2c6e4c12ea473c1330 Mon Sep 17 00:00:00 2001 From: Will Szumski Date: Fri, 20 Sep 2024 18:57:18 +0100 Subject: [PATCH 4/6] Always send a response Previously, if data was None, we'd not send a reply. This can lead to clients hanging when they are expecting a reply. --- networking_mlnx/eswitchd/eswitch_daemon.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/networking_mlnx/eswitchd/eswitch_daemon.py b/networking_mlnx/eswitchd/eswitch_daemon.py index 26ee66d..f68c3ae 100644 --- a/networking_mlnx/eswitchd/eswitch_daemon.py +++ b/networking_mlnx/eswitchd/eswitch_daemon.py @@ -80,15 +80,20 @@ def _handle_msg(self): if msg: data = jsonutils.loads(msg) - msg = None + result = { + "status": "FAIL", + "action": data.get("action", "UNKNOWN"), + "reason": "UNKNOWN" + } if data: try: result = self.dispatcher.handle_msg(data) - msg = jsonutils.dumps(result) except Exception as e: LOG.exception("Exception during message handling - %s", e) - msg = jsonutils.dumps(str(e)) - sender.send_string(msg) + result["reason"] = str(e) + + msg = jsonutils.dumps(result) + sender.send_string(msg) def daemon_loop(self): LOG.info("Daemon Started!") From 25978b5660a96288e30513fd6a7f5121cd649d7b Mon Sep 17 00:00:00 2001 From: Will Szumski Date: Fri, 20 Sep 2024 19:21:12 +0100 Subject: [PATCH 5/6] Fix some file descriptor leaks Symptom was that eswitchd would become unresponsive after some time. The following error was observed: GetVnics failed - [Errno 24] Too many open files: OSError: [Errno 24] Too many open files This was tracked down to forgetting to call close on IPRoute objects as per the documentation. See: - https://docs.pyroute2.org/iproute.html - https://github.com/svinota/pyroute2/issues/631 --- networking_mlnx/eswitchd/utils/pci_utils.py | 4 +- .../internal/netdev_ops/impl_pyroute2.py | 74 +++++++++---------- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/networking_mlnx/eswitchd/utils/pci_utils.py b/networking_mlnx/eswitchd/utils/pci_utils.py index 8214300..a9bb74c 100644 --- a/networking_mlnx/eswitchd/utils/pci_utils.py +++ b/networking_mlnx/eswitchd/utils/pci_utils.py @@ -71,8 +71,8 @@ def get_vfs_info(self, pf): def get_dev_attr(self, attr_path): try: - fd = open(attr_path) - return fd.readline().strip() + with open(attr_path) as fd: + return fd.readline().strip() except IOError: return diff --git a/networking_mlnx/internal/netdev_ops/impl_pyroute2.py b/networking_mlnx/internal/netdev_ops/impl_pyroute2.py index 9255796..31233f8 100644 --- a/networking_mlnx/internal/netdev_ops/impl_pyroute2.py +++ b/networking_mlnx/internal/netdev_ops/impl_pyroute2.py @@ -33,16 +33,16 @@ def set_vf_admin_state(self, pf_ifname, vf_idx, state): :param state: desired admin state as defined in networking_mlnx.internal.netdev_ops.constants """ - try: - ip = pyroute2.IPRoute() - link_idx = ip.link_lookup(ifname=pf_ifname)[0] - ip.link( - 'set', index=link_idx, vf={'vf': int(vf_idx), - 'link_state': state}) - except IndexError: - raise exceptions.NetworkInterfaceNotFound(pf_ifname) - except pyroute2.NetlinkError as e: - raise exceptions.NetlinkRuntimeError(e) + with pyroute2.IPRoute() as ip: + try: + link_idx = ip.link_lookup(ifname=pf_ifname)[0] + ip.link( + 'set', index=link_idx, vf={'vf': int(vf_idx), + 'link_state': state}) + except IndexError: + raise exceptions.NetworkInterfaceNotFound(pf_ifname) + except pyroute2.NetlinkError as e: + raise exceptions.NetlinkRuntimeError(e) def set_link_state(self, ifname, state): """Set net device link state @@ -51,14 +51,14 @@ def set_link_state(self, ifname, state): :param state: desired link state as defined in networking_mlnx.internal.netdev_ops.constants """ - try: - ip = pyroute2.IPRoute() - link_idx = ip.link_lookup(ifname=ifname)[0] - ip.link('set', index=link_idx, state=state) - except IndexError: - raise exceptions.NetworkInterfaceNotFound(ifname) - except pyroute2.NetlinkError as e: - raise exceptions.NetlinkRuntimeError(e) + with pyroute2.IPRoute() as ip: + try: + link_idx = ip.link_lookup(ifname=ifname)[0] + ip.link('set', index=link_idx, state=state) + except IndexError: + raise exceptions.NetworkInterfaceNotFound(ifname) + except pyroute2.NetlinkError as e: + raise exceptions.NetlinkRuntimeError(e) def set_vf_guid(self, pf_ifname, vf_idx, guid): """Set vf administrative port and node GUID @@ -68,17 +68,17 @@ def set_vf_guid(self, pf_ifname, vf_idx, guid): :param guid: 64bit guid str in xx:xx:xx:xx:xx:xx:xx:xx format where x is a hexadecimal digit. """ - try: - ip = pyroute2.IPRoute() - link_idx = ip.link_lookup(ifname=pf_ifname)[0] - ip.link('set', index=link_idx, vf={'vf': int(vf_idx), - 'ib_port_guid': guid}) - ip.link('set', index=link_idx, - vf={'vf': int(vf_idx), 'ib_node_guid': guid}) - except IndexError: - raise exceptions.NetworkInterfaceNotFound(pf_ifname) - except pyroute2.NetlinkError as e: - raise exceptions.NetlinkRuntimeError(e) + with pyroute2.IPRoute() as ip: + try: + link_idx = ip.link_lookup(ifname=pf_ifname)[0] + ip.link('set', index=link_idx, vf={'vf': int(vf_idx), + 'ib_port_guid': guid}) + ip.link('set', index=link_idx, + vf={'vf': int(vf_idx), 'ib_node_guid': guid}) + except IndexError: + raise exceptions.NetworkInterfaceNotFound(pf_ifname) + except pyroute2.NetlinkError as e: + raise exceptions.NetlinkRuntimeError(e) def get_vf_guid(self, pf_ifname, vf_idx): """Get vf administrative GUID @@ -91,14 +91,14 @@ def get_vf_guid(self, pf_ifname, vf_idx): NOTE: while there are two GUIDs assigned per VF (port and node GUID) we assume they are the same and return just one value. """ - try: - ip = pyroute2.IPRoute() - link_idx = ip.link_lookup(ifname=pf_ifname)[0] - attrs = ip.link('get', index=link_idx, ext_mask=1)[0] - except IndexError: - raise exceptions.NetworkInterfaceNotFound(pf_ifname) - except pyroute2.NetlinkError as e: - raise exceptions.NetlinkRuntimeError(e) + with pyroute2.IPRoute() as ip: + try: + link_idx = ip.link_lookup(ifname=pf_ifname)[0] + attrs = ip.link('get', index=link_idx, ext_mask=1)[0] + except IndexError: + raise exceptions.NetworkInterfaceNotFound(pf_ifname) + except pyroute2.NetlinkError as e: + raise exceptions.NetlinkRuntimeError(e) vf_attr = (attrs.get_attr('IFLA_VFINFO_LIST'). get_attrs("IFLA_VF_INFO"))[int(vf_idx)] From 619e047eed2a3d1abd5c491f5227367474df914e Mon Sep 17 00:00:00 2001 From: Will Szumski Date: Fri, 20 Sep 2024 19:35:34 +0100 Subject: [PATCH 6/6] Set manadatory ip_version field in dhcp opts The API is defined in: https://github.com/openstack/neutron-lib/blob/90fa9948fd3b36d121ec5fa0b8004404f6d86b45/neutron_lib/api/definitions/extra_dhcp_opt.py#L32 We were missing the ip_version field. This was causing a bad interaction with the OVN driver. See: https://bugs.launchpad.net/neutron/+bug/2081161 --- networking_mlnx/plugins/ml2/drivers/mlnx/mech_mlnx.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/networking_mlnx/plugins/ml2/drivers/mlnx/mech_mlnx.py b/networking_mlnx/plugins/ml2/drivers/mlnx/mech_mlnx.py index e85dd78..95e08f3 100644 --- a/networking_mlnx/plugins/ml2/drivers/mlnx/mech_mlnx.py +++ b/networking_mlnx/plugins/ml2/drivers/mlnx/mech_mlnx.py @@ -86,7 +86,8 @@ def _gen_client_id_with_prefix(self, port, prefix): def _gen_client_id_opt(self, port): client_id = self._gen_client_id(port) return [{"opt_name": edo_ext.DHCP_OPT_CLIENT_ID, - "opt_value": client_id}] + "opt_value": client_id, + "ip_version": 4}] def _gen_none_client_id_opt(self, port): updated_extra_dhcp_opts = []