From ba16962d3483d19b497d2edb536298f01e44fc7f Mon Sep 17 00:00:00 2001 From: Rodolfo Alonso Hernandez Date: Fri, 9 Jun 2023 16:18:09 +0000 Subject: [PATCH 1/2] [OVN] Remove SB "Chassis"/"Chassis_Private" duplicated registers A new OVN maintenance method is added. This method lists all existing OVN SB Chassis registers and checks if any of them has the same hostname. In case of having duplicated "Chassis"/"Chassis_Private" registers, the maintenance method will remove those with older (lower) timestamp, that is stored in "Chassis_Private.nb_cfg_timestamp", leaving only the newer one. Conflicts: neutron/tests/functional/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_maintenance.py Closes-Bug: #2016158 Change-Id: Ib3c6f0dc01efd31430691e720ba23ccb4ede65fa (cherry picked from commit 9d9f47c20c6c269db264d656a1026e0926f390fd) (cherry picked from commit 7fc12decdf60eeb162fe87f7a67746184023135e) --- .../ovn/mech_driver/ovsdb/maintenance.py | 57 ++++++++++++++++++ .../ovn/mech_driver/ovsdb/test_maintenance.py | 58 +++++++++++++++++++ ...plicated_ovn_chassis-df12fb6233ea3d3e.yaml | 17 ++++++ 3 files changed, 132 insertions(+) create mode 100644 releasenotes/notes/remove_duplicated_ovn_chassis-df12fb6233ea3d3e.yaml diff --git a/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/maintenance.py b/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/maintenance.py index 415530541ec..d68fe46ab4e 100644 --- a/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/maintenance.py +++ b/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/maintenance.py @@ -997,6 +997,63 @@ def check_fair_meter_consistency(self): from_reload=True) raise periodics.NeverAgain() + @periodics.periodic(spacing=300, run_immediately=True) + def remove_duplicated_chassis_registers(self): + """Remove the "Chassis" and "Chassis_Private" duplicated registers. + + When the ovn-controller service of a node is updated and the system-id + is changed, if the old service is not stopped gracefully, it will leave + a "Chassis" and a "Chassis_Private" registers on the OVN SB database. + These leftovers must be removed. + + NOTE: this method is executed every 5 minutes. If a new chassis is + added, this method will perform again the clean-up process. + + NOTE: this method can be executed only if the OVN SB has the + "Chassis_Private" table. Otherwise, is not possible to find out which + register is newer and thus must be kept in the database. + """ + if not self._sb_idl.is_table_present('Chassis_Private'): + raise periodics.NeverAgain() + + if not self.has_lock: + return + + # dup_chassis_port_host = {host_name: [(ch1, ch_private1), + # (ch2, ch_private2), ... ]} + dup_chassis_port_host = {} + chassis = self._sb_idl.chassis_list().execute(check_error=True) + chassis_hostnames = {ch.hostname for ch in chassis} + # Find the duplicated "Chassis" and "Chassis_Private" registers, + # comparing the hostname. + for hostname in chassis_hostnames: + ch_list = [] + # Find these chassis matching the hostname and create a list. + for ch in (ch for ch in chassis if ch.hostname == hostname): + ch_private = self._sb_idl.lookup('Chassis_Private', ch.name, + default=None) + if ch_private: + ch_list.append((ch, ch_private)) + + # If the chassis list > 1, then we have duplicated chassis. + if len(ch_list) > 1: + # Order ch_list by Chassis_Private.nb_cfg_timestamp, from newer + # (greater value) to older. + ch_list.sort(key=lambda x: x[1].nb_cfg_timestamp, reverse=True) + dup_chassis_port_host[hostname] = ch_list + + if not dup_chassis_port_host: + return + + # Remove the "Chassis" and "Chassis_Private" registers with the + # older Chassis_Private.nb_cfg_timestamp. + with self._sb_idl.transaction(check_error=True) as txn: + for ch_list in dup_chassis_port_host.values(): + # The first item is skipped, this is the newest element. + for ch, ch_private in ch_list[1:]: + for table in ('Chassis_Private', 'Chassis'): + txn.add(self._sb_idl.db_destroy(table, ch.name)) + class HashRingHealthCheckPeriodics(object): diff --git a/neutron/tests/functional/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_maintenance.py b/neutron/tests/functional/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_maintenance.py index 8286a3f9d71..c97feb368ba 100644 --- a/neutron/tests/functional/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_maintenance.py +++ b/neutron/tests/functional/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_maintenance.py @@ -1049,6 +1049,64 @@ def _verify_lb(test, protocol, vip_ext_port, vip_int_port): # Assert load balancer for port forwarding is gone self.assertFalse(self._find_pf_lb(router_id, fip_id)) + def test_remove_duplicated_chassis_registers(self): + hostnames = ['host1', 'host2'] + for hostname in hostnames: + for _ in range(3): + self.add_fake_chassis(hostname) + + chassis = self.sb_api.chassis_list().execute(check_error=True) + self.assertEqual(6, len(chassis)) + # Make the chassis private timestamp different + for idx, ch in enumerate(chassis): + self.sb_api.db_set('Chassis_Private', ch.name, + ('nb_cfg_timestamp', idx)).execute() + + ch_private_dict = {} # host: [ch_private1, ch_private2, ...] + for hostname in hostnames: + ch_private_list = [] + for ch in (ch for ch in chassis if ch.hostname == hostname): + ch_private = self.sb_api.lookup('Chassis_Private', ch.name, + default=None) + if ch_private: + # One of the "Chassis_Private" has been deleted on purpose + # in this test. + ch_private_list.append(ch_private) + ch_private_list.sort(key=lambda x: x.nb_cfg_timestamp, + reverse=True) + ch_private_dict[hostname] = ch_private_list + + self.maint.remove_duplicated_chassis_registers() + chassis_result = self.sb_api.chassis_list().execute(check_error=True) + self.assertEqual(2, len(chassis_result)) + for ch in chassis_result: + self.assertIn(ch.hostname, hostnames) + hostnames.remove(ch.hostname) + # From ch_private_dict[ch.hostname], we retrieve the first + # "Chassis_Private" register because these are ordered by + # timestamp. The newer one (bigger timestamp) should remain in the + # system. + ch_expected = ch_private_dict[ch.hostname][0].chassis[0] + self.assertEqual(ch_expected.name, ch.name) + + def test_remove_duplicated_chassis_registers_no_ch_private_register(self): + for _ in range(2): + self.add_fake_chassis('host1') + + chassis = self.sb_api.chassis_list().execute(check_error=True) + self.assertEqual(2, len(chassis)) + # Make the chassis private timestamp different + # Delete on of the "Chassis_Private" registers. + self.sb_api.db_destroy('Chassis_Private', chassis[0].name).execute() + self.sb_api.db_set('Chassis_Private', chassis[1].name, + ('nb_cfg_timestamp', 1)).execute() + + self.maint.remove_duplicated_chassis_registers() + chassis_result = self.sb_api.chassis_list().execute(check_error=True) + # Both "Chassis" registers are still in the DB because one + # "Chassis_Private" register was missing. + self.assertEqual(2, len(chassis_result)) + class TestLogMaintenance(_TestMaintenanceHelper, test_log_driver.LogApiTestCaseBase): diff --git a/releasenotes/notes/remove_duplicated_ovn_chassis-df12fb6233ea3d3e.yaml b/releasenotes/notes/remove_duplicated_ovn_chassis-df12fb6233ea3d3e.yaml new file mode 100644 index 00000000000..cf021363d4a --- /dev/null +++ b/releasenotes/notes/remove_duplicated_ovn_chassis-df12fb6233ea3d3e.yaml @@ -0,0 +1,17 @@ +--- +issues: + - | + When using ML2/OVN, during an upgrade procedure, the OVS system-id stored + value can be changed. The ovn-controller service will create the "Chassis" + and "Chassis_Private" registers based on this OVS system-id. If the + ovn-controller process is not gracefully stopped, that could lead to the + existence of duplicated "Chassis" and "Chassis_Private" registers in the + OVN Southbound database. +fixes: + - | + A new OVN maintenance method ``remove_duplicated_chassis_registers`` is + added. This method will periodically check the OVN Southbound "Chassis" + and "Chassis_Private" tables looking for duplicated registers. The older + ones (based on the "Chassis_Private.nb_cfg_timestamp" value) will be + removed when more than one register has the same hostname, that should + be unique. From 052c3899cb4df2386edfd942a52855b849bdd642 Mon Sep 17 00:00:00 2001 From: Rodolfo Alonso Hernandez Date: Mon, 26 Jun 2023 10:05:06 +0000 Subject: [PATCH 2/2] [OVN][L3] Optimize FIP update operation If the floating IP updates only the QoS policy, the method now skips the OVN NAT rules update and updates only the QoS policy. That avoids the OVN NAT rules deletion and creation and the ``FIPAddDeleteEvent`` event that deletes the MAC binding entries for an active floating IP, causing a disruption. Closes-Bug: #2025144 Conflicts: neutron/tests/unit/services/ovn_l3/test_plugin.py Change-Id: Ib9ec45d643c6162c526cd5a02db270094b575e34 (cherry picked from commit 7b85f9c244cde25d2dad81f51a0c96a429ddc488) (cherry picked from commit 4f2de74171cc1ec159fd70015323410cf215cc60) --- .../ovn/mech_driver/ovsdb/ovn_client.py | 28 +++++++++------ neutron/services/ovn_l3/plugin.py | 2 +- .../tests/unit/services/ovn_l3/test_plugin.py | 34 ++++++++++++++++--- 3 files changed, 47 insertions(+), 17 deletions(-) diff --git a/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/ovn_client.py b/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/ovn_client.py index 8962bb5588f..83b8452eabb 100644 --- a/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/ovn_client.py +++ b/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/ovn_client.py @@ -32,6 +32,7 @@ from neutron_lib.plugins import directory from neutron_lib.plugins import utils as p_utils from neutron_lib.services.logapi import constants as log_const +from neutron_lib.services.qos import constants as qos_consts from neutron_lib.utils import helpers from neutron_lib.utils import net as n_net from oslo_config import cfg @@ -1125,25 +1126,30 @@ def create_floatingip(self, context, floatingip): n_context.get_admin_context(), floatingip['id'], const.FLOATINGIP_STATUS_ACTIVE) - def update_floatingip(self, context, floatingip): + def update_floatingip(self, context, floatingip, fip_request=None): fip_status = None router_id = None ovn_fip = self._nb_idl.get_floatingip(floatingip['id']) + fip_request = fip_request[l3.FLOATINGIP] if fip_request else {} + qos_update_only = (len(fip_request.keys()) == 1 and + qos_consts.QOS_POLICY_ID in fip_request) check_rev_cmd = self._nb_idl.check_revision_number( floatingip['id'], floatingip, ovn_const.TYPE_FLOATINGIPS) with self._nb_idl.transaction(check_error=True) as txn: txn.add(check_rev_cmd) - if ovn_fip: - lrouter = ovn_fip['external_ids'].get( - ovn_const.OVN_ROUTER_NAME_EXT_ID_KEY, - utils.ovn_name(router_id)) - self._delete_floatingip(ovn_fip, lrouter, txn=txn) - fip_status = const.FLOATINGIP_STATUS_DOWN - - if floatingip.get('port_id'): - self._create_or_update_floatingip(floatingip, txn=txn) - fip_status = const.FLOATINGIP_STATUS_ACTIVE + # If FIP updates the QoS policy only, skip the OVN NAT rules update + if not qos_update_only: + if ovn_fip: + lrouter = ovn_fip['external_ids'].get( + ovn_const.OVN_ROUTER_NAME_EXT_ID_KEY, + utils.ovn_name(router_id)) + self._delete_floatingip(ovn_fip, lrouter, txn=txn) + fip_status = const.FLOATINGIP_STATUS_DOWN + + if floatingip.get('port_id'): + self._create_or_update_floatingip(floatingip, txn=txn) + fip_status = const.FLOATINGIP_STATUS_ACTIVE self._qos_driver.update_floatingip(txn, floatingip) diff --git a/neutron/services/ovn_l3/plugin.py b/neutron/services/ovn_l3/plugin.py index 8325995650a..d1e5110ae13 100644 --- a/neutron/services/ovn_l3/plugin.py +++ b/neutron/services/ovn_l3/plugin.py @@ -278,7 +278,7 @@ def delete_floatingip(self, context, id): def update_floatingip(self, context, id, floatingip): fip = super(OVNL3RouterPlugin, self).update_floatingip(context, id, floatingip) - self._ovn_client.update_floatingip(context, fip) + self._ovn_client.update_floatingip(context, fip, floatingip) return fip def update_floatingip_status(self, context, floatingip_id, status): diff --git a/neutron/tests/unit/services/ovn_l3/test_plugin.py b/neutron/tests/unit/services/ovn_l3/test_plugin.py index fa9fdab36a9..1f2e5f91eab 100644 --- a/neutron/tests/unit/services/ovn_l3/test_plugin.py +++ b/neutron/tests/unit/services/ovn_l3/test_plugin.py @@ -16,6 +16,7 @@ from unittest import mock from neutron_lib.api.definitions import external_net +from neutron_lib.api.definitions import l3 as l3_def from neutron_lib.api.definitions import portbindings from neutron_lib.api.definitions import provider_net as pnet from neutron_lib.callbacks import events @@ -26,12 +27,14 @@ from neutron_lib.exceptions import l3 as l3_exc from neutron_lib.plugins import constants as plugin_constants from neutron_lib.plugins import directory +from neutron_lib.services.qos import constants as qos_consts from oslo_config import cfg from oslo_utils import uuidutils from neutron.common.ovn import constants as ovn_const from neutron.common.ovn import utils from neutron.conf.plugins.ml2.drivers.ovn import ovn_conf as config +from neutron.db import extraroute_db from neutron.services.revisions import revision_plugin from neutron.tests.unit.api import test_extensions from neutron.tests.unit.extensions import test_extraroute @@ -1210,7 +1213,8 @@ def test_update_floatingip(self, uf): uf.return_value = self.fake_floating_ip_new nb_ovn.get_floatingip.return_value = ( self.fake_ovn_nat_rule) - self.l3_inst.update_floatingip(self.context, 'id', 'floatingip') + fip = {l3_def.FLOATINGIP: {'port_id': 'port1'}} + self.l3_inst.update_floatingip(self.context, 'id', fip) nb_ovn.delete_nat_rule_in_lrouter.assert_called_once_with( 'neutron-router-id', type='dnat_and_snat', @@ -1234,13 +1238,30 @@ def test_update_floatingip(self, uf): logical_port='new-port_id', external_ids=expected_ext_ids) + @mock.patch.object(extraroute_db.ExtraRoute_dbonly_mixin, + 'update_floatingip') + def test_update_floatingip_qos(self, uf): + nb_ovn = self.l3_inst._nb_ovn + nb_ovn.is_col_present.return_value = True + uf.return_value = self.fake_floating_ip_new + nb_ovn.get_floatingip.return_value = ( + self.fake_ovn_nat_rule) + fip = {l3_def.FLOATINGIP: {qos_consts.QOS_POLICY_ID: 'qos_id_1'}} + with mock.patch.object(self.l3_inst._ovn_client._qos_driver, + 'update_floatingip') as ufip: + self.l3_inst.update_floatingip(self.context, 'id', fip) + nb_ovn.delete_nat_rule_in_lrouter.assert_not_called() + nb_ovn.add_nat_rule_in_lrouter.assert_not_called() + ufip.assert_called_once_with(mock.ANY, self.fake_floating_ip_new) + @mock.patch('neutron.db.extraroute_db.ExtraRoute_dbonly_mixin.' 'update_floatingip') def test_update_floatingip_associate(self, uf): self.l3_inst._nb_ovn.is_col_present.return_value = True self.fake_floating_ip.update({'fixed_port_id': None}) uf.return_value = self.fake_floating_ip_new - self.l3_inst.update_floatingip(self.context, 'id', 'floatingip') + fip = {l3_def.FLOATINGIP: {'port_id': 'port1'}} + self.l3_inst.update_floatingip(self.context, 'id', fip) self.l3_inst._nb_ovn.delete_nat_rule_in_lrouter.assert_not_called() expected_ext_ids = { ovn_const.OVN_FIP_EXT_ID_KEY: self.fake_floating_ip_new['id'], @@ -1276,7 +1297,8 @@ def test_update_floatingip_associate_distributed(self, uf, gn): config.cfg.CONF.set_override( 'enable_distributed_floating_ip', True, group='ovn') - self.l3_inst.update_floatingip(self.context, 'id', 'floatingip') + fip = {l3_def.FLOATINGIP: {'port_id': 'port1'}} + self.l3_inst.update_floatingip(self.context, 'id', fip) self.l3_inst._nb_ovn.delete_nat_rule_in_lrouter.assert_not_called() expected_ext_ids = { ovn_const.OVN_FIP_EXT_ID_KEY: self.fake_floating_ip_new['id'], @@ -1304,7 +1326,8 @@ def test_update_floatingip_association_empty_update(self, uf): self.fake_floating_ip.update({'fixed_port_id': 'foo'}) self.fake_floating_ip_new.update({'port_id': 'foo'}) uf.return_value = self.fake_floating_ip_new - self.l3_inst.update_floatingip(self.context, 'id', 'floatingip') + fip = {l3_def.FLOATINGIP: {'port_id': 'port1'}} + self.l3_inst.update_floatingip(self.context, 'id', fip) nb_ovn.delete_nat_rule_in_lrouter.assert_called_once_with( 'neutron-router-id', type='dnat_and_snat', @@ -1339,7 +1362,8 @@ def test_update_floatingip_reassociate_to_same_port_diff_fixed_ip( self.fake_floating_ip_new.update({'port_id': 'port_id', 'fixed_port_id': 'port_id'}) uf.return_value = self.fake_floating_ip_new - self.l3_inst.update_floatingip(self.context, 'id', 'floatingip') + fip = {l3_def.FLOATINGIP: {'port_id': 'port1'}} + self.l3_inst.update_floatingip(self.context, 'id', fip) nb_ovn.delete_nat_rule_in_lrouter.assert_called_once_with( 'neutron-router-id',