diff --git a/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/maintenance.py b/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/maintenance.py index 415530541ec..d68fe46ab4e 100644 --- a/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/maintenance.py +++ b/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/maintenance.py @@ -997,6 +997,63 @@ def check_fair_meter_consistency(self): from_reload=True) raise periodics.NeverAgain() + @periodics.periodic(spacing=300, run_immediately=True) + def remove_duplicated_chassis_registers(self): + """Remove the "Chassis" and "Chassis_Private" duplicated registers. + + When the ovn-controller service of a node is updated and the system-id + is changed, if the old service is not stopped gracefully, it will leave + a "Chassis" and a "Chassis_Private" registers on the OVN SB database. + These leftovers must be removed. + + NOTE: this method is executed every 5 minutes. If a new chassis is + added, this method will perform again the clean-up process. + + NOTE: this method can be executed only if the OVN SB has the + "Chassis_Private" table. Otherwise, is not possible to find out which + register is newer and thus must be kept in the database. + """ + if not self._sb_idl.is_table_present('Chassis_Private'): + raise periodics.NeverAgain() + + if not self.has_lock: + return + + # dup_chassis_port_host = {host_name: [(ch1, ch_private1), + # (ch2, ch_private2), ... ]} + dup_chassis_port_host = {} + chassis = self._sb_idl.chassis_list().execute(check_error=True) + chassis_hostnames = {ch.hostname for ch in chassis} + # Find the duplicated "Chassis" and "Chassis_Private" registers, + # comparing the hostname. + for hostname in chassis_hostnames: + ch_list = [] + # Find these chassis matching the hostname and create a list. + for ch in (ch for ch in chassis if ch.hostname == hostname): + ch_private = self._sb_idl.lookup('Chassis_Private', ch.name, + default=None) + if ch_private: + ch_list.append((ch, ch_private)) + + # If the chassis list > 1, then we have duplicated chassis. + if len(ch_list) > 1: + # Order ch_list by Chassis_Private.nb_cfg_timestamp, from newer + # (greater value) to older. + ch_list.sort(key=lambda x: x[1].nb_cfg_timestamp, reverse=True) + dup_chassis_port_host[hostname] = ch_list + + if not dup_chassis_port_host: + return + + # Remove the "Chassis" and "Chassis_Private" registers with the + # older Chassis_Private.nb_cfg_timestamp. + with self._sb_idl.transaction(check_error=True) as txn: + for ch_list in dup_chassis_port_host.values(): + # The first item is skipped, this is the newest element. + for ch, ch_private in ch_list[1:]: + for table in ('Chassis_Private', 'Chassis'): + txn.add(self._sb_idl.db_destroy(table, ch.name)) + class HashRingHealthCheckPeriodics(object): diff --git a/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/ovn_client.py b/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/ovn_client.py index 0f1d1e69d6f..b6b3980e7ff 100644 --- a/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/ovn_client.py +++ b/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/ovn_client.py @@ -32,6 +32,7 @@ from neutron_lib.plugins import directory from neutron_lib.plugins import utils as p_utils from neutron_lib.services.logapi import constants as log_const +from neutron_lib.services.qos import constants as qos_consts from neutron_lib.utils import helpers from neutron_lib.utils import net as n_net from oslo_config import cfg @@ -1160,25 +1161,30 @@ def create_floatingip(self, context, floatingip): n_context.get_admin_context(), floatingip['id'], const.FLOATINGIP_STATUS_ACTIVE) - def update_floatingip(self, context, floatingip): + def update_floatingip(self, context, floatingip, fip_request=None): fip_status = None router_id = None ovn_fip = self._nb_idl.get_floatingip(floatingip['id']) + fip_request = fip_request[l3.FLOATINGIP] if fip_request else {} + qos_update_only = (len(fip_request.keys()) == 1 and + qos_consts.QOS_POLICY_ID in fip_request) check_rev_cmd = self._nb_idl.check_revision_number( floatingip['id'], floatingip, ovn_const.TYPE_FLOATINGIPS) with self._nb_idl.transaction(check_error=True) as txn: txn.add(check_rev_cmd) - if ovn_fip: - lrouter = ovn_fip['external_ids'].get( - ovn_const.OVN_ROUTER_NAME_EXT_ID_KEY, - utils.ovn_name(router_id)) - self._delete_floatingip(ovn_fip, lrouter, txn=txn) - fip_status = const.FLOATINGIP_STATUS_DOWN - - if floatingip.get('port_id'): - self._create_or_update_floatingip(floatingip, txn=txn) - fip_status = const.FLOATINGIP_STATUS_ACTIVE + # If FIP updates the QoS policy only, skip the OVN NAT rules update + if not qos_update_only: + if ovn_fip: + lrouter = ovn_fip['external_ids'].get( + ovn_const.OVN_ROUTER_NAME_EXT_ID_KEY, + utils.ovn_name(router_id)) + self._delete_floatingip(ovn_fip, lrouter, txn=txn) + fip_status = const.FLOATINGIP_STATUS_DOWN + + if floatingip.get('port_id'): + self._create_or_update_floatingip(floatingip, txn=txn) + fip_status = const.FLOATINGIP_STATUS_ACTIVE self._qos_driver.update_floatingip(txn, floatingip) diff --git a/neutron/services/ovn_l3/plugin.py b/neutron/services/ovn_l3/plugin.py index 8325995650a..d1e5110ae13 100644 --- a/neutron/services/ovn_l3/plugin.py +++ b/neutron/services/ovn_l3/plugin.py @@ -278,7 +278,7 @@ def delete_floatingip(self, context, id): def update_floatingip(self, context, id, floatingip): fip = super(OVNL3RouterPlugin, self).update_floatingip(context, id, floatingip) - self._ovn_client.update_floatingip(context, fip) + self._ovn_client.update_floatingip(context, fip, floatingip) return fip def update_floatingip_status(self, context, floatingip_id, status): diff --git a/neutron/tests/functional/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_maintenance.py b/neutron/tests/functional/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_maintenance.py index 8286a3f9d71..c97feb368ba 100644 --- a/neutron/tests/functional/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_maintenance.py +++ b/neutron/tests/functional/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_maintenance.py @@ -1049,6 +1049,64 @@ def _verify_lb(test, protocol, vip_ext_port, vip_int_port): # Assert load balancer for port forwarding is gone self.assertFalse(self._find_pf_lb(router_id, fip_id)) + def test_remove_duplicated_chassis_registers(self): + hostnames = ['host1', 'host2'] + for hostname in hostnames: + for _ in range(3): + self.add_fake_chassis(hostname) + + chassis = self.sb_api.chassis_list().execute(check_error=True) + self.assertEqual(6, len(chassis)) + # Make the chassis private timestamp different + for idx, ch in enumerate(chassis): + self.sb_api.db_set('Chassis_Private', ch.name, + ('nb_cfg_timestamp', idx)).execute() + + ch_private_dict = {} # host: [ch_private1, ch_private2, ...] + for hostname in hostnames: + ch_private_list = [] + for ch in (ch for ch in chassis if ch.hostname == hostname): + ch_private = self.sb_api.lookup('Chassis_Private', ch.name, + default=None) + if ch_private: + # One of the "Chassis_Private" has been deleted on purpose + # in this test. + ch_private_list.append(ch_private) + ch_private_list.sort(key=lambda x: x.nb_cfg_timestamp, + reverse=True) + ch_private_dict[hostname] = ch_private_list + + self.maint.remove_duplicated_chassis_registers() + chassis_result = self.sb_api.chassis_list().execute(check_error=True) + self.assertEqual(2, len(chassis_result)) + for ch in chassis_result: + self.assertIn(ch.hostname, hostnames) + hostnames.remove(ch.hostname) + # From ch_private_dict[ch.hostname], we retrieve the first + # "Chassis_Private" register because these are ordered by + # timestamp. The newer one (bigger timestamp) should remain in the + # system. + ch_expected = ch_private_dict[ch.hostname][0].chassis[0] + self.assertEqual(ch_expected.name, ch.name) + + def test_remove_duplicated_chassis_registers_no_ch_private_register(self): + for _ in range(2): + self.add_fake_chassis('host1') + + chassis = self.sb_api.chassis_list().execute(check_error=True) + self.assertEqual(2, len(chassis)) + # Make the chassis private timestamp different + # Delete on of the "Chassis_Private" registers. + self.sb_api.db_destroy('Chassis_Private', chassis[0].name).execute() + self.sb_api.db_set('Chassis_Private', chassis[1].name, + ('nb_cfg_timestamp', 1)).execute() + + self.maint.remove_duplicated_chassis_registers() + chassis_result = self.sb_api.chassis_list().execute(check_error=True) + # Both "Chassis" registers are still in the DB because one + # "Chassis_Private" register was missing. + self.assertEqual(2, len(chassis_result)) + class TestLogMaintenance(_TestMaintenanceHelper, test_log_driver.LogApiTestCaseBase): diff --git a/neutron/tests/unit/services/ovn_l3/test_plugin.py b/neutron/tests/unit/services/ovn_l3/test_plugin.py index fa9fdab36a9..1f2e5f91eab 100644 --- a/neutron/tests/unit/services/ovn_l3/test_plugin.py +++ b/neutron/tests/unit/services/ovn_l3/test_plugin.py @@ -16,6 +16,7 @@ from unittest import mock from neutron_lib.api.definitions import external_net +from neutron_lib.api.definitions import l3 as l3_def from neutron_lib.api.definitions import portbindings from neutron_lib.api.definitions import provider_net as pnet from neutron_lib.callbacks import events @@ -26,12 +27,14 @@ from neutron_lib.exceptions import l3 as l3_exc from neutron_lib.plugins import constants as plugin_constants from neutron_lib.plugins import directory +from neutron_lib.services.qos import constants as qos_consts from oslo_config import cfg from oslo_utils import uuidutils from neutron.common.ovn import constants as ovn_const from neutron.common.ovn import utils from neutron.conf.plugins.ml2.drivers.ovn import ovn_conf as config +from neutron.db import extraroute_db from neutron.services.revisions import revision_plugin from neutron.tests.unit.api import test_extensions from neutron.tests.unit.extensions import test_extraroute @@ -1210,7 +1213,8 @@ def test_update_floatingip(self, uf): uf.return_value = self.fake_floating_ip_new nb_ovn.get_floatingip.return_value = ( self.fake_ovn_nat_rule) - self.l3_inst.update_floatingip(self.context, 'id', 'floatingip') + fip = {l3_def.FLOATINGIP: {'port_id': 'port1'}} + self.l3_inst.update_floatingip(self.context, 'id', fip) nb_ovn.delete_nat_rule_in_lrouter.assert_called_once_with( 'neutron-router-id', type='dnat_and_snat', @@ -1234,13 +1238,30 @@ def test_update_floatingip(self, uf): logical_port='new-port_id', external_ids=expected_ext_ids) + @mock.patch.object(extraroute_db.ExtraRoute_dbonly_mixin, + 'update_floatingip') + def test_update_floatingip_qos(self, uf): + nb_ovn = self.l3_inst._nb_ovn + nb_ovn.is_col_present.return_value = True + uf.return_value = self.fake_floating_ip_new + nb_ovn.get_floatingip.return_value = ( + self.fake_ovn_nat_rule) + fip = {l3_def.FLOATINGIP: {qos_consts.QOS_POLICY_ID: 'qos_id_1'}} + with mock.patch.object(self.l3_inst._ovn_client._qos_driver, + 'update_floatingip') as ufip: + self.l3_inst.update_floatingip(self.context, 'id', fip) + nb_ovn.delete_nat_rule_in_lrouter.assert_not_called() + nb_ovn.add_nat_rule_in_lrouter.assert_not_called() + ufip.assert_called_once_with(mock.ANY, self.fake_floating_ip_new) + @mock.patch('neutron.db.extraroute_db.ExtraRoute_dbonly_mixin.' 'update_floatingip') def test_update_floatingip_associate(self, uf): self.l3_inst._nb_ovn.is_col_present.return_value = True self.fake_floating_ip.update({'fixed_port_id': None}) uf.return_value = self.fake_floating_ip_new - self.l3_inst.update_floatingip(self.context, 'id', 'floatingip') + fip = {l3_def.FLOATINGIP: {'port_id': 'port1'}} + self.l3_inst.update_floatingip(self.context, 'id', fip) self.l3_inst._nb_ovn.delete_nat_rule_in_lrouter.assert_not_called() expected_ext_ids = { ovn_const.OVN_FIP_EXT_ID_KEY: self.fake_floating_ip_new['id'], @@ -1276,7 +1297,8 @@ def test_update_floatingip_associate_distributed(self, uf, gn): config.cfg.CONF.set_override( 'enable_distributed_floating_ip', True, group='ovn') - self.l3_inst.update_floatingip(self.context, 'id', 'floatingip') + fip = {l3_def.FLOATINGIP: {'port_id': 'port1'}} + self.l3_inst.update_floatingip(self.context, 'id', fip) self.l3_inst._nb_ovn.delete_nat_rule_in_lrouter.assert_not_called() expected_ext_ids = { ovn_const.OVN_FIP_EXT_ID_KEY: self.fake_floating_ip_new['id'], @@ -1304,7 +1326,8 @@ def test_update_floatingip_association_empty_update(self, uf): self.fake_floating_ip.update({'fixed_port_id': 'foo'}) self.fake_floating_ip_new.update({'port_id': 'foo'}) uf.return_value = self.fake_floating_ip_new - self.l3_inst.update_floatingip(self.context, 'id', 'floatingip') + fip = {l3_def.FLOATINGIP: {'port_id': 'port1'}} + self.l3_inst.update_floatingip(self.context, 'id', fip) nb_ovn.delete_nat_rule_in_lrouter.assert_called_once_with( 'neutron-router-id', type='dnat_and_snat', @@ -1339,7 +1362,8 @@ def test_update_floatingip_reassociate_to_same_port_diff_fixed_ip( self.fake_floating_ip_new.update({'port_id': 'port_id', 'fixed_port_id': 'port_id'}) uf.return_value = self.fake_floating_ip_new - self.l3_inst.update_floatingip(self.context, 'id', 'floatingip') + fip = {l3_def.FLOATINGIP: {'port_id': 'port1'}} + self.l3_inst.update_floatingip(self.context, 'id', fip) nb_ovn.delete_nat_rule_in_lrouter.assert_called_once_with( 'neutron-router-id', diff --git a/releasenotes/notes/remove_duplicated_ovn_chassis-df12fb6233ea3d3e.yaml b/releasenotes/notes/remove_duplicated_ovn_chassis-df12fb6233ea3d3e.yaml new file mode 100644 index 00000000000..cf021363d4a --- /dev/null +++ b/releasenotes/notes/remove_duplicated_ovn_chassis-df12fb6233ea3d3e.yaml @@ -0,0 +1,17 @@ +--- +issues: + - | + When using ML2/OVN, during an upgrade procedure, the OVS system-id stored + value can be changed. The ovn-controller service will create the "Chassis" + and "Chassis_Private" registers based on this OVS system-id. If the + ovn-controller process is not gracefully stopped, that could lead to the + existence of duplicated "Chassis" and "Chassis_Private" registers in the + OVN Southbound database. +fixes: + - | + A new OVN maintenance method ``remove_duplicated_chassis_registers`` is + added. This method will periodically check the OVN Southbound "Chassis" + and "Chassis_Private" tables looking for duplicated registers. The older + ones (based on the "Chassis_Private.nb_cfg_timestamp" value) will be + removed when more than one register has the same hostname, that should + be unique.