Skip to content

Commit

Permalink
[ovn]Refusing to bind port to dead agent
Browse files Browse the repository at this point in the history
Closes-bug: #1958501

Change-Id: Ia84410675d28002afc74368349c9b54f048f4f4d
  • Loading branch information
zhouhenglc committed May 7, 2022
1 parent cddd2e5 commit 8a55f09
Show file tree
Hide file tree
Showing 8 changed files with 98 additions and 94 deletions.
9 changes: 9 additions & 0 deletions neutron/plugins/ml2/drivers/ovn/agent/neutron_agent.py
Expand Up @@ -254,3 +254,12 @@ def agents_by_chassis_private(self, chassis_private):
for cls in NeutronAgent.types.values()}
# Return the cached agents of agent_ids whose keys are in the cache
return (self.agents[id_] for id_ in agent_ids & self.agents.keys())

def get_agents(self, filters=None):
    """Return the cached agents that satisfy every entry in ``filters``.

    :param filters: Optional mapping of an ``as_dict()`` key to the
        accepted value(s).  A container value (list, tuple, set, ...)
        matches when it contains the agent's value; a bare string or any
        other scalar must be equal to the agent's value.  ``None`` or an
        empty mapping selects every cached agent.
    :returns: List of the matching agent objects (not their dict form).
    :raises KeyError: presumably, when a filter key is missing from an
        agent's ``as_dict()`` output -- the lookup is unguarded.
    """
    filters = filters or {}

    def _matches(actual, wanted):
        # A bare string is a single value, not a collection of characters:
        # using ``in`` would turn {'host': 'compute-10'} into a substring
        # test that also selects the agent on host 'compute-1'.
        if isinstance(wanted, (str, bytes)):
            return actual == wanted
        try:
            return actual in wanted
        except TypeError:
            # Non-container scalar (int, bool, None, ...): compare directly.
            return actual == wanted

    agent_list = []
    for agent in self.agents.values():
        # Build the dict form once per agent; every filter reads from it.
        agent_dict = agent.as_dict()
        if all(_matches(agent_dict[k], v) for k, v in filters.items()):
            agent_list.append(agent)
    return agent_list
37 changes: 20 additions & 17 deletions neutron/plugins/ml2/drivers/ovn/mech_driver/mech_driver.py
Expand Up @@ -959,8 +959,7 @@ def bind_port(self, context):

# OVN chassis information is needed to ensure a valid port bind.
# Collect port binding data and refuse binding if the OVN chassis
# cannot be found.
chassis_physnets = []
# cannot be found or is dead.
try:
# The PortContext host property contains special handling that
# we need to take into account, thus passing both the port Dict
Expand All @@ -969,14 +968,6 @@ def bind_port(self, context):
bind_host = self._ovn_client.determine_bind_host(
port,
port_context=context)
datapath_type, iface_types, chassis_physnets = (
self.sb_ovn.get_chassis_data_for_ml2_bind_port(bind_host))
iface_types = iface_types.split(',') if iface_types else []
except RuntimeError:
LOG.debug('Refusing to bind port %(port_id)s due to '
'no OVN chassis for host: %(host)s',
{'port_id': port['id'], 'host': bind_host})
return
except n_exc.InvalidInput as e:
# The port binding profile is validated both on port creation and
# update. The new rules apply to a VNIC type previously not
Expand All @@ -985,7 +976,23 @@ def bind_port(self, context):
LOG.error('Validation of binding profile unexpectedly failed '
'while attempting to bind port %s', port['id'])
raise e

agents = n_agent.AgentCache().get_agents({'host': bind_host})
if not agents:
LOG.warning('Refusing to bind port %(port_id)s due to '
'no OVN chassis for host: %(host)s',
{'port_id': port['id'], 'host': bind_host})
return
agent = agents[0]
if not agent.alive:
LOG.warning("Refusing to bind port %(pid)s to dead agent: "
"%(agent)s", {'pid': context.current['id'],
'agent': agent})
return
chassis = agent.chassis
datapath_type = chassis.external_ids.get('datapath-type', '')
iface_types = chassis.external_ids.get('iface-types', '')
iface_types = iface_types.split(',') if iface_types else []
chassis_physnets = self.sb_ovn._get_chassis_physnets(chassis)
for segment_to_bind in context.segments_to_bind:
network_type = segment_to_bind['network_type']
segmentation_id = segment_to_bind['segmentation_id']
Expand Down Expand Up @@ -1296,12 +1303,8 @@ def list_availability_zones(self, context, filters=None):
def get_agents(self, context, filters=None, fields=None, _driver=None):
    """Return the dict form of every cached OVN agent matching ``filters``.

    :param context: Request context; not used by this function.
    :param filters: Optional mapping of agent dict key to accepted
        value(s); forwarded unchanged to ``AgentCache.get_agents``.
    :param fields: Accepted for interface compatibility; not used here.
    :param _driver: Mechanism driver instance; its ``ping_all_chassis()``
        is invoked before the cache is read (presumably to refresh agent
        liveness data -- confirm against the driver).
    :returns: List of ``agent.as_dict()`` results.
    """
    _driver.ping_all_chassis()
    filters = filters or {}
    # Filtering lives in AgentCache.get_agents so that bind_port and this
    # API entry point share a single implementation.
    agent_list = n_agent.AgentCache().get_agents(filters)
    return [agent.as_dict() for agent in agent_list]


def get_agent(self, context, id, fields=None, _driver=None):
Expand Down
13 changes: 0 additions & 13 deletions neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/api.py
Expand Up @@ -658,16 +658,3 @@ def get_all_chassis(self, chassis_type=None):
:param chassis_type: The type of chassis
:type chassis_type: string
"""

@abc.abstractmethod
def get_chassis_data_for_ml2_bind_port(self, hostname):
"""Return chassis data for ML2 port binding.
@param hostname: The hostname of the chassis
@type hostname: string
:returns: Tuple containing the chassis datapath type,
iface types and physical networks for the
OVN bridge mappings.
:raises: RuntimeError exception if an OVN chassis
does not exist.
"""
11 changes: 0 additions & 11 deletions neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/impl_idl_ovn.py
Expand Up @@ -895,17 +895,6 @@ def get_chassis_by_card_serial_from_cms_options(self,
card_serial_number)
raise RuntimeError(msg)

def get_chassis_data_for_ml2_bind_port(self, hostname):
try:
cmd = self.db_find_rows('Chassis', ('hostname', '=', hostname))
chassis = next(c for c in cmd.execute(check_error=True))
except StopIteration:
msg = _('Chassis with hostname %s does not exist') % hostname
raise RuntimeError(msg)
return (chassis.external_ids.get('datapath-type', ''),
chassis.external_ids.get('iface-types', ''),
self._get_chassis_physnets(chassis))

def get_metadata_port_network(self, network):
# TODO(twilson) This function should really just take a Row/RowView
try:
Expand Down
Expand Up @@ -83,13 +83,6 @@ def test_get_all_chassis(self):
our_chassis = {c['name'] for c in self.data['chassis']}
self.assertLessEqual(our_chassis, chassis_list)

def test_get_chassis_data_for_ml2_bind_port(self):
host = self.data['chassis'][0]['hostname']
dp, iface, phys = self.api.get_chassis_data_for_ml2_bind_port(host)
self.assertEqual('', dp)
self.assertEqual('', iface)
self.assertCountEqual(phys, ['private', 'public'])

def test_chassis_exists(self):
self.assertTrue(self.api.chassis_exists(
self.data['chassis'][0]['hostname']))
Expand Down
5 changes: 2 additions & 3 deletions neutron/tests/unit/fake_resources.py
Expand Up @@ -175,9 +175,8 @@ def __init__(self, **kwargs):
self.get_chassis_and_azs = mock.Mock()
self.get_chassis_and_azs.return_value = {}
self.get_all_chassis = mock.Mock()
self.get_chassis_data_for_ml2_bind_port = mock.Mock()
self.get_chassis_data_for_ml2_bind_port.return_value = \
('fake', '', ['fake-physnet'])
self._get_chassis_physnets = mock.Mock()
self._get_chassis_physnets.return_value = ['fake-physnet']
self.get_chassis_and_physnets = mock.Mock()
self.get_gateway_chassis_from_cms_options = mock.Mock()
self.is_col_present = mock.Mock()
Expand Down
Expand Up @@ -83,6 +83,52 @@ def setUp(self):
self.mech_driver.sb_ovn = fakes.FakeOvsdbSbOvnIdl()
self.mech_driver._post_fork_event.set()
self.mech_driver._ovn_client._qos_driver = mock.Mock()
neutron_agent.AgentCache(self.mech_driver)
# Because AgentCache is a singleton and we get a new mech_driver each
# setUp(), override the AgentCache driver.
neutron_agent.AgentCache().driver = self.mech_driver
agent1 = self._add_agent('agent1')
neutron_agent.AgentCache().get_agents = mock.Mock()
neutron_agent.AgentCache().get_agents.return_value = [agent1]

def _add_chassis(self, nb_cfg, name=None):
    """Build a mock Chassis_Private-like row for use in agent tests.

    :param nb_cfg: Value stored on the row's ``nb_cfg`` attribute.
    :param name: Row name; a random UUID string is used when falsy.
    :returns: A ``mock.Mock`` carrying ``nb_cfg``, ``uuid`` and ``name``.
    """
    fake_row = mock.Mock(nb_cfg=nb_cfg, uuid=uuid.uuid4())
    # ``name`` is reserved by the Mock constructor (it names the mock
    # itself), so the attribute has to be assigned after creation.
    fake_row.name = name or str(uuid.uuid4())
    return fake_row

def _add_chassis_agent(self, nb_cfg, agent_type, chassis_private=None,
                       updated_at=None):
    """Register an agent of ``agent_type`` in the AgentCache.

    :param nb_cfg: Config sequence number; used to create the fake row
        when none is given and stored as the metadata agent's SB cfg.
    :param agent_type: One of the ``ovn_const`` agent type constants.
    :param chassis_private: Existing fake row to reuse; a new one is made
        with ``_add_chassis`` when omitted.
    :param updated_at: Optional datetime recorded as the liveness-check
        external id and passed on to the cache update.
    :returns: The result of ``AgentCache().update(...)``.
    """
    row = chassis_private or self._add_chassis(nb_cfg)

    # A Mock fabricates any attribute on first access. Remove the
    # auto-created nb_cfg_timestamp so later reads raise AttributeError
    # instead of yielding a Mock (presumably so the agent code falls back
    # to ``updated_at`` -- confirm against the agent implementation).
    if isinstance(getattr(row, 'nb_cfg_timestamp', None), mock.Mock):
        del row.nb_cfg_timestamp

    ext_ids = {}
    if updated_at:
        ext_ids[ovn_const.OVN_LIVENESS_CHECK_EXT_ID_KEY] = (
            datetime.datetime.isoformat(updated_at))
    if agent_type == ovn_const.OVN_METADATA_AGENT:
        ext_ids[ovn_const.OVN_AGENT_METADATA_SB_CFG_KEY] = nb_cfg
        ext_ids[ovn_const.OVN_AGENT_METADATA_ID_KEY] = str(uuid.uuid4())
    row.external_ids = ext_ids

    # The same fake row doubles as its own Chassis reference.
    row.chassis = [row]
    return neutron_agent.AgentCache().update(agent_type, row, updated_at)

def _add_agent(self, name, alive=True):
    """Register an OVN controller agent in the AgentCache and return it.

    :param name: Name given to the fake chassis row backing the agent.
    :param alive: When True the agent gets a fresh heartbeat and an
        up-to-date nb_cfg; when False it gets a heartbeat older than
        ``agent_down_time`` and an nb_cfg lagging behind NB_Global.
    :returns: The value returned by ``_add_chassis_agent``.
    """
    nb_cfg = 5
    now = timeutils.utcnow(with_timezone=True)
    if alive:
        # Fresh heartbeat and a chassis that has caught up with NB_Global.
        updated_at = now
        self.mech_driver.nb_ovn.nb_global.nb_cfg = nb_cfg
    else:
        # agent_down_time is configured in seconds (per the Neutron
        # configuration reference), while timedelta's first positional
        # argument is days -- name the unit explicitly.
        updated_at = now - datetime.timedelta(
            seconds=cfg.CONF.agent_down_time + 1)
        # NOTE(review): the liveness check is not visible from here. Both
        # indicators are made to agree (stale heartbeat AND chassis nb_cfg
        # lagging NB_Global) so the agent should read as dead whichever
        # one the check consults -- confirm against NeutronAgent.alive.
        self.mech_driver.nb_ovn.nb_global.nb_cfg = nb_cfg + 2
    chassis = self._add_chassis(nb_cfg, name=name)
    return self._add_chassis_agent(
        nb_cfg, ovn_const.OVN_CONTROLLER_AGENT, chassis, updated_at)


class TestOVNMechanismDriverBase(MechDriverSetupBase,
Expand Down Expand Up @@ -113,10 +159,6 @@ def setUp(self):
cfg.CONF.set_override('ovsdb_connection_timeout', 30, group='ovn')
mock.patch.object(impl_idl_ovn.Backend, 'schema_helper').start()
super().setUp()
neutron_agent.AgentCache(self.mech_driver)
# Because AgentCache is a singleton and we get a new mech_driver each
# setUp(), override the AgentCache driver.
neutron_agent.AgentCache().driver = self.mech_driver

self.nb_ovn = self.mech_driver.nb_ovn
self.sb_ovn = self.mech_driver.sb_ovn
Expand Down Expand Up @@ -1189,7 +1231,7 @@ def test_bind_port_unsupported_vnic_type(self):
attrs={'binding:vnic_type': 'unknown'}).info()
fake_port_context = fakes.FakePortContext(fake_port, 'host', [])
self.mech_driver.bind_port(fake_port_context)
self.sb_ovn.get_chassis_data_for_ml2_bind_port.assert_not_called()
neutron_agent.AgentCache().get_agents.assert_not_called()
fake_port_context.set_binding.assert_not_called()

def _test_bind_port_failed(self, fake_segments):
Expand All @@ -1198,13 +1240,12 @@ def _test_bind_port_failed(self, fake_segments):
fake_port_context = fakes.FakePortContext(
fake_port, fake_host, fake_segments)
self.mech_driver.bind_port(fake_port_context)
self.sb_ovn.get_chassis_data_for_ml2_bind_port.assert_called_once_with(
fake_host)
neutron_agent.AgentCache().get_agents.assert_called_once_with(
{'host': fake_host})
fake_port_context.set_binding.assert_not_called()

def test_bind_port_host_not_found(self):
    """bind_port must not bind when no agent is cached for the host."""
    # bind_port looks the host up through AgentCache.get_agents; an empty
    # result is the "host not found" condition.
    neutron_agent.AgentCache().get_agents.return_value = []
    self._test_bind_port_failed([])

def test_bind_port_no_segments_to_bind(self):
Expand All @@ -1218,14 +1259,19 @@ def test_bind_port_physnet_not_found(self):
[fakes.FakeSegment.create_one_segment(attrs=segment_attrs).info()]
self._test_bind_port_failed(fake_segments)

def test_bind_port_host_not_alive(self):
    """bind_port must not bind when the host's agent is reported dead."""
    dead_agent = self._add_agent('agent_no_alive', alive=False)
    agent_cache = neutron_agent.AgentCache()
    agent_cache.get_agents.return_value = [dead_agent]
    # NOTE(review): no segments are offered here, so set_binding would
    # presumably stay uncalled even for a live agent -- consider passing a
    # bindable segment and asserting ``dead_agent.alive`` is False.
    self._test_bind_port_failed([])

def _test_bind_port(self, fake_segments):
fake_port = fakes.FakePort.create_one_port().info()
fake_host = 'host'
fake_port_context = fakes.FakePortContext(
fake_port, fake_host, fake_segments)
self.mech_driver.bind_port(fake_port_context)
self.sb_ovn.get_chassis_data_for_ml2_bind_port.assert_called_once_with(
fake_host)
neutron_agent.AgentCache().get_agents.assert_called_once_with(
{'host': fake_host})
fake_port_context.set_binding.assert_called_once_with(
fake_segments[0]['id'],
portbindings.VIF_TYPE_OVS,
Expand All @@ -1241,8 +1287,8 @@ def _test_bind_port_sriov(self, fake_segments):
fake_port_context = fakes.FakePortContext(
fake_port, fake_host, fake_segments)
self.mech_driver.bind_port(fake_port_context)
self.sb_ovn.get_chassis_data_for_ml2_bind_port.assert_called_once_with(
fake_host)
neutron_agent.AgentCache().get_agents.assert_called_once_with(
{'host': fake_host})
fake_port_context.set_binding.assert_called_once_with(
fake_segments[0]['id'],
portbindings.VIF_TYPE_OVS,
Expand Down Expand Up @@ -1271,8 +1317,8 @@ def _test_bind_port_remote_managed(self, fake_segments):
fake_port_context = fakes.FakePortContext(
fake_port, fake_host, fake_segments)
self.mech_driver.bind_port(fake_port_context)
self.sb_ovn.get_chassis_data_for_ml2_bind_port.assert_called_once_with(
fake_smartnic_dpu)
neutron_agent.AgentCache().get_agents.assert_called_once_with(
{'host': fake_smartnic_dpu})
fake_port_context.set_binding.assert_called_once_with(
fake_segments[0]['id'],
portbindings.VIF_TYPE_OVS,
Expand All @@ -1292,8 +1338,8 @@ def test_bind_port_vdpa(self):
fake_port_context = fakes.FakePortContext(
fake_port, fake_host, fake_segments)
self.mech_driver.bind_port(fake_port_context)
self.sb_ovn.get_chassis_data_for_ml2_bind_port.assert_called_once_with(
fake_host)
neutron_agent.AgentCache().get_agents.assert_called_once_with(
{'host': fake_host})
fake_port_context.set_binding.assert_called_once_with(
fake_segments[0]['id'],
portbindings.VIF_TYPE_OVS,
Expand Down Expand Up @@ -2072,32 +2118,6 @@ def test_update_port_postcommit_revision_mismatch_not_after_live_migration(
self.assertEqual(1, mock_update_port.call_count)
mock_notify_dhcp.assert_called_with(fake_port['id'])

def _add_chassis(self, nb_cfg):
chassis_private = mock.Mock()
chassis_private.nb_cfg = nb_cfg
chassis_private.uuid = uuid.uuid4()
chassis_private.name = str(uuid.uuid4())
return chassis_private

def _add_chassis_agent(self, nb_cfg, agent_type, chassis_private=None,
updated_at=None):
chassis_private = chassis_private or self._add_chassis(nb_cfg)
if hasattr(chassis_private, 'nb_cfg_timestamp') and isinstance(
chassis_private.nb_cfg_timestamp, mock.Mock):
del chassis_private.nb_cfg_timestamp
chassis_private.external_ids = {}
if updated_at:
chassis_private.external_ids[
ovn_const.OVN_LIVENESS_CHECK_EXT_ID_KEY] = \
datetime.datetime.isoformat(updated_at)
if agent_type == ovn_const.OVN_METADATA_AGENT:
chassis_private.external_ids.update({
ovn_const.OVN_AGENT_METADATA_SB_CFG_KEY: nb_cfg,
ovn_const.OVN_AGENT_METADATA_ID_KEY: str(uuid.uuid4())})
chassis_private.chassis = [chassis_private]
return neutron_agent.AgentCache().update(agent_type, chassis_private,
updated_at)

def test_agent_alive_true(self):
chassis_private = self._add_chassis(5)
for agent_type in (ovn_const.OVN_CONTROLLER_AGENT,
Expand Down
@@ -0,0 +1,4 @@
---
features:
- |
OVN mechanism driver refuses to bind a port to a dead agent.

0 comments on commit 8a55f09

Please sign in to comment.